@gleanwork/mcp-server-tester 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +421 -0
- package/dist/cli/index.js +2785 -0
- package/dist/fixtures/mcp.d.ts +605 -0
- package/dist/fixtures/mcp.js +2378 -0
- package/dist/fixtures/mcp.js.map +1 -0
- package/dist/fixtures/mcpAuth.d.ts +31 -0
- package/dist/fixtures/mcpAuth.js +317 -0
- package/dist/fixtures/mcpAuth.js.map +1 -0
- package/dist/index.cjs +3658 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +3857 -0
- package/dist/index.d.ts +3857 -0
- package/dist/index.js +3582 -0
- package/dist/index.js.map +1 -0
- package/dist/reporters/mcpReporter.cjs +301 -0
- package/dist/reporters/mcpReporter.cjs.map +1 -0
- package/dist/reporters/mcpReporter.d.cts +85 -0
- package/dist/reporters/mcpReporter.d.ts +85 -0
- package/dist/reporters/mcpReporter.js +297 -0
- package/dist/reporters/mcpReporter.js.map +1 -0
- package/dist/reporters/ui-dist/app.js +174 -0
- package/dist/reporters/ui-dist/index.html +28 -0
- package/dist/reporters/ui-dist/styles.css +1 -0
- package/package.json +138 -0
- package/src/reporters/ui-dist/app.js +174 -0
- package/src/reporters/ui-dist/index.html +28 -0
- package/src/reporters/ui-dist/styles.css +1 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,3582 @@
|
|
|
1
|
+
import * as fs from 'fs/promises';
|
|
2
|
+
import { readFile } from 'fs/promises';
|
|
3
|
+
import * as path2 from 'path';
|
|
4
|
+
import { z } from 'zod';
|
|
5
|
+
import { expect as expect$1, test as test$1, chromium } from '@playwright/test';
|
|
6
|
+
import { discoverAuthorizationServerMetadata, startAuthorization, exchangeAuthorization } from '@modelcontextprotocol/sdk/client/auth.js';
|
|
7
|
+
import createDebug from 'debug';
|
|
8
|
+
import * as oauth2 from 'oauth4webapi';
|
|
9
|
+
import { homedir } from 'os';
|
|
10
|
+
import * as http from 'http';
|
|
11
|
+
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
|
|
12
|
+
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
|
|
13
|
+
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
|
|
14
|
+
import { query } from '@anthropic-ai/claude-agent-sdk';
|
|
15
|
+
|
|
16
|
+
// Local alias for Object.defineProperty; used by __export below.
var __defProp = Object.defineProperty;
|
|
17
|
+
// Local alias for Object.getOwnPropertyNames; used by __esm below.
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
18
|
+
// Resolves to the ambient `require` when one exists. Otherwise it is a Proxy (when Proxy is
// available) whose property reads are forwarded to `require` if it appears later, wrapping a
// fallback function that throws 'Dynamic require of "<id>" is not supported'.
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
19
|
+
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
20
|
+
}) : x)(function(x) {
|
|
21
|
+
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
22
|
+
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
23
|
+
});
|
|
24
|
+
// Wraps a one-entry object `{ "<module path>"() { ... } }` in a run-once initializer.
// The first call invokes the function stored under the object's first own property name and
// caches its return value in `res`; `fn` is set to 0 during that call, so later calls only
// return the cached `res`.
var __esm = (fn, res) => function __init() {
|
|
25
|
+
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
|
|
26
|
+
};
|
|
27
|
+
// Defines, for every key in `all`, an enumerable getter-backed property on `target`.
// Each value in `all` is used directly as the property's `get` function.
var __export = (target, all) => {
|
|
28
|
+
for (var name in all)
|
|
29
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
// src/auth/oauthClientProvider.ts
|
|
33
|
+
// Namespace object for src/auth/oauthClientProvider.ts; its properties are installed below.
var oauthClientProvider_exports = {};
|
|
34
|
+
// Each entry becomes a getter, so the binding's current value is read at access time.
// PlaywrightOAuthClientProvider is only assigned once init_oauthClientProvider() has run.
__export(oauthClientProvider_exports, {
|
|
35
|
+
PlaywrightOAuthClientProvider: () => PlaywrightOAuthClientProvider,
|
|
36
|
+
loadOAuthState: () => loadOAuthState,
|
|
37
|
+
saveOAuthState: () => saveOAuthState
|
|
38
|
+
});
|
|
39
|
+
/**
 * Reads and parses the persisted OAuth state file.
 *
 * @param {string} storagePath - Path of the JSON state file.
 * @returns {Promise<object|null>} The parsed state, or `null` when the file does not exist.
 * @throws Any read error other than ENOENT, and the SyntaxError raised for malformed JSON.
 */
async function loadOAuthState(storagePath) {
  let raw;
  try {
    raw = await fs.readFile(storagePath, "utf-8");
  } catch (readError) {
    // A missing file just means "no state saved yet".
    if (readError.code === "ENOENT") {
      return null;
    }
    throw readError;
  }
  return JSON.parse(raw);
}
|
|
50
|
+
/**
 * Persists OAuth state (tokens, client info, PKCE verifier) as pretty-printed JSON.
 *
 * The file holds bearer and refresh tokens, so it is created with owner-only
 * permissions (0o600), matching what FileOAuthStorage does for its own files.
 * The mode only applies when the file is created; an existing file keeps its mode.
 *
 * @param {string} storagePath - Destination file; missing parent directories are created.
 * @param {object} state - State to persist. Its `savedAt` field is overwritten in place
 *   with the current timestamp (callers may observe this mutation).
 * @returns {Promise<void>}
 */
async function saveOAuthState(storagePath, state) {
  state.savedAt = Date.now();
  await fs.mkdir(path2.dirname(storagePath), { recursive: true });
  await fs.writeFile(storagePath, JSON.stringify(state, null, 2), {
    encoding: "utf-8",
    mode: 0o600
  });
}
|
|
56
|
+
var PlaywrightOAuthClientProvider;
var init_oauthClientProvider = __esm({
  "src/auth/oauthClientProvider.ts"() {
    /**
     * File-backed OAuth client provider.
     *
     * Client registration data, tokens and the PKCE verifier are kept in one JSON file at
     * `config.storagePath`. The last state loaded or saved is cached on the instance.
     */
    PlaywrightOAuthClientProvider = class {
      config;
      cachedState = null;
      stateParam = null;
      /**
       * @param {object} config - Reads `storagePath`, `redirectUri`, and optionally
       *   `clientId`, `clientSecret` and `clientMetadata`.
       */
      constructor(config) {
        this.config = config;
      }
      /**
       * The URL to redirect the user agent to after authorization
       */
      get redirectUrl() {
        return this.config.redirectUri;
      }
      /**
       * Metadata about this OAuth client
       *
       * Entries in `config.clientMetadata` override the defaults built here.
       */
      get clientMetadata() {
        return {
          redirect_uris: [this.config.redirectUri],
          token_endpoint_auth_method: this.config.clientSecret ? "client_secret_basic" : "none",
          grant_types: ["authorization_code", "refresh_token"],
          response_types: ["code"],
          client_name: "@gleanwork/mcp-server-tester",
          ...this.config.clientMetadata
        };
      }
      /**
       * Returns an OAuth2 state parameter
       *
       * Generated on first use and then reused for the lifetime of this instance.
       */
      state() {
        if (!this.stateParam) {
          this.stateParam = this.generateRandomString(32);
        }
        return this.stateParam;
      }
      /**
       * Loads information about this OAuth client
       *
       * A statically configured `clientId` wins over any stored registration.
       *
       * @returns {Promise<object|undefined>} Client information, or undefined when neither
       *   the config nor the stored state has any.
       */
      async clientInformation() {
        if (this.config.clientId) {
          return {
            client_id: this.config.clientId,
            client_secret: this.config.clientSecret,
            redirect_uris: [this.config.redirectUri]
          };
        }
        const state = await this.loadState();
        if (state?.clientInfo) {
          return {
            client_id: state.clientInfo.clientId,
            client_secret: state.clientInfo.clientSecret,
            client_id_issued_at: state.clientInfo.clientIdIssuedAt,
            client_secret_expires_at: state.clientInfo.clientSecretExpiresAt,
            redirect_uris: [this.config.redirectUri]
          };
        }
        return void 0;
      }
      /**
       * Saves client information from Dynamic Client Registration
       */
      async saveClientInformation(clientInformation) {
        const state = await this.loadState() ?? this.createEmptyState();
        state.clientInfo = {
          clientId: clientInformation.client_id,
          clientSecret: clientInformation.client_secret,
          clientIdIssuedAt: clientInformation.client_id_issued_at,
          clientSecretExpiresAt: clientInformation.client_secret_expires_at
        };
        await this.saveState(state);
      }
      /**
       * Loads any existing OAuth tokens for the current session
       *
       * `expires_in` is recomputed from the stored absolute expiry, so it can be negative
       * for a token that has already expired.
       */
      async tokens() {
        const state = await this.loadState();
        if (state?.tokens) {
          return {
            access_token: state.tokens.accessToken,
            token_type: state.tokens.tokenType,
            refresh_token: state.tokens.refreshToken,
            expires_in: state.tokens.expiresAt ? Math.floor((state.tokens.expiresAt - Date.now()) / 1e3) : void 0
          };
        }
        return void 0;
      }
      /**
       * Stores new OAuth tokens for the current session
       *
       * The relative `expires_in` (seconds) is converted to an absolute timestamp in ms.
       */
      async saveTokens(tokens) {
        const state = await this.loadState() ?? this.createEmptyState();
        state.tokens = {
          accessToken: tokens.access_token,
          tokenType: tokens.token_type,
          refreshToken: tokens.refresh_token,
          expiresAt: tokens.expires_in ? Date.now() + tokens.expires_in * 1e3 : void 0
        };
        await this.saveState(state);
      }
      /**
       * Invoked to redirect the user agent to the given URL
       *
       * In a testing context, this is typically handled by Playwright automation.
       * This implementation throws an error to signal that the caller needs to
       * handle the redirect externally.
       */
      async redirectToAuthorization(authorizationUrl) {
        throw new Error(
          `OAuth authorization required. Redirect to: ${authorizationUrl.toString()}\nIn a testing context, use performOAuthSetup() in your Playwright globalSetup to complete the OAuth flow before running tests.`
        );
      }
      /**
       * Saves a PKCE code verifier for the current session
       */
      async saveCodeVerifier(codeVerifier) {
        const state = await this.loadState() ?? this.createEmptyState();
        state.codeVerifier = codeVerifier;
        await this.saveState(state);
      }
      /**
       * Loads the PKCE code verifier for the current session
       *
       * @throws {Error} When no verifier has been stored.
       */
      async codeVerifier() {
        const state = await this.loadState();
        if (!state?.codeVerifier) {
          throw new Error("No code verifier found in auth state");
        }
        return state.codeVerifier;
      }
      /**
       * Invalidates the specified credentials
       *
       * @param {"all"|"client"|"tokens"|"verifier"} scope - What to drop. "all" deletes the
       *   state file; the others remove one field and re-save. Any other value is a no-op.
       */
      async invalidateCredentials(scope) {
        const state = await this.loadState();
        if (!state) {
          return;
        }
        switch (scope) {
          case "all":
            await this.deleteState();
            break;
          case "client":
            delete state.clientInfo;
            await this.saveState(state);
            break;
          case "tokens":
            delete state.tokens;
            await this.saveState(state);
            break;
          case "verifier":
            delete state.codeVerifier;
            await this.saveState(state);
            break;
        }
      }
      // ---- Private helper methods ----
      /** Returns the cached state, else reads the state file; null when the file is missing. */
      async loadState() {
        if (this.cachedState) {
          return this.cachedState;
        }
        try {
          const content = await fs.readFile(this.config.storagePath, "utf-8");
          this.cachedState = JSON.parse(content);
          return this.cachedState;
        } catch (error) {
          if (error.code === "ENOENT") {
            return null;
          }
          throw error;
        }
      }
      /**
       * Stamps `savedAt`, updates the cache and writes the state file.
       * The file carries tokens and client secrets, so it is created owner-only (0o600);
       * an already existing file keeps whatever mode it has.
       */
      async saveState(state) {
        state.savedAt = Date.now();
        this.cachedState = state;
        const dir = path2.dirname(this.config.storagePath);
        await fs.mkdir(dir, { recursive: true });
        await fs.writeFile(
          this.config.storagePath,
          JSON.stringify(state, null, 2),
          { encoding: "utf-8", mode: 0o600 }
        );
      }
      /** Clears the cache and removes the state file; a missing file is not an error. */
      async deleteState() {
        this.cachedState = null;
        try {
          await fs.unlink(this.config.storagePath);
        } catch (error) {
          if (error.code !== "ENOENT") {
            throw error;
          }
        }
      }
      /** Builds a fresh state object containing only the `savedAt` timestamp. */
      createEmptyState() {
        return {
          savedAt: Date.now()
        };
      }
      /**
       * Returns `length` random characters from [A-Za-z0-9].
       *
       * Bytes come from crypto.getRandomValues. Bytes at or above the largest multiple of
       * the alphabet size (248 for 62 characters) are discarded and redrawn, because a plain
       * `byte % 62` would make the first 8 characters slightly more likely than the rest.
       */
      generateRandomString(length) {
        const chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
        const limit = 256 - 256 % chars.length;
        let result = "";
        while (result.length < length) {
          // Ask only for what is still missing, so the result never overshoots `length`.
          const randomValues = new Uint8Array(length - result.length);
          crypto.getRandomValues(randomValues);
          for (const randomValue of randomValues) {
            if (randomValue < limit) {
              result += chars[randomValue % chars.length];
            }
          }
        }
        return result;
      }
    };
  }
});
|
|
271
|
+
// Host capabilities: an optional free-form `sampling` record and an optional `roots` object.
var MCPHostCapabilitiesSchema = z.object({
  sampling: z.record(z.unknown()).optional(),
  roots: z.object({ listChanged: z.boolean() }).optional()
});

// OAuth settings; only `serverUrl` is mandatory.
var MCPOAuthConfigSchema = z.object({
  serverUrl: z.string().url("serverUrl must be a valid URL"),
  scopes: z.array(z.string()).optional(),
  resource: z.string().url().optional(),
  authStatePath: z.string().optional(),
  clientId: z.string().optional(),
  clientSecret: z.string().optional(),
  redirectUri: z.string().url().optional()
});

// Authentication: a static access token or an OAuth block, but never both at once.
var MCPAuthConfigSchema = z.object({
  accessToken: z.string().optional(),
  oauth: MCPOAuthConfigSchema.optional()
}).refine(
  (auth) => !auth.accessToken || !auth.oauth,
  "Cannot specify both accessToken and oauth configuration"
);

// Configuration for the "stdio" transport.
var StdioConfigSchema = z.object({
  transport: z.literal("stdio"),
  command: z.string().min(1, "command is required for stdio transport"),
  args: z.array(z.string()).optional(),
  cwd: z.string().optional(),
  capabilities: MCPHostCapabilitiesSchema.optional(),
  connectTimeoutMs: z.number().positive().optional(),
  requestTimeoutMs: z.number().positive().optional(),
  quiet: z.boolean().optional()
});

// Configuration for the "http" transport.
var HttpConfigSchema = z.object({
  transport: z.literal("http"),
  serverUrl: z.string().url("serverUrl must be a valid URL"),
  headers: z.record(z.string()).optional(),
  capabilities: MCPHostCapabilitiesSchema.optional(),
  connectTimeoutMs: z.number().positive().optional(),
  requestTimeoutMs: z.number().positive().optional(),
  auth: MCPAuthConfigSchema.optional()
});

// Either transport config, selected by the literal `transport` field.
var MCPConfigSchema = z.discriminatedUnion("transport", [StdioConfigSchema, HttpConfigSchema]);
|
|
316
|
+
/**
 * Validates a config object against MCPConfigSchema.
 *
 * @param {unknown} config - Candidate configuration.
 * @returns {object} Whatever `MCPConfigSchema.parse` returns for the input.
 * @throws Whatever `MCPConfigSchema.parse` throws for an invalid input.
 */
function validateMCPConfig(config) {
  const validated = MCPConfigSchema.parse(config);
  return validated;
}
|
|
319
|
+
/**
 * Type guard for stdio transport configs.
 *
 * A guard is typically called on values of unknown shape, so `null`/`undefined` yield
 * `false` instead of raising a TypeError.
 *
 * @param {unknown} config - Value to inspect.
 * @returns {boolean} True when `transport` is "stdio" and `command` is a string.
 */
function isStdioConfig(config) {
  return config?.transport === "stdio" && typeof config.command === "string";
}
|
|
322
|
+
/**
 * Type guard for HTTP transport configs.
 *
 * A guard is typically called on values of unknown shape, so `null`/`undefined` yield
 * `false` instead of raising a TypeError.
 *
 * @param {unknown} config - Value to inspect.
 * @returns {boolean} True when `transport` is "http" and `serverUrl` is a string.
 */
function isHttpConfig(config) {
  return config?.transport === "http" && typeof config.serverUrl === "string";
}
|
|
325
|
+
|
|
326
|
+
// src/index.ts
|
|
327
|
+
init_oauthClientProvider();
|
|
328
|
+
|
|
329
|
+
// src/auth/tokenAuth.ts
|
|
330
|
+
/**
 * Builds the HTTP `Authorization` header for a token.
 *
 * @param {string} accessToken - Token value placed after the scheme.
 * @param {string} [tokenType="Bearer"] - Authorization scheme.
 * @returns {{Authorization: string}} Header map with the single `Authorization` entry.
 */
function createTokenAuthHeaders(accessToken, tokenType = "Bearer") {
  const headerValue = `${tokenType} ${accessToken}`;
  return { Authorization: headerValue };
}
|
|
335
|
+
/**
 * Ensures an access token was supplied and is not blank.
 *
 * @param {string} accessToken - Token to check.
 * @returns {void}
 * @throws {Error} When the token is falsy, or consists only of whitespace.
 */
function validateAccessToken(accessToken) {
  if (!accessToken) {
    throw new Error("Access token is required but was not provided");
  }
  const trimmed = accessToken.trim();
  if (trimmed.length === 0) {
    throw new Error("Access token cannot be empty");
  }
}
|
|
343
|
+
/**
 * Reports whether a JWT-shaped access token carries an `exp` claim in the past.
 *
 * The signature is not verified. Anything that is not a three-part dotted string with a
 * decodable JSON payload and a numeric `exp` is treated as "not expired".
 *
 * @param {string} accessToken - Token to inspect.
 * @returns {boolean} True only when a numeric `exp` (seconds) lies before the current time.
 */
function isTokenExpired(accessToken) {
  try {
    const segments = accessToken.split(".");
    const encodedPayload = segments[1];
    if (segments.length !== 3 || !encodedPayload) {
      return false;
    }
    const json = Buffer.from(encodedPayload, "base64url").toString("utf-8");
    const claims = JSON.parse(json);
    // `exp` is in seconds; Date.now() is in milliseconds.
    return typeof claims.exp === "number" ? claims.exp * 1e3 < Date.now() : false;
  } catch {
    // Undecodable tokens are deliberately reported as not expired.
    return false;
  }
}
|
|
364
|
+
/**
 * Reports whether an absolute expiry falls within the safety buffer.
 *
 * @param {number|undefined} expiresAt - Expiry timestamp in ms; `undefined` means unknown.
 * @param {number} [bufferMs=60000] - How long before the expiry the token counts as expiring.
 * @returns {boolean} False for an undefined expiry; otherwise whether
 *   `expiresAt - bufferMs` is already in the past.
 */
function isTokenExpiringSoon(expiresAt, bufferMs = 6e4) {
  return expiresAt !== void 0 && expiresAt - bufferMs < Date.now();
}
|
|
370
|
+
|
|
371
|
+
// src/auth/setupOAuth.ts
|
|
372
|
+
init_oauthClientProvider();
|
|
373
|
+
// Common prefix for every debug channel created below.
var NAMESPACE = "mcp-server-tester";
|
|
374
|
+
// Logger for the "<NAMESPACE>:client" channel (not referenced in this part of the file).
var debugClient = createDebug(`${NAMESPACE}:client`);
|
|
375
|
+
// Logger for the "<NAMESPACE>:oauth" channel; used by the OAuth setup helpers below.
var debugOAuth = createDebug(`${NAMESPACE}:oauth`);
|
|
376
|
+
// The "<NAMESPACE>:eval" logger is created here but its return value is discarded.
createDebug(`${NAMESPACE}:eval`);
|
|
377
|
+
|
|
378
|
+
// src/auth/setupOAuth.ts
|
|
379
|
+
// Fallback for `config.timeoutMs` in performOAuthSetup: 3e4 ms = 30 seconds.
var DEFAULT_TIMEOUT_MS = 3e4;
|
|
380
|
+
// Fallback for `config.redirectUri` in performOAuthSetup.
var DEFAULT_REDIRECT_URI = "http://localhost:3000/oauth/callback";
|
|
381
|
+
/**
 * Runs a browser-driven OAuth authorization-code flow and stores the resulting tokens.
 *
 * Steps: discover the authorization server metadata, start the authorization request,
 * drive the login form in a Chromium instance, wait for the redirect back to the redirect
 * URI, exchange the code for tokens, and save the state to `config.outputPath`.
 *
 * @param {object} config - Reads `authServerUrl`, `outputPath`, `loginSelectors` and
 *   `credentials` (both via completeLoginForm), and optionally `scopes`, `resource`,
 *   `clientId`, `clientSecret`, `redirectUri` and `timeoutMs`.
 * @returns {Promise<void>}
 * @throws {Error} When metadata cannot be discovered, when the server redirects back with
 *   an `error` parameter, or when the callback carries no authorization code.
 */
async function performOAuthSetup(config) {
  const timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const redirectUri = config.redirectUri ?? DEFAULT_REDIRECT_URI;
  const metadata = await discoverAuthorizationServerMetadata(
    config.authServerUrl
  );
  if (!metadata) {
    throw new Error(
      `Could not discover OAuth metadata at ${config.authServerUrl}`
    );
  }
  const clientInformation = {
    client_id: config.clientId ?? "mcp-server-tester-client",
    client_secret: config.clientSecret
  };
  const resource = config.resource ? new URL(config.resource) : void 0;
  const { authorizationUrl, codeVerifier } = await startAuthorization(
    config.authServerUrl,
    {
      metadata,
      clientInformation,
      redirectUrl: redirectUri,
      // A config without `scopes` requests no scope instead of crashing on `.join`.
      scope: (config.scopes ?? []).join(" "),
      resource
    }
  );
  // OAUTH_DEBUG=true shows the browser window, for debugging selectors interactively.
  const browser = await chromium.launch({
    headless: process.env.OAUTH_DEBUG !== "true"
  });
  try {
    const context = await browser.newContext();
    const page = await context.newPage();
    page.setDefaultTimeout(timeoutMs);
    await page.goto(authorizationUrl.toString());
    await completeLoginForm(page, config);
    // Accept error redirects as well as successful ones. Waiting for `code` alone made a
    // denied authorization run into the timeout, and the `error` check below never fired.
    await page.waitForURL(
      (url) => url.href.startsWith(redirectUri) && (url.searchParams.has("code") || url.searchParams.has("error")),
      { timeout: timeoutMs }
    );
    const callbackUrl = new URL(page.url());
    const code = callbackUrl.searchParams.get("code");
    const error = callbackUrl.searchParams.get("error");
    if (error) {
      const errorDescription = callbackUrl.searchParams.get("error_description");
      throw new Error(
        `OAuth authorization failed: ${error}${errorDescription ? ` - ${errorDescription}` : ""}`
      );
    }
    if (!code) {
      throw new Error("No authorization code in callback URL");
    }
    const tokens = await exchangeAuthorization(config.authServerUrl, {
      metadata,
      clientInformation,
      authorizationCode: code,
      codeVerifier,
      redirectUri,
      resource
    });
    const state = {
      tokens: {
        accessToken: tokens.access_token,
        tokenType: tokens.token_type,
        refreshToken: tokens.refresh_token,
        // `expires_in` is relative seconds; store an absolute ms timestamp.
        expiresAt: tokens.expires_in ? Date.now() + tokens.expires_in * 1e3 : void 0
      },
      // Only an explicitly configured client is persisted, not the fallback client id.
      clientInfo: config.clientId ? {
        clientId: config.clientId,
        clientSecret: config.clientSecret
      } : void 0,
      codeVerifier,
      savedAt: Date.now()
    };
    await saveOAuthState(config.outputPath, state);
    debugOAuth("Auth state saved to %s", config.outputPath);
  } finally {
    // Always release the browser, including on failure paths.
    await browser.close();
  }
}
|
|
459
|
+
/**
 * Fills in and submits the identity provider's login form, then optionally confirms consent.
 *
 * @param {object} page - Page-like object providing `waitForSelector`, `fill` and `click`.
 * @param {object} config - Reads `loginSelectors` (`usernameInput`, `passwordInput`,
 *   `submitButton`, optional `consentButton`) and `credentials` (`username`, `password`).
 * @returns {Promise<void>}
 * @throws Whatever the page methods throw for the username, password or submit steps.
 */
async function completeLoginForm(page, config) {
  const { loginSelectors, credentials } = config;
  await page.waitForSelector(loginSelectors.usernameInput, {
    state: "visible"
  });
  await page.fill(loginSelectors.usernameInput, credentials.username);
  await page.waitForSelector(loginSelectors.passwordInput, {
    state: "visible"
  });
  await page.fill(loginSelectors.passwordInput, credentials.password);
  await page.waitForSelector(loginSelectors.submitButton, {
    state: "visible"
  });
  await page.click(loginSelectors.submitButton);
  if (!loginSelectors.consentButton) {
    return;
  }
  try {
    // The consent step only gets a short wait of its own.
    await page.waitForSelector(loginSelectors.consentButton, {
      state: "visible",
      timeout: 5e3
    });
    await page.click(loginSelectors.consentButton);
  } catch (error) {
    // The consent step stays best-effort, but the reason it was skipped is now logged
    // so a wrong selector can be told apart from an absent consent screen.
    debugOAuth(
      "Consent step skipped (%s): %s",
      loginSelectors.consentButton,
      error instanceof Error ? error.message : String(error)
    );
  }
}
|
|
484
|
+
/**
 * Checks whether a saved OAuth state file contains a usable access token.
 *
 * @param {string} storagePath - Path of the saved state file.
 * @returns {Promise<boolean>} False when the state or its access token is missing, when the
 *   stored expiry is less than 60 seconds away, or when loading fails for any reason.
 */
async function hasValidOAuthState(storagePath) {
  const expiryBufferMs = 6e4;
  try {
    const { loadOAuthState: loadOAuthState2 } = await Promise.resolve().then(() => (init_oauthClientProvider(), oauthClientProvider_exports));
    const saved = await loadOAuthState2(storagePath);
    if (!saved?.tokens?.accessToken) {
      return false;
    }
    const { expiresAt } = saved.tokens;
    // A missing (falsy) expiry is treated as "does not expire".
    const expiresTooSoon = Boolean(expiresAt) && expiresAt - expiryBufferMs < Date.now();
    return !expiresTooSoon;
  } catch {
    // Unreadable or corrupt state counts as "no valid state".
    return false;
  }
}
|
|
502
|
+
/**
 * Runs the OAuth setup flow only when no valid saved state exists at `config.outputPath`.
 *
 * @param {object} config - Passed unchanged to performOAuthSetup; `outputPath` is also
 *   used to look for existing state.
 * @returns {Promise<void>}
 */
async function performOAuthSetupIfNeeded(config) {
  if (await hasValidOAuthState(config.outputPath)) {
    debugOAuth("Using existing auth state from %s", config.outputPath);
    return;
  }
  debugOAuth("No valid auth state found, performing OAuth flow...");
  await performOAuthSetup(config);
}
|
|
511
|
+
// Value sent in the "MCP-Protocol-Version" request header by the discovery helpers below.
var MCP_PROTOCOL_VERSION = "2025-06-18";
|
|
512
|
+
/**
 * Discovers OAuth protected-resource metadata for an MCP server.
 *
 * For a server URL with a path, the path-aware well-known URL
 * (`<origin>/.well-known/oauth-protected-resource<path>`) is tried first and the root
 * well-known URL is used only after a 404. For a server URL without a path, the root
 * well-known URL is requested directly: appending the bare "/" would only produce a
 * trailing-slash variant of the same document and cost an extra request.
 *
 * @param {string} mcpServerUrl - Absolute URL of the MCP server.
 * @returns {Promise<{metadata: object, discoveryUrl: string, usedPathAwareDiscovery: boolean}>}
 * @throws {TypeError} When `mcpServerUrl` is not a valid URL.
 * @throws Whatever fetchProtectedResourceMetadata throws, except a 404 on the path-aware
 *   attempt, which triggers the fallback.
 */
async function discoverProtectedResource(mcpServerUrl) {
  const { origin, pathname } = new URL(mcpServerUrl);
  const baseUrl = `${origin}/.well-known/oauth-protected-resource`;
  if (pathname !== "/") {
    const pathAwareUrl = `${baseUrl}${pathname}`;
    try {
      const metadata = await fetchProtectedResourceMetadata(pathAwareUrl);
      return {
        metadata,
        discoveryUrl: pathAwareUrl,
        usedPathAwareDiscovery: true
      };
    } catch (error) {
      // Only "not found" falls back to the root document.
      if (!(error instanceof DiscoveryError && error.status === 404)) {
        throw error;
      }
    }
  }
  const metadata = await fetchProtectedResourceMetadata(baseUrl);
  return {
    metadata,
    discoveryUrl: baseUrl,
    usedPathAwareDiscovery: false
  };
}
|
|
537
|
+
/**
 * Error raised when protected-resource discovery fails.
 *
 * `status` holds the HTTP status of the failed request (undefined when the failure was not
 * an HTTP error) and `url` holds the discovery URL that was requested.
 */
var DiscoveryError = class extends Error {
  /**
   * @param {string} message - Human-readable description.
   * @param {number|undefined} status - HTTP status code, if any.
   * @param {string} url - Discovery URL involved in the failure.
   */
  constructor(message, status, url) {
    super(message);
    Object.assign(this, { status, url, name: "DiscoveryError" });
  }
};
|
|
545
|
+
/**
 * Fetches and minimally validates an OAuth protected-resource metadata document.
 *
 * @param {string} discoveryUrl - Well-known URL to request.
 * @returns {Promise<object>} The parsed metadata, guaranteed to have a truthy `resource`.
 * @throws {DiscoveryError} With the HTTP status when the response is not OK; without a
 *   status when the body is not valid JSON (original error attached as `cause`) or when
 *   the document lacks the required `resource` field.
 * @throws Whatever `fetch` rejects with for network-level failures.
 */
async function fetchProtectedResourceMetadata(discoveryUrl) {
  const response = await fetch(discoveryUrl, {
    method: "GET",
    headers: {
      Accept: "application/json",
      "MCP-Protocol-Version": MCP_PROTOCOL_VERSION
    }
  });
  if (!response.ok) {
    throw new DiscoveryError(
      `Protected resource discovery failed: ${response.status} ${response.statusText}`,
      response.status,
      discoveryUrl
    );
  }
  let metadata;
  try {
    metadata = await response.json();
  } catch (cause) {
    // Report a non-JSON body (e.g. an HTML error page) as a discovery failure, so callers
    // deal with a single error type that carries the URL.
    const error = new DiscoveryError(
      "Invalid protected resource metadata: response body is not valid JSON",
      void 0,
      discoveryUrl
    );
    error.cause = cause;
    throw error;
  }
  // `?.` covers a JSON body of `null`, which would otherwise raise a TypeError here.
  if (!metadata?.resource) {
    throw new DiscoveryError(
      'Invalid protected resource metadata: missing required "resource" field',
      void 0,
      discoveryUrl
    );
  }
  return metadata;
}
|
|
570
|
+
/**
 * Fetches authorization-server metadata for an issuer through oauth4webapi.
 *
 * @param {string} authServerUrl - Issuer URL of the authorization server.
 * @returns {Promise<{server: object, issuer: string}>} The processed metadata together with
 *   the issuer string exactly as it was passed in.
 * @throws {TypeError} When `authServerUrl` is not a valid URL.
 * @throws Whatever the oauth4webapi discovery calls reject with.
 */
async function discoverAuthorizationServer(authServerUrl) {
  const issuerUrl = new URL(authServerUrl);
  const requestOptions = {
    algorithm: "oauth2",
    headers: new Headers({
      "MCP-Protocol-Version": MCP_PROTOCOL_VERSION
    })
  };
  const discoveryResponse = await oauth2.discoveryRequest(issuerUrl, requestOptions);
  const server = await oauth2.processDiscoveryResponse(issuerUrl, discoveryResponse);
  return { server, issuer: authServerUrl };
}
|
|
584
|
+
// Names of the environment variables that loadTokensFromEnv reads, keyed by token field.
var ENV_VAR_NAMES = {
|
|
585
|
+
accessToken: "MCP_ACCESS_TOKEN",
|
|
586
|
+
refreshToken: "MCP_REFRESH_TOKEN",
|
|
587
|
+
tokenType: "MCP_TOKEN_TYPE",
|
|
588
|
+
expiresAt: "MCP_TOKEN_EXPIRES_AT"
|
|
589
|
+
};
|
|
590
|
+
// Default `bufferMs` of FileOAuthStorage.hasValidToken: 6e4 ms = 60 seconds.
var DEFAULT_EXPIRY_BUFFER_MS = 6e4;
|
|
591
|
+
/**
 * Derives a filesystem-friendly key from a server URL.
 *
 * The key is `hostname[_port][_path]`: path separators become underscores, leading and
 * trailing slashes are dropped, and every character outside `[a-zA-Z0-9_.-]` is replaced
 * with an underscore. Scheme, query and fragment do not contribute.
 *
 * @param {string} serverUrl - Absolute server URL.
 * @returns {string} The sanitized key.
 * @throws {TypeError} When `serverUrl` is not a valid URL.
 */
function generateServerKey(serverUrl) {
  const { hostname, port, pathname } = new URL(serverUrl);
  const segments = [hostname];
  if (port) {
    segments.push(port);
  }
  if (pathname && pathname !== "/") {
    const flattenedPath = pathname.replace(/^\/+|\/+$/g, "").replace(/\//g, "_");
    if (flattenedPath) {
      segments.push(flattenedPath);
    }
  }
  return segments.join("_").replace(/[^a-zA-Z0-9_.-]/g, "_");
}
|
|
605
|
+
/**
 * Resolves the directory that holds persisted OAuth state for a server.
 *
 * Resolution order:
 *   1. `<customDir>/<serverKey>` when `customDir` is given;
 *   2. Windows: `%LOCALAPPDATA%/mcp-tests/<serverKey>`, or `~/AppData/Local/mcp-tests/<serverKey>`;
 *   3. Linux with XDG_STATE_HOME set: `$XDG_STATE_HOME/mcp-tests/<serverKey>`;
 *   4. otherwise `~/.local/state/mcp-tests/<serverKey>`.
 *
 * @param {string} serverUrl - Server URL; turned into a key with generateServerKey.
 * @param {string} [customDir] - Explicit base directory.
 * @returns {string} The state directory path (nothing is created on disk).
 */
function getStateDir(serverUrl, customDir) {
  const serverKey = generateServerKey(serverUrl);
  if (customDir) {
    return path2.join(customDir, serverKey);
  }
  const inAppFolder = (...baseSegments) => path2.join(...baseSegments, "mcp-tests", serverKey);
  const { platform, env } = process;
  if (platform === "win32") {
    const localAppData = env.LOCALAPPDATA;
    return localAppData ? inAppFolder(localAppData) : inAppFolder(homedir(), "AppData", "Local");
  }
  if (platform === "linux" && env.XDG_STATE_HOME) {
    return inAppFolder(env.XDG_STATE_HOME);
  }
  return inAppFolder(homedir(), ".local", "state");
}
|
|
622
|
+
/**
 * Reads OAuth tokens from the environment variables named in ENV_VAR_NAMES.
 *
 * @returns {{accessToken: string, refreshToken: (string|undefined), tokenType: string,
 *   expiresAt: (number|undefined)}|null} The tokens, or null when no access token is set.
 *   `tokenType` defaults to "Bearer"; `expiresAt` is undefined when the variable is unset,
 *   not an integer, or parses to 0.
 */
function loadTokensFromEnv() {
  const { env } = process;
  const accessToken = env[ENV_VAR_NAMES.accessToken];
  if (!accessToken) {
    return null;
  }
  const rawExpiry = env[ENV_VAR_NAMES.expiresAt];
  const parsedExpiry = rawExpiry ? parseInt(rawExpiry, 10) : void 0;
  // NaN, 0 and undefined are all falsy, so a single `||` maps every unusable value to undefined.
  const expiresAt = parsedExpiry || void 0;
  return {
    accessToken,
    refreshToken: env[ENV_VAR_NAMES.refreshToken],
    tokenType: env[ENV_VAR_NAMES.tokenType] ?? "Bearer",
    expiresAt
  };
}
|
|
636
|
+
/**
 * Stores tokens for a server in its file-backed OAuth storage.
 *
 * @param {string} serverUrl - Server the tokens belong to.
 * @param {object} tokens - Token record, passed to the storage's `saveTokens`.
 * @param {string} [stateDir] - Optional base directory for the storage.
 * @returns {Promise<void>}
 */
async function injectTokens(serverUrl, tokens, stateDir) {
  await createFileOAuthStorage({ serverUrl, stateDir }).saveTokens(tokens);
}
|
|
640
|
+
/**
 * Loads the stored tokens for a server from its file-backed OAuth storage.
 *
 * @param {string} serverUrl - Server whose tokens to load.
 * @param {string} [stateDir] - Optional base directory for the storage.
 * @returns {Promise<object|null>} Whatever the storage's `loadTokens` resolves to.
 */
async function loadTokens(serverUrl, stateDir) {
  return createFileOAuthStorage({ serverUrl, stateDir }).loadTokens();
}
|
|
644
|
+
/**
 * Checks whether the stored access token for a server is present and not about to expire.
 *
 * @param {string} serverUrl - Server whose tokens to check.
 * @param {{stateDir?: string, bufferMs?: number}} [options] - Optional storage directory and
 *   expiry buffer; an undefined `bufferMs` lets the storage apply its own default.
 * @returns {Promise<boolean>} Result of the storage's `hasValidToken`.
 */
async function hasValidTokens(serverUrl, options) {
  const { stateDir, bufferMs } = options ?? {};
  return createFileOAuthStorage({ serverUrl, stateDir }).hasValidToken(bufferMs);
}
|
|
651
|
+
/**
 * Factory for FileOAuthStorage.
 *
 * @param {{serverUrl: string, stateDir?: string}} config - Passed to the constructor as is.
 * @returns {object} A new FileOAuthStorage instance.
 */
function createFileOAuthStorage(config) {
  const storage = new FileOAuthStorage(config);
  return storage;
}
|
|
654
|
+
/**
 * File-backed storage for per-server OAuth data.
 *
 * Three JSON files live in one state directory resolved by getStateDir:
 * `server.json` (server metadata), `client.json` (client registration) and `tokens.json`.
 */
var FileOAuthStorage = class {
  stateDir;
  /**
   * @param {{serverUrl: string, stateDir?: string}} config - Server URL and optional base
   *   directory, both passed to getStateDir.
   */
  constructor(config) {
    this.stateDir = getStateDir(config.serverUrl, config.stateDir);
  }
  get serverMetadataPath() {
    return path2.join(this.stateDir, "server.json");
  }
  get clientPath() {
    return path2.join(this.stateDir, "client.json");
  }
  get tokensPath() {
    return path2.join(this.stateDir, "tokens.json");
  }
  /** @returns {Promise<object|null>} Stored server metadata, or null when none is saved. */
  async loadServerMetadata() {
    return this.loadFile(this.serverMetadataPath);
  }
  async saveServerMetadata(metadata) {
    await this.atomicWrite(this.serverMetadataPath, metadata);
  }
  /** @returns {Promise<object|null>} Stored client registration, or null when none is saved. */
  async loadClient() {
    return this.loadFile(this.clientPath);
  }
  async saveClient(client) {
    await this.atomicWrite(this.clientPath, client);
  }
  /** @returns {Promise<object|null>} Stored tokens, or null when none are saved. */
  async loadTokens() {
    return this.loadFile(this.tokensPath);
  }
  async saveTokens(tokens) {
    await this.atomicWrite(this.tokensPath, tokens);
  }
  async deleteTokens() {
    await this.deleteFile(this.tokensPath);
  }
  /**
   * Reports whether a stored access token exists and stays valid beyond the buffer.
   *
   * @param {number} [bufferMs=DEFAULT_EXPIRY_BUFFER_MS] - Required remaining lifetime in ms.
   * @returns {Promise<boolean>} False without an access token; true when no expiry is
   *   stored; otherwise whether the expiry lies more than `bufferMs` in the future.
   */
  async hasValidToken(bufferMs = DEFAULT_EXPIRY_BUFFER_MS) {
    const tokens = await this.loadTokens();
    if (!tokens?.accessToken) {
      return false;
    }
    if (!tokens.expiresAt) {
      return true;
    }
    return tokens.expiresAt > Date.now() + bufferMs;
  }
  /**
   * Load a JSON file, returning null if not found
   */
  async loadFile(filePath) {
    try {
      const content = await fs.readFile(filePath, "utf-8");
      return JSON.parse(content);
    } catch (error) {
      if (error.code === "ENOENT") {
        return null;
      }
      throw error;
    }
  }
  /**
   * Write data atomically: write to a temporary file, then rename
   * Files are created with 0o600 permissions (user read/write only)
   *
   * Every call uses its own temporary file name, so overlapping writes to the same target
   * cannot overwrite or rename each other's temporary file. The temporary file is removed
   * if the write or the rename fails.
   */
  async atomicWrite(filePath, data) {
    // Serialize first: unserializable data must not leave anything on disk.
    const content = JSON.stringify(data, null, 2);
    await fs.mkdir(this.stateDir, { recursive: true, mode: 0o700 });
    const uniqueSuffix = `${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}`;
    const tmpPath = `${filePath}.${uniqueSuffix}.tmp`;
    try {
      await fs.writeFile(tmpPath, content, { encoding: "utf-8", mode: 0o600 });
      await fs.rename(tmpPath, filePath);
    } catch (error) {
      // Best-effort cleanup; the original failure is the error worth reporting.
      await fs.rm(tmpPath, { force: true }).catch(() => {});
      throw error;
    }
  }
  /**
   * Delete a file, ignoring errors if the file doesn't exist
   */
  async deleteFile(filePath) {
    try {
      await fs.unlink(filePath);
    } catch (error) {
      if (error.code !== "ENOENT") {
        throw error;
      }
    }
  }
};
|
|
737
|
+
/**
 * Creates a PKCE verifier/challenge pair through oauth4webapi.
 *
 * @returns {Promise<{codeVerifier: string, codeChallenge: string}>} A random verifier and
 *   the challenge computed from it.
 */
async function generatePKCE() {
  const verifier = oauth2.generateRandomCodeVerifier();
  const challenge = await oauth2.calculatePKCECodeChallenge(verifier);
  return { codeVerifier: verifier, codeChallenge: challenge };
}
|
|
745
|
+
/**
 * Creates a random OAuth `state` value through oauth4webapi.
 *
 * @returns {string} The value returned by `oauth2.generateRandomState()`.
 */
function generateState() {
  const state = oauth2.generateRandomState();
  return state;
}
|
|
748
|
+
/**
 * Builds the authorization-request URL for an authorization-code flow with PKCE (S256).
 *
 * @param {object} config - Reads `authServer.server.authorization_endpoint`, `clientId`,
 *   `redirectUri`, `scopes` (joined with spaces), `codeChallenge`, `state`, and the
 *   optional `resource`.
 * @returns {URL} The endpoint URL with the request parameters set; query parameters already
 *   present on the endpoint are kept unless they share a name with a request parameter.
 * @throws {Error} When the server metadata has no authorization endpoint.
 */
function buildAuthorizationUrl(config) {
  const { authorization_endpoint: endpoint } = config.authServer.server;
  if (!endpoint) {
    throw new Error(
      "Authorization server does not have an authorization_endpoint"
    );
  }
  const url = new URL(endpoint);
  const requestParams = [
    ["client_id", config.clientId],
    ["redirect_uri", config.redirectUri],
    ["response_type", "code"],
    ["scope", config.scopes.join(" ")],
    ["code_challenge", config.codeChallenge],
    ["code_challenge_method", "S256"],
    ["state", config.state]
  ];
  if (config.resource) {
    requestParams.push(["resource", config.resource]);
  }
  for (const [name, value] of requestParams) {
    url.searchParams.set(name, value);
  }
  return url;
}
|
|
768
|
+
/**
 * Exchange an authorization code for tokens at the token endpoint.
 *
 * The callback URL is rebuilt from `redirectUri`, `code` and `state` so the
 * `oauth2` library can validate the authorization response before the
 * authorization-code grant request is sent together with the PKCE verifier.
 *
 * @param {object} config - `authServer`, `clientId`, optional `clientSecret`,
 *   `code`, `state`, `codeVerifier`, `redirectUri`
 * @returns {Promise<{accessToken, tokenType, expiresIn, refreshToken, scope}>}
 */
async function exchangeCodeForTokens(config) {
  const { clientId, clientSecret, code, state, redirectUri, codeVerifier } = config;
  const server = config.authServer.server;
  // Confidential clients authenticate with HTTP Basic; public clients send no
  // client authentication.
  const hasSecret = Boolean(clientSecret);
  const client = {
    client_id: clientId,
    token_endpoint_auth_method: hasSecret ? "client_secret_basic" : "none"
  };
  const clientAuth = hasSecret ? oauth2.ClientSecretBasic(clientSecret) : oauth2.None();

  const callbackUrl = new URL(redirectUri);
  callbackUrl.searchParams.set("code", code);
  callbackUrl.searchParams.set("state", state);
  const validatedParams = oauth2.validateAuthResponse(server, client, callbackUrl, state);

  const tokenResponse = await oauth2.authorizationCodeGrantRequest(
    server,
    client,
    clientAuth,
    validatedParams,
    redirectUri,
    codeVerifier
  );
  const granted = await oauth2.processAuthorizationCodeResponse(server, client, tokenResponse);
  return {
    accessToken: granted.access_token,
    tokenType: granted.token_type,
    expiresIn: granted.expires_in,
    refreshToken: granted.refresh_token,
    scope: granted.scope
  };
}
|
|
804
|
+
/**
 * Obtain new tokens with a refresh-token grant.
 *
 * On a non-OK HTTP response an Error is thrown whose message is derived from
 * the response body when possible: the OAuth `error` / `error_description`
 * fields for JSON bodies, otherwise the raw text body. If the body cannot be
 * read or parsed, the message falls back to the status line.
 *
 * @param {object} config - `authServer`, `clientId`, optional `clientSecret`, `refreshToken`
 * @returns {Promise<{accessToken, tokenType, expiresIn, refreshToken, scope}>}
 * @throws {Error} when the token endpoint answers with a non-OK status
 */
async function refreshAccessToken(config) {
  const server = config.authServer.server;
  const hasSecret = Boolean(config.clientSecret);
  const client = {
    client_id: config.clientId,
    token_endpoint_auth_method: hasSecret ? "client_secret_basic" : "none"
  };
  const clientAuth = hasSecret ? oauth2.ClientSecretBasic(config.clientSecret) : oauth2.None();
  const response = await oauth2.refreshTokenGrantRequest(
    server,
    client,
    clientAuth,
    config.refreshToken
  );

  if (response.ok) {
    const granted = await oauth2.processRefreshTokenResponse(server, client, response);
    return {
      accessToken: granted.access_token,
      tokenType: granted.token_type,
      expiresIn: granted.expires_in,
      refreshToken: granted.refresh_token,
      scope: granted.scope
    };
  }

  const contentType = response.headers.get("content-type") ?? "";
  let message = `Token refresh failed: ${response.status} ${response.statusText}`;
  try {
    // Read from a clone so the original response body stays unconsumed.
    if (contentType.includes("application/json")) {
      const body = await response.clone().json();
      if (body.error) {
        message = `Token refresh failed: ${body.error}`;
        if (body.error_description) {
          message += ` - ${body.error_description}`;
        }
      }
    } else {
      const text = await response.clone().text();
      if (text) {
        message = `Token refresh failed: ${response.status} - ${text}`;
      }
    }
  } catch {
    // Intentionally ignored: an unreadable body just means the status-line
    // message built above is the best description available.
  }
  throw new Error(message);
}
|
|
851
|
+
|
|
852
|
+
// src/auth/cli.ts
|
|
853
|
+
// Debug logger for the CLI OAuth client.
var debug = createDebug("mcp-server-tester:cli-oauth");
// Fallback for `config.timeoutMs`: how long the local callback server waits
// for the OAuth redirect (5 minutes).
var DEFAULT_TIMEOUT_MS2 = 5 * 60 * 1e3;
// Fallback `client_name` sent during Dynamic Client Registration.
var DEFAULT_CLIENT_NAME = "@gleanwork/mcp-server-tester";
// Cached server metadata older than this (24 hours) is re-discovered.
var DEFAULT_METADATA_TTL_MS = 24 * 60 * 60 * 1e3;
|
|
857
|
+
var CLIOAuthClient = class {
|
|
858
|
+
config;
|
|
859
|
+
storage;
|
|
860
|
+
constructor(config) {
|
|
861
|
+
this.config = config;
|
|
862
|
+
this.storage = createFileOAuthStorage({
|
|
863
|
+
serverUrl: config.mcpServerUrl,
|
|
864
|
+
stateDir: config.stateDir
|
|
865
|
+
});
|
|
866
|
+
}
|
|
867
|
+
/**
|
|
868
|
+
* Get a valid access token, authenticating if necessary
|
|
869
|
+
*
|
|
870
|
+
* Token resolution priority:
|
|
871
|
+
* 1. Check environment variables (for CI/CD)
|
|
872
|
+
* 2. Check file storage for cached tokens
|
|
873
|
+
* 3. Try to refresh if expired but refresh token exists
|
|
874
|
+
* 4. Run full OAuth flow if needed
|
|
875
|
+
*/
|
|
876
|
+
async getAccessToken() {
|
|
877
|
+
const envTokens = loadTokensFromEnv();
|
|
878
|
+
if (envTokens) {
|
|
879
|
+
debug("Using tokens from environment variables");
|
|
880
|
+
return {
|
|
881
|
+
accessToken: envTokens.accessToken,
|
|
882
|
+
tokenType: envTokens.tokenType,
|
|
883
|
+
expiresAt: envTokens.expiresAt,
|
|
884
|
+
refreshed: false,
|
|
885
|
+
fromEnv: true
|
|
886
|
+
};
|
|
887
|
+
}
|
|
888
|
+
const storedTokens = await this.storage.loadTokens();
|
|
889
|
+
if (storedTokens?.accessToken) {
|
|
890
|
+
const isValid = await this.storage.hasValidToken();
|
|
891
|
+
if (isValid) {
|
|
892
|
+
debug("Using cached tokens from storage");
|
|
893
|
+
return {
|
|
894
|
+
accessToken: storedTokens.accessToken,
|
|
895
|
+
tokenType: storedTokens.tokenType,
|
|
896
|
+
expiresAt: storedTokens.expiresAt,
|
|
897
|
+
refreshed: false,
|
|
898
|
+
fromEnv: false
|
|
899
|
+
};
|
|
900
|
+
}
|
|
901
|
+
if (storedTokens.refreshToken) {
|
|
902
|
+
debug("Token expired, attempting refresh");
|
|
903
|
+
try {
|
|
904
|
+
const refreshedTokens = await this.refreshStoredToken(storedTokens);
|
|
905
|
+
return {
|
|
906
|
+
accessToken: refreshedTokens.accessToken,
|
|
907
|
+
tokenType: refreshedTokens.tokenType,
|
|
908
|
+
expiresAt: refreshedTokens.expiresAt,
|
|
909
|
+
refreshed: true,
|
|
910
|
+
fromEnv: false
|
|
911
|
+
};
|
|
912
|
+
} catch (error) {
|
|
913
|
+
debug("Token refresh failed, will re-authenticate:", error);
|
|
914
|
+
}
|
|
915
|
+
}
|
|
916
|
+
}
|
|
917
|
+
debug("Performing full OAuth authentication");
|
|
918
|
+
return this.authenticate();
|
|
919
|
+
}
|
|
920
|
+
/**
|
|
921
|
+
* Try to get a valid access token without triggering browser auth
|
|
922
|
+
*
|
|
923
|
+
* Returns null if no valid token is available (no stored tokens,
|
|
924
|
+
* expired without refresh token, or refresh failed). Unlike getAccessToken(),
|
|
925
|
+
* this will NOT open a browser for authentication.
|
|
926
|
+
*
|
|
927
|
+
* Use this for CLI commands that should prompt the user to run `login`
|
|
928
|
+
* instead of automatically starting the OAuth flow.
|
|
929
|
+
*/
|
|
930
|
+
async tryGetAccessToken() {
|
|
931
|
+
const envTokens = loadTokensFromEnv();
|
|
932
|
+
if (envTokens) {
|
|
933
|
+
debug("Using tokens from environment variables");
|
|
934
|
+
return {
|
|
935
|
+
accessToken: envTokens.accessToken,
|
|
936
|
+
tokenType: envTokens.tokenType,
|
|
937
|
+
expiresAt: envTokens.expiresAt,
|
|
938
|
+
refreshed: false,
|
|
939
|
+
fromEnv: true
|
|
940
|
+
};
|
|
941
|
+
}
|
|
942
|
+
const storedTokens = await this.storage.loadTokens();
|
|
943
|
+
if (storedTokens?.accessToken) {
|
|
944
|
+
const isValid = await this.storage.hasValidToken();
|
|
945
|
+
if (isValid) {
|
|
946
|
+
debug("Using cached tokens from storage");
|
|
947
|
+
return {
|
|
948
|
+
accessToken: storedTokens.accessToken,
|
|
949
|
+
tokenType: storedTokens.tokenType,
|
|
950
|
+
expiresAt: storedTokens.expiresAt,
|
|
951
|
+
refreshed: false,
|
|
952
|
+
fromEnv: false
|
|
953
|
+
};
|
|
954
|
+
}
|
|
955
|
+
if (storedTokens.refreshToken) {
|
|
956
|
+
debug("Token expired, attempting refresh");
|
|
957
|
+
try {
|
|
958
|
+
const refreshedTokens = await this.refreshStoredToken(storedTokens);
|
|
959
|
+
return {
|
|
960
|
+
accessToken: refreshedTokens.accessToken,
|
|
961
|
+
tokenType: refreshedTokens.tokenType,
|
|
962
|
+
expiresAt: refreshedTokens.expiresAt,
|
|
963
|
+
refreshed: true,
|
|
964
|
+
fromEnv: false
|
|
965
|
+
};
|
|
966
|
+
} catch (error) {
|
|
967
|
+
debug("Token refresh failed:", error);
|
|
968
|
+
return null;
|
|
969
|
+
}
|
|
970
|
+
}
|
|
971
|
+
}
|
|
972
|
+
debug("No valid token available");
|
|
973
|
+
return null;
|
|
974
|
+
}
|
|
975
|
+
/**
|
|
976
|
+
* Force a new authentication flow
|
|
977
|
+
*/
|
|
978
|
+
async authenticate() {
|
|
979
|
+
const { protectedResource, authServer } = await this.discoverServers();
|
|
980
|
+
const client = await this.getOrRegisterClient(authServer);
|
|
981
|
+
const { tokens, requestedScopes } = await this.performOAuthFlow(
|
|
982
|
+
authServer,
|
|
983
|
+
client,
|
|
984
|
+
protectedResource
|
|
985
|
+
);
|
|
986
|
+
return {
|
|
987
|
+
accessToken: tokens.accessToken,
|
|
988
|
+
tokenType: tokens.tokenType,
|
|
989
|
+
expiresAt: tokens.expiresAt,
|
|
990
|
+
refreshed: false,
|
|
991
|
+
fromEnv: false,
|
|
992
|
+
requestedScopes
|
|
993
|
+
};
|
|
994
|
+
}
|
|
995
|
+
/**
|
|
996
|
+
* Check if stored credentials exist (may be expired)
|
|
997
|
+
*/
|
|
998
|
+
async hasStoredCredentials() {
|
|
999
|
+
const tokens = await this.storage.loadTokens();
|
|
1000
|
+
return tokens?.accessToken !== void 0;
|
|
1001
|
+
}
|
|
1002
|
+
/**
|
|
1003
|
+
* Clear stored credentials
|
|
1004
|
+
*/
|
|
1005
|
+
async clearCredentials() {
|
|
1006
|
+
await this.storage.deleteTokens();
|
|
1007
|
+
debug("Cleared stored credentials");
|
|
1008
|
+
}
|
|
1009
|
+
/**
|
|
1010
|
+
* Discover protected resource and authorization server
|
|
1011
|
+
*/
|
|
1012
|
+
async discoverServers() {
|
|
1013
|
+
const cachedMetadata = await this.storage.loadServerMetadata();
|
|
1014
|
+
if (cachedMetadata) {
|
|
1015
|
+
const age = Date.now() - cachedMetadata.discoveredAt;
|
|
1016
|
+
if (age < DEFAULT_METADATA_TTL_MS) {
|
|
1017
|
+
debug("Using cached server metadata (age: %dms)", age);
|
|
1018
|
+
debug(
|
|
1019
|
+
"Cached protected resource scopes: %O",
|
|
1020
|
+
cachedMetadata.protectedResource.scopes_supported
|
|
1021
|
+
);
|
|
1022
|
+
debug(
|
|
1023
|
+
"Cached auth server scopes: %O",
|
|
1024
|
+
cachedMetadata.authServer.server.scopes_supported
|
|
1025
|
+
);
|
|
1026
|
+
return {
|
|
1027
|
+
protectedResource: cachedMetadata.protectedResource,
|
|
1028
|
+
authServer: cachedMetadata.authServer
|
|
1029
|
+
};
|
|
1030
|
+
}
|
|
1031
|
+
debug("Cached server metadata is stale (age: %dms), re-discovering", age);
|
|
1032
|
+
}
|
|
1033
|
+
debug("Discovering protected resource:", this.config.mcpServerUrl);
|
|
1034
|
+
const prResult = await discoverProtectedResource(this.config.mcpServerUrl);
|
|
1035
|
+
debug("Found protected resource:", prResult.metadata.resource);
|
|
1036
|
+
debug(
|
|
1037
|
+
"Protected resource scopes_supported: %O",
|
|
1038
|
+
prResult.metadata.scopes_supported
|
|
1039
|
+
);
|
|
1040
|
+
const authServerUrl = prResult.metadata.authorization_servers?.[0];
|
|
1041
|
+
if (!authServerUrl) {
|
|
1042
|
+
throw new Error(
|
|
1043
|
+
"No authorization servers found in protected resource metadata"
|
|
1044
|
+
);
|
|
1045
|
+
}
|
|
1046
|
+
debug("Discovering authorization server:", authServerUrl);
|
|
1047
|
+
const authServer = await discoverAuthorizationServer(authServerUrl);
|
|
1048
|
+
debug("Found authorization server:", authServer.issuer);
|
|
1049
|
+
debug(
|
|
1050
|
+
"Auth server scopes_supported: %O",
|
|
1051
|
+
authServer.server.scopes_supported
|
|
1052
|
+
);
|
|
1053
|
+
const metadata = {
|
|
1054
|
+
authServer,
|
|
1055
|
+
protectedResource: prResult.metadata,
|
|
1056
|
+
discoveredAt: Date.now()
|
|
1057
|
+
};
|
|
1058
|
+
await this.storage.saveServerMetadata(metadata);
|
|
1059
|
+
return {
|
|
1060
|
+
protectedResource: prResult.metadata,
|
|
1061
|
+
authServer
|
|
1062
|
+
};
|
|
1063
|
+
}
|
|
1064
|
+
/**
|
|
1065
|
+
* Get existing client or register new one via DCR
|
|
1066
|
+
*/
|
|
1067
|
+
async getOrRegisterClient(authServer) {
|
|
1068
|
+
if (this.config.clientId) {
|
|
1069
|
+
debug("Using pre-configured client ID");
|
|
1070
|
+
return {
|
|
1071
|
+
clientId: this.config.clientId,
|
|
1072
|
+
clientSecret: this.config.clientSecret
|
|
1073
|
+
};
|
|
1074
|
+
}
|
|
1075
|
+
const cachedClient = await this.storage.loadClient();
|
|
1076
|
+
if (cachedClient?.clientId) {
|
|
1077
|
+
debug("Using cached client registration");
|
|
1078
|
+
return cachedClient;
|
|
1079
|
+
}
|
|
1080
|
+
debug("Registering new client via DCR");
|
|
1081
|
+
const client = await this.registerClient(authServer);
|
|
1082
|
+
await this.storage.saveClient(client);
|
|
1083
|
+
return client;
|
|
1084
|
+
}
|
|
1085
|
+
/**
|
|
1086
|
+
* Register a new client via Dynamic Client Registration
|
|
1087
|
+
*/
|
|
1088
|
+
async registerClient(authServer) {
|
|
1089
|
+
const registrationEndpoint = authServer.server.registration_endpoint;
|
|
1090
|
+
if (!registrationEndpoint) {
|
|
1091
|
+
throw new Error(
|
|
1092
|
+
"Authorization server does not support Dynamic Client Registration. Please provide a clientId in the configuration."
|
|
1093
|
+
);
|
|
1094
|
+
}
|
|
1095
|
+
const redirectUri = "http://127.0.0.1:0/callback";
|
|
1096
|
+
const response = await fetch(registrationEndpoint, {
|
|
1097
|
+
method: "POST",
|
|
1098
|
+
headers: {
|
|
1099
|
+
"Content-Type": "application/json",
|
|
1100
|
+
"MCP-Protocol-Version": MCP_PROTOCOL_VERSION
|
|
1101
|
+
},
|
|
1102
|
+
body: JSON.stringify({
|
|
1103
|
+
redirect_uris: [redirectUri],
|
|
1104
|
+
token_endpoint_auth_method: "none",
|
|
1105
|
+
grant_types: ["authorization_code", "refresh_token"],
|
|
1106
|
+
response_types: ["code"],
|
|
1107
|
+
client_name: this.config.clientName ?? DEFAULT_CLIENT_NAME
|
|
1108
|
+
})
|
|
1109
|
+
});
|
|
1110
|
+
if (!response.ok) {
|
|
1111
|
+
const errorText = await response.text();
|
|
1112
|
+
throw new Error(
|
|
1113
|
+
`Dynamic Client Registration failed: ${response.status} ${response.statusText}
|
|
1114
|
+
${errorText}`
|
|
1115
|
+
);
|
|
1116
|
+
}
|
|
1117
|
+
const data = await response.json();
|
|
1118
|
+
debug("Client registered:", data.client_id);
|
|
1119
|
+
return {
|
|
1120
|
+
clientId: data.client_id,
|
|
1121
|
+
clientSecret: data.client_secret,
|
|
1122
|
+
clientIdIssuedAt: data.client_id_issued_at,
|
|
1123
|
+
clientSecretExpiresAt: data.client_secret_expires_at
|
|
1124
|
+
};
|
|
1125
|
+
}
|
|
1126
|
+
/**
|
|
1127
|
+
* Perform the full OAuth authorization flow
|
|
1128
|
+
*/
|
|
1129
|
+
async performOAuthFlow(authServer, client, protectedResource) {
|
|
1130
|
+
const pkce = await generatePKCE();
|
|
1131
|
+
const state = generateState();
|
|
1132
|
+
const { port, codePromise, close } = await this.startCallbackServer(state);
|
|
1133
|
+
const redirectUri = `http://127.0.0.1:${port}/callback`;
|
|
1134
|
+
try {
|
|
1135
|
+
const requestedScopes = this.config.scopes ?? protectedResource.scopes_supported ?? authServer.server.scopes_supported ?? ["openid"];
|
|
1136
|
+
debug("Scope resolution:");
|
|
1137
|
+
debug(" - User config scopes: %O", this.config.scopes);
|
|
1138
|
+
debug(
|
|
1139
|
+
" - Protected resource scopes_supported: %O",
|
|
1140
|
+
protectedResource.scopes_supported
|
|
1141
|
+
);
|
|
1142
|
+
debug(
|
|
1143
|
+
" - Auth server scopes_supported: %O",
|
|
1144
|
+
authServer.server.scopes_supported
|
|
1145
|
+
);
|
|
1146
|
+
debug(" - Final requested scopes: %O", requestedScopes);
|
|
1147
|
+
const authUrl = buildAuthorizationUrl({
|
|
1148
|
+
authServer,
|
|
1149
|
+
clientId: client.clientId,
|
|
1150
|
+
redirectUri,
|
|
1151
|
+
scopes: requestedScopes,
|
|
1152
|
+
codeChallenge: pkce.codeChallenge,
|
|
1153
|
+
state,
|
|
1154
|
+
resource: protectedResource.resource
|
|
1155
|
+
});
|
|
1156
|
+
debug("Authorization URL: %s", authUrl.toString());
|
|
1157
|
+
debug("Authorization URL params:");
|
|
1158
|
+
debug(" - client_id: %s", authUrl.searchParams.get("client_id"));
|
|
1159
|
+
debug(" - redirect_uri: %s", authUrl.searchParams.get("redirect_uri"));
|
|
1160
|
+
debug(" - scope: %s", authUrl.searchParams.get("scope"));
|
|
1161
|
+
debug(" - resource: %s", authUrl.searchParams.get("resource"));
|
|
1162
|
+
await this.openBrowserOrPrintUrl(authUrl);
|
|
1163
|
+
debug("Waiting for OAuth callback...");
|
|
1164
|
+
const code = await codePromise;
|
|
1165
|
+
debug("Received authorization code");
|
|
1166
|
+
const tokenResult = await exchangeCodeForTokens({
|
|
1167
|
+
authServer,
|
|
1168
|
+
clientId: client.clientId,
|
|
1169
|
+
clientSecret: client.clientSecret,
|
|
1170
|
+
code,
|
|
1171
|
+
state,
|
|
1172
|
+
codeVerifier: pkce.codeVerifier,
|
|
1173
|
+
redirectUri
|
|
1174
|
+
});
|
|
1175
|
+
const tokens = this.tokenResultToStoredTokens(
|
|
1176
|
+
tokenResult,
|
|
1177
|
+
client.clientId
|
|
1178
|
+
);
|
|
1179
|
+
await this.storage.saveTokens(tokens);
|
|
1180
|
+
return { tokens, requestedScopes };
|
|
1181
|
+
} finally {
|
|
1182
|
+
close();
|
|
1183
|
+
}
|
|
1184
|
+
}
|
|
1185
|
+
/**
|
|
1186
|
+
* Refresh an expired token
|
|
1187
|
+
*
|
|
1188
|
+
* Uses the clientId stored with the tokens (if available) to ensure
|
|
1189
|
+
* the refresh request uses the same client that obtained the original tokens.
|
|
1190
|
+
* This is important because refresh tokens are bound to the client_id.
|
|
1191
|
+
*/
|
|
1192
|
+
async refreshStoredToken(storedTokens) {
|
|
1193
|
+
if (!storedTokens.refreshToken) {
|
|
1194
|
+
throw new Error("No refresh token available");
|
|
1195
|
+
}
|
|
1196
|
+
const metadata = await this.storage.loadServerMetadata();
|
|
1197
|
+
if (!metadata) {
|
|
1198
|
+
throw new Error("No cached server metadata for refresh");
|
|
1199
|
+
}
|
|
1200
|
+
let clientId;
|
|
1201
|
+
let clientSecret;
|
|
1202
|
+
if (storedTokens.clientId) {
|
|
1203
|
+
debug("Using clientId from stored tokens for refresh");
|
|
1204
|
+
clientId = storedTokens.clientId;
|
|
1205
|
+
const storedClient = await this.storage.loadClient();
|
|
1206
|
+
if (storedClient?.clientId === clientId) {
|
|
1207
|
+
clientSecret = storedClient.clientSecret;
|
|
1208
|
+
}
|
|
1209
|
+
} else {
|
|
1210
|
+
debug(
|
|
1211
|
+
"No clientId in stored tokens, falling back to stored client (legacy behavior)"
|
|
1212
|
+
);
|
|
1213
|
+
const client = await this.getOrRegisterClient(metadata.authServer);
|
|
1214
|
+
clientId = client.clientId;
|
|
1215
|
+
clientSecret = client.clientSecret;
|
|
1216
|
+
}
|
|
1217
|
+
const tokenResult = await refreshAccessToken({
|
|
1218
|
+
authServer: metadata.authServer,
|
|
1219
|
+
clientId,
|
|
1220
|
+
clientSecret,
|
|
1221
|
+
refreshToken: storedTokens.refreshToken
|
|
1222
|
+
});
|
|
1223
|
+
const tokens = this.tokenResultToStoredTokens(tokenResult, clientId);
|
|
1224
|
+
await this.storage.saveTokens(tokens);
|
|
1225
|
+
return tokens;
|
|
1226
|
+
}
|
|
1227
|
+
/**
|
|
1228
|
+
* Start local callback server
|
|
1229
|
+
*/
|
|
1230
|
+
async startCallbackServer(expectedState) {
|
|
1231
|
+
const timeoutMs = this.config.timeoutMs ?? DEFAULT_TIMEOUT_MS2;
|
|
1232
|
+
return new Promise((resolve, reject) => {
|
|
1233
|
+
const server = http.createServer();
|
|
1234
|
+
const connections = /* @__PURE__ */ new Set();
|
|
1235
|
+
server.on("connection", (socket) => {
|
|
1236
|
+
connections.add(socket);
|
|
1237
|
+
socket.on("close", () => connections.delete(socket));
|
|
1238
|
+
});
|
|
1239
|
+
const forceClose = () => {
|
|
1240
|
+
for (const socket of connections) {
|
|
1241
|
+
socket.destroy();
|
|
1242
|
+
}
|
|
1243
|
+
server.close();
|
|
1244
|
+
};
|
|
1245
|
+
let codeResolve;
|
|
1246
|
+
let codeReject;
|
|
1247
|
+
const codePromise = new Promise((res, rej) => {
|
|
1248
|
+
codeResolve = res;
|
|
1249
|
+
codeReject = rej;
|
|
1250
|
+
});
|
|
1251
|
+
const timeout = setTimeout(() => {
|
|
1252
|
+
forceClose();
|
|
1253
|
+
codeReject(new Error(`OAuth flow timed out after ${timeoutMs}ms`));
|
|
1254
|
+
}, timeoutMs);
|
|
1255
|
+
server.on("request", (req, res) => {
|
|
1256
|
+
const url = new URL(
|
|
1257
|
+
req.url ?? "/",
|
|
1258
|
+
`http://127.0.0.1:${server.address().port}`
|
|
1259
|
+
);
|
|
1260
|
+
if (url.pathname !== "/callback") {
|
|
1261
|
+
res.writeHead(404);
|
|
1262
|
+
res.end("Not Found");
|
|
1263
|
+
return;
|
|
1264
|
+
}
|
|
1265
|
+
const error = url.searchParams.get("error");
|
|
1266
|
+
if (error) {
|
|
1267
|
+
const errorDescription = url.searchParams.get("error_description");
|
|
1268
|
+
clearTimeout(timeout);
|
|
1269
|
+
res.writeHead(400, { "Content-Type": "text/html" });
|
|
1270
|
+
res.end(this.errorHtml(error, errorDescription ?? void 0));
|
|
1271
|
+
codeReject(
|
|
1272
|
+
new Error(
|
|
1273
|
+
`OAuth error: ${error}${errorDescription ? ` - ${errorDescription}` : ""}`
|
|
1274
|
+
)
|
|
1275
|
+
);
|
|
1276
|
+
return;
|
|
1277
|
+
}
|
|
1278
|
+
const state = url.searchParams.get("state");
|
|
1279
|
+
if (state !== expectedState) {
|
|
1280
|
+
clearTimeout(timeout);
|
|
1281
|
+
res.writeHead(400, { "Content-Type": "text/html" });
|
|
1282
|
+
res.end(this.errorHtml("invalid_state", "State parameter mismatch"));
|
|
1283
|
+
codeReject(new Error("OAuth state mismatch - possible CSRF attack"));
|
|
1284
|
+
return;
|
|
1285
|
+
}
|
|
1286
|
+
const code = url.searchParams.get("code");
|
|
1287
|
+
if (!code) {
|
|
1288
|
+
clearTimeout(timeout);
|
|
1289
|
+
res.writeHead(400, { "Content-Type": "text/html" });
|
|
1290
|
+
res.end(
|
|
1291
|
+
this.errorHtml("missing_code", "No authorization code received")
|
|
1292
|
+
);
|
|
1293
|
+
codeReject(new Error("No authorization code in callback"));
|
|
1294
|
+
return;
|
|
1295
|
+
}
|
|
1296
|
+
clearTimeout(timeout);
|
|
1297
|
+
res.writeHead(200, { "Content-Type": "text/html" });
|
|
1298
|
+
res.end(this.successHtml());
|
|
1299
|
+
codeResolve(code);
|
|
1300
|
+
});
|
|
1301
|
+
const preferredPort = this.config.callbackPort ?? 0;
|
|
1302
|
+
server.listen(preferredPort, "127.0.0.1", () => {
|
|
1303
|
+
const address = server.address();
|
|
1304
|
+
debug("Callback server listening on port", address.port);
|
|
1305
|
+
resolve({ port: address.port, codePromise, close: forceClose });
|
|
1306
|
+
});
|
|
1307
|
+
server.on("error", (err) => {
|
|
1308
|
+
reject(err);
|
|
1309
|
+
});
|
|
1310
|
+
});
|
|
1311
|
+
}
|
|
1312
|
+
/**
|
|
1313
|
+
* Open browser or print URL for headless environments
|
|
1314
|
+
*/
|
|
1315
|
+
async openBrowserOrPrintUrl(url) {
|
|
1316
|
+
if (isHeadless()) {
|
|
1317
|
+
console.log("\n" + "=".repeat(60));
|
|
1318
|
+
console.log(
|
|
1319
|
+
"Please open the following URL in your browser to authenticate:"
|
|
1320
|
+
);
|
|
1321
|
+
console.log("\n" + url.toString() + "\n");
|
|
1322
|
+
console.log("=".repeat(60) + "\n");
|
|
1323
|
+
return;
|
|
1324
|
+
}
|
|
1325
|
+
try {
|
|
1326
|
+
const open = await import('open');
|
|
1327
|
+
await open.default(url.toString());
|
|
1328
|
+
debug("Opened browser for authentication");
|
|
1329
|
+
} catch (error) {
|
|
1330
|
+
debug("Failed to open browser:", error);
|
|
1331
|
+
console.log("\nFailed to open browser automatically.");
|
|
1332
|
+
console.log("Please open the following URL manually:\n");
|
|
1333
|
+
console.log(url.toString() + "\n");
|
|
1334
|
+
}
|
|
1335
|
+
}
|
|
1336
|
+
/**
|
|
1337
|
+
* Convert TokenResult to StoredTokens
|
|
1338
|
+
*
|
|
1339
|
+
* @param result - Token result from exchange or refresh
|
|
1340
|
+
* @param clientId - Client ID that was used to obtain these tokens
|
|
1341
|
+
*/
|
|
1342
|
+
tokenResultToStoredTokens(result, clientId) {
|
|
1343
|
+
return {
|
|
1344
|
+
accessToken: result.accessToken,
|
|
1345
|
+
tokenType: result.tokenType,
|
|
1346
|
+
refreshToken: result.refreshToken,
|
|
1347
|
+
expiresAt: result.expiresIn ? Date.now() + result.expiresIn * 1e3 : void 0,
|
|
1348
|
+
clientId
|
|
1349
|
+
};
|
|
1350
|
+
}
|
|
1351
|
+
/**
|
|
1352
|
+
* HTML page for successful authentication
|
|
1353
|
+
*/
|
|
1354
|
+
successHtml() {
|
|
1355
|
+
return `
|
|
1356
|
+
<!DOCTYPE html>
|
|
1357
|
+
<html>
|
|
1358
|
+
<head>
|
|
1359
|
+
<meta charset="UTF-8">
|
|
1360
|
+
<title>Authentication Successful</title>
|
|
1361
|
+
<style>
|
|
1362
|
+
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
|
1363
|
+
display: flex; justify-content: center; align-items: center; height: 100vh; margin: 0;
|
|
1364
|
+
background: #f8fafc; }
|
|
1365
|
+
.container { text-align: center; background: white; padding: 48px 64px; border-radius: 8px;
|
|
1366
|
+
border: 1px solid #e2e8f0; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
|
|
1367
|
+
.icon { width: 48px; height: 48px; margin: 0 auto 24px; background: #dcfce7; border-radius: 50%;
|
|
1368
|
+
display: flex; align-items: center; justify-content: center; }
|
|
1369
|
+
.icon svg { width: 24px; height: 24px; color: #16a34a; }
|
|
1370
|
+
h1 { color: #0f172a; margin: 0 0 8px 0; font-size: 20px; font-weight: 600; }
|
|
1371
|
+
p { color: #64748b; margin: 0; font-size: 14px; }
|
|
1372
|
+
</style>
|
|
1373
|
+
</head>
|
|
1374
|
+
<body>
|
|
1375
|
+
<div class="container">
|
|
1376
|
+
<div class="icon">
|
|
1377
|
+
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
|
1378
|
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M5 13l4 4L19 7"/>
|
|
1379
|
+
</svg>
|
|
1380
|
+
</div>
|
|
1381
|
+
<h1>Authentication Successful</h1>
|
|
1382
|
+
<p>You can close this window and return to the terminal.</p>
|
|
1383
|
+
</div>
|
|
1384
|
+
</body>
|
|
1385
|
+
</html>`;
|
|
1386
|
+
}
|
|
1387
|
+
/**
|
|
1388
|
+
* HTML page for authentication error
|
|
1389
|
+
*/
|
|
1390
|
+
errorHtml(error, description) {
  // Page returned by the callback server when the redirect reports a
  // failure. Both values are passed through escapeHtml() before being placed
  // in the markup; the description paragraph is left out when no description
  // is given.
  const safeError = escapeHtml(error);
  const descriptionHtml = description ? `<p>${escapeHtml(description)}</p>` : "";
  return `
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Authentication Failed</title>
<style>
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
display: flex; justify-content: center; align-items: center; height: 100vh; margin: 0;
background: #f8fafc; }
.container { text-align: center; background: white; padding: 48px 64px; border-radius: 8px;
border: 1px solid #e2e8f0; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
.icon { width: 48px; height: 48px; margin: 0 auto 24px; background: #fee2e2; border-radius: 50%;
display: flex; align-items: center; justify-content: center; }
.icon svg { width: 24px; height: 24px; color: #dc2626; }
h1 { color: #0f172a; margin: 0 0 8px 0; font-size: 20px; font-weight: 600; }
p { color: #64748b; margin: 0 0 8px 0; font-size: 14px; }
code { background: #f1f5f9; padding: 2px 8px; border-radius: 4px; color: #dc2626; font-size: 13px; }
</style>
</head>
<body>
<div class="container">
<div class="icon">
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M6 18L18 6M6 6l12 12"/>
</svg>
</div>
<h1>Authentication Failed</h1>
<p>Error: <code>${safeError}</code></p>
${descriptionHtml}
</div>
</body>
</html>`;
}
|
|
1425
|
+
};
|
|
1426
|
+
/**
 * Decide whether a browser should NOT be launched automatically.
 *
 * The environment counts as headless when `CI` is set, when stdin is not a
 * TTY, or when running on Linux with neither `DISPLAY` nor `WAYLAND_DISPLAY`
 * set.
 *
 * @returns {boolean}
 */
function isHeadless() {
  const { env, platform, stdin } = process;
  const runningInCi = Boolean(env.CI);
  const stdinDetached = !stdin.isTTY;
  const linuxWithoutDisplay = platform === "linux" && !env.DISPLAY && !env.WAYLAND_DISPLAY;
  return runningInCi || stdinDetached || linuxWithoutDisplay;
}
|
|
1438
|
+
/**
 * Escape the five HTML-significant characters so `text` can be embedded in
 * markup as plain text.
 *
 * `&` is replaced first so the ampersands introduced by the other
 * replacements are not escaped a second time.
 *
 * @param {string} text
 * @returns {string} text with `&`, `<`, `>`, `"` and `'` replaced by entities
 */
function escapeHtml(text) {
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
|
|
1441
|
+
/**
 * Create an MCP client for a validated configuration and connect it.
 *
 * Stdio configurations are connected through `StdioClientTransport`; HTTP
 * configurations through `StreamableHTTPClientTransport`. For HTTP, a
 * configured static access token is sent as a Bearer `Authorization` header
 * unless an `authProvider` is supplied in `options`.
 *
 * @param {object} config - raw configuration, checked by `validateMCPConfig`
 * @param {object} [options] - optional `clientInfo` ({ name, version }) and `authProvider`
 * @returns {Promise<object>} the client instance
 */
async function createMCPClientForConfig(config, options) {
  const validatedConfig = validateMCPConfig(config);
  const clientInfo = {
    name: options?.clientInfo?.name ?? "@gleanwork/mcp-server-tester",
    version: options?.clientInfo?.version ?? "0.1.0"
  };
  const client = new Client(clientInfo, {
    capabilities: validatedConfig.capabilities ?? {}
  });

  if (isStdioConfig(validatedConfig)) {
    const transportOptions = {
      command: validatedConfig.command,
      args: validatedConfig.args ?? []
    };
    if (validatedConfig.cwd) {
      transportOptions.cwd = validatedConfig.cwd;
    }
    if (validatedConfig.quiet) {
      // Suppress server stderr when quiet mode is enabled
      transportOptions.stderr = "ignore";
    }
    const transport = new StdioClientTransport(transportOptions);
    debugClient("Connecting via stdio: %O", {
      command: validatedConfig.command,
      args: validatedConfig.args,
      cwd: validatedConfig.cwd
    });
    await client.connect(transport);
  } else if (isHttpConfig(validatedConfig)) {
    const headers = { ...validatedConfig.headers };
    const authProvider = options?.authProvider;
    if (validatedConfig.auth?.accessToken && !authProvider) {
      headers.Authorization = `Bearer ${validatedConfig.auth.accessToken}`;
    }
    const headerNames = Object.keys(headers);
    const hasHeaders = headerNames.length > 0;
    const transport = new StreamableHTTPClientTransport(
      new URL(validatedConfig.serverUrl),
      {
        requestInit: hasHeaders ? { headers } : void 0,
        // Pass auth provider for OAuth flow - MCP SDK handles it automatically
        authProvider
      }
    );
    // Only header names are logged, never their values.
    debugClient("Connecting via HTTP: %O", {
      serverUrl: validatedConfig.serverUrl,
      headers: hasHeaders ? headerNames : void 0,
      hasAuthProvider: !!authProvider
    });
    await client.connect(transport);
  }

  debugClient("Connected successfully");
  const serverInfo = client.getServerVersion();
  if (serverInfo) {
    debugClient("Server info: %O", serverInfo);
  }
  return client;
}
|
|
1493
|
+
/**
 * Close an MCP client.
 *
 * A failure while closing is written to stderr and then rethrown, so the
 * caller still observes the original error.
 *
 * @param {{close: Function}} client
 * @returns {Promise<void>}
 */
async function closeMCPClient(client) {
  const reportAndRethrow = (closeError) => {
    console.error("[MCP] Error closing client:", closeError);
    throw closeError;
  };
  try {
    await client.close();
  } catch (closeError) {
    reportAndRethrow(closeError);
  }
}
|
|
1501
|
+
|
|
1502
|
+
// src/mcp/response.ts
|
|
1503
|
+
/**
 * Normalise a raw tool-call result into a uniform shape.
 *
 * - `contentBlocks`: one entry per object in `result.content`, keeping only
 *   `type` ("unknown" when not a string), string `text`, defined `data` and
 *   string `mimeType`; non-object entries are skipped.
 * - `text`: the text of all blocks joined with newlines. When no block has
 *   text, `structuredContent` is used instead (as-is for strings, JSON for
 *   other non-null values).
 * - `structuredContent`: the raw value, or null when it is not provided.
 *
 * @param {object} result - raw tool result
 * @returns {{text: string, raw: object, isError: boolean, contentBlocks: object[], structuredContent: *}}
 */
function normalizeToolResponse(result) {
  const isError = result.isError ?? false;
  const contentBlocks = [];
  const textParts = [];

  const rawBlocks = Array.isArray(result.content) ? result.content : [];
  for (const entry of rawBlocks) {
    const isObject = entry != null && typeof entry === "object";
    if (!isObject) {
      continue;
    }
    const normalized = {
      type: typeof entry.type === "string" ? entry.type : "unknown"
    };
    if (typeof entry.text === "string") {
      normalized.text = entry.text;
      textParts.push(entry.text);
    }
    if (entry.data !== void 0) {
      normalized.data = entry.data;
    }
    if (typeof entry.mimeType === "string") {
      normalized.mimeType = entry.mimeType;
    }
    contentBlocks.push(normalized);
  }

  const structured = result.structuredContent;
  const structuredContent = structured !== void 0 ? structured : null;
  // Structured content only feeds `text` when no content block supplied any.
  if (structured !== void 0 && textParts.length === 0) {
    if (typeof structured === "string") {
      textParts.push(structured);
    } else if (structured != null) {
      textParts.push(JSON.stringify(structured));
    }
  }

  return {
    text: textParts.join("\n"),
    raw: result,
    isError,
    contentBlocks,
    structuredContent
  };
}
|
|
1549
|
+
/**
 * Extracts a plain-text representation from many response shapes.
 *
 * Resolution order: nullish -> "", string -> itself, normalized response ->
 * its `text`, raw tool result -> normalized text, array -> text of its
 * content blocks, generic object -> `content` / `structuredContent` / `text`
 * / JSON, number/boolean/bigint -> `String(value)`, anything else -> "".
 *
 * @param {unknown} response - Value to extract text from.
 * @returns {string} The extracted text.
 */
function extractText(response) {
  if (response == null) {
    return "";
  }
  switch (typeof response) {
    case "string":
      return response;
    case "number":
    case "boolean":
    case "bigint":
      return String(response);
    case "object":
      break;
    default:
      // Functions, symbols, etc. have no textual representation here.
      return "";
  }

  // From here on `response` is a non-null object; the order of checks matters.
  if (isNormalizedResponse(response)) {
    return response.text;
  }
  if (isCallToolResult(response)) {
    return normalizeToolResponse(response).text;
  }
  if (Array.isArray(response)) {
    return extractTextFromContentArray(response);
  }

  const { content, structuredContent, text } = response;
  if (Array.isArray(content)) {
    return extractTextFromContentArray(content);
  }
  if (typeof content === "string") {
    return content;
  }
  if (structuredContent !== void 0) {
    return typeof structuredContent === "string" ? structuredContent : JSON.stringify(structuredContent);
  }
  if (typeof text === "string") {
    return text;
  }
  return JSON.stringify(response);
}
|
|
1589
|
+
/**
 * Tells whether `value` has the shape of a normalized response: a string
 * `text`, a boolean `isError`, an array `contentBlocks` and a defined `raw`.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean}
 */
function isNormalizedResponse(value) {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  if (typeof value.text !== "string") {
    return false;
  }
  if (typeof value.isError !== "boolean") {
    return false;
  }
  if (!Array.isArray(value.contentBlocks)) {
    return false;
  }
  return value.raw !== void 0;
}
|
|
1596
|
+
/**
 * Tells whether `value` looks like a raw tool-call result, i.e. an object
 * with an array `content` or a boolean `isError`.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean}
 */
function isCallToolResult(value) {
  const isObject = typeof value === "object" && value !== null;
  if (!isObject) {
    return false;
  }
  if (Array.isArray(value.content)) {
    return true;
  }
  return typeof value.isError === "boolean";
}
|
|
1603
|
+
/**
 * Joins the text of all `{ type: "text", text: string }` blocks with
 * newlines. When no such block exists the whole array is returned as JSON.
 *
 * @param {Iterable<unknown>} content - Content blocks.
 * @returns {string}
 */
function extractTextFromContentArray(content) {
  const collected = [];
  for (const entry of content) {
    const isTextBlock =
      entry != null &&
      typeof entry === "object" &&
      entry.type === "text" &&
      typeof entry.text === "string";
    if (isTextBlock) {
      collected.push(entry.text);
    }
  }
  return collected.length > 0 ? collected.join("\n") : JSON.stringify(content);
}
|
|
1619
|
+
|
|
1620
|
+
// src/assertions/validators/utils.ts
|
|
1621
|
+
// Alias of extractText used by the validator helpers that follow.
var extractText2 = extractText;
|
|
1622
|
+
/**
 * Returns the UTF-8 byte size of a response.
 *
 * Strings are measured as-is; every other value is measured through its
 * pretty-printed (2-space) JSON form. Nullish values, and values JSON cannot
 * represent at the top level (functions, symbols), count as 0 bytes instead
 * of raising a TypeError from `Buffer.byteLength(undefined)`.
 *
 * @param {unknown} response - Value to measure.
 * @returns {number} Size in bytes.
 */
function getResponseSizeBytes(response) {
  if (response === null || response === void 0) {
    return 0;
  }
  if (typeof response === "string") {
    return Buffer.byteLength(response, "utf8");
  }
  const serialized = JSON.stringify(response, null, 2);
  // JSON.stringify yields undefined for functions and symbols.
  if (serialized === void 0) {
    return 0;
  }
  return Buffer.byteLength(serialized, "utf8");
}
|
|
1632
|
+
/**
 * Converts a response to a string for comparison and display.
 *
 * Nullish values become "", strings are returned unchanged, everything else
 * is pretty-printed as JSON (2-space indent). Values JSON cannot represent at
 * the top level (functions, symbols) fall back to `String(value)` so the
 * result is always a string.
 *
 * @param {unknown} response - Value to stringify.
 * @returns {string}
 */
function stringifyResponse(response) {
  if (response === null || response === void 0) {
    return "";
  }
  if (typeof response === "string") {
    return response;
  }
  // JSON.stringify yields undefined for functions and symbols.
  return JSON.stringify(response, null, 2) ?? String(response);
}
|
|
1641
|
+
/**
 * Tells whether a response represents a tool error: either its own `isError`
 * is exactly `true`, or it has an object `raw` whose `isError` is `true`.
 *
 * @param {unknown} response - Response to inspect.
 * @returns {boolean}
 */
function isErrorResponse(response) {
  if (typeof response !== "object" || response === null) {
    return false;
  }
  if (response.isError === true) {
    return true;
  }
  const inner = "raw" in response ? response.raw : null;
  return typeof inner === "object" && inner !== null && inner.isError === true;
}
|
|
1658
|
+
/**
 * Returns the text of an error response, or "" when the response is not an
 * error.
 *
 * @param {unknown} response - Response to inspect.
 * @returns {string}
 */
function extractErrorMessage(response) {
  return isErrorResponse(response) ? extractText2(response) : "";
}
|
|
1664
|
+
/**
 * Collapses every run of whitespace into a single space and removes leading
 * and trailing whitespace.
 *
 * @param {string} text - Input text.
 * @returns {string}
 */
function normalizeWhitespace(text) {
  const words = text.split(/\s+/).filter((word) => word !== "");
  return words.join(" ");
}
|
|
1667
|
+
|
|
1668
|
+
// src/assertions/validators/response.ts
|
|
1669
|
+
/**
 * Checks that two responses are equal after being stringified.
 *
 * @param {unknown} actual - Response received.
 * @param {unknown} expected - Response expected.
 * @returns {{pass: boolean, message: string, details?: {actual: string, expected: string}}}
 */
function validateResponse(actual, expected) {
  const [actualStr, expectedStr] = [actual, expected].map((value) => stringifyResponse(value));
  if (actualStr !== expectedStr) {
    return {
      pass: false,
      message: `Response does not match expected value`,
      details: {
        actual: truncateForDisplay(actualStr),
        expected: truncateForDisplay(expectedStr)
      }
    };
  }
  return {
    pass: true,
    message: "Response matches expected value"
  };
}
|
|
1687
|
+
/**
 * Shortens a string for display, appending a truncation marker when it is
 * longer than `maxLength`.
 *
 * @param {string} str - Text to shorten.
 * @param {number} [maxLength=500] - Maximum number of characters kept.
 * @returns {string}
 */
function truncateForDisplay(str, maxLength = 500) {
  const fits = str.length <= maxLength;
  return fits ? str : `${str.slice(0, maxLength)}... (truncated)`;
}
|
|
1693
|
+
|
|
1694
|
+
// src/assertions/validators/schema.ts
|
|
1695
|
+
/**
 * Validates a response against a schema exposing a Zod-style `parse` method.
 *
 * The value handed to the schema is derived by `getValidatableValue`.
 * Errors carrying an `issues` array are reported issue by issue; any other
 * error thrown by `schema.parse` is reported through its message instead of
 * crashing while formatting it.
 *
 * @param {unknown} response - Response to validate.
 * @param {{ parse: (value: unknown) => unknown }} schema - Schema to validate against.
 * @param {object} [options] - Accepted for interface compatibility; it has no
 *   effect on validation in this build.
 * @returns {{pass: boolean, message: string, details?: {issues: unknown[]}}}
 */
function validateSchema(response, schema, options = {}) {
  const valueToValidate = getValidatableValue(response);
  try {
    schema.parse(valueToValidate);
    return {
      pass: true,
      message: "Response matches schema"
    };
  } catch (error) {
    // Not every thrown value is a ZodError: guard before reading `issues`.
    if (!Array.isArray(error?.issues)) {
      const reason = error instanceof Error ? error.message : String(error);
      return {
        pass: false,
        message: `Response does not match schema: ${reason}`,
        details: {
          issues: []
        }
      };
    }
    const issues = formatZodIssues(error);
    return {
      pass: false,
      message: `Response does not match schema: ${issues}`,
      details: {
        issues: error.issues
      }
    };
  }
}
|
|
1716
|
+
/**
 * Picks the value that should be validated against a schema.
 *
 * - nullish -> `null`
 * - string -> parsed JSON when it parses, otherwise the string
 * - object with defined `structuredContent` -> that content
 * - normalized response (`raw`, `text`, `isError`, `contentBlocks`) -> parsed
 *   JSON of `text`, otherwise the response itself
 * - object with an array `content` -> parsed JSON of its extracted text,
 *   otherwise the response itself
 * - anything else (including arrays) -> returned unchanged
 *
 * @param {unknown} response - Response to inspect.
 * @returns {unknown}
 */
function getValidatableValue(response) {
  if (response === null || response === void 0) {
    return null;
  }
  if (typeof response === "string") {
    return tryParseJson(response) ?? response;
  }
  if (typeof response !== "object" || Array.isArray(response)) {
    return response;
  }
  if ("structuredContent" in response && response.structuredContent !== void 0) {
    return response.structuredContent;
  }
  const looksNormalized =
    "raw" in response && "text" in response && "isError" in response && "contentBlocks" in response;
  if (looksNormalized) {
    return tryParseJson(response.text) ?? response;
  }
  if ("content" in response && Array.isArray(response.content)) {
    return tryParseJson(extractText2(response)) ?? response;
  }
  return response;
}
|
|
1743
|
+
/**
 * Parses `text` as JSON when it looks like a JSON object or array.
 *
 * Only non-empty strings that (after trimming) start with `{` or `[` and end
 * with `}` or `]` are attempted; everything else, and any parse failure,
 * yields `null`.
 *
 * @param {unknown} text - Candidate JSON text.
 * @returns {unknown} The parsed value, or `null`.
 */
function tryParseJson(text) {
  if (typeof text !== "string" || text === "") {
    return null;
  }
  const candidate = text.trim();
  const opensContainer = candidate.startsWith("{") || candidate.startsWith("[");
  const closesContainer = candidate.endsWith("}") || candidate.endsWith("]");
  if (!opensContainer || !closesContainer) {
    return null;
  }
  try {
    return JSON.parse(candidate);
  } catch {
    // Looked like JSON but is not: treat as plain text.
    return null;
  }
}
|
|
1757
|
+
/**
 * Formats schema issues as `path: message` entries joined by "; ". An empty
 * path is shown as "root".
 *
 * @param {{issues: Array<{path: Array<string|number>, message: string}>}} error - Error carrying the issues.
 * @returns {string}
 */
function formatZodIssues(error) {
  const lines = [];
  for (const { path: segments, message } of error.issues) {
    const location = segments.length > 0 ? segments.join(".") : "root";
    lines.push(`${location}: ${message}`);
  }
  return lines.join("; ");
}
|
|
1764
|
+
|
|
1765
|
+
// src/assertions/validators/text.ts
|
|
1766
|
+
/**
 * Checks that the response text contains every expected substring.
 *
 * @param {unknown} response - Response whose text is searched.
 * @param {string|string[]} expected - Substring(s) that must all be present.
 * @param {{caseSensitive?: boolean}} [options] - `caseSensitive` defaults to true.
 * @returns {{pass: boolean, message: string, details?: object}}
 */
function validateText(response, expected, options = {}) {
  const { caseSensitive: matchCase = true } = options;
  const needles = Array.isArray(expected) ? expected : [expected];
  const text = extractText2(response);
  const fold = (value) => (matchCase ? value : value.toLowerCase());
  const haystack = fold(text);
  const missing = needles.filter((needle) => !haystack.includes(fold(needle)));

  if (missing.length > 0) {
    return {
      pass: false,
      message: missing.length === 1 ? `Response does not contain expected text: "${missing[0]}"` : `Response is missing ${missing.length} expected substrings: ${missing.map((s) => `"${s}"`).join(", ")}`,
      details: {
        missing,
        textLength: text.length,
        textPreview: truncateForDisplay2(text)
      }
    };
  }
  return {
    pass: true,
    message: needles.length === 1 ? `Response contains expected text` : `Response contains all ${needles.length} expected substrings`
  };
}
|
|
1794
|
+
/**
 * Shortens a string for display, appending a truncation marker when it is
 * longer than `maxLength`.
 *
 * @param {string} str - Text to shorten.
 * @param {number} [maxLength=200] - Maximum number of characters kept.
 * @returns {string}
 */
function truncateForDisplay2(str, maxLength = 200) {
  const fits = str.length <= maxLength;
  return fits ? str : `${str.slice(0, maxLength)}... (truncated)`;
}
|
|
1800
|
+
|
|
1801
|
+
// src/assertions/validators/pattern.ts
|
|
1802
|
+
/**
 * Checks that the response text matches every given pattern.
 *
 * @param {unknown} response - Response whose text is tested.
 * @param {string|RegExp|Array<string|RegExp>} patterns - Pattern(s) that must all match.
 * @param {{caseSensitive?: boolean}} [options] - `caseSensitive` defaults to true.
 * @returns {{pass: boolean, message: string, details?: object}}
 */
function validatePattern(response, patterns, options = {}) {
  const { caseSensitive: matchCase = true } = options;
  const ignoreCase = !matchCase;
  const patternList = Array.isArray(patterns) ? patterns : [patterns];
  const text = extractText2(response);

  const unmatched = patternList
    .filter((candidate) => !toRegExp(candidate, ignoreCase).test(text))
    .map((candidate) => patternToString(candidate));

  if (unmatched.length > 0) {
    return {
      pass: false,
      message: unmatched.length === 1 ? `Response does not match pattern: ${unmatched[0]}` : `Response does not match ${unmatched.length} patterns: ${unmatched.join(", ")}`,
      details: {
        unmatched,
        textLength: text.length,
        textPreview: truncateForDisplay3(text)
      }
    };
  }
  return {
    pass: true,
    message: patternList.length === 1 ? `Response matches pattern` : `Response matches all ${patternList.length} patterns`
  };
}
|
|
1830
|
+
/**
 * Builds the regular expression used to test response text.
 *
 * String patterns are compiled as-is. RegExp patterns are always copied so
 * the caller's object is never mutated, and the `g` flag is dropped: with it,
 * `.test()` advances `lastIndex`, which made repeated assertions with the
 * same pattern object alternate between matching and not matching.
 *
 * @param {string|RegExp} pattern - Pattern source or expression.
 * @param {boolean} caseInsensitive - Adds the `i` flag when true.
 * @returns {RegExp} A fresh expression whose `lastIndex` is 0.
 */
function toRegExp(pattern, caseInsensitive) {
  if (!(pattern instanceof RegExp)) {
    return new RegExp(pattern, caseInsensitive ? "i" : "");
  }
  // "Does it match anywhere" does not need global matching state.
  let flags = pattern.flags.replace("g", "");
  if (caseInsensitive && !flags.includes("i")) {
    flags += "i";
  }
  return new RegExp(pattern.source, flags);
}
|
|
1840
|
+
/**
 * Renders a pattern for messages: RegExp objects through `toString()`,
 * strings wrapped in slashes.
 *
 * @param {string|RegExp} pattern - Pattern to render.
 * @returns {string}
 */
function patternToString(pattern) {
  return pattern instanceof RegExp ? pattern.toString() : `/${pattern}/`;
}
|
|
1846
|
+
/**
 * Shortens a string for display, appending a truncation marker when it is
 * longer than `maxLength`.
 *
 * @param {string} str - Text to shorten.
 * @param {number} [maxLength=200] - Maximum number of characters kept.
 * @returns {string}
 */
function truncateForDisplay3(str, maxLength = 200) {
  const fits = str.length <= maxLength;
  return fits ? str : `${str.slice(0, maxLength)}... (truncated)`;
}
|
|
1852
|
+
|
|
1853
|
+
// src/assertions/validators/error.ts
|
|
1854
|
+
/**
 * Validates the error state of a response.
 *
 * - `expected === true`: the response must be an error.
 * - `expected === false`: the response must not be an error.
 * - string / string[]: the response must be an error whose message contains
 *   at least one of the given texts (case-insensitive).
 *
 * @param {unknown} response - Response to inspect.
 * @param {boolean|string|string[]} [expected=true] - Expectation to check.
 * @returns {{pass: boolean, message: string, details?: object}}
 */
function validateError(response, expected = true) {
  const actualIsError = isErrorResponse(response);
  const errorMessage = actualIsError ? extractErrorMessage(response) : "";
  const successPreview = () => ({
    textPreview: truncateForDisplay4(extractText2(response))
  });

  if (expected === true) {
    if (!actualIsError) {
      return {
        pass: false,
        message: "Expected an error response but got success",
        details: successPreview()
      };
    }
    return {
      pass: true,
      message: "Response is an error as expected"
    };
  }

  if (expected === false) {
    if (actualIsError) {
      return {
        pass: false,
        message: `Expected a success response but got error: "${truncateForDisplay4(errorMessage)}"`,
        details: {
          errorMessage
        }
      };
    }
    return {
      pass: true,
      message: "Response is not an error as expected"
    };
  }

  const expectedMessages = Array.isArray(expected) ? expected : [expected];
  if (!actualIsError) {
    return {
      pass: false,
      message: `Expected an error containing "${expectedMessages[0]}" but got success`,
      details: successPreview()
    };
  }

  const haystack = errorMessage.toLowerCase();
  const matched = expectedMessages.some((fragment) => haystack.includes(fragment.toLowerCase()));
  if (!matched) {
    return {
      pass: false,
      message: expectedMessages.length === 1 ? `Error message does not contain "${expectedMessages[0]}"` : `Error message does not contain any of: ${expectedMessages.map((m) => `"${m}"`).join(", ")}`,
      details: {
        actualErrorMessage: errorMessage,
        expectedToContain: expectedMessages
      }
    };
  }
  return {
    pass: true,
    message: "Error message contains expected text"
  };
}
|
|
1916
|
+
/**
 * Shortens a string for display, appending a truncation marker when it is
 * longer than `maxLength`.
 *
 * @param {string} str - Text to shorten.
 * @param {number} [maxLength=200] - Maximum number of characters kept.
 * @returns {string}
 */
function truncateForDisplay4(str, maxLength = 200) {
  const fits = str.length <= maxLength;
  return fits ? str : `${str.slice(0, maxLength)}... (truncated)`;
}
|
|
1922
|
+
|
|
1923
|
+
// src/assertions/validators/size.ts
|
|
1924
|
+
/**
 * Checks that the response size lies within the given byte bounds.
 *
 * @param {unknown} response - Response to measure.
 * @param {{maxBytes?: number, minBytes?: number}} options - At least one bound is required.
 * @returns {{pass: boolean, message: string, details?: object}}
 */
function validateSize(response, options) {
  const { maxBytes, minBytes } = options;
  const hasMin = minBytes !== void 0;
  const hasMax = maxBytes !== void 0;
  if (!hasMin && !hasMax) {
    return {
      pass: false,
      message: "Size validation requires at least one of maxBytes or minBytes"
    };
  }

  const actualSize = getResponseSizeBytes(response);
  const violations = [];
  if (hasMin && actualSize < minBytes) {
    violations.push(
      `Response size (${formatBytes(actualSize)}) is below minimum (${formatBytes(minBytes)})`
    );
  }
  if (hasMax && actualSize > maxBytes) {
    violations.push(
      `Response size (${formatBytes(actualSize)}) exceeds maximum (${formatBytes(maxBytes)})`
    );
  }

  if (violations.length > 0) {
    return {
      pass: false,
      message: violations.join("; "),
      details: {
        actualBytes: actualSize,
        minBytes,
        maxBytes
      }
    };
  }
  return {
    pass: true,
    message: `Response size (${formatBytes(actualSize)}) is within bounds`,
    details: {
      actualBytes: actualSize
    }
  };
}
|
|
1963
|
+
/**
 * Formats a byte count for messages: plain bytes below 1 KiB, otherwise KB or
 * MB with one decimal (1 KB = 1024 bytes).
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string}
 */
function formatBytes(bytes) {
  const KIB = 1024;
  const MIB = KIB * KIB;
  if (bytes >= MIB) {
    return `${(bytes / MIB).toFixed(1)} MB`;
  }
  if (bytes >= KIB) {
    return `${(bytes / KIB).toFixed(1)} KB`;
  }
  return `${bytes} bytes`;
}
|
|
1972
|
+
|
|
1973
|
+
// src/mcp/fixtures/mcpFixture.ts
|
|
1974
|
+
// Playwright's `test.step`, bound to `test`, when @playwright/test can be
// loaded; stays null otherwise so callers can run without step reporting.
var testStep = null;
try {
  const runner = __require("@playwright/test")?.test;
  if (runner?.step) {
    testStep = runner.step.bind(runner);
  }
} catch {
  // Loading failed: keep testStep null.
}
|
|
1982
|
+
/**
 * Wraps an MCP client in a small fixture API (`listTools`, `callTool`,
 * `getServerInfo`).
 *
 * Without `testInfo` the fixture only forwards to the client. With `testInfo`
 * every operation also attaches a JSON record to the test, and `listTools` /
 * `callTool` run inside a `testStep` when one is available.
 *
 * @param {object} client - MCP client exposing `listTools`, `callTool` and `getServerVersion`.
 * @param {object} [testInfo] - Test info object exposing `attach(name, options)`.
 * @param {{authType?: string, project?: string}} [options] - `authType` defaults to "none".
 * @returns {object} The fixture, which also exposes `client`, `authType` and `project`.
 */
function createMCPFixture(client, testInfo, options) {
  const authType = options?.authType ?? "none";
  const project = options?.project;

  // Reduces the client's server version to { name, version }, or null.
  const readServerInfo = () => {
    const serverVersion = client.getServerVersion();
    return serverVersion ? { name: serverVersion.name, version: serverVersion.version } : null;
  };

  if (!testInfo) {
    return {
      client,
      authType,
      project,
      async listTools() {
        const { tools } = await client.listTools();
        return tools;
      },
      async callTool(name, args) {
        const response = await client.callTool({ name, arguments: args });
        return response;
      },
      getServerInfo() {
        return readServerInfo();
      }
    };
  }

  // Attaches a pretty-printed JSON record to the running test.
  const attachJson = (attachmentName, payload) => testInfo.attach(attachmentName, {
    contentType: "application/json",
    body: JSON.stringify(payload, null, 2)
  });
  // Runs `body` inside a test step when step support was detected.
  const runAsStep = (title, body) => (testStep ? testStep(title, body) : body());

  return {
    client,
    authType,
    project,
    async listTools() {
      return runAsStep("MCP: listTools()", async () => {
        const { tools } = await client.listTools();
        await attachJson("mcp-list-tools", {
          operation: "listTools",
          toolCount: tools.length,
          tools: tools.map(({ name, description }) => ({ name, description }))
        });
        return tools;
      });
    },
    async callTool(name, args) {
      return runAsStep(`MCP: callTool("${name}")`, async () => {
        const startedAt = Date.now();
        const result = await client.callTool({ name, arguments: args });
        const durationMs = Date.now() - startedAt;
        await attachJson(`mcp-call-${name}`, {
          operation: "callTool",
          toolName: name,
          args,
          result,
          durationMs,
          isError: result.isError || false,
          authType,
          project
        });
        return result;
      });
    },
    getServerInfo() {
      const serverInfo = readServerInfo();
      // Best effort: this method is synchronous, so attachment failures are ignored.
      attachJson("mcp-server-info", {
        operation: "getServerInfo",
        serverInfo
      }).catch(() => {
      });
      return serverInfo;
    }
  };
}
|
|
2091
|
+
|
|
2092
|
+
// src/assertions/matchers/toMatchToolResponse.ts
|
|
2093
|
+
/**
 * Custom matcher: passes when `received` equals `expected` according to
 * `validateResponse`. Reads `this.isNot` lazily to pick the failure message.
 *
 * @param {unknown} received - Actual response.
 * @param {unknown} expected - Expected response.
 * @returns {{pass: boolean, message: () => string}}
 */
function toMatchToolResponse(received, expected) {
  const outcome = validateResponse(received, expected);
  const describe = () => {
    const negatedButMatched = this.isNot && outcome.pass;
    return negatedButMatched ? "Expected response NOT to match, but it did" : outcome.message;
  };
  return {
    pass: outcome.pass,
    message: describe
  };
}
|
|
2105
|
+
|
|
2106
|
+
// src/assertions/matchers/toMatchToolSchema.ts
|
|
2107
|
+
/**
 * Custom matcher: passes when `received` satisfies `schema` according to
 * `validateSchema`. Reads `this.isNot` lazily to pick the failure message.
 *
 * @param {unknown} received - Actual response.
 * @param {object} schema - Schema to validate against.
 * @param {object} [options] - Forwarded to `validateSchema`.
 * @returns {{pass: boolean, message: () => string}}
 */
function toMatchToolSchema(received, schema, options = {}) {
  const outcome = validateSchema(received, schema, options);
  const describe = () => {
    const negatedButMatched = this.isNot && outcome.pass;
    return negatedButMatched ? "Expected response NOT to match schema, but it did" : outcome.message;
  };
  return {
    pass: outcome.pass,
    message: describe
  };
}
|
|
2119
|
+
|
|
2120
|
+
// src/assertions/matchers/toContainToolText.ts
|
|
2121
|
+
/**
 * Custom matcher: passes when the response text contains every expected
 * substring according to `validateText`. Reads `this.isNot` lazily to pick
 * the failure message.
 *
 * @param {unknown} received - Actual response.
 * @param {string|string[]} expected - Substring(s) to look for.
 * @param {object} [options] - Forwarded to `validateText`.
 * @returns {{pass: boolean, message: () => string}}
 */
function toContainToolText(received, expected, options = {}) {
  const outcome = validateText(received, expected, options);
  const describe = () => {
    if (!(this.isNot && outcome.pass)) {
      return outcome.message;
    }
    const quoted = Array.isArray(expected) ? expected.map((s) => `"${s}"`).join(", ") : `"${expected}"`;
    return `Expected response NOT to contain ${quoted}, but it did`;
  };
  return {
    pass: outcome.pass,
    message: describe
  };
}
|
|
2134
|
+
|
|
2135
|
+
// src/assertions/matchers/toMatchToolPattern.ts
|
|
2136
|
+
/**
 * Custom matcher: passes when the response text matches every pattern
 * according to `validatePattern`. Reads `this.isNot` lazily to pick the
 * failure message.
 *
 * @param {unknown} received - Actual response.
 * @param {string|RegExp|Array<string|RegExp>} patterns - Pattern(s) to match.
 * @param {object} [options] - Forwarded to `validatePattern`.
 * @returns {{pass: boolean, message: () => string}}
 */
function toMatchToolPattern(received, patterns, options = {}) {
  const outcome = validatePattern(received, patterns, options);
  const describe = () => {
    const negatedButMatched = this.isNot && outcome.pass;
    return negatedButMatched ? "Expected response NOT to match pattern(s), but it did" : outcome.message;
  };
  return {
    pass: outcome.pass,
    message: describe
  };
}
|
|
2148
|
+
// Named sanitizers: each entry pairs a global regex with the placeholder that
// replaces every match.
var BUILT_IN_PATTERNS = {
  // Standalone runs of 10 to 13 digits.
  timestamp: { pattern: /\b\d{10,13}\b/g, replacement: "[TIMESTAMP]" },
  // Hyphenated UUIDs whose version digit is 1-5 and variant digit is 8, 9, a or b.
  uuid: {
    pattern: /\b[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\b/gi,
    replacement: "[UUID]"
  },
  // YYYY-MM-DD, optionally followed by a time, fraction and zone.
  "iso-date": {
    pattern: /\b\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d{1,3})?(Z|[+-]\d{2}:?\d{2})?)?\b/g,
    replacement: "[ISO_DATE]"
  },
  // Standalone 24-character hexadecimal identifiers.
  objectId: { pattern: /\b[0-9a-f]{24}\b/gi, replacement: "[OBJECT_ID]" },
  // Three dot-separated base64url segments, the first two starting with "eyJ".
  jwt: {
    pattern: /\beyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]+\b/g,
    replacement: "[JWT]"
  }
};
|
|
2170
|
+
/**
 * Tells whether a sanitizer is a regex sanitizer, i.e. a non-null object with
 * a `pattern` property.
 *
 * @param {unknown} sanitizer - Sanitizer to classify.
 * @returns {boolean}
 */
function isRegexSanitizer(sanitizer) {
  if (typeof sanitizer !== "object" || sanitizer === null) {
    return false;
  }
  return "pattern" in sanitizer;
}
|
|
2173
|
+
/**
 * Tells whether a sanitizer is a field-removal sanitizer, i.e. a non-null
 * object with a `remove` property.
 *
 * @param {unknown} sanitizer - Sanitizer to classify.
 * @returns {boolean}
 */
function isFieldRemovalSanitizer(sanitizer) {
  if (typeof sanitizer !== "object" || sanitizer === null) {
    return false;
  }
  return "remove" in sanitizer;
}
|
|
2176
|
+
/**
 * Applies sanitizers to a string, in order.
 *
 * - string: name of a built-in pattern; unknown names are ignored.
 * - `{ pattern, replacement? }`: regex replacement; string patterns are
 *   compiled with the `g` flag and the replacement defaults to "[SANITIZED]".
 * - `{ remove }`: the current text is parsed as JSON, the listed field paths
 *   are removed and the result is re-serialized with a 2-space indent. Text
 *   that is not valid JSON is left untouched.
 *
 * @param {string} value - Text to sanitize.
 * @param {Array<string|object>} sanitizers - Sanitizers to apply.
 * @returns {string} The sanitized text.
 */
function applySanitizers(value, sanitizers) {
  return sanitizers.reduce((text, sanitizer) => {
    if (typeof sanitizer === "string") {
      const builtIn = BUILT_IN_PATTERNS[sanitizer];
      return builtIn ? text.replace(builtIn.pattern, builtIn.replacement) : text;
    }
    if (isRegexSanitizer(sanitizer)) {
      const expression = sanitizer.pattern instanceof RegExp ? sanitizer.pattern : new RegExp(sanitizer.pattern, "g");
      return text.replace(expression, sanitizer.replacement ?? "[SANITIZED]");
    }
    if (isFieldRemovalSanitizer(sanitizer)) {
      try {
        const parsed = JSON.parse(text);
        removeFields(parsed, sanitizer.remove);
        return JSON.stringify(parsed, null, 2);
      } catch {
        // Not JSON (or removal failed): keep the text as it was.
        return text;
      }
    }
    return text;
  }, value);
}
|
|
2203
|
+
/**
 * Deletes the fields addressed by dot-separated paths from `obj`, in place.
 *
 * Only own properties are traversed and deleted, so a path such as
 * `__proto__.toString` can no longer reach and mutate `Object.prototype`.
 * Paths whose intermediate segments are missing or not objects are ignored.
 * Array elements are addressed by index (`items.0.id`).
 *
 * @param {unknown} obj - Object to mutate; non-objects are ignored.
 * @param {string[]} paths - Dot-separated field paths.
 * @returns {void}
 */
function removeFields(obj, paths) {
  if (typeof obj !== "object" || obj === null) {
    return;
  }
  const isContainer = (candidate) => typeof candidate === "object" && candidate !== null;
  const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target, key);

  for (const path3 of paths) {
    // split() always yields at least one segment; the last one is the field to delete.
    const segments = path3.split(".");
    const lastKey = segments.pop();
    let current = obj;
    for (const key of segments) {
      if (!isContainer(current) || !hasOwn(current, key)) {
        current = void 0;
        break;
      }
      current = current[key];
    }
    if (isContainer(current) && hasOwn(current, lastKey)) {
      delete current[lastKey];
    }
  }
}
|
|
2230
|
+
/**
 * Custom matcher: compares the (optionally sanitized) response text with the
 * stored snapshot `name`.
 *
 * `pass` always reports whether the snapshot matched. The expect framework
 * applies the `.not` negation itself, so the result must not be pre-inverted;
 * `this.isNot` only selects the wording of the message.
 *
 * @param {unknown} received - Actual response.
 * @param {string} name - Snapshot name.
 * @param {Array<string|object>} [sanitizers=[]] - Sanitizers applied to the text first.
 * @returns {Promise<{pass: boolean, message: () => string}>}
 */
async function toMatchToolSnapshot(received, name, sanitizers = []) {
  let content = extractText2(received);
  if (sanitizers.length > 0) {
    content = applySanitizers(content, sanitizers);
  }
  const negated = this.isNot;

  let matched = true;
  let mismatch;
  try {
    await expect$1(content).toMatchSnapshot(name);
  } catch (error) {
    matched = false;
    mismatch = error;
  }

  if (matched) {
    return {
      pass: true,
      // Only shown for `.not`, where a match is the failure.
      message: () => negated ? `Expected response NOT to match snapshot "${name}", but it did` : `Response matches snapshot "${name}"`
    };
  }
  return {
    pass: false,
    message: () => {
      if (negated) {
        return `Response does not match snapshot "${name}" as expected`;
      }
      return mismatch instanceof Error ? mismatch.message : `Response does not match snapshot "${name}"`;
    }
  };
}
|
|
2262
|
+
|
|
2263
|
+
// src/assertions/matchers/toBeToolError.ts
|
|
2264
|
+
/**
 * Custom matcher for the error state of a tool response.
 *
 * Under `.not` the expectation handed to `validateError` is inverted (a
 * boolean is flipped, a message expectation becomes "must not be an error")
 * and the resulting `pass` is inverted back, because the expect framework
 * applies the `.not` negation itself. Since `validateError` already ran with
 * the effective expectation, its message describes the outcome correctly in
 * every case and is returned as-is.
 *
 * @param {unknown} received - Actual response.
 * @param {boolean|string|string[]} [expected=true] - Expectation to check.
 * @returns {{pass: boolean, message: () => string}}
 */
function toBeToolError(received, expected = true) {
  const negated = this.isNot;
  let effectiveExpected = expected;
  if (negated) {
    effectiveExpected = typeof expected === "boolean" ? !expected : false;
  }
  const result = validateError(received, effectiveExpected);
  return {
    pass: negated ? !result.pass : result.pass,
    message: () => result.message
  };
}
|
|
2281
|
+
/**
 * Creates a judge that evaluates a candidate response with the Claude Agent
 * SDK `query` API.
 *
 * @param {{model?: string, maxBudgetUsd?: number, maxToolOutputSize?: number}} config
 *   `model` defaults to "claude-sonnet-4-20250514" and `maxBudgetUsd` to 0.1.
 *   When `maxToolOutputSize` is set, larger candidates fail without being sent
 *   to the model.
 * @returns {{evaluate: (candidate: unknown, reference: unknown, rubric: string) => Promise<object>}}
 *   `evaluate` resolves to the verdict (`pass`, `score`, `reasoning`, size
 *   information and, when the model was called, `usage`). It rejects with
 *   "Claude Agent judge evaluation failed: ..." when the SDK call or the
 *   response parsing fails; the original error is kept as `cause`.
 */
function createClaudeAgentJudge(config) {
  const model = config.model ?? "claude-sonnet-4-20250514";
  const maxBudgetUsd = config.maxBudgetUsd ?? 0.1;
  const maxToolOutputSize = config.maxToolOutputSize;
  return {
    async evaluate(candidate, reference, rubric) {
      const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
      const candidateSizeBytes = Buffer.byteLength(candidateStr, "utf8");
      if (maxToolOutputSize !== void 0 && candidateSizeBytes > maxToolOutputSize) {
        return {
          pass: false,
          score: 0,
          reasoning: `Tool output size (${candidateSizeBytes} bytes) exceeds maximum allowed size (${maxToolOutputSize} bytes)`,
          candidateSizeBytes,
          exceedsMaxToolOutputSize: true
        };
      }
      const prompt = buildJudgePrompt(candidate, reference, rubric);
      try {
        let resultMessage;
        for await (const message of query({
          prompt,
          options: {
            model,
            maxBudgetUsd,
            // Use empty tools array for response-only mode
            tools: [],
            // Bypass permissions since we're not using any tools
            permissionMode: "bypassPermissions",
            allowDangerouslySkipPermissions: true,
            // Use a custom system prompt for JSON output
            systemPrompt: buildSystemPrompt(),
            // Limit to 1 turn since this is a simple evaluation
            maxTurns: 1
          }
        })) {
          // Keep the last "result" message of the stream.
          if (message.type === "result") {
            resultMessage = message;
          }
        }
        if (!resultMessage) {
          throw new Error("No result message received from Claude Agent SDK");
        }
        if (resultMessage.subtype !== "success") {
          if (resultMessage.errors?.length) {
            throw new Error(
              `Claude Agent SDK error: ${resultMessage.errors.join(", ")}`
            );
          }
          // A failed run without any result text has nothing to parse: report
          // the subtype instead of failing later on an empty string.
          if (typeof resultMessage.result !== "string" || resultMessage.result === "") {
            throw new Error(`Claude Agent SDK error: ${resultMessage.subtype}`);
          }
        }
        const responseText = resultMessage.result ?? "";
        const parsed = parseJudgeResponse(responseText);
        const usage = {
          inputTokens: resultMessage.usage?.input_tokens ?? 0,
          outputTokens: resultMessage.usage?.output_tokens ?? 0,
          totalCostUsd: resultMessage.total_cost_usd ?? 0,
          durationMs: resultMessage.duration_ms ?? 0,
          durationApiMs: resultMessage.duration_api_ms,
          cacheReadInputTokens: resultMessage.usage?.cache_read_input_tokens,
          cacheCreationInputTokens: resultMessage.usage?.cache_creation_input_tokens
        };
        return {
          pass: parsed.pass ?? false,
          score: parsed.score,
          reasoning: parsed.reasoning,
          usage,
          candidateSizeBytes,
          exceedsMaxToolOutputSize: false
        };
      } catch (error) {
        throw new Error(
          `Claude Agent judge evaluation failed: ${error instanceof Error ? error.message : String(error)}`,
          { cause: error }
        );
      }
    }
  };
}
|
|
2356
|
+
/**
 * Builds the fixed system prompt used for judge evaluations.
 *
 * The prompt tells the model to act as an evaluator and to answer with a
 * bare JSON object containing `pass`, `score` and `reasoning`.
 *
 * @returns {string} The system prompt text.
 */
function buildSystemPrompt() {
  // Kept as separate sentences for readability; joined with single spaces.
  const sentences = [
    "You are an expert evaluator.",
    "Evaluate the candidate response based on the rubric provided.",
    'Respond ONLY with valid JSON in this exact format: {"pass": boolean, "score": number (0-1), "reasoning": string}.',
    "Do not include any other text, markdown formatting, or code blocks."
  ];
  return sentences.join(" ");
}
|
|
2359
|
+
/**
 * Assembles the user prompt for a judge evaluation.
 *
 * Sections, in order: the rubric, the candidate response, the reference
 * response (only when `reference` is neither null nor undefined), and the
 * closing instructions. Non-string candidate/reference values are rendered
 * with `JSON.stringify(value, null, 2)`.
 *
 * @param {unknown} candidate - Response under evaluation.
 * @param {unknown} reference - Optional reference response.
 * @param {string} rubric - Evaluation criteria.
 * @returns {string} The full prompt text.
 */
function buildJudgePrompt(candidate, reference, rubric) {
  const render = (value) => typeof value === "string" ? value : JSON.stringify(value, null, 2);
  const hasReference = reference !== null && reference !== void 0;
  const sections = [
    "# Evaluation Task\n",
    rubric,
    "\n\n# Candidate Response\n",
    render(candidate)
  ];
  if (hasReference) {
    sections.push("\n\n# Reference Response\n", render(reference));
  }
  const compareHint = hasReference ? "Compare it against the reference response if helpful. " : "";
  sections.push(
    "\n\n# Instructions\nEvaluate the candidate response based on the rubric. " + compareHint + 'Respond with JSON containing "pass" (boolean), "score" (0-1), and "reasoning" (string).'
  );
  // join("") renders null/undefined entries as empty strings.
  return sections.join("");
}
|
|
2378
|
+
/**
 * Parses the judge model's textual reply into a JSON value.
 *
 * A leading "```json" / "```" fence and a trailing "```" fence are stripped
 * before parsing. If the remaining text is not valid JSON, the widest
 * `{ ... }` span containing `"pass"` is extracted and parsed instead.
 *
 * @param {string} text - Raw reply text from the judge.
 * @returns {unknown} The parsed JSON value.
 * @throws {Error} "Failed to parse judge response as JSON: ..." when neither
 *   the whole text nor the extracted span is valid JSON.
 */
function parseJudgeResponse(text) {
  let jsonText = text.trim();
  if (jsonText.startsWith("```json")) {
    jsonText = jsonText.slice(7);
  }
  if (jsonText.startsWith("```")) {
    jsonText = jsonText.slice(3);
  }
  if (jsonText.endsWith("```")) {
    jsonText = jsonText.slice(0, -3);
  }
  jsonText = jsonText.trim();
  try {
    return JSON.parse(jsonText);
  } catch {
    const jsonMatch = jsonText.match(/\{[\s\S]*"pass"[\s\S]*\}/);
    if (jsonMatch) {
      try {
        return JSON.parse(jsonMatch[0]);
      } catch {
        // The extracted span is not valid JSON either; report the failure
        // with the full reply below rather than a bare SyntaxError.
      }
    }
    throw new Error(`Failed to parse judge response as JSON: ${text}`);
  }
}
|
|
2400
|
+
|
|
2401
|
+
// src/judge/judgeClient.ts
|
|
2402
|
+
/**
 * Creates a judge for the configured provider.
 *
 * `config.provider` defaults to "claude". "claude" and "anthropic" both
 * delegate to `createClaudeAgentJudge(config)`; every other value throws.
 *
 * @param {object} [config={}] - Judge configuration.
 * @returns {*} The value returned by `createClaudeAgentJudge(config)`.
 * @throws {Error} For "openai", "custom-http", or any unrecognized provider.
 */
function createJudge(config = {}) {
  const provider = config.provider ?? "claude";
  if (provider === "claude" || provider === "anthropic") {
    return createClaudeAgentJudge(config);
  }
  if (provider === "openai") {
    throw new Error(
      'OpenAI provider is no longer supported. Please use createJudge() without specifying provider, or use provider: "claude". See migration guide at https://github.com/gleanwork/mcp-server-tester/blob/main/docs/migration-v0.11.md'
    );
  }
  if (provider === "custom-http") {
    throw new Error(
      "custom-http provider is no longer supported. Please use createJudge() without specifying provider."
    );
  }
  throw new Error(`Unsupported LLM provider: ${String(provider)}`);
}
|
|
2420
|
+
|
|
2421
|
+
// src/assertions/matchers/toPassToolJudge.ts
|
|
2422
|
+
// Minimum judge score that counts as a pass when the caller gives no threshold.
var DEFAULT_PASSING_THRESHOLD = 0.7;
// Judge configuration used when the caller gives none.
var DEFAULT_JUDGE_CONFIG = {};
/**
 * Custom matcher: asks an LLM judge whether `received` satisfies `rubric`.
 *
 * The effective score is `result.score`, or 1/0 derived from `result.pass`
 * when the judge returned no score. The response "matches" when the score is
 * at least `passingThreshold`.
 *
 * The returned `pass` is always the un-negated match result. The expect
 * framework applies `.not` itself, so inverting `pass` here would make
 * `expect(x).not.toPassToolJudge(...)` succeed exactly when the judge passed.
 * A judge error is reported so that the assertion fails in both the plain
 * and the `.not` form.
 *
 * @param {unknown} received - Response under evaluation.
 * @param {string} rubric - Evaluation criteria given to the judge.
 * @param {object} [options={}]
 * @param {unknown} [options.reference=null] - Optional reference response.
 * @param {number} [options.passingThreshold=0.7] - Minimum passing score.
 * @param {object} [options.judgeConfig={}] - Passed to `createJudge`.
 * @returns {Promise<{pass: boolean, message: () => string}>} Matcher result.
 * @throws {Error} Whatever `createJudge` throws for an invalid configuration.
 */
async function toPassToolJudge(received, rubric, options = {}) {
  const {
    reference = null,
    passingThreshold = DEFAULT_PASSING_THRESHOLD,
    judgeConfig = DEFAULT_JUDGE_CONFIG
  } = options;
  const isNot = Boolean(this?.isNot);
  const judge = createJudge(judgeConfig);
  try {
    const result = await judge.evaluate(received, reference, rubric);
    const score = result.score ?? (result.pass ? 1 : 0);
    const passes = score >= passingThreshold;
    if (isNot) {
      return {
        pass: passes,
        message: () => passes ? `Expected judge evaluation to fail, but it passed with score ${score.toFixed(2)}` : `Judge evaluation failed as expected with score ${score.toFixed(2)}`
      };
    }
    if (passes) {
      return {
        pass: true,
        message: () => `Judge evaluation passed with score ${score.toFixed(2)} (threshold: ${passingThreshold})`
      };
    }
    return {
      pass: false,
      message: () => `Judge evaluation failed with score ${score.toFixed(2)} (threshold: ${passingThreshold}). Reasoning: ${result.reasoning ?? "No reasoning provided"}`
    };
  } catch (error) {
    return {
      // `pass === isNot` is the one value that fails the assertion in both
      // modes, so an evaluation error can never satisfy a `.not` expectation.
      pass: isNot,
      message: () => `Judge evaluation failed with error: ${error instanceof Error ? error.message : String(error)}`
    };
  }
}
|
|
2458
|
+
|
|
2459
|
+
// src/assertions/matchers/toHaveToolResponseSize.ts
|
|
2460
|
+
/**
 * Custom matcher: checks the size of a tool response via `validateSize`.
 *
 * `pass` is whatever `validateSize(received, options)` reports. The message
 * is the validator's own message, except under `.not` when the size check
 * passed, where a fixed "NOT to be within bounds" message is used.
 *
 * @param {unknown} received - Tool response to measure.
 * @param {object} options - Size bounds forwarded to `validateSize`.
 * @returns {{pass: boolean, message: () => string}} Matcher result.
 */
function toHaveToolResponseSize(received, options) {
  const sizeCheck = validateSize(received, options);
  // Arrow function so `this` stays the matcher context; it is only read
  // when the message is actually requested.
  const describeOutcome = () => {
    const negatedButWithinBounds = this.isNot && sizeCheck.pass;
    return negatedButWithinBounds ? "Expected response size NOT to be within bounds, but it was" : sizeCheck.message;
  };
  return { pass: sizeCheck.pass, message: describeOutcome };
}
|
|
2472
|
+
|
|
2473
|
+
// src/assertions/matchers/toSatisfyToolPredicate.ts
|
|
2474
|
+
/**
 * Normalizes a predicate's return value into a `{ pass, message }` object.
 *
 * Booleans are wrapped with a default message; any other value is returned
 * unchanged.
 *
 * @param {boolean|object} result - Raw predicate result.
 * @returns {object} A result object, or the input itself when not a boolean.
 */
function normalizeResult(result) {
  if (typeof result !== "boolean") {
    return result;
  }
  const message = result ? "Predicate passed" : "Predicate returned false";
  return { pass: result, message };
}
|
|
2483
|
+
/**
 * Custom matcher: runs a user-supplied predicate against a tool response.
 *
 * The predicate receives `(received, text)`, where `text` comes from
 * `extractText2(received)`, and may be async. Its result is normalized with
 * `normalizeResult`.
 *
 * The returned `pass` is always the un-negated predicate outcome. The expect
 * framework applies `.not` itself, so inverting `pass` here would make
 * `.not.toSatisfyToolPredicate(...)` succeed exactly when the predicate
 * matched. A predicate that throws fails the assertion in both modes.
 *
 * @param {unknown} received - Tool response under test.
 * @param {Function} predicate - `(received, text) => boolean | {pass, message?}`.
 * @param {string} [description] - Label used in messages; defaults to
 *   "custom predicate".
 * @returns {Promise<{pass: boolean, message: () => string}>} Matcher result.
 */
async function toSatisfyToolPredicate(received, predicate, description) {
  const predicateDescription = description ?? "custom predicate";
  const isNot = Boolean(this?.isNot);
  try {
    const text = extractText2(received);
    const rawResult = await predicate(received, text);
    const result = normalizeResult(rawResult);
    if (isNot) {
      return {
        pass: result.pass,
        message: () => result.pass ? `Expected response NOT to satisfy ${predicateDescription}` : `Response does not satisfy ${predicateDescription} as expected`
      };
    }
    return {
      pass: result.pass,
      message: () => result.pass ? result.message ?? `Response satisfies ${predicateDescription}` : result.message ?? `Expected response to satisfy ${predicateDescription}`
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return {
      // `pass === isNot` fails the assertion whether or not `.not` is used.
      pass: isNot,
      message: () => `Predicate threw error: ${errorMessage}`
    };
  }
}
|
|
2508
|
+
|
|
2509
|
+
// src/assertions/matchers/index.ts
|
|
2510
|
+
// `expect` from @playwright/test (imported as `expect$1`) extended with the
// MCP tool-response matchers defined in this bundle.
var expect = expect$1.extend({
  toMatchToolResponse: toMatchToolResponse,
  toMatchToolSchema: toMatchToolSchema,
  toContainToolText: toContainToolText,
  toMatchToolPattern: toMatchToolPattern,
  toMatchToolSnapshot: toMatchToolSnapshot,
  toBeToolError: toBeToolError,
  toPassToolJudge: toPassToolJudge,
  toHaveToolResponseSize: toHaveToolResponseSize,
  toSatisfyToolPredicate: toSatisfyToolPredicate
});
|
|
2521
|
+
|
|
2522
|
+
// src/fixtures/mcp.ts
|
|
2523
|
+
// Run the OAuth client provider module initializer before the fixtures
// below construct PlaywrightOAuthClientProvider.
init_oauthClientProvider();
// `test` from @playwright/test (imported as `test$1`) extended with the MCP
// fixtures: `_mcpFixtureState`, `mcpClient` and `mcp`.
var test = test$1.extend({
  /**
   * Internal fixture state - tracks resolved auth type between fixtures
   */
  _mcpFixtureState: [
    // eslint-disable-next-line no-empty-pattern
    async ({}, use) => {
      // Starts as "none"; the mcpClient fixture overwrites it once it has
      // decided which authentication path applies.
      const state = { resolvedAuthType: "none" };
      await use(state);
    },
    { scope: "test" }
  ],
  /**
   * mcpClient fixture: Creates and connects an MCP client
   *
   * The client configuration is read from the project's `use.mcpConfig`
   * setting in playwright.config.ts
   *
   * Authentication resolution order:
   * 1. Explicit authStatePath → uses PlaywrightOAuthClientProvider
   * 2. Explicit accessToken → uses static Bearer token
   * 3. HTTP transport with no auth → tries CLI-stored tokens (from `mcp-server-tester login`)
   *    with automatic token refresh
   */
  mcpClient: async ({ _mcpFixtureState }, use, testInfo) => {
    const useConfig = testInfo.project.use;
    const mcpConfig = useConfig.mcpConfig;
    if (!mcpConfig) {
      throw new Error(
        `Missing mcpConfig in project.use for project "${testInfo.project.name}". Please add mcpConfig to your project configuration in playwright.config.ts`
      );
    }
    let resolvedAuthType = "none";
    let authProvider;
    // Path 1: an OAuth auth-state file is configured.
    if (mcpConfig.auth?.oauth?.authStatePath) {
      authProvider = new PlaywrightOAuthClientProvider({
        storagePath: mcpConfig.auth.oauth.authStatePath,
        redirectUri: mcpConfig.auth.oauth.redirectUri ?? "http://localhost:3000/oauth/callback",
        clientId: mcpConfig.auth.oauth.clientId,
        clientSecret: mcpConfig.auth.oauth.clientSecret
      });
      resolvedAuthType = "oauth";
    }
    let effectiveConfig = mcpConfig;
    // Path 2: a static access token is configured.
    // NOTE(review): when both authStatePath and accessToken are set, the
    // authProvider above is still created and passed on, but the reported
    // auth type becomes "api-token" - confirm this is intended.
    if (mcpConfig.auth?.accessToken) {
      resolvedAuthType = "api-token";
    }
    // Path 3: HTTP config with neither explicit token nor auth-state file -
    // ask CLIOAuthClient for a stored token for this server URL.
    if (isHttpConfig(mcpConfig) && !mcpConfig.auth?.accessToken && !mcpConfig.auth?.oauth?.authStatePath) {
      const cliClient = new CLIOAuthClient({
        mcpServerUrl: mcpConfig.serverUrl
      });
      const tokenResult = await cliClient.tryGetAccessToken();
      if (tokenResult) {
        // Copy the config instead of mutating the project-level object.
        effectiveConfig = {
          ...mcpConfig,
          auth: {
            ...mcpConfig.auth,
            accessToken: tokenResult.accessToken
          }
        };
        resolvedAuthType = "oauth";
      }
    }
    // Publish the decision so the `mcp` fixture can report it.
    _mcpFixtureState.resolvedAuthType = resolvedAuthType;
    const client = await createMCPClientForConfig(effectiveConfig, {
      clientInfo: {
        name: "@gleanwork/mcp-server-tester",
        // NOTE(review): hard-coded "0.1.0"; confirm whether this should
        // track the published package version.
        version: "0.1.0"
      },
      authProvider
    });
    try {
      await use(client);
    } finally {
      // Close the client even when the test body throws.
      await closeMCPClient(client);
    }
  },
  /**
   * mcp fixture: High-level test API built on mcpClient
   *
   * Depends on mcpClient fixture
   * Automatically tracks all MCP operations for the reporter
   */
  mcp: async ({ mcpClient, _mcpFixtureState }, use, testInfo) => {
    const api = createMCPFixture(mcpClient, testInfo, {
      authType: _mcpFixtureState.resolvedAuthType,
      project: testInfo.project.name
    });
    await use(api);
  }
});
|
|
2615
|
+
// Zod schema for the `llmHostConfig` of an eval case: a required provider
// ("openai" or "anthropic") plus optional client/model settings.
var LLMHostConfigSchema = z.object({
  provider: z.enum(["openai", "anthropic"]),
  // Name of the environment variable holding the API key.
  apiKeyEnvVar: z.string().optional(),
  model: z.string().optional(),
  maxTokens: z.number().optional(),
  temperature: z.number().optional(),
  maxToolCalls: z.number().optional()
});
|
|
2623
|
+
// Zod schema for one snapshot sanitizer entry. An entry is either the name
// of a built-in sanitizer, a { pattern, replacement? } object, or a
// { remove: [...] } object.
var SnapshotSanitizerSchema = z.union([
  // Built-in sanitizers
  z.enum(["timestamp", "uuid", "iso-date", "objectId", "jwt"]),
  // Custom regex sanitizer
  z.object({
    pattern: z.string(),
    replacement: z.string().optional()
  }),
  // Field removal sanitizer
  z.object({
    remove: z.array(z.string())
  })
]);
|
|
2636
|
+
// Zod schema for the `expect` block of an eval case. Every key is optional.
var EvalExpectBlockSchema = z.object({
  response: z.unknown().optional(),
  schema: z.string().optional(),
  // A single string or a list of strings.
  containsText: z.union([z.string(), z.array(z.string())]).optional(),
  matchesPattern: z.union([z.string(), z.array(z.string())]).optional(),
  snapshot: z.string().optional(),
  snapshotSanitizers: z.array(SnapshotSanitizerSchema).optional(),
  // A boolean, a single string, or a list of strings.
  isError: z.union([z.boolean(), z.string(), z.array(z.string())]).optional(),
  passesJudge: z.object({
    rubric: z.string(),
    reference: z.unknown().optional(),
    // Must lie in [0, 1] when given.
    threshold: z.number().min(0).max(1).optional(),
    configId: z.string().optional()
  }).optional(),
  responseSize: z.object({
    maxBytes: z.number().optional(),
    minBytes: z.number().optional()
  }).optional()
});
|
|
2655
|
+
// Zod schema for a single eval case. Only `id` is required by the schema.
// The per-mode requirements (scenario/llmHostConfig for "llm_host",
// toolName/args otherwise) are checked at run time by executeToolCall.
var EvalCaseSchema = z.object({
  id: z.string().min(1, "id must not be empty"),
  description: z.string().optional(),
  mode: z.enum(["direct", "llm_host"]).optional(),
  toolName: z.string().min(1, "toolName must not be empty").optional(),
  args: z.record(z.unknown()).optional(),
  scenario: z.string().optional(),
  llmHostConfig: LLMHostConfigSchema.optional(),
  metadata: z.record(z.unknown()).optional(),
  expect: EvalExpectBlockSchema.optional()
});
|
|
2666
|
+
// Zod schema for an eval dataset: a non-empty name and at least one case.
var EvalDatasetSchema = z.object({
  name: z.string().min(1, "name must not be empty"),
  description: z.string().optional(),
  cases: z.array(EvalCaseSchema).min(1, "dataset must have at least one case"),
  metadata: z.record(z.unknown()).optional()
});
|
|
2672
|
+
/**
 * Validates one eval case against `EvalCaseSchema`.
 *
 * @param {unknown} evalCase - Candidate eval case.
 * @returns {object} The value produced by `EvalCaseSchema.parse`.
 * @throws Whatever `EvalCaseSchema.parse` throws for invalid input.
 */
function validateEvalCase(evalCase) {
  const parsedCase = EvalCaseSchema.parse(evalCase);
  return parsedCase;
}
|
|
2675
|
+
/**
 * Validates a dataset object against `EvalDatasetSchema`.
 *
 * @param {unknown} dataset - Candidate dataset.
 * @returns {object} The value produced by `EvalDatasetSchema.parse`.
 * @throws Whatever `EvalDatasetSchema.parse` throws for invalid input.
 */
function validateEvalDataset(dataset) {
  const parsedDataset = EvalDatasetSchema.parse(dataset);
  return parsedDataset;
}
|
|
2678
|
+
/**
 * Loads an eval dataset from a JSON file.
 *
 * The file is read as UTF-8 and parsed as JSON. Unless `options.validate`
 * is falsy, the data is validated with `validateEvalDataset`. The result is
 * the dataset with a `schemas` property set to `options.schemas`, or `{}`
 * when none is given.
 *
 * @param {string} filePath - Path of the JSON file.
 * @param {object} [options={}]
 * @param {object} [options.schemas] - Schema registry attached to the result.
 * @param {boolean} [options.validate=true] - Whether to validate the data.
 * @returns {Promise<object>} The loaded dataset.
 * @throws {Error} "Failed to parse JSON from <path>: ..." on a SyntaxError;
 *   every other error is rethrown unchanged.
 */
async function loadEvalDataset(filePath, options = {}) {
  const { schemas, validate = true } = options;
  try {
    const rawText = await readFile(filePath, "utf-8");
    const parsed = JSON.parse(rawText);
    const base = validate ? validateEvalDataset(parsed) : parsed;
    return { ...base, schemas: schemas ?? {} };
  } catch (failure) {
    if (!(failure instanceof SyntaxError)) {
      throw failure;
    }
    throw new Error(
      `Failed to parse JSON from ${filePath}: ${failure.message}`
    );
  }
}
|
|
2698
|
+
/**
 * Builds an eval dataset from an in-memory object.
 *
 * Unless `options.validate` is falsy, the data is validated with
 * `validateEvalDataset`. The result is the dataset with a `schemas`
 * property set to `options.schemas`, or `{}` when none is given.
 *
 * @param {unknown} data - Dataset-shaped object.
 * @param {object} [options={}]
 * @param {object} [options.schemas] - Schema registry attached to the result.
 * @param {boolean} [options.validate=true] - Whether to validate the data.
 * @returns {object} The dataset.
 */
function loadEvalDatasetFromObject(data, options = {}) {
  const { schemas, validate = true } = options;
  const base = validate ? validateEvalDataset(data) : data;
  return { ...base, schemas: schemas ?? {} };
}
|
|
2707
|
+
|
|
2708
|
+
// src/evals/llmHost/adapter.ts
|
|
2709
|
+
// Registry of LLM adapter factories, keyed by provider name.
var adapters = /* @__PURE__ */ new Map();
/**
 * Registers (or replaces) the adapter factory for a provider.
 *
 * @param {string} provider - Provider name used as the registry key.
 * @param {Function} factory - Zero-argument function returning an adapter.
 */
function registerAdapter(provider, factory) {
  adapters.set(provider, factory);
}
/**
 * Creates an adapter by invoking the factory registered for `provider`.
 *
 * @param {string} provider - Provider name.
 * @returns {*} The value returned by the registered factory.
 * @throws {Error} When no (truthy) factory is registered; the message lists
 *   the registered provider names.
 */
function getAdapter(provider) {
  const createAdapter = adapters.get(provider);
  if (createAdapter) {
    return createAdapter();
  }
  const registered = [...adapters.keys()].join(", ");
  throw new Error(
    `No adapter registered for provider: ${provider}. Available: ${registered}`
  );
}
/**
 * Reports whether a factory is registered for `provider`.
 *
 * @param {string} provider - Provider name.
 * @returns {boolean} True when the registry has an entry for the provider.
 */
function hasAdapter(provider) {
  return adapters.has(provider);
}
|
|
2725
|
+
|
|
2726
|
+
// src/evals/llmHost/retry.ts
|
|
2727
|
+
// Defaults applied by withRetry for any option the caller leaves undefined.
var DEFAULT_OPTIONS = {
  // Total number of attempts, including the first one.
  maxAttempts: 3,
  // Delay before the first retry, in milliseconds; doubles on each retry.
  baseDelayMs: 1000,
  // Upper bound for any single delay, in milliseconds.
  maxDelayMs: 30000,
  // Predicate deciding whether a thrown error is worth retrying.
  isRetryable: isRetryableError
};
|
|
2733
|
+
/**
 * Runs `fn`, retrying with exponential backoff while the error is retryable.
 *
 * The delay before retry k (1-based) is `baseDelayMs * 2^(k-1)` plus up to
 * 10% random jitter, capped at `maxDelayMs`. The last error is rethrown
 * unchanged once the attempts are exhausted or `isRetryable(error)` is false.
 *
 * `fn` is always attempted at least once: a `maxAttempts` that is below 1 or
 * not a number is treated as 1 (previously `fn` was never called and
 * `undefined` was thrown). Fractional values are rounded down.
 *
 * @param {Function} fn - Async operation to run.
 * @param {object} [options={}]
 * @param {number} [options.maxAttempts] - Total attempts (default from DEFAULT_OPTIONS).
 * @param {number} [options.baseDelayMs] - Initial delay in milliseconds.
 * @param {number} [options.maxDelayMs] - Maximum delay in milliseconds.
 * @param {Function} [options.isRetryable] - `(error) => boolean`.
 * @param {Function} [options.onRetry] - Called as `(error, attempt, delayMs)`
 *   before each wait.
 * @returns {Promise<*>} The value `fn` resolves with.
 */
async function withRetry(fn, options = {}) {
  const {
    maxAttempts = DEFAULT_OPTIONS.maxAttempts,
    baseDelayMs = DEFAULT_OPTIONS.baseDelayMs,
    maxDelayMs = DEFAULT_OPTIONS.maxDelayMs,
    isRetryable = DEFAULT_OPTIONS.isRetryable,
    onRetry
  } = options;
  // NaN, zero and negative limits all collapse to a single attempt.
  const attemptLimit = Math.max(1, Math.floor(maxAttempts) || 1);
  for (let attempt = 1; ; attempt++) {
    try {
      return await fn();
    } catch (error) {
      if (attempt >= attemptLimit || !isRetryable(error)) {
        throw error;
      }
      const exponentialDelay = baseDelayMs * Math.pow(2, attempt - 1);
      const jitter = Math.random() * 0.1 * exponentialDelay;
      const delayMs = Math.min(exponentialDelay + jitter, maxDelayMs);
      if (onRetry) {
        onRetry(error, attempt, delayMs);
      }
      await sleep(delayMs);
    }
  }
}
|
|
2761
|
+
/**
 * Default retry predicate.
 *
 * When `extractStatusCode` finds a numeric status, only 429, 500, 502, 503
 * and 504 are retryable. Otherwise the lower-cased error message is searched
 * for a small set of transient-failure phrases.
 *
 * @param {unknown} error - The thrown value.
 * @returns {boolean} True when the error looks transient.
 */
function isRetryableError(error) {
  const status = extractStatusCode(error);
  if (status !== null) {
    const retryableStatuses = [429, 500, 502, 503, 504];
    return retryableStatuses.includes(status);
  }
  const text = extractErrorMessage2(error).toLowerCase();
  const transientMarkers = [
    "rate limit",
    "429",
    "too many requests",
    "timeout",
    "temporarily unavailable",
    "service unavailable",
    "internal server error"
  ];
  return transientMarkers.some((marker) => text.includes(marker));
}
|
|
2769
|
+
/**
 * Extracts a numeric status code from an error-like object.
 *
 * Candidates are checked in this order and the first one whose type is
 * "number" wins: `error.status`, `error.statusCode`, `error.response.status`
 * (only when `response` is a truthy object), `error.code`.
 *
 * @param {unknown} error - The thrown value.
 * @returns {number|null} The status code, or null when none is found or
 *   `error` is not an object.
 */
function extractStatusCode(error) {
  if (error == null || typeof error !== "object") {
    return null;
  }
  const { response } = error;
  const responseStatus = response && typeof response === "object" ? response.status : void 0;
  const candidates = [error.status, error.statusCode, responseStatus, error.code];
  for (const candidate of candidates) {
    if (typeof candidate === "number") {
      return candidate;
    }
  }
  return null;
}
|
|
2791
|
+
/**
 * Produces a text message for an arbitrary thrown value. Never throws.
 *
 * - null / undefined → ""
 * - string → the string itself
 * - Error → `error.message`
 * - other object → string `message`, else string `error`, else its JSON form
 * - number / boolean → `String(value)`
 * - anything else → "Unknown error"
 *
 * Objects that cannot be serialized (circular references, BigInt values, a
 * `toJSON` yielding undefined) fall back to their `Object.prototype.toString`
 * tag. This function runs while an error is already being handled, so a
 * serialization failure here must not replace the original error.
 *
 * @param {unknown} error - The thrown value.
 * @returns {string} A message string.
 */
function extractErrorMessage2(error) {
  if (error == null) {
    return "";
  }
  if (typeof error === "string") {
    return error;
  }
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error === "object") {
    const e = error;
    if (typeof e.message === "string") {
      return e.message;
    }
    if (typeof e.error === "string") {
      return e.error;
    }
    // Works even for null-prototype objects, unlike String(error).
    const fallback = Object.prototype.toString.call(error);
    try {
      return JSON.stringify(error) ?? fallback;
    } catch {
      return fallback;
    }
  }
  if (typeof error === "number" || typeof error === "boolean") {
    return String(error);
  }
  return "Unknown error";
}
|
|
2816
|
+
/**
 * Returns a promise that resolves after `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>} Resolves when the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
2819
|
+
|
|
2820
|
+
// src/evals/llmHost/orchestrator.ts
|
|
2821
|
+
/**
 * Drives a chat/tool-call loop between an LLM adapter and an MCP server.
 *
 * Each turn calls `adapter.chat` (through `withRetry`). While the model asks
 * for tools, every requested tool is executed via `mcp.callTool`, its text
 * (from `extractText`) is fed back to the model, and the loop continues. The
 * first reply without tool calls becomes the final response. The loop runs
 * at most `config.maxToolCalls || 10` turns; if that limit is reached the
 * result still has `success: true` with an empty `response`.
 *
 * Errors never propagate: they are returned as `{ success: false, error }`
 * together with the tool calls and history collected so far.
 *
 * @param {object} adapter - Provider adapter (createClient, formatTools,
 *   chat, createUserMessage, createAssistantMessage, createToolResultMessage).
 * @param {object} mcp - MCP API exposing `listTools` and `callTool`.
 * @param {string} scenario - Initial user message.
 * @param {object} config - LLM host configuration.
 * @param {object} [options={}]
 * @param {object} [options.retry] - Options forwarded to `withRetry`.
 * @returns {Promise<object>} `{ success, toolCalls, response | error, conversationHistory }`.
 */
async function runSimulation(adapter, mcp, scenario, config, options = {}) {
  const turnLimit = config.maxToolCalls || 10;
  const retrySettings = options.retry || {};
  const executedToolCalls = [];
  const transcript = [];
  try {
    const client = await adapter.createClient(config);
    const serverTools = await mcp.listTools();
    const providerTools = adapter.formatTools(serverTools);
    const messages = [adapter.createUserMessage(scenario)];
    transcript.push({ role: "user", content: scenario });
    let answer = "";
    let turn = 0;
    while (turn < turnLimit) {
      turn += 1;
      const reply = await withRetry(
        () => adapter.chat(client, messages, providerTools, config),
        retrySettings
      );
      const toolsRequested = reply.wantsToolCalls && reply.toolCalls.length > 0;
      if (!toolsRequested) {
        answer = reply.textContent || "";
        transcript.push({ role: "assistant", content: answer });
        break;
      }
      messages.push(adapter.createAssistantMessage(reply));
      const toolResults = [];
      for (const call of reply.toolCalls) {
        executedToolCalls.push(call);
        const rawResult = await mcp.callTool(call.name, call.arguments);
        const text = extractText(rawResult);
        toolResults.push(adapter.createToolResultMessage(call, text));
        transcript.push({ role: "tool", content: text });
      }
      if (adapter.provider === "anthropic") {
        // For the "anthropic" provider all tool results of a turn are
        // wrapped in one user message.
        messages.push({ role: "user", content: toolResults });
      } else {
        // Otherwise each tool result is appended as its own message.
        messages.push(...toolResults);
      }
    }
    return {
      success: true,
      toolCalls: executedToolCalls,
      response: answer,
      conversationHistory: transcript
    };
  } catch (failure) {
    return {
      success: false,
      toolCalls: executedToolCalls,
      error: failure instanceof Error ? failure.message : String(failure),
      conversationHistory: transcript
    };
  }
}
|
|
2886
|
+
|
|
2887
|
+
// src/evals/llmHost/adapters/openai.ts
|
|
2888
|
+
/**
 * Creates the LLM-host adapter for the "openai" provider.
 *
 * The adapter translates between the orchestrator's provider-neutral calls
 * and the OpenAI Chat Completions request/response shapes.
 *
 * Changes over the previous version:
 * - `tools` is left out of the request when the tool list is empty instead
 *   of sending `tools: []`.
 * - Tool-call arguments are parsed defensively: an empty or missing argument
 *   string becomes `{}`, and malformed JSON produces an error naming the
 *   tool instead of a bare SyntaxError.
 *
 * @returns {object} Adapter with `provider`, `createClient`, `formatTools`,
 *   `chat`, `createUserMessage`, `createAssistantMessage` and
 *   `createToolResultMessage`.
 */
function createOpenAIAdapter() {
  // Decodes the JSON-encoded `function.arguments` string of one tool call.
  const parseToolArguments = (toolCall) => {
    const raw = toolCall.function.arguments;
    if (typeof raw !== "string" || raw.trim() === "") {
      return {};
    }
    try {
      return JSON.parse(raw);
    } catch (error) {
      throw new Error(
        `OpenAI returned invalid JSON arguments for tool "${toolCall.function.name}": ${error instanceof Error ? error.message : String(error)}`
      );
    }
  };
  return {
    provider: "openai",
    /**
     * Lazily imports the `openai` package and builds a client using the API
     * key from `config.apiKeyEnvVar` (default "OPENAI_API_KEY").
     * @throws {Error} When the SDK is not installed or the key is missing.
     */
    async createClient(config) {
      let OpenAI;
      try {
        const module = await import('openai');
        OpenAI = module.OpenAI;
      } catch {
        throw new Error(
          "OpenAI SDK is not installed. Install it with: npm install openai"
        );
      }
      const apiKeyEnvVar = config.apiKeyEnvVar || "OPENAI_API_KEY";
      const apiKey = process.env[apiKeyEnvVar];
      if (!apiKey) {
        throw new Error(
          `OpenAI API key not found in environment variable ${apiKeyEnvVar}`
        );
      }
      return new OpenAI({ apiKey });
    },
    /** Converts MCP tool descriptors into OpenAI function-tool definitions. */
    formatTools(tools) {
      return tools.map((tool) => ({
        type: "function",
        function: {
          name: tool.name,
          description: tool.description || "",
          parameters: tool.inputSchema || {}
        }
      }));
    },
    /**
     * Sends one chat-completion request and normalizes the first choice.
     * @returns {Promise<object>} `{ wantsToolCalls, toolCalls, textContent, rawResponse }`.
     * @throws {Error} When the response has no message, or a tool call
     *   carries malformed JSON arguments.
     */
    async chat(client, messages, tools, config) {
      const openai = client;
      const request = {
        model: config.model || "gpt-4o",
        messages,
        temperature: config.temperature ?? 0,
        max_tokens: config.maxTokens
      };
      // Only an empty array is dropped; any other value is passed through.
      const hasNoTools = Array.isArray(tools) && tools.length === 0;
      if (!hasNoTools) {
        request.tools = tools;
      }
      const response = await openai.chat.completions.create(request);
      const message = response.choices[0]?.message;
      if (!message) {
        throw new Error("No response from OpenAI");
      }
      if (message.tool_calls && message.tool_calls.length > 0) {
        const toolCalls = message.tool_calls.map((tc) => ({
          name: tc.function.name,
          arguments: parseToolArguments(tc),
          id: tc.id
        }));
        return {
          wantsToolCalls: true,
          toolCalls,
          textContent: message.content,
          rawResponse: response
        };
      }
      return {
        wantsToolCalls: false,
        toolCalls: [],
        textContent: message.content,
        rawResponse: response
      };
    },
    /** Wraps the scenario text as a user message. */
    createUserMessage(scenario) {
      return {
        role: "user",
        content: scenario
      };
    },
    /** Rebuilds the assistant message, including its original tool calls. */
    createAssistantMessage(chatResult) {
      const rawResponse = chatResult.rawResponse;
      return {
        role: "assistant",
        content: chatResult.textContent,
        tool_calls: rawResponse.choices[0]?.message?.tool_calls
      };
    },
    /** Builds the `tool` message that answers one tool call. */
    createToolResultMessage(toolCall, result) {
      return {
        role: "tool",
        tool_call_id: toolCall.id,
        content: result
      };
    }
  };
}
|
|
2977
|
+
|
|
2978
|
+
// src/evals/llmHost/adapters/anthropic.ts
|
|
2979
|
+
/**
 * Creates the LLM-host adapter for the "anthropic" provider.
 *
 * The adapter translates between the orchestrator's provider-neutral calls
 * and the Anthropic Messages request/response shapes.
 *
 * Change over the previous version: a tool without `inputSchema` is now
 * advertised with an empty *object* schema
 * (`{ type: "object", properties: {} }`) instead of `{}`, which is not a
 * valid tool input schema.
 *
 * @returns {object} Adapter with `provider`, `createClient`, `formatTools`,
 *   `chat`, `createUserMessage`, `createAssistantMessage` and
 *   `createToolResultMessage`.
 */
function createAnthropicAdapter() {
  return {
    provider: "anthropic",
    /**
     * Lazily imports `@anthropic-ai/sdk` and builds a client using the API
     * key from `config.apiKeyEnvVar` (default "ANTHROPIC_API_KEY").
     * @throws {Error} When the SDK is not installed or the key is missing.
     */
    async createClient(config) {
      let Anthropic;
      try {
        const module = await import('@anthropic-ai/sdk');
        Anthropic = module.default;
      } catch {
        throw new Error(
          "Anthropic SDK is not installed. Install it with: npm install @anthropic-ai/sdk"
        );
      }
      const apiKeyEnvVar = config.apiKeyEnvVar || "ANTHROPIC_API_KEY";
      const apiKey = process.env[apiKeyEnvVar];
      if (!apiKey) {
        throw new Error(
          `Anthropic API key not found in environment variable ${apiKeyEnvVar}`
        );
      }
      return new Anthropic({ apiKey });
    },
    /** Converts MCP tool descriptors into Anthropic tool definitions. */
    formatTools(tools) {
      return tools.map((tool) => ({
        name: tool.name,
        description: tool.description || "",
        // Fresh object per tool so definitions never share a schema instance.
        input_schema: tool.inputSchema || { type: "object", properties: {} }
      }));
    },
    /**
     * Sends one Messages request and normalizes the reply.
     *
     * `textContent` is the text of the first "text" content block, or null.
     * @returns {Promise<object>} `{ wantsToolCalls, toolCalls, textContent, rawResponse }`.
     * @throws {Error} "Response exceeded max tokens" when the reply stopped
     *   with `stop_reason === "max_tokens"`.
     */
    async chat(client, messages, tools, config) {
      const anthropic = client;
      const response = await anthropic.messages.create({
        model: config.model || "claude-3-5-sonnet-20241022",
        max_tokens: config.maxTokens || 4096,
        temperature: config.temperature ?? 0,
        messages,
        tools
      });
      const resp = response;
      const textBlock = resp.content.find((c) => c.type === "text");
      const textContent = textBlock?.text || null;
      if (resp.stop_reason === "tool_use") {
        const toolUses = resp.content.filter((c) => c.type === "tool_use");
        const toolCalls = toolUses.map((tu) => ({
          name: tu.name,
          arguments: tu.input,
          id: tu.id
        }));
        return {
          wantsToolCalls: true,
          toolCalls,
          textContent,
          rawResponse: response
        };
      }
      if (resp.stop_reason === "max_tokens") {
        throw new Error("Response exceeded max tokens");
      }
      return {
        wantsToolCalls: false,
        toolCalls: [],
        textContent,
        rawResponse: response
      };
    },
    /** Wraps the scenario text as a user message. */
    createUserMessage(scenario) {
      return {
        role: "user",
        content: scenario
      };
    },
    /** Replays the raw content blocks of the reply as the assistant turn. */
    createAssistantMessage(chatResult) {
      const rawResponse = chatResult.rawResponse;
      return {
        role: "assistant",
        content: rawResponse.content
      };
    },
    /** Builds the `tool_result` content block that answers one tool use. */
    createToolResultMessage(toolCall, result) {
      return {
        type: "tool_result",
        tool_use_id: toolCall.id,
        content: result
      };
    }
  };
}
|
|
3066
|
+
|
|
3067
|
+
// src/evals/llmHost/llmHostSimulation.ts
|
|
3068
|
+
// Register the two built-in adapter factories at module load so getAdapter
// can resolve the "openai" and "anthropic" providers.
registerAdapter("openai", createOpenAIAdapter);
registerAdapter("anthropic", createAnthropicAdapter);
|
|
3070
|
+
/**
 * Runs an LLM-host simulation for `scenario` with the adapter registered
 * for `config.provider`.
 *
 * Chat requests are retried with up to 3 attempts, a 1000 ms base delay and
 * a 30000 ms delay cap.
 *
 * @param {object} mcp - MCP API used to list and call tools.
 * @param {string} scenario - Initial user message.
 * @param {object} config - LLM host configuration (must include `provider`).
 * @returns {Promise<object>} The result of `runSimulation`.
 * @throws {Error} When no adapter is registered for the provider.
 */
async function simulateLLMHost(mcp, scenario, config) {
  const adapter = getAdapter(config.provider);
  const retry = { maxAttempts: 3, baseDelayMs: 1000, maxDelayMs: 30000 };
  return runSimulation(adapter, mcp, scenario, config, { retry });
}
|
|
3080
|
+
/**
 * Reports whether an adapter factory is registered for `provider`.
 *
 * This only consults the adapter registry via `hasAdapter`; it does not
 * check that the provider's SDK package is installed.
 *
 * @param {string} provider - Provider name.
 * @returns {boolean} True when an adapter is registered.
 */
function isProviderAvailable(provider) {
  const registered = hasAdapter(provider);
  return registered;
}
|
|
3083
|
+
/**
 * Returns the install hint shown when a provider's SDK is missing.
 *
 * @param {string} provider - Provider name.
 * @returns {string} The install hint for "openai" or "anthropic", otherwise
 *   an "Unknown provider" message.
 */
function getMissingDependencyMessage(provider) {
  // A Map keeps the lookup strict (===-like) and free of prototype keys.
  const installHints = new Map([
    ["openai", "OpenAI SDK is not installed. Install it with: npm install openai"],
    ["anthropic", "Anthropic SDK is not installed. Install it with: npm install @anthropic-ai/sdk"]
  ]);
  if (installHints.has(provider)) {
    return installHints.get(provider);
  }
  return `Unknown provider: ${String(provider)}`;
}
|
|
3093
|
+
|
|
3094
|
+
// src/evals/evalRunner.ts
|
|
3095
|
+
/**
 * Executes one eval case and returns its response (or an error message).
 *
 * - Mode "llm_host": requires `scenario` and `llmHostConfig`, runs
 *   `simulateLLMHost`, and returns the whole simulation result as the
 *   response. A simulation with `success: false` is treated as an error.
 * - Any other mode (default "direct"): requires `toolName` and `args`, and
 *   calls the tool through `mcp.callTool`. When the case declares
 *   `expect.isError`, the raw tool result is returned; otherwise
 *   `structuredContent` is returned, falling back to `content`.
 *
 * This function never throws: failures come back as
 * `{ response: undefined, error: <message> }`.
 *
 * @param {object} evalCase - The eval case to run.
 * @param {object} mcp - MCP API exposing `callTool`.
 * @returns {Promise<{response: unknown, error?: string}>} Execution outcome.
 */
async function executeToolCall(evalCase, mcp) {
  const mode = evalCase.mode || "direct";
  const reject = (detail) => {
    throw new Error(`Eval case ${evalCase.id}: ${detail}`);
  };
  try {
    if (mode !== "llm_host") {
      if (!evalCase.toolName) {
        reject("toolName is required for direct mode");
      }
      if (!evalCase.args) {
        reject("args is required for direct mode");
      }
      const toolResult = await mcp.callTool(evalCase.toolName, evalCase.args);
      // Error expectations are checked against the raw result rather than
      // the unwrapped content.
      const wantsRawResult = evalCase.expect?.isError !== void 0;
      if (wantsRawResult) {
        return { response: toolResult };
      }
      return { response: toolResult.structuredContent ?? toolResult.content };
    }
    if (!evalCase.scenario) {
      reject("scenario is required for llm_host mode");
    }
    if (!evalCase.llmHostConfig) {
      reject("llmHostConfig is required for llm_host mode");
    }
    const simulation = await simulateLLMHost(
      mcp,
      evalCase.scenario,
      evalCase.llmHostConfig
    );
    if (!simulation.success) {
      throw new Error(simulation.error || "LLM host simulation failed");
    }
    return { response: simulation };
  } catch (failure) {
    return {
      response: void 0,
      error: failure instanceof Error ? failure.message : String(failure)
    };
  }
}
|
|
3142
|
+
/**
 * Decides whether an eval case passed.
 *
 * A case passes when there is no execution error and every expectation
 * result that is present (not undefined) has a truthy `pass`.
 *
 * @param {string|undefined} error - Execution error, if any.
 * @param {object} expectations - Map of expectation name to result object.
 * @returns {boolean} True when the case passed.
 */
function didCasePass(error, expectations) {
  if (error) {
    return false;
  }
  for (const outcome of Object.values(expectations)) {
    // Expectations that were not evaluated are stored as undefined.
    if (outcome !== void 0 && !outcome.pass) {
      return false;
    }
  }
  return true;
}
|
|
3147
|
+
/**
 * Runs every validation declared in an eval case's `expect` block against a
 * response and collects one `{ pass, details }` entry per declared expectation.
 *
 * Result keys, in evaluation order: `exact`, `schema`, `textContains`, `regex`,
 * `error`, `size`, `judge`, `snapshot`. An expectation whose field is
 * `undefined` produces no entry. Judge and snapshot failures (including thrown
 * errors) are captured as failing entries rather than propagated.
 *
 * @param {object} expectBlock - Declared expectations (`response`, `schema`,
 *   `containsText`, `matchesPattern`, `isError`, `responseSize`, `passesJudge`,
 *   `snapshot`, `snapshotSanitizers`).
 * @param {*} response - The response being validated.
 * @param {object} config - Validation context: `schemas` (name -> schema),
 *   `judgeConfigs` (id -> judge config) and `playwrightExpect` (expect function
 *   used for snapshot matching).
 * @returns {Promise<object>} Map of expectation key to `{ pass, details }`.
 */
async function runExpectBlockValidations(expectBlock, response, config) {
  // Look up only the registry's own entries so that names such as
  // "constructor" or "toString" are not resolved through Object.prototype.
  const ownEntry = (registry, key) => {
    if (registry === void 0 || registry === null) {
      return void 0;
    }
    return Object.prototype.hasOwnProperty.call(registry, key) ? registry[key] : void 0;
  };
  const toResult = (validation) => ({
    pass: validation.pass,
    details: validation.message
  });
  const results = {};

  if (expectBlock.response !== void 0) {
    results.exact = toResult(validateResponse(response, expectBlock.response));
  }

  if (expectBlock.schema !== void 0) {
    const schema = ownEntry(config.schemas, expectBlock.schema);
    results.schema = schema ? toResult(validateSchema(response, schema)) : {
      pass: false,
      details: `Schema "${expectBlock.schema}" not found in schemas registry`
    };
  }

  if (expectBlock.containsText !== void 0) {
    results.textContains = toResult(validateText(response, expectBlock.containsText));
  }

  if (expectBlock.matchesPattern !== void 0) {
    results.regex = toResult(validatePattern(response, expectBlock.matchesPattern));
  }

  if (expectBlock.isError !== void 0) {
    results.error = toResult(validateError(response, expectBlock.isError));
  }

  if (expectBlock.responseSize !== void 0) {
    results.size = toResult(validateSize(response, expectBlock.responseSize));
  }

  if (expectBlock.passesJudge !== void 0) {
    const {
      rubric,
      reference,
      threshold = 0.7,
      configId
    } = expectBlock.passesJudge;
    // An unknown or absent configId falls back to an empty judge config.
    const judgeConfig = configId ? ownEntry(config.judgeConfigs, configId) ?? {} : {};
    try {
      const judge = createJudge(judgeConfig);
      const judgeResult = await judge.evaluate(
        response,
        reference ?? null,
        rubric
      );
      // Judges that only report pass/fail are mapped onto a 1/0 score.
      const score = judgeResult.score ?? (judgeResult.pass ? 1 : 0);
      const passed = score >= threshold;
      results.judge = {
        pass: passed,
        details: passed ? `Judge passed with score ${score.toFixed(2)}` : `Judge failed with score ${score.toFixed(2)} (threshold: ${threshold}). ${judgeResult.reasoning ?? ""}`
      };
    } catch (err) {
      results.judge = {
        pass: false,
        details: `Judge evaluation error: ${err instanceof Error ? err.message : String(err)}`
      };
    }
  }

  if (expectBlock.snapshot !== void 0) {
    if (!config.playwrightExpect) {
      results.snapshot = {
        pass: false,
        details: "Snapshot testing requires expect in context"
      };
    } else {
      try {
        const sanitizers = expectBlock.snapshotSanitizers ?? [];
        await config.playwrightExpect(response).toMatchToolSnapshot(
          expectBlock.snapshot,
          sanitizers
        );
        results.snapshot = {
          pass: true,
          details: `Matches snapshot "${expectBlock.snapshot}"`
        };
      } catch (err) {
        // A snapshot mismatch surfaces as a thrown assertion error.
        results.snapshot = {
          pass: false,
          details: err instanceof Error ? err.message : String(err)
        };
      }
    }
  }

  return results;
}
|
|
3254
|
+
/**
 * Executes a single eval case and validates its response.
 *
 * The case is executed through `executeToolCall`; when execution succeeds and
 * the case declares an `expect` block, the declared expectations are evaluated
 * against the response. Expectations are skipped when execution failed.
 *
 * @param {object} evalCase - The eval case (`id`, optional `mode`, `toolName`,
 *   `scenario`, `expect`, ...). `mode` defaults to "direct".
 * @param {object} context - Execution context providing `mcp` and, optionally,
 *   `expect` (used for snapshot expectations).
 * @param {object} [options] - Optional `datasetName`, `schemas` and
 *   `judgeConfigs`.
 * @returns {Promise<object>} The case result, including `pass`, `response`,
 *   `error`, `expectations` and `durationMs`.
 */
async function runEvalCase(evalCase, context, options = {}) {
  const startTime = Date.now();
  const mode = evalCase.mode || "direct";
  const { response, error } = await executeToolCall(evalCase, context.mcp);
  // Nullish check instead of truthiness: an Error thrown with an empty message
  // is reported as "", which must still be treated as a failed execution.
  const hasError = error !== void 0 && error !== null;
  let expectationResults = {};
  if (!hasError && evalCase.expect) {
    expectationResults = await runExpectBlockValidations(
      evalCase.expect,
      response,
      {
        schemas: options.schemas,
        judgeConfigs: options.judgeConfigs,
        playwrightExpect: context.expect
      }
    );
  }
  return {
    id: evalCase.id,
    datasetName: options.datasetName ?? "single-case",
    toolName: evalCase.toolName ?? evalCase.scenario ?? "unknown",
    mode,
    source: "eval",
    pass: !hasError && didCasePass(error, expectationResults),
    response,
    error,
    expectations: expectationResults,
    authType: context.mcp.authType,
    project: context.mcp.project,
    durationMs: Date.now() - startTime
  };
}
|
|
3285
|
+
/**
 * Runs every case of an eval dataset sequentially and summarizes the outcome.
 *
 * Schemas declared on the dataset are merged with `options.schemas`; on a name
 * clash the entry from `options.schemas` wins. When `context.testInfo` is
 * present, the case results are attached to it as a JSON attachment named
 * "mcp-test-results".
 *
 * @param {object} options - `dataset` (with `name`, `cases`, optional
 *   `schemas`), optional `schemas`, `judgeConfigs`, `stopOnFailure` (default
 *   `false`) and `onCaseComplete` callback invoked with each case result.
 * @param {object} context - Execution context passed through to each case;
 *   may carry `testInfo`.
 * @returns {Promise<object>} Summary with `total`, `passed`, `failed`,
 *   `caseResults` and `durationMs`. Only cases that actually ran are counted.
 */
async function runEvalDataset(options, context) {
  const {
    dataset,
    schemas,
    judgeConfigs,
    stopOnFailure = false,
    onCaseComplete
  } = options;
  const startedAt = Date.now();
  const caseOptions = {
    datasetName: dataset.name,
    // Later spread wins, so caller-supplied schemas override dataset schemas.
    schemas: { ...dataset.schemas, ...schemas },
    judgeConfigs
  };

  const caseResults = [];
  for (const evalCase of dataset.cases) {
    const outcome = await runEvalCase(evalCase, context, caseOptions);
    caseResults.push(outcome);
    if (onCaseComplete) {
      await onCaseComplete(outcome);
    }
    if (stopOnFailure && !outcome.pass) {
      break;
    }
  }

  let passed = 0;
  for (const outcome of caseResults) {
    if (outcome.pass) {
      passed += 1;
    }
  }
  const total = caseResults.length;
  const summary = {
    total,
    passed,
    failed: total - passed,
    caseResults,
    durationMs: Date.now() - startedAt
  };

  if (context.testInfo) {
    const payload = JSON.stringify({ caseResults });
    await context.testInfo.attach("mcp-test-results", {
      contentType: "application/json",
      body: Buffer.from(payload)
    });
  }
  return summary;
}
|
|
3331
|
+
|
|
3332
|
+
// src/evals/llmHost/toolCallExpectation.ts
|
|
3333
|
+
/**
 * Checks whether a tool call's actual arguments satisfy the expected ones.
 *
 * This is a subset match: every key in `expected` must be present in `actual`
 * with a JSON-equivalent value; extra keys in `actual` are ignored. Values are
 * compared by their JSON serialization with object keys sorted, so nested
 * objects match regardless of key order while array order stays significant.
 *
 * @param {object | null | undefined} actual - Arguments the tool was called with.
 * @param {object} expected - Arguments that must be present.
 * @returns {boolean} `true` when all expected arguments are matched.
 */
function argumentsMatch(actual, expected) {
  // Serializes a value with object keys in sorted order so the comparison does
  // not depend on property insertion order.
  function toCanonicalJson(value) {
    return JSON.stringify(value, (_key, val) => {
      if (val !== null && typeof val === "object" && !Array.isArray(val)) {
        return Object.fromEntries(
          Object.keys(val).sort().map((name) => [name, val[name]])
        );
      }
      return val;
    });
  }

  const expectedKeys = Object.keys(expected);
  if (expectedKeys.length === 0) {
    return true;
  }
  // A call made without an arguments object cannot satisfy any expected key;
  // report a mismatch instead of letting the `in` operator throw.
  if (actual === null || typeof actual !== "object") {
    return false;
  }
  return expectedKeys.every(
    (key) => key in actual && toCanonicalJson(actual[key]) === toCanonicalJson(expected[key])
  );
}
|
|
3346
|
+
/**
 * Finds the first actual tool call that satisfies an expected call.
 *
 * A candidate matches when its name equals the expected name and, if the
 * expected call declares arguments, `argumentsMatch` accepts the candidate's
 * arguments. An expected call without arguments matches on name alone.
 *
 * @param {object} expected - Expected call with `name` and optional `arguments`.
 * @param {Iterable<object>} actualCalls - Calls that were actually made.
 * @returns {object | null} The first matching call, or `null` when none match.
 */
function findMatchingCall(expected, actualCalls) {
  for (const candidate of actualCalls) {
    const nameMatches = candidate.name === expected.name;
    if (nameMatches && (!expected.arguments || argumentsMatch(candidate.arguments, expected.arguments))) {
      return candidate;
    }
  }
  return null;
}
|
|
3360
|
+
/**
 * Creates a validator that checks the tool calls recorded on a response
 * against the calls listed in `evalCase.metadata.expectedToolCalls`.
 *
 * Expected calls are required unless they set `required: false`. The validator
 * passes when no calls are expected, or when every required expected call has
 * a matching actual call (see `findMatchingCall`).
 *
 * @returns {function(object, *): Promise<{pass: boolean, details: string}>}
 *   Async validator taking the eval case and the response (whose `toolCalls`
 *   property lists the calls that were made).
 */
function createToolCallValidator() {
  const isRequired = (call) => call.required !== false;
  const describeCall = (call) => `${call.name}(${JSON.stringify(call.arguments || {})})`;

  return async (evalCase, response) => {
    const expectedCalls = evalCase.metadata?.expectedToolCalls;
    if (!expectedCalls || expectedCalls.length === 0) {
      return { pass: true, details: "No expected tool calls specified" };
    }

    const actualCalls = response?.toolCalls;
    if (!actualCalls || actualCalls.length === 0) {
      // Nothing was called: only a problem if at least one call was required.
      const requiredCount = expectedCalls.filter(isRequired).length;
      return requiredCount > 0 ? {
        pass: false,
        details: `Expected ${requiredCount} tool call(s), but LLM made no tool calls`
      } : {
        pass: true,
        details: "No tool calls expected or made"
      };
    }

    const missingCalls = expectedCalls.filter(
      (expectedCall) => !findMatchingCall(expectedCall, actualCalls) && isRequired(expectedCall)
    );
    if (missingCalls.length === 0) {
      return {
        pass: true,
        details: `All ${expectedCalls.length} expected tool call(s) were made correctly`
      };
    }

    const missingDetails = missingCalls.map(describeCall).join(", ");
    const actualNames = actualCalls.map((c) => c.name).join(", ");
    return {
      pass: false,
      details: `Missing required tool call(s): ${missingDetails}. Actual calls: ${actualNames}`
    };
  };
}
|
|
3408
|
+
|
|
3409
|
+
// src/spec/conformanceChecks.ts
|
|
3410
|
+
/**
 * Runs a series of conformance checks against a connected MCP server.
 *
 * Checks performed: server info present (optional), capabilities available,
 * `listTools` succeeds, required tools present (when any are requested), tool
 * input schemas valid (optional), `listResources` / `listPrompts` succeed
 * (optional, only when the server declares the capability), and calling a
 * nonexistent tool yields an error. If `listTools` fails, the remaining checks
 * are skipped.
 *
 * When `testInfo` is supplied, a JSON attachment named
 * "mcp-conformance-checks" is added on every outcome, including the early
 * `listTools` failure.
 *
 * @param {object} mcp - MCP fixture exposing `getServerInfo()`, `listTools()`,
 *   `callTool()`, `client`, `authType` and `project`.
 * @param {object} [options] - `requiredTools` (default `[]`),
 *   `validateSchemas`, `checkServerInfo`, `checkResources`, `checkPrompts`
 *   (each default `true`).
 * @param {object} [testInfo] - Optional test info object with an `attach` method.
 * @returns {Promise<{pass: boolean, checks: object[], raw: object}>} Overall
 *   verdict, the individual checks and the raw data collected from the server.
 */
async function runConformanceChecks(mcp, options = {}, testInfo) {
  const {
    requiredTools = [],
    validateSchemas = true,
    checkServerInfo = true,
    checkResources = true,
    checkPrompts = true
  } = options;
  const checks = [];
  const raw = {
    serverInfo: null,
    capabilities: null,
    tools: [],
    resources: null,
    prompts: null
  };
  const describeError = (error) => error instanceof Error ? error.message : String(error);

  // Builds the final result and attaches it to the test report when possible.
  // Shared by the normal path and the early listTools-failure path.
  const finalize = async () => {
    const pass = checks.every((check) => check.pass);
    const result = { pass, checks, raw };
    if (testInfo) {
      await testInfo.attach("mcp-conformance-checks", {
        contentType: "application/json",
        body: JSON.stringify(
          {
            operation: "conformanceChecks",
            pass,
            checks,
            serverInfo: raw.serverInfo,
            capabilities: raw.capabilities,
            toolCount: raw.tools.length,
            authType: mcp.authType,
            project: mcp.project
          },
          null,
          2
        )
      });
    }
    return result;
  };

  // Presence is decided by a nullish check so that the verdict and the message
  // always agree, whether the fixture reports absence as null or undefined.
  const serverInfo = mcp.getServerInfo();
  const hasServerInfo = serverInfo !== null && serverInfo !== void 0;
  if (hasServerInfo) {
    raw.serverInfo = serverInfo;
  }
  if (checkServerInfo) {
    checks.push({
      name: "server_info_present",
      pass: hasServerInfo,
      message: hasServerInfo ? `Server info: ${serverInfo.name ?? "unknown"} v${serverInfo.version ?? "unknown"}` : "Server info is missing"
    });
  }

  const capabilities = mcp.client.getServerCapabilities();
  const hasCapabilities = capabilities !== null && capabilities !== void 0;
  if (hasCapabilities) {
    raw.capabilities = capabilities;
  }
  checks.push({
    name: "capabilities_valid",
    pass: hasCapabilities,
    message: hasCapabilities ? `Server capabilities: ${formatCapabilities(capabilities)}` : "Server capabilities not available"
  });

  let tools = [];
  try {
    const listedTools = await mcp.listTools();
    // Building the message reads `.length`; doing so before touching `raw`
    // keeps `raw.tools` an array if the server returned something unusable.
    checks.push({
      name: "list_tools_succeeds",
      pass: true,
      message: `listTools returned ${listedTools.length} tools`
    });
    tools = listedTools;
    raw.tools = listedTools;
  } catch (error) {
    checks.push({
      name: "list_tools_succeeds",
      pass: false,
      message: `listTools failed: ${describeError(error)}`
    });
    // Without a tool list the remaining checks are not meaningful.
    return finalize();
  }

  if (requiredTools.length > 0) {
    const toolNames = new Set(tools.map((t) => t.name));
    const missingTools = requiredTools.filter((name) => !toolNames.has(name));
    checks.push({
      name: "required_tools_present",
      pass: missingTools.length === 0,
      message: missingTools.length === 0 ? `All ${requiredTools.length} required tools are present` : `Missing required tools: ${missingTools.join(", ")}`
    });
  }

  if (validateSchemas && tools.length > 0) {
    const invalidTools = [];
    for (const tool of tools) {
      if (!tool.name) {
        invalidTools.push(`(unnamed tool): missing name`);
        continue;
      }
      if (!tool.inputSchema) {
        invalidTools.push(`${tool.name}: missing inputSchema`);
        continue;
      }
      if (tool.inputSchema.type !== "object") {
        invalidTools.push(
          `${tool.name}: inputSchema.type must be "object", got "${String(tool.inputSchema.type)}"`
        );
      }
    }
    checks.push({
      name: "tool_schemas_valid",
      pass: invalidTools.length === 0,
      message: invalidTools.length === 0 ? `All ${tools.length} tools have valid schemas` : `Invalid tool schemas:\n${invalidTools.join("\n ")}`
    });
  }

  // Resources and prompts are only probed when the server declares them.
  if (checkResources && capabilities?.resources) {
    try {
      const resourcesResult = await mcp.client.listResources();
      raw.resources = resourcesResult.resources;
      checks.push({
        name: "list_resources_succeeds",
        pass: true,
        message: `listResources returned ${resourcesResult.resources.length} resources`
      });
    } catch (error) {
      checks.push({
        name: "list_resources_succeeds",
        pass: false,
        message: `listResources failed: ${describeError(error)}`
      });
    }
  }

  if (checkPrompts && capabilities?.prompts) {
    try {
      const promptsResult = await mcp.client.listPrompts();
      raw.prompts = promptsResult.prompts;
      checks.push({
        name: "list_prompts_succeeds",
        pass: true,
        message: `listPrompts returned ${promptsResult.prompts.length} prompts`
      });
    } catch (error) {
      checks.push({
        name: "list_prompts_succeeds",
        pass: false,
        message: `listPrompts failed: ${describeError(error)}`
      });
    }
  }

  try {
    const probe = await mcp.callTool("__nonexistent_tool__", {});
    const hasError = probe.isError === true;
    checks.push({
      name: "invalid_tool_returns_error",
      pass: hasError,
      message: hasError ? "Nonexistent tool correctly returned an error" : "Calling nonexistent tool should have returned an error"
    });
  } catch {
    // Any thrown error is accepted as a correct rejection of the unknown tool.
    checks.push({
      name: "invalid_tool_returns_error",
      pass: true,
      message: "Nonexistent tool correctly threw an error"
    });
  }

  return finalize();
}
|
|
3569
|
+
/**
 * Produces a short, human-readable list of the capabilities a server declares.
 *
 * Only the known capability names are considered, always in the fixed order
 * tools, resources, prompts, logging, completions, experimental.
 *
 * @param {object} capabilities - Server capabilities object.
 * @returns {string} Comma-separated capability names, or "none declared" when
 *   none of the known capabilities is set.
 */
function formatCapabilities(capabilities) {
  const knownCapabilities = [
    "tools",
    "resources",
    "prompts",
    "logging",
    "completions",
    "experimental"
  ];
  const declared = knownCapabilities.filter((name) => capabilities[name]);
  return declared.length === 0 ? "none declared" : declared.join(", ");
}
|
|
3579
|
+
|
|
3580
|
+
export { CLIOAuthClient, DiscoveryError, ENV_VAR_NAMES, EvalCaseSchema, EvalDatasetSchema, MCPConfigSchema, MCP_PROTOCOL_VERSION, PlaywrightOAuthClientProvider, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, createToolCallValidator, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, extractText as extractTextFromResponse, getMissingDependencyMessage, getResponseSizeBytes, hasValidTokens, injectTokens, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, normalizeToolResponse, normalizeWhitespace, performOAuthSetup, performOAuthSetupIfNeeded, runConformanceChecks, runEvalCase, runEvalDataset, simulateLLMHost, test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText };
|
|
3581
|
+
//# sourceMappingURL=index.js.map
|
|
3582
|
+
//# sourceMappingURL=index.js.map
|