@gleanwork/mcp-server-tester 1.0.0-beta.0 → 1.0.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +14 -3
- package/dist/fixtures/mcp.d.ts +1 -1
- package/dist/fixtures/mcp.js +28 -7
- package/dist/fixtures/mcp.js.map +1 -1
- package/dist/index.cjs +147 -20
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +81 -49
- package/dist/index.d.ts +81 -49
- package/dist/index.js +147 -21
- package/dist/index.js.map +1 -1
- package/package.json +2 -1
package/dist/index.cjs
CHANGED
|
@@ -1836,11 +1836,11 @@ function parseNullableDef(def, refs) {
|
|
|
1836
1836
|
]
|
|
1837
1837
|
};
|
|
1838
1838
|
}
|
|
1839
|
-
const
|
|
1839
|
+
const base3 = parseDef(def.innerType._def, {
|
|
1840
1840
|
...refs,
|
|
1841
1841
|
currentPath: [...refs.currentPath, "anyOf", "0"]
|
|
1842
1842
|
});
|
|
1843
|
-
return
|
|
1843
|
+
return base3 && { anyOf: [base3, { type: "null" }] };
|
|
1844
1844
|
}
|
|
1845
1845
|
function parseNumberDef(def) {
|
|
1846
1846
|
const res = {
|
|
@@ -3160,6 +3160,7 @@ var StdioConfigSchema = zod.z.object({
|
|
|
3160
3160
|
command: zod.z.string().min(1, "command is required for stdio transport"),
|
|
3161
3161
|
args: zod.z.array(zod.z.string()).optional(),
|
|
3162
3162
|
cwd: zod.z.string().optional(),
|
|
3163
|
+
env: zod.z.record(zod.z.string(), zod.z.string()).optional(),
|
|
3163
3164
|
capabilities: MCPHostCapabilitiesSchema.optional(),
|
|
3164
3165
|
connectTimeoutMs: zod.z.number().positive().optional(),
|
|
3165
3166
|
requestTimeoutMs: zod.z.number().positive().optional(),
|
|
@@ -4406,7 +4407,7 @@ function escapeHtml(text) {
|
|
|
4406
4407
|
|
|
4407
4408
|
// package.json
|
|
4408
4409
|
var package_default = {
|
|
4409
|
-
version: "1.0.0-beta.0"};
|
|
4410
|
+
version: "1.0.0-beta.2"};
|
|
4410
4411
|
|
|
4411
4412
|
// src/mcp/clientFactory.ts
|
|
4412
4413
|
function getRetryAfterDelayMs(err) {
|
|
@@ -4478,7 +4479,14 @@ async function createMCPClientForConfig(config, options) {
|
|
|
4478
4479
|
args: validatedConfig.args ?? [],
|
|
4479
4480
|
...validatedConfig.cwd && { cwd: validatedConfig.cwd },
|
|
4480
4481
|
// Suppress server stderr when quiet mode is enabled
|
|
4481
|
-
...validatedConfig.quiet && { stderr: "ignore" }
|
|
4482
|
+
...validatedConfig.quiet && { stderr: "ignore" },
|
|
4483
|
+
...validatedConfig.env && {
|
|
4484
|
+
env: Object.fromEntries(
|
|
4485
|
+
Object.entries({ ...process.env, ...validatedConfig.env }).filter(
|
|
4486
|
+
(entry) => entry[1] !== void 0
|
|
4487
|
+
)
|
|
4488
|
+
)
|
|
4489
|
+
}
|
|
4482
4490
|
});
|
|
4483
4491
|
debugClient("Connecting via stdio: %O", {
|
|
4484
4492
|
command: validatedConfig.command,
|
|
@@ -4617,7 +4625,10 @@ async function closeMCPClient(client) {
|
|
|
4617
4625
|
try {
|
|
4618
4626
|
await client.close();
|
|
4619
4627
|
} catch (error) {
|
|
4620
|
-
|
|
4628
|
+
debugClient(
|
|
4629
|
+
"Error closing client: %s",
|
|
4630
|
+
error instanceof Error ? error.message : String(error)
|
|
4631
|
+
);
|
|
4621
4632
|
throw error;
|
|
4622
4633
|
} finally {
|
|
4623
4634
|
const agent = agentRegistry.get(client);
|
|
@@ -5147,7 +5158,7 @@ function validateToolCalls(response, expectation) {
|
|
|
5147
5158
|
).length;
|
|
5148
5159
|
const recall = requiredCalls.length > 0 ? calledRequiredCount / requiredCalls.length : 1;
|
|
5149
5160
|
const allowedNames = new Set(expectation.calls.map((c) => c.name));
|
|
5150
|
-
const precision = actual.length > 0
|
|
5161
|
+
const precision = actual.length > 0 ? actual.filter((c) => allowedNames.has(c.name)).length / actual.length : 1;
|
|
5151
5162
|
const metrics = { precision, recall };
|
|
5152
5163
|
const order = expectation.order ?? "any";
|
|
5153
5164
|
if (order === "strict") {
|
|
@@ -5550,9 +5561,8 @@ Validation errors: ${JSON.stringify(validation.error.issues)}`
|
|
|
5550
5561
|
|
|
5551
5562
|
// src/judge/judgeClient.ts
|
|
5552
5563
|
function createJudge(config = {}) {
|
|
5553
|
-
const provider = config.provider ?? "
|
|
5564
|
+
const provider = config.provider ?? "anthropic";
|
|
5554
5565
|
switch (provider) {
|
|
5555
|
-
case "claude":
|
|
5556
5566
|
case "anthropic":
|
|
5557
5567
|
return createClaudeAgentJudge(config);
|
|
5558
5568
|
case "openai":
|
|
@@ -5905,7 +5915,18 @@ function applySanitizers(value, sanitizers) {
|
|
|
5905
5915
|
continue;
|
|
5906
5916
|
}
|
|
5907
5917
|
if (isRegexSanitizer(sanitizer)) {
|
|
5908
|
-
|
|
5918
|
+
let pattern;
|
|
5919
|
+
if (sanitizer.pattern instanceof RegExp) {
|
|
5920
|
+
pattern = sanitizer.pattern;
|
|
5921
|
+
} else {
|
|
5922
|
+
try {
|
|
5923
|
+
pattern = new RegExp(sanitizer.pattern, "g");
|
|
5924
|
+
} catch {
|
|
5925
|
+
throw new Error(
|
|
5926
|
+
`toMatchToolSnapshot: invalid regex pattern "${sanitizer.pattern}" in sanitizer`
|
|
5927
|
+
);
|
|
5928
|
+
}
|
|
5929
|
+
}
|
|
5909
5930
|
const replacement = sanitizer.replacement ?? "[SANITIZED]";
|
|
5910
5931
|
result = result.replace(pattern, replacement);
|
|
5911
5932
|
continue;
|
|
@@ -6209,6 +6230,106 @@ var test = test$1.test.extend({
|
|
|
6209
6230
|
await use(api);
|
|
6210
6231
|
}
|
|
6211
6232
|
});
|
|
6233
|
+
|
|
6234
|
+
// src/fixtures/mcpAuth.ts
|
|
6235
|
+
init_oauthClientProvider();
|
|
6236
|
+
var StaticTokenAuthProvider = class {
|
|
6237
|
+
accessToken;
|
|
6238
|
+
constructor(accessToken) {
|
|
6239
|
+
this.accessToken = accessToken;
|
|
6240
|
+
}
|
|
6241
|
+
get redirectUrl() {
|
|
6242
|
+
throw new Error("StaticTokenAuthProvider does not support OAuth redirects");
|
|
6243
|
+
}
|
|
6244
|
+
get clientMetadata() {
|
|
6245
|
+
return {
|
|
6246
|
+
redirect_uris: [],
|
|
6247
|
+
token_endpoint_auth_method: "none",
|
|
6248
|
+
grant_types: [],
|
|
6249
|
+
response_types: [],
|
|
6250
|
+
client_name: "@gleanwork/mcp-server-tester"
|
|
6251
|
+
};
|
|
6252
|
+
}
|
|
6253
|
+
async clientInformation() {
|
|
6254
|
+
return void 0;
|
|
6255
|
+
}
|
|
6256
|
+
async tokens() {
|
|
6257
|
+
return {
|
|
6258
|
+
access_token: this.accessToken,
|
|
6259
|
+
token_type: "Bearer"
|
|
6260
|
+
};
|
|
6261
|
+
}
|
|
6262
|
+
async saveTokens() {
|
|
6263
|
+
}
|
|
6264
|
+
async redirectToAuthorization() {
|
|
6265
|
+
throw new Error("StaticTokenAuthProvider does not support OAuth redirects");
|
|
6266
|
+
}
|
|
6267
|
+
async saveCodeVerifier() {
|
|
6268
|
+
throw new Error("StaticTokenAuthProvider does not support PKCE");
|
|
6269
|
+
}
|
|
6270
|
+
async codeVerifier() {
|
|
6271
|
+
throw new Error("StaticTokenAuthProvider does not support PKCE");
|
|
6272
|
+
}
|
|
6273
|
+
};
|
|
6274
|
+
var test2 = test$1.test.extend({
|
|
6275
|
+
/**
|
|
6276
|
+
* Create auth provider based on environment configuration
|
|
6277
|
+
*/
|
|
6278
|
+
// eslint-disable-next-line no-empty-pattern
|
|
6279
|
+
mcpAuthProvider: async ({}, use) => {
|
|
6280
|
+
const authConfig = getAuthConfigFromEnv();
|
|
6281
|
+
if (!authConfig) {
|
|
6282
|
+
await use(void 0);
|
|
6283
|
+
return;
|
|
6284
|
+
}
|
|
6285
|
+
if (authConfig.accessToken) {
|
|
6286
|
+
const provider = new StaticTokenAuthProvider(authConfig.accessToken);
|
|
6287
|
+
await use(provider);
|
|
6288
|
+
return;
|
|
6289
|
+
}
|
|
6290
|
+
if (authConfig.oauth) {
|
|
6291
|
+
const provider = createOAuthProvider(authConfig.oauth);
|
|
6292
|
+
await use(provider);
|
|
6293
|
+
return;
|
|
6294
|
+
}
|
|
6295
|
+
await use(void 0);
|
|
6296
|
+
}
|
|
6297
|
+
});
|
|
6298
|
+
function createOAuthProvider(oauthConfig) {
|
|
6299
|
+
if (!oauthConfig.authStatePath) {
|
|
6300
|
+
throw new Error(
|
|
6301
|
+
"OAuth configuration requires authStatePath. Use performOAuthSetup() in globalSetup to create auth state first."
|
|
6302
|
+
);
|
|
6303
|
+
}
|
|
6304
|
+
const providerConfig = {
|
|
6305
|
+
storagePath: oauthConfig.authStatePath,
|
|
6306
|
+
redirectUri: oauthConfig.redirectUri ?? "http://localhost:3000/oauth/callback",
|
|
6307
|
+
clientId: oauthConfig.clientId,
|
|
6308
|
+
clientSecret: oauthConfig.clientSecret
|
|
6309
|
+
};
|
|
6310
|
+
return new exports.PlaywrightOAuthClientProvider(providerConfig);
|
|
6311
|
+
}
|
|
6312
|
+
function getAuthConfigFromEnv() {
|
|
6313
|
+
const accessToken = process.env.MCP_ACCESS_TOKEN;
|
|
6314
|
+
if (accessToken) {
|
|
6315
|
+
return { accessToken };
|
|
6316
|
+
}
|
|
6317
|
+
const oauthServerUrl = process.env.MCP_OAUTH_SERVER_URL;
|
|
6318
|
+
const authStatePath = process.env.MCP_AUTH_STATE_PATH;
|
|
6319
|
+
if (oauthServerUrl || authStatePath) {
|
|
6320
|
+
return {
|
|
6321
|
+
oauth: {
|
|
6322
|
+
serverUrl: oauthServerUrl ?? "",
|
|
6323
|
+
authStatePath,
|
|
6324
|
+
clientId: process.env.MCP_OAUTH_CLIENT_ID,
|
|
6325
|
+
clientSecret: process.env.MCP_OAUTH_CLIENT_SECRET,
|
|
6326
|
+
scopes: process.env.MCP_OAUTH_SCOPES?.split(","),
|
|
6327
|
+
resource: process.env.MCP_OAUTH_RESOURCE
|
|
6328
|
+
}
|
|
6329
|
+
};
|
|
6330
|
+
}
|
|
6331
|
+
return void 0;
|
|
6332
|
+
}
|
|
6212
6333
|
var LLMHostConfigSchema = zod.z.object({
|
|
6213
6334
|
provider: zod.z.enum([
|
|
6214
6335
|
"openai",
|
|
@@ -6216,7 +6337,6 @@ var LLMHostConfigSchema = zod.z.object({
|
|
|
6216
6337
|
"azure",
|
|
6217
6338
|
"google",
|
|
6218
6339
|
"mistral",
|
|
6219
|
-
"ollama",
|
|
6220
6340
|
"deepseek",
|
|
6221
6341
|
"openrouter",
|
|
6222
6342
|
"xai",
|
|
@@ -6263,7 +6383,7 @@ var EvalExpectBlockSchema = zod.z.object({
|
|
|
6263
6383
|
reference: zod.z.unknown().optional(),
|
|
6264
6384
|
threshold: zod.z.number().min(0).max(1).optional(),
|
|
6265
6385
|
reps: zod.z.number().int().min(1).optional(),
|
|
6266
|
-
provider: zod.z.enum(["
|
|
6386
|
+
provider: zod.z.enum(["anthropic", "openai", "google"]).optional(),
|
|
6267
6387
|
model: zod.z.string().optional(),
|
|
6268
6388
|
apiKeyEnvVar: zod.z.string().optional(),
|
|
6269
6389
|
maxTokens: zod.z.number().int().positive().optional(),
|
|
@@ -6405,10 +6525,6 @@ async function loadModel(provider, model) {
|
|
|
6405
6525
|
const { azure } = await import('@ai-sdk/azure');
|
|
6406
6526
|
return azure(model);
|
|
6407
6527
|
}
|
|
6408
|
-
case "ollama": {
|
|
6409
|
-
const { ollama } = await import('@ai-sdk/ollama');
|
|
6410
|
-
return ollama(model);
|
|
6411
|
-
}
|
|
6412
6528
|
case "deepseek": {
|
|
6413
6529
|
const { deepseek } = await import('@ai-sdk/deepseek');
|
|
6414
6530
|
return deepseek(model);
|
|
@@ -6515,7 +6631,6 @@ var allProviders = [
|
|
|
6515
6631
|
"azure",
|
|
6516
6632
|
"google",
|
|
6517
6633
|
"mistral",
|
|
6518
|
-
"ollama",
|
|
6519
6634
|
"deepseek",
|
|
6520
6635
|
"openrouter",
|
|
6521
6636
|
"xai",
|
|
@@ -6543,7 +6658,6 @@ function getMissingDependencyMessage(provider) {
|
|
|
6543
6658
|
google: "npm install ai @ai-sdk/google",
|
|
6544
6659
|
azure: "npm install ai @ai-sdk/azure",
|
|
6545
6660
|
mistral: "npm install ai @ai-sdk/mistral",
|
|
6546
|
-
ollama: "npm install ai @ai-sdk/ollama",
|
|
6547
6661
|
deepseek: "npm install ai @ai-sdk/deepseek",
|
|
6548
6662
|
openrouter: "npm install ai @openrouter/ai-sdk-provider",
|
|
6549
6663
|
xai: "npm install ai @ai-sdk/xai",
|
|
@@ -6790,15 +6904,17 @@ async function runSingleIteration(evalCase, context, options) {
|
|
|
6790
6904
|
function isInfrastructureError(err) {
|
|
6791
6905
|
let name15;
|
|
6792
6906
|
let msg;
|
|
6907
|
+
let code = "";
|
|
6793
6908
|
if (err instanceof Error) {
|
|
6794
6909
|
name15 = err.name;
|
|
6795
6910
|
msg = err.message.toLowerCase();
|
|
6911
|
+
code = (err.code ?? "").toLowerCase();
|
|
6796
6912
|
} else if (typeof err === "string") {
|
|
6797
6913
|
msg = err.toLowerCase();
|
|
6798
6914
|
} else {
|
|
6799
6915
|
return false;
|
|
6800
6916
|
}
|
|
6801
|
-
return name15 === "AbortError" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network");
|
|
6917
|
+
return name15 === "AbortError" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network") || code.includes("econnreset") || code.includes("etimedout") || code.includes("econnrefused");
|
|
6802
6918
|
}
|
|
6803
6919
|
async function runEvalCase(evalCase, context, options = {}) {
|
|
6804
6920
|
const iterations = evalCase.iterations ?? 1;
|
|
@@ -6913,9 +7029,9 @@ async function runEvalDataset(options, context) {
|
|
|
6913
7029
|
const withIterations = evalCase.mode === "llm_host" && evalCase.iterations === void 0 && defaultLlmIterations !== void 0 ? { ...evalCase, iterations: defaultLlmIterations } : evalCase;
|
|
6914
7030
|
if (evalCase.mode === "llm_host") {
|
|
6915
7031
|
const effectiveIterations = withIterations.iterations ?? 1;
|
|
6916
|
-
if (effectiveIterations < 10) {
|
|
7032
|
+
if (effectiveIterations > 1 && effectiveIterations < 10) {
|
|
6917
7033
|
console.warn(
|
|
6918
|
-
`[mcp-server-tester] Eval case "${evalCase.id}"
|
|
7034
|
+
`[mcp-server-tester] Eval case "${evalCase.id}": running ${effectiveIterations} iterations in llm_host mode may not be statistically reliable. Consider using 10+ iterations for accuracy measurements you can trust.`
|
|
6919
7035
|
);
|
|
6920
7036
|
}
|
|
6921
7037
|
}
|
|
@@ -6963,6 +7079,16 @@ async function runEvalDataset(options, context) {
|
|
|
6963
7079
|
const baseline = await loadBaseline(baselineResultsFrom);
|
|
6964
7080
|
const baselinePassRate = baseline.total > 0 ? baseline.passed / baseline.total : 0;
|
|
6965
7081
|
const baselineMap = buildBaselinePassMap(baseline);
|
|
7082
|
+
const currentCaseIds = result.caseResults.map((cr) => cr.id);
|
|
7083
|
+
const unmatchedCount = currentCaseIds.filter(
|
|
7084
|
+
(id) => !baselineMap.has(id)
|
|
7085
|
+
).length;
|
|
7086
|
+
const unmatchedRatio = currentCaseIds.length > 0 ? unmatchedCount / currentCaseIds.length : 0;
|
|
7087
|
+
if (unmatchedRatio > 0.2) {
|
|
7088
|
+
console.warn(
|
|
7089
|
+
`[mcp-server-tester] Baseline comparison: ${unmatchedCount} of ${currentCaseIds.length} cases (${Math.round(unmatchedRatio * 100)}%) have no baseline entry. This may indicate the dataset structure has changed. Results for unmatched cases cannot be compared.`
|
|
7090
|
+
);
|
|
7091
|
+
}
|
|
6966
7092
|
for (const cr of result.caseResults) {
|
|
6967
7093
|
const baselinePass = baselineMap.get(cr.id);
|
|
6968
7094
|
if (baselinePass !== void 0) {
|
|
@@ -7272,6 +7398,7 @@ exports.loadEvalDataset = loadEvalDataset;
|
|
|
7272
7398
|
exports.loadEvalDatasetFromObject = loadEvalDatasetFromObject;
|
|
7273
7399
|
exports.loadTokens = loadTokens;
|
|
7274
7400
|
exports.loadTokensFromEnv = loadTokensFromEnv;
|
|
7401
|
+
exports.mcpAuthTest = test2;
|
|
7275
7402
|
exports.normalizeToolResponse = normalizeToolResponse;
|
|
7276
7403
|
exports.normalizeWhitespace = normalizeWhitespace;
|
|
7277
7404
|
exports.performClientCredentialsFlow = performClientCredentialsFlow;
|