@gleanwork/mcp-server-tester 1.0.0-beta.0 → 1.0.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +14 -3
- package/dist/fixtures/mcp.d.ts +1 -1
- package/dist/fixtures/mcp.js +28 -7
- package/dist/fixtures/mcp.js.map +1 -1
- package/dist/index.cjs +147 -20
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +81 -49
- package/dist/index.d.ts +81 -49
- package/dist/index.js +147 -21
- package/dist/index.js.map +1 -1
- package/package.json +2 -1
package/dist/index.js
CHANGED
|
@@ -1809,11 +1809,11 @@ function parseNullableDef(def, refs) {
|
|
|
1809
1809
|
]
|
|
1810
1810
|
};
|
|
1811
1811
|
}
|
|
1812
|
-
const
|
|
1812
|
+
const base3 = parseDef(def.innerType._def, {
|
|
1813
1813
|
...refs,
|
|
1814
1814
|
currentPath: [...refs.currentPath, "anyOf", "0"]
|
|
1815
1815
|
});
|
|
1816
|
-
return
|
|
1816
|
+
return base3 && { anyOf: [base3, { type: "null" }] };
|
|
1817
1817
|
}
|
|
1818
1818
|
function parseNumberDef(def) {
|
|
1819
1819
|
const res = {
|
|
@@ -3133,6 +3133,7 @@ var StdioConfigSchema = z.object({
|
|
|
3133
3133
|
command: z.string().min(1, "command is required for stdio transport"),
|
|
3134
3134
|
args: z.array(z.string()).optional(),
|
|
3135
3135
|
cwd: z.string().optional(),
|
|
3136
|
+
env: z.record(z.string(), z.string()).optional(),
|
|
3136
3137
|
capabilities: MCPHostCapabilitiesSchema.optional(),
|
|
3137
3138
|
connectTimeoutMs: z.number().positive().optional(),
|
|
3138
3139
|
requestTimeoutMs: z.number().positive().optional(),
|
|
@@ -4379,7 +4380,7 @@ function escapeHtml(text) {
|
|
|
4379
4380
|
|
|
4380
4381
|
// package.json
|
|
4381
4382
|
var package_default = {
|
|
4382
|
-
version: "1.0.0-beta.
|
|
4383
|
+
version: "1.0.0-beta.2"};
|
|
4383
4384
|
|
|
4384
4385
|
// src/mcp/clientFactory.ts
|
|
4385
4386
|
function getRetryAfterDelayMs(err) {
|
|
@@ -4451,7 +4452,14 @@ async function createMCPClientForConfig(config, options) {
|
|
|
4451
4452
|
args: validatedConfig.args ?? [],
|
|
4452
4453
|
...validatedConfig.cwd && { cwd: validatedConfig.cwd },
|
|
4453
4454
|
// Suppress server stderr when quiet mode is enabled
|
|
4454
|
-
...validatedConfig.quiet && { stderr: "ignore" }
|
|
4455
|
+
...validatedConfig.quiet && { stderr: "ignore" },
|
|
4456
|
+
...validatedConfig.env && {
|
|
4457
|
+
env: Object.fromEntries(
|
|
4458
|
+
Object.entries({ ...process.env, ...validatedConfig.env }).filter(
|
|
4459
|
+
(entry) => entry[1] !== void 0
|
|
4460
|
+
)
|
|
4461
|
+
)
|
|
4462
|
+
}
|
|
4455
4463
|
});
|
|
4456
4464
|
debugClient("Connecting via stdio: %O", {
|
|
4457
4465
|
command: validatedConfig.command,
|
|
@@ -4590,7 +4598,10 @@ async function closeMCPClient(client) {
|
|
|
4590
4598
|
try {
|
|
4591
4599
|
await client.close();
|
|
4592
4600
|
} catch (error) {
|
|
4593
|
-
|
|
4601
|
+
debugClient(
|
|
4602
|
+
"Error closing client: %s",
|
|
4603
|
+
error instanceof Error ? error.message : String(error)
|
|
4604
|
+
);
|
|
4594
4605
|
throw error;
|
|
4595
4606
|
} finally {
|
|
4596
4607
|
const agent = agentRegistry.get(client);
|
|
@@ -5120,7 +5131,7 @@ function validateToolCalls(response, expectation) {
|
|
|
5120
5131
|
).length;
|
|
5121
5132
|
const recall = requiredCalls.length > 0 ? calledRequiredCount / requiredCalls.length : 1;
|
|
5122
5133
|
const allowedNames = new Set(expectation.calls.map((c) => c.name));
|
|
5123
|
-
const precision = actual.length > 0
|
|
5134
|
+
const precision = actual.length > 0 ? actual.filter((c) => allowedNames.has(c.name)).length / actual.length : 1;
|
|
5124
5135
|
const metrics = { precision, recall };
|
|
5125
5136
|
const order = expectation.order ?? "any";
|
|
5126
5137
|
if (order === "strict") {
|
|
@@ -5523,9 +5534,8 @@ Validation errors: ${JSON.stringify(validation.error.issues)}`
|
|
|
5523
5534
|
|
|
5524
5535
|
// src/judge/judgeClient.ts
|
|
5525
5536
|
function createJudge(config = {}) {
|
|
5526
|
-
const provider = config.provider ?? "
|
|
5537
|
+
const provider = config.provider ?? "anthropic";
|
|
5527
5538
|
switch (provider) {
|
|
5528
|
-
case "claude":
|
|
5529
5539
|
case "anthropic":
|
|
5530
5540
|
return createClaudeAgentJudge(config);
|
|
5531
5541
|
case "openai":
|
|
@@ -5878,7 +5888,18 @@ function applySanitizers(value, sanitizers) {
|
|
|
5878
5888
|
continue;
|
|
5879
5889
|
}
|
|
5880
5890
|
if (isRegexSanitizer(sanitizer)) {
|
|
5881
|
-
|
|
5891
|
+
let pattern;
|
|
5892
|
+
if (sanitizer.pattern instanceof RegExp) {
|
|
5893
|
+
pattern = sanitizer.pattern;
|
|
5894
|
+
} else {
|
|
5895
|
+
try {
|
|
5896
|
+
pattern = new RegExp(sanitizer.pattern, "g");
|
|
5897
|
+
} catch {
|
|
5898
|
+
throw new Error(
|
|
5899
|
+
`toMatchToolSnapshot: invalid regex pattern "${sanitizer.pattern}" in sanitizer`
|
|
5900
|
+
);
|
|
5901
|
+
}
|
|
5902
|
+
}
|
|
5882
5903
|
const replacement = sanitizer.replacement ?? "[SANITIZED]";
|
|
5883
5904
|
result = result.replace(pattern, replacement);
|
|
5884
5905
|
continue;
|
|
@@ -6182,6 +6203,106 @@ var test = test$1.extend({
|
|
|
6182
6203
|
await use(api);
|
|
6183
6204
|
}
|
|
6184
6205
|
});
|
|
6206
|
+
|
|
6207
|
+
// src/fixtures/mcpAuth.ts
|
|
6208
|
+
init_oauthClientProvider();
|
|
6209
|
+
var StaticTokenAuthProvider = class {
|
|
6210
|
+
accessToken;
|
|
6211
|
+
constructor(accessToken) {
|
|
6212
|
+
this.accessToken = accessToken;
|
|
6213
|
+
}
|
|
6214
|
+
get redirectUrl() {
|
|
6215
|
+
throw new Error("StaticTokenAuthProvider does not support OAuth redirects");
|
|
6216
|
+
}
|
|
6217
|
+
get clientMetadata() {
|
|
6218
|
+
return {
|
|
6219
|
+
redirect_uris: [],
|
|
6220
|
+
token_endpoint_auth_method: "none",
|
|
6221
|
+
grant_types: [],
|
|
6222
|
+
response_types: [],
|
|
6223
|
+
client_name: "@gleanwork/mcp-server-tester"
|
|
6224
|
+
};
|
|
6225
|
+
}
|
|
6226
|
+
async clientInformation() {
|
|
6227
|
+
return void 0;
|
|
6228
|
+
}
|
|
6229
|
+
async tokens() {
|
|
6230
|
+
return {
|
|
6231
|
+
access_token: this.accessToken,
|
|
6232
|
+
token_type: "Bearer"
|
|
6233
|
+
};
|
|
6234
|
+
}
|
|
6235
|
+
async saveTokens() {
|
|
6236
|
+
}
|
|
6237
|
+
async redirectToAuthorization() {
|
|
6238
|
+
throw new Error("StaticTokenAuthProvider does not support OAuth redirects");
|
|
6239
|
+
}
|
|
6240
|
+
async saveCodeVerifier() {
|
|
6241
|
+
throw new Error("StaticTokenAuthProvider does not support PKCE");
|
|
6242
|
+
}
|
|
6243
|
+
async codeVerifier() {
|
|
6244
|
+
throw new Error("StaticTokenAuthProvider does not support PKCE");
|
|
6245
|
+
}
|
|
6246
|
+
};
|
|
6247
|
+
var test2 = test$1.extend({
|
|
6248
|
+
/**
|
|
6249
|
+
* Create auth provider based on environment configuration
|
|
6250
|
+
*/
|
|
6251
|
+
// eslint-disable-next-line no-empty-pattern
|
|
6252
|
+
mcpAuthProvider: async ({}, use) => {
|
|
6253
|
+
const authConfig = getAuthConfigFromEnv();
|
|
6254
|
+
if (!authConfig) {
|
|
6255
|
+
await use(void 0);
|
|
6256
|
+
return;
|
|
6257
|
+
}
|
|
6258
|
+
if (authConfig.accessToken) {
|
|
6259
|
+
const provider = new StaticTokenAuthProvider(authConfig.accessToken);
|
|
6260
|
+
await use(provider);
|
|
6261
|
+
return;
|
|
6262
|
+
}
|
|
6263
|
+
if (authConfig.oauth) {
|
|
6264
|
+
const provider = createOAuthProvider(authConfig.oauth);
|
|
6265
|
+
await use(provider);
|
|
6266
|
+
return;
|
|
6267
|
+
}
|
|
6268
|
+
await use(void 0);
|
|
6269
|
+
}
|
|
6270
|
+
});
|
|
6271
|
+
function createOAuthProvider(oauthConfig) {
|
|
6272
|
+
if (!oauthConfig.authStatePath) {
|
|
6273
|
+
throw new Error(
|
|
6274
|
+
"OAuth configuration requires authStatePath. Use performOAuthSetup() in globalSetup to create auth state first."
|
|
6275
|
+
);
|
|
6276
|
+
}
|
|
6277
|
+
const providerConfig = {
|
|
6278
|
+
storagePath: oauthConfig.authStatePath,
|
|
6279
|
+
redirectUri: oauthConfig.redirectUri ?? "http://localhost:3000/oauth/callback",
|
|
6280
|
+
clientId: oauthConfig.clientId,
|
|
6281
|
+
clientSecret: oauthConfig.clientSecret
|
|
6282
|
+
};
|
|
6283
|
+
return new PlaywrightOAuthClientProvider(providerConfig);
|
|
6284
|
+
}
|
|
6285
|
+
function getAuthConfigFromEnv() {
|
|
6286
|
+
const accessToken = process.env.MCP_ACCESS_TOKEN;
|
|
6287
|
+
if (accessToken) {
|
|
6288
|
+
return { accessToken };
|
|
6289
|
+
}
|
|
6290
|
+
const oauthServerUrl = process.env.MCP_OAUTH_SERVER_URL;
|
|
6291
|
+
const authStatePath = process.env.MCP_AUTH_STATE_PATH;
|
|
6292
|
+
if (oauthServerUrl || authStatePath) {
|
|
6293
|
+
return {
|
|
6294
|
+
oauth: {
|
|
6295
|
+
serverUrl: oauthServerUrl ?? "",
|
|
6296
|
+
authStatePath,
|
|
6297
|
+
clientId: process.env.MCP_OAUTH_CLIENT_ID,
|
|
6298
|
+
clientSecret: process.env.MCP_OAUTH_CLIENT_SECRET,
|
|
6299
|
+
scopes: process.env.MCP_OAUTH_SCOPES?.split(","),
|
|
6300
|
+
resource: process.env.MCP_OAUTH_RESOURCE
|
|
6301
|
+
}
|
|
6302
|
+
};
|
|
6303
|
+
}
|
|
6304
|
+
return void 0;
|
|
6305
|
+
}
|
|
6185
6306
|
var LLMHostConfigSchema = z.object({
|
|
6186
6307
|
provider: z.enum([
|
|
6187
6308
|
"openai",
|
|
@@ -6189,7 +6310,6 @@ var LLMHostConfigSchema = z.object({
|
|
|
6189
6310
|
"azure",
|
|
6190
6311
|
"google",
|
|
6191
6312
|
"mistral",
|
|
6192
|
-
"ollama",
|
|
6193
6313
|
"deepseek",
|
|
6194
6314
|
"openrouter",
|
|
6195
6315
|
"xai",
|
|
@@ -6236,7 +6356,7 @@ var EvalExpectBlockSchema = z.object({
|
|
|
6236
6356
|
reference: z.unknown().optional(),
|
|
6237
6357
|
threshold: z.number().min(0).max(1).optional(),
|
|
6238
6358
|
reps: z.number().int().min(1).optional(),
|
|
6239
|
-
provider: z.enum(["
|
|
6359
|
+
provider: z.enum(["anthropic", "openai", "google"]).optional(),
|
|
6240
6360
|
model: z.string().optional(),
|
|
6241
6361
|
apiKeyEnvVar: z.string().optional(),
|
|
6242
6362
|
maxTokens: z.number().int().positive().optional(),
|
|
@@ -6378,10 +6498,6 @@ async function loadModel(provider, model) {
|
|
|
6378
6498
|
const { azure } = await import('@ai-sdk/azure');
|
|
6379
6499
|
return azure(model);
|
|
6380
6500
|
}
|
|
6381
|
-
case "ollama": {
|
|
6382
|
-
const { ollama } = await import('@ai-sdk/ollama');
|
|
6383
|
-
return ollama(model);
|
|
6384
|
-
}
|
|
6385
6501
|
case "deepseek": {
|
|
6386
6502
|
const { deepseek } = await import('@ai-sdk/deepseek');
|
|
6387
6503
|
return deepseek(model);
|
|
@@ -6488,7 +6604,6 @@ var allProviders = [
|
|
|
6488
6604
|
"azure",
|
|
6489
6605
|
"google",
|
|
6490
6606
|
"mistral",
|
|
6491
|
-
"ollama",
|
|
6492
6607
|
"deepseek",
|
|
6493
6608
|
"openrouter",
|
|
6494
6609
|
"xai",
|
|
@@ -6516,7 +6631,6 @@ function getMissingDependencyMessage(provider) {
|
|
|
6516
6631
|
google: "npm install ai @ai-sdk/google",
|
|
6517
6632
|
azure: "npm install ai @ai-sdk/azure",
|
|
6518
6633
|
mistral: "npm install ai @ai-sdk/mistral",
|
|
6519
|
-
ollama: "npm install ai @ai-sdk/ollama",
|
|
6520
6634
|
deepseek: "npm install ai @ai-sdk/deepseek",
|
|
6521
6635
|
openrouter: "npm install ai @openrouter/ai-sdk-provider",
|
|
6522
6636
|
xai: "npm install ai @ai-sdk/xai",
|
|
@@ -6763,15 +6877,17 @@ async function runSingleIteration(evalCase, context, options) {
|
|
|
6763
6877
|
function isInfrastructureError(err) {
|
|
6764
6878
|
let name15;
|
|
6765
6879
|
let msg;
|
|
6880
|
+
let code = "";
|
|
6766
6881
|
if (err instanceof Error) {
|
|
6767
6882
|
name15 = err.name;
|
|
6768
6883
|
msg = err.message.toLowerCase();
|
|
6884
|
+
code = (err.code ?? "").toLowerCase();
|
|
6769
6885
|
} else if (typeof err === "string") {
|
|
6770
6886
|
msg = err.toLowerCase();
|
|
6771
6887
|
} else {
|
|
6772
6888
|
return false;
|
|
6773
6889
|
}
|
|
6774
|
-
return name15 === "AbortError" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network");
|
|
6890
|
+
return name15 === "AbortError" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network") || code.includes("econnreset") || code.includes("etimedout") || code.includes("econnrefused");
|
|
6775
6891
|
}
|
|
6776
6892
|
async function runEvalCase(evalCase, context, options = {}) {
|
|
6777
6893
|
const iterations = evalCase.iterations ?? 1;
|
|
@@ -6886,9 +7002,9 @@ async function runEvalDataset(options, context) {
|
|
|
6886
7002
|
const withIterations = evalCase.mode === "llm_host" && evalCase.iterations === void 0 && defaultLlmIterations !== void 0 ? { ...evalCase, iterations: defaultLlmIterations } : evalCase;
|
|
6887
7003
|
if (evalCase.mode === "llm_host") {
|
|
6888
7004
|
const effectiveIterations = withIterations.iterations ?? 1;
|
|
6889
|
-
if (effectiveIterations < 10) {
|
|
7005
|
+
if (effectiveIterations > 1 && effectiveIterations < 10) {
|
|
6890
7006
|
console.warn(
|
|
6891
|
-
`[mcp-server-tester] Eval case "${evalCase.id}"
|
|
7007
|
+
`[mcp-server-tester] Eval case "${evalCase.id}": running ${effectiveIterations} iterations in llm_host mode may not be statistically reliable. Consider using 10+ iterations for accuracy measurements you can trust.`
|
|
6892
7008
|
);
|
|
6893
7009
|
}
|
|
6894
7010
|
}
|
|
@@ -6936,6 +7052,16 @@ async function runEvalDataset(options, context) {
|
|
|
6936
7052
|
const baseline = await loadBaseline(baselineResultsFrom);
|
|
6937
7053
|
const baselinePassRate = baseline.total > 0 ? baseline.passed / baseline.total : 0;
|
|
6938
7054
|
const baselineMap = buildBaselinePassMap(baseline);
|
|
7055
|
+
const currentCaseIds = result.caseResults.map((cr) => cr.id);
|
|
7056
|
+
const unmatchedCount = currentCaseIds.filter(
|
|
7057
|
+
(id) => !baselineMap.has(id)
|
|
7058
|
+
).length;
|
|
7059
|
+
const unmatchedRatio = currentCaseIds.length > 0 ? unmatchedCount / currentCaseIds.length : 0;
|
|
7060
|
+
if (unmatchedRatio > 0.2) {
|
|
7061
|
+
console.warn(
|
|
7062
|
+
`[mcp-server-tester] Baseline comparison: ${unmatchedCount} of ${currentCaseIds.length} cases (${Math.round(unmatchedRatio * 100)}%) have no baseline entry. This may indicate the dataset structure has changed. Results for unmatched cases cannot be compared.`
|
|
7063
|
+
);
|
|
7064
|
+
}
|
|
6939
7065
|
for (const cr of result.caseResults) {
|
|
6940
7066
|
const baselinePass = baselineMap.get(cr.id);
|
|
6941
7067
|
if (baselinePass !== void 0) {
|
|
@@ -7212,6 +7338,6 @@ function formatCapabilities(capabilities) {
|
|
|
7212
7338
|
return parts.length > 0 ? parts.join(", ") : "none declared";
|
|
7213
7339
|
}
|
|
7214
7340
|
|
|
7215
|
-
export { BUILT_IN_RUBRICS, CLIOAuthClient, DiscoveryError, ENV_VAR_NAMES, EvalCaseSchema, EvalDatasetSchema, MCPConfigSchema, MCP_PROTOCOL_VERSION, PlaywrightOAuthClientProvider, SnapshotSanitizers, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateLLMHost, test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
|
|
7341
|
+
export { BUILT_IN_RUBRICS, CLIOAuthClient, DiscoveryError, ENV_VAR_NAMES, EvalCaseSchema, EvalDatasetSchema, MCPConfigSchema, MCP_PROTOCOL_VERSION, PlaywrightOAuthClientProvider, SnapshotSanitizers, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test2 as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateLLMHost, test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
|
|
7216
7342
|
//# sourceMappingURL=index.js.map
|
|
7217
7343
|
//# sourceMappingURL=index.js.map
|