@gleanwork/mcp-server-tester 1.0.0-beta.0 → 1.0.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1809,11 +1809,11 @@ function parseNullableDef(def, refs) {
1809
1809
  ]
1810
1810
  };
1811
1811
  }
1812
- const base2 = parseDef(def.innerType._def, {
1812
+ const base3 = parseDef(def.innerType._def, {
1813
1813
  ...refs,
1814
1814
  currentPath: [...refs.currentPath, "anyOf", "0"]
1815
1815
  });
1816
- return base2 && { anyOf: [base2, { type: "null" }] };
1816
+ return base3 && { anyOf: [base3, { type: "null" }] };
1817
1817
  }
1818
1818
  function parseNumberDef(def) {
1819
1819
  const res = {
@@ -3133,6 +3133,7 @@ var StdioConfigSchema = z.object({
3133
3133
  command: z.string().min(1, "command is required for stdio transport"),
3134
3134
  args: z.array(z.string()).optional(),
3135
3135
  cwd: z.string().optional(),
3136
+ env: z.record(z.string(), z.string()).optional(),
3136
3137
  capabilities: MCPHostCapabilitiesSchema.optional(),
3137
3138
  connectTimeoutMs: z.number().positive().optional(),
3138
3139
  requestTimeoutMs: z.number().positive().optional(),
@@ -4379,7 +4380,7 @@ function escapeHtml(text) {
4379
4380
 
4380
4381
  // package.json
4381
4382
  var package_default = {
4382
- version: "1.0.0-beta.0"};
4383
+ version: "1.0.0-beta.2"};
4383
4384
 
4384
4385
  // src/mcp/clientFactory.ts
4385
4386
  function getRetryAfterDelayMs(err) {
@@ -4451,7 +4452,14 @@ async function createMCPClientForConfig(config, options) {
4451
4452
  args: validatedConfig.args ?? [],
4452
4453
  ...validatedConfig.cwd && { cwd: validatedConfig.cwd },
4453
4454
  // Suppress server stderr when quiet mode is enabled
4454
- ...validatedConfig.quiet && { stderr: "ignore" }
4455
+ ...validatedConfig.quiet && { stderr: "ignore" },
4456
+ ...validatedConfig.env && {
4457
+ env: Object.fromEntries(
4458
+ Object.entries({ ...process.env, ...validatedConfig.env }).filter(
4459
+ (entry) => entry[1] !== void 0
4460
+ )
4461
+ )
4462
+ }
4455
4463
  });
4456
4464
  debugClient("Connecting via stdio: %O", {
4457
4465
  command: validatedConfig.command,
@@ -4590,7 +4598,10 @@ async function closeMCPClient(client) {
4590
4598
  try {
4591
4599
  await client.close();
4592
4600
  } catch (error) {
4593
- console.error("[MCP] Error closing client:", error);
4601
+ debugClient(
4602
+ "Error closing client: %s",
4603
+ error instanceof Error ? error.message : String(error)
4604
+ );
4594
4605
  throw error;
4595
4606
  } finally {
4596
4607
  const agent = agentRegistry.get(client);
@@ -5120,7 +5131,7 @@ function validateToolCalls(response, expectation) {
5120
5131
  ).length;
5121
5132
  const recall = requiredCalls.length > 0 ? calledRequiredCount / requiredCalls.length : 1;
5122
5133
  const allowedNames = new Set(expectation.calls.map((c) => c.name));
5123
- const precision = actual.length > 0 && expectation.exclusive === true ? actual.filter((c) => allowedNames.has(c.name)).length / actual.length : 1;
5134
+ const precision = actual.length > 0 ? actual.filter((c) => allowedNames.has(c.name)).length / actual.length : 1;
5124
5135
  const metrics = { precision, recall };
5125
5136
  const order = expectation.order ?? "any";
5126
5137
  if (order === "strict") {
@@ -5523,9 +5534,8 @@ Validation errors: ${JSON.stringify(validation.error.issues)}`
5523
5534
 
5524
5535
  // src/judge/judgeClient.ts
5525
5536
  function createJudge(config = {}) {
5526
- const provider = config.provider ?? "claude";
5537
+ const provider = config.provider ?? "anthropic";
5527
5538
  switch (provider) {
5528
- case "claude":
5529
5539
  case "anthropic":
5530
5540
  return createClaudeAgentJudge(config);
5531
5541
  case "openai":
@@ -5878,7 +5888,18 @@ function applySanitizers(value, sanitizers) {
5878
5888
  continue;
5879
5889
  }
5880
5890
  if (isRegexSanitizer(sanitizer)) {
5881
- const pattern = sanitizer.pattern instanceof RegExp ? sanitizer.pattern : new RegExp(sanitizer.pattern, "g");
5891
+ let pattern;
5892
+ if (sanitizer.pattern instanceof RegExp) {
5893
+ pattern = sanitizer.pattern;
5894
+ } else {
5895
+ try {
5896
+ pattern = new RegExp(sanitizer.pattern, "g");
5897
+ } catch {
5898
+ throw new Error(
5899
+ `toMatchToolSnapshot: invalid regex pattern "${sanitizer.pattern}" in sanitizer`
5900
+ );
5901
+ }
5902
+ }
5882
5903
  const replacement = sanitizer.replacement ?? "[SANITIZED]";
5883
5904
  result = result.replace(pattern, replacement);
5884
5905
  continue;
@@ -6182,6 +6203,106 @@ var test = test$1.extend({
6182
6203
  await use(api);
6183
6204
  }
6184
6205
  });
6206
+
6207
+ // src/fixtures/mcpAuth.ts
6208
+ init_oauthClientProvider();
6209
+ var StaticTokenAuthProvider = class {
6210
+ accessToken;
6211
+ constructor(accessToken) {
6212
+ this.accessToken = accessToken;
6213
+ }
6214
+ get redirectUrl() {
6215
+ throw new Error("StaticTokenAuthProvider does not support OAuth redirects");
6216
+ }
6217
+ get clientMetadata() {
6218
+ return {
6219
+ redirect_uris: [],
6220
+ token_endpoint_auth_method: "none",
6221
+ grant_types: [],
6222
+ response_types: [],
6223
+ client_name: "@gleanwork/mcp-server-tester"
6224
+ };
6225
+ }
6226
+ async clientInformation() {
6227
+ return void 0;
6228
+ }
6229
+ async tokens() {
6230
+ return {
6231
+ access_token: this.accessToken,
6232
+ token_type: "Bearer"
6233
+ };
6234
+ }
6235
+ async saveTokens() {
6236
+ }
6237
+ async redirectToAuthorization() {
6238
+ throw new Error("StaticTokenAuthProvider does not support OAuth redirects");
6239
+ }
6240
+ async saveCodeVerifier() {
6241
+ throw new Error("StaticTokenAuthProvider does not support PKCE");
6242
+ }
6243
+ async codeVerifier() {
6244
+ throw new Error("StaticTokenAuthProvider does not support PKCE");
6245
+ }
6246
+ };
6247
+ var test2 = test$1.extend({
6248
+ /**
6249
+ * Create auth provider based on environment configuration
6250
+ */
6251
+ // eslint-disable-next-line no-empty-pattern
6252
+ mcpAuthProvider: async ({}, use) => {
6253
+ const authConfig = getAuthConfigFromEnv();
6254
+ if (!authConfig) {
6255
+ await use(void 0);
6256
+ return;
6257
+ }
6258
+ if (authConfig.accessToken) {
6259
+ const provider = new StaticTokenAuthProvider(authConfig.accessToken);
6260
+ await use(provider);
6261
+ return;
6262
+ }
6263
+ if (authConfig.oauth) {
6264
+ const provider = createOAuthProvider(authConfig.oauth);
6265
+ await use(provider);
6266
+ return;
6267
+ }
6268
+ await use(void 0);
6269
+ }
6270
+ });
6271
+ function createOAuthProvider(oauthConfig) {
6272
+ if (!oauthConfig.authStatePath) {
6273
+ throw new Error(
6274
+ "OAuth configuration requires authStatePath. Use performOAuthSetup() in globalSetup to create auth state first."
6275
+ );
6276
+ }
6277
+ const providerConfig = {
6278
+ storagePath: oauthConfig.authStatePath,
6279
+ redirectUri: oauthConfig.redirectUri ?? "http://localhost:3000/oauth/callback",
6280
+ clientId: oauthConfig.clientId,
6281
+ clientSecret: oauthConfig.clientSecret
6282
+ };
6283
+ return new PlaywrightOAuthClientProvider(providerConfig);
6284
+ }
6285
+ function getAuthConfigFromEnv() {
6286
+ const accessToken = process.env.MCP_ACCESS_TOKEN;
6287
+ if (accessToken) {
6288
+ return { accessToken };
6289
+ }
6290
+ const oauthServerUrl = process.env.MCP_OAUTH_SERVER_URL;
6291
+ const authStatePath = process.env.MCP_AUTH_STATE_PATH;
6292
+ if (oauthServerUrl || authStatePath) {
6293
+ return {
6294
+ oauth: {
6295
+ serverUrl: oauthServerUrl ?? "",
6296
+ authStatePath,
6297
+ clientId: process.env.MCP_OAUTH_CLIENT_ID,
6298
+ clientSecret: process.env.MCP_OAUTH_CLIENT_SECRET,
6299
+ scopes: process.env.MCP_OAUTH_SCOPES?.split(","),
6300
+ resource: process.env.MCP_OAUTH_RESOURCE
6301
+ }
6302
+ };
6303
+ }
6304
+ return void 0;
6305
+ }
6185
6306
  var LLMHostConfigSchema = z.object({
6186
6307
  provider: z.enum([
6187
6308
  "openai",
@@ -6189,7 +6310,6 @@ var LLMHostConfigSchema = z.object({
6189
6310
  "azure",
6190
6311
  "google",
6191
6312
  "mistral",
6192
- "ollama",
6193
6313
  "deepseek",
6194
6314
  "openrouter",
6195
6315
  "xai",
@@ -6236,7 +6356,7 @@ var EvalExpectBlockSchema = z.object({
6236
6356
  reference: z.unknown().optional(),
6237
6357
  threshold: z.number().min(0).max(1).optional(),
6238
6358
  reps: z.number().int().min(1).optional(),
6239
- provider: z.enum(["claude", "anthropic", "openai", "google"]).optional(),
6359
+ provider: z.enum(["anthropic", "openai", "google"]).optional(),
6240
6360
  model: z.string().optional(),
6241
6361
  apiKeyEnvVar: z.string().optional(),
6242
6362
  maxTokens: z.number().int().positive().optional(),
@@ -6378,10 +6498,6 @@ async function loadModel(provider, model) {
6378
6498
  const { azure } = await import('@ai-sdk/azure');
6379
6499
  return azure(model);
6380
6500
  }
6381
- case "ollama": {
6382
- const { ollama } = await import('@ai-sdk/ollama');
6383
- return ollama(model);
6384
- }
6385
6501
  case "deepseek": {
6386
6502
  const { deepseek } = await import('@ai-sdk/deepseek');
6387
6503
  return deepseek(model);
@@ -6488,7 +6604,6 @@ var allProviders = [
6488
6604
  "azure",
6489
6605
  "google",
6490
6606
  "mistral",
6491
- "ollama",
6492
6607
  "deepseek",
6493
6608
  "openrouter",
6494
6609
  "xai",
@@ -6516,7 +6631,6 @@ function getMissingDependencyMessage(provider) {
6516
6631
  google: "npm install ai @ai-sdk/google",
6517
6632
  azure: "npm install ai @ai-sdk/azure",
6518
6633
  mistral: "npm install ai @ai-sdk/mistral",
6519
- ollama: "npm install ai @ai-sdk/ollama",
6520
6634
  deepseek: "npm install ai @ai-sdk/deepseek",
6521
6635
  openrouter: "npm install ai @openrouter/ai-sdk-provider",
6522
6636
  xai: "npm install ai @ai-sdk/xai",
@@ -6763,15 +6877,17 @@ async function runSingleIteration(evalCase, context, options) {
6763
6877
  function isInfrastructureError(err) {
6764
6878
  let name15;
6765
6879
  let msg;
6880
+ let code = "";
6766
6881
  if (err instanceof Error) {
6767
6882
  name15 = err.name;
6768
6883
  msg = err.message.toLowerCase();
6884
+ code = (err.code ?? "").toLowerCase();
6769
6885
  } else if (typeof err === "string") {
6770
6886
  msg = err.toLowerCase();
6771
6887
  } else {
6772
6888
  return false;
6773
6889
  }
6774
- return name15 === "AbortError" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network");
6890
+ return name15 === "AbortError" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network") || code.includes("econnreset") || code.includes("etimedout") || code.includes("econnrefused");
6775
6891
  }
6776
6892
  async function runEvalCase(evalCase, context, options = {}) {
6777
6893
  const iterations = evalCase.iterations ?? 1;
@@ -6886,9 +7002,9 @@ async function runEvalDataset(options, context) {
6886
7002
  const withIterations = evalCase.mode === "llm_host" && evalCase.iterations === void 0 && defaultLlmIterations !== void 0 ? { ...evalCase, iterations: defaultLlmIterations } : evalCase;
6887
7003
  if (evalCase.mode === "llm_host") {
6888
7004
  const effectiveIterations = withIterations.iterations ?? 1;
6889
- if (effectiveIterations < 10) {
7005
+ if (effectiveIterations > 1 && effectiveIterations < 10) {
6890
7006
  console.warn(
6891
- `[mcp-server-tester] Eval case "${evalCase.id}" uses llm_host mode with only ${effectiveIterations} iteration(s). The evals guide recommends >= 10 iterations. See docs/evals-guide.md for guidance on statistical reliability.`
7007
+ `[mcp-server-tester] Eval case "${evalCase.id}": running ${effectiveIterations} iterations in llm_host mode may not be statistically reliable. Consider using 10+ iterations for accuracy measurements you can trust.`
6892
7008
  );
6893
7009
  }
6894
7010
  }
@@ -6936,6 +7052,16 @@ async function runEvalDataset(options, context) {
6936
7052
  const baseline = await loadBaseline(baselineResultsFrom);
6937
7053
  const baselinePassRate = baseline.total > 0 ? baseline.passed / baseline.total : 0;
6938
7054
  const baselineMap = buildBaselinePassMap(baseline);
7055
+ const currentCaseIds = result.caseResults.map((cr) => cr.id);
7056
+ const unmatchedCount = currentCaseIds.filter(
7057
+ (id) => !baselineMap.has(id)
7058
+ ).length;
7059
+ const unmatchedRatio = currentCaseIds.length > 0 ? unmatchedCount / currentCaseIds.length : 0;
7060
+ if (unmatchedRatio > 0.2) {
7061
+ console.warn(
7062
+ `[mcp-server-tester] Baseline comparison: ${unmatchedCount} of ${currentCaseIds.length} cases (${Math.round(unmatchedRatio * 100)}%) have no baseline entry. This may indicate the dataset structure has changed. Results for unmatched cases cannot be compared.`
7063
+ );
7064
+ }
6939
7065
  for (const cr of result.caseResults) {
6940
7066
  const baselinePass = baselineMap.get(cr.id);
6941
7067
  if (baselinePass !== void 0) {
@@ -7212,6 +7338,6 @@ function formatCapabilities(capabilities) {
7212
7338
  return parts.length > 0 ? parts.join(", ") : "none declared";
7213
7339
  }
7214
7340
 
7215
- export { BUILT_IN_RUBRICS, CLIOAuthClient, DiscoveryError, ENV_VAR_NAMES, EvalCaseSchema, EvalDatasetSchema, MCPConfigSchema, MCP_PROTOCOL_VERSION, PlaywrightOAuthClientProvider, SnapshotSanitizers, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateLLMHost, test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
7341
+ export { BUILT_IN_RUBRICS, CLIOAuthClient, DiscoveryError, ENV_VAR_NAMES, EvalCaseSchema, EvalDatasetSchema, MCPConfigSchema, MCP_PROTOCOL_VERSION, PlaywrightOAuthClientProvider, SnapshotSanitizers, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test2 as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateLLMHost, test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
7216
7342
  //# sourceMappingURL=index.js.map
7217
7343
  //# sourceMappingURL=index.js.map