@gleanwork/mcp-server-tester 1.0.0-beta.0 → 1.0.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1836,11 +1836,11 @@ function parseNullableDef(def, refs) {
1836
1836
  ]
1837
1837
  };
1838
1838
  }
1839
- const base2 = parseDef(def.innerType._def, {
1839
+ const base3 = parseDef(def.innerType._def, {
1840
1840
  ...refs,
1841
1841
  currentPath: [...refs.currentPath, "anyOf", "0"]
1842
1842
  });
1843
- return base2 && { anyOf: [base2, { type: "null" }] };
1843
+ return base3 && { anyOf: [base3, { type: "null" }] };
1844
1844
  }
1845
1845
  function parseNumberDef(def) {
1846
1846
  const res = {
@@ -3160,6 +3160,7 @@ var StdioConfigSchema = zod.z.object({
3160
3160
  command: zod.z.string().min(1, "command is required for stdio transport"),
3161
3161
  args: zod.z.array(zod.z.string()).optional(),
3162
3162
  cwd: zod.z.string().optional(),
3163
+ env: zod.z.record(zod.z.string(), zod.z.string()).optional(),
3163
3164
  capabilities: MCPHostCapabilitiesSchema.optional(),
3164
3165
  connectTimeoutMs: zod.z.number().positive().optional(),
3165
3166
  requestTimeoutMs: zod.z.number().positive().optional(),
@@ -4406,7 +4407,7 @@ function escapeHtml(text) {
4406
4407
 
4407
4408
  // package.json
4408
4409
  var package_default = {
4409
- version: "1.0.0-beta.0"};
4410
+ version: "1.0.0-beta.2"};
4410
4411
 
4411
4412
  // src/mcp/clientFactory.ts
4412
4413
  function getRetryAfterDelayMs(err) {
@@ -4478,7 +4479,14 @@ async function createMCPClientForConfig(config, options) {
4478
4479
  args: validatedConfig.args ?? [],
4479
4480
  ...validatedConfig.cwd && { cwd: validatedConfig.cwd },
4480
4481
  // Suppress server stderr when quiet mode is enabled
4481
- ...validatedConfig.quiet && { stderr: "ignore" }
4482
+ ...validatedConfig.quiet && { stderr: "ignore" },
4483
+ ...validatedConfig.env && {
4484
+ env: Object.fromEntries(
4485
+ Object.entries({ ...process.env, ...validatedConfig.env }).filter(
4486
+ (entry) => entry[1] !== void 0
4487
+ )
4488
+ )
4489
+ }
4482
4490
  });
4483
4491
  debugClient("Connecting via stdio: %O", {
4484
4492
  command: validatedConfig.command,
@@ -4617,7 +4625,10 @@ async function closeMCPClient(client) {
4617
4625
  try {
4618
4626
  await client.close();
4619
4627
  } catch (error) {
4620
- console.error("[MCP] Error closing client:", error);
4628
+ debugClient(
4629
+ "Error closing client: %s",
4630
+ error instanceof Error ? error.message : String(error)
4631
+ );
4621
4632
  throw error;
4622
4633
  } finally {
4623
4634
  const agent = agentRegistry.get(client);
@@ -5147,7 +5158,7 @@ function validateToolCalls(response, expectation) {
5147
5158
  ).length;
5148
5159
  const recall = requiredCalls.length > 0 ? calledRequiredCount / requiredCalls.length : 1;
5149
5160
  const allowedNames = new Set(expectation.calls.map((c) => c.name));
5150
- const precision = actual.length > 0 && expectation.exclusive === true ? actual.filter((c) => allowedNames.has(c.name)).length / actual.length : 1;
5161
+ const precision = actual.length > 0 ? actual.filter((c) => allowedNames.has(c.name)).length / actual.length : 1;
5151
5162
  const metrics = { precision, recall };
5152
5163
  const order = expectation.order ?? "any";
5153
5164
  if (order === "strict") {
@@ -5550,9 +5561,8 @@ Validation errors: ${JSON.stringify(validation.error.issues)}`
5550
5561
 
5551
5562
  // src/judge/judgeClient.ts
5552
5563
  function createJudge(config = {}) {
5553
- const provider = config.provider ?? "claude";
5564
+ const provider = config.provider ?? "anthropic";
5554
5565
  switch (provider) {
5555
- case "claude":
5556
5566
  case "anthropic":
5557
5567
  return createClaudeAgentJudge(config);
5558
5568
  case "openai":
@@ -5905,7 +5915,18 @@ function applySanitizers(value, sanitizers) {
5905
5915
  continue;
5906
5916
  }
5907
5917
  if (isRegexSanitizer(sanitizer)) {
5908
- const pattern = sanitizer.pattern instanceof RegExp ? sanitizer.pattern : new RegExp(sanitizer.pattern, "g");
5918
+ let pattern;
5919
+ if (sanitizer.pattern instanceof RegExp) {
5920
+ pattern = sanitizer.pattern;
5921
+ } else {
5922
+ try {
5923
+ pattern = new RegExp(sanitizer.pattern, "g");
5924
+ } catch {
5925
+ throw new Error(
5926
+ `toMatchToolSnapshot: invalid regex pattern "${sanitizer.pattern}" in sanitizer`
5927
+ );
5928
+ }
5929
+ }
5909
5930
  const replacement = sanitizer.replacement ?? "[SANITIZED]";
5910
5931
  result = result.replace(pattern, replacement);
5911
5932
  continue;
@@ -6209,6 +6230,106 @@ var test = test$1.test.extend({
6209
6230
  await use(api);
6210
6231
  }
6211
6232
  });
6233
+
6234
+ // src/fixtures/mcpAuth.ts
6235
+ init_oauthClientProvider();
6236
+ var StaticTokenAuthProvider = class {
6237
+ accessToken;
6238
+ constructor(accessToken) {
6239
+ this.accessToken = accessToken;
6240
+ }
6241
+ get redirectUrl() {
6242
+ throw new Error("StaticTokenAuthProvider does not support OAuth redirects");
6243
+ }
6244
+ get clientMetadata() {
6245
+ return {
6246
+ redirect_uris: [],
6247
+ token_endpoint_auth_method: "none",
6248
+ grant_types: [],
6249
+ response_types: [],
6250
+ client_name: "@gleanwork/mcp-server-tester"
6251
+ };
6252
+ }
6253
+ async clientInformation() {
6254
+ return void 0;
6255
+ }
6256
+ async tokens() {
6257
+ return {
6258
+ access_token: this.accessToken,
6259
+ token_type: "Bearer"
6260
+ };
6261
+ }
6262
+ async saveTokens() {
6263
+ }
6264
+ async redirectToAuthorization() {
6265
+ throw new Error("StaticTokenAuthProvider does not support OAuth redirects");
6266
+ }
6267
+ async saveCodeVerifier() {
6268
+ throw new Error("StaticTokenAuthProvider does not support PKCE");
6269
+ }
6270
+ async codeVerifier() {
6271
+ throw new Error("StaticTokenAuthProvider does not support PKCE");
6272
+ }
6273
+ };
6274
+ var test2 = test$1.test.extend({
6275
+ /**
6276
+ * Create auth provider based on environment configuration
6277
+ */
6278
+ // eslint-disable-next-line no-empty-pattern
6279
+ mcpAuthProvider: async ({}, use) => {
6280
+ const authConfig = getAuthConfigFromEnv();
6281
+ if (!authConfig) {
6282
+ await use(void 0);
6283
+ return;
6284
+ }
6285
+ if (authConfig.accessToken) {
6286
+ const provider = new StaticTokenAuthProvider(authConfig.accessToken);
6287
+ await use(provider);
6288
+ return;
6289
+ }
6290
+ if (authConfig.oauth) {
6291
+ const provider = createOAuthProvider(authConfig.oauth);
6292
+ await use(provider);
6293
+ return;
6294
+ }
6295
+ await use(void 0);
6296
+ }
6297
+ });
6298
+ function createOAuthProvider(oauthConfig) {
6299
+ if (!oauthConfig.authStatePath) {
6300
+ throw new Error(
6301
+ "OAuth configuration requires authStatePath. Use performOAuthSetup() in globalSetup to create auth state first."
6302
+ );
6303
+ }
6304
+ const providerConfig = {
6305
+ storagePath: oauthConfig.authStatePath,
6306
+ redirectUri: oauthConfig.redirectUri ?? "http://localhost:3000/oauth/callback",
6307
+ clientId: oauthConfig.clientId,
6308
+ clientSecret: oauthConfig.clientSecret
6309
+ };
6310
+ return new exports.PlaywrightOAuthClientProvider(providerConfig);
6311
+ }
6312
+ function getAuthConfigFromEnv() {
6313
+ const accessToken = process.env.MCP_ACCESS_TOKEN;
6314
+ if (accessToken) {
6315
+ return { accessToken };
6316
+ }
6317
+ const oauthServerUrl = process.env.MCP_OAUTH_SERVER_URL;
6318
+ const authStatePath = process.env.MCP_AUTH_STATE_PATH;
6319
+ if (oauthServerUrl || authStatePath) {
6320
+ return {
6321
+ oauth: {
6322
+ serverUrl: oauthServerUrl ?? "",
6323
+ authStatePath,
6324
+ clientId: process.env.MCP_OAUTH_CLIENT_ID,
6325
+ clientSecret: process.env.MCP_OAUTH_CLIENT_SECRET,
6326
+ scopes: process.env.MCP_OAUTH_SCOPES?.split(","),
6327
+ resource: process.env.MCP_OAUTH_RESOURCE
6328
+ }
6329
+ };
6330
+ }
6331
+ return void 0;
6332
+ }
6212
6333
  var LLMHostConfigSchema = zod.z.object({
6213
6334
  provider: zod.z.enum([
6214
6335
  "openai",
@@ -6216,7 +6337,6 @@ var LLMHostConfigSchema = zod.z.object({
6216
6337
  "azure",
6217
6338
  "google",
6218
6339
  "mistral",
6219
- "ollama",
6220
6340
  "deepseek",
6221
6341
  "openrouter",
6222
6342
  "xai",
@@ -6263,7 +6383,7 @@ var EvalExpectBlockSchema = zod.z.object({
6263
6383
  reference: zod.z.unknown().optional(),
6264
6384
  threshold: zod.z.number().min(0).max(1).optional(),
6265
6385
  reps: zod.z.number().int().min(1).optional(),
6266
- provider: zod.z.enum(["claude", "anthropic", "openai", "google"]).optional(),
6386
+ provider: zod.z.enum(["anthropic", "openai", "google"]).optional(),
6267
6387
  model: zod.z.string().optional(),
6268
6388
  apiKeyEnvVar: zod.z.string().optional(),
6269
6389
  maxTokens: zod.z.number().int().positive().optional(),
@@ -6405,10 +6525,6 @@ async function loadModel(provider, model) {
6405
6525
  const { azure } = await import('@ai-sdk/azure');
6406
6526
  return azure(model);
6407
6527
  }
6408
- case "ollama": {
6409
- const { ollama } = await import('@ai-sdk/ollama');
6410
- return ollama(model);
6411
- }
6412
6528
  case "deepseek": {
6413
6529
  const { deepseek } = await import('@ai-sdk/deepseek');
6414
6530
  return deepseek(model);
@@ -6515,7 +6631,6 @@ var allProviders = [
6515
6631
  "azure",
6516
6632
  "google",
6517
6633
  "mistral",
6518
- "ollama",
6519
6634
  "deepseek",
6520
6635
  "openrouter",
6521
6636
  "xai",
@@ -6543,7 +6658,6 @@ function getMissingDependencyMessage(provider) {
6543
6658
  google: "npm install ai @ai-sdk/google",
6544
6659
  azure: "npm install ai @ai-sdk/azure",
6545
6660
  mistral: "npm install ai @ai-sdk/mistral",
6546
- ollama: "npm install ai @ai-sdk/ollama",
6547
6661
  deepseek: "npm install ai @ai-sdk/deepseek",
6548
6662
  openrouter: "npm install ai @openrouter/ai-sdk-provider",
6549
6663
  xai: "npm install ai @ai-sdk/xai",
@@ -6790,15 +6904,17 @@ async function runSingleIteration(evalCase, context, options) {
6790
6904
  function isInfrastructureError(err) {
6791
6905
  let name15;
6792
6906
  let msg;
6907
+ let code = "";
6793
6908
  if (err instanceof Error) {
6794
6909
  name15 = err.name;
6795
6910
  msg = err.message.toLowerCase();
6911
+ code = (err.code ?? "").toLowerCase();
6796
6912
  } else if (typeof err === "string") {
6797
6913
  msg = err.toLowerCase();
6798
6914
  } else {
6799
6915
  return false;
6800
6916
  }
6801
- return name15 === "AbortError" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network");
6917
+ return name15 === "AbortError" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network") || code.includes("econnreset") || code.includes("etimedout") || code.includes("econnrefused");
6802
6918
  }
6803
6919
  async function runEvalCase(evalCase, context, options = {}) {
6804
6920
  const iterations = evalCase.iterations ?? 1;
@@ -6913,9 +7029,9 @@ async function runEvalDataset(options, context) {
6913
7029
  const withIterations = evalCase.mode === "llm_host" && evalCase.iterations === void 0 && defaultLlmIterations !== void 0 ? { ...evalCase, iterations: defaultLlmIterations } : evalCase;
6914
7030
  if (evalCase.mode === "llm_host") {
6915
7031
  const effectiveIterations = withIterations.iterations ?? 1;
6916
- if (effectiveIterations < 10) {
7032
+ if (effectiveIterations > 1 && effectiveIterations < 10) {
6917
7033
  console.warn(
6918
- `[mcp-server-tester] Eval case "${evalCase.id}" uses llm_host mode with only ${effectiveIterations} iteration(s). The evals guide recommends >= 10 iterations. See docs/evals-guide.md for guidance on statistical reliability.`
7034
+ `[mcp-server-tester] Eval case "${evalCase.id}": running ${effectiveIterations} iterations in llm_host mode may not be statistically reliable. Consider using 10+ iterations for accuracy measurements you can trust.`
6919
7035
  );
6920
7036
  }
6921
7037
  }
@@ -6963,6 +7079,16 @@ async function runEvalDataset(options, context) {
6963
7079
  const baseline = await loadBaseline(baselineResultsFrom);
6964
7080
  const baselinePassRate = baseline.total > 0 ? baseline.passed / baseline.total : 0;
6965
7081
  const baselineMap = buildBaselinePassMap(baseline);
7082
+ const currentCaseIds = result.caseResults.map((cr) => cr.id);
7083
+ const unmatchedCount = currentCaseIds.filter(
7084
+ (id) => !baselineMap.has(id)
7085
+ ).length;
7086
+ const unmatchedRatio = currentCaseIds.length > 0 ? unmatchedCount / currentCaseIds.length : 0;
7087
+ if (unmatchedRatio > 0.2) {
7088
+ console.warn(
7089
+ `[mcp-server-tester] Baseline comparison: ${unmatchedCount} of ${currentCaseIds.length} cases (${Math.round(unmatchedRatio * 100)}%) have no baseline entry. This may indicate the dataset structure has changed. Results for unmatched cases cannot be compared.`
7090
+ );
7091
+ }
6966
7092
  for (const cr of result.caseResults) {
6967
7093
  const baselinePass = baselineMap.get(cr.id);
6968
7094
  if (baselinePass !== void 0) {
@@ -7272,6 +7398,7 @@ exports.loadEvalDataset = loadEvalDataset;
7272
7398
  exports.loadEvalDatasetFromObject = loadEvalDatasetFromObject;
7273
7399
  exports.loadTokens = loadTokens;
7274
7400
  exports.loadTokensFromEnv = loadTokensFromEnv;
7401
+ exports.mcpAuthTest = test2;
7275
7402
  exports.normalizeToolResponse = normalizeToolResponse;
7276
7403
  exports.normalizeWhitespace = normalizeWhitespace;
7277
7404
  exports.performClientCredentialsFlow = performClientCredentialsFlow;