@gleanwork/mcp-server-tester 0.12.0 → 1.0.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,17 @@
1
1
  import { expect as expect$1, test as test$1 } from '@playwright/test';
2
2
  import { query } from '@anthropic-ai/claude-agent-sdk';
3
+ import { z } from 'zod';
3
4
  import { Client } from '@modelcontextprotocol/sdk/client/index.js';
4
5
  import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
5
6
  import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
6
- import { z } from 'zod';
7
+ import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js';
7
8
  import createDebug from 'debug';
9
+ import { ProxyAgent, Agent } from 'undici';
10
+ import { readFileSync } from 'fs';
11
+ import * as oauth from 'oauth4webapi';
8
12
  import * as fs2 from 'fs/promises';
9
13
  import * as path2 from 'path';
10
14
  import * as http from 'http';
11
- import * as oauth from 'oauth4webapi';
12
15
  import { homedir } from 'os';
13
16
 
14
17
  var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
@@ -117,7 +120,7 @@ function isCallToolResult(value) {
117
120
  return false;
118
121
  }
119
122
  const v = value;
120
- return Array.isArray(v.content) || typeof v.isError === "boolean";
123
+ return Array.isArray(v.content);
121
124
  }
122
125
  function extractTextFromContentArray(content) {
123
126
  const textParts = [];
@@ -625,6 +628,30 @@ function toBeToolError(received, expected = true) {
625
628
  }
626
629
  };
627
630
  }
631
+
632
+ // src/judge/rubrics.ts
633
+ var BUILT_IN_RUBRICS = {
634
+ correctness: "Evaluate whether the response is factually correct and accurately answers the question. Compare against the reference answer if provided. Score 1.0 for fully correct with no errors; Score 0.75 for mostly correct with one minor inaccuracy or omission; Score 0.5 for partially correct \u2014 answers part of the question but misses key elements; Score 0.25 for minimally relevant but substantially incorrect or missing most key details; Score 0.0 for incorrect, irrelevant, or directly contradicting the reference.",
635
+ completeness: "Evaluate whether the response fully addresses all aspects of the question. Score 1.0 if the response covers all key points comprehensively; Score 0.75 if the response covers most key points with one minor gap; Score 0.5 if the response partially answers \u2014 covers some aspects but misses others; Score 0.25 if the response touches on the topic but misses most key aspects; Score 0.0 if major aspects of the question are entirely missing or the response is off-topic.",
636
+ groundedness: "Evaluate whether all claims in the response are supported by the retrieved context or reference. Penalize unsupported assertions or hallucinated facts. Score 1.0 for fully grounded \u2014 every claim is traceable to the provided context; Score 0.75 for mostly grounded with one minor unsupported detail; Score 0.5 for partially grounded \u2014 some claims are supported but notable hallucinations are present; Score 0.25 for minimally grounded \u2014 most claims are unsupported or invented; Score 0.0 for completely hallucinated or contradicting the provided context.",
637
+ "instruction-following": "Evaluate whether the response follows the instructions given in the question. Check format, tone, constraints, and task completion. Score 1.0 for full compliance \u2014 all instructions are followed precisely; Score 0.75 for mostly compliant with one minor deviation from the instructions; Score 0.5 for partial compliance \u2014 some instructions followed but key constraints violated; Score 0.25 for minimal compliance \u2014 the response loosely addresses the task but ignores most instructions; Score 0.0 for non-compliance \u2014 the response disregards the instructions entirely.",
638
+ conciseness: "Evaluate whether the response is appropriately concise without losing important information. Penalize unnecessary verbosity, padding, or repetition. Score 1.0 for well-sized \u2014 concise and complete with no unnecessary content; Score 0.75 for slightly verbose but no information is lost or repeated; Score 0.5 for moderately verbose \u2014 some padding or repetition that reduces clarity; Score 0.25 for excessively verbose \u2014 significantly overlong with substantial filler or repetition; Score 0.0 for extremely verbose \u2014 so padded or repetitive that the core answer is obscured."
639
+ };
640
+ function resolveRubric(rubric) {
641
+ if (typeof rubric === "string") {
642
+ return BUILT_IN_RUBRICS[rubric];
643
+ }
644
+ return rubric.text;
645
+ }
646
+
647
+ // src/judge/judgeTypes.ts
648
+ var JudgeResponseSchema = z.object({
649
+ pass: z.boolean(),
650
+ score: z.number().min(0).max(1),
651
+ reasoning: z.string()
652
+ });
653
+
654
+ // src/judge/claudeAgentJudge.ts
628
655
  function createClaudeAgentJudge(config) {
629
656
  const model = config.model ?? "claude-sonnet-4-20250514";
630
657
  const maxBudgetUsd = config.maxBudgetUsd ?? 0.1;
@@ -685,7 +712,7 @@ function createClaudeAgentJudge(config) {
685
712
  cacheCreationInputTokens: resultMessage.usage?.cache_creation_input_tokens
686
713
  };
687
714
  return {
688
- pass: parsed.pass ?? false,
715
+ pass: parsed.pass,
689
716
  score: parsed.score,
690
717
  reasoning: parsed.reasoning,
691
718
  usage,
@@ -704,21 +731,19 @@ function buildSystemPrompt() {
704
731
  return 'You are an expert evaluator. Evaluate the candidate response based on the rubric provided. Respond ONLY with valid JSON in this exact format: {"pass": boolean, "score": number (0-1), "reasoning": string}. Do not include any other text, markdown formatting, or code blocks.';
705
732
  }
706
733
  function buildJudgePrompt(candidate, reference, rubric) {
734
+ const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
735
+ const referenceStr = reference !== null && reference !== void 0 ? typeof reference === "string" ? reference : JSON.stringify(reference, null, 2) : null;
707
736
  const parts = [];
708
- parts.push("# Evaluation Task\n");
737
+ parts.push("Rubric:\n");
709
738
  parts.push(rubric);
710
- parts.push("\n\n# Candidate Response\n");
711
- parts.push(
712
- typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2)
713
- );
714
- if (reference !== null && reference !== void 0) {
715
- parts.push("\n\n# Reference Response\n");
716
- parts.push(
717
- typeof reference === "string" ? reference : JSON.stringify(reference, null, 2)
718
- );
719
- }
739
+ parts.push("\n\n<candidate_response>\n");
740
+ parts.push(candidateStr);
741
+ parts.push("\n</candidate_response>\n\n");
742
+ parts.push("<reference_answer>\n");
743
+ parts.push(referenceStr ?? "No reference provided.");
744
+ parts.push("\n</reference_answer>\n\n");
720
745
  parts.push(
721
- "\n\n# Instructions\nEvaluate the candidate response based on the rubric. " + (reference !== null && reference !== void 0 ? "Compare it against the reference response if helpful. " : "") + 'Respond with JSON containing "pass" (boolean), "score" (0-1), and "reasoning" (string).'
746
+ "Evaluate the candidate response against the rubric" + (referenceStr !== null ? ", comparing it with the reference answer if helpful" : "") + '. Return JSON: {"pass": boolean, "score": number (0-1), "reasoning": string}'
722
747
  );
723
748
  return parts.join("");
724
749
  }
@@ -734,15 +759,193 @@ function parseJudgeResponse(text) {
734
759
  jsonText = jsonText.slice(0, -3);
735
760
  }
736
761
  jsonText = jsonText.trim();
762
+ let parsed;
737
763
  try {
738
- return JSON.parse(jsonText);
764
+ parsed = JSON.parse(jsonText);
739
765
  } catch {
740
766
  const jsonMatch = jsonText.match(/\{[\s\S]*"pass"[\s\S]*\}/);
741
767
  if (jsonMatch) {
742
- return JSON.parse(jsonMatch[0]);
768
+ parsed = JSON.parse(jsonMatch[0]);
769
+ } else {
770
+ throw new Error(`Failed to parse judge response as JSON: ${text}`);
743
771
  }
772
+ }
773
+ const result = JudgeResponseSchema.safeParse(parsed);
774
+ if (!result.success) {
775
+ throw new Error(
776
+ `Judge returned invalid response. Expected {pass, score, reasoning} but got: ${jsonText.slice(0, 500)}
777
+ Validation errors: ${JSON.stringify(result.error.issues)}`
778
+ );
779
+ }
780
+ return result.data;
781
+ }
782
+
783
+ // src/judge/openaiJudge.ts
784
+ function createOpenAIJudge(config = {}) {
785
+ const apiKeyEnvVar = config.apiKeyEnvVar ?? "OPENAI_API_KEY";
786
+ const apiKey = process.env[apiKeyEnvVar];
787
+ if (!apiKey) {
788
+ throw new Error(
789
+ `OpenAI judge requires an API key. Set the ${apiKeyEnvVar} environment variable.`
790
+ );
791
+ }
792
+ const model = config.model ?? "gpt-4o";
793
+ const maxTokens = config.maxTokens ?? 1e3;
794
+ const temperature = config.temperature ?? 0;
795
+ return {
796
+ async evaluate(candidate, reference, rubric) {
797
+ let openaiModule;
798
+ try {
799
+ openaiModule = await import('openai');
800
+ } catch (err) {
801
+ throw new Error(
802
+ `OpenAI judge requires the \`openai\` package. Install it with: npm install openai
803
+ Original error: ${err instanceof Error ? err.message : String(err)}`
804
+ );
805
+ }
806
+ const client = new openaiModule.default({ apiKey });
807
+ const prompt = buildJudgePrompt2(candidate, reference, rubric);
808
+ const startTime = Date.now();
809
+ const completion = await client.chat.completions.create({
810
+ model,
811
+ max_tokens: maxTokens,
812
+ temperature,
813
+ messages: [
814
+ {
815
+ role: "system",
816
+ content: 'You are an expert evaluator. Respond with valid JSON only: {"pass": true|false, "score": 0.0-1.0, "reasoning": "explanation"}'
817
+ },
818
+ { role: "user", content: prompt }
819
+ ]
820
+ });
821
+ const durationMs = Date.now() - startTime;
822
+ const text = completion.choices[0]?.message.content ?? "";
823
+ const parsed = parseJudgeResponse2(text);
824
+ return {
825
+ pass: parsed.pass,
826
+ score: parsed.score,
827
+ reasoning: parsed.reasoning,
828
+ usage: {
829
+ inputTokens: completion.usage?.prompt_tokens ?? 0,
830
+ outputTokens: completion.usage?.completion_tokens ?? 0,
831
+ totalCostUsd: 0,
832
+ durationMs
833
+ }
834
+ };
835
+ }
836
+ };
837
+ }
838
+ function buildJudgePrompt2(candidate, reference, rubric) {
839
+ const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
840
+ const referenceStr = reference !== null && reference !== void 0 ? typeof reference === "string" ? reference : JSON.stringify(reference, null, 2) : null;
841
+ return `Rubric:
842
+ ${rubric}
843
+
844
+ <candidate_response>
845
+ ${candidateStr}
846
+ </candidate_response>
847
+
848
+ <reference_answer>
849
+ ${referenceStr ?? "No reference provided."}
850
+ </reference_answer>
851
+
852
+ Evaluate and return JSON: {"pass": boolean, "score": number (0-1), "reasoning": string}`;
853
+ }
854
+ function parseJudgeResponse2(text) {
855
+ const cleaned = text.replace(/```json\n?/g, "").replace(/```\n?/g, "").trim();
856
+ let parsed;
857
+ try {
858
+ parsed = JSON.parse(cleaned);
859
+ } catch {
744
860
  throw new Error(`Failed to parse judge response as JSON: ${text}`);
745
861
  }
862
+ const result = JudgeResponseSchema.safeParse(parsed);
863
+ if (!result.success) {
864
+ throw new Error(
865
+ `Judge returned invalid response. Expected {pass, score, reasoning} but got: ${cleaned.slice(0, 500)}
866
+ Validation errors: ${JSON.stringify(result.error.issues)}`
867
+ );
868
+ }
869
+ return result.data;
870
+ }
871
+
872
+ // src/judge/googleJudge.ts
873
+ function createGoogleJudge(config = {}) {
874
+ const apiKeyEnvVar = config.apiKeyEnvVar ?? "GOOGLE_API_KEY";
875
+ const apiKey = process.env[apiKeyEnvVar];
876
+ if (!apiKey) {
877
+ throw new Error(
878
+ `Google judge requires an API key. Set the ${apiKeyEnvVar} environment variable.`
879
+ );
880
+ }
881
+ const model = config.model ?? "gemini-2.0-flash";
882
+ const maxTokens = config.maxTokens ?? 1e3;
883
+ return {
884
+ async evaluate(candidate, reference, rubric) {
885
+ let googleModule;
886
+ try {
887
+ googleModule = await import('@google/generative-ai');
888
+ } catch (err) {
889
+ throw new Error(
890
+ `Google judge requires the \`@google/generative-ai\` package. Install it with: npm install @google/generative-ai
891
+ Original error: ${err instanceof Error ? err.message : String(err)}`
892
+ );
893
+ }
894
+ const genAI = new googleModule.GoogleGenerativeAI(apiKey);
895
+ const gemini = genAI.getGenerativeModel({
896
+ model,
897
+ generationConfig: {
898
+ maxOutputTokens: maxTokens,
899
+ temperature: 0
900
+ },
901
+ systemInstruction: 'You are an expert evaluator. Respond with valid JSON only: {"pass": true|false, "score": 0.0-1.0, "reasoning": "explanation"}'
902
+ });
903
+ const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
904
+ const referenceStr = reference !== null && reference !== void 0 ? typeof reference === "string" ? reference : JSON.stringify(reference, null, 2) : null;
905
+ const prompt = `Rubric:
906
+ ${rubric}
907
+
908
+ <candidate_response>
909
+ ${candidateStr}
910
+ </candidate_response>
911
+
912
+ <reference_answer>
913
+ ${referenceStr ?? "No reference provided."}
914
+ </reference_answer>
915
+
916
+ Evaluate and return JSON: {"pass": boolean, "score": number (0-1), "reasoning": string}`;
917
+ const startTime = Date.now();
918
+ const result = await gemini.generateContent(prompt);
919
+ const durationMs = Date.now() - startTime;
920
+ const text = result.response.text();
921
+ const cleaned = text.replace(/```json\n?/g, "").replace(/```\n?/g, "").trim();
922
+ let parsedRaw;
923
+ try {
924
+ parsedRaw = JSON.parse(cleaned);
925
+ } catch {
926
+ throw new Error(`Failed to parse judge response as JSON: ${text}`);
927
+ }
928
+ const validation = JudgeResponseSchema.safeParse(parsedRaw);
929
+ if (!validation.success) {
930
+ throw new Error(
931
+ `Judge returned invalid response. Expected {pass, score, reasoning} but got: ${cleaned.slice(0, 500)}
932
+ Validation errors: ${JSON.stringify(validation.error.issues)}`
933
+ );
934
+ }
935
+ const { pass, score, reasoning } = validation.data;
936
+ return {
937
+ pass,
938
+ score,
939
+ reasoning,
940
+ usage: {
941
+ inputTokens: result.response.usageMetadata?.promptTokenCount ?? 0,
942
+ outputTokens: result.response.usageMetadata?.candidatesTokenCount ?? 0,
943
+ totalCostUsd: 0,
944
+ durationMs
945
+ }
946
+ };
947
+ }
948
+ };
746
949
  }
747
950
 
748
951
  // src/judge/judgeClient.ts
@@ -753,56 +956,124 @@ function createJudge(config = {}) {
753
956
  case "anthropic":
754
957
  return createClaudeAgentJudge(config);
755
958
  case "openai":
756
- throw new Error(
757
- 'OpenAI provider is no longer supported. Please use createJudge() without specifying provider, or use provider: "claude". See migration guide at https://github.com/gleanwork/mcp-server-tester/blob/main/docs/migration-v0.11.md'
758
- );
759
- case "custom-http":
760
- throw new Error(
761
- "custom-http provider is no longer supported. Please use createJudge() without specifying provider."
762
- );
959
+ return createOpenAIJudge(config);
960
+ case "google":
961
+ return createGoogleJudge(config);
763
962
  default:
764
963
  throw new Error(`Unsupported LLM provider: ${String(provider)}`);
765
964
  }
766
965
  }
767
966
 
768
- // src/assertions/matchers/toPassToolJudge.ts
769
- var DEFAULT_PASSING_THRESHOLD = 0.7;
770
- var DEFAULT_JUDGE_CONFIG = {};
771
- async function toPassToolJudge(received, rubric, options = {}) {
967
+ // src/assertions/validators/judge.ts
968
+ function computeStdDev(scores, mean) {
969
+ if (scores.length <= 1) return 0;
970
+ const variance = scores.reduce((sum, s) => sum + (s - mean) ** 2, 0) / scores.length;
971
+ return Math.sqrt(variance);
972
+ }
973
+ async function validateJudge(response, config) {
772
974
  const {
773
- reference = null,
774
- passingThreshold = DEFAULT_PASSING_THRESHOLD,
775
- judgeConfig = DEFAULT_JUDGE_CONFIG
776
- } = options;
777
- const judge = createJudge(judgeConfig);
975
+ rubric,
976
+ reference,
977
+ threshold = 0.7,
978
+ reps = 1,
979
+ provider,
980
+ model,
981
+ apiKeyEnvVar,
982
+ maxTokens,
983
+ temperature,
984
+ maxBudgetUsd,
985
+ maxToolOutputSize
986
+ } = config;
987
+ const resolvedRubric = resolveRubric(rubric);
988
+ const judgeConfig = {
989
+ ...provider !== void 0 && { provider },
990
+ ...model !== void 0 && { model },
991
+ ...apiKeyEnvVar !== void 0 && { apiKeyEnvVar },
992
+ ...maxTokens !== void 0 && { maxTokens },
993
+ ...temperature !== void 0 && { temperature },
994
+ ...maxBudgetUsd !== void 0 && { maxBudgetUsd },
995
+ ...maxToolOutputSize !== void 0 && { maxToolOutputSize }
996
+ };
778
997
  try {
779
- const result = await judge.evaluate(received, reference, rubric);
780
- const score = result.score ?? (result.pass ? 1 : 0);
781
- const passes = score >= passingThreshold;
782
- if (this.isNot) {
783
- return {
784
- pass: !passes,
785
- message: () => passes ? `Expected judge evaluation to fail, but it passed with score ${score.toFixed(2)}` : `Judge evaluation failed as expected with score ${score.toFixed(2)}`
786
- };
998
+ const judge = createJudge(judgeConfig);
999
+ const scores = [];
1000
+ let lastReasoning;
1001
+ for (let i = 0; i < reps; i++) {
1002
+ const judgeResult = await judge.evaluate(
1003
+ response,
1004
+ reference ?? null,
1005
+ resolvedRubric
1006
+ );
1007
+ scores.push(judgeResult.score ?? (judgeResult.pass ? 1 : 0));
1008
+ lastReasoning = judgeResult.reasoning;
787
1009
  }
788
- if (passes) {
1010
+ if (scores.length === 0) {
789
1011
  return {
790
- pass: true,
791
- message: () => `Judge evaluation passed with score ${score.toFixed(2)} (threshold: ${passingThreshold})`
1012
+ pass: false,
1013
+ message: "Judge evaluation failed: no scores collected"
792
1014
  };
793
1015
  }
1016
+ const meanScore = scores.reduce((a, b) => a + b, 0) / scores.length;
1017
+ const passed = meanScore >= threshold;
1018
+ const repNote = reps > 1 ? ` (mean of ${reps} reps: [${scores.map((s) => s.toFixed(2)).join(", ")}])` : "";
1019
+ let stdDev;
1020
+ let highVariance;
1021
+ if (reps > 1) {
1022
+ stdDev = computeStdDev(scores, meanScore);
1023
+ highVariance = stdDev > 0.2;
1024
+ if (highVariance) {
1025
+ console.warn(
1026
+ `[mcp-server-tester] Judge scores have high variance (stdDev=${stdDev.toFixed(2)}, scores=[${scores.map((s) => s.toFixed(2)).join(", ")}]). The rubric may be ambiguous.`
1027
+ );
1028
+ }
1029
+ }
794
1030
  return {
795
- pass: false,
796
- message: () => `Judge evaluation failed with score ${score.toFixed(2)} (threshold: ${passingThreshold}). Reasoning: ${result.reasoning ?? "No reasoning provided"}`
1031
+ pass: passed,
1032
+ message: passed ? `Judge passed with score ${meanScore.toFixed(2)}${repNote}` : `Judge failed with score ${meanScore.toFixed(2)} (threshold: ${threshold})${repNote}. ${lastReasoning ?? ""}`,
1033
+ details: reps > 1 ? {
1034
+ scores,
1035
+ scoreStdDev: stdDev,
1036
+ highVariance
1037
+ } : void 0
797
1038
  };
798
- } catch (error) {
1039
+ } catch (err) {
799
1040
  return {
800
1041
  pass: false,
801
- message: () => `Judge evaluation failed with error: ${error instanceof Error ? error.message : String(error)}`
1042
+ message: `Judge evaluation error: ${err instanceof Error ? err.message : String(err)}`
802
1043
  };
803
1044
  }
804
1045
  }
805
1046
 
1047
+ // src/assertions/matchers/toPassToolJudge.ts
1048
+ var DEFAULT_PASSING_THRESHOLD = 0.7;
1049
+ async function toPassToolJudge(received, rubric, options = {}) {
1050
+ const {
1051
+ reference = null,
1052
+ passingThreshold = DEFAULT_PASSING_THRESHOLD,
1053
+ reps,
1054
+ provider,
1055
+ model
1056
+ } = options;
1057
+ const validation = await validateJudge(received, {
1058
+ rubric,
1059
+ reference: reference ?? void 0,
1060
+ threshold: passingThreshold,
1061
+ ...reps !== void 0 && { reps },
1062
+ ...provider !== void 0 && { provider },
1063
+ ...model !== void 0 && { model }
1064
+ });
1065
+ if (this.isNot) {
1066
+ return {
1067
+ pass: !validation.pass,
1068
+ message: () => validation.pass ? `Expected judge evaluation to fail, but it passed` : `Judge evaluation failed as expected`
1069
+ };
1070
+ }
1071
+ return {
1072
+ pass: validation.pass,
1073
+ message: () => validation.message
1074
+ };
1075
+ }
1076
+
806
1077
  // src/assertions/validators/size.ts
807
1078
  function validateSize(response, options) {
808
1079
  const { maxBytes, minBytes } = options;
@@ -903,6 +1174,144 @@ async function toSatisfyToolPredicate(received, predicate, description) {
903
1174
  }
904
1175
  }
905
1176
 
1177
+ // src/assertions/validators/toolCalls.ts
1178
+ function isSimulationResult(value) {
1179
+ return typeof value === "object" && value !== null && "success" in value && "toolCalls" in value && Array.isArray(value.toolCalls);
1180
+ }
1181
+ function partialMatch(actual, expected) {
1182
+ return Object.entries(expected).every(([k, v]) => {
1183
+ const actualVal = actual[k];
1184
+ if (typeof v === "object" && v !== null && typeof actualVal === "object" && actualVal !== null) {
1185
+ return partialMatch(
1186
+ actualVal,
1187
+ v
1188
+ );
1189
+ }
1190
+ return JSON.stringify(actualVal) === JSON.stringify(v);
1191
+ });
1192
+ }
1193
+ function findMatchingCall(actual, expected, startIndex = 0) {
1194
+ for (let i = startIndex; i < actual.length; i++) {
1195
+ const call = actual[i];
1196
+ if (call.name !== expected.name) continue;
1197
+ if (expected.arguments !== void 0 && !partialMatch(call.arguments ?? {}, expected.arguments)) {
1198
+ continue;
1199
+ }
1200
+ return i;
1201
+ }
1202
+ return -1;
1203
+ }
1204
+ function validateToolCalls(response, expectation) {
1205
+ if (!isSimulationResult(response)) {
1206
+ return {
1207
+ pass: false,
1208
+ message: "toolsTriggered expectation requires llm_host mode \u2014 response must be an LLMHostSimulationResult"
1209
+ };
1210
+ }
1211
+ const actual = response.toolCalls;
1212
+ const requiredCalls = expectation.calls.filter((c) => c.required !== false);
1213
+ const calledRequiredCount = requiredCalls.filter(
1214
+ (expected) => findMatchingCall(actual, expected) !== -1
1215
+ ).length;
1216
+ const recall = requiredCalls.length > 0 ? calledRequiredCount / requiredCalls.length : 1;
1217
+ const allowedNames = new Set(expectation.calls.map((c) => c.name));
1218
+ const precision = actual.length > 0 && expectation.exclusive === true ? actual.filter((c) => allowedNames.has(c.name)).length / actual.length : 1;
1219
+ const metrics = { precision, recall };
1220
+ const order = expectation.order ?? "any";
1221
+ if (order === "strict") {
1222
+ let searchFrom = 0;
1223
+ for (const expected of expectation.calls) {
1224
+ const idx = findMatchingCall(actual, expected, searchFrom);
1225
+ if (idx === -1) {
1226
+ if (expected.required !== false) {
1227
+ return {
1228
+ pass: false,
1229
+ message: `Expected tool '${expected.name}' to be called in sequence (starting from position ${searchFrom}), but it was not found`,
1230
+ metrics
1231
+ };
1232
+ }
1233
+ } else {
1234
+ searchFrom = idx + 1;
1235
+ }
1236
+ }
1237
+ } else {
1238
+ const required = expectation.calls.filter((c) => c.required !== false);
1239
+ for (const expected of required) {
1240
+ const idx = findMatchingCall(actual, expected);
1241
+ if (idx === -1) {
1242
+ const argsNote = expected.arguments !== void 0 ? ` with args ${JSON.stringify(expected.arguments)}` : "";
1243
+ return {
1244
+ pass: false,
1245
+ message: `Expected tool '${expected.name}'${argsNote} to be called, but it was not`,
1246
+ metrics
1247
+ };
1248
+ }
1249
+ }
1250
+ }
1251
+ if (expectation.exclusive === true) {
1252
+ const unexpected = actual.filter((c) => !allowedNames.has(c.name));
1253
+ if (unexpected.length > 0) {
1254
+ const names = unexpected.map((c) => `'${c.name}'`).join(", ");
1255
+ return {
1256
+ pass: false,
1257
+ message: `Unexpected tool calls: ${names}. Only ${[...allowedNames].map((n) => `'${n}'`).join(", ")} are allowed`,
1258
+ metrics
1259
+ };
1260
+ }
1261
+ }
1262
+ return { pass: true, message: "All tool call expectations met", metrics };
1263
+ }
1264
+ function validateToolCallCount(response, options) {
1265
+ if (!isSimulationResult(response)) {
1266
+ return {
1267
+ pass: false,
1268
+ message: "toolCallCount expectation requires llm_host mode \u2014 response must be an LLMHostSimulationResult"
1269
+ };
1270
+ }
1271
+ const count = response.toolCalls.length;
1272
+ const { min, max, exact } = options;
1273
+ if (exact !== void 0 && count !== exact) {
1274
+ return {
1275
+ pass: false,
1276
+ message: `Expected exactly ${exact} tool call(s), but got ${count}`
1277
+ };
1278
+ }
1279
+ if (min !== void 0 && count < min) {
1280
+ return {
1281
+ pass: false,
1282
+ message: `Expected at least ${min} tool call(s), but got ${count}`
1283
+ };
1284
+ }
1285
+ if (max !== void 0 && count > max) {
1286
+ return {
1287
+ pass: false,
1288
+ message: `Expected at most ${max} tool call(s), but got ${count}`
1289
+ };
1290
+ }
1291
+ return {
1292
+ pass: true,
1293
+ message: `Tool call count (${count}) is within expected range`
1294
+ };
1295
+ }
1296
+
1297
+ // src/assertions/matchers/toHaveToolCalls.ts
1298
+ function toHaveToolCalls(received, expectation) {
1299
+ const result = validateToolCalls(received, expectation);
1300
+ return {
1301
+ pass: result.pass,
1302
+ message: () => result.message
1303
+ };
1304
+ }
1305
+
1306
+ // src/assertions/matchers/toHaveToolCallCount.ts
1307
+ function toHaveToolCallCount(received, options) {
1308
+ const result = validateToolCallCount(received, options);
1309
+ return {
1310
+ pass: result.pass,
1311
+ message: () => result.message
1312
+ };
1313
+ }
1314
+
906
1315
  // src/assertions/matchers/index.ts
907
1316
  var expect = expect$1.extend({
908
1317
  toMatchToolResponse,
@@ -913,7 +1322,9 @@ var expect = expect$1.extend({
913
1322
  toBeToolError,
914
1323
  toPassToolJudge,
915
1324
  toHaveToolResponseSize,
916
- toSatisfyToolPredicate
1325
+ toSatisfyToolPredicate,
1326
+ toHaveToolCalls,
1327
+ toHaveToolCallCount
917
1328
  });
918
1329
  var MCPHostCapabilitiesSchema = z.object({
919
1330
  sampling: z.record(z.unknown()).optional(),
@@ -930,9 +1341,16 @@ var MCPOAuthConfigSchema = z.object({
930
1341
  clientSecret: z.string().optional(),
931
1342
  redirectUri: z.string().url().optional()
932
1343
  });
1344
+ var MCPClientCredentialsConfigSchema = z.object({
1345
+ clientId: z.string().optional(),
1346
+ clientSecret: z.string().optional(),
1347
+ tokenEndpoint: z.string().url("tokenEndpoint must be a valid URL").optional(),
1348
+ scopes: z.array(z.string()).optional()
1349
+ });
933
1350
  var MCPAuthConfigSchema = z.object({
934
1351
  accessToken: z.string().optional(),
935
- oauth: MCPOAuthConfigSchema.optional()
1352
+ oauth: MCPOAuthConfigSchema.optional(),
1353
+ clientCredentials: MCPClientCredentialsConfigSchema.optional()
936
1354
  }).refine(
937
1355
  (data) => !(data.accessToken && data.oauth),
938
1356
  "Cannot specify both accessToken and oauth configuration"
@@ -945,16 +1363,44 @@ var StdioConfigSchema = z.object({
945
1363
  capabilities: MCPHostCapabilitiesSchema.optional(),
946
1364
  connectTimeoutMs: z.number().positive().optional(),
947
1365
  requestTimeoutMs: z.number().positive().optional(),
1366
+ callTimeoutMs: z.number().positive().optional(),
948
1367
  quiet: z.boolean().optional()
949
1368
  });
1369
+ function isLocalhost(hostname) {
1370
+ return hostname === "localhost" || hostname === "127.0.0.1" || hostname === "::1";
1371
+ }
950
1372
  var HttpConfigSchema = z.object({
951
1373
  transport: z.literal("http"),
952
- serverUrl: z.string().url("serverUrl must be a valid URL"),
1374
+ serverUrl: z.string().url("serverUrl must be a valid URL").refine((url) => {
1375
+ let parsed;
1376
+ try {
1377
+ parsed = new URL(url);
1378
+ } catch {
1379
+ return true;
1380
+ }
1381
+ if (parsed.protocol === "http:" && !isLocalhost(parsed.hostname)) {
1382
+ console.warn(
1383
+ `[mcp-server-tester] serverUrl uses http:// for non-localhost address "${parsed.hostname}". This transmits tokens unencrypted. Use https:// for remote servers.`
1384
+ );
1385
+ }
1386
+ return true;
1387
+ }),
953
1388
  headers: z.record(z.string()).optional(),
954
1389
  capabilities: MCPHostCapabilitiesSchema.optional(),
955
1390
  connectTimeoutMs: z.number().positive().optional(),
956
1391
  requestTimeoutMs: z.number().positive().optional(),
957
- auth: MCPAuthConfigSchema.optional()
1392
+ callTimeoutMs: z.number().positive().optional(),
1393
+ auth: MCPAuthConfigSchema.optional(),
1394
+ proxy: z.object({
1395
+ url: z.string().url("proxy.url must be a valid URL")
1396
+ }).optional(),
1397
+ retryAttempts: z.number().int().min(0).optional(),
1398
+ tls: z.object({
1399
+ ca: z.string().optional(),
1400
+ cert: z.string().optional(),
1401
+ key: z.string().optional(),
1402
+ rejectUnauthorized: z.boolean().optional()
1403
+ }).optional()
958
1404
  });
959
1405
  var MCPConfigSchema = z.discriminatedUnion("transport", [
960
1406
  StdioConfigSchema,
@@ -964,26 +1410,245 @@ function validateMCPConfig(config) {
964
1410
  return MCPConfigSchema.parse(config);
965
1411
  }
966
1412
  function isStdioConfig(config) {
967
- return config.transport === "stdio" && typeof config.command === "string";
1413
+ return config.transport === "stdio";
968
1414
  }
969
1415
  function isHttpConfig(config) {
970
- return config.transport === "http" && typeof config.serverUrl === "string";
1416
+ return config.transport === "http";
971
1417
  }
972
1418
  // Root namespace prefix shared by the `debug` loggers created below.
  var NAMESPACE = "mcp-server-tester";
973
1419
  // Logger used for MCP client messages (connection, proxy/TLS setup, retries, agent shutdown).
  var debugClient = createDebug(`${NAMESPACE}:client`);
974
1420
  // The next two loggers are created but their return values are discarded at this point.
  createDebug(`${NAMESPACE}:oauth`);
975
1421
  createDebug(`${NAMESPACE}:eval`);
1422
  // Logger used for HTTP transport selection and connection attempts.
+ var debugHttp = createDebug(`${NAMESPACE}:http`);
1423
+
1424
+ // package.json
1425
// Inlined package metadata; `version` is the fallback client version passed to the MCP `Client`.
+ var package_default = {
1426
+ version: "1.0.0-beta.0"};
1427
// Logger for the OAuth flow helpers that follow (e.g. the scope warning in performClientCredentialsFlow).
+ var debug = createDebug("mcp-server-tester:oauth-flow");
1428
/**
 * Creates a PKCE pair: a random code verifier and the challenge that
 * oauth4webapi derives from it.
 *
 * @returns {Promise<{ codeVerifier: string, codeChallenge: string }>}
 */
async function generatePKCE() {
  const verifier = oauth.generateRandomCodeVerifier();
  const challenge = await oauth.calculatePKCECodeChallenge(verifier);
  return { codeVerifier: verifier, codeChallenge: challenge };
}
1436
/**
 * Produces a random OAuth `state` value via oauth4webapi.
 *
 * @returns {string} The generated state value.
 */
function generateState() {
  const state = oauth.generateRandomState();
  return state;
}
1439
/**
 * Builds the authorization-code + PKCE (S256) request URL.
 *
 * @param {object} config
 * @param {{ server: { authorization_endpoint?: string } }} config.authServer
 * @param {string} config.clientId
 * @param {string} config.redirectUri
 * @param {string[]} config.scopes - Joined with single spaces into `scope`.
 * @param {string} config.codeChallenge
 * @param {string} config.state
 * @param {string} [config.resource] - Added as `resource` only when truthy.
 * @returns {URL} The authorization endpoint with all query parameters set.
 * @throws {Error} When the server metadata has no authorization_endpoint.
 */
function buildAuthorizationUrl(config) {
  const endpoint = config.authServer.server.authorization_endpoint;
  if (!endpoint) {
    throw new Error(
      "Authorization server does not have an authorization_endpoint"
    );
  }
  const target = new URL(endpoint);
  // Ordered list so the resulting query string keeps a stable parameter order.
  const query = [
    ["client_id", config.clientId],
    ["redirect_uri", config.redirectUri],
    ["response_type", "code"],
    ["scope", config.scopes.join(" ")],
    ["code_challenge", config.codeChallenge],
    ["code_challenge_method", "S256"],
    ["state", config.state]
  ];
  if (config.resource) {
    query.push(["resource", config.resource]);
  }
  for (const [key, value] of query) {
    target.searchParams.set(key, value);
  }
  return target;
}
1459
/**
 * Exchanges an authorization code for tokens using oauth4webapi.
 *
 * A callback URL is rebuilt from `redirectUri`, `code` and `state`, validated
 * against the expected state, then sent with the PKCE verifier in an
 * authorization-code grant request.
 *
 * @param {object} config - Uses authServer.server, clientId, optional
 *   clientSecret, redirectUri, code, state and codeVerifier.
 * @returns {Promise<{ accessToken, tokenType, expiresIn, refreshToken, scope }>}
 */
async function exchangeCodeForTokens(config) {
  // Confidential clients authenticate with HTTP Basic; public clients send no secret.
  const usesSecret = Boolean(config.clientSecret);
  const client = {
    client_id: config.clientId,
    token_endpoint_auth_method: usesSecret ? "client_secret_basic" : "none"
  };
  const clientAuth = usesSecret ? oauth.ClientSecretBasic(config.clientSecret) : oauth.None();

  const callbackUrl = new URL(config.redirectUri);
  callbackUrl.searchParams.set("code", config.code);
  callbackUrl.searchParams.set("state", config.state);

  const as = config.authServer.server;
  const callbackParams = oauth.validateAuthResponse(as, client, callbackUrl, config.state);
  const tokenResponse = await oauth.authorizationCodeGrantRequest(
    as,
    client,
    clientAuth,
    callbackParams,
    config.redirectUri,
    config.codeVerifier
  );
  const {
    access_token: accessToken,
    token_type: tokenType,
    expires_in: expiresIn,
    refresh_token: refreshToken,
    scope
  } = await oauth.processAuthorizationCodeResponse(as, client, tokenResponse);

  return { accessToken, tokenType, expiresIn, refreshToken, scope };
}
1495
/**
 * Obtains a new access token from a refresh token using oauth4webapi.
 *
 * On a non-OK HTTP response an Error is thrown whose message is taken from the
 * response body when possible (JSON `error` / `error_description`, or the raw
 * text), falling back to the status line.
 *
 * @param {object} config - Uses authServer.server, clientId, optional
 *   clientSecret and refreshToken.
 * @returns {Promise<{ accessToken, tokenType, expiresIn, refreshToken, scope }>}
 * @throws {Error} When the token endpoint responds with a non-OK status.
 */
async function refreshAccessToken(config) {
  const usesSecret = Boolean(config.clientSecret);
  const client = {
    client_id: config.clientId,
    token_endpoint_auth_method: usesSecret ? "client_secret_basic" : "none"
  };
  const clientAuth = usesSecret ? oauth.ClientSecretBasic(config.clientSecret) : oauth.None();
  const as = config.authServer.server;

  const response = await oauth.refreshTokenGrantRequest(
    as,
    client,
    clientAuth,
    config.refreshToken
  );

  // Builds the most specific failure message the response body allows.
  const describeFailure = async () => {
    const contentType = response.headers.get("content-type") ?? "";
    const fallback = `Token refresh failed: ${response.status} ${response.statusText}`;
    try {
      if (!contentType.includes("application/json")) {
        const textBody = await response.clone().text();
        return textBody ? `Token refresh failed: ${response.status} - ${textBody}` : fallback;
      }
      const errorBody = await response.clone().json();
      if (!errorBody.error) {
        return fallback;
      }
      const detail = errorBody.error_description ? ` - ${errorBody.error_description}` : "";
      return `Token refresh failed: ${errorBody.error}${detail}`;
    } catch {
      // Body could not be read or parsed; the status-line message is used instead.
      return fallback;
    }
  };

  if (!response.ok) {
    throw new Error(await describeFailure());
  }

  const result = await oauth.processRefreshTokenResponse(as, client, response);
  return {
    accessToken: result.access_token,
    tokenType: result.token_type,
    expiresIn: result.expires_in,
    refreshToken: result.refresh_token,
    scope: result.scope
  };
}
1542
/**
 * Runs an OAuth client-credentials grant against `config.tokenEndpoint`.
 *
 * The issuer is taken to be the token endpoint's origin and the client
 * authenticates with HTTP Basic. If the server reports granted scopes and some
 * requested scopes are absent, a debug message lists the missing ones.
 *
 * @param {object} config - Uses tokenEndpoint, clientId, clientSecret and
 *   optional scopes.
 * @returns {Promise<{ accessToken, tokenType, expiresIn, scope }>}
 */
async function performClientCredentialsFlow(config) {
  const { origin } = new URL(config.tokenEndpoint);
  const authServer = { issuer: origin, token_endpoint: config.tokenEndpoint };
  const client = { client_id: config.clientId };
  const clientAuth = oauth.ClientSecretBasic(config.clientSecret);

  const hasScopes = Boolean(config.scopes && config.scopes.length > 0);
  const parameters = {};
  if (hasScopes) {
    parameters["scope"] = config.scopes.join(" ");
  }

  const response = await oauth.clientCredentialsGrantRequest(
    authServer,
    client,
    clientAuth,
    parameters
  );
  const result = await oauth.processClientCredentialsResponse(authServer, client, response);

  const requested = new Set(hasScopes ? config.scopes : []);
  const granted = new Set((result.scope ?? "").split(" ").filter(Boolean));
  const missing = [];
  for (const scopeName of requested) {
    if (!granted.has(scopeName)) {
      missing.push(scopeName);
    }
  }
  // Only warn when the server actually reported scopes; an empty/absent scope is not compared.
  if (missing.length > 0 && granted.size > 0) {
    debug(
      "[oauth] Warning: Token server granted fewer scopes than requested. Missing: %s",
      missing.join(", ")
    );
  }

  const { access_token: accessToken, token_type: tokenType, expires_in: expiresIn, scope } = result;
  return { accessToken, tokenType, expiresIn, scope };
}
976
1589
 
977
1590
  // src/mcp/clientFactory.ts
1591
/**
 * Extracts the server-requested retry delay from an error's HTTP response.
 *
 * `Retry-After` may be either a number of seconds or an HTTP-date; both forms
 * are supported. Negative or already-elapsed values are clamped to 0.
 *
 * @param {unknown} err - Error that may carry a `response` with a
 *   `headers.get()` method.
 * @returns {number|null} Delay in milliseconds, or null when the header is
 *   absent or cannot be interpreted.
 */
function getRetryAfterDelayMs(err) {
  const retryAfter = err?.response?.headers?.get?.("Retry-After");
  if (!retryAfter) {
    return null;
  }
  // Delta-seconds form, e.g. "120".
  const seconds = Number.parseInt(retryAfter, 10);
  if (!Number.isNaN(seconds)) {
    return Math.max(0, seconds) * 1e3;
  }
  // HTTP-date form, e.g. "Wed, 21 Oct 2015 07:28:00 GMT" (always starts with a
  // weekday name, so it never reaches this point as a parsable integer).
  const retryAt = Date.parse(retryAfter);
  if (Number.isNaN(retryAt)) {
    return null;
  }
  return Math.max(0, retryAt - Date.now());
}
1600
/**
 * Tells whether an error carries an HTTP 429 response.
 *
 * @param {unknown} err - Error that may expose `response.status`.
 * @returns {boolean} True only when `err.response.status` is exactly 429.
 */
function isRateLimitError(err) {
  const status = err?.response?.status;
  return status === 429;
}
1604
/**
 * Heuristically classifies an error as a transient network failure by looking
 * for well-known markers in its (lower-cased) message.
 *
 * @param {unknown} err - Value to classify; non-Error values are never transient.
 * @returns {boolean} True when the message contains any known marker.
 */
function isTransientNetworkError(err) {
  if (!(err instanceof Error)) {
    return false;
  }
  const haystack = err.message.toLowerCase();
  const markers = [
    "econnreset",
    "econnrefused",
    "etimedout",
    "enotfound",
    "network",
    "socket hang up",
    "fetch failed"
  ];
  return markers.some((marker) => haystack.includes(marker));
}
1609
/**
 * Decides whether a failed attempt is worth retrying.
 *
 * @param {unknown} err - The error thrown by the attempt.
 * @returns {boolean} True for transient network errors and HTTP 429 responses.
 */
function isRetryableError(err) {
  if (isTransientNetworkError(err)) {
    return true;
  }
  return isRateLimitError(err);
}
1612
/**
 * Runs `fn`, retrying retryable failures with exponential backoff.
 *
 * `fn` is invoked up to `maxAttempts + 1` times (one initial try plus
 * `maxAttempts` retries). The wait between tries is the server's Retry-After
 * hint when available, otherwise 1s doubled per attempt and capped at 30s.
 * Non-retryable errors, and the error from the final try, are rethrown.
 *
 * @param {() => Promise<any>} fn - Operation to execute.
 * @param {number} maxAttempts - Number of retries allowed after the first try.
 * @returns {Promise<any>} Whatever `fn` resolves with.
 */
async function retryWithBackoff(fn, maxAttempts) {
  let lastErr;
  let attempt = 0;
  while (attempt <= maxAttempts) {
    try {
      return await fn();
    } catch (err) {
      lastErr = err;
      const mayRetry = attempt < maxAttempts && isRetryableError(err);
      if (!mayRetry) {
        throw err;
      }
      const hintedMs = getRetryAfterDelayMs(err);
      const delayMs = hintedMs === null ? Math.min(1e3 * 2 ** attempt, 3e4) : hintedMs;
      debugClient(
        "Retryable error on attempt %d/%d, retrying in %dms: %s",
        attempt + 1,
        maxAttempts + 1,
        delayMs,
        err.message
      );
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
    attempt += 1;
  }
  // Only reached when the loop body never ran.
  throw lastErr;
}
1637
// Maps an MCP client to the undici dispatcher created for it, so closeMCPClient can close that dispatcher.
+ var agentRegistry = /* @__PURE__ */ new WeakMap();
978
1638
  async function createMCPClientForConfig(config, options) {
979
1639
  const validatedConfig = validateMCPConfig(config);
980
1640
  const client = new Client(
981
1641
  {
982
1642
  name: options?.clientInfo?.name ?? "@gleanwork/mcp-server-tester",
983
- version: options?.clientInfo?.version ?? "0.1.0"
1643
+ version: options?.clientInfo?.version ?? package_default.version
984
1644
  },
985
1645
  {
986
- capabilities: validatedConfig.capabilities ?? {}
1646
+ capabilities: {
1647
+ ...validatedConfig.capabilities ?? {},
1648
+ // Only advertise sampling if a handler has been registered;
1649
+ // declaring sampling capability without a handler violates the MCP spec
1650
+ sampling: options?.samplingHandler ? validatedConfig.capabilities?.sampling ?? {} : void 0
1651
+ }
987
1652
  }
988
1653
  );
989
1654
  if (isStdioConfig(validatedConfig)) {
@@ -999,26 +1664,126 @@ async function createMCPClientForConfig(config, options) {
999
1664
  args: validatedConfig.args,
1000
1665
  cwd: validatedConfig.cwd
1001
1666
  });
1002
- await client.connect(transport);
1667
+ await client.connect(
1668
+ transport,
1669
+ validatedConfig.connectTimeoutMs !== void 0 ? { timeout: validatedConfig.connectTimeoutMs } : void 0
1670
+ );
1003
1671
  } else if (isHttpConfig(validatedConfig)) {
1004
1672
  const headers = { ...validatedConfig.headers };
1673
+ if (validatedConfig.auth?.clientCredentials && !options?.authProvider) {
1674
+ const ccConfig = validatedConfig.auth.clientCredentials;
1675
+ const clientId = ccConfig.clientId ?? process.env["MCP_CLIENT_ID"];
1676
+ const clientSecret = ccConfig.clientSecret ?? process.env["MCP_CLIENT_SECRET"];
1677
+ if (!clientId || !clientSecret) {
1678
+ throw new Error(
1679
+ "Client credentials require clientId/clientSecret in config or MCP_CLIENT_ID/MCP_CLIENT_SECRET env vars"
1680
+ );
1681
+ }
1682
+ if (!ccConfig.tokenEndpoint) {
1683
+ throw new Error(
1684
+ "Client credentials require tokenEndpoint in auth.clientCredentials config"
1685
+ );
1686
+ }
1687
+ debugClient("Fetching token via client credentials grant");
1688
+ const tokenResult = await performClientCredentialsFlow({
1689
+ tokenEndpoint: ccConfig.tokenEndpoint,
1690
+ clientId,
1691
+ clientSecret,
1692
+ scopes: ccConfig.scopes
1693
+ });
1694
+ headers.Authorization = `Bearer ${tokenResult.accessToken}`;
1695
+ }
1005
1696
  if (validatedConfig.auth?.accessToken && !options?.authProvider) {
1006
1697
  headers.Authorization = `Bearer ${validatedConfig.auth.accessToken}`;
1007
1698
  }
1008
- const transport = new StreamableHTTPClientTransport(
1009
- new URL(validatedConfig.serverUrl),
1010
- {
1011
- requestInit: Object.keys(headers).length > 0 ? { headers } : void 0,
1012
- // Pass auth provider for OAuth flow - MCP SDK handles it automatically
1013
- authProvider: options?.authProvider
1699
+ const url = new URL(validatedConfig.serverUrl);
1700
+ let requestInit = Object.keys(headers).length > 0 ? { headers } : void 0;
1701
+ const proxyUrl = validatedConfig.proxy?.url ?? process.env["HTTPS_PROXY"] ?? process.env["HTTP_PROXY"];
1702
+ if (proxyUrl) {
1703
+ const proxyAgent = new ProxyAgent(proxyUrl);
1704
+ try {
1705
+ const sanitized = new URL(proxyUrl);
1706
+ debugClient(
1707
+ "Using proxy: %s://%s:%s",
1708
+ sanitized.protocol.slice(0, -1),
1709
+ sanitized.hostname,
1710
+ sanitized.port
1711
+ );
1712
+ } catch {
1713
+ debugClient("Using proxy (unparseable URL)");
1014
1714
  }
1015
- );
1715
+ requestInit = {
1716
+ ...requestInit,
1717
+ dispatcher: proxyAgent
1718
+ };
1719
+ }
1720
+ if (validatedConfig.tls) {
1721
+ const tlsCfg = validatedConfig.tls;
1722
+ try {
1723
+ const dispatcher = new Agent({
1724
+ connect: {
1725
+ ...tlsCfg.ca && { ca: readFileSync(tlsCfg.ca) },
1726
+ ...tlsCfg.cert && { cert: readFileSync(tlsCfg.cert) },
1727
+ ...tlsCfg.key && { key: readFileSync(tlsCfg.key) },
1728
+ rejectUnauthorized: tlsCfg.rejectUnauthorized ?? true
1729
+ }
1730
+ });
1731
+ agentRegistry.set(client, dispatcher);
1732
+ requestInit = {
1733
+ ...requestInit,
1734
+ dispatcher
1735
+ };
1736
+ debugClient("TLS configuration applied");
1737
+ } catch (error) {
1738
+ const filePath = tlsCfg.ca ?? tlsCfg.cert ?? tlsCfg.key;
1739
+ const fileType = tlsCfg.ca ? "CA certificate" : tlsCfg.cert ? "client certificate" : "client key";
1740
+ throw new Error(
1741
+ `Failed to load TLS ${fileType} from ${filePath}: ${error instanceof Error ? error.message : String(error)}`
1742
+ );
1743
+ }
1744
+ } else if (proxyUrl) {
1745
+ const existingDispatcher = requestInit?.dispatcher;
1746
+ if (existingDispatcher) {
1747
+ agentRegistry.set(client, existingDispatcher);
1748
+ }
1749
+ }
1016
1750
  debugClient("Connecting via HTTP: %O", {
1017
1751
  serverUrl: validatedConfig.serverUrl,
1018
1752
  headers: Object.keys(headers).length > 0 ? Object.keys(headers) : void 0,
1019
1753
  hasAuthProvider: !!options?.authProvider
1020
1754
  });
1021
- await client.connect(transport);
1755
+ debugHttp("Connecting to %s", validatedConfig.serverUrl);
1756
+ if (Object.keys(headers).length > 0) {
1757
+ debugHttp("Request header names: %O", Object.keys(headers));
1758
+ }
1759
+ const retryAttempts = validatedConfig.retryAttempts ?? 0;
1760
+ const connectOptions = validatedConfig.connectTimeoutMs !== void 0 ? { timeout: validatedConfig.connectTimeoutMs } : void 0;
1761
+ await retryWithBackoff(async () => {
1762
+ try {
1763
+ debugHttp("Attempting transport: streamableHttp");
1764
+ const streamableTransport = new StreamableHTTPClientTransport(url, {
1765
+ requestInit,
1766
+ authProvider: options?.authProvider
1767
+ });
1768
+ await client.connect(streamableTransport, connectOptions);
1769
+ debugClient("Connected via Streamable HTTP");
1770
+ debugHttp("Connection established via streamableHttp");
1771
+ } catch (err) {
1772
+ debugHttp(
1773
+ "streamableHttp failed (%s), falling back to SSE",
1774
+ err.message
1775
+ );
1776
+ debugClient("Streamable HTTP failed, falling back to SSE transport");
1777
+ debugHttp("Attempting transport: sse");
1778
+ const sseTransport = new SSEClientTransport(url, {
1779
+ requestInit,
1780
+ authProvider: options?.authProvider
1781
+ });
1782
+ await client.connect(sseTransport, connectOptions);
1783
+ debugClient("Connected via SSE");
1784
+ debugHttp("Connection established via sse");
1785
+ }
1786
+ }, retryAttempts);
1022
1787
  }
1023
1788
  debugClient("Connected successfully");
1024
1789
  const serverInfo = client.getServerVersion();
@@ -1033,10 +1798,38 @@ async function closeMCPClient(client) {
1033
1798
  } catch (error) {
1034
1799
  console.error("[MCP] Error closing client:", error);
1035
1800
  throw error;
1801
+ } finally {
1802
+ const agent = agentRegistry.get(client);
1803
+ if (agent) {
1804
+ agentRegistry.delete(client);
1805
+ try {
1806
+ await agent.close();
1807
+ } catch (agentError) {
1808
+ debugClient(
1809
+ "Error closing undici agent: %s",
1810
+ agentError.message
1811
+ );
1812
+ }
1813
+ }
1036
1814
  }
1037
1815
  }
1038
1816
 
1039
1817
  // src/mcp/fixtures/mcpFixture.ts
1818
// Timeout applied to fixture calls when `options.callTimeoutMs` is not provided.
var DEFAULT_CALL_TIMEOUT_MS = 30000;

/**
 * Races a promise against a timer and rejects if the timer fires first.
 *
 * The timer is always cleared once the race settles. The underlying operation
 * is not cancelled on timeout; only the returned promise rejects.
 *
 * @param {Promise<any>} promise - Operation to guard.
 * @param {number} ms - Timeout in milliseconds.
 * @param {string} opName - Operation name included in the timeout message.
 * @returns {Promise<any>} Settles like `promise`, or rejects with a timeout Error.
 */
function withCallTimeout(promise, ms, opName) {
  let timer;
  const deadline = new Promise((_resolve, reject) => {
    const onExpire = () => {
      reject(new Error(`MCP operation "${opName}" timed out after ${ms}ms`));
    };
    timer = setTimeout(onExpire, ms);
  });
  const stopTimer = () => clearTimeout(timer);
  return Promise.race([promise, deadline]).finally(stopTimer);
}
1040
1833
  var testStep = null;
1041
1834
  try {
1042
1835
  const playwright = __require("@playwright/test");
@@ -1048,20 +1841,29 @@ try {
1048
1841
  function createMCPFixture(client, testInfo, options) {
1049
1842
  const authType = options?.authType ?? "none";
1050
1843
  const project = options?.project;
1844
+ const callTimeout = options?.callTimeoutMs ?? DEFAULT_CALL_TIMEOUT_MS;
1051
1845
  if (!testInfo) {
1052
1846
  return {
1053
1847
  client,
1054
1848
  authType,
1055
1849
  project,
1056
1850
  async listTools() {
1057
- const result = await client.listTools();
1851
+ const result = await withCallTimeout(
1852
+ client.listTools(),
1853
+ callTimeout,
1854
+ "listTools"
1855
+ );
1058
1856
  return result.tools;
1059
1857
  },
1060
1858
  async callTool(name, args) {
1061
- const result = await client.callTool({
1062
- name,
1063
- arguments: args
1064
- });
1859
+ const result = await withCallTimeout(
1860
+ client.callTool({
1861
+ name,
1862
+ arguments: args
1863
+ }),
1864
+ callTimeout,
1865
+ `callTool("${name}")`
1866
+ );
1065
1867
  return result;
1066
1868
  },
1067
1869
  getServerInfo() {
@@ -1082,7 +1884,11 @@ function createMCPFixture(client, testInfo, options) {
1082
1884
  project,
1083
1885
  async listTools() {
1084
1886
  const execute = async () => {
1085
- const result = await client.listTools();
1887
+ const result = await withCallTimeout(
1888
+ client.listTools(),
1889
+ callTimeout,
1890
+ "listTools"
1891
+ );
1086
1892
  const tools = result.tools;
1087
1893
  await testInfo.attach("mcp-list-tools", {
1088
1894
  contentType: "application/json",
@@ -1106,10 +1912,14 @@ function createMCPFixture(client, testInfo, options) {
1106
1912
  async callTool(name, args) {
1107
1913
  const execute = async () => {
1108
1914
  const startTime = Date.now();
1109
- const result = await client.callTool({
1110
- name,
1111
- arguments: args
1112
- });
1915
+ const result = await withCallTimeout(
1916
+ client.callTool({
1917
+ name,
1918
+ arguments: args
1919
+ }),
1920
+ callTimeout,
1921
+ `callTool("${name}")`
1922
+ );
1113
1923
  const durationMs = Date.now() - startTime;
1114
1924
  await testInfo.attach(`mcp-call-${name}`, {
1115
1925
  contentType: "application/json",
@@ -1148,7 +1958,8 @@ function createMCPFixture(client, testInfo, options) {
1148
1958
  null,
1149
1959
  2
1150
1960
  )
1151
- }).catch(() => {
1961
+ }).catch((err) => {
1962
+ console.error("[MCPFixture] Failed to attach server info:", err);
1152
1963
  });
1153
1964
  return result;
1154
1965
  }
@@ -1242,6 +2053,9 @@ var PlaywrightOAuthClientProvider = class {
1242
2053
  }
1243
2054
  /**
1244
2055
  * Stores new OAuth tokens for the current session
2056
+ *
2057
+ * The code verifier is cleared after a successful token exchange — it is
2058
+ * single-use per PKCE spec and must not persist beyond the exchange.
1245
2059
  */
1246
2060
  async saveTokens(tokens) {
1247
2061
  const state = await this.loadState() ?? this.createEmptyState();
@@ -1251,6 +2065,7 @@ var PlaywrightOAuthClientProvider = class {
1251
2065
  refreshToken: tokens.refresh_token,
1252
2066
  expiresAt: tokens.expires_in ? Date.now() + tokens.expires_in * 1e3 : void 0
1253
2067
  };
2068
+ delete state.codeVerifier;
1254
2069
  await this.saveState(state);
1255
2070
  }
1256
2071
  /**
@@ -1330,11 +2145,11 @@ In a testing context, use performOAuthSetup() in your Playwright globalSetup to
1330
2145
  state.savedAt = Date.now();
1331
2146
  this.cachedState = state;
1332
2147
  const dir = path2.dirname(this.config.storagePath);
1333
- await fs2.mkdir(dir, { recursive: true });
2148
+ await fs2.mkdir(dir, { recursive: true, mode: 448 });
1334
2149
  await fs2.writeFile(
1335
2150
  this.config.storagePath,
1336
2151
  JSON.stringify(state, null, 2),
1337
- "utf-8"
2152
+ { encoding: "utf-8", mode: 384 }
1338
2153
  );
1339
2154
  }
1340
2155
  async deleteState() {
@@ -1364,119 +2179,27 @@ In a testing context, use performOAuthSetup() in your Playwright globalSetup to
1364
2179
  return result;
1365
2180
  }
1366
2181
  };
1367
- async function generatePKCE() {
1368
- const codeVerifier = oauth.generateRandomCodeVerifier();
1369
- const codeChallenge = await oauth.calculatePKCECodeChallenge(codeVerifier);
1370
- return {
1371
- codeVerifier,
1372
- codeChallenge
1373
- };
1374
- }
1375
- function generateState() {
1376
- return oauth.generateRandomState();
1377
- }
1378
- function buildAuthorizationUrl(config) {
1379
- const authorizationEndpoint = config.authServer.server.authorization_endpoint;
1380
- if (!authorizationEndpoint) {
1381
- throw new Error(
1382
- "Authorization server does not have an authorization_endpoint"
1383
- );
1384
- }
1385
- const authorizationUrl = new URL(authorizationEndpoint);
1386
- authorizationUrl.searchParams.set("client_id", config.clientId);
1387
- authorizationUrl.searchParams.set("redirect_uri", config.redirectUri);
1388
- authorizationUrl.searchParams.set("response_type", "code");
1389
- authorizationUrl.searchParams.set("scope", config.scopes.join(" "));
1390
- authorizationUrl.searchParams.set("code_challenge", config.codeChallenge);
1391
- authorizationUrl.searchParams.set("code_challenge_method", "S256");
1392
- authorizationUrl.searchParams.set("state", config.state);
1393
- if (config.resource) {
1394
- authorizationUrl.searchParams.set("resource", config.resource);
2182
/**
 * Reports whether a URL string points at the local machine.
 *
 * WHATWG `URL#hostname` keeps the square brackets around IPv6 literals, so the
 * IPv6 loopback is matched in its bracketed form ("[::1]") as well as bare.
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {boolean} True for localhost, 127.0.0.1 or the IPv6 loopback;
 *   false for any other host or when `url` cannot be parsed.
 */
function isLocalhostUrl(url) {
  try {
    const { hostname } = new URL(url);
    return hostname === "localhost" || hostname === "127.0.0.1" || hostname === "::1" || hostname === "[::1]";
  } catch {
    // Unparseable input is never treated as local.
    return false;
  }
}
1434
- async function refreshAccessToken(config) {
1435
- const client = {
1436
- client_id: config.clientId,
1437
- token_endpoint_auth_method: config.clientSecret ? "client_secret_basic" : "none"
1438
- };
1439
- const clientAuth = config.clientSecret ? oauth.ClientSecretBasic(config.clientSecret) : oauth.None();
1440
- const response = await oauth.refreshTokenGrantRequest(
1441
- config.authServer.server,
1442
- client,
1443
- clientAuth,
1444
- config.refreshToken
1445
- );
1446
- if (!response.ok) {
1447
- const contentType = response.headers.get("content-type") ?? "";
1448
- let errorMessage = `Token refresh failed: ${response.status} ${response.statusText}`;
1449
- try {
1450
- if (contentType.includes("application/json")) {
1451
- const errorBody = await response.clone().json();
1452
- if (errorBody.error) {
1453
- errorMessage = `Token refresh failed: ${errorBody.error}`;
1454
- if (errorBody.error_description) {
1455
- errorMessage += ` - ${errorBody.error_description}`;
1456
- }
1457
- }
1458
- } else {
1459
- const textBody = await response.clone().text();
1460
- if (textBody) {
1461
- errorMessage = `Token refresh failed: ${response.status} - ${textBody}`;
1462
- }
1463
- }
1464
- } catch {
2191
/**
 * Rejects authorization-server metadata whose authorization or token endpoint
 * is neither an `https://` URL nor a localhost URL.
 *
 * Endpoints that are absent are not checked.
 *
 * @param {{ authorization_endpoint?: string, token_endpoint?: string }} authServer
 * @throws {Error} Naming the first insecure endpoint found.
 */
function validateAuthServerEndpoints(authServer) {
  const candidates = [
    ["authorization_endpoint", authServer.authorization_endpoint],
    ["token_endpoint", authServer.token_endpoint]
  ];
  for (const [name, url] of candidates) {
    if (!url) {
      continue;
    }
    const acceptable = url.startsWith("https://") || isLocalhostUrl(url);
    if (acceptable) {
      continue;
    }
    throw new Error(
      `OAuth discovery returned an insecure ${name}: "${url}". Only HTTPS endpoints are permitted for OAuth flows to prevent token interception.`
    );
  }
}
1481
2204
  var MCP_PROTOCOL_VERSION = "2025-06-18";
1482
2205
  async function discoverProtectedResource(mcpServerUrl) {
@@ -1546,6 +2269,7 @@ async function discoverAuthorizationServer(authServerUrl) {
1546
2269
  })
1547
2270
  });
1548
2271
  const metadata = await oauth.processDiscoveryResponse(issuer, response);
2272
+ validateAuthServerEndpoints(metadata);
1549
2273
  return {
1550
2274
  server: metadata,
1551
2275
  issuer: authServerUrl
@@ -1691,7 +2415,7 @@ var FileOAuthStorage = class {
1691
2415
  };
1692
2416
 
1693
2417
  // src/auth/cli.ts
1694
- var debug = createDebug("mcp-server-tester:cli-oauth");
2418
+ var debug2 = createDebug("mcp-server-tester:cli-oauth");
1695
2419
  var DEFAULT_TIMEOUT_MS = 3e5;
1696
2420
  var DEFAULT_CLIENT_NAME = "@gleanwork/mcp-server-tester";
1697
2421
  var DEFAULT_METADATA_TTL_MS = 24 * 60 * 60 * 1e3;
@@ -1717,7 +2441,7 @@ var CLIOAuthClient = class {
1717
2441
  async getAccessToken() {
1718
2442
  const envTokens = loadTokensFromEnv();
1719
2443
  if (envTokens) {
1720
- debug("Using tokens from environment variables");
2444
+ debug2("Using tokens from environment variables");
1721
2445
  return {
1722
2446
  accessToken: envTokens.accessToken,
1723
2447
  tokenType: envTokens.tokenType,
@@ -1730,7 +2454,7 @@ var CLIOAuthClient = class {
1730
2454
  if (storedTokens?.accessToken) {
1731
2455
  const isValid = await this.storage.hasValidToken();
1732
2456
  if (isValid) {
1733
- debug("Using cached tokens from storage");
2457
+ debug2("Using cached tokens from storage");
1734
2458
  return {
1735
2459
  accessToken: storedTokens.accessToken,
1736
2460
  tokenType: storedTokens.tokenType,
@@ -1740,7 +2464,7 @@ var CLIOAuthClient = class {
1740
2464
  };
1741
2465
  }
1742
2466
  if (storedTokens.refreshToken) {
1743
- debug("Token expired, attempting refresh");
2467
+ debug2("Token expired, attempting refresh");
1744
2468
  try {
1745
2469
  const refreshedTokens = await this.refreshStoredToken(storedTokens);
1746
2470
  return {
@@ -1751,11 +2475,11 @@ var CLIOAuthClient = class {
1751
2475
  fromEnv: false
1752
2476
  };
1753
2477
  } catch (error) {
1754
- debug("Token refresh failed, will re-authenticate:", error);
2478
+ debug2("Token refresh failed, will re-authenticate:", error);
1755
2479
  }
1756
2480
  }
1757
2481
  }
1758
- debug("Performing full OAuth authentication");
2482
+ debug2("Performing full OAuth authentication");
1759
2483
  return this.authenticate();
1760
2484
  }
1761
2485
  /**
@@ -1771,7 +2495,7 @@ var CLIOAuthClient = class {
1771
2495
  async tryGetAccessToken() {
1772
2496
  const envTokens = loadTokensFromEnv();
1773
2497
  if (envTokens) {
1774
- debug("Using tokens from environment variables");
2498
+ debug2("Using tokens from environment variables");
1775
2499
  return {
1776
2500
  accessToken: envTokens.accessToken,
1777
2501
  tokenType: envTokens.tokenType,
@@ -1784,7 +2508,7 @@ var CLIOAuthClient = class {
1784
2508
  if (storedTokens?.accessToken) {
1785
2509
  const isValid = await this.storage.hasValidToken();
1786
2510
  if (isValid) {
1787
- debug("Using cached tokens from storage");
2511
+ debug2("Using cached tokens from storage");
1788
2512
  return {
1789
2513
  accessToken: storedTokens.accessToken,
1790
2514
  tokenType: storedTokens.tokenType,
@@ -1794,7 +2518,7 @@ var CLIOAuthClient = class {
1794
2518
  };
1795
2519
  }
1796
2520
  if (storedTokens.refreshToken) {
1797
- debug("Token expired, attempting refresh");
2521
+ debug2("Token expired, attempting refresh");
1798
2522
  try {
1799
2523
  const refreshedTokens = await this.refreshStoredToken(storedTokens);
1800
2524
  return {
@@ -1805,12 +2529,12 @@ var CLIOAuthClient = class {
1805
2529
  fromEnv: false
1806
2530
  };
1807
2531
  } catch (error) {
1808
- debug("Token refresh failed:", error);
2532
+ debug2("Token refresh failed:", error);
1809
2533
  return null;
1810
2534
  }
1811
2535
  }
1812
2536
  }
1813
- debug("No valid token available");
2537
+ debug2("No valid token available");
1814
2538
  return null;
1815
2539
  }
1816
2540
  /**
@@ -1845,7 +2569,7 @@ var CLIOAuthClient = class {
1845
2569
  */
1846
2570
  async clearCredentials() {
1847
2571
  await this.storage.deleteTokens();
1848
- debug("Cleared stored credentials");
2572
+ debug2("Cleared stored credentials");
1849
2573
  }
1850
2574
  /**
1851
2575
  * Discover protected resource and authorization server
@@ -1855,12 +2579,12 @@ var CLIOAuthClient = class {
1855
2579
  if (cachedMetadata) {
1856
2580
  const age = Date.now() - cachedMetadata.discoveredAt;
1857
2581
  if (age < DEFAULT_METADATA_TTL_MS) {
1858
- debug("Using cached server metadata (age: %dms)", age);
1859
- debug(
2582
+ debug2("Using cached server metadata (age: %dms)", age);
2583
+ debug2(
1860
2584
  "Cached protected resource scopes: %O",
1861
2585
  cachedMetadata.protectedResource.scopes_supported
1862
2586
  );
1863
- debug(
2587
+ debug2(
1864
2588
  "Cached auth server scopes: %O",
1865
2589
  cachedMetadata.authServer.server.scopes_supported
1866
2590
  );
@@ -1869,12 +2593,12 @@ var CLIOAuthClient = class {
1869
2593
  authServer: cachedMetadata.authServer
1870
2594
  };
1871
2595
  }
1872
- debug("Cached server metadata is stale (age: %dms), re-discovering", age);
2596
+ debug2("Cached server metadata is stale (age: %dms), re-discovering", age);
1873
2597
  }
1874
- debug("Discovering protected resource:", this.config.mcpServerUrl);
2598
+ debug2("Discovering protected resource:", this.config.mcpServerUrl);
1875
2599
  const prResult = await discoverProtectedResource(this.config.mcpServerUrl);
1876
- debug("Found protected resource:", prResult.metadata.resource);
1877
- debug(
2600
+ debug2("Found protected resource:", prResult.metadata.resource);
2601
+ debug2(
1878
2602
  "Protected resource scopes_supported: %O",
1879
2603
  prResult.metadata.scopes_supported
1880
2604
  );
@@ -1884,10 +2608,10 @@ var CLIOAuthClient = class {
1884
2608
  "No authorization servers found in protected resource metadata"
1885
2609
  );
1886
2610
  }
1887
- debug("Discovering authorization server:", authServerUrl);
2611
+ debug2("Discovering authorization server:", authServerUrl);
1888
2612
  const authServer = await discoverAuthorizationServer(authServerUrl);
1889
- debug("Found authorization server:", authServer.issuer);
1890
- debug(
2613
+ debug2("Found authorization server:", authServer.issuer);
2614
+ debug2(
1891
2615
  "Auth server scopes_supported: %O",
1892
2616
  authServer.server.scopes_supported
1893
2617
  );
@@ -1907,7 +2631,7 @@ var CLIOAuthClient = class {
1907
2631
  */
1908
2632
  async getOrRegisterClient(authServer) {
1909
2633
  if (this.config.clientId) {
1910
- debug("Using pre-configured client ID");
2634
+ debug2("Using pre-configured client ID");
1911
2635
  return {
1912
2636
  clientId: this.config.clientId,
1913
2637
  clientSecret: this.config.clientSecret
@@ -1915,10 +2639,10 @@ var CLIOAuthClient = class {
1915
2639
  }
1916
2640
  const cachedClient = await this.storage.loadClient();
1917
2641
  if (cachedClient?.clientId) {
1918
- debug("Using cached client registration");
2642
+ debug2("Using cached client registration");
1919
2643
  return cachedClient;
1920
2644
  }
1921
- debug("Registering new client via DCR");
2645
+ debug2("Registering new client via DCR");
1922
2646
  const client = await this.registerClient(authServer);
1923
2647
  await this.storage.saveClient(client);
1924
2648
  return client;
@@ -1956,7 +2680,7 @@ ${errorText}`
1956
2680
  );
1957
2681
  }
1958
2682
  const data = await response.json();
1959
- debug("Client registered:", data.client_id);
2683
+ debug2("Client registered:", data.client_id);
1960
2684
  return {
1961
2685
  clientId: data.client_id,
1962
2686
  clientSecret: data.client_secret,
@@ -1974,17 +2698,17 @@ ${errorText}`
1974
2698
  const redirectUri = `http://127.0.0.1:${port}/callback`;
1975
2699
  try {
1976
2700
  const requestedScopes = this.config.scopes ?? protectedResource.scopes_supported ?? authServer.server.scopes_supported ?? ["openid"];
1977
- debug("Scope resolution:");
1978
- debug(" - User config scopes: %O", this.config.scopes);
1979
- debug(
2701
+ debug2("Scope resolution:");
2702
+ debug2(" - User config scopes: %O", this.config.scopes);
2703
+ debug2(
1980
2704
  " - Protected resource scopes_supported: %O",
1981
2705
  protectedResource.scopes_supported
1982
2706
  );
1983
- debug(
2707
+ debug2(
1984
2708
  " - Auth server scopes_supported: %O",
1985
2709
  authServer.server.scopes_supported
1986
2710
  );
1987
- debug(" - Final requested scopes: %O", requestedScopes);
2711
+ debug2(" - Final requested scopes: %O", requestedScopes);
1988
2712
  const authUrl = buildAuthorizationUrl({
1989
2713
  authServer,
1990
2714
  clientId: client.clientId,
@@ -1994,16 +2718,19 @@ ${errorText}`
1994
2718
  state,
1995
2719
  resource: protectedResource.resource
1996
2720
  });
1997
- debug("Authorization URL: %s", authUrl.toString());
1998
- debug("Authorization URL params:");
1999
- debug(" - client_id: %s", authUrl.searchParams.get("client_id"));
2000
- debug(" - redirect_uri: %s", authUrl.searchParams.get("redirect_uri"));
2001
- debug(" - scope: %s", authUrl.searchParams.get("scope"));
2002
- debug(" - resource: %s", authUrl.searchParams.get("resource"));
2721
+ debug2(
2722
+ "Authorization URL (base): %s",
2723
+ `${authUrl.origin}${authUrl.pathname}`
2724
+ );
2725
+ debug2("Authorization URL params:");
2726
+ debug2(" - client_id: %s", authUrl.searchParams.get("client_id"));
2727
+ debug2(" - redirect_uri: %s", authUrl.searchParams.get("redirect_uri"));
2728
+ debug2(" - scope: %s", authUrl.searchParams.get("scope"));
2729
+ debug2(" - resource: %s", authUrl.searchParams.get("resource"));
2003
2730
  await this.openBrowserOrPrintUrl(authUrl);
2004
- debug("Waiting for OAuth callback...");
2731
+ debug2("Waiting for OAuth callback...");
2005
2732
  const code = await codePromise;
2006
- debug("Received authorization code");
2733
+ debug2("Received authorization code");
2007
2734
  const tokenResult = await exchangeCodeForTokens({
2008
2735
  authServer,
2009
2736
  clientId: client.clientId,
@@ -2041,14 +2768,14 @@ ${errorText}`
2041
2768
  let clientId;
2042
2769
  let clientSecret;
2043
2770
  if (storedTokens.clientId) {
2044
- debug("Using clientId from stored tokens for refresh");
2771
+ debug2("Using clientId from stored tokens for refresh");
2045
2772
  clientId = storedTokens.clientId;
2046
2773
  const storedClient = await this.storage.loadClient();
2047
2774
  if (storedClient?.clientId === clientId) {
2048
2775
  clientSecret = storedClient.clientSecret;
2049
2776
  }
2050
2777
  } else {
2051
- debug(
2778
+ debug2(
2052
2779
  "No clientId in stored tokens, falling back to stored client (legacy behavior)"
2053
2780
  );
2054
2781
  const client = await this.getOrRegisterClient(metadata.authServer);
@@ -2142,7 +2869,7 @@ ${errorText}`
2142
2869
  const preferredPort = this.config.callbackPort ?? 0;
2143
2870
  server.listen(preferredPort, "127.0.0.1", () => {
2144
2871
  const address = server.address();
2145
- debug("Callback server listening on port", address.port);
2872
+ debug2("Callback server listening on port", address.port);
2146
2873
  resolve({ port: address.port, codePromise, close: forceClose });
2147
2874
  });
2148
2875
  server.on("error", (err) => {
@@ -2166,9 +2893,9 @@ ${errorText}`
2166
2893
  try {
2167
2894
  const open = await import('open');
2168
2895
  await open.default(url.toString());
2169
- debug("Opened browser for authentication");
2896
+ debug2("Opened browser for authentication");
2170
2897
  } catch (error) {
2171
- debug("Failed to open browser:", error);
2898
+ debug2("Failed to open browser:", error);
2172
2899
  console.log("\nFailed to open browser automatically.");
2173
2900
  console.log("Please open the following URL manually:\n");
2174
2901
  console.log(url.toString() + "\n");
@@ -2314,30 +3041,31 @@ var test = test$1.extend({
2314
3041
  );
2315
3042
  }
2316
3043
  let resolvedAuthType = "none";
3044
+ const httpConfig = isHttpConfig(mcpConfig) ? mcpConfig : null;
2317
3045
  let authProvider;
2318
- if (mcpConfig.auth?.oauth?.authStatePath) {
3046
+ if (httpConfig?.auth?.oauth?.authStatePath) {
2319
3047
  authProvider = new PlaywrightOAuthClientProvider({
2320
- storagePath: mcpConfig.auth.oauth.authStatePath,
2321
- redirectUri: mcpConfig.auth.oauth.redirectUri ?? "http://localhost:3000/oauth/callback",
2322
- clientId: mcpConfig.auth.oauth.clientId,
2323
- clientSecret: mcpConfig.auth.oauth.clientSecret
3048
+ storagePath: httpConfig.auth.oauth.authStatePath,
3049
+ redirectUri: httpConfig.auth.oauth.redirectUri ?? "http://localhost:3000/oauth/callback",
3050
+ clientId: httpConfig.auth.oauth.clientId,
3051
+ clientSecret: httpConfig.auth.oauth.clientSecret
2324
3052
  });
2325
3053
  resolvedAuthType = "oauth";
2326
3054
  }
2327
3055
  let effectiveConfig = mcpConfig;
2328
- if (mcpConfig.auth?.accessToken) {
3056
+ if (httpConfig?.auth?.accessToken) {
2329
3057
  resolvedAuthType = "api-token";
2330
3058
  }
2331
- if (isHttpConfig(mcpConfig) && !mcpConfig.auth?.accessToken && !mcpConfig.auth?.oauth?.authStatePath) {
3059
+ if (httpConfig && !httpConfig.auth?.accessToken && !httpConfig.auth?.oauth?.authStatePath) {
2332
3060
  const cliClient = new CLIOAuthClient({
2333
- mcpServerUrl: mcpConfig.serverUrl
3061
+ mcpServerUrl: httpConfig.serverUrl
2334
3062
  });
2335
3063
  const tokenResult = await cliClient.tryGetAccessToken();
2336
3064
  if (tokenResult) {
2337
3065
  effectiveConfig = {
2338
- ...mcpConfig,
3066
+ ...httpConfig,
2339
3067
  auth: {
2340
- ...mcpConfig.auth,
3068
+ ...httpConfig.auth,
2341
3069
  accessToken: tokenResult.accessToken
2342
3070
  }
2343
3071
  };
@@ -2348,7 +3076,7 @@ var test = test$1.extend({
2348
3076
  const client = await createMCPClientForConfig(effectiveConfig, {
2349
3077
  clientInfo: {
2350
3078
  name: "@gleanwork/mcp-server-tester",
2351
- version: "0.1.0"
3079
+ version: package_default.version
2352
3080
  },
2353
3081
  authProvider
2354
3082
  });
@@ -2365,9 +3093,11 @@ var test = test$1.extend({
2365
3093
  * Automatically tracks all MCP operations for the reporter
2366
3094
  */
2367
3095
  mcp: async ({ mcpClient, _mcpFixtureState }, use, testInfo) => {
3096
+ const useConfig = testInfo.project.use;
2368
3097
  const api = createMCPFixture(mcpClient, testInfo, {
2369
3098
  authType: _mcpFixtureState.resolvedAuthType,
2370
- project: testInfo.project.name
3099
+ project: testInfo.project.name,
3100
+ callTimeoutMs: useConfig.mcpConfig?.callTimeoutMs
2371
3101
  });
2372
3102
  await use(api);
2373
3103
  }