@gleanwork/mcp-server-tester 0.12.0 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,17 @@
1
1
  import { expect as expect$1, test as test$1 } from '@playwright/test';
2
2
  import { query } from '@anthropic-ai/claude-agent-sdk';
3
+ import { z } from 'zod';
3
4
  import { Client } from '@modelcontextprotocol/sdk/client/index.js';
4
5
  import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
5
6
  import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
6
- import { z } from 'zod';
7
+ import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js';
7
8
  import createDebug from 'debug';
9
+ import { ProxyAgent, Agent } from 'undici';
10
+ import { readFileSync } from 'fs';
11
+ import * as oauth from 'oauth4webapi';
8
12
  import * as fs2 from 'fs/promises';
9
13
  import * as path2 from 'path';
10
14
  import * as http from 'http';
11
- import * as oauth from 'oauth4webapi';
12
15
  import { homedir } from 'os';
13
16
 
14
17
  var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
@@ -117,7 +120,7 @@ function isCallToolResult(value) {
117
120
  return false;
118
121
  }
119
122
  const v = value;
120
- return Array.isArray(v.content) || typeof v.isError === "boolean";
123
+ return Array.isArray(v.content);
121
124
  }
122
125
  function extractTextFromContentArray(content) {
123
126
  const textParts = [];
@@ -625,6 +628,30 @@ function toBeToolError(received, expected = true) {
625
628
  }
626
629
  };
627
630
  }
631
+
632
+ // src/judge/rubrics.ts
633
+ var BUILT_IN_RUBRICS = {
634
+ correctness: "Evaluate whether the response is factually correct and accurately answers the question. Compare against the reference answer if provided. Score 1.0 for fully correct with no errors; Score 0.75 for mostly correct with one minor inaccuracy or omission; Score 0.5 for partially correct \u2014 answers part of the question but misses key elements; Score 0.25 for minimally relevant but substantially incorrect or missing most key details; Score 0.0 for incorrect, irrelevant, or directly contradicting the reference.",
635
+ completeness: "Evaluate whether the response fully addresses all aspects of the question. Score 1.0 if the response covers all key points comprehensively; Score 0.75 if the response covers most key points with one minor gap; Score 0.5 if the response partially answers \u2014 covers some aspects but misses others; Score 0.25 if the response touches on the topic but misses most key aspects; Score 0.0 if major aspects of the question are entirely missing or the response is off-topic.",
636
+ groundedness: "Evaluate whether all claims in the response are supported by the retrieved context or reference. Penalize unsupported assertions or hallucinated facts. Score 1.0 for fully grounded \u2014 every claim is traceable to the provided context; Score 0.75 for mostly grounded with one minor unsupported detail; Score 0.5 for partially grounded \u2014 some claims are supported but notable hallucinations are present; Score 0.25 for minimally grounded \u2014 most claims are unsupported or invented; Score 0.0 for completely hallucinated or contradicting the provided context.",
637
+ "instruction-following": "Evaluate whether the response follows the instructions given in the question. Check format, tone, constraints, and task completion. Score 1.0 for full compliance \u2014 all instructions are followed precisely; Score 0.75 for mostly compliant with one minor deviation from the instructions; Score 0.5 for partial compliance \u2014 some instructions followed but key constraints violated; Score 0.25 for minimal compliance \u2014 the response loosely addresses the task but ignores most instructions; Score 0.0 for non-compliance \u2014 the response disregards the instructions entirely.",
638
+ conciseness: "Evaluate whether the response is appropriately concise without losing important information. Penalize unnecessary verbosity, padding, or repetition. Score 1.0 for well-sized \u2014 concise and complete with no unnecessary content; Score 0.75 for slightly verbose but no information is lost or repeated; Score 0.5 for moderately verbose \u2014 some padding or repetition that reduces clarity; Score 0.25 for excessively verbose \u2014 significantly overlong with substantial filler or repetition; Score 0.0 for extremely verbose \u2014 so padded or repetitive that the core answer is obscured."
639
+ };
640
+ function resolveRubric(rubric) {
641
+ if (typeof rubric === "string") {
642
+ return BUILT_IN_RUBRICS[rubric];
643
+ }
644
+ return rubric.text;
645
+ }
646
+
647
+ // src/judge/judgeTypes.ts
648
+ var JudgeResponseSchema = z.object({
649
+ pass: z.boolean(),
650
+ score: z.number().min(0).max(1),
651
+ reasoning: z.string()
652
+ });
653
+
654
+ // src/judge/claudeAgentJudge.ts
628
655
  function createClaudeAgentJudge(config) {
629
656
  const model = config.model ?? "claude-sonnet-4-20250514";
630
657
  const maxBudgetUsd = config.maxBudgetUsd ?? 0.1;
@@ -685,7 +712,7 @@ function createClaudeAgentJudge(config) {
685
712
  cacheCreationInputTokens: resultMessage.usage?.cache_creation_input_tokens
686
713
  };
687
714
  return {
688
- pass: parsed.pass ?? false,
715
+ pass: parsed.pass,
689
716
  score: parsed.score,
690
717
  reasoning: parsed.reasoning,
691
718
  usage,
@@ -704,21 +731,19 @@ function buildSystemPrompt() {
704
731
  return 'You are an expert evaluator. Evaluate the candidate response based on the rubric provided. Respond ONLY with valid JSON in this exact format: {"pass": boolean, "score": number (0-1), "reasoning": string}. Do not include any other text, markdown formatting, or code blocks.';
705
732
  }
706
733
  function buildJudgePrompt(candidate, reference, rubric) {
734
+ const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
735
+ const referenceStr = reference !== null && reference !== void 0 ? typeof reference === "string" ? reference : JSON.stringify(reference, null, 2) : null;
707
736
  const parts = [];
708
- parts.push("# Evaluation Task\n");
737
+ parts.push("Rubric:\n");
709
738
  parts.push(rubric);
710
- parts.push("\n\n# Candidate Response\n");
711
- parts.push(
712
- typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2)
713
- );
714
- if (reference !== null && reference !== void 0) {
715
- parts.push("\n\n# Reference Response\n");
716
- parts.push(
717
- typeof reference === "string" ? reference : JSON.stringify(reference, null, 2)
718
- );
719
- }
739
+ parts.push("\n\n<candidate_response>\n");
740
+ parts.push(candidateStr);
741
+ parts.push("\n</candidate_response>\n\n");
742
+ parts.push("<reference_answer>\n");
743
+ parts.push(referenceStr ?? "No reference provided.");
744
+ parts.push("\n</reference_answer>\n\n");
720
745
  parts.push(
721
- "\n\n# Instructions\nEvaluate the candidate response based on the rubric. " + (reference !== null && reference !== void 0 ? "Compare it against the reference response if helpful. " : "") + 'Respond with JSON containing "pass" (boolean), "score" (0-1), and "reasoning" (string).'
746
+ "Evaluate the candidate response against the rubric" + (referenceStr !== null ? ", comparing it with the reference answer if helpful" : "") + '. Return JSON: {"pass": boolean, "score": number (0-1), "reasoning": string}'
722
747
  );
723
748
  return parts.join("");
724
749
  }
@@ -734,75 +759,320 @@ function parseJudgeResponse(text) {
734
759
  jsonText = jsonText.slice(0, -3);
735
760
  }
736
761
  jsonText = jsonText.trim();
762
+ let parsed;
737
763
  try {
738
- return JSON.parse(jsonText);
764
+ parsed = JSON.parse(jsonText);
739
765
  } catch {
740
766
  const jsonMatch = jsonText.match(/\{[\s\S]*"pass"[\s\S]*\}/);
741
767
  if (jsonMatch) {
742
- return JSON.parse(jsonMatch[0]);
768
+ parsed = JSON.parse(jsonMatch[0]);
769
+ } else {
770
+ throw new Error(`Failed to parse judge response as JSON: ${text}`);
743
771
  }
772
+ }
773
+ const result = JudgeResponseSchema.safeParse(parsed);
774
+ if (!result.success) {
775
+ throw new Error(
776
+ `Judge returned invalid response. Expected {pass, score, reasoning} but got: ${jsonText.slice(0, 500)}
777
+ Validation errors: ${JSON.stringify(result.error.issues)}`
778
+ );
779
+ }
780
+ return result.data;
781
+ }
782
+
783
+ // src/judge/openaiJudge.ts
784
+ function createOpenAIJudge(config = {}) {
785
+ const apiKeyEnvVar = config.apiKeyEnvVar ?? "OPENAI_API_KEY";
786
+ const apiKey = process.env[apiKeyEnvVar];
787
+ if (!apiKey) {
788
+ throw new Error(
789
+ `OpenAI judge requires an API key. Set the ${apiKeyEnvVar} environment variable.`
790
+ );
791
+ }
792
+ const model = config.model ?? "gpt-4o";
793
+ const maxTokens = config.maxTokens ?? 1e3;
794
+ const temperature = config.temperature ?? 0;
795
+ return {
796
+ async evaluate(candidate, reference, rubric) {
797
+ let openaiModule;
798
+ try {
799
+ openaiModule = await import('openai');
800
+ } catch (err) {
801
+ throw new Error(
802
+ `OpenAI judge requires the \`openai\` package. Install it with: npm install openai
803
+ Original error: ${err instanceof Error ? err.message : String(err)}`
804
+ );
805
+ }
806
+ const client = new openaiModule.default({ apiKey });
807
+ const prompt = buildJudgePrompt2(candidate, reference, rubric);
808
+ const startTime = Date.now();
809
+ const completion = await client.chat.completions.create({
810
+ model,
811
+ max_tokens: maxTokens,
812
+ temperature,
813
+ messages: [
814
+ {
815
+ role: "system",
816
+ content: 'You are an expert evaluator. Respond with valid JSON only: {"pass": true|false, "score": 0.0-1.0, "reasoning": "explanation"}'
817
+ },
818
+ { role: "user", content: prompt }
819
+ ]
820
+ });
821
+ const durationMs = Date.now() - startTime;
822
+ const text = completion.choices[0]?.message.content ?? "";
823
+ const parsed = parseJudgeResponse2(text);
824
+ return {
825
+ pass: parsed.pass,
826
+ score: parsed.score,
827
+ reasoning: parsed.reasoning,
828
+ usage: {
829
+ inputTokens: completion.usage?.prompt_tokens ?? 0,
830
+ outputTokens: completion.usage?.completion_tokens ?? 0,
831
+ totalCostUsd: 0,
832
+ durationMs
833
+ }
834
+ };
835
+ }
836
+ };
837
+ }
838
+ function buildJudgePrompt2(candidate, reference, rubric) {
839
+ const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
840
+ const referenceStr = reference !== null && reference !== void 0 ? typeof reference === "string" ? reference : JSON.stringify(reference, null, 2) : null;
841
+ return `Rubric:
842
+ ${rubric}
843
+
844
+ <candidate_response>
845
+ ${candidateStr}
846
+ </candidate_response>
847
+
848
+ <reference_answer>
849
+ ${referenceStr ?? "No reference provided."}
850
+ </reference_answer>
851
+
852
+ Evaluate and return JSON: {"pass": boolean, "score": number (0-1), "reasoning": string}`;
853
+ }
854
+ function parseJudgeResponse2(text) {
855
+ const cleaned = text.replace(/```json\n?/g, "").replace(/```\n?/g, "").trim();
856
+ let parsed;
857
+ try {
858
+ parsed = JSON.parse(cleaned);
859
+ } catch {
744
860
  throw new Error(`Failed to parse judge response as JSON: ${text}`);
745
861
  }
862
+ const result = JudgeResponseSchema.safeParse(parsed);
863
+ if (!result.success) {
864
+ throw new Error(
865
+ `Judge returned invalid response. Expected {pass, score, reasoning} but got: ${cleaned.slice(0, 500)}
866
+ Validation errors: ${JSON.stringify(result.error.issues)}`
867
+ );
868
+ }
869
+ return result.data;
870
+ }
871
+
872
+ // src/judge/googleJudge.ts
873
+ function createGoogleJudge(config = {}) {
874
+ const apiKeyEnvVar = config.apiKeyEnvVar ?? "GOOGLE_API_KEY";
875
+ const apiKey = process.env[apiKeyEnvVar];
876
+ if (!apiKey) {
877
+ throw new Error(
878
+ `Google judge requires an API key. Set the ${apiKeyEnvVar} environment variable.`
879
+ );
880
+ }
881
+ const model = config.model ?? "gemini-2.0-flash";
882
+ const maxTokens = config.maxTokens ?? 1e3;
883
+ return {
884
+ async evaluate(candidate, reference, rubric) {
885
+ let googleModule;
886
+ try {
887
+ googleModule = await import('@google/generative-ai');
888
+ } catch (err) {
889
+ throw new Error(
890
+ `Google judge requires the \`@google/generative-ai\` package. Install it with: npm install @google/generative-ai
891
+ Original error: ${err instanceof Error ? err.message : String(err)}`
892
+ );
893
+ }
894
+ const genAI = new googleModule.GoogleGenerativeAI(apiKey);
895
+ const gemini = genAI.getGenerativeModel({
896
+ model,
897
+ generationConfig: {
898
+ maxOutputTokens: maxTokens,
899
+ temperature: 0
900
+ },
901
+ systemInstruction: 'You are an expert evaluator. Respond with valid JSON only: {"pass": true|false, "score": 0.0-1.0, "reasoning": "explanation"}'
902
+ });
903
+ const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
904
+ const referenceStr = reference !== null && reference !== void 0 ? typeof reference === "string" ? reference : JSON.stringify(reference, null, 2) : null;
905
+ const prompt = `Rubric:
906
+ ${rubric}
907
+
908
+ <candidate_response>
909
+ ${candidateStr}
910
+ </candidate_response>
911
+
912
+ <reference_answer>
913
+ ${referenceStr ?? "No reference provided."}
914
+ </reference_answer>
915
+
916
+ Evaluate and return JSON: {"pass": boolean, "score": number (0-1), "reasoning": string}`;
917
+ const startTime = Date.now();
918
+ const result = await gemini.generateContent(prompt);
919
+ const durationMs = Date.now() - startTime;
920
+ const text = result.response.text();
921
+ const cleaned = text.replace(/```json\n?/g, "").replace(/```\n?/g, "").trim();
922
+ let parsedRaw;
923
+ try {
924
+ parsedRaw = JSON.parse(cleaned);
925
+ } catch {
926
+ throw new Error(`Failed to parse judge response as JSON: ${text}`);
927
+ }
928
+ const validation = JudgeResponseSchema.safeParse(parsedRaw);
929
+ if (!validation.success) {
930
+ throw new Error(
931
+ `Judge returned invalid response. Expected {pass, score, reasoning} but got: ${cleaned.slice(0, 500)}
932
+ Validation errors: ${JSON.stringify(validation.error.issues)}`
933
+ );
934
+ }
935
+ const { pass, score, reasoning } = validation.data;
936
+ return {
937
+ pass,
938
+ score,
939
+ reasoning,
940
+ usage: {
941
+ inputTokens: result.response.usageMetadata?.promptTokenCount ?? 0,
942
+ outputTokens: result.response.usageMetadata?.candidatesTokenCount ?? 0,
943
+ totalCostUsd: 0,
944
+ durationMs
945
+ }
946
+ };
947
+ }
948
+ };
746
949
  }
747
950
 
748
951
  // src/judge/judgeClient.ts
749
952
  function createJudge(config = {}) {
750
- const provider = config.provider ?? "claude";
953
+ const provider = config.provider ?? "anthropic";
751
954
  switch (provider) {
752
- case "claude":
753
955
  case "anthropic":
754
956
  return createClaudeAgentJudge(config);
755
957
  case "openai":
756
- throw new Error(
757
- 'OpenAI provider is no longer supported. Please use createJudge() without specifying provider, or use provider: "claude". See migration guide at https://github.com/gleanwork/mcp-server-tester/blob/main/docs/migration-v0.11.md'
758
- );
759
- case "custom-http":
760
- throw new Error(
761
- "custom-http provider is no longer supported. Please use createJudge() without specifying provider."
762
- );
958
+ return createOpenAIJudge(config);
959
+ case "google":
960
+ return createGoogleJudge(config);
763
961
  default:
764
962
  throw new Error(`Unsupported LLM provider: ${String(provider)}`);
765
963
  }
766
964
  }
767
965
 
768
- // src/assertions/matchers/toPassToolJudge.ts
769
- var DEFAULT_PASSING_THRESHOLD = 0.7;
770
- var DEFAULT_JUDGE_CONFIG = {};
771
- async function toPassToolJudge(received, rubric, options = {}) {
966
+ // src/assertions/validators/judge.ts
967
+ function computeStdDev(scores, mean) {
968
+ if (scores.length <= 1) return 0;
969
+ const variance = scores.reduce((sum, s) => sum + (s - mean) ** 2, 0) / scores.length;
970
+ return Math.sqrt(variance);
971
+ }
972
+ async function validateJudge(response, config) {
772
973
  const {
773
- reference = null,
774
- passingThreshold = DEFAULT_PASSING_THRESHOLD,
775
- judgeConfig = DEFAULT_JUDGE_CONFIG
776
- } = options;
777
- const judge = createJudge(judgeConfig);
974
+ rubric,
975
+ reference,
976
+ threshold = 0.7,
977
+ reps = 1,
978
+ provider,
979
+ model,
980
+ apiKeyEnvVar,
981
+ maxTokens,
982
+ temperature,
983
+ maxBudgetUsd,
984
+ maxToolOutputSize
985
+ } = config;
986
+ const resolvedRubric = resolveRubric(rubric);
987
+ const judgeConfig = {
988
+ ...provider !== void 0 && { provider },
989
+ ...model !== void 0 && { model },
990
+ ...apiKeyEnvVar !== void 0 && { apiKeyEnvVar },
991
+ ...maxTokens !== void 0 && { maxTokens },
992
+ ...temperature !== void 0 && { temperature },
993
+ ...maxBudgetUsd !== void 0 && { maxBudgetUsd },
994
+ ...maxToolOutputSize !== void 0 && { maxToolOutputSize }
995
+ };
778
996
  try {
779
- const result = await judge.evaluate(received, reference, rubric);
780
- const score = result.score ?? (result.pass ? 1 : 0);
781
- const passes = score >= passingThreshold;
782
- if (this.isNot) {
783
- return {
784
- pass: !passes,
785
- message: () => passes ? `Expected judge evaluation to fail, but it passed with score ${score.toFixed(2)}` : `Judge evaluation failed as expected with score ${score.toFixed(2)}`
786
- };
997
+ const judge = createJudge(judgeConfig);
998
+ const scores = [];
999
+ let lastReasoning;
1000
+ for (let i = 0; i < reps; i++) {
1001
+ const judgeResult = await judge.evaluate(
1002
+ response,
1003
+ reference ?? null,
1004
+ resolvedRubric
1005
+ );
1006
+ scores.push(judgeResult.score ?? (judgeResult.pass ? 1 : 0));
1007
+ lastReasoning = judgeResult.reasoning;
787
1008
  }
788
- if (passes) {
1009
+ if (scores.length === 0) {
789
1010
  return {
790
- pass: true,
791
- message: () => `Judge evaluation passed with score ${score.toFixed(2)} (threshold: ${passingThreshold})`
1011
+ pass: false,
1012
+ message: "Judge evaluation failed: no scores collected"
792
1013
  };
793
1014
  }
1015
+ const meanScore = scores.reduce((a, b) => a + b, 0) / scores.length;
1016
+ const passed = meanScore >= threshold;
1017
+ const repNote = reps > 1 ? ` (mean of ${reps} reps: [${scores.map((s) => s.toFixed(2)).join(", ")}])` : "";
1018
+ let stdDev;
1019
+ let highVariance;
1020
+ if (reps > 1) {
1021
+ stdDev = computeStdDev(scores, meanScore);
1022
+ highVariance = stdDev > 0.2;
1023
+ if (highVariance) {
1024
+ console.warn(
1025
+ `[mcp-server-tester] Judge scores have high variance (stdDev=${stdDev.toFixed(2)}, scores=[${scores.map((s) => s.toFixed(2)).join(", ")}]). The rubric may be ambiguous.`
1026
+ );
1027
+ }
1028
+ }
794
1029
  return {
795
- pass: false,
796
- message: () => `Judge evaluation failed with score ${score.toFixed(2)} (threshold: ${passingThreshold}). Reasoning: ${result.reasoning ?? "No reasoning provided"}`
1030
+ pass: passed,
1031
+ message: passed ? `Judge passed with score ${meanScore.toFixed(2)}${repNote}` : `Judge failed with score ${meanScore.toFixed(2)} (threshold: ${threshold})${repNote}. ${lastReasoning ?? ""}`,
1032
+ details: reps > 1 ? {
1033
+ scores,
1034
+ scoreStdDev: stdDev,
1035
+ highVariance
1036
+ } : void 0
797
1037
  };
798
- } catch (error) {
1038
+ } catch (err) {
799
1039
  return {
800
1040
  pass: false,
801
- message: () => `Judge evaluation failed with error: ${error instanceof Error ? error.message : String(error)}`
1041
+ message: `Judge evaluation error: ${err instanceof Error ? err.message : String(err)}`
802
1042
  };
803
1043
  }
804
1044
  }
805
1045
 
1046
+ // src/assertions/matchers/toPassToolJudge.ts
1047
+ var DEFAULT_PASSING_THRESHOLD = 0.7;
1048
+ async function toPassToolJudge(received, rubric, options = {}) {
1049
+ const {
1050
+ reference = null,
1051
+ passingThreshold = DEFAULT_PASSING_THRESHOLD,
1052
+ reps,
1053
+ provider,
1054
+ model
1055
+ } = options;
1056
+ const validation = await validateJudge(received, {
1057
+ rubric,
1058
+ reference: reference ?? void 0,
1059
+ threshold: passingThreshold,
1060
+ ...reps !== void 0 && { reps },
1061
+ ...provider !== void 0 && { provider },
1062
+ ...model !== void 0 && { model }
1063
+ });
1064
+ if (this.isNot) {
1065
+ return {
1066
+ pass: !validation.pass,
1067
+ message: () => validation.pass ? `Expected judge evaluation to fail, but it passed` : `Judge evaluation failed as expected`
1068
+ };
1069
+ }
1070
+ return {
1071
+ pass: validation.pass,
1072
+ message: () => validation.message
1073
+ };
1074
+ }
1075
+
806
1076
  // src/assertions/validators/size.ts
807
1077
  function validateSize(response, options) {
808
1078
  const { maxBytes, minBytes } = options;
@@ -903,6 +1173,144 @@ async function toSatisfyToolPredicate(received, predicate, description) {
903
1173
  }
904
1174
  }
905
1175
 
1176
+ // src/assertions/validators/toolCalls.ts
1177
+ function isSimulationResult(value) {
1178
+ return typeof value === "object" && value !== null && "success" in value && "toolCalls" in value && Array.isArray(value.toolCalls);
1179
+ }
1180
+ function partialMatch(actual, expected) {
1181
+ return Object.entries(expected).every(([k, v]) => {
1182
+ const actualVal = actual[k];
1183
+ if (typeof v === "object" && v !== null && typeof actualVal === "object" && actualVal !== null) {
1184
+ return partialMatch(
1185
+ actualVal,
1186
+ v
1187
+ );
1188
+ }
1189
+ return JSON.stringify(actualVal) === JSON.stringify(v);
1190
+ });
1191
+ }
1192
+ function findMatchingCall(actual, expected, startIndex = 0) {
1193
+ for (let i = startIndex; i < actual.length; i++) {
1194
+ const call = actual[i];
1195
+ if (call.name !== expected.name) continue;
1196
+ if (expected.arguments !== void 0 && !partialMatch(call.arguments ?? {}, expected.arguments)) {
1197
+ continue;
1198
+ }
1199
+ return i;
1200
+ }
1201
+ return -1;
1202
+ }
1203
+ function validateToolCalls(response, expectation) {
1204
+ if (!isSimulationResult(response)) {
1205
+ return {
1206
+ pass: false,
1207
+ message: "toolsTriggered expectation requires llm_host mode \u2014 response must be an LLMHostSimulationResult"
1208
+ };
1209
+ }
1210
+ const actual = response.toolCalls;
1211
+ const requiredCalls = expectation.calls.filter((c) => c.required !== false);
1212
+ const calledRequiredCount = requiredCalls.filter(
1213
+ (expected) => findMatchingCall(actual, expected) !== -1
1214
+ ).length;
1215
+ const recall = requiredCalls.length > 0 ? calledRequiredCount / requiredCalls.length : 1;
1216
+ const allowedNames = new Set(expectation.calls.map((c) => c.name));
1217
+ const precision = actual.length > 0 ? actual.filter((c) => allowedNames.has(c.name)).length / actual.length : 1;
1218
+ const metrics = { precision, recall };
1219
+ const order = expectation.order ?? "any";
1220
+ if (order === "strict") {
1221
+ let searchFrom = 0;
1222
+ for (const expected of expectation.calls) {
1223
+ const idx = findMatchingCall(actual, expected, searchFrom);
1224
+ if (idx === -1) {
1225
+ if (expected.required !== false) {
1226
+ return {
1227
+ pass: false,
1228
+ message: `Expected tool '${expected.name}' to be called in sequence (starting from position ${searchFrom}), but it was not found`,
1229
+ metrics
1230
+ };
1231
+ }
1232
+ } else {
1233
+ searchFrom = idx + 1;
1234
+ }
1235
+ }
1236
+ } else {
1237
+ const required = expectation.calls.filter((c) => c.required !== false);
1238
+ for (const expected of required) {
1239
+ const idx = findMatchingCall(actual, expected);
1240
+ if (idx === -1) {
1241
+ const argsNote = expected.arguments !== void 0 ? ` with args ${JSON.stringify(expected.arguments)}` : "";
1242
+ return {
1243
+ pass: false,
1244
+ message: `Expected tool '${expected.name}'${argsNote} to be called, but it was not`,
1245
+ metrics
1246
+ };
1247
+ }
1248
+ }
1249
+ }
1250
+ if (expectation.exclusive === true) {
1251
+ const unexpected = actual.filter((c) => !allowedNames.has(c.name));
1252
+ if (unexpected.length > 0) {
1253
+ const names = unexpected.map((c) => `'${c.name}'`).join(", ");
1254
+ return {
1255
+ pass: false,
1256
+ message: `Unexpected tool calls: ${names}. Only ${[...allowedNames].map((n) => `'${n}'`).join(", ")} are allowed`,
1257
+ metrics
1258
+ };
1259
+ }
1260
+ }
1261
+ return { pass: true, message: "All tool call expectations met", metrics };
1262
+ }
1263
+ function validateToolCallCount(response, options) {
1264
+ if (!isSimulationResult(response)) {
1265
+ return {
1266
+ pass: false,
1267
+ message: "toolCallCount expectation requires llm_host mode \u2014 response must be an LLMHostSimulationResult"
1268
+ };
1269
+ }
1270
+ const count = response.toolCalls.length;
1271
+ const { min, max, exact } = options;
1272
+ if (exact !== void 0 && count !== exact) {
1273
+ return {
1274
+ pass: false,
1275
+ message: `Expected exactly ${exact} tool call(s), but got ${count}`
1276
+ };
1277
+ }
1278
+ if (min !== void 0 && count < min) {
1279
+ return {
1280
+ pass: false,
1281
+ message: `Expected at least ${min} tool call(s), but got ${count}`
1282
+ };
1283
+ }
1284
+ if (max !== void 0 && count > max) {
1285
+ return {
1286
+ pass: false,
1287
+ message: `Expected at most ${max} tool call(s), but got ${count}`
1288
+ };
1289
+ }
1290
+ return {
1291
+ pass: true,
1292
+ message: `Tool call count (${count}) is within expected range`
1293
+ };
1294
+ }
1295
+
1296
+ // src/assertions/matchers/toHaveToolCalls.ts
1297
+ function toHaveToolCalls(received, expectation) {
1298
+ const result = validateToolCalls(received, expectation);
1299
+ return {
1300
+ pass: result.pass,
1301
+ message: () => result.message
1302
+ };
1303
+ }
1304
+
1305
+ // src/assertions/matchers/toHaveToolCallCount.ts
1306
+ function toHaveToolCallCount(received, options) {
1307
+ const result = validateToolCallCount(received, options);
1308
+ return {
1309
+ pass: result.pass,
1310
+ message: () => result.message
1311
+ };
1312
+ }
1313
+
906
1314
  // src/assertions/matchers/index.ts
907
1315
  var expect = expect$1.extend({
908
1316
  toMatchToolResponse,
@@ -913,7 +1321,9 @@ var expect = expect$1.extend({
913
1321
  toBeToolError,
914
1322
  toPassToolJudge,
915
1323
  toHaveToolResponseSize,
916
- toSatisfyToolPredicate
1324
+ toSatisfyToolPredicate,
1325
+ toHaveToolCalls,
1326
+ toHaveToolCallCount
917
1327
  });
918
1328
  var MCPHostCapabilitiesSchema = z.object({
919
1329
  sampling: z.record(z.unknown()).optional(),
@@ -930,9 +1340,16 @@ var MCPOAuthConfigSchema = z.object({
930
1340
  clientSecret: z.string().optional(),
931
1341
  redirectUri: z.string().url().optional()
932
1342
  });
1343
+ var MCPClientCredentialsConfigSchema = z.object({
1344
+ clientId: z.string().optional(),
1345
+ clientSecret: z.string().optional(),
1346
+ tokenEndpoint: z.string().url("tokenEndpoint must be a valid URL").optional(),
1347
+ scopes: z.array(z.string()).optional()
1348
+ });
933
1349
  var MCPAuthConfigSchema = z.object({
934
1350
  accessToken: z.string().optional(),
935
- oauth: MCPOAuthConfigSchema.optional()
1351
+ oauth: MCPOAuthConfigSchema.optional(),
1352
+ clientCredentials: MCPClientCredentialsConfigSchema.optional()
936
1353
  }).refine(
937
1354
  (data) => !(data.accessToken && data.oauth),
938
1355
  "Cannot specify both accessToken and oauth configuration"
@@ -942,19 +1359,48 @@ var StdioConfigSchema = z.object({
942
1359
  command: z.string().min(1, "command is required for stdio transport"),
943
1360
  args: z.array(z.string()).optional(),
944
1361
  cwd: z.string().optional(),
1362
+ env: z.record(z.string(), z.string()).optional(),
945
1363
  capabilities: MCPHostCapabilitiesSchema.optional(),
946
1364
  connectTimeoutMs: z.number().positive().optional(),
947
1365
  requestTimeoutMs: z.number().positive().optional(),
1366
+ callTimeoutMs: z.number().positive().optional(),
948
1367
  quiet: z.boolean().optional()
949
1368
  });
1369
+ function isLocalhost(hostname) {
1370
+ return hostname === "localhost" || hostname === "127.0.0.1" || hostname === "::1";
1371
+ }
950
1372
  var HttpConfigSchema = z.object({
951
1373
  transport: z.literal("http"),
952
- serverUrl: z.string().url("serverUrl must be a valid URL"),
1374
+ serverUrl: z.string().url("serverUrl must be a valid URL").refine((url) => {
1375
+ let parsed;
1376
+ try {
1377
+ parsed = new URL(url);
1378
+ } catch {
1379
+ return true;
1380
+ }
1381
+ if (parsed.protocol === "http:" && !isLocalhost(parsed.hostname)) {
1382
+ console.warn(
1383
+ `[mcp-server-tester] serverUrl uses http:// for non-localhost address "${parsed.hostname}". This transmits tokens unencrypted. Use https:// for remote servers.`
1384
+ );
1385
+ }
1386
+ return true;
1387
+ }),
953
1388
  headers: z.record(z.string()).optional(),
954
1389
  capabilities: MCPHostCapabilitiesSchema.optional(),
955
1390
  connectTimeoutMs: z.number().positive().optional(),
956
1391
  requestTimeoutMs: z.number().positive().optional(),
957
- auth: MCPAuthConfigSchema.optional()
1392
+ callTimeoutMs: z.number().positive().optional(),
1393
+ auth: MCPAuthConfigSchema.optional(),
1394
+ proxy: z.object({
1395
+ url: z.string().url("proxy.url must be a valid URL")
1396
+ }).optional(),
1397
+ retryAttempts: z.number().int().min(0).optional(),
1398
+ tls: z.object({
1399
+ ca: z.string().optional(),
1400
+ cert: z.string().optional(),
1401
+ key: z.string().optional(),
1402
+ rejectUnauthorized: z.boolean().optional()
1403
+ }).optional()
958
1404
  });
959
1405
  var MCPConfigSchema = z.discriminatedUnion("transport", [
960
1406
  StdioConfigSchema,
@@ -964,26 +1410,245 @@ function validateMCPConfig(config) {
964
1410
  return MCPConfigSchema.parse(config);
965
1411
  }
966
1412
  // Returns true when the config selects the stdio transport.
  // Only the `transport` discriminant is inspected; the added (+) line no
  // longer checks that `command` is a string.
  function isStdioConfig(config) {
967
- return config.transport === "stdio" && typeof config.command === "string";
1413
+ return config.transport === "stdio";
968
1414
  }
969
1415
  // Returns true when the config selects the HTTP transport.
  // Only the `transport` discriminant is inspected; the added (+) line no
  // longer checks that `serverUrl` is a string.
  function isHttpConfig(config) {
970
- return config.transport === "http" && typeof config.serverUrl === "string";
1416
+ return config.transport === "http";
971
1417
  }
972
1418
  var NAMESPACE = "mcp-server-tester";
973
1419
  var debugClient = createDebug(`${NAMESPACE}:client`);
974
1420
  createDebug(`${NAMESPACE}:oauth`);
975
1421
  createDebug(`${NAMESPACE}:eval`);
1422
+ var debugHttp = createDebug(`${NAMESPACE}:http`);
1423
+
1424
+ // package.json
1425
+ var package_default = {
1426
+ version: "1.0.0-beta.1"};
1427
+ var debug = createDebug("mcp-server-tester:oauth-flow");
1428
/**
 * Creates a fresh PKCE pair for an authorization-code flow.
 *
 * @returns {Promise<{codeVerifier: string, codeChallenge: string}>} A random
 *   verifier and the challenge derived from it by oauth4webapi.
 */
async function generatePKCE() {
  const verifier = oauth.generateRandomCodeVerifier();
  const challenge = await oauth.calculatePKCECodeChallenge(verifier);
  return { codeVerifier: verifier, codeChallenge: challenge };
}
1436
/**
 * Produces a random OAuth `state` value via oauth4webapi.
 *
 * @returns {string} The generated state string.
 */
function generateState() {
  const state = oauth.generateRandomState();
  return state;
}
1439
/**
 * Builds the authorization-request URL for an authorization-code + PKCE flow.
 *
 * @param {object} config
 * @param {{server: {authorization_endpoint?: string}}} config.authServer - Discovered server metadata.
 * @param {string} config.clientId
 * @param {string} config.redirectUri
 * @param {string[]} [config.scopes] - Requested scopes; when missing or empty
 *   no `scope` parameter is sent (an empty `scope=` value is not meaningful).
 * @param {string} config.codeChallenge - PKCE challenge, sent with method S256.
 * @param {string} config.state
 * @param {string} [config.resource] - Optional resource indicator.
 * @returns {URL} The authorization endpoint with all query parameters set.
 * @throws {Error} If the server metadata has no `authorization_endpoint`.
 */
function buildAuthorizationUrl(config) {
  const authorizationEndpoint = config.authServer.server.authorization_endpoint;
  if (!authorizationEndpoint) {
    throw new Error(
      "Authorization server does not have an authorization_endpoint"
    );
  }
  const authorizationUrl = new URL(authorizationEndpoint);
  const { searchParams } = authorizationUrl;
  searchParams.set("client_id", config.clientId);
  searchParams.set("redirect_uri", config.redirectUri);
  searchParams.set("response_type", "code");
  // Tolerate a missing scope list instead of crashing on `.join`.
  const scope = (config.scopes ?? []).join(" ");
  if (scope) {
    searchParams.set("scope", scope);
  }
  searchParams.set("code_challenge", config.codeChallenge);
  searchParams.set("code_challenge_method", "S256");
  searchParams.set("state", config.state);
  if (config.resource) {
    searchParams.set("resource", config.resource);
  }
  return authorizationUrl;
}
1459
/**
 * Exchanges an authorization code for tokens at the token endpoint.
 *
 * The callback URL is rebuilt locally from `redirectUri`, `code` and `state`
 * so that oauth4webapi can validate it before the grant request is sent.
 * Clients with a secret authenticate with client_secret_basic; others use "none".
 *
 * @param {object} config - Needs `authServer.server`, `clientId`, optional
 *   `clientSecret`, `redirectUri`, `code`, `state` and `codeVerifier`.
 * @returns {Promise<object>} `accessToken`, `tokenType`, `expiresIn`,
 *   `refreshToken` and `scope` copied from the token response.
 */
async function exchangeCodeForTokens(config) {
  const { clientId, clientSecret, redirectUri, code, state, codeVerifier } = config;

  const client = {
    client_id: clientId,
    token_endpoint_auth_method: clientSecret ? "client_secret_basic" : "none"
  };
  const clientAuth = clientSecret ? oauth.ClientSecretBasic(clientSecret) : oauth.None();

  const callbackUrl = new URL(redirectUri);
  callbackUrl.searchParams.set("code", code);
  callbackUrl.searchParams.set("state", state);

  const server = config.authServer.server;
  const callbackParams = oauth.validateAuthResponse(server, client, callbackUrl, state);
  const tokenResponse = await oauth.authorizationCodeGrantRequest(
    server,
    client,
    clientAuth,
    callbackParams,
    redirectUri,
    codeVerifier
  );
  const tokens = await oauth.processAuthorizationCodeResponse(server, client, tokenResponse);

  return {
    accessToken: tokens.access_token,
    tokenType: tokens.token_type,
    expiresIn: tokens.expires_in,
    refreshToken: tokens.refresh_token,
    scope: tokens.scope
  };
}
1495
/**
 * Obtains a new access token using a refresh token.
 *
 * On a non-OK response an Error is thrown whose message is built from the
 * response: the OAuth `error` / `error_description` fields for JSON bodies,
 * otherwise the raw text body, falling back to status and status text.
 *
 * @param {object} config - Needs `authServer.server`, `clientId`, optional
 *   `clientSecret` and `refreshToken`.
 * @returns {Promise<object>} `accessToken`, `tokenType`, `expiresIn`,
 *   `refreshToken` and `scope` copied from the token response.
 * @throws {Error} When the token endpoint answers with a non-OK status.
 */
async function refreshAccessToken(config) {
  const client = {
    client_id: config.clientId,
    token_endpoint_auth_method: config.clientSecret ? "client_secret_basic" : "none"
  };
  const clientAuth = config.clientSecret ? oauth.ClientSecretBasic(config.clientSecret) : oauth.None();
  const response = await oauth.refreshTokenGrantRequest(
    config.authServer.server,
    client,
    clientAuth,
    config.refreshToken
  );
  if (!response.ok) {
    const contentType = response.headers.get("content-type") ?? "";
    let errorMessage = `Token refresh failed: ${response.status} ${response.statusText}`;
    try {
      if (contentType.includes("application/json")) {
        const errorBody = await response.clone().json();
        if (errorBody.error) {
          errorMessage = `Token refresh failed: ${errorBody.error}`;
          if (errorBody.error_description) {
            errorMessage += ` - ${errorBody.error_description}`;
          }
        }
      } else {
        const textBody = await response.clone().text();
        if (textBody) {
          errorMessage = `Token refresh failed: ${response.status} - ${textBody}`;
        }
      }
    } catch (bodyError) {
      // Reading the body is best-effort: keep the status-based message, but
      // leave a trace instead of discarding the failure silently.
      debug(
        "Could not read token refresh error body: %s",
        bodyError instanceof Error ? bodyError.message : String(bodyError)
      );
    }
    throw new Error(errorMessage);
  }
  const result = await oauth.processRefreshTokenResponse(
    config.authServer.server,
    client,
    response
  );
  return {
    accessToken: result.access_token,
    tokenType: result.token_type,
    expiresIn: result.expires_in,
    refreshToken: result.refresh_token,
    scope: result.scope
  };
}
1542
/**
 * Runs an OAuth client-credentials grant against a token endpoint.
 *
 * The issuer is taken to be the origin of the token endpoint. The client
 * authenticates with client_secret_basic. When the response lists granted
 * scopes and some requested scopes are absent from it, a debug message is logged.
 *
 * @param {object} config - `tokenEndpoint`, `clientId`, `clientSecret` and
 *   optional `scopes`.
 * @returns {Promise<object>} `accessToken`, `tokenType`, `expiresIn` and
 *   `scope` copied from the token response.
 */
async function performClientCredentialsFlow(config) {
  const { origin } = new URL(config.tokenEndpoint);
  const authServer = { issuer: origin, token_endpoint: config.tokenEndpoint };
  const client = { client_id: config.clientId };
  const clientAuth = oauth.ClientSecretBasic(config.clientSecret);

  const hasScopes = Boolean(config.scopes && config.scopes.length > 0);
  const parameters = {};
  if (hasScopes) {
    parameters["scope"] = config.scopes.join(" ");
  }

  const tokenResponse = await oauth.clientCredentialsGrantRequest(
    authServer,
    client,
    clientAuth,
    parameters
  );
  const result = await oauth.processClientCredentialsResponse(
    authServer,
    client,
    tokenResponse
  );

  const granted = new Set((result.scope ?? "").split(" ").filter(Boolean));
  const requested = new Set(hasScopes ? config.scopes : []);
  const missing = [...requested].filter((scopeName) => !granted.has(scopeName));
  // A non-empty `missing` list already implies that scopes were requested.
  if (missing.length > 0 && granted.size > 0) {
    debug(
      "[oauth] Warning: Token server granted fewer scopes than requested. Missing: %s",
      missing.join(", ")
    );
  }

  return {
    accessToken: result.access_token,
    tokenType: result.token_type,
    expiresIn: result.expires_in,
    scope: result.scope
  };
}
976
1589
 
977
1590
  // src/mcp/clientFactory.ts
1591
/**
 * Extracts the delay requested by a `Retry-After` response header.
 *
 * Both header forms are understood: a number of seconds, and an HTTP-date
 * ending in "GMT" (converted to the time remaining from now). Negative or
 * already-elapsed values yield 0 rather than a negative delay.
 *
 * @param {unknown} err - Error that may carry a `response` with `headers.get`.
 * @returns {number|null} Delay in milliseconds, or null when the header is
 *   absent or cannot be interpreted.
 */
function getRetryAfterDelayMs(err) {
  const retryAfter = err?.response?.headers?.get?.("Retry-After");
  if (!retryAfter) {
    return null;
  }
  const seconds = Number.parseInt(retryAfter, 10);
  if (!Number.isNaN(seconds)) {
    return Math.max(0, seconds) * 1e3;
  }
  // Only attempt date parsing on strings shaped like an HTTP-date, because
  // Date.parse is lenient and may accept unrelated text.
  if (/^[A-Za-z]+, .+ GMT$/.test(String(retryAfter).trim())) {
    const retryAt = Date.parse(retryAfter);
    if (!Number.isNaN(retryAt)) {
      return Math.max(0, retryAt - Date.now());
    }
  }
  return null;
}
1600
/**
 * Reports whether an error represents an HTTP 429 (Too Many Requests).
 *
 * The status is looked up on a nested `response` object and, for errors that
 * carry it directly, on numeric `status` or `code` properties.
 * NOTE(review): the MCP SDK transport errors appear to expose the HTTP status
 * as `code` — confirm against the SDK version in use.
 *
 * @param {unknown} err - Error thrown by a connection attempt.
 * @returns {boolean} True when any recognised status field equals 429.
 */
function isRateLimitError(err) {
  return err?.response?.status === 429 || err?.status === 429 || err?.code === 429;
}
1604
/**
 * Heuristically decides whether an error is a network failure worth retrying.
 *
 * An error qualifies when its `code`, or the `code` of its `cause`, is one of
 * ECONNRESET / ECONNREFUSED / ETIMEDOUT / ENOTFOUND, or when its message
 * (case-insensitively) mentions one of those codes, "network",
 * "socket hang up" or "fetch failed".
 *
 * @param {unknown} err - Value caught from a failed connection attempt.
 * @returns {boolean} False for anything that is not an Error instance.
 */
function isTransientNetworkError(err) {
  if (!(err instanceof Error)) return false;
  const transientCodes = ["ECONNRESET", "ECONNREFUSED", "ETIMEDOUT", "ENOTFOUND"];
  // Wrapped errors often keep the system error code on `cause` while the
  // outer message is generic, so inspect both levels.
  const codes = [err.code, err.cause?.code];
  if (codes.some((code) => transientCodes.includes(code))) {
    return true;
  }
  const msg = String(err.message).toLowerCase();
  const needles = [
    "econnreset",
    "econnrefused",
    "etimedout",
    "enotfound",
    "network",
    "socket hang up",
    "fetch failed"
  ];
  return needles.some((needle) => msg.includes(needle));
}
1609
/**
 * Decides whether a failed attempt should be retried: either the server
 * answered with a rate limit or the failure looks like a transient network error.
 *
 * @param {unknown} err - Value caught from the failed attempt.
 * @returns {boolean} True when a retry is worthwhile.
 */
function isRetryableError(err) {
  if (isRateLimitError(err)) {
    return true;
  }
  return isTransientNetworkError(err);
}
1612
/**
 * Runs `fn`, retrying retryable failures with exponential backoff.
 *
 * `fn` is always invoked at least once. After a retryable failure the wait is
 * the server's Retry-After delay when one is available, otherwise
 * 1s, 2s, 4s, ... capped at 30s. Non-retryable errors, and the error from the
 * final attempt, are rethrown unchanged.
 *
 * @param {() => Promise<any>} fn - Operation to attempt.
 * @param {number} maxAttempts - Number of retries allowed after the first
 *   attempt; negative, NaN or missing values are treated as 0.
 * @returns {Promise<any>} Whatever `fn` resolves to.
 */
async function retryWithBackoff(fn, maxAttempts) {
  // Normalise the retry budget so an invalid value can never skip the first
  // call (previously the loop body would not run at all and `undefined` was thrown).
  const requested = Math.floor(Number(maxAttempts));
  const retries = requested > 0 ? requested : 0;
  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (err) {
      if (attempt >= retries || !isRetryableError(err)) {
        throw err;
      }
      const retryAfterMs = getRetryAfterDelayMs(err);
      const delayMs = retryAfterMs !== null ? retryAfterMs : Math.min(1e3 * 2 ** attempt, 3e4);
      debugClient(
        "Retryable error on attempt %d/%d, retrying in %dms: %s",
        attempt + 1,
        retries + 1,
        delayMs,
        err?.message ?? String(err)
      );
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
}
1637
+ var agentRegistry = /* @__PURE__ */ new WeakMap();
978
1638
  async function createMCPClientForConfig(config, options) {
979
1639
  const validatedConfig = validateMCPConfig(config);
980
1640
  const client = new Client(
981
1641
  {
982
1642
  name: options?.clientInfo?.name ?? "@gleanwork/mcp-server-tester",
983
- version: options?.clientInfo?.version ?? "0.1.0"
1643
+ version: options?.clientInfo?.version ?? package_default.version
984
1644
  },
985
1645
  {
986
- capabilities: validatedConfig.capabilities ?? {}
1646
+ capabilities: {
1647
+ ...validatedConfig.capabilities ?? {},
1648
+ // Only advertise sampling if a handler has been registered;
1649
+ // declaring sampling capability without a handler violates the MCP spec
1650
+ sampling: options?.samplingHandler ? validatedConfig.capabilities?.sampling ?? {} : void 0
1651
+ }
987
1652
  }
988
1653
  );
989
1654
  if (isStdioConfig(validatedConfig)) {
@@ -992,33 +1657,140 @@ async function createMCPClientForConfig(config, options) {
992
1657
  args: validatedConfig.args ?? [],
993
1658
  ...validatedConfig.cwd && { cwd: validatedConfig.cwd },
994
1659
  // Suppress server stderr when quiet mode is enabled
995
- ...validatedConfig.quiet && { stderr: "ignore" }
1660
+ ...validatedConfig.quiet && { stderr: "ignore" },
1661
+ ...validatedConfig.env && {
1662
+ env: Object.fromEntries(
1663
+ Object.entries({ ...process.env, ...validatedConfig.env }).filter(
1664
+ (entry) => entry[1] !== void 0
1665
+ )
1666
+ )
1667
+ }
996
1668
  });
997
1669
  debugClient("Connecting via stdio: %O", {
998
1670
  command: validatedConfig.command,
999
1671
  args: validatedConfig.args,
1000
1672
  cwd: validatedConfig.cwd
1001
1673
  });
1002
- await client.connect(transport);
1674
+ await client.connect(
1675
+ transport,
1676
+ validatedConfig.connectTimeoutMs !== void 0 ? { timeout: validatedConfig.connectTimeoutMs } : void 0
1677
+ );
1003
1678
  } else if (isHttpConfig(validatedConfig)) {
1004
1679
  const headers = { ...validatedConfig.headers };
1680
+ if (validatedConfig.auth?.clientCredentials && !options?.authProvider) {
1681
+ const ccConfig = validatedConfig.auth.clientCredentials;
1682
+ const clientId = ccConfig.clientId ?? process.env["MCP_CLIENT_ID"];
1683
+ const clientSecret = ccConfig.clientSecret ?? process.env["MCP_CLIENT_SECRET"];
1684
+ if (!clientId || !clientSecret) {
1685
+ throw new Error(
1686
+ "Client credentials require clientId/clientSecret in config or MCP_CLIENT_ID/MCP_CLIENT_SECRET env vars"
1687
+ );
1688
+ }
1689
+ if (!ccConfig.tokenEndpoint) {
1690
+ throw new Error(
1691
+ "Client credentials require tokenEndpoint in auth.clientCredentials config"
1692
+ );
1693
+ }
1694
+ debugClient("Fetching token via client credentials grant");
1695
+ const tokenResult = await performClientCredentialsFlow({
1696
+ tokenEndpoint: ccConfig.tokenEndpoint,
1697
+ clientId,
1698
+ clientSecret,
1699
+ scopes: ccConfig.scopes
1700
+ });
1701
+ headers.Authorization = `Bearer ${tokenResult.accessToken}`;
1702
+ }
1005
1703
  if (validatedConfig.auth?.accessToken && !options?.authProvider) {
1006
1704
  headers.Authorization = `Bearer ${validatedConfig.auth.accessToken}`;
1007
1705
  }
1008
- const transport = new StreamableHTTPClientTransport(
1009
- new URL(validatedConfig.serverUrl),
1010
- {
1011
- requestInit: Object.keys(headers).length > 0 ? { headers } : void 0,
1012
- // Pass auth provider for OAuth flow - MCP SDK handles it automatically
1013
- authProvider: options?.authProvider
1706
+ const url = new URL(validatedConfig.serverUrl);
1707
+ let requestInit = Object.keys(headers).length > 0 ? { headers } : void 0;
1708
+ const proxyUrl = validatedConfig.proxy?.url ?? process.env["HTTPS_PROXY"] ?? process.env["HTTP_PROXY"];
1709
+ if (proxyUrl) {
1710
+ const proxyAgent = new ProxyAgent(proxyUrl);
1711
+ try {
1712
+ const sanitized = new URL(proxyUrl);
1713
+ debugClient(
1714
+ "Using proxy: %s://%s:%s",
1715
+ sanitized.protocol.slice(0, -1),
1716
+ sanitized.hostname,
1717
+ sanitized.port
1718
+ );
1719
+ } catch {
1720
+ debugClient("Using proxy (unparseable URL)");
1014
1721
  }
1015
- );
1722
+ requestInit = {
1723
+ ...requestInit,
1724
+ dispatcher: proxyAgent
1725
+ };
1726
+ }
1727
+ if (validatedConfig.tls) {
1728
+ const tlsCfg = validatedConfig.tls;
1729
+ try {
1730
+ const dispatcher = new Agent({
1731
+ connect: {
1732
+ ...tlsCfg.ca && { ca: readFileSync(tlsCfg.ca) },
1733
+ ...tlsCfg.cert && { cert: readFileSync(tlsCfg.cert) },
1734
+ ...tlsCfg.key && { key: readFileSync(tlsCfg.key) },
1735
+ rejectUnauthorized: tlsCfg.rejectUnauthorized ?? true
1736
+ }
1737
+ });
1738
+ agentRegistry.set(client, dispatcher);
1739
+ requestInit = {
1740
+ ...requestInit,
1741
+ dispatcher
1742
+ };
1743
+ debugClient("TLS configuration applied");
1744
+ } catch (error) {
1745
+ const filePath = tlsCfg.ca ?? tlsCfg.cert ?? tlsCfg.key;
1746
+ const fileType = tlsCfg.ca ? "CA certificate" : tlsCfg.cert ? "client certificate" : "client key";
1747
+ throw new Error(
1748
+ `Failed to load TLS ${fileType} from ${filePath}: ${error instanceof Error ? error.message : String(error)}`
1749
+ );
1750
+ }
1751
+ } else if (proxyUrl) {
1752
+ const existingDispatcher = requestInit?.dispatcher;
1753
+ if (existingDispatcher) {
1754
+ agentRegistry.set(client, existingDispatcher);
1755
+ }
1756
+ }
1016
1757
  debugClient("Connecting via HTTP: %O", {
1017
1758
  serverUrl: validatedConfig.serverUrl,
1018
1759
  headers: Object.keys(headers).length > 0 ? Object.keys(headers) : void 0,
1019
1760
  hasAuthProvider: !!options?.authProvider
1020
1761
  });
1021
- await client.connect(transport);
1762
+ debugHttp("Connecting to %s", validatedConfig.serverUrl);
1763
+ if (Object.keys(headers).length > 0) {
1764
+ debugHttp("Request header names: %O", Object.keys(headers));
1765
+ }
1766
+ const retryAttempts = validatedConfig.retryAttempts ?? 0;
1767
+ const connectOptions = validatedConfig.connectTimeoutMs !== void 0 ? { timeout: validatedConfig.connectTimeoutMs } : void 0;
1768
+ await retryWithBackoff(async () => {
1769
+ try {
1770
+ debugHttp("Attempting transport: streamableHttp");
1771
+ const streamableTransport = new StreamableHTTPClientTransport(url, {
1772
+ requestInit,
1773
+ authProvider: options?.authProvider
1774
+ });
1775
+ await client.connect(streamableTransport, connectOptions);
1776
+ debugClient("Connected via Streamable HTTP");
1777
+ debugHttp("Connection established via streamableHttp");
1778
+ } catch (err) {
1779
+ debugHttp(
1780
+ "streamableHttp failed (%s), falling back to SSE",
1781
+ err.message
1782
+ );
1783
+ debugClient("Streamable HTTP failed, falling back to SSE transport");
1784
+ debugHttp("Attempting transport: sse");
1785
+ const sseTransport = new SSEClientTransport(url, {
1786
+ requestInit,
1787
+ authProvider: options?.authProvider
1788
+ });
1789
+ await client.connect(sseTransport, connectOptions);
1790
+ debugClient("Connected via SSE");
1791
+ debugHttp("Connection established via sse");
1792
+ }
1793
+ }, retryAttempts);
1022
1794
  }
1023
1795
  debugClient("Connected successfully");
1024
1796
  const serverInfo = client.getServerVersion();
@@ -1031,12 +1803,43 @@ async function closeMCPClient(client) {
1031
1803
  try {
1032
1804
  await client.close();
1033
1805
  } catch (error) {
1034
- console.error("[MCP] Error closing client:", error);
1806
+ debugClient(
1807
+ "Error closing client: %s",
1808
+ error instanceof Error ? error.message : String(error)
1809
+ );
1035
1810
  throw error;
1811
+ } finally {
1812
+ const agent = agentRegistry.get(client);
1813
+ if (agent) {
1814
+ agentRegistry.delete(client);
1815
+ try {
1816
+ await agent.close();
1817
+ } catch (agentError) {
1818
+ debugClient(
1819
+ "Error closing undici agent: %s",
1820
+ agentError.message
1821
+ );
1822
+ }
1823
+ }
1036
1824
  }
1037
1825
  }
1038
1826
 
1039
1827
  // src/mcp/fixtures/mcpFixture.ts
1828
+ var DEFAULT_CALL_TIMEOUT_MS = 3e4;
1829
/**
 * Bounds how long an MCP operation may take.
 *
 * The returned promise settles like `promise` if that settles first; otherwise
 * it rejects once `ms` milliseconds have passed. The timer is cleared in
 * either case. The underlying operation itself is not cancelled.
 *
 * @param {Promise<any>} promise - The in-flight operation.
 * @param {number} ms - Timeout in milliseconds.
 * @param {string} opName - Operation label used in the timeout error message.
 * @returns {Promise<any>} Result of `promise`, or a rejection on timeout.
 */
function withCallTimeout(promise, ms, opName) {
  let timeoutHandle;
  const deadline = new Promise((_resolve, reject) => {
    timeoutHandle = setTimeout(() => {
      reject(new Error(`MCP operation "${opName}" timed out after ${ms}ms`));
    }, ms);
  });
  const stopTimer = () => clearTimeout(timeoutHandle);
  return Promise.race([promise, deadline]).finally(stopTimer);
}
1040
1843
  var testStep = null;
1041
1844
  try {
1042
1845
  const playwright = __require("@playwright/test");
@@ -1048,20 +1851,29 @@ try {
1048
1851
  function createMCPFixture(client, testInfo, options) {
1049
1852
  const authType = options?.authType ?? "none";
1050
1853
  const project = options?.project;
1854
+ const callTimeout = options?.callTimeoutMs ?? DEFAULT_CALL_TIMEOUT_MS;
1051
1855
  if (!testInfo) {
1052
1856
  return {
1053
1857
  client,
1054
1858
  authType,
1055
1859
  project,
1056
1860
  async listTools() {
1057
- const result = await client.listTools();
1861
+ const result = await withCallTimeout(
1862
+ client.listTools(),
1863
+ callTimeout,
1864
+ "listTools"
1865
+ );
1058
1866
  return result.tools;
1059
1867
  },
1060
1868
  async callTool(name, args) {
1061
- const result = await client.callTool({
1062
- name,
1063
- arguments: args
1064
- });
1869
+ const result = await withCallTimeout(
1870
+ client.callTool({
1871
+ name,
1872
+ arguments: args
1873
+ }),
1874
+ callTimeout,
1875
+ `callTool("${name}")`
1876
+ );
1065
1877
  return result;
1066
1878
  },
1067
1879
  getServerInfo() {
@@ -1082,7 +1894,11 @@ function createMCPFixture(client, testInfo, options) {
1082
1894
  project,
1083
1895
  async listTools() {
1084
1896
  const execute = async () => {
1085
- const result = await client.listTools();
1897
+ const result = await withCallTimeout(
1898
+ client.listTools(),
1899
+ callTimeout,
1900
+ "listTools"
1901
+ );
1086
1902
  const tools = result.tools;
1087
1903
  await testInfo.attach("mcp-list-tools", {
1088
1904
  contentType: "application/json",
@@ -1106,10 +1922,14 @@ function createMCPFixture(client, testInfo, options) {
1106
1922
  async callTool(name, args) {
1107
1923
  const execute = async () => {
1108
1924
  const startTime = Date.now();
1109
- const result = await client.callTool({
1110
- name,
1111
- arguments: args
1112
- });
1925
+ const result = await withCallTimeout(
1926
+ client.callTool({
1927
+ name,
1928
+ arguments: args
1929
+ }),
1930
+ callTimeout,
1931
+ `callTool("${name}")`
1932
+ );
1113
1933
  const durationMs = Date.now() - startTime;
1114
1934
  await testInfo.attach(`mcp-call-${name}`, {
1115
1935
  contentType: "application/json",
@@ -1148,7 +1968,8 @@ function createMCPFixture(client, testInfo, options) {
1148
1968
  null,
1149
1969
  2
1150
1970
  )
1151
- }).catch(() => {
1971
+ }).catch((err) => {
1972
+ console.error("[MCPFixture] Failed to attach server info:", err);
1152
1973
  });
1153
1974
  return result;
1154
1975
  }
@@ -1242,6 +2063,9 @@ var PlaywrightOAuthClientProvider = class {
1242
2063
  }
1243
2064
  /**
1244
2065
  * Stores new OAuth tokens for the current session
2066
+ *
2067
+ * The code verifier is cleared after a successful token exchange — it is
2068
+ * single-use per PKCE spec and must not persist beyond the exchange.
1245
2069
  */
1246
2070
  async saveTokens(tokens) {
1247
2071
  const state = await this.loadState() ?? this.createEmptyState();
@@ -1251,6 +2075,7 @@ var PlaywrightOAuthClientProvider = class {
1251
2075
  refreshToken: tokens.refresh_token,
1252
2076
  expiresAt: tokens.expires_in ? Date.now() + tokens.expires_in * 1e3 : void 0
1253
2077
  };
2078
+ delete state.codeVerifier;
1254
2079
  await this.saveState(state);
1255
2080
  }
1256
2081
  /**
@@ -1330,11 +2155,11 @@ In a testing context, use performOAuthSetup() in your Playwright globalSetup to
1330
2155
  state.savedAt = Date.now();
1331
2156
  this.cachedState = state;
1332
2157
  const dir = path2.dirname(this.config.storagePath);
1333
- await fs2.mkdir(dir, { recursive: true });
2158
+ await fs2.mkdir(dir, { recursive: true, mode: 448 });
1334
2159
  await fs2.writeFile(
1335
2160
  this.config.storagePath,
1336
2161
  JSON.stringify(state, null, 2),
1337
- "utf-8"
2162
+ { encoding: "utf-8", mode: 384 }
1338
2163
  );
1339
2164
  }
1340
2165
  async deleteState() {
@@ -1364,119 +2189,27 @@ In a testing context, use performOAuthSetup() in your Playwright globalSetup to
1364
2189
  return result;
1365
2190
  }
1366
2191
  };
1367
- async function generatePKCE() {
1368
- const codeVerifier = oauth.generateRandomCodeVerifier();
1369
- const codeChallenge = await oauth.calculatePKCECodeChallenge(codeVerifier);
1370
- return {
1371
- codeVerifier,
1372
- codeChallenge
1373
- };
1374
- }
1375
- function generateState() {
1376
- return oauth.generateRandomState();
1377
- }
1378
- function buildAuthorizationUrl(config) {
1379
- const authorizationEndpoint = config.authServer.server.authorization_endpoint;
1380
- if (!authorizationEndpoint) {
1381
- throw new Error(
1382
- "Authorization server does not have an authorization_endpoint"
1383
- );
1384
- }
1385
- const authorizationUrl = new URL(authorizationEndpoint);
1386
- authorizationUrl.searchParams.set("client_id", config.clientId);
1387
- authorizationUrl.searchParams.set("redirect_uri", config.redirectUri);
1388
- authorizationUrl.searchParams.set("response_type", "code");
1389
- authorizationUrl.searchParams.set("scope", config.scopes.join(" "));
1390
- authorizationUrl.searchParams.set("code_challenge", config.codeChallenge);
1391
- authorizationUrl.searchParams.set("code_challenge_method", "S256");
1392
- authorizationUrl.searchParams.set("state", config.state);
1393
- if (config.resource) {
1394
- authorizationUrl.searchParams.set("resource", config.resource);
2192
/**
 * Reports whether a URL string points at the local loopback interface.
 *
 * `URL.prototype.hostname` serializes IPv6 literals with square brackets, so
 * "[::1]" is accepted in addition to "::1"; otherwise `http://[::1]:…`
 * endpoints would never be treated as local.
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {boolean} True for localhost / 127.0.0.1 / ::1 hosts; false for any
 *   other host or when `url` cannot be parsed.
 */
function isLocalhostUrl(url) {
  let hostname;
  try {
    hostname = new URL(url).hostname;
  } catch {
    return false;
  }
  return hostname === "localhost" || hostname === "127.0.0.1" || hostname === "::1" || hostname === "[::1]";
}
1398
- async function exchangeCodeForTokens(config) {
1399
- const client = {
1400
- client_id: config.clientId,
1401
- token_endpoint_auth_method: config.clientSecret ? "client_secret_basic" : "none"
1402
- };
1403
- const clientAuth = config.clientSecret ? oauth.ClientSecretBasic(config.clientSecret) : oauth.None();
1404
- const callbackUrl = new URL(config.redirectUri);
1405
- callbackUrl.searchParams.set("code", config.code);
1406
- callbackUrl.searchParams.set("state", config.state);
1407
- const validatedParams = oauth.validateAuthResponse(
1408
- config.authServer.server,
1409
- client,
1410
- callbackUrl,
1411
- config.state
1412
- );
1413
- const response = await oauth.authorizationCodeGrantRequest(
1414
- config.authServer.server,
1415
- client,
1416
- clientAuth,
1417
- validatedParams,
1418
- config.redirectUri,
1419
- config.codeVerifier
1420
- );
1421
- const result = await oauth.processAuthorizationCodeResponse(
1422
- config.authServer.server,
1423
- client,
1424
- response
1425
- );
1426
- return {
1427
- accessToken: result.access_token,
1428
- tokenType: result.token_type,
1429
- expiresIn: result.expires_in,
1430
- refreshToken: result.refresh_token,
1431
- scope: result.scope
1432
- };
1433
- }
1434
- async function refreshAccessToken(config) {
1435
- const client = {
1436
- client_id: config.clientId,
1437
- token_endpoint_auth_method: config.clientSecret ? "client_secret_basic" : "none"
1438
- };
1439
- const clientAuth = config.clientSecret ? oauth.ClientSecretBasic(config.clientSecret) : oauth.None();
1440
- const response = await oauth.refreshTokenGrantRequest(
1441
- config.authServer.server,
1442
- client,
1443
- clientAuth,
1444
- config.refreshToken
1445
- );
1446
- if (!response.ok) {
1447
- const contentType = response.headers.get("content-type") ?? "";
1448
- let errorMessage = `Token refresh failed: ${response.status} ${response.statusText}`;
1449
- try {
1450
- if (contentType.includes("application/json")) {
1451
- const errorBody = await response.clone().json();
1452
- if (errorBody.error) {
1453
- errorMessage = `Token refresh failed: ${errorBody.error}`;
1454
- if (errorBody.error_description) {
1455
- errorMessage += ` - ${errorBody.error_description}`;
1456
- }
1457
- }
1458
- } else {
1459
- const textBody = await response.clone().text();
1460
- if (textBody) {
1461
- errorMessage = `Token refresh failed: ${response.status} - ${textBody}`;
1462
- }
1463
- }
1464
- } catch {
2201
/**
 * Rejects discovered OAuth metadata whose authorization or token endpoint is
 * neither HTTPS nor a localhost URL.
 *
 * The scheme is read from the parsed URL, so it is matched case-insensitively
 * ("HTTPS://…" is accepted). Endpoints that are absent are skipped; endpoints
 * that cannot be parsed are treated as insecure.
 *
 * @param {{authorization_endpoint?: string, token_endpoint?: string}} authServer
 *   Authorization server metadata.
 * @throws {Error} When an endpoint is present but insecure.
 */
function validateAuthServerEndpoints(authServer) {
  const endpoints = [
    { name: "authorization_endpoint", url: authServer.authorization_endpoint },
    { name: "token_endpoint", url: authServer.token_endpoint }
  ];
  for (const { name, url } of endpoints) {
    if (!url) {
      continue;
    }
    let protocol = null;
    try {
      protocol = new URL(url).protocol;
    } catch {
      // Unparseable endpoint: leave protocol unset so it is rejected below.
    }
    if (protocol !== "https:" && !isLocalhostUrl(url)) {
      throw new Error(
        `OAuth discovery returned an insecure ${name}: "${url}". Only HTTPS endpoints are permitted for OAuth flows to prevent token interception.`
      );
    }
  }
}
1481
2214
  var MCP_PROTOCOL_VERSION = "2025-06-18";
1482
2215
  async function discoverProtectedResource(mcpServerUrl) {
@@ -1546,6 +2279,7 @@ async function discoverAuthorizationServer(authServerUrl) {
1546
2279
  })
1547
2280
  });
1548
2281
  const metadata = await oauth.processDiscoveryResponse(issuer, response);
2282
+ validateAuthServerEndpoints(metadata);
1549
2283
  return {
1550
2284
  server: metadata,
1551
2285
  issuer: authServerUrl
@@ -1691,7 +2425,7 @@ var FileOAuthStorage = class {
1691
2425
  };
1692
2426
 
1693
2427
  // src/auth/cli.ts
1694
- var debug = createDebug("mcp-server-tester:cli-oauth");
2428
+ var debug2 = createDebug("mcp-server-tester:cli-oauth");
1695
2429
  var DEFAULT_TIMEOUT_MS = 3e5;
1696
2430
  var DEFAULT_CLIENT_NAME = "@gleanwork/mcp-server-tester";
1697
2431
  var DEFAULT_METADATA_TTL_MS = 24 * 60 * 60 * 1e3;
@@ -1717,7 +2451,7 @@ var CLIOAuthClient = class {
1717
2451
  async getAccessToken() {
1718
2452
  const envTokens = loadTokensFromEnv();
1719
2453
  if (envTokens) {
1720
- debug("Using tokens from environment variables");
2454
+ debug2("Using tokens from environment variables");
1721
2455
  return {
1722
2456
  accessToken: envTokens.accessToken,
1723
2457
  tokenType: envTokens.tokenType,
@@ -1730,7 +2464,7 @@ var CLIOAuthClient = class {
1730
2464
  if (storedTokens?.accessToken) {
1731
2465
  const isValid = await this.storage.hasValidToken();
1732
2466
  if (isValid) {
1733
- debug("Using cached tokens from storage");
2467
+ debug2("Using cached tokens from storage");
1734
2468
  return {
1735
2469
  accessToken: storedTokens.accessToken,
1736
2470
  tokenType: storedTokens.tokenType,
@@ -1740,7 +2474,7 @@ var CLIOAuthClient = class {
1740
2474
  };
1741
2475
  }
1742
2476
  if (storedTokens.refreshToken) {
1743
- debug("Token expired, attempting refresh");
2477
+ debug2("Token expired, attempting refresh");
1744
2478
  try {
1745
2479
  const refreshedTokens = await this.refreshStoredToken(storedTokens);
1746
2480
  return {
@@ -1751,11 +2485,11 @@ var CLIOAuthClient = class {
1751
2485
  fromEnv: false
1752
2486
  };
1753
2487
  } catch (error) {
1754
- debug("Token refresh failed, will re-authenticate:", error);
2488
+ debug2("Token refresh failed, will re-authenticate:", error);
1755
2489
  }
1756
2490
  }
1757
2491
  }
1758
- debug("Performing full OAuth authentication");
2492
+ debug2("Performing full OAuth authentication");
1759
2493
  return this.authenticate();
1760
2494
  }
1761
2495
  /**
@@ -1771,7 +2505,7 @@ var CLIOAuthClient = class {
1771
2505
  async tryGetAccessToken() {
1772
2506
  const envTokens = loadTokensFromEnv();
1773
2507
  if (envTokens) {
1774
- debug("Using tokens from environment variables");
2508
+ debug2("Using tokens from environment variables");
1775
2509
  return {
1776
2510
  accessToken: envTokens.accessToken,
1777
2511
  tokenType: envTokens.tokenType,
@@ -1784,7 +2518,7 @@ var CLIOAuthClient = class {
1784
2518
  if (storedTokens?.accessToken) {
1785
2519
  const isValid = await this.storage.hasValidToken();
1786
2520
  if (isValid) {
1787
- debug("Using cached tokens from storage");
2521
+ debug2("Using cached tokens from storage");
1788
2522
  return {
1789
2523
  accessToken: storedTokens.accessToken,
1790
2524
  tokenType: storedTokens.tokenType,
@@ -1794,7 +2528,7 @@ var CLIOAuthClient = class {
1794
2528
  };
1795
2529
  }
1796
2530
  if (storedTokens.refreshToken) {
1797
- debug("Token expired, attempting refresh");
2531
+ debug2("Token expired, attempting refresh");
1798
2532
  try {
1799
2533
  const refreshedTokens = await this.refreshStoredToken(storedTokens);
1800
2534
  return {
@@ -1805,12 +2539,12 @@ var CLIOAuthClient = class {
1805
2539
  fromEnv: false
1806
2540
  };
1807
2541
  } catch (error) {
1808
- debug("Token refresh failed:", error);
2542
+ debug2("Token refresh failed:", error);
1809
2543
  return null;
1810
2544
  }
1811
2545
  }
1812
2546
  }
1813
- debug("No valid token available");
2547
+ debug2("No valid token available");
1814
2548
  return null;
1815
2549
  }
1816
2550
  /**
@@ -1845,7 +2579,7 @@ var CLIOAuthClient = class {
1845
2579
  */
1846
2580
  async clearCredentials() {
1847
2581
  await this.storage.deleteTokens();
1848
- debug("Cleared stored credentials");
2582
+ debug2("Cleared stored credentials");
1849
2583
  }
1850
2584
  /**
1851
2585
  * Discover protected resource and authorization server
@@ -1855,12 +2589,12 @@ var CLIOAuthClient = class {
1855
2589
  if (cachedMetadata) {
1856
2590
  const age = Date.now() - cachedMetadata.discoveredAt;
1857
2591
  if (age < DEFAULT_METADATA_TTL_MS) {
1858
- debug("Using cached server metadata (age: %dms)", age);
1859
- debug(
2592
+ debug2("Using cached server metadata (age: %dms)", age);
2593
+ debug2(
1860
2594
  "Cached protected resource scopes: %O",
1861
2595
  cachedMetadata.protectedResource.scopes_supported
1862
2596
  );
1863
- debug(
2597
+ debug2(
1864
2598
  "Cached auth server scopes: %O",
1865
2599
  cachedMetadata.authServer.server.scopes_supported
1866
2600
  );
@@ -1869,12 +2603,12 @@ var CLIOAuthClient = class {
1869
2603
  authServer: cachedMetadata.authServer
1870
2604
  };
1871
2605
  }
1872
- debug("Cached server metadata is stale (age: %dms), re-discovering", age);
2606
+ debug2("Cached server metadata is stale (age: %dms), re-discovering", age);
1873
2607
  }
1874
- debug("Discovering protected resource:", this.config.mcpServerUrl);
2608
+ debug2("Discovering protected resource:", this.config.mcpServerUrl);
1875
2609
  const prResult = await discoverProtectedResource(this.config.mcpServerUrl);
1876
- debug("Found protected resource:", prResult.metadata.resource);
1877
- debug(
2610
+ debug2("Found protected resource:", prResult.metadata.resource);
2611
+ debug2(
1878
2612
  "Protected resource scopes_supported: %O",
1879
2613
  prResult.metadata.scopes_supported
1880
2614
  );
@@ -1884,10 +2618,10 @@ var CLIOAuthClient = class {
1884
2618
  "No authorization servers found in protected resource metadata"
1885
2619
  );
1886
2620
  }
1887
- debug("Discovering authorization server:", authServerUrl);
2621
+ debug2("Discovering authorization server:", authServerUrl);
1888
2622
  const authServer = await discoverAuthorizationServer(authServerUrl);
1889
- debug("Found authorization server:", authServer.issuer);
1890
- debug(
2623
+ debug2("Found authorization server:", authServer.issuer);
2624
+ debug2(
1891
2625
  "Auth server scopes_supported: %O",
1892
2626
  authServer.server.scopes_supported
1893
2627
  );
@@ -1907,7 +2641,7 @@ var CLIOAuthClient = class {
1907
2641
  */
1908
2642
  async getOrRegisterClient(authServer) {
1909
2643
  if (this.config.clientId) {
1910
- debug("Using pre-configured client ID");
2644
+ debug2("Using pre-configured client ID");
1911
2645
  return {
1912
2646
  clientId: this.config.clientId,
1913
2647
  clientSecret: this.config.clientSecret
@@ -1915,10 +2649,10 @@ var CLIOAuthClient = class {
1915
2649
  }
1916
2650
  const cachedClient = await this.storage.loadClient();
1917
2651
  if (cachedClient?.clientId) {
1918
- debug("Using cached client registration");
2652
+ debug2("Using cached client registration");
1919
2653
  return cachedClient;
1920
2654
  }
1921
- debug("Registering new client via DCR");
2655
+ debug2("Registering new client via DCR");
1922
2656
  const client = await this.registerClient(authServer);
1923
2657
  await this.storage.saveClient(client);
1924
2658
  return client;
@@ -1956,7 +2690,7 @@ ${errorText}`
1956
2690
  );
1957
2691
  }
1958
2692
  const data = await response.json();
1959
- debug("Client registered:", data.client_id);
2693
+ debug2("Client registered:", data.client_id);
1960
2694
  return {
1961
2695
  clientId: data.client_id,
1962
2696
  clientSecret: data.client_secret,
@@ -1974,17 +2708,17 @@ ${errorText}`
1974
2708
  const redirectUri = `http://127.0.0.1:${port}/callback`;
1975
2709
  try {
1976
2710
  const requestedScopes = this.config.scopes ?? protectedResource.scopes_supported ?? authServer.server.scopes_supported ?? ["openid"];
1977
- debug("Scope resolution:");
1978
- debug(" - User config scopes: %O", this.config.scopes);
1979
- debug(
2711
+ debug2("Scope resolution:");
2712
+ debug2(" - User config scopes: %O", this.config.scopes);
2713
+ debug2(
1980
2714
  " - Protected resource scopes_supported: %O",
1981
2715
  protectedResource.scopes_supported
1982
2716
  );
1983
- debug(
2717
+ debug2(
1984
2718
  " - Auth server scopes_supported: %O",
1985
2719
  authServer.server.scopes_supported
1986
2720
  );
1987
- debug(" - Final requested scopes: %O", requestedScopes);
2721
+ debug2(" - Final requested scopes: %O", requestedScopes);
1988
2722
  const authUrl = buildAuthorizationUrl({
1989
2723
  authServer,
1990
2724
  clientId: client.clientId,
@@ -1994,16 +2728,19 @@ ${errorText}`
1994
2728
  state,
1995
2729
  resource: protectedResource.resource
1996
2730
  });
1997
- debug("Authorization URL: %s", authUrl.toString());
1998
- debug("Authorization URL params:");
1999
- debug(" - client_id: %s", authUrl.searchParams.get("client_id"));
2000
- debug(" - redirect_uri: %s", authUrl.searchParams.get("redirect_uri"));
2001
- debug(" - scope: %s", authUrl.searchParams.get("scope"));
2002
- debug(" - resource: %s", authUrl.searchParams.get("resource"));
2731
+ debug2(
2732
+ "Authorization URL (base): %s",
2733
+ `${authUrl.origin}${authUrl.pathname}`
2734
+ );
2735
+ debug2("Authorization URL params:");
2736
+ debug2(" - client_id: %s", authUrl.searchParams.get("client_id"));
2737
+ debug2(" - redirect_uri: %s", authUrl.searchParams.get("redirect_uri"));
2738
+ debug2(" - scope: %s", authUrl.searchParams.get("scope"));
2739
+ debug2(" - resource: %s", authUrl.searchParams.get("resource"));
2003
2740
  await this.openBrowserOrPrintUrl(authUrl);
2004
- debug("Waiting for OAuth callback...");
2741
+ debug2("Waiting for OAuth callback...");
2005
2742
  const code = await codePromise;
2006
- debug("Received authorization code");
2743
+ debug2("Received authorization code");
2007
2744
  const tokenResult = await exchangeCodeForTokens({
2008
2745
  authServer,
2009
2746
  clientId: client.clientId,
@@ -2041,14 +2778,14 @@ ${errorText}`
2041
2778
  let clientId;
2042
2779
  let clientSecret;
2043
2780
  if (storedTokens.clientId) {
2044
- debug("Using clientId from stored tokens for refresh");
2781
+ debug2("Using clientId from stored tokens for refresh");
2045
2782
  clientId = storedTokens.clientId;
2046
2783
  const storedClient = await this.storage.loadClient();
2047
2784
  if (storedClient?.clientId === clientId) {
2048
2785
  clientSecret = storedClient.clientSecret;
2049
2786
  }
2050
2787
  } else {
2051
- debug(
2788
+ debug2(
2052
2789
  "No clientId in stored tokens, falling back to stored client (legacy behavior)"
2053
2790
  );
2054
2791
  const client = await this.getOrRegisterClient(metadata.authServer);
@@ -2142,7 +2879,7 @@ ${errorText}`
2142
2879
  const preferredPort = this.config.callbackPort ?? 0;
2143
2880
  server.listen(preferredPort, "127.0.0.1", () => {
2144
2881
  const address = server.address();
2145
- debug("Callback server listening on port", address.port);
2882
+ debug2("Callback server listening on port", address.port);
2146
2883
  resolve({ port: address.port, codePromise, close: forceClose });
2147
2884
  });
2148
2885
  server.on("error", (err) => {
@@ -2166,9 +2903,9 @@ ${errorText}`
2166
2903
  try {
2167
2904
  const open = await import('open');
2168
2905
  await open.default(url.toString());
2169
- debug("Opened browser for authentication");
2906
+ debug2("Opened browser for authentication");
2170
2907
  } catch (error) {
2171
- debug("Failed to open browser:", error);
2908
+ debug2("Failed to open browser:", error);
2172
2909
  console.log("\nFailed to open browser automatically.");
2173
2910
  console.log("Please open the following URL manually:\n");
2174
2911
  console.log(url.toString() + "\n");
@@ -2314,30 +3051,31 @@ var test = test$1.extend({
2314
3051
  );
2315
3052
  }
2316
3053
  let resolvedAuthType = "none";
3054
+ const httpConfig = isHttpConfig(mcpConfig) ? mcpConfig : null;
2317
3055
  let authProvider;
2318
- if (mcpConfig.auth?.oauth?.authStatePath) {
3056
+ if (httpConfig?.auth?.oauth?.authStatePath) {
2319
3057
  authProvider = new PlaywrightOAuthClientProvider({
2320
- storagePath: mcpConfig.auth.oauth.authStatePath,
2321
- redirectUri: mcpConfig.auth.oauth.redirectUri ?? "http://localhost:3000/oauth/callback",
2322
- clientId: mcpConfig.auth.oauth.clientId,
2323
- clientSecret: mcpConfig.auth.oauth.clientSecret
3058
+ storagePath: httpConfig.auth.oauth.authStatePath,
3059
+ redirectUri: httpConfig.auth.oauth.redirectUri ?? "http://localhost:3000/oauth/callback",
3060
+ clientId: httpConfig.auth.oauth.clientId,
3061
+ clientSecret: httpConfig.auth.oauth.clientSecret
2324
3062
  });
2325
3063
  resolvedAuthType = "oauth";
2326
3064
  }
2327
3065
  let effectiveConfig = mcpConfig;
2328
- if (mcpConfig.auth?.accessToken) {
3066
+ if (httpConfig?.auth?.accessToken) {
2329
3067
  resolvedAuthType = "api-token";
2330
3068
  }
2331
- if (isHttpConfig(mcpConfig) && !mcpConfig.auth?.accessToken && !mcpConfig.auth?.oauth?.authStatePath) {
3069
+ if (httpConfig && !httpConfig.auth?.accessToken && !httpConfig.auth?.oauth?.authStatePath) {
2332
3070
  const cliClient = new CLIOAuthClient({
2333
- mcpServerUrl: mcpConfig.serverUrl
3071
+ mcpServerUrl: httpConfig.serverUrl
2334
3072
  });
2335
3073
  const tokenResult = await cliClient.tryGetAccessToken();
2336
3074
  if (tokenResult) {
2337
3075
  effectiveConfig = {
2338
- ...mcpConfig,
3076
+ ...httpConfig,
2339
3077
  auth: {
2340
- ...mcpConfig.auth,
3078
+ ...httpConfig.auth,
2341
3079
  accessToken: tokenResult.accessToken
2342
3080
  }
2343
3081
  };
@@ -2348,7 +3086,7 @@ var test = test$1.extend({
2348
3086
  const client = await createMCPClientForConfig(effectiveConfig, {
2349
3087
  clientInfo: {
2350
3088
  name: "@gleanwork/mcp-server-tester",
2351
- version: "0.1.0"
3089
+ version: package_default.version
2352
3090
  },
2353
3091
  authProvider
2354
3092
  });
@@ -2365,9 +3103,11 @@ var test = test$1.extend({
2365
3103
  * Automatically tracks all MCP operations for the reporter
2366
3104
  */
2367
3105
  mcp: async ({ mcpClient, _mcpFixtureState }, use, testInfo) => {
3106
+ const useConfig = testInfo.project.use;
2368
3107
  const api = createMCPFixture(mcpClient, testInfo, {
2369
3108
  authType: _mcpFixtureState.resolvedAuthType,
2370
- project: testInfo.project.name
3109
+ project: testInfo.project.name,
3110
+ callTimeoutMs: useConfig.mcpConfig?.callTimeoutMs
2371
3111
  });
2372
3112
  await use(api);
2373
3113
  }