npm - @gleanwork/mcp-server-tester - Versions diffs - 0.12.0 → 1.0.0-beta.1 - Mend

@gleanwork/mcp-server-tester 0.12.0 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/README.md +120 -337
package/dist/cli/index.js +468 -176
package/dist/fixtures/mcp.d.ts +121 -44
package/dist/fixtures/mcp.js +988 -248
package/dist/fixtures/mcp.js.map +1 -1
package/dist/fixtures/mcpAuth.js +6 -2
package/dist/fixtures/mcpAuth.js.map +1 -1
package/dist/index.cjs +5034 -1284
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +1697 -575
package/dist/index.d.ts +1697 -575
package/dist/index.js +5020 -1280
package/dist/index.js.map +1 -1
package/dist/reporters/mcpReporter.cjs +35 -16
package/dist/reporters/mcpReporter.cjs.map +1 -1
package/dist/reporters/mcpReporter.d.cts +8 -3
package/dist/reporters/mcpReporter.d.ts +8 -3
package/dist/reporters/mcpReporter.js +36 -17
package/dist/reporters/mcpReporter.js.map +1 -1
package/dist/reporters/ui-dist/app.js +5 -5
package/dist/reporters/ui-dist/styles.css +1 -1
package/package.json +64 -8
package/src/reporters/ui-dist/app.js +5 -5
package/src/reporters/ui-dist/styles.css +1 -1

package/dist/fixtures/mcp.js CHANGED Viewed

@@ -1,14 +1,17 @@
 import { expect as expect$1, test as test$1 } from '@playwright/test';
 import { query } from '@anthropic-ai/claude-agent-sdk';
+import { z } from 'zod';
 import { Client } from '@modelcontextprotocol/sdk/client/index.js';
 import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
 import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
-import { z } from 'zod';
+import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js';
 import createDebug from 'debug';
+import { ProxyAgent, Agent } from 'undici';
+import { readFileSync } from 'fs';
+import * as oauth from 'oauth4webapi';
 import * as fs2 from 'fs/promises';
 import * as path2 from 'path';
 import * as http from 'http';
-import * as oauth from 'oauth4webapi';
 import { homedir } from 'os';
 var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
@@ -117,7 +120,7 @@ function isCallToolResult(value) {
     return false;
   }
   const v = value;
-  return Array.isArray(v.content) || typeof v.isError === "boolean";
+  return Array.isArray(v.content);
 }
 function extractTextFromContentArray(content) {
   const textParts = [];
@@ -625,6 +628,30 @@ function toBeToolError(received, expected = true) {
     }
   };
 }
+// src/judge/rubrics.ts
+// Built-in rubric texts, keyed by rubric name. Each entry tells the judge what to
+// evaluate and anchors the score at the 1.0, 0.75, 0.5, 0.25 and 0.0 levels.
+var BUILT_IN_RUBRICS = {
+  correctness: "Evaluate whether the response is factually correct and accurately answers the question. Compare against the reference answer if provided. Score 1.0 for fully correct with no errors; Score 0.75 for mostly correct with one minor inaccuracy or omission; Score 0.5 for partially correct \u2014 answers part of the question but misses key elements; Score 0.25 for minimally relevant but substantially incorrect or missing most key details; Score 0.0 for incorrect, irrelevant, or directly contradicting the reference.",
+  completeness: "Evaluate whether the response fully addresses all aspects of the question. Score 1.0 if the response covers all key points comprehensively; Score 0.75 if the response covers most key points with one minor gap; Score 0.5 if the response partially answers \u2014 covers some aspects but misses others; Score 0.25 if the response touches on the topic but misses most key aspects; Score 0.0 if major aspects of the question are entirely missing or the response is off-topic.",
+  groundedness: "Evaluate whether all claims in the response are supported by the retrieved context or reference. Penalize unsupported assertions or hallucinated facts. Score 1.0 for fully grounded \u2014 every claim is traceable to the provided context; Score 0.75 for mostly grounded with one minor unsupported detail; Score 0.5 for partially grounded \u2014 some claims are supported but notable hallucinations are present; Score 0.25 for minimally grounded \u2014 most claims are unsupported or invented; Score 0.0 for completely hallucinated or contradicting the provided context.",
+  "instruction-following": "Evaluate whether the response follows the instructions given in the question. Check format, tone, constraints, and task completion. Score 1.0 for full compliance \u2014 all instructions are followed precisely; Score 0.75 for mostly compliant with one minor deviation from the instructions; Score 0.5 for partial compliance \u2014 some instructions followed but key constraints violated; Score 0.25 for minimal compliance \u2014 the response loosely addresses the task but ignores most instructions; Score 0.0 for non-compliance \u2014 the response disregards the instructions entirely.",
+  conciseness: "Evaluate whether the response is appropriately concise without losing important information. Penalize unnecessary verbosity, padding, or repetition. Score 1.0 for well-sized \u2014 concise and complete with no unnecessary content; Score 0.75 for slightly verbose but no information is lost or repeated; Score 0.5 for moderately verbose \u2014 some padding or repetition that reduces clarity; Score 0.25 for excessively verbose \u2014 significantly overlong with substantial filler or repetition; Score 0.0 for extremely verbose \u2014 so padded or repetitive that the core answer is obscured."
+};
+function resolveRubric(rubric) {
+  if (typeof rubric === "string") {
+    return BUILT_IN_RUBRICS[rubric];
+  }
+  return rubric.text;
+}
+// src/judge/judgeTypes.ts
+// Shape every parsed judge reply is validated against: a boolean verdict, a
+// numeric score constrained to 0..1, and a free-text explanation.
+var JudgeResponseSchema = z.object({
+  pass: z.boolean(),
+  score: z.number().min(0).max(1),
+  reasoning: z.string()
+});
+// src/judge/claudeAgentJudge.ts
 function createClaudeAgentJudge(config) {
   const model = config.model ?? "claude-sonnet-4-20250514";
   const maxBudgetUsd = config.maxBudgetUsd ?? 0.1;
@@ -685,7 +712,7 @@ function createClaudeAgentJudge(config) {
           cacheCreationInputTokens: resultMessage.usage?.cache_creation_input_tokens
         };
         return {
-          pass: parsed.pass ?? false,
+          pass: parsed.pass,
           score: parsed.score,
           reasoning: parsed.reasoning,
           usage,
@@ -704,21 +731,19 @@ function buildSystemPrompt() {
   return 'You are an expert evaluator. Evaluate the candidate response based on the rubric provided. Respond ONLY with valid JSON in this exact format: {"pass": boolean, "score": number (0-1), "reasoning": string}. Do not include any other text, markdown formatting, or code blocks.';
 }
 function buildJudgePrompt(candidate, reference, rubric) {
+  const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
+  const referenceStr = reference !== null && reference !== void 0 ? typeof reference === "string" ? reference : JSON.stringify(reference, null, 2) : null;
   const parts = [];
-  parts.push("# Evaluation Task\n");
+  parts.push("Rubric:\n");
   parts.push(rubric);
-  parts.push("\n\n# Candidate Response\n");
-  parts.push(
-    typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2)
-  );
-  if (reference !== null && reference !== void 0) {
-    parts.push("\n\n# Reference Response\n");
-    parts.push(
-      typeof reference === "string" ? reference : JSON.stringify(reference, null, 2)
-    );
-  }
+  parts.push("\n\n<candidate_response>\n");
+  parts.push(candidateStr);
+  parts.push("\n</candidate_response>\n\n");
+  parts.push("<reference_answer>\n");
+  parts.push(referenceStr ?? "No reference provided.");
+  parts.push("\n</reference_answer>\n\n");
   parts.push(
-    "\n\n# Instructions\nEvaluate the candidate response based on the rubric. " + (reference !== null && reference !== void 0 ? "Compare it against the reference response if helpful. " : "") + 'Respond with JSON containing "pass" (boolean), "score" (0-1), and "reasoning" (string).'
+    "Evaluate the candidate response against the rubric" + (referenceStr !== null ? ", comparing it with the reference answer if helpful" : "") + '. Return JSON: {"pass": boolean, "score": number (0-1), "reasoning": string}'
   );
   return parts.join("");
 }
@@ -734,75 +759,320 @@ function parseJudgeResponse(text) {
     jsonText = jsonText.slice(0, -3);
   }
   jsonText = jsonText.trim();
+  let parsed;
   try {
-    return JSON.parse(jsonText);
+    parsed = JSON.parse(jsonText);
   } catch {
     const jsonMatch = jsonText.match(/\{[\s\S]*"pass"[\s\S]*\}/);
     if (jsonMatch) {
-      return JSON.parse(jsonMatch[0]);
+      parsed = JSON.parse(jsonMatch[0]);
+    } else {
+      throw new Error(`Failed to parse judge response as JSON: ${text}`);
     }
+  }
+  const result = JudgeResponseSchema.safeParse(parsed);
+  if (!result.success) {
+    throw new Error(
+      `Judge returned invalid response. Expected {pass, score, reasoning} but got: ${jsonText.slice(0, 500)}
+Validation errors: ${JSON.stringify(result.error.issues)}`
+    );
+  }
+  return result.data;
+}
+// src/judge/openaiJudge.ts
+/**
+ * Create a judge that scores responses through the OpenAI chat completions API.
+ *
+ * The API key is read from the environment variable named by
+ * `config.apiKeyEnvVar` (default "OPENAI_API_KEY") when the judge is created.
+ *
+ * @param {object} [config] - Optional settings.
+ * @param {string} [config.apiKeyEnvVar] - Environment variable holding the API key.
+ * @param {string} [config.model] - Model name; defaults to "gpt-4o".
+ * @param {number} [config.maxTokens] - Completion token limit; defaults to 1000.
+ * @param {number} [config.temperature] - Sampling temperature; defaults to 0.
+ * @returns {{evaluate: Function}} Object whose async
+ *   `evaluate(candidate, reference, rubric)` resolves to
+ *   `{ pass, score, reasoning, usage }`.
+ * @throws {Error} If the environment variable is unset or empty.
+ */
+function createOpenAIJudge(config = {}) {
+  const apiKeyEnvVar = config.apiKeyEnvVar ?? "OPENAI_API_KEY";
+  const apiKey = process.env[apiKeyEnvVar];
+  if (!apiKey) {
+    throw new Error(
+      `OpenAI judge requires an API key. Set the ${apiKeyEnvVar} environment variable.`
+    );
+  }
+  const model = config.model ?? "gpt-4o";
+  const maxTokens = config.maxTokens ?? 1e3;
+  const temperature = config.temperature ?? 0;
+  return {
+    async evaluate(candidate, reference, rubric) {
+      // The `openai` package is imported on demand inside evaluate; a failed
+      // import is rethrown with install instructions.
+      let openaiModule;
+      try {
+        openaiModule = await import('openai');
+      } catch (err) {
+        throw new Error(
+          `OpenAI judge requires the \`openai\` package. Install it with: npm install openai
+Original error: ${err instanceof Error ? err.message : String(err)}`
+        );
+      }
+      const client = new openaiModule.default({ apiKey });
+      const prompt = buildJudgePrompt2(candidate, reference, rubric);
+      // Only the completion request itself is timed.
+      const startTime = Date.now();
+      const completion = await client.chat.completions.create({
+        model,
+        max_tokens: maxTokens,
+        temperature,
+        messages: [
+          {
+            role: "system",
+            content: 'You are an expert evaluator. Respond with valid JSON only: {"pass": true|false, "score": 0.0-1.0, "reasoning": "explanation"}'
+          },
+          { role: "user", content: prompt }
+        ]
+      });
+      const durationMs = Date.now() - startTime;
+      // A missing first choice or null content becomes "", which then fails JSON parsing.
+      const text = completion.choices[0]?.message.content ?? "";
+      const parsed = parseJudgeResponse2(text);
+      return {
+        pass: parsed.pass,
+        score: parsed.score,
+        reasoning: parsed.reasoning,
+        usage: {
+          inputTokens: completion.usage?.prompt_tokens ?? 0,
+          outputTokens: completion.usage?.completion_tokens ?? 0,
+          // Cost is not computed here; it is always reported as 0.
+          totalCostUsd: 0,
+          durationMs
+        }
+      };
+    }
+  };
+}
+/**
+ * Build the user prompt sent to the judge model.
+ *
+ * Non-string candidates and references are rendered with
+ * `JSON.stringify(value, null, 2)`. A null or undefined reference is rendered as
+ * the literal text "No reference provided.".
+ *
+ * @param {*} candidate - Response under evaluation.
+ * @param {*} reference - Reference answer, or null/undefined when there is none.
+ * @param {string} rubric - Rubric text placed at the top of the prompt.
+ * @returns {string} Prompt with the rubric, the candidate and the reference in
+ *   tagged sections, followed by the JSON reply instruction.
+ */
+function buildJudgePrompt2(candidate, reference, rubric) {
+  const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
+  const referenceStr = reference !== null && reference !== void 0 ? typeof reference === "string" ? reference : JSON.stringify(reference, null, 2) : null;
+  return `Rubric:
+${rubric}
+<candidate_response>
+${candidateStr}
+</candidate_response>
+<reference_answer>
+${referenceStr ?? "No reference provided."}
+</reference_answer>
+Evaluate and return JSON: {"pass": boolean, "score": number (0-1), "reasoning": string}`;
+}
+/**
+ * Parse and validate the raw text returned by a judge model.
+ *
+ * @param {string} text - Raw model output.
+ * @returns {{pass: boolean, score: number, reasoning: string}} The validated reply.
+ * @throws {Error} If the text is not valid JSON after fence removal, or if the
+ *   parsed value does not satisfy JudgeResponseSchema.
+ */
+function parseJudgeResponse2(text) {
+  // Remove every Markdown code-fence marker (```json or ```) wherever it occurs.
+  const cleaned = text.replace(/```json\n?/g, "").replace(/```\n?/g, "").trim();
+  let parsed;
+  try {
+    parsed = JSON.parse(cleaned);
+  } catch {
     throw new Error(`Failed to parse judge response as JSON: ${text}`);
   }
+  const result = JudgeResponseSchema.safeParse(parsed);
+  if (!result.success) {
+    // Only the first 500 characters of the reply are echoed in the error.
+    throw new Error(
+      `Judge returned invalid response. Expected {pass, score, reasoning} but got: ${cleaned.slice(0, 500)}
+Validation errors: ${JSON.stringify(result.error.issues)}`
+    );
+  }
+  return result.data;
+}
+// src/judge/googleJudge.ts
+function createGoogleJudge(config = {}) {
+  const apiKeyEnvVar = config.apiKeyEnvVar ?? "GOOGLE_API_KEY";
+  const apiKey = process.env[apiKeyEnvVar];
+  if (!apiKey) {
+    throw new Error(
+      `Google judge requires an API key. Set the ${apiKeyEnvVar} environment variable.`
+    );
+  }
+  const model = config.model ?? "gemini-2.0-flash";
+  const maxTokens = config.maxTokens ?? 1e3;
+  return {
+    async evaluate(candidate, reference, rubric) {
+      let googleModule;
+      try {
+        googleModule = await import('@google/generative-ai');
+      } catch (err) {
+        throw new Error(
+          `Google judge requires the \`@google/generative-ai\` package. Install it with: npm install @google/generative-ai
+Original error: ${err instanceof Error ? err.message : String(err)}`
+        );
+      }
+      const genAI = new googleModule.GoogleGenerativeAI(apiKey);
+      const gemini = genAI.getGenerativeModel({
+        model,
+        generationConfig: {
+          maxOutputTokens: maxTokens,
+          temperature: 0
+        },
+        systemInstruction: 'You are an expert evaluator. Respond with valid JSON only: {"pass": true|false, "score": 0.0-1.0, "reasoning": "explanation"}'
+      });
+      const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
+      const referenceStr = reference !== null && reference !== void 0 ? typeof reference === "string" ? reference : JSON.stringify(reference, null, 2) : null;
+      const prompt = `Rubric:
+${rubric}
+<candidate_response>
+${candidateStr}
+</candidate_response>
+<reference_answer>
+${referenceStr ?? "No reference provided."}
+</reference_answer>
+Evaluate and return JSON: {"pass": boolean, "score": number (0-1), "reasoning": string}`;
+      const startTime = Date.now();
+      const result = await gemini.generateContent(prompt);
+      const durationMs = Date.now() - startTime;
+      const text = result.response.text();
+      const cleaned = text.replace(/```json\n?/g, "").replace(/```\n?/g, "").trim();
+      let parsedRaw;
+      try {
+        parsedRaw = JSON.parse(cleaned);
+      } catch {
+        throw new Error(`Failed to parse judge response as JSON: ${text}`);
+      }
+      const validation = JudgeResponseSchema.safeParse(parsedRaw);
+      if (!validation.success) {
+        throw new Error(
+          `Judge returned invalid response. Expected {pass, score, reasoning} but got: ${cleaned.slice(0, 500)}
+Validation errors: ${JSON.stringify(validation.error.issues)}`
+        );
+      }
+      const { pass, score, reasoning } = validation.data;
+      return {
+        pass,
+        score,
+        reasoning,
+        usage: {
+          inputTokens: result.response.usageMetadata?.promptTokenCount ?? 0,
+          outputTokens: result.response.usageMetadata?.candidatesTokenCount ?? 0,
+          totalCostUsd: 0,
+          durationMs
+        }
+      };
+    }
+  };
 }
 // src/judge/judgeClient.ts
 function createJudge(config = {}) {
-  const provider = config.provider ?? "claude";
+  const provider = config.provider ?? "anthropic";
   switch (provider) {
-    case "claude":
     case "anthropic":
       return createClaudeAgentJudge(config);
     case "openai":
-      throw new Error(
-        'OpenAI provider is no longer supported. Please use createJudge() without specifying provider, or use provider: "claude". See migration guide at https://github.com/gleanwork/mcp-server-tester/blob/main/docs/migration-v0.11.md'
-      );
-    case "custom-http":
-      throw new Error(
-        "custom-http provider is no longer supported. Please use createJudge() without specifying provider."
-      );
+      return createOpenAIJudge(config);
+    case "google":
+      return createGoogleJudge(config);
     default:
       throw new Error(`Unsupported LLM provider: ${String(provider)}`);
   }
 }
-// src/assertions/matchers/toPassToolJudge.ts
-var DEFAULT_PASSING_THRESHOLD = 0.7;
-var DEFAULT_JUDGE_CONFIG = {};
-async function toPassToolJudge(received, rubric, options = {}) {
+// src/assertions/validators/judge.ts
+/**
+ * Population standard deviation of `scores` around a precomputed mean.
+ *
+ * @param {number[]} scores - Score samples.
+ * @param {number} mean - Mean of `scores`, supplied by the caller.
+ * @returns {number} The standard deviation, or 0 when there are fewer than two scores.
+ */
+function computeStdDev(scores, mean) {
+  if (scores.length <= 1) return 0;
+  // Divides by N rather than N - 1, so this is the population form.
+  const variance = scores.reduce((sum, s) => sum + (s - mean) ** 2, 0) / scores.length;
+  return Math.sqrt(variance);
+}
+async function validateJudge(response, config) {
   const {
-    reference = null,
-    passingThreshold = DEFAULT_PASSING_THRESHOLD,
-    judgeConfig = DEFAULT_JUDGE_CONFIG
-  } = options;
-  const judge = createJudge(judgeConfig);
+    rubric,
+    reference,
+    threshold = 0.7,
+    reps = 1,
+    provider,
+    model,
+    apiKeyEnvVar,
+    maxTokens,
+    temperature,
+    maxBudgetUsd,
+    maxToolOutputSize
+  } = config;
+  const resolvedRubric = resolveRubric(rubric);
+  const judgeConfig = {
+    ...provider !== void 0 && { provider },
+    ...model !== void 0 && { model },
+    ...apiKeyEnvVar !== void 0 && { apiKeyEnvVar },
+    ...maxTokens !== void 0 && { maxTokens },
+    ...temperature !== void 0 && { temperature },
+    ...maxBudgetUsd !== void 0 && { maxBudgetUsd },
+    ...maxToolOutputSize !== void 0 && { maxToolOutputSize }
+  };
   try {
-    const result = await judge.evaluate(received, reference, rubric);
-    const score = result.score ?? (result.pass ? 1 : 0);
-    const passes = score >= passingThreshold;
-    if (this.isNot) {
-      return {
-        pass: !passes,
-        message: () => passes ? `Expected judge evaluation to fail, but it passed with score ${score.toFixed(2)}` : `Judge evaluation failed as expected with score ${score.toFixed(2)}`
-      };
+    const judge = createJudge(judgeConfig);
+    const scores = [];
+    let lastReasoning;
+    for (let i = 0; i < reps; i++) {
+      const judgeResult = await judge.evaluate(
+        response,
+        reference ?? null,
+        resolvedRubric
+      );
+      scores.push(judgeResult.score ?? (judgeResult.pass ? 1 : 0));
+      lastReasoning = judgeResult.reasoning;
     }
-    if (passes) {
+    if (scores.length === 0) {
       return {
-        pass: true,
-        message: () => `Judge evaluation passed with score ${score.toFixed(2)} (threshold: ${passingThreshold})`
+        pass: false,
+        message: "Judge evaluation failed: no scores collected"
       };
     }
+    const meanScore = scores.reduce((a, b) => a + b, 0) / scores.length;
+    const passed = meanScore >= threshold;
+    const repNote = reps > 1 ? ` (mean of ${reps} reps: [${scores.map((s) => s.toFixed(2)).join(", ")}])` : "";
+    let stdDev;
+    let highVariance;
+    if (reps > 1) {
+      stdDev = computeStdDev(scores, meanScore);
+      highVariance = stdDev > 0.2;
+      if (highVariance) {
+        console.warn(
+          `[mcp-server-tester] Judge scores have high variance (stdDev=${stdDev.toFixed(2)}, scores=[${scores.map((s) => s.toFixed(2)).join(", ")}]). The rubric may be ambiguous.`
+        );
+      }
+    }
     return {
-      pass: false,
-      message: () => `Judge evaluation failed with score ${score.toFixed(2)} (threshold: ${passingThreshold}). Reasoning: ${result.reasoning ?? "No reasoning provided"}`
+      pass: passed,
+      message: passed ? `Judge passed with score ${meanScore.toFixed(2)}${repNote}` : `Judge failed with score ${meanScore.toFixed(2)} (threshold: ${threshold})${repNote}. ${lastReasoning ?? ""}`,
+      details: reps > 1 ? {
+        scores,
+        scoreStdDev: stdDev,
+        highVariance
+      } : void 0
     };
-  } catch (error) {
+  } catch (err) {
     return {
       pass: false,
-      message: () => `Judge evaluation failed with error: ${error instanceof Error ? error.message : String(error)}`
+      message: `Judge evaluation error: ${err instanceof Error ? err.message : String(err)}`
     };
   }
 }
+// src/assertions/matchers/toPassToolJudge.ts
+var DEFAULT_PASSING_THRESHOLD = 0.7;
+async function toPassToolJudge(received, rubric, options = {}) {
+  const {
+    reference = null,
+    passingThreshold = DEFAULT_PASSING_THRESHOLD,
+    reps,
+    provider,
+    model
+  } = options;
+  const validation = await validateJudge(received, {
+    rubric,
+    reference: reference ?? void 0,
+    threshold: passingThreshold,
+    ...reps !== void 0 && { reps },
+    ...provider !== void 0 && { provider },
+    ...model !== void 0 && { model }
+  });
+  if (this.isNot) {
+    return {
+      pass: !validation.pass,
+      message: () => validation.pass ? `Expected judge evaluation to fail, but it passed` : `Judge evaluation failed as expected`
+    };
+  }
+  return {
+    pass: validation.pass,
+    message: () => validation.message
+  };
+}
 // src/assertions/validators/size.ts
 function validateSize(response, options) {
   const { maxBytes, minBytes } = options;
@@ -903,6 +1173,144 @@ async function toSatisfyToolPredicate(received, predicate, description) {
   }
 }
+// src/assertions/validators/toolCalls.ts
+/**
+ * Structural check for a simulation result: a non-null object that has a
+ * `success` property and an array-valued `toolCalls` property.
+ *
+ * @param {*} value - Value to inspect.
+ * @returns {boolean} True when `value` has that shape.
+ */
+function isSimulationResult(value) {
+  return typeof value === "object" && value !== null && "success" in value && "toolCalls" in value && Array.isArray(value.toolCalls);
+}
+/**
+ * Check that every key of `expected` is matched in `actual`; extra keys in
+ * `actual` are ignored.
+ *
+ * @param {object} actual - Object being inspected.
+ * @param {object} expected - Subset of keys and values that must be present.
+ * @returns {boolean} True when all expected entries match.
+ */
+function partialMatch(actual, expected) {
+  return Object.entries(expected).every(([k, v]) => {
+    const actualVal = actual[k];
+    // When both sides are non-null objects (arrays included), match recursively,
+    // so nested values are also compared as subsets.
+    if (typeof v === "object" && v !== null && typeof actualVal === "object" && actualVal !== null) {
+      return partialMatch(
+        actualVal,
+        v
+      );
+    }
+    // Otherwise compare the JSON serialisations of the two values.
+    return JSON.stringify(actualVal) === JSON.stringify(v);
+  });
+}
+/**
+ * Find the first call at or after `startIndex` that matches `expected`.
+ *
+ * A call matches when its name is equal and, if `expected.arguments` is given,
+ * its arguments partially match them (missing arguments count as `{}`).
+ *
+ * @param {Array<{name: string, arguments?: object}>} actual - Recorded calls.
+ * @param {{name: string, arguments?: object}} expected - Call to look for.
+ * @param {number} [startIndex=0] - Index at which the search begins.
+ * @returns {number} Index of the matching call, or -1 when none matches.
+ */
+function findMatchingCall(actual, expected, startIndex = 0) {
+  for (let i = startIndex; i < actual.length; i++) {
+    const call = actual[i];
+    if (call.name !== expected.name) continue;
+    if (expected.arguments !== void 0 && !partialMatch(call.arguments ?? {}, expected.arguments)) {
+      continue;
+    }
+    return i;
+  }
+  return -1;
+}
+/**
+ * Validate the tool calls recorded in a simulation result against an expectation.
+ *
+ * @param {*} response - Value expected to satisfy isSimulationResult.
+ * @param {object} expectation - Expected calls and matching rules.
+ * @param {Array<{name: string, arguments?: object, required?: boolean}>} expectation.calls -
+ *   Expected calls; a call is required unless `required` is exactly false.
+ * @param {string} [expectation.order] - "strict" for in-sequence matching; any
+ *   other value (default "any") matches calls regardless of position.
+ * @param {boolean} [expectation.exclusive] - When true, calls whose name is not
+ *   listed in `expectation.calls` cause a failure.
+ * @returns {{pass: boolean, message: string, metrics?: {precision: number, recall: number}}}
+ *   Outcome; `metrics` is omitted only when `response` is not a simulation result.
+ */
+function validateToolCalls(response, expectation) {
+  if (!isSimulationResult(response)) {
+    return {
+      pass: false,
+      message: "toolsTriggered expectation requires llm_host mode \u2014 response must be an LLMHostSimulationResult"
+    };
+  }
+  const actual = response.toolCalls;
+  // Recall: share of required expected calls found anywhere in the actual calls
+  // (1 when nothing is required).
+  const requiredCalls = expectation.calls.filter((c) => c.required !== false);
+  const calledRequiredCount = requiredCalls.filter(
+    (expected) => findMatchingCall(actual, expected) !== -1
+  ).length;
+  const recall = requiredCalls.length > 0 ? calledRequiredCount / requiredCalls.length : 1;
+  // Precision: share of actual calls whose name is among the expected names;
+  // arguments are not considered (1 when no calls were made).
+  const allowedNames = new Set(expectation.calls.map((c) => c.name));
+  const precision = actual.length > 0 ? actual.filter((c) => allowedNames.has(c.name)).length / actual.length : 1;
+  const metrics = { precision, recall };
+  const order = expectation.order ?? "any";
+  if (order === "strict") {
+    // Each expected call must be found after the previous match. An optional call
+    // that is not found is skipped without advancing the search position.
+    let searchFrom = 0;
+    for (const expected of expectation.calls) {
+      const idx = findMatchingCall(actual, expected, searchFrom);
+      if (idx === -1) {
+        if (expected.required !== false) {
+          return {
+            pass: false,
+            message: `Expected tool '${expected.name}' to be called in sequence (starting from position ${searchFrom}), but it was not found`,
+            metrics
+          };
+        }
+      } else {
+        searchFrom = idx + 1;
+      }
+    }
+  } else {
+    // Any order: every required call must appear somewhere in the actual calls.
+    const required = expectation.calls.filter((c) => c.required !== false);
+    for (const expected of required) {
+      const idx = findMatchingCall(actual, expected);
+      if (idx === -1) {
+        const argsNote = expected.arguments !== void 0 ? ` with args ${JSON.stringify(expected.arguments)}` : "";
+        return {
+          pass: false,
+          message: `Expected tool '${expected.name}'${argsNote} to be called, but it was not`,
+          metrics
+        };
+      }
+    }
+  }
+  if (expectation.exclusive === true) {
+    // Exclusivity is decided by name only.
+    const unexpected = actual.filter((c) => !allowedNames.has(c.name));
+    if (unexpected.length > 0) {
+      const names = unexpected.map((c) => `'${c.name}'`).join(", ");
+      return {
+        pass: false,
+        message: `Unexpected tool calls: ${names}. Only ${[...allowedNames].map((n) => `'${n}'`).join(", ")} are allowed`,
+        metrics
+      };
+    }
+  }
+  return { pass: true, message: "All tool call expectations met", metrics };
+}
+/**
+ * Validate the number of tool calls recorded in a simulation result.
+ *
+ * The bounds are checked in the order `exact`, `min`, `max`; the first violated
+ * bound determines the failure message.
+ *
+ * @param {*} response - Value expected to satisfy isSimulationResult.
+ * @param {{min?: number, max?: number, exact?: number}} options - Count bounds;
+ *   each bound is only checked when it is defined.
+ * @returns {{pass: boolean, message: string}} Outcome of the check.
+ */
+function validateToolCallCount(response, options) {
+  if (!isSimulationResult(response)) {
+    return {
+      pass: false,
+      message: "toolCallCount expectation requires llm_host mode \u2014 response must be an LLMHostSimulationResult"
+    };
+  }
+  const count = response.toolCalls.length;
+  const { min, max, exact } = options;
+  if (exact !== void 0 && count !== exact) {
+    return {
+      pass: false,
+      message: `Expected exactly ${exact} tool call(s), but got ${count}`
+    };
+  }
+  if (min !== void 0 && count < min) {
+    return {
+      pass: false,
+      message: `Expected at least ${min} tool call(s), but got ${count}`
+    };
+  }
+  if (max !== void 0 && count > max) {
+    return {
+      pass: false,
+      message: `Expected at most ${max} tool call(s), but got ${count}`
+    };
+  }
+  return {
+    pass: true,
+    message: `Tool call count (${count}) is within expected range`
+  };
+}
+// src/assertions/matchers/toHaveToolCalls.ts
+/**
+ * Matcher wrapper around validateToolCalls: forwards its `pass` flag and exposes
+ * its message through the `message()` function a matcher result carries.
+ *
+ * @param {*} received - Value under test.
+ * @param {object} expectation - Expectation passed through to validateToolCalls.
+ * @returns {{pass: boolean, message: () => string}} Matcher result.
+ */
+function toHaveToolCalls(received, expectation) {
+  const result = validateToolCalls(received, expectation);
+  return {
+    pass: result.pass,
+    message: () => result.message
+  };
+}
+// src/assertions/matchers/toHaveToolCallCount.ts
+/**
+ * Matcher wrapper around validateToolCallCount: forwards its `pass` flag and
+ * exposes its message through the `message()` function a matcher result carries.
+ *
+ * @param {*} received - Value under test.
+ * @param {{min?: number, max?: number, exact?: number}} options - Count bounds.
+ * @returns {{pass: boolean, message: () => string}} Matcher result.
+ */
+function toHaveToolCallCount(received, options) {
+  const result = validateToolCallCount(received, options);
+  return {
+    pass: result.pass,
+    message: () => result.message
+  };
+}
 // src/assertions/matchers/index.ts
 var expect = expect$1.extend({
   toMatchToolResponse,
@@ -913,7 +1321,9 @@ var expect = expect$1.extend({
   toBeToolError,
   toPassToolJudge,
   toHaveToolResponseSize,
-  toSatisfyToolPredicate
+  toSatisfyToolPredicate,
+  toHaveToolCalls,
+  toHaveToolCallCount
 });
 var MCPHostCapabilitiesSchema = z.object({
   sampling: z.record(z.unknown()).optional(),
@@ -930,9 +1340,16 @@ var MCPOAuthConfigSchema = z.object({
   clientSecret: z.string().optional(),
   redirectUri: z.string().url().optional()
 });
+// Schema for the `clientCredentials` auth option. Every field is optional;
+// `tokenEndpoint`, when given, must be a valid URL.
+var MCPClientCredentialsConfigSchema = z.object({
+  clientId: z.string().optional(),
+  clientSecret: z.string().optional(),
+  tokenEndpoint: z.string().url("tokenEndpoint must be a valid URL").optional(),
+  scopes: z.array(z.string()).optional()
+});
 var MCPAuthConfigSchema = z.object({
   accessToken: z.string().optional(),
-  oauth: MCPOAuthConfigSchema.optional()
+  oauth: MCPOAuthConfigSchema.optional(),
+  clientCredentials: MCPClientCredentialsConfigSchema.optional()
 }).refine(
   (data) => !(data.accessToken && data.oauth),
   "Cannot specify both accessToken and oauth configuration"
@@ -942,19 +1359,48 @@ var StdioConfigSchema = z.object({
   command: z.string().min(1, "command is required for stdio transport"),
   args: z.array(z.string()).optional(),
   cwd: z.string().optional(),
+  env: z.record(z.string(), z.string()).optional(),
   capabilities: MCPHostCapabilitiesSchema.optional(),
   connectTimeoutMs: z.number().positive().optional(),
   requestTimeoutMs: z.number().positive().optional(),
+  callTimeoutMs: z.number().positive().optional(),
   quiet: z.boolean().optional()
 });
+function isLocalhost(hostname) {
+  return hostname === "localhost" || hostname === "127.0.0.1" || hostname === "::1";
+}
 var HttpConfigSchema = z.object({
   transport: z.literal("http"),
-  serverUrl: z.string().url("serverUrl must be a valid URL"),
+  serverUrl: z.string().url("serverUrl must be a valid URL").refine((url) => {
+    let parsed;
+    try {
+      parsed = new URL(url);
+    } catch {
+      return true;
+    }
+    if (parsed.protocol === "http:" && !isLocalhost(parsed.hostname)) {
+      console.warn(
+        `[mcp-server-tester] serverUrl uses http:// for non-localhost address "${parsed.hostname}". This transmits tokens unencrypted. Use https:// for remote servers.`
+      );
+    }
+    return true;
+  }),
   headers: z.record(z.string()).optional(),
   capabilities: MCPHostCapabilitiesSchema.optional(),
   connectTimeoutMs: z.number().positive().optional(),
   requestTimeoutMs: z.number().positive().optional(),
-  auth: MCPAuthConfigSchema.optional()
+  callTimeoutMs: z.number().positive().optional(),
+  auth: MCPAuthConfigSchema.optional(),
+  proxy: z.object({
+    url: z.string().url("proxy.url must be a valid URL")
+  }).optional(),
+  retryAttempts: z.number().int().min(0).optional(),
+  tls: z.object({
+    ca: z.string().optional(),
+    cert: z.string().optional(),
+    key: z.string().optional(),
+    rejectUnauthorized: z.boolean().optional()
+  }).optional()
 });
 var MCPConfigSchema = z.discriminatedUnion("transport", [
   StdioConfigSchema,
@@ -964,26 +1410,245 @@ function validateMCPConfig(config) {
   return MCPConfigSchema.parse(config);
 }
 function isStdioConfig(config) {
-  return config.transport === "stdio" && typeof config.command === "string";
+  return config.transport === "stdio";
 }
 function isHttpConfig(config) {
-  return config.transport === "http" && typeof config.serverUrl === "string";
+  return config.transport === "http";
 }
 var NAMESPACE = "mcp-server-tester";
 var debugClient = createDebug(`${NAMESPACE}:client`);
 createDebug(`${NAMESPACE}:oauth`);
 createDebug(`${NAMESPACE}:eval`);
+var debugHttp = createDebug(`${NAMESPACE}:http`);
+// package.json
+var package_default = {
+  version: "1.0.0-beta.1"};
+var debug = createDebug("mcp-server-tester:oauth-flow");
+async function generatePKCE() {
+  const codeVerifier = oauth.generateRandomCodeVerifier();
+  const codeChallenge = await oauth.calculatePKCECodeChallenge(codeVerifier);
+  return {
+    codeVerifier,
+    codeChallenge
+  };
+}
+function generateState() {
+  return oauth.generateRandomState();
+}
+function buildAuthorizationUrl(config) {
+  const authorizationEndpoint = config.authServer.server.authorization_endpoint;
+  if (!authorizationEndpoint) {
+    throw new Error(
+      "Authorization server does not have an authorization_endpoint"
+    );
+  }
+  const authorizationUrl = new URL(authorizationEndpoint);
+  authorizationUrl.searchParams.set("client_id", config.clientId);
+  authorizationUrl.searchParams.set("redirect_uri", config.redirectUri);
+  authorizationUrl.searchParams.set("response_type", "code");
+  authorizationUrl.searchParams.set("scope", config.scopes.join(" "));
+  authorizationUrl.searchParams.set("code_challenge", config.codeChallenge);
+  authorizationUrl.searchParams.set("code_challenge_method", "S256");
+  authorizationUrl.searchParams.set("state", config.state);
+  if (config.resource) {
+    authorizationUrl.searchParams.set("resource", config.resource);
+  }
+  return authorizationUrl;
+}
+async function exchangeCodeForTokens(config) {
+  const client = {
+    client_id: config.clientId,
+    token_endpoint_auth_method: config.clientSecret ? "client_secret_basic" : "none"
+  };
+  const clientAuth = config.clientSecret ? oauth.ClientSecretBasic(config.clientSecret) : oauth.None();
+  const callbackUrl = new URL(config.redirectUri);
+  callbackUrl.searchParams.set("code", config.code);
+  callbackUrl.searchParams.set("state", config.state);
+  const validatedParams = oauth.validateAuthResponse(
+    config.authServer.server,
+    client,
+    callbackUrl,
+    config.state
+  );
+  const response = await oauth.authorizationCodeGrantRequest(
+    config.authServer.server,
+    client,
+    clientAuth,
+    validatedParams,
+    config.redirectUri,
+    config.codeVerifier
+  );
+  const result = await oauth.processAuthorizationCodeResponse(
+    config.authServer.server,
+    client,
+    response
+  );
+  return {
+    accessToken: result.access_token,
+    tokenType: result.token_type,
+    expiresIn: result.expires_in,
+    refreshToken: result.refresh_token,
+    scope: result.scope
+  };
+}
+async function refreshAccessToken(config) {
+  const client = {
+    client_id: config.clientId,
+    token_endpoint_auth_method: config.clientSecret ? "client_secret_basic" : "none"
+  };
+  const clientAuth = config.clientSecret ? oauth.ClientSecretBasic(config.clientSecret) : oauth.None();
+  const response = await oauth.refreshTokenGrantRequest(
+    config.authServer.server,
+    client,
+    clientAuth,
+    config.refreshToken
+  );
+  if (!response.ok) {
+    const contentType = response.headers.get("content-type") ?? "";
+    let errorMessage = `Token refresh failed: ${response.status} ${response.statusText}`;
+    try {
+      if (contentType.includes("application/json")) {
+        const errorBody = await response.clone().json();
+        if (errorBody.error) {
+          errorMessage = `Token refresh failed: ${errorBody.error}`;
+          if (errorBody.error_description) {
+            errorMessage += ` - ${errorBody.error_description}`;
+          }
+        }
+      } else {
+        const textBody = await response.clone().text();
+        if (textBody) {
+          errorMessage = `Token refresh failed: ${response.status} - ${textBody}`;
+        }
+      }
+    } catch {
+    }
+    throw new Error(errorMessage);
+  }
+  const result = await oauth.processRefreshTokenResponse(
+    config.authServer.server,
+    client,
+    response
+  );
+  return {
+    accessToken: result.access_token,
+    tokenType: result.token_type,
+    expiresIn: result.expires_in,
+    refreshToken: result.refresh_token,
+    scope: result.scope
+  };
+}
+async function performClientCredentialsFlow(config) {
+  const tokenEndpointUrl = new URL(config.tokenEndpoint);
+  const authServer = {
+    issuer: tokenEndpointUrl.origin,
+    token_endpoint: config.tokenEndpoint
+  };
+  const client = {
+    client_id: config.clientId
+  };
+  const clientAuth = oauth.ClientSecretBasic(config.clientSecret);
+  const parameters = {};
+  if (config.scopes && config.scopes.length > 0) {
+    parameters["scope"] = config.scopes.join(" ");
+  }
+  const response = await oauth.clientCredentialsGrantRequest(
+    authServer,
+    client,
+    clientAuth,
+    parameters
+  );
+  const result = await oauth.processClientCredentialsResponse(
+    authServer,
+    client,
+    response
+  );
+  const requestedScopes = new Set(
+    config.scopes && config.scopes.length > 0 ? config.scopes : []
+  );
+  const grantedScopes = new Set(
+    (result.scope ?? "").split(" ").filter(Boolean)
+  );
+  const missingScopes = [...requestedScopes].filter(
+    (s) => !grantedScopes.has(s)
+  );
+  if (missingScopes.length > 0 && requestedScopes.size > 0 && grantedScopes.size > 0) {
+    debug(
+      "[oauth] Warning: Token server granted fewer scopes than requested. Missing: %s",
+      missingScopes.join(", ")
+    );
+  }
+  return {
+    accessToken: result.access_token,
+    tokenType: result.token_type,
+    expiresIn: result.expires_in,
+    scope: result.scope
+  };
+}
 // src/mcp/clientFactory.ts
+function getRetryAfterDelayMs(err) {
+  const response = err?.response;
+  const retryAfter = response?.headers?.get?.("Retry-After");
+  if (retryAfter) {
+    const seconds = parseInt(retryAfter, 10);
+    if (!isNaN(seconds)) return seconds * 1e3;
+  }
+  return null;
+}
+function isRateLimitError(err) {
+  const response = err?.response;
+  return response?.status === 429;
+}
+/**
+ * Reports whether an error message looks like a transient network failure.
+ *
+ * The check is a case-insensitive substring match against a fixed list of
+ * markers; values that are not `Error` instances never match.
+ *
+ * @param {*} err - Thrown value.
+ * @returns {boolean} `true` when the message contains one of the markers.
+ */
+function isTransientNetworkError(err) {
+  if (!(err instanceof Error)) return false;
+  const text = err.message.toLowerCase();
+  const markers = [
+    "econnreset",
+    "econnrefused",
+    "etimedout",
+    "enotfound",
+    "network",
+    "socket hang up",
+    "fetch failed"
+  ];
+  return markers.some((marker) => text.includes(marker));
+}
+function isRetryableError(err) {
+  return isTransientNetworkError(err) || isRateLimitError(err);
+}
+async function retryWithBackoff(fn, maxAttempts) {
+  let lastErr;
+  for (let attempt = 0; attempt <= maxAttempts; attempt++) {
+    try {
+      return await fn();
+    } catch (err) {
+      lastErr = err;
+      if (attempt < maxAttempts && isRetryableError(err)) {
+        const retryAfterMs = getRetryAfterDelayMs(err);
+        const delayMs = retryAfterMs !== null ? retryAfterMs : Math.min(1e3 * 2 ** attempt, 3e4);
+        debugClient(
+          "Retryable error on attempt %d/%d, retrying in %dms: %s",
+          attempt + 1,
+          maxAttempts + 1,
+          delayMs,
+          err.message
+        );
+        await new Promise((resolve) => setTimeout(resolve, delayMs));
+      } else {
+        throw err;
+      }
+    }
+  }
+  throw lastErr;
+}
+var agentRegistry = /* @__PURE__ */ new WeakMap();
 async function createMCPClientForConfig(config, options) {
   const validatedConfig = validateMCPConfig(config);
   const client = new Client(
     {
       name: options?.clientInfo?.name ?? "@gleanwork/mcp-server-tester",
-      version: options?.clientInfo?.version ?? "0.1.0"
+      version: options?.clientInfo?.version ?? package_default.version
     },
     {
-      capabilities: validatedConfig.capabilities ?? {}
+      capabilities: {
+        ...validatedConfig.capabilities ?? {},
+        // Only advertise sampling if a handler has been registered;
+        // declaring sampling capability without a handler violates the MCP spec
+        sampling: options?.samplingHandler ? validatedConfig.capabilities?.sampling ?? {} : void 0
+      }
     }
   );
   if (isStdioConfig(validatedConfig)) {
@@ -992,33 +1657,140 @@ async function createMCPClientForConfig(config, options) {
       args: validatedConfig.args ?? [],
       ...validatedConfig.cwd && { cwd: validatedConfig.cwd },
       // Suppress server stderr when quiet mode is enabled
-      ...validatedConfig.quiet && { stderr: "ignore" }
+      ...validatedConfig.quiet && { stderr: "ignore" },
+      ...validatedConfig.env && {
+        env: Object.fromEntries(
+          Object.entries({ ...process.env, ...validatedConfig.env }).filter(
+            (entry) => entry[1] !== void 0
+          )
+        )
+      }
     });
     debugClient("Connecting via stdio: %O", {
       command: validatedConfig.command,
       args: validatedConfig.args,
       cwd: validatedConfig.cwd
     });
-    await client.connect(transport);
+    await client.connect(
+      transport,
+      validatedConfig.connectTimeoutMs !== void 0 ? { timeout: validatedConfig.connectTimeoutMs } : void 0
+    );
   } else if (isHttpConfig(validatedConfig)) {
     const headers = { ...validatedConfig.headers };
+    if (validatedConfig.auth?.clientCredentials && !options?.authProvider) {
+      const ccConfig = validatedConfig.auth.clientCredentials;
+      const clientId = ccConfig.clientId ?? process.env["MCP_CLIENT_ID"];
+      const clientSecret = ccConfig.clientSecret ?? process.env["MCP_CLIENT_SECRET"];
+      if (!clientId || !clientSecret) {
+        throw new Error(
+          "Client credentials require clientId/clientSecret in config or MCP_CLIENT_ID/MCP_CLIENT_SECRET env vars"
+        );
+      }
+      if (!ccConfig.tokenEndpoint) {
+        throw new Error(
+          "Client credentials require tokenEndpoint in auth.clientCredentials config"
+        );
+      }
+      debugClient("Fetching token via client credentials grant");
+      const tokenResult = await performClientCredentialsFlow({
+        tokenEndpoint: ccConfig.tokenEndpoint,
+        clientId,
+        clientSecret,
+        scopes: ccConfig.scopes
+      });
+      headers.Authorization = `Bearer ${tokenResult.accessToken}`;
+    }
     if (validatedConfig.auth?.accessToken && !options?.authProvider) {
       headers.Authorization = `Bearer ${validatedConfig.auth.accessToken}`;
     }
-    const transport = new StreamableHTTPClientTransport(
-      new URL(validatedConfig.serverUrl),
-      {
-        requestInit: Object.keys(headers).length > 0 ? { headers } : void 0,
-        // Pass auth provider for OAuth flow - MCP SDK handles it automatically
-        authProvider: options?.authProvider
+    const url = new URL(validatedConfig.serverUrl);
+    let requestInit = Object.keys(headers).length > 0 ? { headers } : void 0;
+    const proxyUrl = validatedConfig.proxy?.url ?? process.env["HTTPS_PROXY"] ?? process.env["HTTP_PROXY"];
+    if (proxyUrl) {
+      const proxyAgent = new ProxyAgent(proxyUrl);
+      try {
+        const sanitized = new URL(proxyUrl);
+        debugClient(
+          "Using proxy: %s://%s:%s",
+          sanitized.protocol.slice(0, -1),
+          sanitized.hostname,
+          sanitized.port
+        );
+      } catch {
+        debugClient("Using proxy (unparseable URL)");
       }
-    );
+      requestInit = {
+        ...requestInit,
+        dispatcher: proxyAgent
+      };
+    }
+    if (validatedConfig.tls) {
+      const tlsCfg = validatedConfig.tls;
+      try {
+        const dispatcher = new Agent({
+          connect: {
+            ...tlsCfg.ca && { ca: readFileSync(tlsCfg.ca) },
+            ...tlsCfg.cert && { cert: readFileSync(tlsCfg.cert) },
+            ...tlsCfg.key && { key: readFileSync(tlsCfg.key) },
+            rejectUnauthorized: tlsCfg.rejectUnauthorized ?? true
+          }
+        });
+        agentRegistry.set(client, dispatcher);
+        requestInit = {
+          ...requestInit,
+          dispatcher
+        };
+        debugClient("TLS configuration applied");
+      } catch (error) {
+        const filePath = tlsCfg.ca ?? tlsCfg.cert ?? tlsCfg.key;
+        const fileType = tlsCfg.ca ? "CA certificate" : tlsCfg.cert ? "client certificate" : "client key";
+        throw new Error(
+          `Failed to load TLS ${fileType} from ${filePath}: ${error instanceof Error ? error.message : String(error)}`
+        );
+      }
+    } else if (proxyUrl) {
+      const existingDispatcher = requestInit?.dispatcher;
+      if (existingDispatcher) {
+        agentRegistry.set(client, existingDispatcher);
+      }
+    }
     debugClient("Connecting via HTTP: %O", {
       serverUrl: validatedConfig.serverUrl,
       headers: Object.keys(headers).length > 0 ? Object.keys(headers) : void 0,
       hasAuthProvider: !!options?.authProvider
     });
-    await client.connect(transport);
+    debugHttp("Connecting to %s", validatedConfig.serverUrl);
+    if (Object.keys(headers).length > 0) {
+      debugHttp("Request header names: %O", Object.keys(headers));
+    }
+    const retryAttempts = validatedConfig.retryAttempts ?? 0;
+    const connectOptions = validatedConfig.connectTimeoutMs !== void 0 ? { timeout: validatedConfig.connectTimeoutMs } : void 0;
+    await retryWithBackoff(async () => {
+      try {
+        debugHttp("Attempting transport: streamableHttp");
+        const streamableTransport = new StreamableHTTPClientTransport(url, {
+          requestInit,
+          authProvider: options?.authProvider
+        });
+        await client.connect(streamableTransport, connectOptions);
+        debugClient("Connected via Streamable HTTP");
+        debugHttp("Connection established via streamableHttp");
+      } catch (err) {
+        debugHttp(
+          "streamableHttp failed (%s), falling back to SSE",
+          err.message
+        );
+        debugClient("Streamable HTTP failed, falling back to SSE transport");
+        debugHttp("Attempting transport: sse");
+        const sseTransport = new SSEClientTransport(url, {
+          requestInit,
+          authProvider: options?.authProvider
+        });
+        await client.connect(sseTransport, connectOptions);
+        debugClient("Connected via SSE");
+        debugHttp("Connection established via sse");
+      }
+    }, retryAttempts);
   }
   debugClient("Connected successfully");
   const serverInfo = client.getServerVersion();
@@ -1031,12 +1803,43 @@ async function closeMCPClient(client) {
   try {
     await client.close();
   } catch (error) {
-    console.error("[MCP] Error closing client:", error);
+    debugClient(
+      "Error closing client: %s",
+      error instanceof Error ? error.message : String(error)
+    );
     throw error;
+  } finally {
+    const agent = agentRegistry.get(client);
+    if (agent) {
+      agentRegistry.delete(client);
+      try {
+        await agent.close();
+      } catch (agentError) {
+        debugClient(
+          "Error closing undici agent: %s",
+          agentError.message
+        );
+      }
+    }
   }
 }
 // src/mcp/fixtures/mcpFixture.ts
+var DEFAULT_CALL_TIMEOUT_MS = 3e4;
+function withCallTimeout(promise, ms, opName) {
+  let timer;
+  return Promise.race([
+    promise,
+    new Promise((_, reject) => {
+      timer = setTimeout(
+        () => reject(
+          new Error(`MCP operation "${opName}" timed out after ${ms}ms`)
+        ),
+        ms
+      );
+    })
+  ]).finally(() => clearTimeout(timer));
+}
 var testStep = null;
 try {
   const playwright = __require("@playwright/test");
@@ -1048,20 +1851,29 @@ try {
 function createMCPFixture(client, testInfo, options) {
   const authType = options?.authType ?? "none";
   const project = options?.project;
+  const callTimeout = options?.callTimeoutMs ?? DEFAULT_CALL_TIMEOUT_MS;
   if (!testInfo) {
     return {
       client,
       authType,
       project,
       async listTools() {
-        const result = await client.listTools();
+        const result = await withCallTimeout(
+          client.listTools(),
+          callTimeout,
+          "listTools"
+        );
         return result.tools;
       },
       async callTool(name, args) {
-        const result = await client.callTool({
-          name,
-          arguments: args
-        });
+        const result = await withCallTimeout(
+          client.callTool({
+            name,
+            arguments: args
+          }),
+          callTimeout,
+          `callTool("${name}")`
+        );
         return result;
       },
       getServerInfo() {
@@ -1082,7 +1894,11 @@ function createMCPFixture(client, testInfo, options) {
     project,
     async listTools() {
       const execute = async () => {
-        const result = await client.listTools();
+        const result = await withCallTimeout(
+          client.listTools(),
+          callTimeout,
+          "listTools"
+        );
         const tools = result.tools;
         await testInfo.attach("mcp-list-tools", {
           contentType: "application/json",
@@ -1106,10 +1922,14 @@ function createMCPFixture(client, testInfo, options) {
     async callTool(name, args) {
       const execute = async () => {
         const startTime = Date.now();
-        const result = await client.callTool({
-          name,
-          arguments: args
-        });
+        const result = await withCallTimeout(
+          client.callTool({
+            name,
+            arguments: args
+          }),
+          callTimeout,
+          `callTool("${name}")`
+        );
         const durationMs = Date.now() - startTime;
         await testInfo.attach(`mcp-call-${name}`, {
           contentType: "application/json",
@@ -1148,7 +1968,8 @@ function createMCPFixture(client, testInfo, options) {
           null,
           2
         )
-      }).catch(() => {
+      }).catch((err) => {
+        console.error("[MCPFixture] Failed to attach server info:", err);
       });
       return result;
     }
@@ -1242,6 +2063,9 @@ var PlaywrightOAuthClientProvider = class {
   }
   /**
    * Stores new OAuth tokens for the current session
+   *
+   * The code verifier is cleared after a successful token exchange — it is
+   * single-use per PKCE spec and must not persist beyond the exchange.
    */
   async saveTokens(tokens) {
     const state = await this.loadState() ?? this.createEmptyState();
@@ -1251,6 +2075,7 @@ var PlaywrightOAuthClientProvider = class {
       refreshToken: tokens.refresh_token,
       expiresAt: tokens.expires_in ? Date.now() + tokens.expires_in * 1e3 : void 0
     };
+    delete state.codeVerifier;
     await this.saveState(state);
   }
   /**
@@ -1330,11 +2155,11 @@ In a testing context, use performOAuthSetup() in your Playwright globalSetup to
     state.savedAt = Date.now();
     this.cachedState = state;
     const dir = path2.dirname(this.config.storagePath);
-    await fs2.mkdir(dir, { recursive: true });
+    await fs2.mkdir(dir, { recursive: true, mode: 448 });
     await fs2.writeFile(
       this.config.storagePath,
       JSON.stringify(state, null, 2),
-      "utf-8"
+      { encoding: "utf-8", mode: 384 }
     );
   }
   async deleteState() {
@@ -1364,119 +2189,27 @@ In a testing context, use performOAuthSetup() in your Playwright globalSetup to
     return result;
   }
 };
-async function generatePKCE() {
-  const codeVerifier = oauth.generateRandomCodeVerifier();
-  const codeChallenge = await oauth.calculatePKCECodeChallenge(codeVerifier);
-  return {
-    codeVerifier,
-    codeChallenge
-  };
-}
-function generateState() {
-  return oauth.generateRandomState();
-}
-function buildAuthorizationUrl(config) {
-  const authorizationEndpoint = config.authServer.server.authorization_endpoint;
-  if (!authorizationEndpoint) {
-    throw new Error(
-      "Authorization server does not have an authorization_endpoint"
-    );
-  }
-  const authorizationUrl = new URL(authorizationEndpoint);
-  authorizationUrl.searchParams.set("client_id", config.clientId);
-  authorizationUrl.searchParams.set("redirect_uri", config.redirectUri);
-  authorizationUrl.searchParams.set("response_type", "code");
-  authorizationUrl.searchParams.set("scope", config.scopes.join(" "));
-  authorizationUrl.searchParams.set("code_challenge", config.codeChallenge);
-  authorizationUrl.searchParams.set("code_challenge_method", "S256");
-  authorizationUrl.searchParams.set("state", config.state);
-  if (config.resource) {
-    authorizationUrl.searchParams.set("resource", config.resource);
+function isLocalhostUrl(url) {
+  try {
+    const parsed = new URL(url);
+    const h = parsed.hostname;
+    return h === "localhost" || h === "127.0.0.1" || h === "::1";
+  } catch {
+    return false;
   }
-  return authorizationUrl;
 }
-async function exchangeCodeForTokens(config) {
-  const client = {
-    client_id: config.clientId,
-    token_endpoint_auth_method: config.clientSecret ? "client_secret_basic" : "none"
-  };
-  const clientAuth = config.clientSecret ? oauth.ClientSecretBasic(config.clientSecret) : oauth.None();
-  const callbackUrl = new URL(config.redirectUri);
-  callbackUrl.searchParams.set("code", config.code);
-  callbackUrl.searchParams.set("state", config.state);
-  const validatedParams = oauth.validateAuthResponse(
-    config.authServer.server,
-    client,
-    callbackUrl,
-    config.state
-  );
-  const response = await oauth.authorizationCodeGrantRequest(
-    config.authServer.server,
-    client,
-    clientAuth,
-    validatedParams,
-    config.redirectUri,
-    config.codeVerifier
-  );
-  const result = await oauth.processAuthorizationCodeResponse(
-    config.authServer.server,
-    client,
-    response
-  );
-  return {
-    accessToken: result.access_token,
-    tokenType: result.token_type,
-    expiresIn: result.expires_in,
-    refreshToken: result.refresh_token,
-    scope: result.scope
-  };
-}
-async function refreshAccessToken(config) {
-  const client = {
-    client_id: config.clientId,
-    token_endpoint_auth_method: config.clientSecret ? "client_secret_basic" : "none"
-  };
-  const clientAuth = config.clientSecret ? oauth.ClientSecretBasic(config.clientSecret) : oauth.None();
-  const response = await oauth.refreshTokenGrantRequest(
-    config.authServer.server,
-    client,
-    clientAuth,
-    config.refreshToken
-  );
-  if (!response.ok) {
-    const contentType = response.headers.get("content-type") ?? "";
-    let errorMessage = `Token refresh failed: ${response.status} ${response.statusText}`;
-    try {
-      if (contentType.includes("application/json")) {
-        const errorBody = await response.clone().json();
-        if (errorBody.error) {
-          errorMessage = `Token refresh failed: ${errorBody.error}`;
-          if (errorBody.error_description) {
-            errorMessage += ` - ${errorBody.error_description}`;
-          }
-        }
-      } else {
-        const textBody = await response.clone().text();
-        if (textBody) {
-          errorMessage = `Token refresh failed: ${response.status} - ${textBody}`;
-        }
-      }
-    } catch {
+function validateAuthServerEndpoints(authServer) {
+  const endpoints = [
+    { name: "authorization_endpoint", url: authServer.authorization_endpoint },
+    { name: "token_endpoint", url: authServer.token_endpoint }
+  ];
+  for (const { name, url } of endpoints) {
+    if (url && !url.startsWith("https://") && !isLocalhostUrl(url)) {
+      throw new Error(
+        `OAuth discovery returned an insecure ${name}: "${url}". Only HTTPS endpoints are permitted for OAuth flows to prevent token interception.`
+      );
     }
-    throw new Error(errorMessage);
   }
-  const result = await oauth.processRefreshTokenResponse(
-    config.authServer.server,
-    client,
-    response
-  );
-  return {
-    accessToken: result.access_token,
-    tokenType: result.token_type,
-    expiresIn: result.expires_in,
-    refreshToken: result.refresh_token,
-    scope: result.scope
-  };
 }
 var MCP_PROTOCOL_VERSION = "2025-06-18";
 async function discoverProtectedResource(mcpServerUrl) {
@@ -1546,6 +2279,7 @@ async function discoverAuthorizationServer(authServerUrl) {
     })
   });
   const metadata = await oauth.processDiscoveryResponse(issuer, response);
+  validateAuthServerEndpoints(metadata);
   return {
     server: metadata,
     issuer: authServerUrl
@@ -1691,7 +2425,7 @@ var FileOAuthStorage = class {
 };
 // src/auth/cli.ts
-var debug = createDebug("mcp-server-tester:cli-oauth");
+var debug2 = createDebug("mcp-server-tester:cli-oauth");
 var DEFAULT_TIMEOUT_MS = 3e5;
 var DEFAULT_CLIENT_NAME = "@gleanwork/mcp-server-tester";
 var DEFAULT_METADATA_TTL_MS = 24 * 60 * 60 * 1e3;
@@ -1717,7 +2451,7 @@ var CLIOAuthClient = class {
   async getAccessToken() {
     const envTokens = loadTokensFromEnv();
     if (envTokens) {
-      debug("Using tokens from environment variables");
+      debug2("Using tokens from environment variables");
       return {
         accessToken: envTokens.accessToken,
         tokenType: envTokens.tokenType,
@@ -1730,7 +2464,7 @@ var CLIOAuthClient = class {
     if (storedTokens?.accessToken) {
       const isValid = await this.storage.hasValidToken();
       if (isValid) {
-        debug("Using cached tokens from storage");
+        debug2("Using cached tokens from storage");
         return {
           accessToken: storedTokens.accessToken,
           tokenType: storedTokens.tokenType,
@@ -1740,7 +2474,7 @@ var CLIOAuthClient = class {
         };
       }
       if (storedTokens.refreshToken) {
-        debug("Token expired, attempting refresh");
+        debug2("Token expired, attempting refresh");
         try {
           const refreshedTokens = await this.refreshStoredToken(storedTokens);
           return {
@@ -1751,11 +2485,11 @@ var CLIOAuthClient = class {
             fromEnv: false
           };
         } catch (error) {
-          debug("Token refresh failed, will re-authenticate:", error);
+          debug2("Token refresh failed, will re-authenticate:", error);
         }
       }
     }
-    debug("Performing full OAuth authentication");
+    debug2("Performing full OAuth authentication");
     return this.authenticate();
   }
   /**
@@ -1771,7 +2505,7 @@ var CLIOAuthClient = class {
   async tryGetAccessToken() {
     const envTokens = loadTokensFromEnv();
     if (envTokens) {
-      debug("Using tokens from environment variables");
+      debug2("Using tokens from environment variables");
       return {
         accessToken: envTokens.accessToken,
         tokenType: envTokens.tokenType,
@@ -1784,7 +2518,7 @@ var CLIOAuthClient = class {
     if (storedTokens?.accessToken) {
       const isValid = await this.storage.hasValidToken();
       if (isValid) {
-        debug("Using cached tokens from storage");
+        debug2("Using cached tokens from storage");
         return {
           accessToken: storedTokens.accessToken,
           tokenType: storedTokens.tokenType,
@@ -1794,7 +2528,7 @@ var CLIOAuthClient = class {
         };
       }
       if (storedTokens.refreshToken) {
-        debug("Token expired, attempting refresh");
+        debug2("Token expired, attempting refresh");
         try {
           const refreshedTokens = await this.refreshStoredToken(storedTokens);
           return {
@@ -1805,12 +2539,12 @@ var CLIOAuthClient = class {
             fromEnv: false
           };
         } catch (error) {
-          debug("Token refresh failed:", error);
+          debug2("Token refresh failed:", error);
           return null;
         }
       }
     }
-    debug("No valid token available");
+    debug2("No valid token available");
     return null;
   }
   /**
@@ -1845,7 +2579,7 @@ var CLIOAuthClient = class {
    */
   async clearCredentials() {
     await this.storage.deleteTokens();
-    debug("Cleared stored credentials");
+    debug2("Cleared stored credentials");
   }
   /**
    * Discover protected resource and authorization server
@@ -1855,12 +2589,12 @@ var CLIOAuthClient = class {
     if (cachedMetadata) {
       const age = Date.now() - cachedMetadata.discoveredAt;
       if (age < DEFAULT_METADATA_TTL_MS) {
-        debug("Using cached server metadata (age: %dms)", age);
-        debug(
+        debug2("Using cached server metadata (age: %dms)", age);
+        debug2(
           "Cached protected resource scopes: %O",
           cachedMetadata.protectedResource.scopes_supported
         );
-        debug(
+        debug2(
           "Cached auth server scopes: %O",
           cachedMetadata.authServer.server.scopes_supported
         );
@@ -1869,12 +2603,12 @@ var CLIOAuthClient = class {
           authServer: cachedMetadata.authServer
         };
       }
-      debug("Cached server metadata is stale (age: %dms), re-discovering", age);
+      debug2("Cached server metadata is stale (age: %dms), re-discovering", age);
     }
-    debug("Discovering protected resource:", this.config.mcpServerUrl);
+    debug2("Discovering protected resource:", this.config.mcpServerUrl);
     const prResult = await discoverProtectedResource(this.config.mcpServerUrl);
-    debug("Found protected resource:", prResult.metadata.resource);
-    debug(
+    debug2("Found protected resource:", prResult.metadata.resource);
+    debug2(
       "Protected resource scopes_supported: %O",
       prResult.metadata.scopes_supported
     );
@@ -1884,10 +2618,10 @@ var CLIOAuthClient = class {
         "No authorization servers found in protected resource metadata"
       );
     }
-    debug("Discovering authorization server:", authServerUrl);
+    debug2("Discovering authorization server:", authServerUrl);
     const authServer = await discoverAuthorizationServer(authServerUrl);
-    debug("Found authorization server:", authServer.issuer);
-    debug(
+    debug2("Found authorization server:", authServer.issuer);
+    debug2(
       "Auth server scopes_supported: %O",
       authServer.server.scopes_supported
     );
@@ -1907,7 +2641,7 @@ var CLIOAuthClient = class {
    */
   async getOrRegisterClient(authServer) {
     if (this.config.clientId) {
-      debug("Using pre-configured client ID");
+      debug2("Using pre-configured client ID");
       return {
         clientId: this.config.clientId,
         clientSecret: this.config.clientSecret
@@ -1915,10 +2649,10 @@ var CLIOAuthClient = class {
     }
     const cachedClient = await this.storage.loadClient();
     if (cachedClient?.clientId) {
-      debug("Using cached client registration");
+      debug2("Using cached client registration");
       return cachedClient;
     }
-    debug("Registering new client via DCR");
+    debug2("Registering new client via DCR");
     const client = await this.registerClient(authServer);
     await this.storage.saveClient(client);
     return client;
@@ -1956,7 +2690,7 @@ ${errorText}`
       );
     }
     const data = await response.json();
-    debug("Client registered:", data.client_id);
+    debug2("Client registered:", data.client_id);
     return {
       clientId: data.client_id,
       clientSecret: data.client_secret,
@@ -1974,17 +2708,17 @@ ${errorText}`
     const redirectUri = `http://127.0.0.1:${port}/callback`;
     try {
       const requestedScopes = this.config.scopes ?? protectedResource.scopes_supported ?? authServer.server.scopes_supported ?? ["openid"];
-      debug("Scope resolution:");
-      debug("  - User config scopes: %O", this.config.scopes);
-      debug(
+      debug2("Scope resolution:");
+      debug2("  - User config scopes: %O", this.config.scopes);
+      debug2(
         "  - Protected resource scopes_supported: %O",
         protectedResource.scopes_supported
       );
-      debug(
+      debug2(
         "  - Auth server scopes_supported: %O",
         authServer.server.scopes_supported
       );
-      debug("  - Final requested scopes: %O", requestedScopes);
+      debug2("  - Final requested scopes: %O", requestedScopes);
       const authUrl = buildAuthorizationUrl({
         authServer,
         clientId: client.clientId,
@@ -1994,16 +2728,19 @@ ${errorText}`
         state,
         resource: protectedResource.resource
       });
-      debug("Authorization URL: %s", authUrl.toString());
-      debug("Authorization URL params:");
-      debug("  - client_id: %s", authUrl.searchParams.get("client_id"));
-      debug("  - redirect_uri: %s", authUrl.searchParams.get("redirect_uri"));
-      debug("  - scope: %s", authUrl.searchParams.get("scope"));
-      debug("  - resource: %s", authUrl.searchParams.get("resource"));
+      debug2(
+        "Authorization URL (base): %s",
+        `${authUrl.origin}${authUrl.pathname}`
+      );
+      debug2("Authorization URL params:");
+      debug2("  - client_id: %s", authUrl.searchParams.get("client_id"));
+      debug2("  - redirect_uri: %s", authUrl.searchParams.get("redirect_uri"));
+      debug2("  - scope: %s", authUrl.searchParams.get("scope"));
+      debug2("  - resource: %s", authUrl.searchParams.get("resource"));
       await this.openBrowserOrPrintUrl(authUrl);
-      debug("Waiting for OAuth callback...");
+      debug2("Waiting for OAuth callback...");
       const code = await codePromise;
-      debug("Received authorization code");
+      debug2("Received authorization code");
       const tokenResult = await exchangeCodeForTokens({
         authServer,
         clientId: client.clientId,
@@ -2041,14 +2778,14 @@ ${errorText}`
     let clientId;
     let clientSecret;
     if (storedTokens.clientId) {
-      debug("Using clientId from stored tokens for refresh");
+      debug2("Using clientId from stored tokens for refresh");
       clientId = storedTokens.clientId;
       const storedClient = await this.storage.loadClient();
       if (storedClient?.clientId === clientId) {
         clientSecret = storedClient.clientSecret;
       }
     } else {
-      debug(
+      debug2(
         "No clientId in stored tokens, falling back to stored client (legacy behavior)"
       );
       const client = await this.getOrRegisterClient(metadata.authServer);
@@ -2142,7 +2879,7 @@ ${errorText}`
       const preferredPort = this.config.callbackPort ?? 0;
       server.listen(preferredPort, "127.0.0.1", () => {
         const address = server.address();
-        debug("Callback server listening on port", address.port);
+        debug2("Callback server listening on port", address.port);
         resolve({ port: address.port, codePromise, close: forceClose });
       });
       server.on("error", (err) => {
@@ -2166,9 +2903,9 @@ ${errorText}`
     try {
       const open = await import('open');
       await open.default(url.toString());
-      debug("Opened browser for authentication");
+      debug2("Opened browser for authentication");
     } catch (error) {
-      debug("Failed to open browser:", error);
+      debug2("Failed to open browser:", error);
       console.log("\nFailed to open browser automatically.");
       console.log("Please open the following URL manually:\n");
       console.log(url.toString() + "\n");
@@ -2314,30 +3051,31 @@ var test = test$1.extend({
       );
     }
     let resolvedAuthType = "none";
+    const httpConfig = isHttpConfig(mcpConfig) ? mcpConfig : null;
     let authProvider;
-    if (mcpConfig.auth?.oauth?.authStatePath) {
+    if (httpConfig?.auth?.oauth?.authStatePath) {
       authProvider = new PlaywrightOAuthClientProvider({
-        storagePath: mcpConfig.auth.oauth.authStatePath,
-        redirectUri: mcpConfig.auth.oauth.redirectUri ?? "http://localhost:3000/oauth/callback",
-        clientId: mcpConfig.auth.oauth.clientId,
-        clientSecret: mcpConfig.auth.oauth.clientSecret
+        storagePath: httpConfig.auth.oauth.authStatePath,
+        redirectUri: httpConfig.auth.oauth.redirectUri ?? "http://localhost:3000/oauth/callback",
+        clientId: httpConfig.auth.oauth.clientId,
+        clientSecret: httpConfig.auth.oauth.clientSecret
       });
       resolvedAuthType = "oauth";
     }
     let effectiveConfig = mcpConfig;
-    if (mcpConfig.auth?.accessToken) {
+    if (httpConfig?.auth?.accessToken) {
       resolvedAuthType = "api-token";
     }
-    if (isHttpConfig(mcpConfig) && !mcpConfig.auth?.accessToken && !mcpConfig.auth?.oauth?.authStatePath) {
+    if (httpConfig && !httpConfig.auth?.accessToken && !httpConfig.auth?.oauth?.authStatePath) {
       const cliClient = new CLIOAuthClient({
-        mcpServerUrl: mcpConfig.serverUrl
+        mcpServerUrl: httpConfig.serverUrl
       });
       const tokenResult = await cliClient.tryGetAccessToken();
       if (tokenResult) {
         effectiveConfig = {
-          ...mcpConfig,
+          ...httpConfig,
           auth: {
-            ...mcpConfig.auth,
+            ...httpConfig.auth,
             accessToken: tokenResult.accessToken
           }
         };
@@ -2348,7 +3086,7 @@ var test = test$1.extend({
     const client = await createMCPClientForConfig(effectiveConfig, {
       clientInfo: {
         name: "@gleanwork/mcp-server-tester",
-        version: "0.1.0"
+        version: package_default.version
       },
       authProvider
     });
@@ -2365,9 +3103,11 @@ var test = test$1.extend({
    * Automatically tracks all MCP operations for the reporter
    */
   mcp: async ({ mcpClient, _mcpFixtureState }, use, testInfo) => {
+    const useConfig = testInfo.project.use;
     const api = createMCPFixture(mcpClient, testInfo, {
       authType: _mcpFixtureState.resolvedAuthType,
-      project: testInfo.project.name
+      project: testInfo.project.name,
+      callTimeoutMs: useConfig.mcpConfig?.callTimeoutMs
     });
     await use(api);
   }