npm - @gleanwork/mcp-server-tester - Versions diffs - 1.0.0-beta.7 → 1.0.0 - Mend

@gleanwork/mcp-server-tester 1.0.0-beta.7 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/README.md +20 -1
package/dist/cli/index.js +12 -1
package/dist/fixtures/mcp.d.ts +33 -8
package/dist/fixtures/mcp.js +354 -37
package/dist/fixtures/mcp.js.map +1 -1
package/dist/index.cjs +721 -76
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +533 -116
package/dist/index.d.ts +533 -116
package/dist/index.js +719 -78
package/dist/index.js.map +1 -1
package/dist/reporters/ui-dist/app.js +8 -134
package/dist/reporters/ui-dist/styles.css +1 -1
package/package.json +11 -6
package/dist/reporters/mcpReporter.d.cts +0 -90
package/dist/reporters/mcpReporter.d.ts +0 -90

package/dist/fixtures/mcp.js CHANGED

@@ -1,6 +1,6 @@
 import { expect as expect$1, test as test$1 } from '@playwright/test';
-import { query } from '@anthropic-ai/claude-agent-sdk';
 import { z } from 'zod';
+import { query } from '@anthropic-ai/claude-agent-sdk';
 import { Client } from '@modelcontextprotocol/sdk/client/index.js';
 import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
 import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
@@ -237,11 +237,13 @@ function validateSchema(response, schema, options = {}) {
   } catch (error) {
     const zodError = error;
     const issues = formatZodIssues(zodError);
+    const text = stringifyResponse(response);
     return {
       pass: false,
       message: `Response does not match schema: ${issues}`,
       details: {
-        issues: zodError.issues
+        issues: zodError.issues,
+        textPreview: truncateForDisplay2(text)
       }
     };
   }
@@ -294,16 +296,29 @@ function formatZodIssues(error) {
   });
   return issues.join("; ");
 }
+function truncateForDisplay2(str, maxLength = 200) {
+  if (str.length <= maxLength) {
+    return str;
+  }
+  return str.slice(0, maxLength) + "... (truncated)";
+}
 // src/assertions/matchers/toMatchToolSchema.ts
 function toMatchToolSchema(received, schema, options = {}) {
   const result = validateSchema(received, schema, options);
+  const preview = result.details?.textPreview;
   return {
     pass: result.pass,
     message: () => {
       if (this.isNot) {
         return result.pass ? "Expected response NOT to match schema, but it did" : result.message;
       }
+      if (!result.pass && preview) {
+        return `${result.message}
+Actual response (truncated):
+${preview}`;
+      }
       return result.message;
     }
   };
@@ -334,11 +349,11 @@ function validateText(response, expected, options = {}) {
     details: {
       missing,
       textLength: text.length,
-      textPreview: truncateForDisplay2(text)
+      textPreview: truncateForDisplay3(text)
     }
   };
 }
-function truncateForDisplay2(str, maxLength = 200) {
+function truncateForDisplay3(str, maxLength = 200) {
   if (str.length <= maxLength) {
     return str;
   }
@@ -348,6 +363,7 @@ function truncateForDisplay2(str, maxLength = 200) {
 // src/assertions/matchers/toContainToolText.ts
 function toContainToolText(received, expected, options = {}) {
   const result = validateText(received, expected, options);
+  const preview = result.details?.textPreview;
   return {
     pass: result.pass,
     message: () => {
@@ -355,6 +371,12 @@ function toContainToolText(received, expected, options = {}) {
         const expectedStr = Array.isArray(expected) ? expected.map((s) => `"${s}"`).join(", ") : `"${expected}"`;
         return result.pass ? `Expected response NOT to contain ${expectedStr}, but it did` : result.message;
       }
+      if (!result.pass && preview) {
+        return `${result.message}
+Actual response (truncated):
+${preview}`;
+      }
       return result.message;
     }
   };
@@ -385,7 +407,7 @@ function validatePattern(response, patterns, options = {}) {
     details: {
       unmatched,
       textLength: text.length,
-      textPreview: truncateForDisplay3(text)
+      textPreview: truncateForDisplay4(text)
     }
   };
 }
@@ -405,7 +427,7 @@ function patternToString(pattern) {
   }
   return `/${pattern}/`;
 }
-function truncateForDisplay3(str, maxLength = 200) {
+function truncateForDisplay4(str, maxLength = 200) {
   if (str.length <= maxLength) {
     return str;
   }
@@ -415,12 +437,19 @@ function truncateForDisplay3(str, maxLength = 200) {
 // src/assertions/matchers/toMatchToolPattern.ts
 function toMatchToolPattern(received, patterns, options = {}) {
   const result = validatePattern(received, patterns, options);
+  const preview = result.details?.textPreview;
   return {
     pass: result.pass,
     message: () => {
       if (this.isNot) {
         return result.pass ? "Expected response NOT to match pattern(s), but it did" : result.message;
       }
+      if (!result.pass && preview) {
+        return `${result.message}
+Actual response (truncated):
+${preview}`;
+      }
       return result.message;
     }
   };
@@ -567,7 +596,7 @@ function validateError(response, expected = true) {
         pass: false,
         message: "Expected an error response but got success",
         details: {
-          textPreview: truncateForDisplay4(extractText2(response))
+          textPreview: truncateForDisplay5(extractText2(response))
         }
       };
     } else {
@@ -579,7 +608,7 @@ function validateError(response, expected = true) {
       }
       return {
         pass: false,
-        message: `Expected a success response but got error: "${truncateForDisplay4(errorMessage)}"`,
+        message: `Expected a success response but got error: "${truncateForDisplay5(errorMessage)}"`,
         details: {
           errorMessage
         }
@@ -592,7 +621,7 @@ function validateError(response, expected = true) {
       pass: false,
       message: `Expected an error containing "${expectedMessages[0]}" but got success`,
       details: {
-        textPreview: truncateForDisplay4(extractText2(response))
+        textPreview: truncateForDisplay5(extractText2(response))
       }
     };
   }
@@ -614,7 +643,7 @@ function validateError(response, expected = true) {
     }
   };
 }
-function truncateForDisplay4(str, maxLength = 200) {
+function truncateForDisplay5(str, maxLength = 200) {
   if (str.length <= maxLength) {
     return str;
   }
@@ -662,7 +691,175 @@ var JudgeResponseSchema = z.object({
   reasoning: z.string()
 });
-// src/judge/claudeAgentJudge.ts
+// src/judge/anthropicJudge.ts
+function createAnthropicJudge(config = {}) {
+  const apiKeyEnvVar = config.apiKeyEnvVar ?? "ANTHROPIC_API_KEY";
+  const apiKey = process.env[apiKeyEnvVar];
+  if (!apiKey) {
+    throw new Error(
+      `Anthropic judge requires an API key. Set the ${apiKeyEnvVar} environment variable.`
+    );
+  }
+  const model = config.model ?? "claude-sonnet-4-20250514";
+  const maxTokens = config.maxTokens ?? 1e3;
+  const temperature = config.temperature ?? 0;
+  return {
+    async evaluate(candidate, reference, rubric) {
+      let anthropicModule;
+      try {
+        anthropicModule = await import('@anthropic-ai/sdk');
+      } catch (err) {
+        throw new Error(
+          `Anthropic judge requires the \`@anthropic-ai/sdk\` package. Install it with: npm install @anthropic-ai/sdk
+Original error: ${err instanceof Error ? err.message : String(err)}`
+        );
+      }
+      const client = new anthropicModule.default({ apiKey });
+      const prompt = buildJudgePrompt(candidate, reference, rubric);
+      const startTime = Date.now();
+      const response = await client.messages.create({
+        model,
+        max_tokens: maxTokens,
+        temperature,
+        system: 'You are an expert evaluator. Respond with valid JSON only: {"pass": true|false, "score": 0.0-1.0, "reasoning": "explanation"}',
+        messages: [{ role: "user", content: prompt }]
+      });
+      const durationMs = Date.now() - startTime;
+      const textBlock = response.content.find(
+        (b) => b.type === "text"
+      );
+      const text = textBlock?.text ?? "";
+      const parsed = parseJudgeResponse(text);
+      return {
+        pass: parsed.pass,
+        score: parsed.score,
+        reasoning: parsed.reasoning,
+        usage: {
+          inputTokens: response.usage?.input_tokens ?? 0,
+          outputTokens: response.usage?.output_tokens ?? 0,
+          totalCostUsd: 0,
+          durationMs
+        }
+      };
+    }
+  };
+}
+function buildJudgePrompt(candidate, reference, rubric) {
+  const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
+  const referenceStr = reference !== null && reference !== void 0 ? typeof reference === "string" ? reference : JSON.stringify(reference, null, 2) : null;
+  return `Rubric:
+${rubric}
+<candidate_response>
+${candidateStr}
+</candidate_response>
+<reference_answer>
+${referenceStr ?? "No reference provided."}
+</reference_answer>
+Evaluate and return JSON: {"pass": boolean, "score": number (0-1), "reasoning": string}`;
+}
+function parseJudgeResponse(text) {
+  const cleaned = text.replace(/```json\n?/g, "").replace(/```\n?/g, "").trim();
+  let parsed;
+  try {
+    parsed = JSON.parse(cleaned);
+  } catch {
+    throw new Error(`Failed to parse judge response as JSON: ${text}`);
+  }
+  const result = JudgeResponseSchema.safeParse(parsed);
+  if (!result.success) {
+    throw new Error(
+      `Judge returned invalid response. Expected {pass, score, reasoning} but got: ${cleaned.slice(0, 500)}
+Validation errors: ${JSON.stringify(result.error.issues)}`
+    );
+  }
+  return result.data;
+}
+// src/judge/vertexAnthropicJudge.ts
+function createVertexAnthropicJudge(config = {}) {
+  const model = config.model ?? "claude-sonnet-4-20250514";
+  const maxTokens = config.maxTokens ?? 1e3;
+  const temperature = config.temperature ?? 0;
+  return {
+    async evaluate(candidate, reference, rubric) {
+      let vertexModule;
+      try {
+        vertexModule = await import('@anthropic-ai/vertex-sdk');
+      } catch (err) {
+        throw new Error(
+          `Vertex Anthropic judge requires the \`@anthropic-ai/vertex-sdk\` package. Install it with: npm install @anthropic-ai/vertex-sdk
+Original error: ${err instanceof Error ? err.message : String(err)}`
+        );
+      }
+      const client = new vertexModule.AnthropicVertex({
+        projectId: process.env.GOOGLE_VERTEX_PROJECT ?? process.env.CLOUD_ML_PROJECT_ID,
+        region: process.env.GOOGLE_VERTEX_LOCATION ?? "us-east5"
+      });
+      const prompt = buildJudgePrompt2(candidate, reference, rubric);
+      const startTime = Date.now();
+      const response = await client.messages.create({
+        model,
+        max_tokens: maxTokens,
+        temperature,
+        system: 'You are an expert evaluator. Respond with valid JSON only: {"pass": true|false, "score": 0.0-1.0, "reasoning": "explanation"}',
+        messages: [{ role: "user", content: prompt }]
+      });
+      const durationMs = Date.now() - startTime;
+      const textBlock = response.content.find(
+        (b) => b.type === "text"
+      );
+      const text = textBlock?.text ?? "";
+      const parsed = parseJudgeResponse2(text);
+      return {
+        pass: parsed.pass,
+        score: parsed.score,
+        reasoning: parsed.reasoning,
+        usage: {
+          inputTokens: response.usage?.input_tokens ?? 0,
+          outputTokens: response.usage?.output_tokens ?? 0,
+          totalCostUsd: 0,
+          durationMs
+        }
+      };
+    }
+  };
+}
+function buildJudgePrompt2(candidate, reference, rubric) {
+  const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
+  const referenceStr = reference !== null && reference !== void 0 ? typeof reference === "string" ? reference : JSON.stringify(reference, null, 2) : null;
+  return `Rubric:
+${rubric}
+<candidate_response>
+${candidateStr}
+</candidate_response>
+<reference_answer>
+${referenceStr ?? "No reference provided."}
+</reference_answer>
+Evaluate and return JSON: {"pass": boolean, "score": number (0-1), "reasoning": string}`;
+}
+function parseJudgeResponse2(text) {
+  const cleaned = text.replace(/```json\n?/g, "").replace(/```\n?/g, "").trim();
+  let parsed;
+  try {
+    parsed = JSON.parse(cleaned);
+  } catch {
+    throw new Error(`Failed to parse judge response as JSON: ${text}`);
+  }
+  const result = JudgeResponseSchema.safeParse(parsed);
+  if (!result.success) {
+    throw new Error(
+      `Judge returned invalid response. Expected {pass, score, reasoning} but got: ${cleaned.slice(0, 500)}
+Validation errors: ${JSON.stringify(result.error.issues)}`
+    );
+  }
+  return result.data;
+}
 function createClaudeAgentJudge(config) {
   const model = config.model ?? "claude-sonnet-4-20250514";
   const maxBudgetUsd = config.maxBudgetUsd ?? 0.1;
@@ -680,7 +877,7 @@ function createClaudeAgentJudge(config) {
           exceedsMaxToolOutputSize: true
         };
       }
-      const prompt = buildJudgePrompt(candidate, reference, rubric);
+      const prompt = buildJudgePrompt3(candidate, reference, rubric);
       try {
         let resultMessage;
         for await (const message of query({
@@ -712,7 +909,7 @@ function createClaudeAgentJudge(config) {
           );
         }
         const responseText = resultMessage.result ?? "";
-        const parsed = parseJudgeResponse(responseText);
+        const parsed = parseJudgeResponse3(responseText);
         const usage = {
           inputTokens: resultMessage.usage?.input_tokens ?? 0,
           outputTokens: resultMessage.usage?.output_tokens ?? 0,
@@ -741,7 +938,7 @@ function createClaudeAgentJudge(config) {
 function buildSystemPrompt() {
   return 'You are an expert evaluator. Evaluate the candidate response based on the rubric provided. Respond ONLY with valid JSON in this exact format: {"pass": boolean, "score": number (0-1), "reasoning": string}. Do not include any other text, markdown formatting, or code blocks.';
 }
-function buildJudgePrompt(candidate, reference, rubric) {
+function buildJudgePrompt3(candidate, reference, rubric) {
   const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
   const referenceStr = reference !== null && reference !== void 0 ? typeof reference === "string" ? reference : JSON.stringify(reference, null, 2) : null;
   const parts = [];
@@ -758,7 +955,7 @@ function buildJudgePrompt(candidate, reference, rubric) {
   );
   return parts.join("");
 }
-function parseJudgeResponse(text) {
+function parseJudgeResponse3(text) {
   let jsonText = text.trim();
   if (jsonText.startsWith("```json")) {
     jsonText = jsonText.slice(7);
@@ -815,7 +1012,7 @@ Original error: ${err instanceof Error ? err.message : String(err)}`
         );
       }
       const client = new openaiModule.default({ apiKey });
-      const prompt = buildJudgePrompt2(candidate, reference, rubric);
+      const prompt = buildJudgePrompt4(candidate, reference, rubric);
       const startTime = Date.now();
       const completion = await client.chat.completions.create({
         model,
@@ -831,7 +1028,7 @@ Original error: ${err instanceof Error ? err.message : String(err)}`
       });
       const durationMs = Date.now() - startTime;
       const text = completion.choices[0]?.message.content ?? "";
-      const parsed = parseJudgeResponse2(text);
+      const parsed = parseJudgeResponse4(text);
       return {
         pass: parsed.pass,
         score: parsed.score,
@@ -846,7 +1043,7 @@ Original error: ${err instanceof Error ? err.message : String(err)}`
     }
   };
 }
-function buildJudgePrompt2(candidate, reference, rubric) {
+function buildJudgePrompt4(candidate, reference, rubric) {
   const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
   const referenceStr = reference !== null && reference !== void 0 ? typeof reference === "string" ? reference : JSON.stringify(reference, null, 2) : null;
   return `Rubric:
@@ -862,7 +1059,7 @@ ${referenceStr ?? "No reference provided."}
 Evaluate and return JSON: {"pass": boolean, "score": number (0-1), "reasoning": string}`;
 }
-function parseJudgeResponse2(text) {
+function parseJudgeResponse4(text) {
   const cleaned = text.replace(/```json\n?/g, "").replace(/```\n?/g, "").trim();
   let parsed;
   try {
@@ -964,14 +1161,33 @@ function createJudge(config = {}) {
   const provider = config.provider ?? "anthropic";
   switch (provider) {
     case "anthropic":
+      return createAnthropicJudge(config);
+    case "vertex-anthropic":
+      return createVertexAnthropicJudge(config);
+    case "anthropic-agent-sdk":
       return createClaudeAgentJudge(config);
     case "openai":
       return createOpenAIJudge(config);
     case "google":
       return createGoogleJudge(config);
     default:
-      throw new Error(`Unsupported LLM provider: ${String(provider)}`);
+      throw new Error(
+        `Unsupported LLM provider: ${String(provider)}. Valid providers: 'anthropic', 'vertex-anthropic', 'anthropic-agent-sdk', 'openai', 'google'`
+      );
+  }
+}
+// src/judge/judgeRegistry.ts
+var registry = /* @__PURE__ */ new Map();
+function getRegisteredJudge(name) {
+  const executor = registry.get(name);
+  if (!executor) {
+    const available = registry.size > 0 ? ` Available judges: ${[...registry.keys()].join(", ")}` : " No judges are registered.";
+    throw new Error(
+      `Judge "${name}" is not registered.${available} Register it with registerJudge() before tests run.`
+    );
   }
+  return executor;
 }
 // src/assertions/validators/judge.ts
@@ -982,6 +1198,7 @@ function computeStdDev(scores, mean) {
 }
 async function validateJudge(response, config) {
   const {
+    judge: judgeName,
     rubric,
     reference,
     threshold = 0.7,
@@ -994,6 +1211,29 @@ async function validateJudge(response, config) {
     maxBudgetUsd,
     maxToolOutputSize
   } = config;
+  if (judgeName !== void 0) {
+    try {
+      const executor = getRegisteredJudge(judgeName);
+      const judgeResult = await executor(response, reference ?? void 0);
+      const score = judgeResult.score;
+      const passed = score >= threshold;
+      return {
+        pass: passed,
+        message: passed ? `Custom judge "${judgeName}" passed with score ${score.toFixed(2)}` : `Custom judge "${judgeName}" failed with score ${score.toFixed(2)} (threshold: ${threshold}). ${judgeResult.reasoning ?? ""}`
+      };
+    } catch (err) {
+      return {
+        pass: false,
+        message: `Custom judge "${judgeName}" error: ${err instanceof Error ? err.message : String(err)}`
+      };
+    }
+  }
+  if (rubric === void 0) {
+    return {
+      pass: false,
+      message: 'Judge evaluation failed: either "judge" or "rubric" must be provided'
+    };
+  }
   const resolvedRubric = resolveRubric(rubric);
   const judgeConfig = {
     ...provider !== void 0 && { provider },
@@ -1040,11 +1280,17 @@ async function validateJudge(response, config) {
     return {
       pass: passed,
       message: passed ? `Judge passed with score ${meanScore.toFixed(2)}${repNote}` : `Judge failed with score ${meanScore.toFixed(2)} (threshold: ${threshold})${repNote}. ${lastReasoning ?? ""}`,
-      details: reps > 1 ? {
-        scores,
-        scoreStdDev: stdDev,
-        highVariance
-      } : void 0
+      details: {
+        score: meanScore,
+        reasoning: lastReasoning,
+        judgeProvider: provider ?? "anthropic",
+        judgeModel: model,
+        ...reps > 1 && {
+          scores,
+          scoreStdDev: stdDev,
+          highVariance
+        }
+      }
     };
   } catch (err) {
     return {
@@ -1056,31 +1302,68 @@ async function validateJudge(response, config) {
 // src/assertions/matchers/toPassToolJudge.ts
 var DEFAULT_PASSING_THRESHOLD = 0.7;
-async function toPassToolJudge(received, rubric, options = {}) {
+async function runSingleJudge(received, rubric, options) {
   const {
     reference = null,
     passingThreshold = DEFAULT_PASSING_THRESHOLD,
     reps,
     provider,
-    model
+    model,
+    judge
   } = options;
   const validation = await validateJudge(received, {
-    rubric,
+    ...rubric !== void 0 && { rubric },
     reference: reference ?? void 0,
     threshold: passingThreshold,
     ...reps !== void 0 && { reps },
     ...provider !== void 0 && { provider },
-    ...model !== void 0 && { model }
+    ...model !== void 0 && { model },
+    ...judge !== void 0 && { judge }
   });
+  return { pass: validation.pass, message: validation.message };
+}
+async function toPassToolJudge(received, rubricOrOptions, maybeOptions) {
+  if (Array.isArray(rubricOrOptions)) {
+    const results = await Promise.all(
+      rubricOrOptions.map(async (judgeConfig) => {
+        const { rubric: r, ...opts } = judgeConfig;
+        return runSingleJudge(received, r, opts);
+      })
+    );
+    const allPassed = results.every((r) => r.pass);
+    const passCount = results.filter((r) => r.pass).length;
+    const summary = `${passCount}/${results.length} judges passed`;
+    const details = results.map((r) => r.message).join("\n");
+    if (this.isNot) {
+      return {
+        pass: !allPassed,
+        message: () => allPassed ? `Expected all judges to fail, but ${summary}` : `Judges failed as expected: ${summary}`
+      };
+    }
+    return {
+      pass: allPassed,
+      message: () => `${summary}
+${details}`
+    };
+  }
+  let rubric;
+  let options;
+  if (typeof rubricOrOptions === "string" || typeof rubricOrOptions === "object" && rubricOrOptions !== null && "text" in rubricOrOptions) {
+    rubric = rubricOrOptions;
+    options = maybeOptions ?? {};
+  } else {
+    options = rubricOrOptions;
+  }
+  const result = await runSingleJudge(received, rubric, options);
   if (this.isNot) {
     return {
-      pass: !validation.pass,
-      message: () => validation.pass ? `Expected judge evaluation to fail, but it passed` : `Judge evaluation failed as expected`
+      pass: !result.pass,
+      message: () => result.pass ? `Expected judge evaluation to fail, but it passed` : `Judge evaluation failed as expected`
     };
   }
   return {
-    pass: validation.pass,
-    message: () => validation.message
+    pass: result.pass,
+    message: () => result.message
   };
 }
@@ -1188,9 +1471,17 @@ async function toSatisfyToolPredicate(received, predicate, description) {
 function isSimulationResult(value) {
   return typeof value === "object" && value !== null && "success" in value && "toolCalls" in value && Array.isArray(value.toolCalls);
 }
+function isPatternMatcher(v) {
+  return typeof v === "object" && v !== null && "$pattern" in v && typeof v["$pattern"] === "string";
+}
 function partialMatch(actual, expected) {
   return Object.entries(expected).every(([k, v]) => {
     const actualVal = actual[k];
+    if (isPatternMatcher(v)) {
+      if (typeof actualVal !== "string") return false;
+      const re = new RegExp(v.$pattern, v.$flags);
+      return re.test(actualVal);
+    }
     if (typeof v === "object" && v !== null && typeof actualVal === "object" && actualVal !== null) {
       return partialMatch(
         actualVal,
@@ -1237,6 +1528,10 @@ function validateToolCalls(response, expectation) {
           return {
             pass: false,
             message: `Expected tool '${expected.name}' to be called in sequence (starting from position ${searchFrom}), but it was not found`,
+            details: {
+              actual: actual.map((c) => c.name),
+              expected: expected.name
+            },
             metrics
           };
         }
@@ -1253,6 +1548,10 @@ function validateToolCalls(response, expectation) {
         return {
           pass: false,
           message: `Expected tool '${expected.name}'${argsNote} to be called, but it was not`,
+          details: {
+            actual: actual.map((c) => c.name),
+            expected: expected.name
+          },
           metrics
         };
       }
@@ -1265,6 +1564,10 @@ function validateToolCalls(response, expectation) {
       return {
         pass: false,
         message: `Unexpected tool calls: ${names}. Only ${[...allowedNames].map((n) => `'${n}'`).join(", ")} are allowed`,
+        details: {
+          actual: actual.map((c) => c.name),
+          unexpected: unexpected.map((c) => c.name)
+        },
         metrics
       };
     }
@@ -1283,19 +1586,22 @@ function validateToolCallCount(response, options) {
   if (exact !== void 0 && count !== exact) {
     return {
       pass: false,
-      message: `Expected exactly ${exact} tool call(s), but got ${count}`
+      message: `Expected exactly ${exact} tool call(s), but got ${count}`,
+      details: { actual: count, expected: exact }
     };
   }
   if (min !== void 0 && count < min) {
     return {
       pass: false,
-      message: `Expected at least ${min} tool call(s), but got ${count}`
+      message: `Expected at least ${min} tool call(s), but got ${count}`,
+      details: { actual: count, min }
     };
   }
   if (max !== void 0 && count > max) {
     return {
       pass: false,
-      message: `Expected at most ${max} tool call(s), but got ${count}`
+      message: `Expected at most ${max} tool call(s), but got ${count}`,
+      details: { actual: count, max }
     };
   }
   return {
@@ -1434,7 +1740,7 @@ var debugHttp = createDebug(`${NAMESPACE}:http`);
 // package.json
 var package_default = {
-  version: "1.0.0-beta.7"};
+  version: "1.0.0"};
 var debug = createDebug("mcp-server-tester:oauth-flow");
 async function generatePKCE() {
   const codeVerifier = oauth.generateRandomCodeVerifier();
@@ -1815,6 +2121,17 @@ async function createMCPClientForConfig(config, options) {
 }
 async function closeMCPClient(client) {
   try {
+    const transport = client.transport;
+    if (transport instanceof StreamableHTTPClientTransport) {
+      try {
+        await transport.terminateSession();
+      } catch (sessionError) {
+        debugClient(
+          "Error terminating session: %s",
+          sessionError instanceof Error ? sessionError.message : String(sessionError)
+        );
+      }
+    }
     await client.close();
   } catch (error) {
     debugClient(