npm - @gleanwork/mcp-server-tester - Versions diffs - 1.0.0-beta.3 → 1.0.0-beta.5 - Mend

@gleanwork/mcp-server-tester 1.0.0-beta.3 → 1.0.0-beta.5

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (19)

package/README.md +11 -10
package/dist/cli/index.js +34 -11
package/dist/fixtures/mcp.d.ts +6 -6
package/dist/fixtures/mcp.js +5 -5
package/dist/fixtures/mcp.js.map +1 -1
package/dist/index.cjs +79 -45
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +215 -1168
package/dist/index.d.ts +215 -1168
package/dist/index.js +79 -45
package/dist/index.js.map +1 -1
package/dist/reporters/mcpReporter.cjs.map +1 -1
package/dist/reporters/mcpReporter.js.map +1 -1
package/dist/reporters/ui-dist/app.js +107 -7
package/dist/reporters/ui-dist/styles.css +1 -1
package/package.json +9 -6
package/src/reporters/ui-dist/app.js +0 -174
package/src/reporters/ui-dist/index.html +0 -28
package/src/reporters/ui-dist/styles.css +0 -1

package/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # @gleanwork/mcp-server-tester
-[![Experimental](https://img.shields.io/badge/-Experimental-D8FD49?style=flat-square&logo=data:image/svg+xml;base64,PHN2ZyB2aWV3Qm94PSIwIDAgMzIgMzIiIGZpbGw9Im5vbmUiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+CjxwYXRoIGQ9Ik0yNC4zMDA2IDIuOTU0MjdMMjAuNzY1NiAwLjE5OTk1MUwxNy45MDI4IDMuOTk1MjdDMTMuNTY1MyAxLjkzNDk1IDguMjMwMTkgMy4wODQzOSA1LjE5Mzk0IDcuMDA5ODNDMS42NTg4OCAxMS41NjQyIDIuNDgzIDE4LjExMzggNy4wMzczOCAyMS42NDg5QzguNzcyMzggMjIuOTkzNSAxMC43ODkzIDIzLjcwOTIgMTIuODI3OSAyMy44MTc3QzE2LjE0NjEgMjQuMDEyOCAxOS41MDc3IDIyLjYyNDggMjEuNjc2NSAxOS44MDU1QzI0LjczNDQgMTUuODggMjQuNTE3NSAxMC40MTQ4IDIxLjQ1OTYgNi43Mjc4OUwyNC4zMDA2IDIuOTU0MjdaTTE4LjExOTcgMTcuMDUxMkMxNi4xMDI4IDE5LjYzMiAxMi4zNzI1IDIwLjEwOTEgOS43NzAwMSAxOC4wOTIyQzcuMTg5MTkgMTYuMDc1MiA2LjcxMjA3IDEyLjMyMzMgOC43MjkwMSA5Ljc0MjQ2QzkuNzA0OTQgOC40ODQ1OCAxMS4xMTQ2IDcuNjgyMTQgMTIuNjc2MSA3LjQ4Njk2QzEzLjA0NDggNy40NDM1OCAxMy40MTM1IDcuNDIxOSAxMy43ODIyIDcuNDQzNThDMTQuOTc1IDcuNTA4NjUgMTYuMTI0NCA3Ljk0MjM5IDE3LjA3ODcgOC42Nzk3N0MxOS42NTk1IDEwLjcxODQgMjAuMTM2NiAxNC40NzAzIDE4LjExOTcgMTcuMDUxMloiIGZpbGw9IndoaXRlIi8+CjxwYXRoIGQ9Ik0yNC41MTc2IDIxLjY5MjJDMjMuOTMyIDIyLjQ1MTMgMjMuMjgxNCAyMy4xMjM2IDIyLjU2NTcgMjMuNzUyNUMyMS44NzE3IDI0LjMzODEgMjEuMTEyNyAyNC44ODAzIDIwLjMxMDIgMjUuMzM1N0MxOS41Mjk1IDI1Ljc2OTUgMTguNjgzNyAyNi4xMzgyIDE3LjgzNzggMjYuNDIwMUMxNi45OTIgMjYuNzAyIDE2LjEwMjggMjYuODk3MiAxNS4yMTM3IDI3LjAwNTdDMTQuMzI0NSAyNy4xMTQxIDEzLjQzNTMgMjcuMTU3NSAxMi41MjQ0IDI3LjA5MjRDMTEuNjEzNSAyNy4wMjczIDEwLjcyNDMgMjYuODc1NSA5Ljg1Njg0IDI2LjY1ODdMOS42NjE2NSAyNy4zNzQzTDguNzcyNDYgMzAuOTk2MkM5LjkwMDIxIDMxLjI5OTggMTEuMDQ5NyAzMS40NzMzIDEyLjIyMDggMzEuNTZDMTIuMjY0MiAzMS41NiAxMi4zMjkyIDMxLjU2IDEyLjM3MjYgMzEuNTZDMTMuNTAwMyAzMS42MjUxIDE0LjY0OTggMzEuNTgxNyAxNS43NTU4IDMxLjQ1MTZDMTYuOTI3IDMxLjI5OTggMTguMDk4MSAzMS4wMzk1IDE5LjIyNTggMzAuNjcwOEMyMC4zNTM2IDMwLjMwMjIgMjEuNDU5NyAyOS44MjUgMjIuNTAwNyAyOS4yMzk1QzIzLjU2MzQgMjguNjUzOSAyNC41NjEgMjcuOTM4MiAyNS40OTM1IDI3LjE1NzVDMjYuNDQ3OCAyNi4zNTUgMjcuMzE1MyAyNS40NDQyIDI4LjA3NDQgMjQuNDQ2NUMyOC4xODI4IDI0LjMxNjQgMjguMjY5NSAyNC4xNjQ2IDI4LjM3OCAyNC4
wMTI4TDI0Ljc3NzkgMjEuMzQ1MkMyNC42Njk0IDIxLjQ1MzcgMjQuNjA0NCAyMS41ODM4IDI0LjUxNzYgMjEuNjkyMloiIGZpbGw9IndoaXRlIi8+Cjwvc3ZnPg==&labelColor=343CED)](https://github.com/gleanwork/.github/blob/main/docs/repository-stability.md#experimental)
+[![GA](https://img.shields.io/badge/-GA-F6F3EB?style=flat-square&logo=data:image/svg+xml;base64,PHN2ZyB2aWV3Qm94PSIwIDAgMzIgMzIiIGZpbGw9Im5vbmUiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+CjxwYXRoIGQ9Ik0yNC4zMDA2IDIuOTU0MjdMMjAuNzY1NiAwLjE5OTk1MUwxNy45MDI4IDMuOTk1MjdDMTMuNTY1MyAxLjkzNDk1IDguMjMwMTkgMy4wODQzOSA1LjE5Mzk0IDcuMDA5ODNDMS42NTg4OCAxMS41NjQyIDIuNDgzIDE4LjExMzggNy4wMzczOCAyMS42NDg5QzguNzcyMzggMjIuOTkzNSAxMC43ODkzIDIzLjcwOTIgMTIuODI3OSAyMy44MTc3QzE2LjE0NjEgMjQuMDEyOCAxOS41MDc3IDIyLjYyNDggMjEuNjc2NSAxOS44MDU1QzI0LjczNDQgMTUuODggMjQuNTE3NSAxMC40MTQ4IDIxLjQ1OTYgNi43Mjc4OUwyNC4zMDA2IDIuOTU0MjdaTTE4LjExOTcgMTcuMDUxMkMxNi4xMDI4IDE5LjYzMiAxMi4zNzI1IDIwLjEwOTEgOS43NzAwMSAxOC4wOTIyQzcuMTg5MTkgMTYuMDc1MiA2LjcxMjA3IDEyLjMyMzMgOC43MjkwMSA5Ljc0MjQ2QzkuNzA0OTQgOC40ODQ1OCAxMS4xMTQ2IDcuNjgyMTQgMTIuNjc2MSA3LjQ4Njk2QzEzLjA0NDggNy40NDM1OCAxMy40MTM1IDcuNDIxOSAxMy43ODIyIDcuNDQzNThDMTQuOTc1IDcuNTA4NjUgMTYuMTI0NCA3Ljk0MjM5IDE3LjA3ODcgOC42Nzk3N0MxOS42NTk1IDEwLjcxODQgMjAuMTM2NiAxNC40NzAzIDE4LjExOTcgMTcuMDUxMloiIGZpbGw9IndoaXRlIi8+CjxwYXRoIGQ9Ik0yNC41MTc2IDIxLjY5MjJDMjMuOTMyIDIyLjQ1MTMgMjMuMjgxNCAyMy4xMjM2IDIyLjU2NTcgMjMuNzUyNUMyMS44NzE3IDI0LjMzODEgMjEuMTEyNyAyNC44ODAzIDIwLjMxMDIgMjUuMzM1N0MxOS41Mjk1IDI1Ljc2OTUgMTguNjgzNyAyNi4xMzgyIDE3LjgzNzggMjYuNDIwMUMxNi45OTIgMjYuNzAyIDE2LjEwMjggMjYuODk3MiAxNS4yMTM3IDI3LjAwNTdDMTQuMzI0NSAyNy4xMTQxIDEzLjQzNTMgMjcuMTU3NSAxMi41MjQ0IDI3LjA5MjRDMTEuNjEzNSAyNy4wMjczIDEwLjcyNDMgMjYuODc1NSA5Ljg1Njg0IDI2LjY1ODdMOS42NjE2NSAyNy4zNzQzTDguNzcyNDYgMzAuOTk2MkM5LjkwMDIxIDMxLjI5OTggMTEuMDQ5NyAzMS40NzMzIDEyLjIyMDggMzEuNTZDMTIuMjY0MiAzMS41NiAxMi4zMjkyIDMxLjU2IDEyLjM3MjYgMzEuNTZDMTMuNTAwMyAzMS42MjUxIDE0LjY0OTggMzEuNTgxNyAxNS43NTU4IDMxLjQ1MTZDMTYuOTI3IDMxLjI5OTggMTguMDk4MSAzMS4wMzk1IDE5LjIyNTggMzAuNjcwOEMyMC4zNTM2IDMwLjMwMjIgMjEuNDU5NyAyOS44MjUgMjIuNTAwNyAyOS4yMzk1QzIzLjU2MzQgMjguNjUzOSAyNC41NjEgMjcuOTM4MiAyNS40OTM1IDI3LjE1NzVDMjYuNDQ3OCAyNi4zNTUgMjcuMzE1MyAyNS40NDQyIDI4LjA3NDQgMjQuNDQ2NUMyOC4xODI4IDI0LjMxNjQgMjguMjY5NSAyNC4xNjQ2IDI4LjM3OCAyNC4wMTI4TDI0Ljc3NzkgMjE
uMzQ1MkMyNC42Njk0IDIxLjQ1MzcgMjQuNjA0NCAyMS41ODM4IDI0LjUxNzYgMjEuNjkyMloiIGZpbGw9IndoaXRlIi8+Cjwvc3ZnPg==&labelColor=343CED)](https://github.com/gleanwork/.github/blob/main/docs/repository-stability.md#ga)
 [![npm version](https://img.shields.io/npm/v/@gleanwork/mcp-server-tester)](https://www.npmjs.com/package/@gleanwork/mcp-server-tester)
 [![CI](https://github.com/gleanwork/mcp-server-tester/actions/workflows/ci.yml/badge.svg)](https://github.com/gleanwork/mcp-server-tester/actions/workflows/ci.yml)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
@@ -11,7 +11,7 @@ A testing and evaluation framework for [Model Context Protocol (MCP)](https://mo
 The `mcp` Playwright fixture connects to your MCP server (stdio or HTTP) and exposes a high-level API for calling tools and asserting responses. Custom matchers keep assertions readable.
-```typescript
+```typescript snippet=snippets/basic-test.ts
 import { test, expect } from '@gleanwork/mcp-server-tester/fixtures/mcp';
 test('read_file returns file contents', async ({ mcp }) => {
@@ -48,7 +48,7 @@ Available matchers:
 Eval datasets let you define test cases as JSON files and run them with `runEvalDataset()`. Each case specifies a tool call and one or more assertions.
-```json
+```json snippet=snippets/eval-dataset.json
 {
   "name": "file-ops",
   "cases": [
@@ -73,7 +73,7 @@ Eval datasets let you define test cases as JSON files and run them with `runEval
 }
 ```
-```typescript
+```typescript snippet=snippets/run-eval-dataset.ts
 import { test, expect } from '@gleanwork/mcp-server-tester/fixtures/mcp';
 import { loadEvalDataset, runEvalDataset } from '@gleanwork/mcp-server-tester';
 import { z } from 'zod';
@@ -102,12 +102,12 @@ Supported assertion types:
 In LLM host mode, a real LLM receives your server's tool list and a natural language prompt, then decides which tools to call. This tests whether your tool names, descriptions, and input schemas are clear enough for autonomous use — a different question from whether the tools return correct output.
-```json
+```json snippet=snippets/mcp-host-dataset.json
 {
   "id": "find-config",
-  "mode": "llm_host",
+  "mode": "mcp_host",
   "scenario": "Find the application config file and return its contents",
-  "llmHostConfig": {
+  "mcpHostConfig": {
     "provider": "anthropic",
     "model": "claude-opus-4-20250514"
   },
@@ -119,7 +119,7 @@ In LLM host mode, a real LLM receives your server's tool list and a natural lang
 }
 ```
-LLM host mode makes real API calls and produces non-deterministic results. Use `iterations` to run a case multiple times and measure pass rate rather than expecting 100% on a single run. See the [LLM Host Guide](docs/llm-host.md) for configuration and cost management.
+LLM host mode makes real API calls and produces non-deterministic results. Use `iterations` to run a case multiple times and measure pass rate rather than expecting 100% on a single run. See the [LLM Host Guide](docs/mcp-host.md) for configuration and cost management.
 ## Installation
@@ -147,7 +147,7 @@ The CLI wizard creates a `playwright.config.ts`, example tests, and a sample eva
 Point the framework at your MCP server in `playwright.config.ts`:
-```typescript
+```typescript snippet=snippets/playwright-config.ts
 import { defineConfig } from '@playwright/test';
 export default defineConfig({
@@ -174,12 +174,13 @@ For HTTP servers, set `transport: 'http'` and `serverUrl`. For servers that requ
 - [Quick Start](./docs/quickstart.md) — detailed setup and configuration
 - [Expectations](./docs/expectations.md) — all assertion types including snapshot sanitizers
-- [LLM Host Simulation](docs/llm-host.md) — tool discoverability testing
+- [LLM Host Simulation](docs/mcp-host.md) — tool discoverability testing
 - [API Reference](./docs/api-reference.md)
 - [Transports](./docs/transports.md) — stdio and HTTP configuration, OAuth
 - [CLI Commands](./docs/cli.md) — init, generate, login, token
 - [UI Reporter](./docs/ui-reporter.md) — interactive web UI for test results
 - [Development](./docs/development.md) — contributing and building
+- [Migration Guide (v0.12 → v1.0)](./docs/migrations/migration-1.0.md) — upgrading from pre-1.0 releases
 ## Examples

package/dist/cli/index.js CHANGED Viewed

@@ -17,7 +17,7 @@ import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js';
 import { z } from 'zod';
 import createDebug from 'debug';
 import { ProxyAgent, Agent } from 'undici';
-import { readFileSync } from 'fs';
+import { existsSync, readFileSync } from 'fs';
 import * as oauth from 'oauth4webapi';
 import { homedir } from 'os';
 import * as http from 'http';
@@ -80,7 +80,7 @@ function JsonPreview({ data, maxLines = 15 }) {
 // package.json
 var package_default = {
-  version: "1.0.0-beta.3"};
+  version: "1.0.0-beta.5"};
 // src/cli/templates/index.ts
 function getPlaywrightConfigTemplate(answers) {
@@ -255,10 +255,10 @@ function getPackageJsonTemplate(projectName) {
     "evals"
   ],
   "dependencies": {
-    "@modelcontextprotocol/sdk": "^1.0.4",
+    "@modelcontextprotocol/sdk": "^1.27.0",
     "@playwright/test": "^1.49.0",
     "@gleanwork/mcp-server-tester": "^${package_default.version}",
-    "zod": "^3.24.1"
+    "zod": "^4.0.0"
   },
   "devDependencies": {
     "typescript": "^5.7.2"
@@ -531,7 +531,7 @@ async function init(options) {
   await waitUntilExit();
 }
 var MCPHostCapabilitiesSchema = z.object({
-  sampling: z.record(z.unknown()).optional(),
+  sampling: z.record(z.string(), z.unknown()).optional(),
   roots: z.object({
     listChanged: z.boolean()
   }).optional()
@@ -590,7 +590,7 @@ var HttpConfigSchema = z.object({
     }
     return true;
   }),
-  headers: z.record(z.string()).optional(),
+  headers: z.record(z.string(), z.string()).optional(),
   capabilities: MCPHostCapabilitiesSchema.optional(),
   connectTimeoutMs: z.number().positive().optional(),
   requestTimeoutMs: z.number().positive().optional(),
@@ -827,7 +827,7 @@ async function retryWithBackoff(fn, maxAttempts) {
           delayMs,
           err.message
         );
-        await new Promise((resolve3) => setTimeout(resolve3, delayMs));
+        await new Promise((resolve4) => setTimeout(resolve4, delayMs));
       } else {
         throw err;
       }
@@ -1683,7 +1683,7 @@ ${errorText}`
    */
   async startCallbackServer(expectedState) {
     const timeoutMs = this.config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
-    return new Promise((resolve3, reject) => {
+    return new Promise((resolve4, reject) => {
       const server = http.createServer();
       const connections = /* @__PURE__ */ new Set();
       server.on("connection", (socket) => {
@@ -1756,7 +1756,7 @@ ${errorText}`
       server.listen(preferredPort, "127.0.0.1", () => {
         const address = server.address();
         debug2("Callback server listening on port", address.port);
-        resolve3({ port: address.port, codePromise, close: forceClose });
+        resolve4({ port: address.port, codePromise, close: forceClose });
       });
       server.on("error", (err) => {
         reject(err);
@@ -1777,8 +1777,8 @@ ${errorText}`
       return;
     }
     try {
-      const open = await import('open');
-      await open.default(url.toString());
+      const open2 = await import('open');
+      await open2.default(url.toString());
       debug2("Opened browser for authentication");
     } catch (error) {
       debug2("Failed to open browser:", error);
@@ -3062,6 +3062,24 @@ async function token(serverUrl, options) {
   );
   await waitUntilExit();
 }
+async function open(options) {
+  const outputDir = resolve(options.dir ?? ".mcp-test-results");
+  const reportPath = join(outputDir, "latest", "index.html");
+  if (!existsSync(reportPath)) {
+    console.error(`No report found at ${reportPath}`);
+    console.error("Run your Playwright tests first to generate a report.");
+    process.exit(1);
+  }
+  console.log(`Opening report: ${reportPath}`);
+  try {
+    const { default: openBrowser } = await import('open');
+    await openBrowser(reportPath);
+  } catch (error) {
+    console.error("Failed to open report in browser:", error);
+    console.error(`Open manually: file://${reportPath}`);
+    process.exit(1);
+  }
+}
 // src/cli/index.ts
 var program = new Command();
@@ -3077,4 +3095,9 @@ program.command("token").description("Output stored OAuth tokens for CI/CD use")
   "Output format: env, json, or gh (default: env)",
   "env"
 ).option("--state-dir <dir>", "Custom directory for token storage").action(token);
+program.command("open").description("Open the MCP eval reporter UI in your browser").option(
+  "-d, --dir <directory>",
+  "Report output directory",
+  ".mcp-test-results"
+).action(open);
 program.parse();

package/dist/fixtures/mcp.d.ts CHANGED Viewed

@@ -217,9 +217,9 @@ type RubricSpec = BuiltInRubric | {
 type ProviderKind = 'anthropic' | 'openai' | 'google';
 /**
- * Tool call validators for llm_host simulation results.
+ * Tool call validators for mcp_host simulation results.
  *
- * These validators extract the tool call trace from an LLMHostSimulationResult
+ * These validators extract the tool call trace from an MCPHostSimulationResult
  * and apply assertions against expected call lists and counts.
  */
@@ -408,7 +408,7 @@ declare global {
              */
             toSatisfyToolPredicate(predicate: ToolPredicate, description?: string): Promise<R>;
             /**
-             * Validates which tools the LLM called during an llm_host simulation.
+             * Validates which tools the LLM called during a mcp_host simulation.
              *
              * @example
              * ```typescript
@@ -420,7 +420,7 @@ declare global {
              */
             toHaveToolCalls(expectation: ToolCallExpectation): R;
             /**
-             * Validates the number of tool calls made during an llm_host simulation.
+             * Validates the number of tool calls made during a mcp_host simulation.
              *
              * @example
              * ```typescript
@@ -531,7 +531,7 @@ declare function toSatisfyToolPredicate(this: {
 /**
  * toHaveToolCalls Matcher
  *
- * Validates which tools the LLM called during an llm_host simulation.
+ * Validates which tools the LLM called during a mcp_host simulation.
  */
 /**
@@ -547,7 +547,7 @@ declare function toHaveToolCalls(this: {
 /**
  * toHaveToolCallCount Matcher
  *
- * Validates the number of tool calls made during an llm_host simulation.
+ * Validates the number of tool calls made during a mcp_host simulation.
  */
 /**

package/dist/fixtures/mcp.js CHANGED Viewed

@@ -1215,7 +1215,7 @@ function validateToolCalls(response, expectation) {
   if (!isSimulationResult(response)) {
     return {
       pass: false,
-      message: "toolsTriggered expectation requires llm_host mode \u2014 response must be an LLMHostSimulationResult"
+      message: "toolsTriggered expectation requires mcp_host mode \u2014 response must be an MCPHostSimulationResult"
     };
   }
   const actual = response.toolCalls;
@@ -1275,7 +1275,7 @@ function validateToolCallCount(response, options) {
   if (!isSimulationResult(response)) {
     return {
       pass: false,
-      message: "toolCallCount expectation requires llm_host mode \u2014 response must be an LLMHostSimulationResult"
+      message: "toolCallCount expectation requires mcp_host mode \u2014 response must be an MCPHostSimulationResult"
     };
   }
   const count = response.toolCalls.length;
@@ -1337,7 +1337,7 @@ var expect = expect$1.extend({
   toHaveToolCallCount
 });
 var MCPHostCapabilitiesSchema = z.object({
-  sampling: z.record(z.unknown()).optional(),
+  sampling: z.record(z.string(), z.unknown()).optional(),
   roots: z.object({
     listChanged: z.boolean()
   }).optional()
@@ -1396,7 +1396,7 @@ var HttpConfigSchema = z.object({
     }
     return true;
   }),
-  headers: z.record(z.string()).optional(),
+  headers: z.record(z.string(), z.string()).optional(),
   capabilities: MCPHostCapabilitiesSchema.optional(),
   connectTimeoutMs: z.number().positive().optional(),
   requestTimeoutMs: z.number().positive().optional(),
@@ -1434,7 +1434,7 @@ var debugHttp = createDebug(`${NAMESPACE}:http`);
 // package.json
 var package_default = {
-  version: "1.0.0-beta.3"};
+  version: "1.0.0-beta.5"};
 var debug = createDebug("mcp-server-tester:oauth-flow");
 async function generatePKCE() {
   const codeVerifier = oauth.generateRandomCodeVerifier();