@gleanwork/mcp-server-tester 1.0.0-beta.2 → 1.0.0-beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # @gleanwork/mcp-server-tester
2
2
 
3
- [![Experimental](https://img.shields.io/badge/-Experimental-D8FD49?style=flat-square&logo=data:image/svg+xml;base64,PHN2ZyB2aWV3Qm94PSIwIDAgMzIgMzIiIGZpbGw9Im5vbmUiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+CjxwYXRoIGQ9Ik0yNC4zMDA2IDIuOTU0MjdMMjAuNzY1NiAwLjE5OTk1MUwxNy45MDI4IDMuOTk1MjdDMTMuNTY1MyAxLjkzNDk1IDguMjMwMTkgMy4wODQzOSA1LjE5Mzk0IDcuMDA5ODNDMS42NTg4OCAxMS41NjQyIDIuNDgzIDE4LjExMzggNy4wMzczOCAyMS42NDg5QzguNzcyMzggMjIuOTkzNSAxMC43ODkzIDIzLjcwOTIgMTIuODI3OSAyMy44MTc3QzE2LjE0NjEgMjQuMDEyOCAxOS41MDc3IDIyLjYyNDggMjEuNjc2NSAxOS44MDU1QzI0LjczNDQgMTUuODggMjQuNTE3NSAxMC40MTQ4IDIxLjQ1OTYgNi43Mjc4OUwyNC4zMDA2IDIuOTU0MjdaTTE4LjExOTcgMTcuMDUxMkMxNi4xMDI4IDE5LjYzMiAxMi4zNzI1IDIwLjEwOTEgOS43NzAwMSAxOC4wOTIyQzcuMTg5MTkgMTYuMDc1MiA2LjcxMjA3IDEyLjMyMzMgOC43MjkwMSA5Ljc0MjQ2QzkuNzA0OTQgOC40ODQ1OCAxMS4xMTQ2IDcuNjgyMTQgMTIuNjc2MSA3LjQ4Njk2QzEzLjA0NDggNy40NDM1OCAxMy40MTM1IDcuNDIxOSAxMy43ODIyIDcuNDQzNThDMTQuOTc1IDcuNTA4NjUgMTYuMTI0NCA3Ljk0MjM5IDE3LjA3ODcgOC42Nzk3N0MxOS42NTk1IDEwLjcxODQgMjAuMTM2NiAxNC40NzAzIDE4LjExOTcgMTcuMDUxMloiIGZpbGw9IndoaXRlIi8+CjxwYXRoIGQ9Ik0yNC41MTc2IDIxLjY5MjJDMjMuOTMyIDIyLjQ1MTMgMjMuMjgxNCAyMy4xMjM2IDIyLjU2NTcgMjMuNzUyNUMyMS44NzE3IDI0LjMzODEgMjEuMTEyNyAyNC44ODAzIDIwLjMxMDIgMjUuMzM1N0MxOS41Mjk1IDI1Ljc2OTUgMTguNjgzNyAyNi4xMzgyIDE3LjgzNzggMjYuNDIwMUMxNi45OTIgMjYuNzAyIDE2LjEwMjggMjYuODk3MiAxNS4yMTM3IDI3LjAwNTdDMTQuMzI0NSAyNy4xMTQxIDEzLjQzNTMgMjcuMTU3NSAxMi41MjQ0IDI3LjA5MjRDMTEuNjEzNSAyNy4wMjczIDEwLjcyNDMgMjYuODc1NSA5Ljg1Njg0IDI2LjY1ODdMOS42NjE2NSAyNy4zNzQzTDguNzcyNDYgMzAuOTk2MkM5LjkwMDIxIDMxLjI5OTggMTEuMDQ5NyAzMS40NzMzIDEyLjIyMDggMzEuNTZDMTIuMjY0MiAzMS41NiAxMi4zMjkyIDMxLjU2IDEyLjM3MjYgMzEuNTZDMTMuNTAwMyAzMS42MjUxIDE0LjY0OTggMzEuNTgxNyAxNS43NTU4IDMxLjQ1MTZDMTYuOTI3IDMxLjI5OTggMTguMDk4MSAzMS4wMzk1IDE5LjIyNTggMzAuNjcwOEMyMC4zNTM2IDMwLjMwMjIgMjEuNDU5NyAyOS44MjUgMjIuNTAwNyAyOS4yMzk1QzIzLjU2MzQgMjguNjUzOSAyNC41NjEgMjcuOTM4MiAyNS40OTM1IDI3LjE1NzVDMjYuNDQ3OCAyNi4zNTUgMjcuMzE1MyAyNS40NDQyIDI4LjA3NDQgMjQuNDQ2NUMyOC4xODI4IDI0LjMxNjQgMjguMjY5NSAyNC4xNjQ2IDI4LjM3OCAyNC
4wMTI4TDI0Ljc3NzkgMjEuMzQ1MkMyNC42Njk0IDIxLjQ1MzcgMjQuNjA0NCAyMS41ODM4IDI0LjUxNzYgMjEuNjkyMloiIGZpbGw9IndoaXRlIi8+Cjwvc3ZnPg==&labelColor=343CED)](https://github.com/gleanwork/.github/blob/main/docs/repository-stability.md#experimental)
3
+ [![GA](https://img.shields.io/badge/-GA-F6F3EB?style=flat-square&logo=data:image/svg+xml;base64,PHN2ZyB2aWV3Qm94PSIwIDAgMzIgMzIiIGZpbGw9Im5vbmUiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+CjxwYXRoIGQ9Ik0yNC4zMDA2IDIuOTU0MjdMMjAuNzY1NiAwLjE5OTk1MUwxNy45MDI4IDMuOTk1MjdDMTMuNTY1MyAxLjkzNDk1IDguMjMwMTkgMy4wODQzOSA1LjE5Mzk0IDcuMDA5ODNDMS42NTg4OCAxMS41NjQyIDIuNDgzIDE4LjExMzggNy4wMzczOCAyMS42NDg5QzguNzcyMzggMjIuOTkzNSAxMC43ODkzIDIzLjcwOTIgMTIuODI3OSAyMy44MTc3QzE2LjE0NjEgMjQuMDEyOCAxOS41MDc3IDIyLjYyNDggMjEuNjc2NSAxOS44MDU1QzI0LjczNDQgMTUuODggMjQuNTE3NSAxMC40MTQ4IDIxLjQ1OTYgNi43Mjc4OUwyNC4zMDA2IDIuOTU0MjdaTTE4LjExOTcgMTcuMDUxMkMxNi4xMDI4IDE5LjYzMiAxMi4zNzI1IDIwLjEwOTEgOS43NzAwMSAxOC4wOTIyQzcuMTg5MTkgMTYuMDc1MiA2LjcxMjA3IDEyLjMyMzMgOC43MjkwMSA5Ljc0MjQ2QzkuNzA0OTQgOC40ODQ1OCAxMS4xMTQ2IDcuNjgyMTQgMTIuNjc2MSA3LjQ4Njk2QzEzLjA0NDggNy40NDM1OCAxMy40MTM1IDcuNDIxOSAxMy43ODIyIDcuNDQzNThDMTQuOTc1IDcuNTA4NjUgMTYuMTI0NCA3Ljk0MjM5IDE3LjA3ODcgOC42Nzk3N0MxOS42NTk1IDEwLjcxODQgMjAuMTM2NiAxNC40NzAzIDE4LjExOTcgMTcuMDUxMloiIGZpbGw9IndoaXRlIi8+CjxwYXRoIGQ9Ik0yNC41MTc2IDIxLjY5MjJDMjMuOTMyIDIyLjQ1MTMgMjMuMjgxNCAyMy4xMjM2IDIyLjU2NTcgMjMuNzUyNUMyMS44NzE3IDI0LjMzODEgMjEuMTEyNyAyNC44ODAzIDIwLjMxMDIgMjUuMzM1N0MxOS41Mjk1IDI1Ljc2OTUgMTguNjgzNyAyNi4xMzgyIDE3LjgzNzggMjYuNDIwMUMxNi45OTIgMjYuNzAyIDE2LjEwMjggMjYuODk3MiAxNS4yMTM3IDI3LjAwNTdDMTQuMzI0NSAyNy4xMTQxIDEzLjQzNTMgMjcuMTU3NSAxMi41MjQ0IDI3LjA5MjRDMTEuNjEzNSAyNy4wMjczIDEwLjcyNDMgMjYuODc1NSA5Ljg1Njg0IDI2LjY1ODdMOS42NjE2NSAyNy4zNzQzTDguNzcyNDYgMzAuOTk2MkM5LjkwMDIxIDMxLjI5OTggMTEuMDQ5NyAzMS40NzMzIDEyLjIyMDggMzEuNTZDMTIuMjY0MiAzMS41NiAxMi4zMjkyIDMxLjU2IDEyLjM3MjYgMzEuNTZDMTMuNTAwMyAzMS42MjUxIDE0LjY0OTggMzEuNTgxNyAxNS43NTU4IDMxLjQ1MTZDMTYuOTI3IDMxLjI5OTggMTguMDk4MSAzMS4wMzk1IDE5LjIyNTggMzAuNjcwOEMyMC4zNTM2IDMwLjMwMjIgMjEuNDU5NyAyOS44MjUgMjIuNTAwNyAyOS4yMzk1QzIzLjU2MzQgMjguNjUzOSAyNC41NjEgMjcuOTM4MiAyNS40OTM1IDI3LjE1NzVDMjYuNDQ3OCAyNi4zNTUgMjcuMzE1MyAyNS40NDQyIDI4LjA3NDQgMjQuNDQ2NUMyOC4xODI4IDI0LjMxNjQgMjguMjY5NSAyNC4xNjQ2IDI4LjM3OCAyNC4wMTI4TDI0Ljc3NzkgMj
EuMzQ1MkMyNC42Njk0IDIxLjQ1MzcgMjQuNjA0NCAyMS41ODM4IDI0LjUxNzYgMjEuNjkyMloiIGZpbGw9IndoaXRlIi8+Cjwvc3ZnPg==&labelColor=343CED)](https://github.com/gleanwork/.github/blob/main/docs/repository-stability.md#ga)
4
4
  [![npm version](https://img.shields.io/npm/v/@gleanwork/mcp-server-tester)](https://www.npmjs.com/package/@gleanwork/mcp-server-tester)
5
5
  [![CI](https://github.com/gleanwork/mcp-server-tester/actions/workflows/ci.yml/badge.svg)](https://github.com/gleanwork/mcp-server-tester/actions/workflows/ci.yml)
6
6
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
@@ -11,7 +11,7 @@ A testing and evaluation framework for [Model Context Protocol (MCP)](https://mo
11
11
 
12
12
  The `mcp` Playwright fixture connects to your MCP server (stdio or HTTP) and exposes a high-level API for calling tools and asserting responses. Custom matchers keep assertions readable.
13
13
 
14
- ```typescript
14
+ ```typescript snippet=snippets/basic-test.ts
15
15
  import { test, expect } from '@gleanwork/mcp-server-tester/fixtures/mcp';
16
16
 
17
17
  test('read_file returns file contents', async ({ mcp }) => {
@@ -30,24 +30,25 @@ Playwright tests are fast, deterministic, and designed for CI. Use them for regr
30
30
 
31
31
  Available matchers:
32
32
 
33
- | Matcher | Description |
34
- | ------------------------ | ----------------------------------------------- |
35
- | `toContainToolText` | Response contains expected substrings |
36
- | `toMatchToolSchema` | Response validates against a Zod schema |
37
- | `toMatchToolPattern` | Response matches a regex pattern |
38
- | `toMatchToolSnapshot` | Response matches a saved baseline |
39
- | `toBeToolError` | Response is (or is not) an error |
40
- | `toHaveToolResponseSize` | Response size is within bounds |
41
- | `toSatisfyToolPredicate` | Response satisfies a custom function |
42
- | `toHaveToolCalls` | LLM called the expected tools |
43
- | `toHaveToolCallCount` | LLM made N tool calls |
44
- | `toPassToolJudge` | LLM evaluates response quality against a rubric |
33
+ | Matcher | Description |
34
+ | ------------------------ | ---------------------------------------------------- |
35
+ | `toMatchToolResponse` | Response exactly matches expected value (deep equal) |
36
+ | `toContainToolText` | Response contains expected substrings |
37
+ | `toMatchToolSchema` | Response validates against a Zod schema |
38
+ | `toMatchToolPattern` | Response matches a regex pattern |
39
+ | `toMatchToolSnapshot` | Response matches a saved baseline |
40
+ | `toBeToolError` | Response is (or is not) an error |
41
+ | `toHaveToolResponseSize` | Response size is within bounds |
42
+ | `toSatisfyToolPredicate` | Response satisfies a custom function |
43
+ | `toHaveToolCalls` | LLM called the expected tools |
44
+ | `toHaveToolCallCount` | LLM made N tool calls |
45
+ | `toPassToolJudge` | LLM evaluates response quality against a rubric |
45
46
 
46
47
  ## Eval Datasets
47
48
 
48
49
  Eval datasets let you define test cases as JSON files and run them with `runEvalDataset()`. Each case specifies a tool call and one or more assertions.
49
50
 
50
- ```json
51
+ ```json snippet=snippets/eval-dataset.json
51
52
  {
52
53
  "name": "file-ops",
53
54
  "cases": [
@@ -72,7 +73,7 @@ Eval datasets let you define test cases as JSON files and run them with `runEval
72
73
  }
73
74
  ```
74
75
 
75
- ```typescript
76
+ ```typescript snippet=snippets/run-eval-dataset.ts
76
77
  import { test, expect } from '@gleanwork/mcp-server-tester/fixtures/mcp';
77
78
  import { loadEvalDataset, runEvalDataset } from '@gleanwork/mcp-server-tester';
78
79
  import { z } from 'zod';
@@ -101,12 +102,12 @@ Supported assertion types:
101
102
 
102
103
  In LLM host mode, a real LLM receives your server's tool list and a natural language prompt, then decides which tools to call. This tests whether your tool names, descriptions, and input schemas are clear enough for autonomous use — a different question from whether the tools return correct output.
103
104
 
104
- ```json
105
+ ```json snippet=snippets/mcp-host-dataset.json
105
106
  {
106
107
  "id": "find-config",
107
- "mode": "llm_host",
108
+ "mode": "mcp_host",
108
109
  "scenario": "Find the application config file and return its contents",
109
- "llmHostConfig": {
110
+ "mcpHostConfig": {
110
111
  "provider": "anthropic",
111
112
  "model": "claude-opus-4-20250514"
112
113
  },
@@ -118,7 +119,7 @@ In LLM host mode, a real LLM receives your server's tool list and a natural lang
118
119
  }
119
120
  ```
120
121
 
121
- LLM host mode makes real API calls and produces non-deterministic results. Use `iterations` to run a case multiple times and measure pass rate rather than expecting 100% on a single run. See the [LLM Host Guide](docs/llm-host.md) for configuration and cost management.
122
+ LLM host mode makes real API calls and produces non-deterministic results. Use `iterations` to run a case multiple times and measure pass rate rather than expecting 100% on a single run. See the [LLM Host Guide](docs/mcp-host.md) for configuration and cost management.
122
123
 
123
124
  ## Installation
124
125
 
@@ -146,7 +147,7 @@ The CLI wizard creates a `playwright.config.ts`, example tests, and a sample eva
146
147
 
147
148
  Point the framework at your MCP server in `playwright.config.ts`:
148
149
 
149
- ```typescript
150
+ ```typescript snippet=snippets/playwright-config.ts
150
151
  import { defineConfig } from '@playwright/test';
151
152
 
152
153
  export default defineConfig({
@@ -173,12 +174,13 @@ For HTTP servers, set `transport: 'http'` and `serverUrl`. For servers that requ
173
174
 
174
175
  - [Quick Start](./docs/quickstart.md) — detailed setup and configuration
175
176
  - [Expectations](./docs/expectations.md) — all assertion types including snapshot sanitizers
176
- - [LLM Host Simulation](docs/llm-host.md) — tool discoverability testing
177
+ - [LLM Host Simulation](docs/mcp-host.md) — tool discoverability testing
177
178
  - [API Reference](./docs/api-reference.md)
178
179
  - [Transports](./docs/transports.md) — stdio and HTTP configuration, OAuth
179
180
  - [CLI Commands](./docs/cli.md) — init, generate, login, token
180
181
  - [UI Reporter](./docs/ui-reporter.md) — interactive web UI for test results
181
182
  - [Development](./docs/development.md) — contributing and building
183
+ - [Migration Guide (v0.12 → v1.0)](./docs/migrations/migration-1.0.md) — upgrading from pre-1.0 releases
182
184
 
183
185
  ## Examples
184
186
 
package/dist/cli/index.js CHANGED
@@ -17,7 +17,7 @@ import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js';
17
17
  import { z } from 'zod';
18
18
  import createDebug from 'debug';
19
19
  import { ProxyAgent, Agent } from 'undici';
20
- import { readFileSync } from 'fs';
20
+ import { existsSync, readFileSync } from 'fs';
21
21
  import * as oauth from 'oauth4webapi';
22
22
  import { homedir } from 'os';
23
23
  import * as http from 'http';
@@ -80,7 +80,7 @@ function JsonPreview({ data, maxLines = 15 }) {
80
80
 
81
81
  // package.json
82
82
  var package_default = {
83
- version: "1.0.0-beta.2"};
83
+ version: "1.0.0-beta.4"};
84
84
 
85
85
  // src/cli/templates/index.ts
86
86
  function getPlaywrightConfigTemplate(answers) {
@@ -255,10 +255,10 @@ function getPackageJsonTemplate(projectName) {
255
255
  "evals"
256
256
  ],
257
257
  "dependencies": {
258
- "@modelcontextprotocol/sdk": "^1.0.4",
258
+ "@modelcontextprotocol/sdk": "^1.27.0",
259
259
  "@playwright/test": "^1.49.0",
260
260
  "@gleanwork/mcp-server-tester": "^${package_default.version}",
261
- "zod": "^3.24.1"
261
+ "zod": "^4.0.0"
262
262
  },
263
263
  "devDependencies": {
264
264
  "typescript": "^5.7.2"
@@ -531,7 +531,7 @@ async function init(options) {
531
531
  await waitUntilExit();
532
532
  }
533
533
  var MCPHostCapabilitiesSchema = z.object({
534
- sampling: z.record(z.unknown()).optional(),
534
+ sampling: z.record(z.string(), z.unknown()).optional(),
535
535
  roots: z.object({
536
536
  listChanged: z.boolean()
537
537
  }).optional()
@@ -590,7 +590,7 @@ var HttpConfigSchema = z.object({
590
590
  }
591
591
  return true;
592
592
  }),
593
- headers: z.record(z.string()).optional(),
593
+ headers: z.record(z.string(), z.string()).optional(),
594
594
  capabilities: MCPHostCapabilitiesSchema.optional(),
595
595
  connectTimeoutMs: z.number().positive().optional(),
596
596
  requestTimeoutMs: z.number().positive().optional(),
@@ -827,7 +827,7 @@ async function retryWithBackoff(fn, maxAttempts) {
827
827
  delayMs,
828
828
  err.message
829
829
  );
830
- await new Promise((resolve3) => setTimeout(resolve3, delayMs));
830
+ await new Promise((resolve4) => setTimeout(resolve4, delayMs));
831
831
  } else {
832
832
  throw err;
833
833
  }
@@ -877,7 +877,10 @@ async function createMCPClientForConfig(config, options) {
877
877
  validatedConfig.connectTimeoutMs !== void 0 ? { timeout: validatedConfig.connectTimeoutMs } : void 0
878
878
  );
879
879
  } else if (isHttpConfig(validatedConfig)) {
880
- const headers = { ...validatedConfig.headers };
880
+ const headers = {
881
+ "User-Agent": `@gleanwork/mcp-server-tester/${package_default.version}`,
882
+ ...validatedConfig.headers
883
+ };
881
884
  if (validatedConfig.auth?.clientCredentials && true) {
882
885
  const ccConfig = validatedConfig.auth.clientCredentials;
883
886
  const clientId = ccConfig.clientId ?? process.env["MCP_CLIENT_ID"];
@@ -1680,7 +1683,7 @@ ${errorText}`
1680
1683
  */
1681
1684
  async startCallbackServer(expectedState) {
1682
1685
  const timeoutMs = this.config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
1683
- return new Promise((resolve3, reject) => {
1686
+ return new Promise((resolve4, reject) => {
1684
1687
  const server = http.createServer();
1685
1688
  const connections = /* @__PURE__ */ new Set();
1686
1689
  server.on("connection", (socket) => {
@@ -1753,7 +1756,7 @@ ${errorText}`
1753
1756
  server.listen(preferredPort, "127.0.0.1", () => {
1754
1757
  const address = server.address();
1755
1758
  debug2("Callback server listening on port", address.port);
1756
- resolve3({ port: address.port, codePromise, close: forceClose });
1759
+ resolve4({ port: address.port, codePromise, close: forceClose });
1757
1760
  });
1758
1761
  server.on("error", (err) => {
1759
1762
  reject(err);
@@ -1774,8 +1777,8 @@ ${errorText}`
1774
1777
  return;
1775
1778
  }
1776
1779
  try {
1777
- const open = await import('open');
1778
- await open.default(url.toString());
1780
+ const open2 = await import('open');
1781
+ await open2.default(url.toString());
1779
1782
  debug2("Opened browser for authentication");
1780
1783
  } catch (error) {
1781
1784
  debug2("Failed to open browser:", error);
@@ -3059,6 +3062,24 @@ async function token(serverUrl, options) {
3059
3062
  );
3060
3063
  await waitUntilExit();
3061
3064
  }
3065
+ async function open(options) {
3066
+ const outputDir = resolve(options.dir ?? ".mcp-test-results");
3067
+ const reportPath = join(outputDir, "latest", "index.html");
3068
+ if (!existsSync(reportPath)) {
3069
+ console.error(`No report found at ${reportPath}`);
3070
+ console.error("Run your Playwright tests first to generate a report.");
3071
+ process.exit(1);
3072
+ }
3073
+ console.log(`Opening report: ${reportPath}`);
3074
+ try {
3075
+ const { default: openBrowser } = await import('open');
3076
+ await openBrowser(reportPath);
3077
+ } catch (error) {
3078
+ console.error("Failed to open report in browser:", error);
3079
+ console.error(`Open manually: file://${reportPath}`);
3080
+ process.exit(1);
3081
+ }
3082
+ }
3062
3083
 
3063
3084
  // src/cli/index.ts
3064
3085
  var program = new Command();
@@ -3074,4 +3095,9 @@ program.command("token").description("Output stored OAuth tokens for CI/CD use")
3074
3095
  "Output format: env, json, or gh (default: env)",
3075
3096
  "env"
3076
3097
  ).option("--state-dir <dir>", "Custom directory for token storage").action(token);
3098
+ program.command("open").description("Open the MCP eval reporter UI in your browser").option(
3099
+ "-d, --dir <directory>",
3100
+ "Report output directory",
3101
+ ".mcp-test-results"
3102
+ ).action(open);
3077
3103
  program.parse();
@@ -169,6 +169,14 @@ declare function toMatchToolPattern(this: {
169
169
  /**
170
170
  * Creates the toMatchToolSnapshot matcher function
171
171
  *
172
+ * @remarks
173
+ * **Requires Playwright test context.** This matcher calls `expect(content).toMatchSnapshot()`
174
+ * internally, which only works inside a Playwright test (i.e., when `testInfo` is available).
175
+ * Calling it outside a Playwright test will throw a cryptic context error.
176
+ *
177
+ * To test sanitizer logic without a Playwright context, use the exported `applySanitizers`
178
+ * function directly.
179
+ *
172
180
  * Note: This is an async matcher that uses Playwright's snapshot testing.
173
181
  */
174
182
  declare function toMatchToolSnapshot(this: {
@@ -209,9 +217,9 @@ type RubricSpec = BuiltInRubric | {
209
217
  type ProviderKind = 'anthropic' | 'openai' | 'google';
210
218
 
211
219
  /**
212
- * Tool call validators for llm_host simulation results.
220
+ * Tool call validators for mcp_host simulation results.
213
221
  *
214
- * These validators extract the tool call trace from an LLMHostSimulationResult
222
+ * These validators extract the tool call trace from an MCPHostSimulationResult
215
223
  * and apply assertions against expected call lists and counts.
216
224
  */
217
225
 
@@ -400,7 +408,7 @@ declare global {
400
408
  */
401
409
  toSatisfyToolPredicate(predicate: ToolPredicate, description?: string): Promise<R>;
402
410
  /**
403
- * Validates which tools the LLM called during an llm_host simulation.
411
+ * Validates which tools the LLM called during an mcp_host simulation.
404
412
  *
405
413
  * @example
406
414
  * ```typescript
@@ -412,7 +420,7 @@ declare global {
412
420
  */
413
421
  toHaveToolCalls(expectation: ToolCallExpectation): R;
414
422
  /**
415
- * Validates the number of tool calls made during an llm_host simulation.
423
+ * Validates the number of tool calls made during an mcp_host simulation.
416
424
  *
417
425
  * @example
418
426
  * ```typescript
@@ -523,7 +531,7 @@ declare function toSatisfyToolPredicate(this: {
523
531
  /**
524
532
  * toHaveToolCalls Matcher
525
533
  *
526
- * Validates which tools the LLM called during an llm_host simulation.
534
+ * Validates which tools the LLM called during an mcp_host simulation.
527
535
  */
528
536
 
529
537
  /**
@@ -539,7 +547,7 @@ declare function toHaveToolCalls(this: {
539
547
  /**
540
548
  * toHaveToolCallCount Matcher
541
549
  *
542
- * Validates the number of tool calls made during an llm_host simulation.
550
+ * Validates the number of tool calls made during an mcp_host simulation.
543
551
  */
544
552
 
545
553
  /**
@@ -1215,7 +1215,7 @@ function validateToolCalls(response, expectation) {
1215
1215
  if (!isSimulationResult(response)) {
1216
1216
  return {
1217
1217
  pass: false,
1218
- message: "toolsTriggered expectation requires llm_host mode \u2014 response must be an LLMHostSimulationResult"
1218
+ message: "toolsTriggered expectation requires mcp_host mode \u2014 response must be an MCPHostSimulationResult"
1219
1219
  };
1220
1220
  }
1221
1221
  const actual = response.toolCalls;
@@ -1275,7 +1275,7 @@ function validateToolCallCount(response, options) {
1275
1275
  if (!isSimulationResult(response)) {
1276
1276
  return {
1277
1277
  pass: false,
1278
- message: "toolCallCount expectation requires llm_host mode \u2014 response must be an LLMHostSimulationResult"
1278
+ message: "toolCallCount expectation requires mcp_host mode \u2014 response must be an MCPHostSimulationResult"
1279
1279
  };
1280
1280
  }
1281
1281
  const count = response.toolCalls.length;
@@ -1337,7 +1337,7 @@ var expect = expect$1.extend({
1337
1337
  toHaveToolCallCount
1338
1338
  });
1339
1339
  var MCPHostCapabilitiesSchema = z.object({
1340
- sampling: z.record(z.unknown()).optional(),
1340
+ sampling: z.record(z.string(), z.unknown()).optional(),
1341
1341
  roots: z.object({
1342
1342
  listChanged: z.boolean()
1343
1343
  }).optional()
@@ -1396,7 +1396,7 @@ var HttpConfigSchema = z.object({
1396
1396
  }
1397
1397
  return true;
1398
1398
  }),
1399
- headers: z.record(z.string()).optional(),
1399
+ headers: z.record(z.string(), z.string()).optional(),
1400
1400
  capabilities: MCPHostCapabilitiesSchema.optional(),
1401
1401
  connectTimeoutMs: z.number().positive().optional(),
1402
1402
  requestTimeoutMs: z.number().positive().optional(),
@@ -1434,7 +1434,7 @@ var debugHttp = createDebug(`${NAMESPACE}:http`);
1434
1434
 
1435
1435
  // package.json
1436
1436
  var package_default = {
1437
- version: "1.0.0-beta.2"};
1437
+ version: "1.0.0-beta.4"};
1438
1438
  var debug = createDebug("mcp-server-tester:oauth-flow");
1439
1439
  async function generatePKCE() {
1440
1440
  const codeVerifier = oauth.generateRandomCodeVerifier();
@@ -1687,7 +1687,10 @@ async function createMCPClientForConfig(config, options) {
1687
1687
  validatedConfig.connectTimeoutMs !== void 0 ? { timeout: validatedConfig.connectTimeoutMs } : void 0
1688
1688
  );
1689
1689
  } else if (isHttpConfig(validatedConfig)) {
1690
- const headers = { ...validatedConfig.headers };
1690
+ const headers = {
1691
+ "User-Agent": `@gleanwork/mcp-server-tester/${package_default.version}`,
1692
+ ...validatedConfig.headers
1693
+ };
1691
1694
  if (validatedConfig.auth?.clientCredentials && !options?.authProvider) {
1692
1695
  const ccConfig = validatedConfig.auth.clientCredentials;
1693
1696
  const clientId = ccConfig.clientId ?? process.env["MCP_CLIENT_ID"];