@gleanwork/mcp-server-tester 1.0.0-beta.8 → 1.0.1-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -126,7 +126,7 @@ LLM host mode makes real API calls and produces non-deterministic results. Use `
126
126
  Requires Node.js 22+.
127
127
 
128
128
  ```bash
129
- npm install --save-dev @gleanwork/mcp-server-tester @playwright/test zod
129
+ npm install --save-dev @gleanwork/mcp-server-tester @playwright/test
130
130
  ```
131
131
 
132
132
  The Anthropic SDK is only needed for LLM-as-judge assertions or LLM host mode with the Anthropic provider:
@@ -182,6 +182,25 @@ For HTTP servers, set `transport: 'http'` and `serverUrl`. For servers that requ
182
182
  - [Development](./docs/development.md) — contributing and building
183
183
  - [Migration Guide (v0.12 → v1.0)](./docs/migrations/migration-1.0.md) — upgrading from pre-1.0 releases
184
184
 
185
+ ## AI Skills
186
+
187
+ Install AI skills to help your coding assistant generate tests, eval datasets, and MCP host evals:
188
+
189
+ ```bash
190
+ npx skills add -g gleanwork/mcp-server-tester
191
+ ```
192
+
193
+ This installs skills globally so they're available across all your projects. Four skills are included:
194
+
195
+ | Skill | Description |
196
+ | --------------------- | ----------------------------------------------------------- |
197
+ | `mcp-tester-guide` | Framework reference — matchers, config, auth, anti-patterns |
198
+ | `write-mcp-test` | Generate direct-mode Playwright tests |
199
+ | `write-mcp-eval` | Generate data-driven eval datasets |
200
+ | `write-mcp-host-eval` | Generate LLM host simulation evals |
201
+
202
+ Compatible with Claude Code, Cursor, Windsurf, Copilot, and [40+ other AI agents](https://github.com/nicepkg/nice-skills).
203
+
185
204
  ## Examples
186
205
 
187
206
  The `examples/` directory contains complete working examples:
package/dist/cli/index.js CHANGED
@@ -80,7 +80,7 @@ function JsonPreview({ data, maxLines = 15 }) {
80
80
 
81
81
  // package.json
82
82
  var package_default = {
83
- version: "1.0.0-beta.8"};
83
+ version: "1.0.1-beta.0"};
84
84
 
85
85
  // src/cli/templates/index.ts
86
86
  function getPlaywrightConfigTemplate(answers) {
@@ -1005,6 +1005,17 @@ async function createMCPClientForConfig(config, options) {
1005
1005
  }
1006
1006
  async function closeMCPClient(client) {
1007
1007
  try {
1008
+ const transport = client.transport;
1009
+ if (transport instanceof StreamableHTTPClientTransport) {
1010
+ try {
1011
+ await transport.terminateSession();
1012
+ } catch (sessionError) {
1013
+ debugClient(
1014
+ "Error terminating session: %s",
1015
+ sessionError instanceof Error ? sessionError.message : String(sessionError)
1016
+ );
1017
+ }
1018
+ }
1008
1019
  await client.close();
1009
1020
  } catch (error) {
1010
1021
  debugClient(
@@ -237,11 +237,13 @@ function validateSchema(response, schema, options = {}) {
237
237
  } catch (error) {
238
238
  const zodError = error;
239
239
  const issues = formatZodIssues(zodError);
240
+ const text = stringifyResponse(response);
240
241
  return {
241
242
  pass: false,
242
243
  message: `Response does not match schema: ${issues}`,
243
244
  details: {
244
- issues: zodError.issues
245
+ issues: zodError.issues,
246
+ textPreview: truncateForDisplay2(text)
245
247
  }
246
248
  };
247
249
  }
@@ -294,16 +296,29 @@ function formatZodIssues(error) {
294
296
  });
295
297
  return issues.join("; ");
296
298
  }
299
+ function truncateForDisplay2(str, maxLength = 200) {
300
+ if (str.length <= maxLength) {
301
+ return str;
302
+ }
303
+ return str.slice(0, maxLength) + "... (truncated)";
304
+ }
297
305
 
298
306
  // src/assertions/matchers/toMatchToolSchema.ts
299
307
  function toMatchToolSchema(received, schema, options = {}) {
300
308
  const result = validateSchema(received, schema, options);
309
+ const preview = result.details?.textPreview;
301
310
  return {
302
311
  pass: result.pass,
303
312
  message: () => {
304
313
  if (this.isNot) {
305
314
  return result.pass ? "Expected response NOT to match schema, but it did" : result.message;
306
315
  }
316
+ if (!result.pass && preview) {
317
+ return `${result.message}
318
+
319
+ Actual response (truncated):
320
+ ${preview}`;
321
+ }
307
322
  return result.message;
308
323
  }
309
324
  };
@@ -334,11 +349,11 @@ function validateText(response, expected, options = {}) {
334
349
  details: {
335
350
  missing,
336
351
  textLength: text.length,
337
- textPreview: truncateForDisplay2(text)
352
+ textPreview: truncateForDisplay3(text)
338
353
  }
339
354
  };
340
355
  }
341
- function truncateForDisplay2(str, maxLength = 200) {
356
+ function truncateForDisplay3(str, maxLength = 200) {
342
357
  if (str.length <= maxLength) {
343
358
  return str;
344
359
  }
@@ -348,6 +363,7 @@ function truncateForDisplay2(str, maxLength = 200) {
348
363
  // src/assertions/matchers/toContainToolText.ts
349
364
  function toContainToolText(received, expected, options = {}) {
350
365
  const result = validateText(received, expected, options);
366
+ const preview = result.details?.textPreview;
351
367
  return {
352
368
  pass: result.pass,
353
369
  message: () => {
@@ -355,6 +371,12 @@ function toContainToolText(received, expected, options = {}) {
355
371
  const expectedStr = Array.isArray(expected) ? expected.map((s) => `"${s}"`).join(", ") : `"${expected}"`;
356
372
  return result.pass ? `Expected response NOT to contain ${expectedStr}, but it did` : result.message;
357
373
  }
374
+ if (!result.pass && preview) {
375
+ return `${result.message}
376
+
377
+ Actual response (truncated):
378
+ ${preview}`;
379
+ }
358
380
  return result.message;
359
381
  }
360
382
  };
@@ -385,7 +407,7 @@ function validatePattern(response, patterns, options = {}) {
385
407
  details: {
386
408
  unmatched,
387
409
  textLength: text.length,
388
- textPreview: truncateForDisplay3(text)
410
+ textPreview: truncateForDisplay4(text)
389
411
  }
390
412
  };
391
413
  }
@@ -405,7 +427,7 @@ function patternToString(pattern) {
405
427
  }
406
428
  return `/${pattern}/`;
407
429
  }
408
- function truncateForDisplay3(str, maxLength = 200) {
430
+ function truncateForDisplay4(str, maxLength = 200) {
409
431
  if (str.length <= maxLength) {
410
432
  return str;
411
433
  }
@@ -415,12 +437,19 @@ function truncateForDisplay3(str, maxLength = 200) {
415
437
  // src/assertions/matchers/toMatchToolPattern.ts
416
438
  function toMatchToolPattern(received, patterns, options = {}) {
417
439
  const result = validatePattern(received, patterns, options);
440
+ const preview = result.details?.textPreview;
418
441
  return {
419
442
  pass: result.pass,
420
443
  message: () => {
421
444
  if (this.isNot) {
422
445
  return result.pass ? "Expected response NOT to match pattern(s), but it did" : result.message;
423
446
  }
447
+ if (!result.pass && preview) {
448
+ return `${result.message}
449
+
450
+ Actual response (truncated):
451
+ ${preview}`;
452
+ }
424
453
  return result.message;
425
454
  }
426
455
  };
@@ -567,7 +596,7 @@ function validateError(response, expected = true) {
567
596
  pass: false,
568
597
  message: "Expected an error response but got success",
569
598
  details: {
570
- textPreview: truncateForDisplay4(extractText2(response))
599
+ textPreview: truncateForDisplay5(extractText2(response))
571
600
  }
572
601
  };
573
602
  } else {
@@ -579,7 +608,7 @@ function validateError(response, expected = true) {
579
608
  }
580
609
  return {
581
610
  pass: false,
582
- message: `Expected a success response but got error: "${truncateForDisplay4(errorMessage)}"`,
611
+ message: `Expected a success response but got error: "${truncateForDisplay5(errorMessage)}"`,
583
612
  details: {
584
613
  errorMessage
585
614
  }
@@ -592,7 +621,7 @@ function validateError(response, expected = true) {
592
621
  pass: false,
593
622
  message: `Expected an error containing "${expectedMessages[0]}" but got success`,
594
623
  details: {
595
- textPreview: truncateForDisplay4(extractText2(response))
624
+ textPreview: truncateForDisplay5(extractText2(response))
596
625
  }
597
626
  };
598
627
  }
@@ -614,7 +643,7 @@ function validateError(response, expected = true) {
614
643
  }
615
644
  };
616
645
  }
617
- function truncateForDisplay4(str, maxLength = 200) {
646
+ function truncateForDisplay5(str, maxLength = 200) {
618
647
  if (str.length <= maxLength) {
619
648
  return str;
620
649
  }
@@ -1142,7 +1171,9 @@ function createJudge(config = {}) {
1142
1171
  case "google":
1143
1172
  return createGoogleJudge(config);
1144
1173
  default:
1145
- throw new Error(`Unsupported LLM provider: ${String(provider)}`);
1174
+ throw new Error(
1175
+ `Unsupported LLM provider: ${String(provider)}. Valid providers: 'anthropic', 'vertex-anthropic', 'anthropic-agent-sdk', 'openai', 'google'`
1176
+ );
1146
1177
  }
1147
1178
  }
1148
1179
 
@@ -1497,6 +1528,10 @@ function validateToolCalls(response, expectation) {
1497
1528
  return {
1498
1529
  pass: false,
1499
1530
  message: `Expected tool '${expected.name}' to be called in sequence (starting from position ${searchFrom}), but it was not found`,
1531
+ details: {
1532
+ actual: actual.map((c) => c.name),
1533
+ expected: expected.name
1534
+ },
1500
1535
  metrics
1501
1536
  };
1502
1537
  }
@@ -1513,6 +1548,10 @@ function validateToolCalls(response, expectation) {
1513
1548
  return {
1514
1549
  pass: false,
1515
1550
  message: `Expected tool '${expected.name}'${argsNote} to be called, but it was not`,
1551
+ details: {
1552
+ actual: actual.map((c) => c.name),
1553
+ expected: expected.name
1554
+ },
1516
1555
  metrics
1517
1556
  };
1518
1557
  }
@@ -1525,6 +1564,10 @@ function validateToolCalls(response, expectation) {
1525
1564
  return {
1526
1565
  pass: false,
1527
1566
  message: `Unexpected tool calls: ${names}. Only ${[...allowedNames].map((n) => `'${n}'`).join(", ")} are allowed`,
1567
+ details: {
1568
+ actual: actual.map((c) => c.name),
1569
+ unexpected: unexpected.map((c) => c.name)
1570
+ },
1528
1571
  metrics
1529
1572
  };
1530
1573
  }
@@ -1543,19 +1586,22 @@ function validateToolCallCount(response, options) {
1543
1586
  if (exact !== void 0 && count !== exact) {
1544
1587
  return {
1545
1588
  pass: false,
1546
- message: `Expected exactly ${exact} tool call(s), but got ${count}`
1589
+ message: `Expected exactly ${exact} tool call(s), but got ${count}`,
1590
+ details: { actual: count, expected: exact }
1547
1591
  };
1548
1592
  }
1549
1593
  if (min !== void 0 && count < min) {
1550
1594
  return {
1551
1595
  pass: false,
1552
- message: `Expected at least ${min} tool call(s), but got ${count}`
1596
+ message: `Expected at least ${min} tool call(s), but got ${count}`,
1597
+ details: { actual: count, min }
1553
1598
  };
1554
1599
  }
1555
1600
  if (max !== void 0 && count > max) {
1556
1601
  return {
1557
1602
  pass: false,
1558
- message: `Expected at most ${max} tool call(s), but got ${count}`
1603
+ message: `Expected at most ${max} tool call(s), but got ${count}`,
1604
+ details: { actual: count, max }
1559
1605
  };
1560
1606
  }
1561
1607
  return {
@@ -1694,7 +1740,7 @@ var debugHttp = createDebug(`${NAMESPACE}:http`);
1694
1740
 
1695
1741
  // package.json
1696
1742
  var package_default = {
1697
- version: "1.0.0-beta.8"};
1743
+ version: "1.0.1-beta.0"};
1698
1744
  var debug = createDebug("mcp-server-tester:oauth-flow");
1699
1745
  async function generatePKCE() {
1700
1746
  const codeVerifier = oauth.generateRandomCodeVerifier();
@@ -2075,6 +2121,17 @@ async function createMCPClientForConfig(config, options) {
2075
2121
  }
2076
2122
  async function closeMCPClient(client) {
2077
2123
  try {
2124
+ const transport = client.transport;
2125
+ if (transport instanceof StreamableHTTPClientTransport) {
2126
+ try {
2127
+ await transport.terminateSession();
2128
+ } catch (sessionError) {
2129
+ debugClient(
2130
+ "Error terminating session: %s",
2131
+ sessionError instanceof Error ? sessionError.message : String(sessionError)
2132
+ );
2133
+ }
2134
+ }
2078
2135
  await client.close();
2079
2136
  } catch (error) {
2080
2137
  debugClient(