@gleanwork/mcp-server-tester 1.0.0-beta.8 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -1
- package/dist/cli/index.js +12 -1
- package/dist/fixtures/mcp.js +71 -14
- package/dist/fixtures/mcp.js.map +1 -1
- package/dist/index.cjs +73 -15
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +30 -2
- package/dist/index.d.ts +30 -2
- package/dist/index.js +73 -16
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -126,7 +126,7 @@ LLM host mode makes real API calls and produces non-deterministic results. Use `
|
|
|
126
126
|
Requires Node.js 22+.
|
|
127
127
|
|
|
128
128
|
```bash
|
|
129
|
-
npm install --save-dev @gleanwork/mcp-server-tester @playwright/test
|
|
129
|
+
npm install --save-dev @gleanwork/mcp-server-tester @playwright/test
|
|
130
130
|
```
|
|
131
131
|
|
|
132
132
|
The Anthropic SDK is only needed for LLM-as-judge assertions or LLM host mode with the Anthropic provider:
|
|
@@ -182,6 +182,25 @@ For HTTP servers, set `transport: 'http'` and `serverUrl`. For servers that requ
|
|
|
182
182
|
- [Development](./docs/development.md) — contributing and building
|
|
183
183
|
- [Migration Guide (v0.12 → v1.0)](./docs/migrations/migration-1.0.md) — upgrading from pre-1.0 releases
|
|
184
184
|
|
|
185
|
+
## AI Skills
|
|
186
|
+
|
|
187
|
+
Install AI skills to help your coding assistant generate tests, eval datasets, and MCP host evals:
|
|
188
|
+
|
|
189
|
+
```bash
|
|
190
|
+
npx skills add -g gleanwork/mcp-server-tester
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
This installs skills globally so they're available across all your projects. Four skills are included:
|
|
194
|
+
|
|
195
|
+
| Skill | Description |
|
|
196
|
+
| --------------------- | ----------------------------------------------------------- |
|
|
197
|
+
| `mcp-tester-guide` | Framework reference — matchers, config, auth, anti-patterns |
|
|
198
|
+
| `write-mcp-test` | Generate direct-mode Playwright tests |
|
|
199
|
+
| `write-mcp-eval` | Generate data-driven eval datasets |
|
|
200
|
+
| `write-mcp-host-eval` | Generate LLM host simulation evals |
|
|
201
|
+
|
|
202
|
+
Compatible with Claude Code, Cursor, Windsurf, Copilot, and [40+ other AI agents](https://github.com/nicepkg/nice-skills).
|
|
203
|
+
|
|
185
204
|
## Examples
|
|
186
205
|
|
|
187
206
|
The `examples/` directory contains complete working examples:
|
package/dist/cli/index.js
CHANGED
|
@@ -80,7 +80,7 @@ function JsonPreview({ data, maxLines = 15 }) {
|
|
|
80
80
|
|
|
81
81
|
// package.json
|
|
82
82
|
var package_default = {
|
|
83
|
-
version: "1.0.0-beta.8"};
|
|
83
|
+
version: "1.0.0"};
|
|
84
84
|
|
|
85
85
|
// src/cli/templates/index.ts
|
|
86
86
|
function getPlaywrightConfigTemplate(answers) {
|
|
@@ -1005,6 +1005,17 @@ async function createMCPClientForConfig(config, options) {
|
|
|
1005
1005
|
}
|
|
1006
1006
|
async function closeMCPClient(client) {
|
|
1007
1007
|
try {
|
|
1008
|
+
const transport = client.transport;
|
|
1009
|
+
if (transport instanceof StreamableHTTPClientTransport) {
|
|
1010
|
+
try {
|
|
1011
|
+
await transport.terminateSession();
|
|
1012
|
+
} catch (sessionError) {
|
|
1013
|
+
debugClient(
|
|
1014
|
+
"Error terminating session: %s",
|
|
1015
|
+
sessionError instanceof Error ? sessionError.message : String(sessionError)
|
|
1016
|
+
);
|
|
1017
|
+
}
|
|
1018
|
+
}
|
|
1008
1019
|
await client.close();
|
|
1009
1020
|
} catch (error) {
|
|
1010
1021
|
debugClient(
|
package/dist/fixtures/mcp.js
CHANGED
|
@@ -237,11 +237,13 @@ function validateSchema(response, schema, options = {}) {
|
|
|
237
237
|
} catch (error) {
|
|
238
238
|
const zodError = error;
|
|
239
239
|
const issues = formatZodIssues(zodError);
|
|
240
|
+
const text = stringifyResponse(response);
|
|
240
241
|
return {
|
|
241
242
|
pass: false,
|
|
242
243
|
message: `Response does not match schema: ${issues}`,
|
|
243
244
|
details: {
|
|
244
|
-
issues: zodError.issues
|
|
245
|
+
issues: zodError.issues,
|
|
246
|
+
textPreview: truncateForDisplay2(text)
|
|
245
247
|
}
|
|
246
248
|
};
|
|
247
249
|
}
|
|
@@ -294,16 +296,29 @@ function formatZodIssues(error) {
|
|
|
294
296
|
});
|
|
295
297
|
return issues.join("; ");
|
|
296
298
|
}
|
|
299
|
+
function truncateForDisplay2(str, maxLength = 200) {
|
|
300
|
+
if (str.length <= maxLength) {
|
|
301
|
+
return str;
|
|
302
|
+
}
|
|
303
|
+
return str.slice(0, maxLength) + "... (truncated)";
|
|
304
|
+
}
|
|
297
305
|
|
|
298
306
|
// src/assertions/matchers/toMatchToolSchema.ts
|
|
299
307
|
function toMatchToolSchema(received, schema, options = {}) {
|
|
300
308
|
const result = validateSchema(received, schema, options);
|
|
309
|
+
const preview = result.details?.textPreview;
|
|
301
310
|
return {
|
|
302
311
|
pass: result.pass,
|
|
303
312
|
message: () => {
|
|
304
313
|
if (this.isNot) {
|
|
305
314
|
return result.pass ? "Expected response NOT to match schema, but it did" : result.message;
|
|
306
315
|
}
|
|
316
|
+
if (!result.pass && preview) {
|
|
317
|
+
return `${result.message}
|
|
318
|
+
|
|
319
|
+
Actual response (truncated):
|
|
320
|
+
${preview}`;
|
|
321
|
+
}
|
|
307
322
|
return result.message;
|
|
308
323
|
}
|
|
309
324
|
};
|
|
@@ -334,11 +349,11 @@ function validateText(response, expected, options = {}) {
|
|
|
334
349
|
details: {
|
|
335
350
|
missing,
|
|
336
351
|
textLength: text.length,
|
|
337
|
-
textPreview: truncateForDisplay2(text)
|
|
352
|
+
textPreview: truncateForDisplay3(text)
|
|
338
353
|
}
|
|
339
354
|
};
|
|
340
355
|
}
|
|
341
|
-
function truncateForDisplay2(str, maxLength = 200) {
|
|
356
|
+
function truncateForDisplay3(str, maxLength = 200) {
|
|
342
357
|
if (str.length <= maxLength) {
|
|
343
358
|
return str;
|
|
344
359
|
}
|
|
@@ -348,6 +363,7 @@ function truncateForDisplay2(str, maxLength = 200) {
|
|
|
348
363
|
// src/assertions/matchers/toContainToolText.ts
|
|
349
364
|
function toContainToolText(received, expected, options = {}) {
|
|
350
365
|
const result = validateText(received, expected, options);
|
|
366
|
+
const preview = result.details?.textPreview;
|
|
351
367
|
return {
|
|
352
368
|
pass: result.pass,
|
|
353
369
|
message: () => {
|
|
@@ -355,6 +371,12 @@ function toContainToolText(received, expected, options = {}) {
|
|
|
355
371
|
const expectedStr = Array.isArray(expected) ? expected.map((s) => `"${s}"`).join(", ") : `"${expected}"`;
|
|
356
372
|
return result.pass ? `Expected response NOT to contain ${expectedStr}, but it did` : result.message;
|
|
357
373
|
}
|
|
374
|
+
if (!result.pass && preview) {
|
|
375
|
+
return `${result.message}
|
|
376
|
+
|
|
377
|
+
Actual response (truncated):
|
|
378
|
+
${preview}`;
|
|
379
|
+
}
|
|
358
380
|
return result.message;
|
|
359
381
|
}
|
|
360
382
|
};
|
|
@@ -385,7 +407,7 @@ function validatePattern(response, patterns, options = {}) {
|
|
|
385
407
|
details: {
|
|
386
408
|
unmatched,
|
|
387
409
|
textLength: text.length,
|
|
388
|
-
textPreview: truncateForDisplay3(text)
|
|
410
|
+
textPreview: truncateForDisplay4(text)
|
|
389
411
|
}
|
|
390
412
|
};
|
|
391
413
|
}
|
|
@@ -405,7 +427,7 @@ function patternToString(pattern) {
|
|
|
405
427
|
}
|
|
406
428
|
return `/${pattern}/`;
|
|
407
429
|
}
|
|
408
|
-
function truncateForDisplay3(str, maxLength = 200) {
|
|
430
|
+
function truncateForDisplay4(str, maxLength = 200) {
|
|
409
431
|
if (str.length <= maxLength) {
|
|
410
432
|
return str;
|
|
411
433
|
}
|
|
@@ -415,12 +437,19 @@ function truncateForDisplay3(str, maxLength = 200) {
|
|
|
415
437
|
// src/assertions/matchers/toMatchToolPattern.ts
|
|
416
438
|
function toMatchToolPattern(received, patterns, options = {}) {
|
|
417
439
|
const result = validatePattern(received, patterns, options);
|
|
440
|
+
const preview = result.details?.textPreview;
|
|
418
441
|
return {
|
|
419
442
|
pass: result.pass,
|
|
420
443
|
message: () => {
|
|
421
444
|
if (this.isNot) {
|
|
422
445
|
return result.pass ? "Expected response NOT to match pattern(s), but it did" : result.message;
|
|
423
446
|
}
|
|
447
|
+
if (!result.pass && preview) {
|
|
448
|
+
return `${result.message}
|
|
449
|
+
|
|
450
|
+
Actual response (truncated):
|
|
451
|
+
${preview}`;
|
|
452
|
+
}
|
|
424
453
|
return result.message;
|
|
425
454
|
}
|
|
426
455
|
};
|
|
@@ -567,7 +596,7 @@ function validateError(response, expected = true) {
|
|
|
567
596
|
pass: false,
|
|
568
597
|
message: "Expected an error response but got success",
|
|
569
598
|
details: {
|
|
570
|
-
textPreview: truncateForDisplay4(extractText2(response))
|
|
599
|
+
textPreview: truncateForDisplay5(extractText2(response))
|
|
571
600
|
}
|
|
572
601
|
};
|
|
573
602
|
} else {
|
|
@@ -579,7 +608,7 @@ function validateError(response, expected = true) {
|
|
|
579
608
|
}
|
|
580
609
|
return {
|
|
581
610
|
pass: false,
|
|
582
|
-
message: `Expected a success response but got error: "${truncateForDisplay4(errorMessage)}"`,
|
|
611
|
+
message: `Expected a success response but got error: "${truncateForDisplay5(errorMessage)}"`,
|
|
583
612
|
details: {
|
|
584
613
|
errorMessage
|
|
585
614
|
}
|
|
@@ -592,7 +621,7 @@ function validateError(response, expected = true) {
|
|
|
592
621
|
pass: false,
|
|
593
622
|
message: `Expected an error containing "${expectedMessages[0]}" but got success`,
|
|
594
623
|
details: {
|
|
595
|
-
textPreview: truncateForDisplay4(extractText2(response))
|
|
624
|
+
textPreview: truncateForDisplay5(extractText2(response))
|
|
596
625
|
}
|
|
597
626
|
};
|
|
598
627
|
}
|
|
@@ -614,7 +643,7 @@ function validateError(response, expected = true) {
|
|
|
614
643
|
}
|
|
615
644
|
};
|
|
616
645
|
}
|
|
617
|
-
function truncateForDisplay4(str, maxLength = 200) {
|
|
646
|
+
function truncateForDisplay5(str, maxLength = 200) {
|
|
618
647
|
if (str.length <= maxLength) {
|
|
619
648
|
return str;
|
|
620
649
|
}
|
|
@@ -1142,7 +1171,9 @@ function createJudge(config = {}) {
|
|
|
1142
1171
|
case "google":
|
|
1143
1172
|
return createGoogleJudge(config);
|
|
1144
1173
|
default:
|
|
1145
|
-
throw new Error(
|
|
1174
|
+
throw new Error(
|
|
1175
|
+
`Unsupported LLM provider: ${String(provider)}. Valid providers: 'anthropic', 'vertex-anthropic', 'anthropic-agent-sdk', 'openai', 'google'`
|
|
1176
|
+
);
|
|
1146
1177
|
}
|
|
1147
1178
|
}
|
|
1148
1179
|
|
|
@@ -1497,6 +1528,10 @@ function validateToolCalls(response, expectation) {
|
|
|
1497
1528
|
return {
|
|
1498
1529
|
pass: false,
|
|
1499
1530
|
message: `Expected tool '${expected.name}' to be called in sequence (starting from position ${searchFrom}), but it was not found`,
|
|
1531
|
+
details: {
|
|
1532
|
+
actual: actual.map((c) => c.name),
|
|
1533
|
+
expected: expected.name
|
|
1534
|
+
},
|
|
1500
1535
|
metrics
|
|
1501
1536
|
};
|
|
1502
1537
|
}
|
|
@@ -1513,6 +1548,10 @@ function validateToolCalls(response, expectation) {
|
|
|
1513
1548
|
return {
|
|
1514
1549
|
pass: false,
|
|
1515
1550
|
message: `Expected tool '${expected.name}'${argsNote} to be called, but it was not`,
|
|
1551
|
+
details: {
|
|
1552
|
+
actual: actual.map((c) => c.name),
|
|
1553
|
+
expected: expected.name
|
|
1554
|
+
},
|
|
1516
1555
|
metrics
|
|
1517
1556
|
};
|
|
1518
1557
|
}
|
|
@@ -1525,6 +1564,10 @@ function validateToolCalls(response, expectation) {
|
|
|
1525
1564
|
return {
|
|
1526
1565
|
pass: false,
|
|
1527
1566
|
message: `Unexpected tool calls: ${names}. Only ${[...allowedNames].map((n) => `'${n}'`).join(", ")} are allowed`,
|
|
1567
|
+
details: {
|
|
1568
|
+
actual: actual.map((c) => c.name),
|
|
1569
|
+
unexpected: unexpected.map((c) => c.name)
|
|
1570
|
+
},
|
|
1528
1571
|
metrics
|
|
1529
1572
|
};
|
|
1530
1573
|
}
|
|
@@ -1543,19 +1586,22 @@ function validateToolCallCount(response, options) {
|
|
|
1543
1586
|
if (exact !== void 0 && count !== exact) {
|
|
1544
1587
|
return {
|
|
1545
1588
|
pass: false,
|
|
1546
|
-
message: `Expected exactly ${exact} tool call(s), but got ${count}`
|
|
1589
|
+
message: `Expected exactly ${exact} tool call(s), but got ${count}`,
|
|
1590
|
+
details: { actual: count, expected: exact }
|
|
1547
1591
|
};
|
|
1548
1592
|
}
|
|
1549
1593
|
if (min !== void 0 && count < min) {
|
|
1550
1594
|
return {
|
|
1551
1595
|
pass: false,
|
|
1552
|
-
message: `Expected at least ${min} tool call(s), but got ${count}`
|
|
1596
|
+
message: `Expected at least ${min} tool call(s), but got ${count}`,
|
|
1597
|
+
details: { actual: count, min }
|
|
1553
1598
|
};
|
|
1554
1599
|
}
|
|
1555
1600
|
if (max !== void 0 && count > max) {
|
|
1556
1601
|
return {
|
|
1557
1602
|
pass: false,
|
|
1558
|
-
message: `Expected at most ${max} tool call(s), but got ${count}`
|
|
1603
|
+
message: `Expected at most ${max} tool call(s), but got ${count}`,
|
|
1604
|
+
details: { actual: count, max }
|
|
1559
1605
|
};
|
|
1560
1606
|
}
|
|
1561
1607
|
return {
|
|
@@ -1694,7 +1740,7 @@ var debugHttp = createDebug(`${NAMESPACE}:http`);
|
|
|
1694
1740
|
|
|
1695
1741
|
// package.json
|
|
1696
1742
|
var package_default = {
|
|
1697
|
-
version: "1.0.0-beta.8"};
|
|
1743
|
+
version: "1.0.0"};
|
|
1698
1744
|
var debug = createDebug("mcp-server-tester:oauth-flow");
|
|
1699
1745
|
async function generatePKCE() {
|
|
1700
1746
|
const codeVerifier = oauth.generateRandomCodeVerifier();
|
|
@@ -2075,6 +2121,17 @@ async function createMCPClientForConfig(config, options) {
|
|
|
2075
2121
|
}
|
|
2076
2122
|
async function closeMCPClient(client) {
|
|
2077
2123
|
try {
|
|
2124
|
+
const transport = client.transport;
|
|
2125
|
+
if (transport instanceof StreamableHTTPClientTransport) {
|
|
2126
|
+
try {
|
|
2127
|
+
await transport.terminateSession();
|
|
2128
|
+
} catch (sessionError) {
|
|
2129
|
+
debugClient(
|
|
2130
|
+
"Error terminating session: %s",
|
|
2131
|
+
sessionError instanceof Error ? sessionError.message : String(sessionError)
|
|
2132
|
+
);
|
|
2133
|
+
}
|
|
2134
|
+
}
|
|
2078
2135
|
await client.close();
|
|
2079
2136
|
} catch (error) {
|
|
2080
2137
|
debugClient(
|