@yawlabs/mcp-compliance 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-DGGPE3ZM.js → chunk-M67VVIRO.js} +36 -8
- package/dist/index.js +42 -9
- package/dist/mcp/server.js +1 -1
- package/dist/runner.d.ts +20 -0
- package/dist/runner.js +1 -1
- package/package.json +2 -1
|
@@ -600,7 +600,8 @@ var TEST_DEFINITIONS = [
|
|
|
600
600
|
required: true,
|
|
601
601
|
specRef: "basic/utilities#ping",
|
|
602
602
|
description: "Tests that the server responds to the ping method with an empty result object. This is a required utility method.",
|
|
603
|
-
recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.'
|
|
603
|
+
recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.',
|
|
604
|
+
parallelSafe: true
|
|
604
605
|
},
|
|
605
606
|
{
|
|
606
607
|
id: "lifecycle-instructions",
|
|
@@ -609,7 +610,8 @@ var TEST_DEFINITIONS = [
|
|
|
609
610
|
required: false,
|
|
610
611
|
specRef: "basic/lifecycle#initialization",
|
|
611
612
|
description: "If the server includes an instructions field in the initialize response, validates it is a string. Instructions provide guidance for how the client should interact with the server.",
|
|
612
|
-
recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string."
|
|
613
|
+
recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string.",
|
|
614
|
+
parallelSafe: true
|
|
613
615
|
},
|
|
614
616
|
{
|
|
615
617
|
id: "lifecycle-id-match",
|
|
@@ -708,7 +710,8 @@ var TEST_DEFINITIONS = [
|
|
|
708
710
|
required: false,
|
|
709
711
|
specRef: "client/sampling",
|
|
710
712
|
description: "If the server's initialize response or serverInfo implies it uses client-side sampling (sampling/createMessage), verify the capability declaration shape. Currently this is an advisory shape check \u2014 actually exercising the server\u2192client flow requires a client-side sampling handler and is out of scope.",
|
|
711
|
-
recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required."
|
|
713
|
+
recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required.",
|
|
714
|
+
parallelSafe: true
|
|
712
715
|
},
|
|
713
716
|
{
|
|
714
717
|
id: "lifecycle-roots-capability",
|
|
@@ -717,7 +720,8 @@ var TEST_DEFINITIONS = [
|
|
|
717
720
|
required: false,
|
|
718
721
|
specRef: "client/roots",
|
|
719
722
|
description: "Roots (filesystem root paths) is a client capability. This test verifies that if a server sends roots/list requests, it handles gracefully when the client doesn't declare the roots capability (i.e., doesn't crash).",
|
|
720
|
-
recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side."
|
|
723
|
+
recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side.",
|
|
724
|
+
parallelSafe: true
|
|
721
725
|
},
|
|
722
726
|
{
|
|
723
727
|
id: "lifecycle-elicitation-capability",
|
|
@@ -726,7 +730,8 @@ var TEST_DEFINITIONS = [
|
|
|
726
730
|
required: false,
|
|
727
731
|
specRef: "client/elicitation",
|
|
728
732
|
description: "Elicitation (asking the user for structured input mid-operation) is a client capability added in 2025-11-25. This test verifies servers that use elicitation/create handle the case where clients don't support it.",
|
|
729
|
-
recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error)."
|
|
733
|
+
recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error).",
|
|
734
|
+
parallelSafe: true
|
|
730
735
|
},
|
|
731
736
|
{
|
|
732
737
|
id: "lifecycle-meta-tolerance",
|
|
@@ -735,7 +740,8 @@ var TEST_DEFINITIONS = [
|
|
|
735
740
|
required: false,
|
|
736
741
|
specRef: "basic/utilities#_meta",
|
|
737
742
|
description: "Sends a ping with params._meta = { extra: 'value' } and verifies the server doesn't error. The 2025-11-25 spec allows arbitrary _meta on any request; servers should ignore unknown _meta fields gracefully.",
|
|
738
|
-
recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility."
|
|
743
|
+
recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility.",
|
|
744
|
+
parallelSafe: true
|
|
739
745
|
},
|
|
740
746
|
// ── Tools (4 tests) ──────────────────────────────────────────────
|
|
741
747
|
{
|
|
@@ -1434,8 +1440,14 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
1434
1440
|
let resourceNames = [];
|
|
1435
1441
|
let promptCount = 0;
|
|
1436
1442
|
let promptNames = [];
|
|
1437
|
-
|
|
1438
|
-
|
|
1443
|
+
const concurrency = Math.max(1, options.concurrency ?? 1);
|
|
1444
|
+
const inFlight = /* @__PURE__ */ new Set();
|
|
1445
|
+
async function drainPool() {
|
|
1446
|
+
while (inFlight.size > 0) {
|
|
1447
|
+
await Promise.race(inFlight);
|
|
1448
|
+
}
|
|
1449
|
+
}
|
|
1450
|
+
async function runTestFn(id, name, category, required, specRef, fn) {
|
|
1439
1451
|
const start = Date.now();
|
|
1440
1452
|
let lastResult = { passed: false, details: "" };
|
|
1441
1453
|
for (let attempt = 0; attempt <= retries; attempt++) {
|
|
@@ -1463,6 +1475,21 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
1463
1475
|
options.onProgress?.(id, lastResult.passed, lastResult.details);
|
|
1464
1476
|
options.onTestComplete?.(result);
|
|
1465
1477
|
}
|
|
1478
|
+
async function test(id, name, category, required, specRef, fn) {
|
|
1479
|
+
if (!shouldRun2(id, category)) return;
|
|
1480
|
+
const def = TEST_DEFINITIONS_MAP.get(id);
|
|
1481
|
+
const eligible = concurrency > 1 && def?.parallelSafe === true;
|
|
1482
|
+
if (!eligible) {
|
|
1483
|
+
if (inFlight.size > 0) await drainPool();
|
|
1484
|
+
await runTestFn(id, name, category, required, specRef, fn);
|
|
1485
|
+
return;
|
|
1486
|
+
}
|
|
1487
|
+
while (inFlight.size >= concurrency) await Promise.race(inFlight);
|
|
1488
|
+
const p = runTestFn(id, name, category, required, specRef, fn).finally(() => {
|
|
1489
|
+
inFlight.delete(p);
|
|
1490
|
+
});
|
|
1491
|
+
inFlight.add(p);
|
|
1492
|
+
}
|
|
1466
1493
|
await test(
|
|
1467
1494
|
"transport-post",
|
|
1468
1495
|
"HTTP POST accepted",
|
|
@@ -3999,6 +4026,7 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
3999
4026
|
const truncated = warnings.length - MAX_WARNINGS;
|
|
4000
4027
|
warnings.splice(MAX_WARNINGS, truncated, `... and ${truncated} more warning(s) suppressed`);
|
|
4001
4028
|
}
|
|
4029
|
+
if (inFlight.size > 0) await drainPool();
|
|
4002
4030
|
const { score, grade, overall, summary, categories } = computeScore(tests);
|
|
4003
4031
|
const badge = generateBadge(displayUrl);
|
|
4004
4032
|
return {
|
package/dist/index.js
CHANGED
|
@@ -946,7 +946,8 @@ var TEST_DEFINITIONS = [
|
|
|
946
946
|
required: true,
|
|
947
947
|
specRef: "basic/utilities#ping",
|
|
948
948
|
description: "Tests that the server responds to the ping method with an empty result object. This is a required utility method.",
|
|
949
|
-
recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.'
|
|
949
|
+
recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.',
|
|
950
|
+
parallelSafe: true
|
|
950
951
|
},
|
|
951
952
|
{
|
|
952
953
|
id: "lifecycle-instructions",
|
|
@@ -955,7 +956,8 @@ var TEST_DEFINITIONS = [
|
|
|
955
956
|
required: false,
|
|
956
957
|
specRef: "basic/lifecycle#initialization",
|
|
957
958
|
description: "If the server includes an instructions field in the initialize response, validates it is a string. Instructions provide guidance for how the client should interact with the server.",
|
|
958
|
-
recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string."
|
|
959
|
+
recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string.",
|
|
960
|
+
parallelSafe: true
|
|
959
961
|
},
|
|
960
962
|
{
|
|
961
963
|
id: "lifecycle-id-match",
|
|
@@ -1054,7 +1056,8 @@ var TEST_DEFINITIONS = [
|
|
|
1054
1056
|
required: false,
|
|
1055
1057
|
specRef: "client/sampling",
|
|
1056
1058
|
description: "If the server's initialize response or serverInfo implies it uses client-side sampling (sampling/createMessage), verify the capability declaration shape. Currently this is an advisory shape check \u2014 actually exercising the server\u2192client flow requires a client-side sampling handler and is out of scope.",
|
|
1057
|
-
recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required."
|
|
1059
|
+
recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required.",
|
|
1060
|
+
parallelSafe: true
|
|
1058
1061
|
},
|
|
1059
1062
|
{
|
|
1060
1063
|
id: "lifecycle-roots-capability",
|
|
@@ -1063,7 +1066,8 @@ var TEST_DEFINITIONS = [
|
|
|
1063
1066
|
required: false,
|
|
1064
1067
|
specRef: "client/roots",
|
|
1065
1068
|
description: "Roots (filesystem root paths) is a client capability. This test verifies that if a server sends roots/list requests, it handles gracefully when the client doesn't declare the roots capability (i.e., doesn't crash).",
|
|
1066
|
-
recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side."
|
|
1069
|
+
recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side.",
|
|
1070
|
+
parallelSafe: true
|
|
1067
1071
|
},
|
|
1068
1072
|
{
|
|
1069
1073
|
id: "lifecycle-elicitation-capability",
|
|
@@ -1072,7 +1076,8 @@ var TEST_DEFINITIONS = [
|
|
|
1072
1076
|
required: false,
|
|
1073
1077
|
specRef: "client/elicitation",
|
|
1074
1078
|
description: "Elicitation (asking the user for structured input mid-operation) is a client capability added in 2025-11-25. This test verifies servers that use elicitation/create handle the case where clients don't support it.",
|
|
1075
|
-
recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error)."
|
|
1079
|
+
recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error).",
|
|
1080
|
+
parallelSafe: true
|
|
1076
1081
|
},
|
|
1077
1082
|
{
|
|
1078
1083
|
id: "lifecycle-meta-tolerance",
|
|
@@ -1081,7 +1086,8 @@ var TEST_DEFINITIONS = [
|
|
|
1081
1086
|
required: false,
|
|
1082
1087
|
specRef: "basic/utilities#_meta",
|
|
1083
1088
|
description: "Sends a ping with params._meta = { extra: 'value' } and verifies the server doesn't error. The 2025-11-25 spec allows arbitrary _meta on any request; servers should ignore unknown _meta fields gracefully.",
|
|
1084
|
-
recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility."
|
|
1089
|
+
recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility.",
|
|
1090
|
+
parallelSafe: true
|
|
1085
1091
|
},
|
|
1086
1092
|
// ── Tools (4 tests) ──────────────────────────────────────────────
|
|
1087
1093
|
{
|
|
@@ -1780,8 +1786,14 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
1780
1786
|
let resourceNames = [];
|
|
1781
1787
|
let promptCount = 0;
|
|
1782
1788
|
let promptNames = [];
|
|
1783
|
-
|
|
1784
|
-
|
|
1789
|
+
const concurrency = Math.max(1, options.concurrency ?? 1);
|
|
1790
|
+
const inFlight = /* @__PURE__ */ new Set();
|
|
1791
|
+
async function drainPool() {
|
|
1792
|
+
while (inFlight.size > 0) {
|
|
1793
|
+
await Promise.race(inFlight);
|
|
1794
|
+
}
|
|
1795
|
+
}
|
|
1796
|
+
async function runTestFn(id, name, category, required, specRef, fn) {
|
|
1785
1797
|
const start = Date.now();
|
|
1786
1798
|
let lastResult = { passed: false, details: "" };
|
|
1787
1799
|
for (let attempt = 0; attempt <= retries; attempt++) {
|
|
@@ -1809,6 +1821,21 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
1809
1821
|
options.onProgress?.(id, lastResult.passed, lastResult.details);
|
|
1810
1822
|
options.onTestComplete?.(result);
|
|
1811
1823
|
}
|
|
1824
|
+
async function test(id, name, category, required, specRef, fn) {
|
|
1825
|
+
if (!shouldRun2(id, category)) return;
|
|
1826
|
+
const def = TEST_DEFINITIONS_MAP.get(id);
|
|
1827
|
+
const eligible = concurrency > 1 && def?.parallelSafe === true;
|
|
1828
|
+
if (!eligible) {
|
|
1829
|
+
if (inFlight.size > 0) await drainPool();
|
|
1830
|
+
await runTestFn(id, name, category, required, specRef, fn);
|
|
1831
|
+
return;
|
|
1832
|
+
}
|
|
1833
|
+
while (inFlight.size >= concurrency) await Promise.race(inFlight);
|
|
1834
|
+
const p = runTestFn(id, name, category, required, specRef, fn).finally(() => {
|
|
1835
|
+
inFlight.delete(p);
|
|
1836
|
+
});
|
|
1837
|
+
inFlight.add(p);
|
|
1838
|
+
}
|
|
1812
1839
|
await test(
|
|
1813
1840
|
"transport-post",
|
|
1814
1841
|
"HTTP POST accepted",
|
|
@@ -4345,6 +4372,7 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
4345
4372
|
const truncated = warnings.length - MAX_WARNINGS;
|
|
4346
4373
|
warnings.splice(MAX_WARNINGS, truncated, `... and ${truncated} more warning(s) suppressed`);
|
|
4347
4374
|
}
|
|
4375
|
+
if (inFlight.size > 0) await drainPool();
|
|
4348
4376
|
const { score, grade, overall, summary, categories } = computeScore(tests);
|
|
4349
4377
|
const badge = generateBadge(displayUrl);
|
|
4350
4378
|
return {
|
|
@@ -5275,7 +5303,11 @@ program.command("test").description("Run the full compliance test suite against
|
|
|
5275
5303
|
"--timeout <ms>",
|
|
5276
5304
|
"Request timeout in milliseconds (bump to 30000+ for stdio servers with slow startup)",
|
|
5277
5305
|
"15000"
|
|
5278
|
-
).option("--no-color", "Disable colored output (also honors NO_COLOR env var)").option("--watch", "Re-run tests when files in the cwd change (stdio targets only)").option(
|
|
5306
|
+
).option("--no-color", "Disable colored output (also honors NO_COLOR env var)").option("--watch", "Re-run tests when files in the cwd change (stdio targets only)").option(
|
|
5307
|
+
"--concurrency <n>",
|
|
5308
|
+
"Max parallel-safe tests in flight (default 1; see docs/PERFORMANCE.md before raising)",
|
|
5309
|
+
"1"
|
|
5310
|
+
).option("--preflight-timeout <ms>", "Preflight connectivity check timeout in milliseconds").option("--retries <n>", "Number of retries for failed tests", "0").option(
|
|
5279
5311
|
"--only <items>",
|
|
5280
5312
|
'Only run matching categories or test IDs, comma-separated (e.g., "transport,lifecycle" or "transport-post,lifecycle-init")',
|
|
5281
5313
|
parseList
|
|
@@ -5334,6 +5366,7 @@ Testing ${describeTarget(transportTarget)}...
|
|
|
5334
5366
|
timeout: parsePositiveInt(opts.timeout, "--timeout", 1),
|
|
5335
5367
|
preflightTimeout: opts.preflightTimeout ? parsePositiveInt(opts.preflightTimeout, "--preflight-timeout", 1) : config?.preflightTimeout,
|
|
5336
5368
|
retries: parsePositiveInt(opts.retries, "--retries"),
|
|
5369
|
+
concurrency: parsePositiveInt(opts.concurrency, "--concurrency", 1),
|
|
5337
5370
|
only,
|
|
5338
5371
|
skip,
|
|
5339
5372
|
onProgress: verbose ? (testId, passed, details) => {
|
package/dist/mcp/server.js
CHANGED
package/dist/runner.d.ts
CHANGED
|
@@ -63,6 +63,18 @@ interface TestDefinition {
|
|
|
63
63
|
recommendation: string;
|
|
64
64
|
/** Transports this test applies to. Omit = all transports. */
|
|
65
65
|
transports?: ("http" | "stdio")[];
|
|
66
|
+
/**
|
|
67
|
+
* Declares this test safe to run concurrently with other parallel-safe
|
|
68
|
+
* tests. Default = false (serialized with other tests in the runner
|
|
69
|
+
* loop). Tests are parallel-safe when they:
|
|
70
|
+
* - don't mutate shared closure state (sessionId, cachedToolsList, …)
|
|
71
|
+
* - don't depend on the result of another concurrently-running test
|
|
72
|
+
* - tolerate the server seeing >1 in-flight request at a time
|
|
73
|
+
*
|
|
74
|
+
* Setup tests (init, notifications/initialized) and tests that
|
|
75
|
+
* populate caches (tools/list, resources/list) must stay `false`.
|
|
76
|
+
*/
|
|
77
|
+
parallelSafe?: boolean;
|
|
66
78
|
}
|
|
67
79
|
/** Describes the server under test. URL string = HTTP for backwards compat. */
|
|
68
80
|
type TransportTarget = {
|
|
@@ -168,6 +180,14 @@ interface RunOptions {
|
|
|
168
180
|
skip?: string[];
|
|
169
181
|
/** Preflight connectivity check timeout in milliseconds (default: min(timeout, 10000)) */
|
|
170
182
|
preflightTimeout?: number;
|
|
183
|
+
/**
|
|
184
|
+
* Maximum number of parallel-safe tests in flight at once. Default 1
|
|
185
|
+
* (strictly sequential — matches pre-0.12 behavior). Tests are only
|
|
186
|
+
* eligible for parallel execution when their `TestDefinition.parallelSafe`
|
|
187
|
+
* is true; everything else stays sequential regardless. See
|
|
188
|
+
* docs/PERFORMANCE.md for the design.
|
|
189
|
+
*/
|
|
190
|
+
concurrency?: number;
|
|
171
191
|
}
|
|
172
192
|
/**
|
|
173
193
|
* Run the full MCP compliance test suite. Accepts either a URL string
|
package/dist/runner.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yawlabs/mcp-compliance",
|
|
3
|
-
"version": "0.11.0",
|
|
3
|
+
"version": "0.12.0",
|
|
4
4
|
"description": "CLI tool and MCP server that tests MCP servers for spec compliance",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Yaw Labs <contact@yaw.sh> (https://yaw.sh)",
|
|
@@ -51,6 +51,7 @@
|
|
|
51
51
|
"ajv": "^8.18.0",
|
|
52
52
|
"ajv-formats": "^3.0.1",
|
|
53
53
|
"tsup": "^8.4.0",
|
|
54
|
+
"tsx": "^4.21.0",
|
|
54
55
|
"typescript": "^5.8.3",
|
|
55
56
|
"vitest": "^3.1.1"
|
|
56
57
|
},
|