@yawlabs/mcp-compliance 0.10.1 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -3
- package/dist/{chunk-7KISK3FS.js → chunk-M67VVIRO.js} +107 -6
- package/dist/index.js +112 -7
- package/dist/mcp/server.js +1 -1
- package/dist/runner.d.ts +31 -1
- package/dist/runner.js +5 -3
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
[GitHub stars](https://github.com/YawLabs/mcp-compliance/stargazers)
|
|
6
6
|
[CI status](https://github.com/YawLabs/mcp-compliance/actions/workflows/ci.yml)
|
|
7
7
|
|
|
8
|
-
**Test any MCP server for spec compliance.**
|
|
8
|
+
**Test any MCP server for spec compliance.** 88-test suite covering transport, lifecycle, tools, resources, prompts, error handling, schema validation, and security against the [MCP specification](https://modelcontextprotocol.io/specification/2025-11-25). Works against **HTTP endpoints** (`https://my-server.com/mcp`) and **stdio servers** (`npx @modelcontextprotocol/server-filesystem /tmp`) alike. CLI, MCP server, and programmatic API.
|
|
9
9
|
|
|
10
10
|
Built and maintained by [Yaw Labs](https://yaw.sh).
|
|
11
11
|
|
|
@@ -479,7 +479,7 @@ Restart your MCP client and approve the server when prompted.
|
|
|
479
479
|
|
|
480
480
|
### Tools
|
|
481
481
|
|
|
482
|
-
- **mcp_compliance_test** — Run the full
|
|
482
|
+
- **mcp_compliance_test** — Run the full 88-test suite against a URL or stdio command. Supports auth, custom headers, env vars, timeout, retries, and category/test filtering. Returns grade, score, and detailed results.
|
|
483
483
|
- **mcp_compliance_badge** — Get the badge markdown/HTML for a server. Supports auth and custom headers.
|
|
484
484
|
- **mcp_compliance_explain** — Explain what a specific test ID checks and why it matters.
|
|
485
485
|
|
|
@@ -540,8 +540,16 @@ Consumer guidance:
|
|
|
540
540
|
|
|
541
541
|
The compliance testing methodology is published as an open specification:
|
|
542
542
|
|
|
543
|
-
- **[MCP Compliance Testing Specification](./MCP_COMPLIANCE_SPEC.md)** — test execution model, scoring algorithm, all
|
|
543
|
+
- **[MCP Compliance Testing Specification](./MCP_COMPLIANCE_SPEC.md)** — test execution model, scoring algorithm, all 88 test rules with pass/fail criteria (CC BY 4.0)
|
|
544
544
|
- **[Machine-readable rule catalog](./mcp-compliance-rules.json)** — JSON Schema-compliant catalog for programmatic consumption
|
|
545
|
+
- **[Why `mcp-compliance`](./docs/WHY.md)** — the problem, existing alternatives, what this tool does differently
|
|
546
|
+
- **[Fixing common failures](./docs/FIXES.md)** — recipes for the most frequent test failures with code snippets
|
|
547
|
+
- **[Spec version migration policy](./docs/SPEC_VERSION_MIGRATION.md)** — how this tool evolves with MCP spec releases
|
|
548
|
+
- **[mcp.hosting external API](./docs/EXT_API.md)** — public submit/retrieve/badge/delete endpoints used by `mcp-compliance badge` and any custom integrations
|
|
549
|
+
- **[Enterprise tier (draft)](./docs/ENTERPRISE.md)** — paid tier structure for organizations with scheduled/private/audit-track compliance needs
|
|
550
|
+
- **[Performance deep-dive](./docs/PERFORMANCE.md)** — why the suite is sequential and what parallel execution would cost
|
|
551
|
+
- **[Spec PR drafts](./docs/spec-prs/)** — our proposed MCP spec clarifications for ambiguous cases we've hit
|
|
552
|
+
- **[mcp.hosting integration spec](./docs/mcp-hosting-integration.md)** — the contract between this engine and the mcp.hosting platform: URL surfaces, data flow, storage model, badge API, leaderboard, router integration
|
|
545
553
|
|
|
546
554
|
These are complementary to (not competing with) the [official MCP specification](https://modelcontextprotocol.io/specification/2025-11-25). The MCP spec defines what servers must do; this spec defines how to verify compliance.
|
|
547
555
|
|
|
@@ -600,7 +600,8 @@ var TEST_DEFINITIONS = [
|
|
|
600
600
|
required: true,
|
|
601
601
|
specRef: "basic/utilities#ping",
|
|
602
602
|
description: "Tests that the server responds to the ping method with an empty result object. This is a required utility method.",
|
|
603
|
-
recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.'
|
|
603
|
+
recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.',
|
|
604
|
+
parallelSafe: true
|
|
604
605
|
},
|
|
605
606
|
{
|
|
606
607
|
id: "lifecycle-instructions",
|
|
@@ -609,7 +610,8 @@ var TEST_DEFINITIONS = [
|
|
|
609
610
|
required: false,
|
|
610
611
|
specRef: "basic/lifecycle#initialization",
|
|
611
612
|
description: "If the server includes an instructions field in the initialize response, validates it is a string. Instructions provide guidance for how the client should interact with the server.",
|
|
612
|
-
recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string."
|
|
613
|
+
recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string.",
|
|
614
|
+
parallelSafe: true
|
|
613
615
|
},
|
|
614
616
|
{
|
|
615
617
|
id: "lifecycle-id-match",
|
|
@@ -701,6 +703,36 @@ var TEST_DEFINITIONS = [
|
|
|
701
703
|
description: "Sends a tools/call request with _meta.progressToken and checks if the server sends progress notifications via SSE. Progress support is optional but recommended for long-running operations.",
|
|
702
704
|
recommendation: "When a request includes _meta.progressToken, send notifications/progress events via SSE to report progress. Include progressToken, progress (current), and optionally total fields."
|
|
703
705
|
},
|
|
706
|
+
{
|
|
707
|
+
id: "lifecycle-sampling-capability",
|
|
708
|
+
name: "Sampling capability shape",
|
|
709
|
+
category: "lifecycle",
|
|
710
|
+
required: false,
|
|
711
|
+
specRef: "client/sampling",
|
|
712
|
+
description: "If the server's initialize response or serverInfo implies it uses client-side sampling (sampling/createMessage), verify the capability declaration shape. Currently this is an advisory shape check \u2014 actually exercising the server\u2192client flow requires a client-side sampling handler and is out of scope.",
|
|
713
|
+
recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required.",
|
|
714
|
+
parallelSafe: true
|
|
715
|
+
},
|
|
716
|
+
{
|
|
717
|
+
id: "lifecycle-roots-capability",
|
|
718
|
+
name: "Roots capability shape",
|
|
719
|
+
category: "lifecycle",
|
|
720
|
+
required: false,
|
|
721
|
+
specRef: "client/roots",
|
|
722
|
+
description: "Roots (filesystem root paths) is a client capability. This test verifies that if a server sends roots/list requests, it handles gracefully when the client doesn't declare the roots capability (i.e., doesn't crash).",
|
|
723
|
+
recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side.",
|
|
724
|
+
parallelSafe: true
|
|
725
|
+
},
|
|
726
|
+
{
|
|
727
|
+
id: "lifecycle-elicitation-capability",
|
|
728
|
+
name: "Elicitation capability shape",
|
|
729
|
+
category: "lifecycle",
|
|
730
|
+
required: false,
|
|
731
|
+
specRef: "client/elicitation",
|
|
732
|
+
description: "Elicitation (asking the user for structured input mid-operation) is a client capability added in 2025-11-25. This test verifies servers that use elicitation/create handle the case where clients don't support it.",
|
|
733
|
+
recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error).",
|
|
734
|
+
parallelSafe: true
|
|
735
|
+
},
|
|
704
736
|
{
|
|
705
737
|
id: "lifecycle-meta-tolerance",
|
|
706
738
|
name: "Tolerates _meta field on requests",
|
|
@@ -708,7 +740,8 @@ var TEST_DEFINITIONS = [
|
|
|
708
740
|
required: false,
|
|
709
741
|
specRef: "basic/utilities#_meta",
|
|
710
742
|
description: "Sends a ping with params._meta = { extra: 'value' } and verifies the server doesn't error. The 2025-11-25 spec allows arbitrary _meta on any request; servers should ignore unknown _meta fields gracefully.",
|
|
711
|
-
recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility."
|
|
743
|
+
recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility.",
|
|
744
|
+
parallelSafe: true
|
|
712
745
|
},
|
|
713
746
|
// ── Tools (4 tests) ──────────────────────────────────────────────
|
|
714
747
|
{
|
|
@@ -1407,8 +1440,14 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
1407
1440
|
let resourceNames = [];
|
|
1408
1441
|
let promptCount = 0;
|
|
1409
1442
|
let promptNames = [];
|
|
1410
|
-
|
|
1411
|
-
|
|
1443
|
+
const concurrency = Math.max(1, options.concurrency ?? 1);
|
|
1444
|
+
const inFlight = /* @__PURE__ */ new Set();
|
|
1445
|
+
async function drainPool() {
|
|
1446
|
+
while (inFlight.size > 0) {
|
|
1447
|
+
await Promise.race(inFlight);
|
|
1448
|
+
}
|
|
1449
|
+
}
|
|
1450
|
+
async function runTestFn(id, name, category, required, specRef, fn) {
|
|
1412
1451
|
const start = Date.now();
|
|
1413
1452
|
let lastResult = { passed: false, details: "" };
|
|
1414
1453
|
for (let attempt = 0; attempt <= retries; attempt++) {
|
|
@@ -1436,6 +1475,21 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
1436
1475
|
options.onProgress?.(id, lastResult.passed, lastResult.details);
|
|
1437
1476
|
options.onTestComplete?.(result);
|
|
1438
1477
|
}
|
|
1478
|
+
async function test(id, name, category, required, specRef, fn) {
|
|
1479
|
+
if (!shouldRun2(id, category)) return;
|
|
1480
|
+
const def = TEST_DEFINITIONS_MAP.get(id);
|
|
1481
|
+
const eligible = concurrency > 1 && def?.parallelSafe === true;
|
|
1482
|
+
if (!eligible) {
|
|
1483
|
+
if (inFlight.size > 0) await drainPool();
|
|
1484
|
+
await runTestFn(id, name, category, required, specRef, fn);
|
|
1485
|
+
return;
|
|
1486
|
+
}
|
|
1487
|
+
while (inFlight.size >= concurrency) await Promise.race(inFlight);
|
|
1488
|
+
const p = runTestFn(id, name, category, required, specRef, fn).finally(() => {
|
|
1489
|
+
inFlight.delete(p);
|
|
1490
|
+
});
|
|
1491
|
+
inFlight.add(p);
|
|
1492
|
+
}
|
|
1439
1493
|
await test(
|
|
1440
1494
|
"transport-post",
|
|
1441
1495
|
"HTTP POST accepted",
|
|
@@ -1604,7 +1658,11 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
1604
1658
|
try {
|
|
1605
1659
|
initRes = await rpc("initialize", {
|
|
1606
1660
|
protocolVersion: SPEC_VERSION,
|
|
1607
|
-
capabilities: {},
|
|
1661
|
+
capabilities: {
|
|
1662
|
+
sampling: {},
|
|
1663
|
+
roots: { listChanged: true },
|
|
1664
|
+
elicitation: {}
|
|
1665
|
+
},
|
|
1608
1666
|
clientInfo: { name: "mcp-compliance", version: TOOL_VERSION }
|
|
1609
1667
|
});
|
|
1610
1668
|
const result = initRes?.body?.result;
|
|
@@ -2026,6 +2084,47 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
2026
2084
|
}
|
|
2027
2085
|
}
|
|
2028
2086
|
);
|
|
2087
|
+
await test(
|
|
2088
|
+
"lifecycle-sampling-capability",
|
|
2089
|
+
"Sampling capability shape",
|
|
2090
|
+
"lifecycle",
|
|
2091
|
+
false,
|
|
2092
|
+
"client/sampling",
|
|
2093
|
+
async () => {
|
|
2094
|
+
if (!initRes || initRes.body?.error) {
|
|
2095
|
+
return { passed: false, details: "Server rejected initialize" };
|
|
2096
|
+
}
|
|
2097
|
+
return {
|
|
2098
|
+
passed: true,
|
|
2099
|
+
details: "Server accepted initialize with client sampling capability. Full server\u2192client sampling flow not exercised."
|
|
2100
|
+
};
|
|
2101
|
+
}
|
|
2102
|
+
);
|
|
2103
|
+
await test("lifecycle-roots-capability", "Roots capability shape", "lifecycle", false, "client/roots", async () => {
|
|
2104
|
+
if (!initRes || initRes.body?.error) {
|
|
2105
|
+
return { passed: false, details: "Server rejected initialize" };
|
|
2106
|
+
}
|
|
2107
|
+
return {
|
|
2108
|
+
passed: true,
|
|
2109
|
+
details: "Server accepted initialize. Full server\u2192client roots/list flow not exercised (requires a roots-aware client)."
|
|
2110
|
+
};
|
|
2111
|
+
});
|
|
2112
|
+
await test(
|
|
2113
|
+
"lifecycle-elicitation-capability",
|
|
2114
|
+
"Elicitation capability shape",
|
|
2115
|
+
"lifecycle",
|
|
2116
|
+
false,
|
|
2117
|
+
"client/elicitation",
|
|
2118
|
+
async () => {
|
|
2119
|
+
if (!initRes || initRes.body?.error) {
|
|
2120
|
+
return { passed: false, details: "Server rejected initialize" };
|
|
2121
|
+
}
|
|
2122
|
+
return {
|
|
2123
|
+
passed: true,
|
|
2124
|
+
details: "Server accepted initialize. Full server\u2192client elicitation/create flow not exercised."
|
|
2125
|
+
};
|
|
2126
|
+
}
|
|
2127
|
+
);
|
|
2029
2128
|
await test(
|
|
2030
2129
|
"lifecycle-meta-tolerance",
|
|
2031
2130
|
"Tolerates _meta field on requests",
|
|
@@ -3927,6 +4026,7 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
3927
4026
|
const truncated = warnings.length - MAX_WARNINGS;
|
|
3928
4027
|
warnings.splice(MAX_WARNINGS, truncated, `... and ${truncated} more warning(s) suppressed`);
|
|
3929
4028
|
}
|
|
4029
|
+
if (inFlight.size > 0) await drainPool();
|
|
3930
4030
|
const { score, grade, overall, summary, categories } = computeScore(tests);
|
|
3931
4031
|
const badge = generateBadge(displayUrl);
|
|
3932
4032
|
return {
|
|
@@ -3958,6 +4058,7 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
3958
4058
|
}
|
|
3959
4059
|
|
|
3960
4060
|
export {
|
|
4061
|
+
urlHash,
|
|
3961
4062
|
generateBadge,
|
|
3962
4063
|
computeGrade,
|
|
3963
4064
|
computeScore,
|
package/dist/index.js
CHANGED
|
@@ -946,7 +946,8 @@ var TEST_DEFINITIONS = [
|
|
|
946
946
|
required: true,
|
|
947
947
|
specRef: "basic/utilities#ping",
|
|
948
948
|
description: "Tests that the server responds to the ping method with an empty result object. This is a required utility method.",
|
|
949
|
-
recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.'
|
|
949
|
+
recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.',
|
|
950
|
+
parallelSafe: true
|
|
950
951
|
},
|
|
951
952
|
{
|
|
952
953
|
id: "lifecycle-instructions",
|
|
@@ -955,7 +956,8 @@ var TEST_DEFINITIONS = [
|
|
|
955
956
|
required: false,
|
|
956
957
|
specRef: "basic/lifecycle#initialization",
|
|
957
958
|
description: "If the server includes an instructions field in the initialize response, validates it is a string. Instructions provide guidance for how the client should interact with the server.",
|
|
958
|
-
recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string."
|
|
959
|
+
recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string.",
|
|
960
|
+
parallelSafe: true
|
|
959
961
|
},
|
|
960
962
|
{
|
|
961
963
|
id: "lifecycle-id-match",
|
|
@@ -1047,6 +1049,36 @@ var TEST_DEFINITIONS = [
|
|
|
1047
1049
|
description: "Sends a tools/call request with _meta.progressToken and checks if the server sends progress notifications via SSE. Progress support is optional but recommended for long-running operations.",
|
|
1048
1050
|
recommendation: "When a request includes _meta.progressToken, send notifications/progress events via SSE to report progress. Include progressToken, progress (current), and optionally total fields."
|
|
1049
1051
|
},
|
|
1052
|
+
{
|
|
1053
|
+
id: "lifecycle-sampling-capability",
|
|
1054
|
+
name: "Sampling capability shape",
|
|
1055
|
+
category: "lifecycle",
|
|
1056
|
+
required: false,
|
|
1057
|
+
specRef: "client/sampling",
|
|
1058
|
+
description: "If the server's initialize response or serverInfo implies it uses client-side sampling (sampling/createMessage), verify the capability declaration shape. Currently this is an advisory shape check \u2014 actually exercising the server\u2192client flow requires a client-side sampling handler and is out of scope.",
|
|
1059
|
+
recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required.",
|
|
1060
|
+
parallelSafe: true
|
|
1061
|
+
},
|
|
1062
|
+
{
|
|
1063
|
+
id: "lifecycle-roots-capability",
|
|
1064
|
+
name: "Roots capability shape",
|
|
1065
|
+
category: "lifecycle",
|
|
1066
|
+
required: false,
|
|
1067
|
+
specRef: "client/roots",
|
|
1068
|
+
description: "Roots (filesystem root paths) is a client capability. This test verifies that if a server sends roots/list requests, it handles gracefully when the client doesn't declare the roots capability (i.e., doesn't crash).",
|
|
1069
|
+
recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side.",
|
|
1070
|
+
parallelSafe: true
|
|
1071
|
+
},
|
|
1072
|
+
{
|
|
1073
|
+
id: "lifecycle-elicitation-capability",
|
|
1074
|
+
name: "Elicitation capability shape",
|
|
1075
|
+
category: "lifecycle",
|
|
1076
|
+
required: false,
|
|
1077
|
+
specRef: "client/elicitation",
|
|
1078
|
+
description: "Elicitation (asking the user for structured input mid-operation) is a client capability added in 2025-11-25. This test verifies servers that use elicitation/create handle the case where clients don't support it.",
|
|
1079
|
+
recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error).",
|
|
1080
|
+
parallelSafe: true
|
|
1081
|
+
},
|
|
1050
1082
|
{
|
|
1051
1083
|
id: "lifecycle-meta-tolerance",
|
|
1052
1084
|
name: "Tolerates _meta field on requests",
|
|
@@ -1054,7 +1086,8 @@ var TEST_DEFINITIONS = [
|
|
|
1054
1086
|
required: false,
|
|
1055
1087
|
specRef: "basic/utilities#_meta",
|
|
1056
1088
|
description: "Sends a ping with params._meta = { extra: 'value' } and verifies the server doesn't error. The 2025-11-25 spec allows arbitrary _meta on any request; servers should ignore unknown _meta fields gracefully.",
|
|
1057
|
-
recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility."
|
|
1089
|
+
recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility.",
|
|
1090
|
+
parallelSafe: true
|
|
1058
1091
|
},
|
|
1059
1092
|
// ── Tools (4 tests) ──────────────────────────────────────────────
|
|
1060
1093
|
{
|
|
@@ -1753,8 +1786,14 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
1753
1786
|
let resourceNames = [];
|
|
1754
1787
|
let promptCount = 0;
|
|
1755
1788
|
let promptNames = [];
|
|
1756
|
-
|
|
1757
|
-
|
|
1789
|
+
const concurrency = Math.max(1, options.concurrency ?? 1);
|
|
1790
|
+
const inFlight = /* @__PURE__ */ new Set();
|
|
1791
|
+
async function drainPool() {
|
|
1792
|
+
while (inFlight.size > 0) {
|
|
1793
|
+
await Promise.race(inFlight);
|
|
1794
|
+
}
|
|
1795
|
+
}
|
|
1796
|
+
async function runTestFn(id, name, category, required, specRef, fn) {
|
|
1758
1797
|
const start = Date.now();
|
|
1759
1798
|
let lastResult = { passed: false, details: "" };
|
|
1760
1799
|
for (let attempt = 0; attempt <= retries; attempt++) {
|
|
@@ -1782,6 +1821,21 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
1782
1821
|
options.onProgress?.(id, lastResult.passed, lastResult.details);
|
|
1783
1822
|
options.onTestComplete?.(result);
|
|
1784
1823
|
}
|
|
1824
|
+
async function test(id, name, category, required, specRef, fn) {
|
|
1825
|
+
if (!shouldRun2(id, category)) return;
|
|
1826
|
+
const def = TEST_DEFINITIONS_MAP.get(id);
|
|
1827
|
+
const eligible = concurrency > 1 && def?.parallelSafe === true;
|
|
1828
|
+
if (!eligible) {
|
|
1829
|
+
if (inFlight.size > 0) await drainPool();
|
|
1830
|
+
await runTestFn(id, name, category, required, specRef, fn);
|
|
1831
|
+
return;
|
|
1832
|
+
}
|
|
1833
|
+
while (inFlight.size >= concurrency) await Promise.race(inFlight);
|
|
1834
|
+
const p = runTestFn(id, name, category, required, specRef, fn).finally(() => {
|
|
1835
|
+
inFlight.delete(p);
|
|
1836
|
+
});
|
|
1837
|
+
inFlight.add(p);
|
|
1838
|
+
}
|
|
1785
1839
|
await test(
|
|
1786
1840
|
"transport-post",
|
|
1787
1841
|
"HTTP POST accepted",
|
|
@@ -1950,7 +2004,11 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
1950
2004
|
try {
|
|
1951
2005
|
initRes = await rpc("initialize", {
|
|
1952
2006
|
protocolVersion: SPEC_VERSION,
|
|
1953
|
-
capabilities: {},
|
|
2007
|
+
capabilities: {
|
|
2008
|
+
sampling: {},
|
|
2009
|
+
roots: { listChanged: true },
|
|
2010
|
+
elicitation: {}
|
|
2011
|
+
},
|
|
1954
2012
|
clientInfo: { name: "mcp-compliance", version: TOOL_VERSION }
|
|
1955
2013
|
});
|
|
1956
2014
|
const result = initRes?.body?.result;
|
|
@@ -2372,6 +2430,47 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
2372
2430
|
}
|
|
2373
2431
|
}
|
|
2374
2432
|
);
|
|
2433
|
+
await test(
|
|
2434
|
+
"lifecycle-sampling-capability",
|
|
2435
|
+
"Sampling capability shape",
|
|
2436
|
+
"lifecycle",
|
|
2437
|
+
false,
|
|
2438
|
+
"client/sampling",
|
|
2439
|
+
async () => {
|
|
2440
|
+
if (!initRes || initRes.body?.error) {
|
|
2441
|
+
return { passed: false, details: "Server rejected initialize" };
|
|
2442
|
+
}
|
|
2443
|
+
return {
|
|
2444
|
+
passed: true,
|
|
2445
|
+
details: "Server accepted initialize with client sampling capability. Full server\u2192client sampling flow not exercised."
|
|
2446
|
+
};
|
|
2447
|
+
}
|
|
2448
|
+
);
|
|
2449
|
+
await test("lifecycle-roots-capability", "Roots capability shape", "lifecycle", false, "client/roots", async () => {
|
|
2450
|
+
if (!initRes || initRes.body?.error) {
|
|
2451
|
+
return { passed: false, details: "Server rejected initialize" };
|
|
2452
|
+
}
|
|
2453
|
+
return {
|
|
2454
|
+
passed: true,
|
|
2455
|
+
details: "Server accepted initialize. Full server\u2192client roots/list flow not exercised (requires a roots-aware client)."
|
|
2456
|
+
};
|
|
2457
|
+
});
|
|
2458
|
+
await test(
|
|
2459
|
+
"lifecycle-elicitation-capability",
|
|
2460
|
+
"Elicitation capability shape",
|
|
2461
|
+
"lifecycle",
|
|
2462
|
+
false,
|
|
2463
|
+
"client/elicitation",
|
|
2464
|
+
async () => {
|
|
2465
|
+
if (!initRes || initRes.body?.error) {
|
|
2466
|
+
return { passed: false, details: "Server rejected initialize" };
|
|
2467
|
+
}
|
|
2468
|
+
return {
|
|
2469
|
+
passed: true,
|
|
2470
|
+
details: "Server accepted initialize. Full server\u2192client elicitation/create flow not exercised."
|
|
2471
|
+
};
|
|
2472
|
+
}
|
|
2473
|
+
);
|
|
2375
2474
|
await test(
|
|
2376
2475
|
"lifecycle-meta-tolerance",
|
|
2377
2476
|
"Tolerates _meta field on requests",
|
|
@@ -4273,6 +4372,7 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
4273
4372
|
const truncated = warnings.length - MAX_WARNINGS;
|
|
4274
4373
|
warnings.splice(MAX_WARNINGS, truncated, `... and ${truncated} more warning(s) suppressed`);
|
|
4275
4374
|
}
|
|
4375
|
+
if (inFlight.size > 0) await drainPool();
|
|
4276
4376
|
const { score, grade, overall, summary, categories } = computeScore(tests);
|
|
4277
4377
|
const badge = generateBadge(displayUrl);
|
|
4278
4378
|
return {
|
|
@@ -5203,7 +5303,11 @@ program.command("test").description("Run the full compliance test suite against
|
|
|
5203
5303
|
"--timeout <ms>",
|
|
5204
5304
|
"Request timeout in milliseconds (bump to 30000+ for stdio servers with slow startup)",
|
|
5205
5305
|
"15000"
|
|
5206
|
-
).option("--no-color", "Disable colored output (also honors NO_COLOR env var)").option("--watch", "Re-run tests when files in the cwd change (stdio targets only)").option(
|
|
5306
|
+
).option("--no-color", "Disable colored output (also honors NO_COLOR env var)").option("--watch", "Re-run tests when files in the cwd change (stdio targets only)").option(
|
|
5307
|
+
"--concurrency <n>",
|
|
5308
|
+
"Max parallel-safe tests in flight (default 1; see docs/PERFORMANCE.md before raising)",
|
|
5309
|
+
"1"
|
|
5310
|
+
).option("--preflight-timeout <ms>", "Preflight connectivity check timeout in milliseconds").option("--retries <n>", "Number of retries for failed tests", "0").option(
|
|
5207
5311
|
"--only <items>",
|
|
5208
5312
|
'Only run matching categories or test IDs, comma-separated (e.g., "transport,lifecycle" or "transport-post,lifecycle-init")',
|
|
5209
5313
|
parseList
|
|
@@ -5262,6 +5366,7 @@ Testing ${describeTarget(transportTarget)}...
|
|
|
5262
5366
|
timeout: parsePositiveInt(opts.timeout, "--timeout", 1),
|
|
5263
5367
|
preflightTimeout: opts.preflightTimeout ? parsePositiveInt(opts.preflightTimeout, "--preflight-timeout", 1) : config?.preflightTimeout,
|
|
5264
5368
|
retries: parsePositiveInt(opts.retries, "--retries"),
|
|
5369
|
+
concurrency: parsePositiveInt(opts.concurrency, "--concurrency", 1),
|
|
5265
5370
|
only,
|
|
5266
5371
|
skip,
|
|
5267
5372
|
onProgress: verbose ? (testId, passed, details) => {
|
package/dist/mcp/server.js
CHANGED
package/dist/runner.d.ts
CHANGED
|
@@ -63,6 +63,18 @@ interface TestDefinition {
|
|
|
63
63
|
recommendation: string;
|
|
64
64
|
/** Transports this test applies to. Omit = all transports. */
|
|
65
65
|
transports?: ("http" | "stdio")[];
|
|
66
|
+
/**
|
|
67
|
+
* Declares this test safe to run concurrently with other parallel-safe
|
|
68
|
+
* tests. Default = false (serialized with other tests in the runner
|
|
69
|
+
* loop). Tests are parallel-safe when they:
|
|
70
|
+
* - don't mutate shared closure state (sessionId, cachedToolsList, …)
|
|
71
|
+
* - don't depend on the result of another concurrently-running test
|
|
72
|
+
* - tolerate the server seeing >1 in-flight request at a time
|
|
73
|
+
*
|
|
74
|
+
* Setup tests (init, notifications/initialized) and tests that
|
|
75
|
+
* populate caches (tools/list, resources/list) must stay `false`.
|
|
76
|
+
*/
|
|
77
|
+
parallelSafe?: boolean;
|
|
66
78
|
}
|
|
67
79
|
/** Describes the server under test. URL string = HTTP for backwards compat. */
|
|
68
80
|
type TransportTarget = {
|
|
@@ -98,6 +110,16 @@ declare function computeScore(tests: TestResult[]): {
|
|
|
98
110
|
}>;
|
|
99
111
|
};
|
|
100
112
|
|
|
113
|
+
/**
|
|
114
|
+
* Generate a short, deterministic hash of a URL for badge paths.
|
|
115
|
+
* SHA-256 truncated to 24 hex chars (96 bits of entropy) — matches the
|
|
116
|
+
* server-side hash width used by mcp.hosting for `/compliance/ext/<hash>`.
|
|
117
|
+
*
|
|
118
|
+
* Exported so mcp.hosting (and other consumers) can compute matching
|
|
119
|
+
* hashes when looking up reports/badges by URL. The hash is the canonical
|
|
120
|
+
* key for `/compliance/ext/<hash>` and `/api/compliance/ext/<hash>/badge`.
|
|
121
|
+
*/
|
|
122
|
+
declare function urlHash(url: string): string;
|
|
101
123
|
/**
|
|
102
124
|
* Generate badge URLs and markdown for a compliance report.
|
|
103
125
|
* Badge images are served by mcp.hosting.
|
|
@@ -158,6 +180,14 @@ interface RunOptions {
|
|
|
158
180
|
skip?: string[];
|
|
159
181
|
/** Preflight connectivity check timeout in milliseconds (default: min(timeout, 10000)) */
|
|
160
182
|
preflightTimeout?: number;
|
|
183
|
+
/**
|
|
184
|
+
* Maximum number of parallel-safe tests in flight at once. Default 1
|
|
185
|
+
* (strictly sequential — matches pre-0.12 behavior). Tests are only
|
|
186
|
+
* eligible for parallel execution when their `TestDefinition.parallelSafe`
|
|
187
|
+
* is true; everything else stays sequential regardless. See
|
|
188
|
+
* docs/PERFORMANCE.md for the design.
|
|
189
|
+
*/
|
|
190
|
+
concurrency?: number;
|
|
161
191
|
}
|
|
162
192
|
/**
|
|
163
193
|
* Run the full MCP compliance test suite. Accepts either a URL string
|
|
@@ -165,4 +195,4 @@ interface RunOptions {
|
|
|
165
195
|
*/
|
|
166
196
|
declare function runComplianceSuite(target: string | TransportTarget, options?: RunOptions): Promise<ComplianceReport>;
|
|
167
197
|
|
|
168
|
-
export { type ComplianceReport, type PreviewOptions, type RunOptions, SPEC_BASE, SPEC_VERSION, TEST_DEFINITIONS, type TestResult, computeGrade, computeScore, generateBadge, parseSSEResponse, previewTests, runComplianceSuite };
|
|
198
|
+
export { type ComplianceReport, type PreviewOptions, type RunOptions, SPEC_BASE, SPEC_VERSION, TEST_DEFINITIONS, type TestResult, computeGrade, computeScore, generateBadge, parseSSEResponse, previewTests, runComplianceSuite, urlHash };
|
package/dist/runner.js
CHANGED
|
@@ -7,8 +7,9 @@ import {
|
|
|
7
7
|
generateBadge,
|
|
8
8
|
parseSSEResponse,
|
|
9
9
|
previewTests,
|
|
10
|
-
runComplianceSuite
|
|
11
|
-
} from "./chunk-7KISK3FS.js";
|
10
|
+
runComplianceSuite,
|
|
11
|
+
urlHash
|
|
12
|
+
} from "./chunk-M67VVIRO.js";
|
|
12
13
|
export {
|
|
13
14
|
SPEC_BASE,
|
|
14
15
|
SPEC_VERSION,
|
|
@@ -18,5 +19,6 @@ export {
|
|
|
18
19
|
generateBadge,
|
|
19
20
|
parseSSEResponse,
|
|
20
21
|
previewTests,
|
|
21
|
-
runComplianceSuite
|
|
22
|
+
runComplianceSuite,
|
|
23
|
+
urlHash
|
|
22
24
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yawlabs/mcp-compliance",
|
|
3
|
-
"version": "0.10.1",
|
|
3
|
+
"version": "0.12.0",
|
|
4
4
|
"description": "CLI tool and MCP server that tests MCP servers for spec compliance",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Yaw Labs <contact@yaw.sh> (https://yaw.sh)",
|
|
@@ -51,6 +51,7 @@
|
|
|
51
51
|
"ajv": "^8.18.0",
|
|
52
52
|
"ajv-formats": "^3.0.1",
|
|
53
53
|
"tsup": "^8.4.0",
|
|
54
|
+
"tsx": "^4.21.0",
|
|
54
55
|
"typescript": "^5.8.3",
|
|
55
56
|
"vitest": "^3.1.1"
|
|
56
57
|
},
|