@yawlabs/mcp-compliance 0.10.1 → 0.12.0

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,7 +5,7 @@
5
5
  [![GitHub stars](https://img.shields.io/github/stars/YawLabs/mcp-compliance)](https://github.com/YawLabs/mcp-compliance/stargazers)
6
6
  [![CI](https://github.com/YawLabs/mcp-compliance/actions/workflows/ci.yml/badge.svg)](https://github.com/YawLabs/mcp-compliance/actions/workflows/ci.yml)
7
7
 
8
- **Test any MCP server for spec compliance.** 85-test suite covering transport, lifecycle, tools, resources, prompts, error handling, schema validation, and security against the [MCP specification](https://modelcontextprotocol.io/specification/2025-11-25). Works against **HTTP endpoints** (`https://my-server.com/mcp`) and **stdio servers** (`npx @modelcontextprotocol/server-filesystem /tmp`) alike. CLI, MCP server, and programmatic API.
8
+ **Test any MCP server for spec compliance.** 88-test suite covering transport, lifecycle, tools, resources, prompts, error handling, schema validation, and security against the [MCP specification](https://modelcontextprotocol.io/specification/2025-11-25). Works against **HTTP endpoints** (`https://my-server.com/mcp`) and **stdio servers** (`npx @modelcontextprotocol/server-filesystem /tmp`) alike. CLI, MCP server, and programmatic API.
9
9
 
10
10
  Built and maintained by [Yaw Labs](https://yaw.sh).
11
11
 
@@ -479,7 +479,7 @@ Restart your MCP client and approve the server when prompted.
479
479
 
480
480
  ### Tools
481
481
 
482
- - **mcp_compliance_test** — Run the full 85-test suite against a URL or stdio command. Supports auth, custom headers, env vars, timeout, retries, and category/test filtering. Returns grade, score, and detailed results.
482
+ - **mcp_compliance_test** — Run the full 88-test suite against a URL or stdio command. Supports auth, custom headers, env vars, timeout, retries, and category/test filtering. Returns grade, score, and detailed results.
483
483
  - **mcp_compliance_badge** — Get the badge markdown/HTML for a server. Supports auth and custom headers.
484
484
  - **mcp_compliance_explain** — Explain what a specific test ID checks and why it matters.
485
485
 
@@ -540,8 +540,16 @@ Consumer guidance:
540
540
 
541
541
  The compliance testing methodology is published as an open specification:
542
542
 
543
- - **[MCP Compliance Testing Specification](./MCP_COMPLIANCE_SPEC.md)** — test execution model, scoring algorithm, all 84 test rules with pass/fail criteria (CC BY 4.0)
543
+ - **[MCP Compliance Testing Specification](./MCP_COMPLIANCE_SPEC.md)** — test execution model, scoring algorithm, all 88 test rules with pass/fail criteria (CC BY 4.0)
544
544
  - **[Machine-readable rule catalog](./mcp-compliance-rules.json)** — JSON Schema-compliant catalog for programmatic consumption
545
+ - **[Why `mcp-compliance`](./docs/WHY.md)** — the problem, existing alternatives, what this tool does differently
546
+ - **[Fixing common failures](./docs/FIXES.md)** — recipes for the most frequent test failures with code snippets
547
+ - **[Spec version migration policy](./docs/SPEC_VERSION_MIGRATION.md)** — how this tool evolves with MCP spec releases
548
+ - **[mcp.hosting external API](./docs/EXT_API.md)** — public submit/retrieve/badge/delete endpoints used by `mcp-compliance badge` and any custom integrations
549
+ - **[Enterprise tier (draft)](./docs/ENTERPRISE.md)** — paid tier structure for organizations with scheduled/private/audit-track compliance needs
550
+ - **[Performance deep-dive](./docs/PERFORMANCE.md)** — why the suite is sequential and what parallel execution would cost
551
+ - **[Spec PR drafts](./docs/spec-prs/)** — our proposed MCP spec clarifications for ambiguous cases we've hit
552
+ - **[mcp.hosting integration spec](./docs/mcp-hosting-integration.md)** — the contract between this engine and the mcp.hosting platform: URL surfaces, data flow, storage model, badge API, leaderboard, router integration
545
553
 
546
554
  These are complementary to (not competing with) the [official MCP specification](https://modelcontextprotocol.io/specification/2025-11-25). The MCP spec defines what servers must do; this spec defines how to verify compliance.
547
555
 
@@ -600,7 +600,8 @@ var TEST_DEFINITIONS = [
600
600
  required: true,
601
601
  specRef: "basic/utilities#ping",
602
602
  description: "Tests that the server responds to the ping method with an empty result object. This is a required utility method.",
603
- recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.'
603
+ recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.',
604
+ parallelSafe: true
604
605
  },
605
606
  {
606
607
  id: "lifecycle-instructions",
@@ -609,7 +610,8 @@ var TEST_DEFINITIONS = [
609
610
  required: false,
610
611
  specRef: "basic/lifecycle#initialization",
611
612
  description: "If the server includes an instructions field in the initialize response, validates it is a string. Instructions provide guidance for how the client should interact with the server.",
612
- recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string."
613
+ recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string.",
614
+ parallelSafe: true
613
615
  },
614
616
  {
615
617
  id: "lifecycle-id-match",
@@ -701,6 +703,36 @@ var TEST_DEFINITIONS = [
701
703
  description: "Sends a tools/call request with _meta.progressToken and checks if the server sends progress notifications via SSE. Progress support is optional but recommended for long-running operations.",
702
704
  recommendation: "When a request includes _meta.progressToken, send notifications/progress events via SSE to report progress. Include progressToken, progress (current), and optionally total fields."
703
705
  },
706
+ {
707
+ id: "lifecycle-sampling-capability",
708
+ name: "Sampling capability shape",
709
+ category: "lifecycle",
710
+ required: false,
711
+ specRef: "client/sampling",
712
+ description: "If the server's initialize response or serverInfo implies it uses client-side sampling (sampling/createMessage), verify the capability declaration shape. Currently this is an advisory shape check \u2014 actually exercising the server\u2192client flow requires a client-side sampling handler and is out of scope.",
713
+ recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required.",
714
+ parallelSafe: true
715
+ },
716
+ {
717
+ id: "lifecycle-roots-capability",
718
+ name: "Roots capability shape",
719
+ category: "lifecycle",
720
+ required: false,
721
+ specRef: "client/roots",
722
+ description: "Roots (filesystem root paths) is a client capability. This test verifies that if a server sends roots/list requests, it handles gracefully when the client doesn't declare the roots capability (i.e., doesn't crash).",
723
+ recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side.",
724
+ parallelSafe: true
725
+ },
726
+ {
727
+ id: "lifecycle-elicitation-capability",
728
+ name: "Elicitation capability shape",
729
+ category: "lifecycle",
730
+ required: false,
731
+ specRef: "client/elicitation",
732
+ description: "Elicitation (asking the user for structured input mid-operation) is a client capability added in 2025-11-25. This test verifies servers that use elicitation/create handle the case where clients don't support it.",
733
+ recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error).",
734
+ parallelSafe: true
735
+ },
704
736
  {
705
737
  id: "lifecycle-meta-tolerance",
706
738
  name: "Tolerates _meta field on requests",
@@ -708,7 +740,8 @@ var TEST_DEFINITIONS = [
708
740
  required: false,
709
741
  specRef: "basic/utilities#_meta",
710
742
  description: "Sends a ping with params._meta = { extra: 'value' } and verifies the server doesn't error. The 2025-11-25 spec allows arbitrary _meta on any request; servers should ignore unknown _meta fields gracefully.",
711
- recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility."
743
+ recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility.",
744
+ parallelSafe: true
712
745
  },
713
746
  // ── Tools (4 tests) ──────────────────────────────────────────────
714
747
  {
@@ -1407,8 +1440,14 @@ async function runComplianceSuite(target, options = {}) {
1407
1440
  let resourceNames = [];
1408
1441
  let promptCount = 0;
1409
1442
  let promptNames = [];
1410
- async function test(id, name, category, required, specRef, fn) {
1411
- if (!shouldRun2(id, category)) return;
1443
+ const concurrency = Math.max(1, options.concurrency ?? 1);
1444
+ const inFlight = /* @__PURE__ */ new Set();
1445
+ async function drainPool() {
1446
+ while (inFlight.size > 0) {
1447
+ await Promise.race(inFlight);
1448
+ }
1449
+ }
1450
+ async function runTestFn(id, name, category, required, specRef, fn) {
1412
1451
  const start = Date.now();
1413
1452
  let lastResult = { passed: false, details: "" };
1414
1453
  for (let attempt = 0; attempt <= retries; attempt++) {
@@ -1436,6 +1475,21 @@ async function runComplianceSuite(target, options = {}) {
1436
1475
  options.onProgress?.(id, lastResult.passed, lastResult.details);
1437
1476
  options.onTestComplete?.(result);
1438
1477
  }
1478
+ async function test(id, name, category, required, specRef, fn) {
1479
+ if (!shouldRun2(id, category)) return;
1480
+ const def = TEST_DEFINITIONS_MAP.get(id);
1481
+ const eligible = concurrency > 1 && def?.parallelSafe === true;
1482
+ if (!eligible) {
1483
+ if (inFlight.size > 0) await drainPool();
1484
+ await runTestFn(id, name, category, required, specRef, fn);
1485
+ return;
1486
+ }
1487
+ while (inFlight.size >= concurrency) await Promise.race(inFlight);
1488
+ const p = runTestFn(id, name, category, required, specRef, fn).finally(() => {
1489
+ inFlight.delete(p);
1490
+ });
1491
+ inFlight.add(p);
1492
+ }
1439
1493
  await test(
1440
1494
  "transport-post",
1441
1495
  "HTTP POST accepted",
@@ -1604,7 +1658,11 @@ async function runComplianceSuite(target, options = {}) {
1604
1658
  try {
1605
1659
  initRes = await rpc("initialize", {
1606
1660
  protocolVersion: SPEC_VERSION,
1607
- capabilities: {},
1661
+ capabilities: {
1662
+ sampling: {},
1663
+ roots: { listChanged: true },
1664
+ elicitation: {}
1665
+ },
1608
1666
  clientInfo: { name: "mcp-compliance", version: TOOL_VERSION }
1609
1667
  });
1610
1668
  const result = initRes?.body?.result;
@@ -2026,6 +2084,47 @@ async function runComplianceSuite(target, options = {}) {
2026
2084
  }
2027
2085
  }
2028
2086
  );
2087
+ await test(
2088
+ "lifecycle-sampling-capability",
2089
+ "Sampling capability shape",
2090
+ "lifecycle",
2091
+ false,
2092
+ "client/sampling",
2093
+ async () => {
2094
+ if (!initRes || initRes.body?.error) {
2095
+ return { passed: false, details: "Server rejected initialize" };
2096
+ }
2097
+ return {
2098
+ passed: true,
2099
+ details: "Server accepted initialize with client sampling capability. Full server\u2192client sampling flow not exercised."
2100
+ };
2101
+ }
2102
+ );
2103
+ await test("lifecycle-roots-capability", "Roots capability shape", "lifecycle", false, "client/roots", async () => {
2104
+ if (!initRes || initRes.body?.error) {
2105
+ return { passed: false, details: "Server rejected initialize" };
2106
+ }
2107
+ return {
2108
+ passed: true,
2109
+ details: "Server accepted initialize. Full server\u2192client roots/list flow not exercised (requires a roots-aware client)."
2110
+ };
2111
+ });
2112
+ await test(
2113
+ "lifecycle-elicitation-capability",
2114
+ "Elicitation capability shape",
2115
+ "lifecycle",
2116
+ false,
2117
+ "client/elicitation",
2118
+ async () => {
2119
+ if (!initRes || initRes.body?.error) {
2120
+ return { passed: false, details: "Server rejected initialize" };
2121
+ }
2122
+ return {
2123
+ passed: true,
2124
+ details: "Server accepted initialize. Full server\u2192client elicitation/create flow not exercised."
2125
+ };
2126
+ }
2127
+ );
2029
2128
  await test(
2030
2129
  "lifecycle-meta-tolerance",
2031
2130
  "Tolerates _meta field on requests",
@@ -3927,6 +4026,7 @@ async function runComplianceSuite(target, options = {}) {
3927
4026
  const truncated = warnings.length - MAX_WARNINGS;
3928
4027
  warnings.splice(MAX_WARNINGS, truncated, `... and ${truncated} more warning(s) suppressed`);
3929
4028
  }
4029
+ if (inFlight.size > 0) await drainPool();
3930
4030
  const { score, grade, overall, summary, categories } = computeScore(tests);
3931
4031
  const badge = generateBadge(displayUrl);
3932
4032
  return {
@@ -3958,6 +4058,7 @@ async function runComplianceSuite(target, options = {}) {
3958
4058
  }
3959
4059
 
3960
4060
  export {
4061
+ urlHash,
3961
4062
  generateBadge,
3962
4063
  computeGrade,
3963
4064
  computeScore,
package/dist/index.js CHANGED
@@ -946,7 +946,8 @@ var TEST_DEFINITIONS = [
946
946
  required: true,
947
947
  specRef: "basic/utilities#ping",
948
948
  description: "Tests that the server responds to the ping method with an empty result object. This is a required utility method.",
949
- recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.'
949
+ recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.',
950
+ parallelSafe: true
950
951
  },
951
952
  {
952
953
  id: "lifecycle-instructions",
@@ -955,7 +956,8 @@ var TEST_DEFINITIONS = [
955
956
  required: false,
956
957
  specRef: "basic/lifecycle#initialization",
957
958
  description: "If the server includes an instructions field in the initialize response, validates it is a string. Instructions provide guidance for how the client should interact with the server.",
958
- recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string."
959
+ recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string.",
960
+ parallelSafe: true
959
961
  },
960
962
  {
961
963
  id: "lifecycle-id-match",
@@ -1047,6 +1049,36 @@ var TEST_DEFINITIONS = [
1047
1049
  description: "Sends a tools/call request with _meta.progressToken and checks if the server sends progress notifications via SSE. Progress support is optional but recommended for long-running operations.",
1048
1050
  recommendation: "When a request includes _meta.progressToken, send notifications/progress events via SSE to report progress. Include progressToken, progress (current), and optionally total fields."
1049
1051
  },
1052
+ {
1053
+ id: "lifecycle-sampling-capability",
1054
+ name: "Sampling capability shape",
1055
+ category: "lifecycle",
1056
+ required: false,
1057
+ specRef: "client/sampling",
1058
+ description: "If the server's initialize response or serverInfo implies it uses client-side sampling (sampling/createMessage), verify the capability declaration shape. Currently this is an advisory shape check \u2014 actually exercising the server\u2192client flow requires a client-side sampling handler and is out of scope.",
1059
+ recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required.",
1060
+ parallelSafe: true
1061
+ },
1062
+ {
1063
+ id: "lifecycle-roots-capability",
1064
+ name: "Roots capability shape",
1065
+ category: "lifecycle",
1066
+ required: false,
1067
+ specRef: "client/roots",
1068
+ description: "Roots (filesystem root paths) is a client capability. This test verifies that if a server sends roots/list requests, it handles gracefully when the client doesn't declare the roots capability (i.e., doesn't crash).",
1069
+ recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side.",
1070
+ parallelSafe: true
1071
+ },
1072
+ {
1073
+ id: "lifecycle-elicitation-capability",
1074
+ name: "Elicitation capability shape",
1075
+ category: "lifecycle",
1076
+ required: false,
1077
+ specRef: "client/elicitation",
1078
+ description: "Elicitation (asking the user for structured input mid-operation) is a client capability added in 2025-11-25. This test verifies servers that use elicitation/create handle the case where clients don't support it.",
1079
+ recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error).",
1080
+ parallelSafe: true
1081
+ },
1050
1082
  {
1051
1083
  id: "lifecycle-meta-tolerance",
1052
1084
  name: "Tolerates _meta field on requests",
@@ -1054,7 +1086,8 @@ var TEST_DEFINITIONS = [
1054
1086
  required: false,
1055
1087
  specRef: "basic/utilities#_meta",
1056
1088
  description: "Sends a ping with params._meta = { extra: 'value' } and verifies the server doesn't error. The 2025-11-25 spec allows arbitrary _meta on any request; servers should ignore unknown _meta fields gracefully.",
1057
- recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility."
1089
+ recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility.",
1090
+ parallelSafe: true
1058
1091
  },
1059
1092
  // ── Tools (4 tests) ──────────────────────────────────────────────
1060
1093
  {
@@ -1753,8 +1786,14 @@ async function runComplianceSuite(target, options = {}) {
1753
1786
  let resourceNames = [];
1754
1787
  let promptCount = 0;
1755
1788
  let promptNames = [];
1756
- async function test(id, name, category, required, specRef, fn) {
1757
- if (!shouldRun2(id, category)) return;
1789
+ const concurrency = Math.max(1, options.concurrency ?? 1);
1790
+ const inFlight = /* @__PURE__ */ new Set();
1791
+ async function drainPool() {
1792
+ while (inFlight.size > 0) {
1793
+ await Promise.race(inFlight);
1794
+ }
1795
+ }
1796
+ async function runTestFn(id, name, category, required, specRef, fn) {
1758
1797
  const start = Date.now();
1759
1798
  let lastResult = { passed: false, details: "" };
1760
1799
  for (let attempt = 0; attempt <= retries; attempt++) {
@@ -1782,6 +1821,21 @@ async function runComplianceSuite(target, options = {}) {
1782
1821
  options.onProgress?.(id, lastResult.passed, lastResult.details);
1783
1822
  options.onTestComplete?.(result);
1784
1823
  }
1824
+ async function test(id, name, category, required, specRef, fn) {
1825
+ if (!shouldRun2(id, category)) return;
1826
+ const def = TEST_DEFINITIONS_MAP.get(id);
1827
+ const eligible = concurrency > 1 && def?.parallelSafe === true;
1828
+ if (!eligible) {
1829
+ if (inFlight.size > 0) await drainPool();
1830
+ await runTestFn(id, name, category, required, specRef, fn);
1831
+ return;
1832
+ }
1833
+ while (inFlight.size >= concurrency) await Promise.race(inFlight);
1834
+ const p = runTestFn(id, name, category, required, specRef, fn).finally(() => {
1835
+ inFlight.delete(p);
1836
+ });
1837
+ inFlight.add(p);
1838
+ }
1785
1839
  await test(
1786
1840
  "transport-post",
1787
1841
  "HTTP POST accepted",
@@ -1950,7 +2004,11 @@ async function runComplianceSuite(target, options = {}) {
1950
2004
  try {
1951
2005
  initRes = await rpc("initialize", {
1952
2006
  protocolVersion: SPEC_VERSION,
1953
- capabilities: {},
2007
+ capabilities: {
2008
+ sampling: {},
2009
+ roots: { listChanged: true },
2010
+ elicitation: {}
2011
+ },
1954
2012
  clientInfo: { name: "mcp-compliance", version: TOOL_VERSION }
1955
2013
  });
1956
2014
  const result = initRes?.body?.result;
@@ -2372,6 +2430,47 @@ async function runComplianceSuite(target, options = {}) {
2372
2430
  }
2373
2431
  }
2374
2432
  );
2433
+ await test(
2434
+ "lifecycle-sampling-capability",
2435
+ "Sampling capability shape",
2436
+ "lifecycle",
2437
+ false,
2438
+ "client/sampling",
2439
+ async () => {
2440
+ if (!initRes || initRes.body?.error) {
2441
+ return { passed: false, details: "Server rejected initialize" };
2442
+ }
2443
+ return {
2444
+ passed: true,
2445
+ details: "Server accepted initialize with client sampling capability. Full server\u2192client sampling flow not exercised."
2446
+ };
2447
+ }
2448
+ );
2449
+ await test("lifecycle-roots-capability", "Roots capability shape", "lifecycle", false, "client/roots", async () => {
2450
+ if (!initRes || initRes.body?.error) {
2451
+ return { passed: false, details: "Server rejected initialize" };
2452
+ }
2453
+ return {
2454
+ passed: true,
2455
+ details: "Server accepted initialize. Full server\u2192client roots/list flow not exercised (requires a roots-aware client)."
2456
+ };
2457
+ });
2458
+ await test(
2459
+ "lifecycle-elicitation-capability",
2460
+ "Elicitation capability shape",
2461
+ "lifecycle",
2462
+ false,
2463
+ "client/elicitation",
2464
+ async () => {
2465
+ if (!initRes || initRes.body?.error) {
2466
+ return { passed: false, details: "Server rejected initialize" };
2467
+ }
2468
+ return {
2469
+ passed: true,
2470
+ details: "Server accepted initialize. Full server\u2192client elicitation/create flow not exercised."
2471
+ };
2472
+ }
2473
+ );
2375
2474
  await test(
2376
2475
  "lifecycle-meta-tolerance",
2377
2476
  "Tolerates _meta field on requests",
@@ -4273,6 +4372,7 @@ async function runComplianceSuite(target, options = {}) {
4273
4372
  const truncated = warnings.length - MAX_WARNINGS;
4274
4373
  warnings.splice(MAX_WARNINGS, truncated, `... and ${truncated} more warning(s) suppressed`);
4275
4374
  }
4375
+ if (inFlight.size > 0) await drainPool();
4276
4376
  const { score, grade, overall, summary, categories } = computeScore(tests);
4277
4377
  const badge = generateBadge(displayUrl);
4278
4378
  return {
@@ -5203,7 +5303,11 @@ program.command("test").description("Run the full compliance test suite against
5203
5303
  "--timeout <ms>",
5204
5304
  "Request timeout in milliseconds (bump to 30000+ for stdio servers with slow startup)",
5205
5305
  "15000"
5206
- ).option("--no-color", "Disable colored output (also honors NO_COLOR env var)").option("--watch", "Re-run tests when files in the cwd change (stdio targets only)").option("--preflight-timeout <ms>", "Preflight connectivity check timeout in milliseconds").option("--retries <n>", "Number of retries for failed tests", "0").option(
5306
+ ).option("--no-color", "Disable colored output (also honors NO_COLOR env var)").option("--watch", "Re-run tests when files in the cwd change (stdio targets only)").option(
5307
+ "--concurrency <n>",
5308
+ "Max parallel-safe tests in flight (default 1; see docs/PERFORMANCE.md before raising)",
5309
+ "1"
5310
+ ).option("--preflight-timeout <ms>", "Preflight connectivity check timeout in milliseconds").option("--retries <n>", "Number of retries for failed tests", "0").option(
5207
5311
  "--only <items>",
5208
5312
  'Only run matching categories or test IDs, comma-separated (e.g., "transport,lifecycle" or "transport-post,lifecycle-init")',
5209
5313
  parseList
@@ -5262,6 +5366,7 @@ Testing ${describeTarget(transportTarget)}...
5262
5366
  timeout: parsePositiveInt(opts.timeout, "--timeout", 1),
5263
5367
  preflightTimeout: opts.preflightTimeout ? parsePositiveInt(opts.preflightTimeout, "--preflight-timeout", 1) : config?.preflightTimeout,
5264
5368
  retries: parsePositiveInt(opts.retries, "--retries"),
5369
+ concurrency: parsePositiveInt(opts.concurrency, "--concurrency", 1),
5265
5370
  only,
5266
5371
  skip,
5267
5372
  onProgress: verbose ? (testId, passed, details) => {
@@ -2,7 +2,7 @@ import {
2
2
  SPEC_BASE,
3
3
  TEST_DEFINITIONS,
4
4
  runComplianceSuite
5
- } from "../chunk-7KISK3FS.js";
5
+ } from "../chunk-M67VVIRO.js";
6
6
 
7
7
  // src/mcp/server.ts
8
8
  import { existsSync, readFileSync } from "fs";
package/dist/runner.d.ts CHANGED
@@ -63,6 +63,18 @@ interface TestDefinition {
63
63
  recommendation: string;
64
64
  /** Transports this test applies to. Omit = all transports. */
65
65
  transports?: ("http" | "stdio")[];
66
+ /**
67
+ * Declares this test safe to run concurrently with other parallel-safe
68
+ * tests. Default = false (serialized with other tests in the runner
69
+ * loop). Tests are parallel-safe when they:
70
+ * - don't mutate shared closure state (sessionId, cachedToolsList, …)
71
+ * - don't depend on the result of another concurrently-running test
72
+ * - tolerate the server seeing >1 in-flight request at a time
73
+ *
74
+ * Setup tests (init, notifications/initialized) and tests that
75
+ * populate caches (tools/list, resources/list) must stay `false`.
76
+ */
77
+ parallelSafe?: boolean;
66
78
  }
67
79
  /** Describes the server under test. URL string = HTTP for backwards compat. */
68
80
  type TransportTarget = {
@@ -98,6 +110,16 @@ declare function computeScore(tests: TestResult[]): {
98
110
  }>;
99
111
  };
100
112
 
113
+ /**
114
+ * Generate a short, deterministic hash of a URL for badge paths.
115
+ * SHA-256 truncated to 24 hex chars (96 bits of entropy) — matches the
116
+ * server-side hash width used by mcp.hosting for `/compliance/ext/<hash>`.
117
+ *
118
+ * Exported so mcp.hosting (and other consumers) can compute matching
119
+ * hashes when looking up reports/badges by URL. The hash is the canonical
120
+ * key for `/compliance/ext/<hash>` and `/api/compliance/ext/<hash>/badge`.
121
+ */
122
+ declare function urlHash(url: string): string;
101
123
  /**
102
124
  * Generate badge URLs and markdown for a compliance report.
103
125
  * Badge images are served by mcp.hosting.
@@ -158,6 +180,14 @@ interface RunOptions {
158
180
  skip?: string[];
159
181
  /** Preflight connectivity check timeout in milliseconds (default: min(timeout, 10000)) */
160
182
  preflightTimeout?: number;
183
+ /**
184
+ * Maximum number of parallel-safe tests in flight at once. Default 1
185
+ * (strictly sequential — matches pre-0.12 behavior). Tests are only
186
+ * eligible for parallel execution when their `TestDefinition.parallelSafe`
187
+ * is true; everything else stays sequential regardless. See
188
+ * docs/PERFORMANCE.md for the design.
189
+ */
190
+ concurrency?: number;
161
191
  }
162
192
  /**
163
193
  * Run the full MCP compliance test suite. Accepts either a URL string
@@ -165,4 +195,4 @@ interface RunOptions {
165
195
  */
166
196
  declare function runComplianceSuite(target: string | TransportTarget, options?: RunOptions): Promise<ComplianceReport>;
167
197
 
168
- export { type ComplianceReport, type PreviewOptions, type RunOptions, SPEC_BASE, SPEC_VERSION, TEST_DEFINITIONS, type TestResult, computeGrade, computeScore, generateBadge, parseSSEResponse, previewTests, runComplianceSuite };
198
+ export { type ComplianceReport, type PreviewOptions, type RunOptions, SPEC_BASE, SPEC_VERSION, TEST_DEFINITIONS, type TestResult, computeGrade, computeScore, generateBadge, parseSSEResponse, previewTests, runComplianceSuite, urlHash };
package/dist/runner.js CHANGED
@@ -7,8 +7,9 @@ import {
7
7
  generateBadge,
8
8
  parseSSEResponse,
9
9
  previewTests,
10
- runComplianceSuite
11
- } from "./chunk-7KISK3FS.js";
10
+ runComplianceSuite,
11
+ urlHash
12
+ } from "./chunk-M67VVIRO.js";
12
13
  export {
13
14
  SPEC_BASE,
14
15
  SPEC_VERSION,
@@ -18,5 +19,6 @@ export {
18
19
  generateBadge,
19
20
  parseSSEResponse,
20
21
  previewTests,
21
- runComplianceSuite
22
+ runComplianceSuite,
23
+ urlHash
22
24
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yawlabs/mcp-compliance",
3
- "version": "0.10.1",
3
+ "version": "0.12.0",
4
4
  "description": "CLI tool and MCP server that tests MCP servers for spec compliance",
5
5
  "license": "MIT",
6
6
  "author": "Yaw Labs <contact@yaw.sh> (https://yaw.sh)",
@@ -51,6 +51,7 @@
51
51
  "ajv": "^8.18.0",
52
52
  "ajv-formats": "^3.0.1",
53
53
  "tsup": "^8.4.0",
54
+ "tsx": "^4.21.0",
54
55
  "typescript": "^5.8.3",
55
56
  "vitest": "^3.1.1"
56
57
  },