@yawlabs/mcp-compliance 0.11.0 → 0.12.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -15,7 +15,11 @@ function generateBadge(url) {
15
15
  imageUrl,
16
16
  reportUrl,
17
17
  markdown: `[![MCP Compliant](${imageUrl})](${reportUrl})`,
18
- html: `<a href="${reportUrl}"><img src="${imageUrl}" alt="MCP Compliant"></a>`
18
+ // loading="lazy" so READMEs that embed many badges don't block first
19
+ // paint on this image. Markdown renderers (GitHub, npmjs.com) emit
20
+ // their own <img> from the markdown form so the attribute only
21
+ // matters for the HTML form people paste into custom pages.
22
+ html: `<a href="${reportUrl}"><img src="${imageUrl}" alt="MCP Compliant" loading="lazy"></a>`
19
23
  };
20
24
  }
21
25
 
@@ -600,7 +604,8 @@ var TEST_DEFINITIONS = [
600
604
  required: true,
601
605
  specRef: "basic/utilities#ping",
602
606
  description: "Tests that the server responds to the ping method with an empty result object. This is a required utility method.",
603
- recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.'
607
+ recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.',
608
+ parallelSafe: true
604
609
  },
605
610
  {
606
611
  id: "lifecycle-instructions",
@@ -609,7 +614,8 @@ var TEST_DEFINITIONS = [
609
614
  required: false,
610
615
  specRef: "basic/lifecycle#initialization",
611
616
  description: "If the server includes an instructions field in the initialize response, validates it is a string. Instructions provide guidance for how the client should interact with the server.",
612
- recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string."
617
+ recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string.",
618
+ parallelSafe: true
613
619
  },
614
620
  {
615
621
  id: "lifecycle-id-match",
@@ -708,7 +714,8 @@ var TEST_DEFINITIONS = [
708
714
  required: false,
709
715
  specRef: "client/sampling",
710
716
  description: "If the server's initialize response or serverInfo implies it uses client-side sampling (sampling/createMessage), verify the capability declaration shape. Currently this is an advisory shape check \u2014 actually exercising the server\u2192client flow requires a client-side sampling handler and is out of scope.",
711
- recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required."
717
+ recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required.",
718
+ parallelSafe: true
712
719
  },
713
720
  {
714
721
  id: "lifecycle-roots-capability",
@@ -717,7 +724,8 @@ var TEST_DEFINITIONS = [
717
724
  required: false,
718
725
  specRef: "client/roots",
719
726
  description: "Roots (filesystem root paths) is a client capability. This test verifies that if a server sends roots/list requests, it handles gracefully when the client doesn't declare the roots capability (i.e., doesn't crash).",
720
- recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side."
727
+ recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side.",
728
+ parallelSafe: true
721
729
  },
722
730
  {
723
731
  id: "lifecycle-elicitation-capability",
@@ -726,7 +734,8 @@ var TEST_DEFINITIONS = [
726
734
  required: false,
727
735
  specRef: "client/elicitation",
728
736
  description: "Elicitation (asking the user for structured input mid-operation) is a client capability added in 2025-11-25. This test verifies servers that use elicitation/create handle the case where clients don't support it.",
729
- recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error)."
737
+ recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error).",
738
+ parallelSafe: true
730
739
  },
731
740
  {
732
741
  id: "lifecycle-meta-tolerance",
@@ -735,7 +744,8 @@ var TEST_DEFINITIONS = [
735
744
  required: false,
736
745
  specRef: "basic/utilities#_meta",
737
746
  description: "Sends a ping with params._meta = { extra: 'value' } and verifies the server doesn't error. The 2025-11-25 spec allows arbitrary _meta on any request; servers should ignore unknown _meta fields gracefully.",
738
- recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility."
747
+ recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility.",
748
+ parallelSafe: true
739
749
  },
740
750
  // ── Tools (4 tests) ──────────────────────────────────────────────
741
751
  {
@@ -1434,8 +1444,14 @@ async function runComplianceSuite(target, options = {}) {
1434
1444
  let resourceNames = [];
1435
1445
  let promptCount = 0;
1436
1446
  let promptNames = [];
1437
- async function test(id, name, category, required, specRef, fn) {
1438
- if (!shouldRun2(id, category)) return;
1447
+ const concurrency = Math.max(1, options.concurrency ?? 1);
1448
+ const inFlight = /* @__PURE__ */ new Set();
1449
+ async function drainPool() {
1450
+ while (inFlight.size > 0) {
1451
+ await Promise.race(inFlight);
1452
+ }
1453
+ }
1454
+ async function runTestFn(id, name, category, required, specRef, fn) {
1439
1455
  const start = Date.now();
1440
1456
  let lastResult = { passed: false, details: "" };
1441
1457
  for (let attempt = 0; attempt <= retries; attempt++) {
@@ -1463,6 +1479,26 @@ async function runComplianceSuite(target, options = {}) {
1463
1479
  options.onProgress?.(id, lastResult.passed, lastResult.details);
1464
1480
  options.onTestComplete?.(result);
1465
1481
  }
1482
+ async function test(id, name, category, required, specRef, fn) {
1483
+ if (options.signal?.aborted) {
1484
+ if (inFlight.size > 0) await drainPool().catch(() => {
1485
+ });
1486
+ throw options.signal.reason ?? new Error("Aborted");
1487
+ }
1488
+ if (!shouldRun2(id, category)) return;
1489
+ const def = TEST_DEFINITIONS_MAP.get(id);
1490
+ const eligible = concurrency > 1 && def?.parallelSafe === true;
1491
+ if (!eligible) {
1492
+ if (inFlight.size > 0) await drainPool();
1493
+ await runTestFn(id, name, category, required, specRef, fn);
1494
+ return;
1495
+ }
1496
+ while (inFlight.size >= concurrency) await Promise.race(inFlight);
1497
+ const p = runTestFn(id, name, category, required, specRef, fn).finally(() => {
1498
+ inFlight.delete(p);
1499
+ });
1500
+ inFlight.add(p);
1501
+ }
1466
1502
  await test(
1467
1503
  "transport-post",
1468
1504
  "HTTP POST accepted",
@@ -3999,6 +4035,7 @@ async function runComplianceSuite(target, options = {}) {
3999
4035
  const truncated = warnings.length - MAX_WARNINGS;
4000
4036
  warnings.splice(MAX_WARNINGS, truncated, `... and ${truncated} more warning(s) suppressed`);
4001
4037
  }
4038
+ if (inFlight.size > 0) await drainPool();
4002
4039
  const { score, grade, overall, summary, categories } = computeScore(tests);
4003
4040
  const badge = generateBadge(displayUrl);
4004
4041
  return {
package/dist/index.js CHANGED
@@ -702,7 +702,11 @@ function generateBadge(url) {
702
702
  imageUrl,
703
703
  reportUrl,
704
704
  markdown: `[![MCP Compliant](${imageUrl})](${reportUrl})`,
705
- html: `<a href="${reportUrl}"><img src="${imageUrl}" alt="MCP Compliant"></a>`
705
+ // loading="lazy" so READMEs that embed many badges don't block first
706
+ // paint on this image. Markdown renderers (GitHub, npmjs.com) emit
707
+ // their own <img> from the markdown form so the attribute only
708
+ // matters for the HTML form people paste into custom pages.
709
+ html: `<a href="${reportUrl}"><img src="${imageUrl}" alt="MCP Compliant" loading="lazy"></a>`
706
710
  };
707
711
  }
708
712
 
@@ -946,7 +950,8 @@ var TEST_DEFINITIONS = [
946
950
  required: true,
947
951
  specRef: "basic/utilities#ping",
948
952
  description: "Tests that the server responds to the ping method with an empty result object. This is a required utility method.",
949
- recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.'
953
+ recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.',
954
+ parallelSafe: true
950
955
  },
951
956
  {
952
957
  id: "lifecycle-instructions",
@@ -955,7 +960,8 @@ var TEST_DEFINITIONS = [
955
960
  required: false,
956
961
  specRef: "basic/lifecycle#initialization",
957
962
  description: "If the server includes an instructions field in the initialize response, validates it is a string. Instructions provide guidance for how the client should interact with the server.",
958
- recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string."
963
+ recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string.",
964
+ parallelSafe: true
959
965
  },
960
966
  {
961
967
  id: "lifecycle-id-match",
@@ -1054,7 +1060,8 @@ var TEST_DEFINITIONS = [
1054
1060
  required: false,
1055
1061
  specRef: "client/sampling",
1056
1062
  description: "If the server's initialize response or serverInfo implies it uses client-side sampling (sampling/createMessage), verify the capability declaration shape. Currently this is an advisory shape check \u2014 actually exercising the server\u2192client flow requires a client-side sampling handler and is out of scope.",
1057
- recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required."
1063
+ recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required.",
1064
+ parallelSafe: true
1058
1065
  },
1059
1066
  {
1060
1067
  id: "lifecycle-roots-capability",
@@ -1063,7 +1070,8 @@ var TEST_DEFINITIONS = [
1063
1070
  required: false,
1064
1071
  specRef: "client/roots",
1065
1072
  description: "Roots (filesystem root paths) is a client capability. This test verifies that if a server sends roots/list requests, it handles gracefully when the client doesn't declare the roots capability (i.e., doesn't crash).",
1066
- recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side."
1073
+ recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side.",
1074
+ parallelSafe: true
1067
1075
  },
1068
1076
  {
1069
1077
  id: "lifecycle-elicitation-capability",
@@ -1072,7 +1080,8 @@ var TEST_DEFINITIONS = [
1072
1080
  required: false,
1073
1081
  specRef: "client/elicitation",
1074
1082
  description: "Elicitation (asking the user for structured input mid-operation) is a client capability added in 2025-11-25. This test verifies servers that use elicitation/create handle the case where clients don't support it.",
1075
- recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error)."
1083
+ recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error).",
1084
+ parallelSafe: true
1076
1085
  },
1077
1086
  {
1078
1087
  id: "lifecycle-meta-tolerance",
@@ -1081,7 +1090,8 @@ var TEST_DEFINITIONS = [
1081
1090
  required: false,
1082
1091
  specRef: "basic/utilities#_meta",
1083
1092
  description: "Sends a ping with params._meta = { extra: 'value' } and verifies the server doesn't error. The 2025-11-25 spec allows arbitrary _meta on any request; servers should ignore unknown _meta fields gracefully.",
1084
- recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility."
1093
+ recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility.",
1094
+ parallelSafe: true
1085
1095
  },
1086
1096
  // ── Tools (4 tests) ──────────────────────────────────────────────
1087
1097
  {
@@ -1780,8 +1790,14 @@ async function runComplianceSuite(target, options = {}) {
1780
1790
  let resourceNames = [];
1781
1791
  let promptCount = 0;
1782
1792
  let promptNames = [];
1783
- async function test(id, name, category, required, specRef, fn) {
1784
- if (!shouldRun2(id, category)) return;
1793
+ const concurrency = Math.max(1, options.concurrency ?? 1);
1794
+ const inFlight = /* @__PURE__ */ new Set();
1795
+ async function drainPool() {
1796
+ while (inFlight.size > 0) {
1797
+ await Promise.race(inFlight);
1798
+ }
1799
+ }
1800
+ async function runTestFn(id, name, category, required, specRef, fn) {
1785
1801
  const start = Date.now();
1786
1802
  let lastResult = { passed: false, details: "" };
1787
1803
  for (let attempt = 0; attempt <= retries; attempt++) {
@@ -1809,6 +1825,26 @@ async function runComplianceSuite(target, options = {}) {
1809
1825
  options.onProgress?.(id, lastResult.passed, lastResult.details);
1810
1826
  options.onTestComplete?.(result);
1811
1827
  }
1828
+ async function test(id, name, category, required, specRef, fn) {
1829
+ if (options.signal?.aborted) {
1830
+ if (inFlight.size > 0) await drainPool().catch(() => {
1831
+ });
1832
+ throw options.signal.reason ?? new Error("Aborted");
1833
+ }
1834
+ if (!shouldRun2(id, category)) return;
1835
+ const def = TEST_DEFINITIONS_MAP.get(id);
1836
+ const eligible = concurrency > 1 && def?.parallelSafe === true;
1837
+ if (!eligible) {
1838
+ if (inFlight.size > 0) await drainPool();
1839
+ await runTestFn(id, name, category, required, specRef, fn);
1840
+ return;
1841
+ }
1842
+ while (inFlight.size >= concurrency) await Promise.race(inFlight);
1843
+ const p = runTestFn(id, name, category, required, specRef, fn).finally(() => {
1844
+ inFlight.delete(p);
1845
+ });
1846
+ inFlight.add(p);
1847
+ }
1812
1848
  await test(
1813
1849
  "transport-post",
1814
1850
  "HTTP POST accepted",
@@ -4345,6 +4381,7 @@ async function runComplianceSuite(target, options = {}) {
4345
4381
  const truncated = warnings.length - MAX_WARNINGS;
4346
4382
  warnings.splice(MAX_WARNINGS, truncated, `... and ${truncated} more warning(s) suppressed`);
4347
4383
  }
4384
+ if (inFlight.size > 0) await drainPool();
4348
4385
  const { score, grade, overall, summary, categories } = computeScore(tests);
4349
4386
  const badge = generateBadge(displayUrl);
4350
4387
  return {
@@ -5275,7 +5312,11 @@ program.command("test").description("Run the full compliance test suite against
5275
5312
  "--timeout <ms>",
5276
5313
  "Request timeout in milliseconds (bump to 30000+ for stdio servers with slow startup)",
5277
5314
  "15000"
5278
- ).option("--no-color", "Disable colored output (also honors NO_COLOR env var)").option("--watch", "Re-run tests when files in the cwd change (stdio targets only)").option("--preflight-timeout <ms>", "Preflight connectivity check timeout in milliseconds").option("--retries <n>", "Number of retries for failed tests", "0").option(
5315
+ ).option("--no-color", "Disable colored output (also honors NO_COLOR env var)").option("--watch", "Re-run tests when files in the cwd change (stdio targets only)").option(
5316
+ "--concurrency <n>",
5317
+ "Max parallel-safe tests in flight (default 1; see docs/PERFORMANCE.md before raising)",
5318
+ "1"
5319
+ ).option("--preflight-timeout <ms>", "Preflight connectivity check timeout in milliseconds").option("--retries <n>", "Number of retries for failed tests", "0").option(
5279
5320
  "--only <items>",
5280
5321
  'Only run matching categories or test IDs, comma-separated (e.g., "transport,lifecycle" or "transport-post,lifecycle-init")',
5281
5322
  parseList
@@ -5334,6 +5375,7 @@ Testing ${describeTarget(transportTarget)}...
5334
5375
  timeout: parsePositiveInt(opts.timeout, "--timeout", 1),
5335
5376
  preflightTimeout: opts.preflightTimeout ? parsePositiveInt(opts.preflightTimeout, "--preflight-timeout", 1) : config?.preflightTimeout,
5336
5377
  retries: parsePositiveInt(opts.retries, "--retries"),
5378
+ concurrency: parsePositiveInt(opts.concurrency, "--concurrency", 1),
5337
5379
  only,
5338
5380
  skip,
5339
5381
  onProgress: verbose ? (testId, passed, details) => {
@@ -2,7 +2,7 @@ import {
2
2
  SPEC_BASE,
3
3
  TEST_DEFINITIONS,
4
4
  runComplianceSuite
5
- } from "../chunk-DGGPE3ZM.js";
5
+ } from "../chunk-G5K7CRWU.js";
6
6
 
7
7
  // src/mcp/server.ts
8
8
  import { existsSync, readFileSync } from "fs";
package/dist/runner.d.ts CHANGED
@@ -63,6 +63,18 @@ interface TestDefinition {
63
63
  recommendation: string;
64
64
  /** Transports this test applies to. Omit = all transports. */
65
65
  transports?: ("http" | "stdio")[];
66
+ /**
67
+ * Declares this test safe to run concurrently with other parallel-safe
68
+ * tests. Default = false (serialized with other tests in the runner
69
+ * loop). Tests are parallel-safe when they:
70
+ * - don't mutate shared closure state (sessionId, cachedToolsList, …)
71
+ * - don't depend on the result of another concurrently-running test
72
+ * - tolerate the server seeing >1 in-flight request at a time
73
+ *
74
+ * Setup tests (init, notifications/initialized) and tests that
75
+ * populate caches (tools/list, resources/list) must stay `false`.
76
+ */
77
+ parallelSafe?: boolean;
66
78
  }
67
79
  /** Describes the server under test. URL string = HTTP for backwards compat. */
68
80
  type TransportTarget = {
@@ -168,6 +180,25 @@ interface RunOptions {
168
180
  skip?: string[];
169
181
  /** Preflight connectivity check timeout in milliseconds (default: min(timeout, 10000)) */
170
182
  preflightTimeout?: number;
183
+ /**
184
+ * Maximum number of parallel-safe tests in flight at once. Default 1
185
+ * (strictly sequential — matches pre-0.12 behavior). Tests are only
186
+ * eligible for parallel execution when their `TestDefinition.parallelSafe`
187
+ * is true; everything else stays sequential regardless. See
188
+ * docs/PERFORMANCE.md for the design.
189
+ */
190
+ concurrency?: number;
191
+ /**
192
+ * AbortSignal that cancels the suite mid-flight. When the signal fires,
193
+ * no further tests start; the in-flight test is cancelled if its
194
+ * underlying request supports the signal. The promise rejects with the
195
+ * signal's reason (an `AbortError` by default).
196
+ *
197
+ * Useful for live UIs (SSE, WebSocket) where the client may disconnect
198
+ * before the suite finishes — wiring the disconnect to abort here
199
+ * stops the server from burning compute on a dropped client.
200
+ */
201
+ signal?: AbortSignal;
171
202
  }
172
203
  /**
173
204
  * Run the full MCP compliance test suite. Accepts either a URL string
package/dist/runner.js CHANGED
@@ -9,7 +9,7 @@ import {
9
9
  previewTests,
10
10
  runComplianceSuite,
11
11
  urlHash
12
- } from "./chunk-DGGPE3ZM.js";
12
+ } from "./chunk-G5K7CRWU.js";
13
13
  export {
14
14
  SPEC_BASE,
15
15
  SPEC_VERSION,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yawlabs/mcp-compliance",
3
- "version": "0.11.0",
3
+ "version": "0.12.1",
4
4
  "description": "CLI tool and MCP server that tests MCP servers for spec compliance",
5
5
  "license": "MIT",
6
6
  "author": "Yaw Labs <contact@yaw.sh> (https://yaw.sh)",
@@ -51,6 +51,7 @@
51
51
  "ajv": "^8.18.0",
52
52
  "ajv-formats": "^3.0.1",
53
53
  "tsup": "^8.4.0",
54
+ "tsx": "^4.21.0",
54
55
  "typescript": "^5.8.3",
55
56
  "vitest": "^3.1.1"
56
57
  },