@yawlabs/mcp-compliance 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -600,7 +600,8 @@ var TEST_DEFINITIONS = [
600
600
  required: true,
601
601
  specRef: "basic/utilities#ping",
602
602
  description: "Tests that the server responds to the ping method with an empty result object. This is a required utility method.",
603
- recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.'
603
+ recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.',
604
+ parallelSafe: true
604
605
  },
605
606
  {
606
607
  id: "lifecycle-instructions",
@@ -609,7 +610,8 @@ var TEST_DEFINITIONS = [
609
610
  required: false,
610
611
  specRef: "basic/lifecycle#initialization",
611
612
  description: "If the server includes an instructions field in the initialize response, validates it is a string. Instructions provide guidance for how the client should interact with the server.",
612
- recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string."
613
+ recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string.",
614
+ parallelSafe: true
613
615
  },
614
616
  {
615
617
  id: "lifecycle-id-match",
@@ -708,7 +710,8 @@ var TEST_DEFINITIONS = [
708
710
  required: false,
709
711
  specRef: "client/sampling",
710
712
  description: "If the server's initialize response or serverInfo implies it uses client-side sampling (sampling/createMessage), verify the capability declaration shape. Currently this is an advisory shape check \u2014 actually exercising the server\u2192client flow requires a client-side sampling handler and is out of scope.",
711
- recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required."
713
+ recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required.",
714
+ parallelSafe: true
712
715
  },
713
716
  {
714
717
  id: "lifecycle-roots-capability",
@@ -717,7 +720,8 @@ var TEST_DEFINITIONS = [
717
720
  required: false,
718
721
  specRef: "client/roots",
719
722
  description: "Roots (filesystem root paths) is a client capability. This test verifies that if a server sends roots/list requests, it handles gracefully when the client doesn't declare the roots capability (i.e., doesn't crash).",
720
- recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side."
723
+ recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side.",
724
+ parallelSafe: true
721
725
  },
722
726
  {
723
727
  id: "lifecycle-elicitation-capability",
@@ -726,7 +730,8 @@ var TEST_DEFINITIONS = [
726
730
  required: false,
727
731
  specRef: "client/elicitation",
728
732
  description: "Elicitation (asking the user for structured input mid-operation) is a client capability added in 2025-11-25. This test verifies servers that use elicitation/create handle the case where clients don't support it.",
729
- recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error)."
733
+ recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error).",
734
+ parallelSafe: true
730
735
  },
731
736
  {
732
737
  id: "lifecycle-meta-tolerance",
@@ -735,7 +740,8 @@ var TEST_DEFINITIONS = [
735
740
  required: false,
736
741
  specRef: "basic/utilities#_meta",
737
742
  description: "Sends a ping with params._meta = { extra: 'value' } and verifies the server doesn't error. The 2025-11-25 spec allows arbitrary _meta on any request; servers should ignore unknown _meta fields gracefully.",
738
- recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility."
743
+ recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility.",
744
+ parallelSafe: true
739
745
  },
740
746
  // ── Tools (4 tests) ──────────────────────────────────────────────
741
747
  {
@@ -1434,8 +1440,14 @@ async function runComplianceSuite(target, options = {}) {
1434
1440
  let resourceNames = [];
1435
1441
  let promptCount = 0;
1436
1442
  let promptNames = [];
1437
- async function test(id, name, category, required, specRef, fn) {
1438
- if (!shouldRun2(id, category)) return;
1443
+ const concurrency = Math.max(1, options.concurrency ?? 1);
1444
+ const inFlight = /* @__PURE__ */ new Set();
1445
+ async function drainPool() {
1446
+ while (inFlight.size > 0) {
1447
+ await Promise.race(inFlight);
1448
+ }
1449
+ }
1450
+ async function runTestFn(id, name, category, required, specRef, fn) {
1439
1451
  const start = Date.now();
1440
1452
  let lastResult = { passed: false, details: "" };
1441
1453
  for (let attempt = 0; attempt <= retries; attempt++) {
@@ -1463,6 +1475,21 @@ async function runComplianceSuite(target, options = {}) {
1463
1475
  options.onProgress?.(id, lastResult.passed, lastResult.details);
1464
1476
  options.onTestComplete?.(result);
1465
1477
  }
1478
+ async function test(id, name, category, required, specRef, fn) {
1479
+ if (!shouldRun2(id, category)) return;
1480
+ const def = TEST_DEFINITIONS_MAP.get(id);
1481
+ const eligible = concurrency > 1 && def?.parallelSafe === true;
1482
+ if (!eligible) {
1483
+ if (inFlight.size > 0) await drainPool();
1484
+ await runTestFn(id, name, category, required, specRef, fn);
1485
+ return;
1486
+ }
1487
+ while (inFlight.size >= concurrency) await Promise.race(inFlight);
1488
+ const p = runTestFn(id, name, category, required, specRef, fn).finally(() => {
1489
+ inFlight.delete(p);
1490
+ });
1491
+ inFlight.add(p);
1492
+ }
1466
1493
  await test(
1467
1494
  "transport-post",
1468
1495
  "HTTP POST accepted",
@@ -3999,6 +4026,7 @@ async function runComplianceSuite(target, options = {}) {
3999
4026
  const truncated = warnings.length - MAX_WARNINGS;
4000
4027
  warnings.splice(MAX_WARNINGS, truncated, `... and ${truncated} more warning(s) suppressed`);
4001
4028
  }
4029
+ if (inFlight.size > 0) await drainPool();
4002
4030
  const { score, grade, overall, summary, categories } = computeScore(tests);
4003
4031
  const badge = generateBadge(displayUrl);
4004
4032
  return {
package/dist/index.js CHANGED
@@ -946,7 +946,8 @@ var TEST_DEFINITIONS = [
946
946
  required: true,
947
947
  specRef: "basic/utilities#ping",
948
948
  description: "Tests that the server responds to the ping method with an empty result object. This is a required utility method.",
949
- recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.'
949
+ recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.',
950
+ parallelSafe: true
950
951
  },
951
952
  {
952
953
  id: "lifecycle-instructions",
@@ -955,7 +956,8 @@ var TEST_DEFINITIONS = [
955
956
  required: false,
956
957
  specRef: "basic/lifecycle#initialization",
957
958
  description: "If the server includes an instructions field in the initialize response, validates it is a string. Instructions provide guidance for how the client should interact with the server.",
958
- recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string."
959
+ recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string.",
960
+ parallelSafe: true
959
961
  },
960
962
  {
961
963
  id: "lifecycle-id-match",
@@ -1054,7 +1056,8 @@ var TEST_DEFINITIONS = [
1054
1056
  required: false,
1055
1057
  specRef: "client/sampling",
1056
1058
  description: "If the server's initialize response or serverInfo implies it uses client-side sampling (sampling/createMessage), verify the capability declaration shape. Currently this is an advisory shape check \u2014 actually exercising the server\u2192client flow requires a client-side sampling handler and is out of scope.",
1057
- recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required."
1059
+ recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required.",
1060
+ parallelSafe: true
1058
1061
  },
1059
1062
  {
1060
1063
  id: "lifecycle-roots-capability",
@@ -1063,7 +1066,8 @@ var TEST_DEFINITIONS = [
1063
1066
  required: false,
1064
1067
  specRef: "client/roots",
1065
1068
  description: "Roots (filesystem root paths) is a client capability. This test verifies that if a server sends roots/list requests, it handles gracefully when the client doesn't declare the roots capability (i.e., doesn't crash).",
1066
- recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side."
1069
+ recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side.",
1070
+ parallelSafe: true
1067
1071
  },
1068
1072
  {
1069
1073
  id: "lifecycle-elicitation-capability",
@@ -1072,7 +1076,8 @@ var TEST_DEFINITIONS = [
1072
1076
  required: false,
1073
1077
  specRef: "client/elicitation",
1074
1078
  description: "Elicitation (asking the user for structured input mid-operation) is a client capability added in 2025-11-25. This test verifies servers that use elicitation/create handle the case where clients don't support it.",
1075
- recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error)."
1079
+ recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error).",
1080
+ parallelSafe: true
1076
1081
  },
1077
1082
  {
1078
1083
  id: "lifecycle-meta-tolerance",
@@ -1081,7 +1086,8 @@ var TEST_DEFINITIONS = [
1081
1086
  required: false,
1082
1087
  specRef: "basic/utilities#_meta",
1083
1088
  description: "Sends a ping with params._meta = { extra: 'value' } and verifies the server doesn't error. The 2025-11-25 spec allows arbitrary _meta on any request; servers should ignore unknown _meta fields gracefully.",
1084
- recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility."
1089
+ recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility.",
1090
+ parallelSafe: true
1085
1091
  },
1086
1092
  // ── Tools (4 tests) ──────────────────────────────────────────────
1087
1093
  {
@@ -1780,8 +1786,14 @@ async function runComplianceSuite(target, options = {}) {
1780
1786
  let resourceNames = [];
1781
1787
  let promptCount = 0;
1782
1788
  let promptNames = [];
1783
- async function test(id, name, category, required, specRef, fn) {
1784
- if (!shouldRun2(id, category)) return;
1789
+ const concurrency = Math.max(1, options.concurrency ?? 1);
1790
+ const inFlight = /* @__PURE__ */ new Set();
1791
+ async function drainPool() {
1792
+ while (inFlight.size > 0) {
1793
+ await Promise.race(inFlight);
1794
+ }
1795
+ }
1796
+ async function runTestFn(id, name, category, required, specRef, fn) {
1785
1797
  const start = Date.now();
1786
1798
  let lastResult = { passed: false, details: "" };
1787
1799
  for (let attempt = 0; attempt <= retries; attempt++) {
@@ -1809,6 +1821,21 @@ async function runComplianceSuite(target, options = {}) {
1809
1821
  options.onProgress?.(id, lastResult.passed, lastResult.details);
1810
1822
  options.onTestComplete?.(result);
1811
1823
  }
1824
+ async function test(id, name, category, required, specRef, fn) {
1825
+ if (!shouldRun2(id, category)) return;
1826
+ const def = TEST_DEFINITIONS_MAP.get(id);
1827
+ const eligible = concurrency > 1 && def?.parallelSafe === true;
1828
+ if (!eligible) {
1829
+ if (inFlight.size > 0) await drainPool();
1830
+ await runTestFn(id, name, category, required, specRef, fn);
1831
+ return;
1832
+ }
1833
+ while (inFlight.size >= concurrency) await Promise.race(inFlight);
1834
+ const p = runTestFn(id, name, category, required, specRef, fn).finally(() => {
1835
+ inFlight.delete(p);
1836
+ });
1837
+ inFlight.add(p);
1838
+ }
1812
1839
  await test(
1813
1840
  "transport-post",
1814
1841
  "HTTP POST accepted",
@@ -4345,6 +4372,7 @@ async function runComplianceSuite(target, options = {}) {
4345
4372
  const truncated = warnings.length - MAX_WARNINGS;
4346
4373
  warnings.splice(MAX_WARNINGS, truncated, `... and ${truncated} more warning(s) suppressed`);
4347
4374
  }
4375
+ if (inFlight.size > 0) await drainPool();
4348
4376
  const { score, grade, overall, summary, categories } = computeScore(tests);
4349
4377
  const badge = generateBadge(displayUrl);
4350
4378
  return {
@@ -5275,7 +5303,11 @@ program.command("test").description("Run the full compliance test suite against
5275
5303
  "--timeout <ms>",
5276
5304
  "Request timeout in milliseconds (bump to 30000+ for stdio servers with slow startup)",
5277
5305
  "15000"
5278
- ).option("--no-color", "Disable colored output (also honors NO_COLOR env var)").option("--watch", "Re-run tests when files in the cwd change (stdio targets only)").option("--preflight-timeout <ms>", "Preflight connectivity check timeout in milliseconds").option("--retries <n>", "Number of retries for failed tests", "0").option(
5306
+ ).option("--no-color", "Disable colored output (also honors NO_COLOR env var)").option("--watch", "Re-run tests when files in the cwd change (stdio targets only)").option(
5307
+ "--concurrency <n>",
5308
+ "Max parallel-safe tests in flight (default 1; see docs/PERFORMANCE.md before raising)",
5309
+ "1"
5310
+ ).option("--preflight-timeout <ms>", "Preflight connectivity check timeout in milliseconds").option("--retries <n>", "Number of retries for failed tests", "0").option(
5279
5311
  "--only <items>",
5280
5312
  'Only run matching categories or test IDs, comma-separated (e.g., "transport,lifecycle" or "transport-post,lifecycle-init")',
5281
5313
  parseList
@@ -5334,6 +5366,7 @@ Testing ${describeTarget(transportTarget)}...
5334
5366
  timeout: parsePositiveInt(opts.timeout, "--timeout", 1),
5335
5367
  preflightTimeout: opts.preflightTimeout ? parsePositiveInt(opts.preflightTimeout, "--preflight-timeout", 1) : config?.preflightTimeout,
5336
5368
  retries: parsePositiveInt(opts.retries, "--retries"),
5369
+ concurrency: parsePositiveInt(opts.concurrency, "--concurrency", 1),
5337
5370
  only,
5338
5371
  skip,
5339
5372
  onProgress: verbose ? (testId, passed, details) => {
@@ -2,7 +2,7 @@ import {
2
2
  SPEC_BASE,
3
3
  TEST_DEFINITIONS,
4
4
  runComplianceSuite
5
- } from "../chunk-DGGPE3ZM.js";
5
+ } from "../chunk-M67VVIRO.js";
6
6
 
7
7
  // src/mcp/server.ts
8
8
  import { existsSync, readFileSync } from "fs";
package/dist/runner.d.ts CHANGED
@@ -63,6 +63,18 @@ interface TestDefinition {
63
63
  recommendation: string;
64
64
  /** Transports this test applies to. Omit = all transports. */
65
65
  transports?: ("http" | "stdio")[];
66
+ /**
67
+ * Declares this test safe to run concurrently with other parallel-safe
68
+ * tests. Default = false (serialized with other tests in the runner
69
+ * loop). Tests are parallel-safe when they:
70
+ * - don't mutate shared closure state (sessionId, cachedToolsList, …)
71
+ * - don't depend on the result of another concurrently-running test
72
+ * - tolerate the server seeing >1 in-flight request at a time
73
+ *
74
+ * Setup tests (init, notifications/initialized) and tests that
75
+ * populate caches (tools/list, resources/list) must stay `false`.
76
+ */
77
+ parallelSafe?: boolean;
66
78
  }
67
79
  /** Describes the server under test. URL string = HTTP for backwards compat. */
68
80
  type TransportTarget = {
@@ -168,6 +180,14 @@ interface RunOptions {
168
180
  skip?: string[];
169
181
  /** Preflight connectivity check timeout in milliseconds (default: min(timeout, 10000)) */
170
182
  preflightTimeout?: number;
183
+ /**
184
+ * Maximum number of parallel-safe tests in flight at once. Default 1
185
+ * (strictly sequential — matches pre-0.12 behavior). Tests are only
186
+ * eligible for parallel execution when their `TestDefinition.parallelSafe`
187
+ * is true; everything else stays sequential regardless. See
188
+ * docs/PERFORMANCE.md for the design.
189
+ */
190
+ concurrency?: number;
171
191
  }
172
192
  /**
173
193
  * Run the full MCP compliance test suite. Accepts either a URL string
package/dist/runner.js CHANGED
@@ -9,7 +9,7 @@ import {
9
9
  previewTests,
10
10
  runComplianceSuite,
11
11
  urlHash
12
- } from "./chunk-DGGPE3ZM.js";
12
+ } from "./chunk-M67VVIRO.js";
13
13
  export {
14
14
  SPEC_BASE,
15
15
  SPEC_VERSION,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yawlabs/mcp-compliance",
3
- "version": "0.11.0",
3
+ "version": "0.12.0",
4
4
  "description": "CLI tool and MCP server that tests MCP servers for spec compliance",
5
5
  "license": "MIT",
6
6
  "author": "Yaw Labs <contact@yaw.sh> (https://yaw.sh)",
@@ -51,6 +51,7 @@
51
51
  "ajv": "^8.18.0",
52
52
  "ajv-formats": "^3.0.1",
53
53
  "tsup": "^8.4.0",
54
+ "tsx": "^4.21.0",
54
55
  "typescript": "^5.8.3",
55
56
  "vitest": "^3.1.1"
56
57
  },