npm - @yawlabs/mcp-compliance - Version diff - 0.11.0 → 0.12.0 - Mend

@yawlabs/mcp-compliance 0.11.0 → 0.12.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (6)

package/dist/{chunk-DGGPE3ZM.js → chunk-M67VVIRO.js} +36 -8
package/dist/index.js +42 -9
package/dist/mcp/server.js +1 -1
package/dist/runner.d.ts +20 -0
package/dist/runner.js +1 -1
package/package.json +2 -1

package/dist/{chunk-DGGPE3ZM.js → chunk-M67VVIRO.js} RENAMED

@@ -600,7 +600,8 @@ var TEST_DEFINITIONS = [
     required: true,
     specRef: "basic/utilities#ping",
     description: "Tests that the server responds to the ping method with an empty result object. This is a required utility method.",
-    recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.'
+    recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.',
+    parallelSafe: true
   },
   {
     id: "lifecycle-instructions",
@@ -609,7 +610,8 @@ var TEST_DEFINITIONS = [
     required: false,
     specRef: "basic/lifecycle#initialization",
     description: "If the server includes an instructions field in the initialize response, validates it is a string. Instructions provide guidance for how the client should interact with the server.",
-    recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string."
+    recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string.",
+    parallelSafe: true
   },
   {
     id: "lifecycle-id-match",
@@ -708,7 +710,8 @@ var TEST_DEFINITIONS = [
     required: false,
     specRef: "client/sampling",
     description: "If the server's initialize response or serverInfo implies it uses client-side sampling (sampling/createMessage), verify the capability declaration shape. Currently this is an advisory shape check \u2014 actually exercising the server\u2192client flow requires a client-side sampling handler and is out of scope.",
-    recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required."
+    recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required.",
+    parallelSafe: true
   },
   {
     id: "lifecycle-roots-capability",
@@ -717,7 +720,8 @@ var TEST_DEFINITIONS = [
     required: false,
     specRef: "client/roots",
     description: "Roots (filesystem root paths) is a client capability. This test verifies that if a server sends roots/list requests, it handles gracefully when the client doesn't declare the roots capability (i.e., doesn't crash).",
-    recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side."
+    recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side.",
+    parallelSafe: true
   },
   {
     id: "lifecycle-elicitation-capability",
@@ -726,7 +730,8 @@ var TEST_DEFINITIONS = [
     required: false,
     specRef: "client/elicitation",
     description: "Elicitation (asking the user for structured input mid-operation) is a client capability added in 2025-11-25. This test verifies servers that use elicitation/create handle the case where clients don't support it.",
-    recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error)."
+    recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error).",
+    parallelSafe: true
   },
   {
     id: "lifecycle-meta-tolerance",
@@ -735,7 +740,8 @@ var TEST_DEFINITIONS = [
     required: false,
     specRef: "basic/utilities#_meta",
     description: "Sends a ping with params._meta = { extra: 'value' } and verifies the server doesn't error. The 2025-11-25 spec allows arbitrary _meta on any request; servers should ignore unknown _meta fields gracefully.",
-    recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility."
+    recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility.",
+    parallelSafe: true
   },
   // ── Tools (4 tests) ──────────────────────────────────────────────
   {
@@ -1434,8 +1440,14 @@ async function runComplianceSuite(target, options = {}) {
     let resourceNames = [];
     let promptCount = 0;
     let promptNames = [];
-    async function test(id, name, category, required, specRef, fn) {
-      if (!shouldRun2(id, category)) return;
+    const concurrency = Math.max(1, options.concurrency ?? 1);
+    const inFlight = /* @__PURE__ */ new Set();
+    async function drainPool() {
+      while (inFlight.size > 0) {
+        await Promise.race(inFlight);
+      }
+    }
+    async function runTestFn(id, name, category, required, specRef, fn) {
       const start = Date.now();
       let lastResult = { passed: false, details: "" };
       for (let attempt = 0; attempt <= retries; attempt++) {
@@ -1463,6 +1475,21 @@ async function runComplianceSuite(target, options = {}) {
       options.onProgress?.(id, lastResult.passed, lastResult.details);
       options.onTestComplete?.(result);
     }
+    async function test(id, name, category, required, specRef, fn) {
+      if (!shouldRun2(id, category)) return;
+      const def = TEST_DEFINITIONS_MAP.get(id);
+      const eligible = concurrency > 1 && def?.parallelSafe === true;
+      if (!eligible) {
+        if (inFlight.size > 0) await drainPool();
+        await runTestFn(id, name, category, required, specRef, fn);
+        return;
+      }
+      while (inFlight.size >= concurrency) await Promise.race(inFlight);
+      const p = runTestFn(id, name, category, required, specRef, fn).finally(() => {
+        inFlight.delete(p);
+      });
+      inFlight.add(p);
+    }
     await test(
       "transport-post",
       "HTTP POST accepted",
@@ -3999,6 +4026,7 @@ async function runComplianceSuite(target, options = {}) {
       const truncated = warnings.length - MAX_WARNINGS;
       warnings.splice(MAX_WARNINGS, truncated, `... and ${truncated} more warning(s) suppressed`);
     }
+    if (inFlight.size > 0) await drainPool();
     const { score, grade, overall, summary, categories } = computeScore(tests);
     const badge = generateBadge(displayUrl);
     return {

package/dist/index.js CHANGED

@@ -946,7 +946,8 @@ var TEST_DEFINITIONS = [
     required: true,
     specRef: "basic/utilities#ping",
     description: "Tests that the server responds to the ping method with an empty result object. This is a required utility method.",
-    recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.'
+    recommendation: 'Implement a "ping" method handler that returns an empty result object {}. This is required by the MCP spec for keepalive and connectivity checking.',
+    parallelSafe: true
   },
   {
     id: "lifecycle-instructions",
@@ -955,7 +956,8 @@ var TEST_DEFINITIONS = [
     required: false,
     specRef: "basic/lifecycle#initialization",
     description: "If the server includes an instructions field in the initialize response, validates it is a string. Instructions provide guidance for how the client should interact with the server.",
-    recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string."
+    recommendation: "If you include an instructions field in the initialize response, ensure it is a string. Remove the field or fix the type if it is not a string.",
+    parallelSafe: true
   },
   {
     id: "lifecycle-id-match",
@@ -1054,7 +1056,8 @@ var TEST_DEFINITIONS = [
     required: false,
     specRef: "client/sampling",
     description: "If the server's initialize response or serverInfo implies it uses client-side sampling (sampling/createMessage), verify the capability declaration shape. Currently this is an advisory shape check \u2014 actually exercising the server\u2192client flow requires a client-side sampling handler and is out of scope.",
-    recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required."
+    recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required.",
+    parallelSafe: true
   },
   {
     id: "lifecycle-roots-capability",
@@ -1063,7 +1066,8 @@ var TEST_DEFINITIONS = [
     required: false,
     specRef: "client/roots",
     description: "Roots (filesystem root paths) is a client capability. This test verifies that if a server sends roots/list requests, it handles gracefully when the client doesn't declare the roots capability (i.e., doesn't crash).",
-    recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side."
+    recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side.",
+    parallelSafe: true
   },
   {
     id: "lifecycle-elicitation-capability",
@@ -1072,7 +1076,8 @@ var TEST_DEFINITIONS = [
     required: false,
     specRef: "client/elicitation",
     description: "Elicitation (asking the user for structured input mid-operation) is a client capability added in 2025-11-25. This test verifies servers that use elicitation/create handle the case where clients don't support it.",
-    recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error)."
+    recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error).",
+    parallelSafe: true
   },
   {
     id: "lifecycle-meta-tolerance",
@@ -1081,7 +1086,8 @@ var TEST_DEFINITIONS = [
     required: false,
     specRef: "basic/utilities#_meta",
     description: "Sends a ping with params._meta = { extra: 'value' } and verifies the server doesn't error. The 2025-11-25 spec allows arbitrary _meta on any request; servers should ignore unknown _meta fields gracefully.",
-    recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility."
+    recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility.",
+    parallelSafe: true
   },
   // ── Tools (4 tests) ──────────────────────────────────────────────
   {
@@ -1780,8 +1786,14 @@ async function runComplianceSuite(target, options = {}) {
     let resourceNames = [];
     let promptCount = 0;
     let promptNames = [];
-    async function test(id, name, category, required, specRef, fn) {
-      if (!shouldRun2(id, category)) return;
+    const concurrency = Math.max(1, options.concurrency ?? 1);
+    const inFlight = /* @__PURE__ */ new Set();
+    async function drainPool() {
+      while (inFlight.size > 0) {
+        await Promise.race(inFlight);
+      }
+    }
+    async function runTestFn(id, name, category, required, specRef, fn) {
       const start = Date.now();
       let lastResult = { passed: false, details: "" };
       for (let attempt = 0; attempt <= retries; attempt++) {
@@ -1809,6 +1821,21 @@ async function runComplianceSuite(target, options = {}) {
       options.onProgress?.(id, lastResult.passed, lastResult.details);
       options.onTestComplete?.(result);
     }
+    async function test(id, name, category, required, specRef, fn) {
+      if (!shouldRun2(id, category)) return;
+      const def = TEST_DEFINITIONS_MAP.get(id);
+      const eligible = concurrency > 1 && def?.parallelSafe === true;
+      if (!eligible) {
+        if (inFlight.size > 0) await drainPool();
+        await runTestFn(id, name, category, required, specRef, fn);
+        return;
+      }
+      while (inFlight.size >= concurrency) await Promise.race(inFlight);
+      const p = runTestFn(id, name, category, required, specRef, fn).finally(() => {
+        inFlight.delete(p);
+      });
+      inFlight.add(p);
+    }
     await test(
       "transport-post",
       "HTTP POST accepted",
@@ -4345,6 +4372,7 @@ async function runComplianceSuite(target, options = {}) {
       const truncated = warnings.length - MAX_WARNINGS;
       warnings.splice(MAX_WARNINGS, truncated, `... and ${truncated} more warning(s) suppressed`);
     }
+    if (inFlight.size > 0) await drainPool();
     const { score, grade, overall, summary, categories } = computeScore(tests);
     const badge = generateBadge(displayUrl);
     return {
@@ -5275,7 +5303,11 @@ program.command("test").description("Run the full compliance test suite against
   "--timeout <ms>",
   "Request timeout in milliseconds (bump to 30000+ for stdio servers with slow startup)",
   "15000"
-).option("--no-color", "Disable colored output (also honors NO_COLOR env var)").option("--watch", "Re-run tests when files in the cwd change (stdio targets only)").option("--preflight-timeout <ms>", "Preflight connectivity check timeout in milliseconds").option("--retries <n>", "Number of retries for failed tests", "0").option(
+).option("--no-color", "Disable colored output (also honors NO_COLOR env var)").option("--watch", "Re-run tests when files in the cwd change (stdio targets only)").option(
+  "--concurrency <n>",
+  "Max parallel-safe tests in flight (default 1; see docs/PERFORMANCE.md before raising)",
+  "1"
+).option("--preflight-timeout <ms>", "Preflight connectivity check timeout in milliseconds").option("--retries <n>", "Number of retries for failed tests", "0").option(
   "--only <items>",
   'Only run matching categories or test IDs, comma-separated (e.g., "transport,lifecycle" or "transport-post,lifecycle-init")',
   parseList
@@ -5334,6 +5366,7 @@ Testing ${describeTarget(transportTarget)}...
           timeout: parsePositiveInt(opts.timeout, "--timeout", 1),
           preflightTimeout: opts.preflightTimeout ? parsePositiveInt(opts.preflightTimeout, "--preflight-timeout", 1) : config?.preflightTimeout,
           retries: parsePositiveInt(opts.retries, "--retries"),
+          concurrency: parsePositiveInt(opts.concurrency, "--concurrency", 1),
           only,
           skip,
           onProgress: verbose ? (testId, passed, details) => {

package/dist/mcp/server.js CHANGED

@@ -2,7 +2,7 @@ import {
   SPEC_BASE,
   TEST_DEFINITIONS,
   runComplianceSuite
-} from "../chunk-DGGPE3ZM.js";
+} from "../chunk-M67VVIRO.js";
 // src/mcp/server.ts
 import { existsSync, readFileSync } from "fs";

package/dist/runner.d.ts CHANGED

@@ -63,6 +63,18 @@ interface TestDefinition {
     recommendation: string;
     /** Transports this test applies to. Omit = all transports. */
     transports?: ("http" | "stdio")[];
+    /**
+     * Declares this test safe to run concurrently with other parallel-safe
+     * tests. Default = false (serialized with other tests in the runner
+     * loop). Tests are parallel-safe when they:
+     *   - don't mutate shared closure state (sessionId, cachedToolsList, …)
+     *   - don't depend on the result of another concurrently-running test
+     *   - tolerate the server seeing >1 in-flight request at a time
+     *
+     * Setup tests (init, notifications/initialized) and tests that
+     * populate caches (tools/list, resources/list) must stay `false`.
+     */
+    parallelSafe?: boolean;
 }
 /** Describes the server under test. URL string = HTTP for backwards compat. */
 type TransportTarget = {
@@ -168,6 +180,14 @@ interface RunOptions {
     skip?: string[];
     /** Preflight connectivity check timeout in milliseconds (default: min(timeout, 10000)) */
     preflightTimeout?: number;
+    /**
+     * Maximum number of parallel-safe tests in flight at once. Default 1
+     * (strictly sequential — matches pre-0.12 behavior). Tests are only
+     * eligible for parallel execution when their `TestDefinition.parallelSafe`
+     * is true; everything else stays sequential regardless. See
+     * docs/PERFORMANCE.md for the design.
+     */
+    concurrency?: number;
 }
 /**
  * Run the full MCP compliance test suite. Accepts either a URL string

package/dist/runner.js CHANGED

@@ -9,7 +9,7 @@ import {
   previewTests,
   runComplianceSuite,
   urlHash
-} from "./chunk-DGGPE3ZM.js";
+} from "./chunk-M67VVIRO.js";
 export {
   SPEC_BASE,
   SPEC_VERSION,

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@yawlabs/mcp-compliance",
-  "version": "0.11.0",
+  "version": "0.12.0",
   "description": "CLI tool and MCP server that tests MCP servers for spec compliance",
   "license": "MIT",
   "author": "Yaw Labs <contact@yaw.sh> (https://yaw.sh)",
@@ -51,6 +51,7 @@
     "ajv": "^8.18.0",
     "ajv-formats": "^3.0.1",
     "tsup": "^8.4.0",
+    "tsx": "^4.21.0",
     "typescript": "^5.8.3",
     "vitest": "^3.1.1"
   },