@yawlabs/mcp-compliance 0.9.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,7 +5,7 @@
5
5
  [![GitHub stars](https://img.shields.io/github/stars/YawLabs/mcp-compliance)](https://github.com/YawLabs/mcp-compliance/stargazers)
6
6
  [![CI](https://github.com/YawLabs/mcp-compliance/actions/workflows/ci.yml/badge.svg)](https://github.com/YawLabs/mcp-compliance/actions/workflows/ci.yml)
7
7
 
8
- **Test any MCP server for spec compliance.** 84-test suite covering transport, lifecycle, tools, resources, prompts, error handling, schema validation, and security against the [MCP specification](https://modelcontextprotocol.io/specification/2025-11-25). Works against **HTTP endpoints** (`https://my-server.com/mcp`) and **stdio servers** (`npx @modelcontextprotocol/server-filesystem /tmp`) alike. CLI, MCP server, and programmatic API.
8
+ **Test any MCP server for spec compliance.** 88-test suite covering transport, lifecycle, tools, resources, prompts, error handling, schema validation, and security against the [MCP specification](https://modelcontextprotocol.io/specification/2025-11-25). Works against **HTTP endpoints** (`https://my-server.com/mcp`) and **stdio servers** (`npx @modelcontextprotocol/server-filesystem /tmp`) alike. CLI, MCP server, and programmatic API.
9
9
 
10
10
  Built and maintained by [Yaw Labs](https://yaw.sh).
11
11
 
@@ -128,6 +128,19 @@ On Windows, `npx` and other `.cmd` shims are handled automatically by spawning t
128
128
 
129
129
  ### CI integration
130
130
 
131
+ **GitHub Action** (drop into any `.github/workflows/*.yml`):
132
+
133
+ ```yaml
134
+ - uses: YawLabs/mcp-compliance@v0
135
+   with:
136
+     target: 'node ./dist/server.js' # or a URL like https://my-server.com/mcp
137
+     format: github # ::error / ::warning annotations on the PR
138
+     strict: 'true' # exit non-zero if any required test fails
139
+     min-grade: 'A' # also exit if grade slips
140
+ ```
141
+
142
+ **Manual CLI invocation:**
143
+
131
144
  ```bash
132
145
  # GitHub Actions: emits ::error / ::warning annotations inline on the PR
133
146
  mcp-compliance test https://my-server.com/mcp --format github --strict
@@ -135,11 +148,30 @@ mcp-compliance test https://my-server.com/mcp --format github --strict
135
148
  # Slack/Linear/PR comment: drop the body straight into a comment
136
149
  mcp-compliance test https://my-server.com/mcp --format markdown > report.md
137
150
 
151
+ # HTML report (self-contained, share anywhere — issue comments, S3, GitHub Pages)
152
+ mcp-compliance test https://my-server.com/mcp --format html > report.html
153
+
138
154
  # Block release if grade slips below B
139
155
  mcp-compliance test https://my-server.com/mcp --min-grade B
140
156
 
141
157
  # Preview which tests will run before connecting (handy for --only/--skip authoring)
142
158
  mcp-compliance test --list --transport stdio --skip security
159
+
160
+ # Diff two runs — exit 1 if anything that was passing is now failing
161
+ mcp-compliance test https://my-server.com/mcp --format json > current.json
162
+ mcp-compliance diff baseline.json current.json
163
+
164
+ # Watch mode for stdio dev loop — re-runs on file changes in cwd
165
+ mcp-compliance test --watch -- node ./dist/server.js
166
+
167
+ # Latency benchmark
168
+ mcp-compliance benchmark -r 200 -c 4 -- node ./dist/server.js
169
+ ```
170
+
171
+ **Docker:**
172
+
173
+ ```bash
174
+ docker run --rm ghcr.io/yawlabs/mcp-compliance test https://my-server.com/mcp
143
175
  ```
144
176
 
145
177
  ### Scaffold a config
@@ -447,7 +479,7 @@ Restart your MCP client and approve the server when prompted.
447
479
 
448
480
  ### Tools
449
481
 
450
- - **mcp_compliance_test** — Run the full 84-test suite against a URL or stdio command. Supports auth, custom headers, env vars, timeout, retries, and category/test filtering. Returns grade, score, and detailed results.
482
+ - **mcp_compliance_test** — Run the full 88-test suite against a URL or stdio command. Supports auth, custom headers, env vars, timeout, retries, and category/test filtering. Returns grade, score, and detailed results.
451
483
  - **mcp_compliance_badge** — Get the badge markdown/HTML for a server. Supports auth and custom headers.
452
484
  - **mcp_compliance_explain** — Explain what a specific test ID checks and why it matters.
453
485
 
@@ -468,14 +500,56 @@ const report2 = await runComplianceSuite('https://my-server.com/mcp', {
468
500
  retries: 1,
469
501
  only: ['transport', 'lifecycle'],
470
502
  });
503
+
504
+ // Live progress for streaming UIs (e.g. server-sent-events to a browser)
505
+ await runComplianceSuite('https://my-server.com/mcp', {
506
+ onTestComplete: (result) => {
507
+ // result has the full TestResult: id, name, category, required,
508
+ // passed, details, durationMs, specRef. Push it to your client.
509
+ sendToClient(result);
510
+ },
511
+ });
512
+ ```
513
+
514
+ ## Report schema
515
+
516
+ The JSON output of the test suite is a stable, versioned contract. Every report includes a `schemaVersion` field at the top level. The full JSON Schema lives at [`schemas/report.v1.json`](./schemas/report.v1.json) and is shipped with the npm package.
517
+
518
+ ```jsonc
519
+ {
520
+ "schemaVersion": "1", // bumped on breaking changes to the report shape
521
+ "specVersion": "2025-11-25", // MCP spec version tested against
522
+ "toolVersion": "0.10.0", // mcp-compliance version that produced the report
523
+ "url": "...",
524
+ "timestamp": "...",
525
+ "grade": "A",
526
+ "score": 92.5,
527
+ "tests": [ ... ],
528
+ // ...
529
+ }
471
530
  ```
472
531
 
532
+ Consumer guidance:
533
+
534
+ - Pin against `schemaVersion`. Reject reports with an unknown version rather than guessing at the shape.
535
+ - The schema validates with any Draft 2020-12 validator (e.g. `ajv`, via its `ajv/dist/2020` entry point — the default `Ajv` export targets Draft-07).
536
+ - Within a major version, additions are non-breaking. Renames, removals, or type changes bump the version.
537
+ - Two runs against the same server produce equivalent grade, score, and per-test pass/fail (modulo timings/timestamps).
538
+
473
539
  ## Specification
474
540
 
475
541
  The compliance testing methodology is published as an open specification:
476
542
 
477
- - **[MCP Compliance Testing Specification](./MCP_COMPLIANCE_SPEC.md)** — test execution model, scoring algorithm, all 84 test rules with pass/fail criteria (CC BY 4.0)
543
+ - **[MCP Compliance Testing Specification](./MCP_COMPLIANCE_SPEC.md)** — test execution model, scoring algorithm, all 88 test rules with pass/fail criteria (CC BY 4.0)
478
544
  - **[Machine-readable rule catalog](./mcp-compliance-rules.json)** — JSON Schema-compliant catalog for programmatic consumption
545
+ - **[Why `mcp-compliance`](./docs/WHY.md)** — the problem, existing alternatives, what this tool does differently
546
+ - **[Fixing common failures](./docs/FIXES.md)** — recipes for the most frequent test failures with code snippets
547
+ - **[Spec version migration policy](./docs/SPEC_VERSION_MIGRATION.md)** — how this tool evolves with MCP spec releases
548
+ - **[mcp.hosting external API](./docs/EXT_API.md)** — public submit/retrieve/badge/delete endpoints used by `mcp-compliance badge` and any custom integrations
549
+ - **[Enterprise tier (draft)](./docs/ENTERPRISE.md)** — paid tier structure for organizations with scheduled/private/audit-track compliance needs
550
+ - **[Performance deep-dive](./docs/PERFORMANCE.md)** — why the suite is sequential and what parallel execution would cost
551
+ - **[Spec PR drafts](./docs/spec-prs/)** — our proposed MCP spec clarifications for ambiguous cases we've hit
552
+ - **[mcp.hosting integration spec](./docs/mcp-hosting-integration.md)** — the contract between this engine and the mcp.hosting platform: URL surfaces, data flow, storage model, badge API, leaderboard, router integration
479
553
 
480
554
  These are complementary to (not competing with) the [official MCP specification](https://modelcontextprotocol.io/specification/2025-11-25). The MCP spec defines what servers must do; this spec defines how to verify compliance.
481
555
 
@@ -5,7 +5,7 @@ import { request as request2 } from "undici";
5
5
  // src/badge.ts
6
6
  import { createHash } from "crypto";
7
7
  function urlHash(url) {
8
- return createHash("sha256").update(url).digest("hex").slice(0, 12);
8
+ return createHash("sha256").update(url).digest("hex").slice(0, 24);
9
9
  }
10
10
  function generateBadge(url) {
11
11
  const hash = urlHash(url);
@@ -209,9 +209,21 @@ function createStdioTransport(opts) {
209
209
  let exited = false;
210
210
  let exitCode = null;
211
211
  let spawnError = null;
212
+ let spawned = false;
212
213
  const pending = /* @__PURE__ */ new Map();
213
214
  let stdoutBuffer = "";
214
215
  let stderrBuffer = "";
216
+ const spawnReady = new Promise((resolve, reject) => {
217
+ child.once("spawn", () => {
218
+ spawned = true;
219
+ resolve();
220
+ });
221
+ child.once("error", (err) => {
222
+ if (!spawned) reject(err);
223
+ });
224
+ });
225
+ spawnReady.catch(() => {
226
+ });
215
227
  child.on("error", (err) => {
216
228
  spawnError = err;
217
229
  rejectAllPending(err);
@@ -281,6 +293,15 @@ function createStdioTransport(opts) {
281
293
  ${snippet.replace(/\n/g, "\n ")}`;
282
294
  }
283
295
  async function writeLine(line) {
296
+ if (!spawned && !spawnError) {
297
+ try {
298
+ await spawnReady;
299
+ } catch (err) {
300
+ throw new Error(
301
+ annotateWithStderr(`stdio transport: spawn failed \u2014 ${err instanceof Error ? err.message : String(err)}`)
302
+ );
303
+ }
304
+ }
284
305
  if (exited) {
285
306
  throw new Error(annotateWithStderr(`stdio transport: child has exited (code ${exitCode})`));
286
307
  }
@@ -375,6 +396,7 @@ function createStdioTransport(opts) {
375
396
  }
376
397
 
377
398
  // src/types.ts
399
+ var REPORT_SCHEMA_VERSION = "1";
378
400
  var TEST_DEFINITIONS = [
379
401
  // ── Transport (13 tests) ─────────────────────────────────────────
380
402
  {
@@ -679,6 +701,42 @@ var TEST_DEFINITIONS = [
679
701
  description: "Sends a tools/call request with _meta.progressToken and checks if the server sends progress notifications via SSE. Progress support is optional but recommended for long-running operations.",
680
702
  recommendation: "When a request includes _meta.progressToken, send notifications/progress events via SSE to report progress. Include progressToken, progress (current), and optionally total fields."
681
703
  },
704
+ {
705
+ id: "lifecycle-sampling-capability",
706
+ name: "Sampling capability shape",
707
+ category: "lifecycle",
708
+ required: false,
709
+ specRef: "client/sampling",
710
+ description: "If the server's initialize response or serverInfo implies it uses client-side sampling (sampling/createMessage), verify the capability declaration shape. Currently this is an advisory shape check \u2014 actually exercising the server\u2192client flow requires a client-side sampling handler and is out of scope.",
711
+ recommendation: "Sampling is a client capability (the client provides LLM access to the server). Servers don't declare sampling in their own capabilities; they just call sampling/createMessage against clients that advertise it. No server-side action required."
712
+ },
713
+ {
714
+ id: "lifecycle-roots-capability",
715
+ name: "Roots capability shape",
716
+ category: "lifecycle",
717
+ required: false,
718
+ specRef: "client/roots",
719
+ description: "Roots (filesystem root paths) is a client capability. This test verifies that if a server sends roots/list requests, it handles gracefully when the client doesn't declare the roots capability (i.e., doesn't crash).",
720
+ recommendation: "Before calling roots/list, check if the initialized client capabilities include 'roots'. If not, skip the call \u2014 the client can't respond. Never assume roots is available; it's opt-in on the client side."
721
+ },
722
+ {
723
+ id: "lifecycle-elicitation-capability",
724
+ name: "Elicitation capability shape",
725
+ category: "lifecycle",
726
+ required: false,
727
+ specRef: "client/elicitation",
728
+ description: "Elicitation (asking the user for structured input mid-operation) is a client capability added in 2025-11-25. This test verifies servers that use elicitation/create handle the case where clients don't support it.",
729
+ recommendation: "Before calling elicitation/create, check the initialized client capabilities. If elicitation is absent, fall back to a safer default (ask once up-front via tool parameters, or fail cleanly with a clear error)."
730
+ },
731
+ {
732
+ id: "lifecycle-meta-tolerance",
733
+ name: "Tolerates _meta field on requests",
734
+ category: "lifecycle",
735
+ required: false,
736
+ specRef: "basic/utilities#_meta",
737
+ description: "Sends a ping with params._meta = { extra: 'value' } and verifies the server doesn't error. The 2025-11-25 spec allows arbitrary _meta on any request; servers should ignore unknown _meta fields gracefully.",
738
+ recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility."
739
+ },
682
740
  // ── Tools (4 tests) ──────────────────────────────────────────────
683
741
  {
684
742
  id: "tools-list",
@@ -1391,7 +1449,7 @@ async function runComplianceSuite(target, options = {}) {
1391
1449
  if (attempt < retries) await new Promise((r) => setTimeout(r, 1e3 * (attempt + 1)));
1392
1450
  }
1393
1451
  }
1394
- tests.push({
1452
+ const result = {
1395
1453
  id,
1396
1454
  name,
1397
1455
  category,
@@ -1400,8 +1458,10 @@ async function runComplianceSuite(target, options = {}) {
1400
1458
  details: lastResult.details,
1401
1459
  durationMs: Date.now() - start,
1402
1460
  specRef: `${SPEC_BASE}/${specRef}`
1403
- });
1461
+ };
1462
+ tests.push(result);
1404
1463
  options.onProgress?.(id, lastResult.passed, lastResult.details);
1464
+ options.onTestComplete?.(result);
1405
1465
  }
1406
1466
  await test(
1407
1467
  "transport-post",
@@ -1571,7 +1631,11 @@ async function runComplianceSuite(target, options = {}) {
1571
1631
  try {
1572
1632
  initRes = await rpc("initialize", {
1573
1633
  protocolVersion: SPEC_VERSION,
1574
- capabilities: {},
1634
+ capabilities: {
1635
+ sampling: {},
1636
+ roots: { listChanged: true },
1637
+ elicitation: {}
1638
+ },
1575
1639
  clientInfo: { name: "mcp-compliance", version: TOOL_VERSION }
1576
1640
  });
1577
1641
  const result = initRes?.body?.result;
@@ -1993,6 +2057,69 @@ async function runComplianceSuite(target, options = {}) {
1993
2057
  }
1994
2058
  }
1995
2059
  );
2060
+ await test(
2061
+ "lifecycle-sampling-capability",
2062
+ "Sampling capability shape",
2063
+ "lifecycle",
2064
+ false,
2065
+ "client/sampling",
2066
+ async () => {
2067
+ if (!initRes || initRes.body?.error) {
2068
+ return { passed: false, details: "Server rejected initialize" };
2069
+ }
2070
+ return {
2071
+ passed: true,
2072
+ details: "Server accepted initialize with client sampling capability. Full server\u2192client sampling flow not exercised."
2073
+ };
2074
+ }
2075
+ );
2076
+ await test("lifecycle-roots-capability", "Roots capability shape", "lifecycle", false, "client/roots", async () => {
2077
+ if (!initRes || initRes.body?.error) {
2078
+ return { passed: false, details: "Server rejected initialize" };
2079
+ }
2080
+ return {
2081
+ passed: true,
2082
+ details: "Server accepted initialize. Full server\u2192client roots/list flow not exercised (requires a roots-aware client)."
2083
+ };
2084
+ });
2085
+ await test(
2086
+ "lifecycle-elicitation-capability",
2087
+ "Elicitation capability shape",
2088
+ "lifecycle",
2089
+ false,
2090
+ "client/elicitation",
2091
+ async () => {
2092
+ if (!initRes || initRes.body?.error) {
2093
+ return { passed: false, details: "Server rejected initialize" };
2094
+ }
2095
+ return {
2096
+ passed: true,
2097
+ details: "Server accepted initialize. Full server\u2192client elicitation/create flow not exercised."
2098
+ };
2099
+ }
2100
+ );
2101
+ await test(
2102
+ "lifecycle-meta-tolerance",
2103
+ "Tolerates _meta field on requests",
2104
+ "lifecycle",
2105
+ false,
2106
+ "basic/utilities#_meta",
2107
+ async () => {
2108
+ try {
2109
+ const res = await rpc("ping", { _meta: { "mcp-compliance/probe": "1" } });
2110
+ const body = res.body;
2111
+ if (body.error) {
2112
+ return {
2113
+ passed: false,
2114
+ details: `Server rejected _meta on ping (code ${body.error.code}). _meta should be ignored, not error.`
2115
+ };
2116
+ }
2117
+ return { passed: true, details: "Server accepted ping with arbitrary _meta field" };
2118
+ } catch (err) {
2119
+ return { passed: false, details: `Error: ${err instanceof Error ? err.message : String(err)}` };
2120
+ }
2121
+ }
2122
+ );
1996
2123
  await test(
1997
2124
  "transport-content-type-init",
1998
2125
  "Initialize response has valid content type",
@@ -3875,6 +4002,7 @@ async function runComplianceSuite(target, options = {}) {
3875
4002
  const { score, grade, overall, summary, categories } = computeScore(tests);
3876
4003
  const badge = generateBadge(displayUrl);
3877
4004
  return {
4005
+ schemaVersion: REPORT_SCHEMA_VERSION,
3878
4006
  specVersion: SPEC_VERSION,
3879
4007
  toolVersion: TOOL_VERSION,
3880
4008
  url: displayUrl,
@@ -3902,6 +4030,7 @@ async function runComplianceSuite(target, options = {}) {
3902
4030
  }
3903
4031
 
3904
4032
  export {
4033
+ urlHash,
3905
4034
  generateBadge,
3906
4035
  computeGrade,
3907
4036
  computeScore,