@yawlabs/mcp-compliance 0.12.2 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -15,13 +15,13 @@ MCP servers are multiplying fast — but most ship without compliance testing. B
15
15
 
16
16
  This tool solves that:
17
17
 
18
- - **84 tests across 8 categories** — transport, lifecycle, tools, resources, prompts, error handling, schema validation, and security. No gaps. (HTTP runs all 81 transport-applicable tests; stdio runs ~70 — HTTP-specific tests like CORS, TLS, session headers, and rate limiting are gated out.)
18
+ - **88 tests across 8 categories** — transport, lifecycle, tools, resources, prompts, error handling, schema validation, and security. No gaps. (HTTP runs all 85 transport-applicable tests; stdio runs ~75 — HTTP-specific tests like CORS, TLS, session headers, and rate limiting are gated out.)
19
19
  - **Capability-driven** — tests adapt to what the server declares. If it says it supports tools, tool tests become required. No false failures for features the server doesn't claim.
20
20
  - **Graded scoring** — A-F letter grade with a weighted score (required tests 70%, optional 30%). One number to communicate compliance.
21
21
  - **CI-ready** — `--strict` mode exits with code 1 on required test failures. Drop it into any pipeline.
22
22
  - **Spec-referenced** — every test links to the exact section of the MCP specification it validates. No ambiguity about what's being tested or why.
23
23
  - **Three interfaces** — CLI for humans, MCP server for AI assistants, programmatic API for integration.
24
- - **Published specification** — the [testing methodology](./MCP_COMPLIANCE_SPEC.md) and [rule catalog](./mcp-compliance-rules.json) are open (CC BY 4.0) so anyone can implement compatible tooling.
24
+ - **Published methodology** — the [testing methodology](./COMPLIANCE_RUBRIC.md) and [rule catalog](./mcp-compliance-rules.json) are open (CC BY 4.0) so anyone can build compatible tooling or fork the rules.
25
25
 
26
26
  ## Quick start
27
27
 
@@ -124,6 +124,7 @@ On Windows, `npx` and other `.cmd` shims are handled automatically by spawning t
124
124
  | `--retries <n>` | both | Number of retries for failed tests (default: `0`) |
125
125
  | `--only <items>` | both | Only run tests matching these categories or test IDs (comma-separated) |
126
126
  | `--skip <items>` | both | Skip tests matching these categories or test IDs (comma-separated) |
127
+ | `--concurrency <n>` | both | Max parallel-safe tests in flight (default: `1`; raising reduces wall time but can perturb timing-sensitive servers) |
127
128
  | `--verbose` | both | Print each test result as it runs (also forwards stdio stderr) |
128
129
 
129
130
  ### CI integration
@@ -258,11 +259,12 @@ Then embed it in your README:
258
259
 
259
260
  The `test` command never publishes — use it for CI, debugging, and local iteration. `badge` is the only command that publishes to mcp.hosting.
260
261
 
261
- ## What the 84 tests check
262
+ ## What the 88 tests check
262
263
 
263
264
  <details>
264
- <summary><strong>Transport (13 tests)</strong></summary>
265
+ <summary><strong>Transport (16 tests)</strong></summary>
265
266
 
267
+ HTTP-only (13):
266
268
  - **transport-post** — Server accepts HTTP POST requests (required)
267
269
  - **transport-content-type** — Responds with application/json or text/event-stream (required)
268
270
  - **transport-notification-202** — Notifications return exactly 202 Accepted
@@ -277,10 +279,15 @@ The `test` command never publishes — use it for CI, debugging, and local itera
277
279
  - **transport-concurrent** — Handles concurrent requests
278
280
  - **transport-sse-event-field** — SSE responses include required event: message field
279
281
 
282
+ stdio-only (3):
283
+ - **stdio-framing** — Newline-delimited JSON framing (required)
284
+ - **stdio-unicode** — UTF-8 unicode roundtrip preserves non-ASCII payloads
285
+ - **stdio-unknown-method-recovers** — Returns -32601 for unknown methods and keeps serving
286
+
280
287
  </details>
281
288
 
282
289
  <details>
283
- <summary><strong>Lifecycle (17 tests)</strong></summary>
290
+ <summary><strong>Lifecycle (21 tests)</strong></summary>
284
291
 
285
292
  - **lifecycle-init** — Initialize handshake succeeds (required)
286
293
  - **lifecycle-proto-version** — Returns valid YYYY-MM-DD protocol version (required)
@@ -299,6 +306,10 @@ The `test` command never publishes — use it for CI, debugging, and local itera
299
306
  - **lifecycle-progress** — Handles progress notifications gracefully
300
307
  - **lifecycle-list-changed** — Accepts listChanged notifications for declared capabilities
301
308
  - **lifecycle-progress-token** — Supports progress tokens in requests via SSE
309
+ - **lifecycle-sampling-capability** — Advisory check for server-side use of the client sampling capability
310
+ - **lifecycle-roots-capability** — Advisory check for server-side use of the client roots capability
311
+ - **lifecycle-elicitation-capability** — Advisory check for the 2025-11-25 client elicitation capability
312
+ - **lifecycle-meta-tolerance** — Server ignores unknown `_meta` fields on incoming requests
302
313
 
303
314
  </details>
304
315
 
@@ -399,7 +410,7 @@ The `test` command never publishes — use it for CI, debugging, and local itera
399
410
  | D | 40-59 |
400
411
  | F | 0-39 |
401
412
 
402
- Required tests are worth 70% of the score, optional tests 30%. See the [full scoring algorithm](./MCP_COMPLIANCE_SPEC.md#2-scoring-algorithm) in the specification.
413
+ Required tests are worth 70% of the score, optional tests 30%. See the [full scoring algorithm](./COMPLIANCE_RUBRIC.md#2-scoring-algorithm) in the methodology doc.
403
414
 
404
415
  ## CI integration
405
416
 
@@ -536,11 +547,11 @@ Consumer guidance:
536
547
  - Within a major version, additions are non-breaking. Renames, removals, or type changes bump the version.
537
548
  - Two runs against the same server produce equivalent grade, score, and per-test pass/fail (modulo timings/timestamps).
538
549
 
539
- ## Specification
550
+ ## Methodology & docs
540
551
 
541
- The compliance testing methodology is published as an open specification:
552
+ The testing methodology is published openly so the grading is auditable:
542
553
 
543
- - **[MCP Compliance Testing Specification](./MCP_COMPLIANCE_SPEC.md)** — test execution model, scoring algorithm, all 88 test rules with pass/fail criteria (CC BY 4.0)
554
+ - **[Testing methodology](./COMPLIANCE_RUBRIC.md)** — test execution model, scoring algorithm, all 88 test rules with pass/fail criteria (CC BY 4.0)
544
555
  - **[Machine-readable rule catalog](./mcp-compliance-rules.json)** — JSON Schema-compliant catalog for programmatic consumption
545
556
  - **[Why `mcp-compliance`](./docs/WHY.md)** — the problem, existing alternatives, what this tool does differently
546
557
  - **[Fixing common failures](./docs/FIXES.md)** — recipes for the most frequent test failures with code snippets
@@ -551,7 +562,7 @@ The compliance testing methodology is published as an open specification:
551
562
  - **[Spec PR drafts](./docs/spec-prs/)** — our proposed MCP spec clarifications for ambiguous cases we've hit
552
563
  - **[mcp.hosting integration spec](./docs/mcp-hosting-integration.md)** — the contract between this engine and the mcp.hosting platform: URL surfaces, data flow, storage model, badge API, leaderboard, router integration
553
564
 
554
- These are complementary to (not competing with) the [official MCP specification](https://modelcontextprotocol.io/specification/2025-11-25). The MCP spec defines what servers must do; this spec defines how to verify compliance.
565
+ The methodology is not an authoritative conformance standard — it's one tool's choices, published so they can be inspected, adopted, or forked. The [official MCP specification](https://modelcontextprotocol.io/specification/2025-11-25) defines what servers must do; this document describes how `@yawlabs/mcp-compliance` verifies it.
555
566
 
556
567
  ## Requirements
557
568
 
@@ -583,7 +594,7 @@ npm test
583
594
 
584
595
  - [mcp.hosting](https://mcp.hosting) — Hosted MCP server infrastructure
585
596
  - [MCP Specification](https://modelcontextprotocol.io/specification/2025-11-25)
586
- - [MCP Compliance Testing Spec](./MCP_COMPLIANCE_SPEC.md)
597
+ - [Testing methodology](./COMPLIANCE_RUBRIC.md)
587
598
  - [Yaw Labs](https://yaw.sh)
588
599
 
589
600
  ## License
package/dist/chunk-G5K7CRWU.js → package/dist/chunk-BX22BHC5.js RENAMED
@@ -63,7 +63,7 @@ import { request } from "undici";
63
63
 
64
64
  // src/sse.ts
65
65
  function parseSSEResponse(text) {
66
- const lines = text.split("\n");
66
+ const lines = text.split(/\r?\n/);
67
67
  let firstJsonRpcResponse = null;
68
68
  let currentData = [];
69
69
  function flushEvent() {
@@ -106,7 +106,8 @@ function createHttpTransport(opts) {
106
106
  function normalizeHeaders(raw) {
107
107
  const out = {};
108
108
  for (const [k, v] of Object.entries(raw)) {
109
- if (typeof v === "string") out[k] = v;
109
+ if (v === void 0) continue;
110
+ out[k] = Array.isArray(v) ? v.join(", ") : v;
110
111
  }
111
112
  return out;
112
113
  }
@@ -250,6 +251,11 @@ function createStdioTransport(opts) {
250
251
  handleLine(line);
251
252
  }
252
253
  if (stdoutBuffer.length > stdoutBufferSize) {
254
+ stderrBuffer += `[mcp-compliance] stdout buffer exceeded ${stdoutBufferSize} bytes without a newline; discarding buffered data
255
+ `;
256
+ if (stderrBuffer.length > stderrBufferSize) {
257
+ stderrBuffer = stderrBuffer.slice(stderrBuffer.length - stderrBufferSize);
258
+ }
253
259
  stdoutBuffer = "";
254
260
  }
255
261
  });
@@ -402,7 +408,7 @@ function createStdioTransport(opts) {
402
408
  // src/types.ts
403
409
  var REPORT_SCHEMA_VERSION = "1";
404
410
  var TEST_DEFINITIONS = [
405
- // ── Transport (13 tests) ─────────────────────────────────────────
411
+ // ── Transport (16 tests: 13 HTTP + 3 stdio) ──────────────────────
406
412
  {
407
413
  id: "transport-post",
408
414
  name: "HTTP POST accepted",
@@ -551,7 +557,7 @@ var TEST_DEFINITIONS = [
551
557
  recommendation: "Return JSON-RPC error -32601 (Method not found) for unknown methods. Do not exit the process or disconnect \u2014 the client should be able to keep using the session after an error.",
552
558
  transports: ["stdio"]
553
559
  },
554
- // ── Lifecycle (17 tests) ─────────────────────────────────────────
560
+ // ── Lifecycle (21 tests) ─────────────────────────────────────────
555
561
  {
556
562
  id: "lifecycle-init",
557
563
  name: "Initialize handshake",
@@ -1249,17 +1255,13 @@ var STACK_TRACE_PATTERNS = [
1249
1255
  // PHP
1250
1256
  /panicked\s+at\s+'/i,
1251
1257
  // Rust
1252
- /ENOENT|EACCES|EPERM/,
1253
- // Node.js system errors
1254
1258
  /node_modules\//,
1255
- // Node.js module paths
1256
- /\/usr\/local\/|\/home\//,
1257
- // Unix paths
1258
- /[A-Z]:\\.*\\/,
1259
- // Windows paths
1260
- /password|passwd|secret|credential/i,
1261
- // Sensitive terms
1262
- /jdbc:|mysql:|postgres:|mongodb:/i
1259
+ // Node.js module paths (filesystem layout leak)
1260
+ /\/usr\/local\/|\/home\/|\/root\//,
1261
+ // Unix absolute paths
1262
+ /[A-Z]:\\[\w\s.-]+\\[\w\s.-]+/,
1263
+ // Windows absolute paths (drive + 2+ segments)
1264
+ /jdbc:|mysql:\/\/|postgres(?:ql)?:\/\/|mongodb(?:\+srv)?:\/\//i
1263
1265
  // DB connection strings
1264
1266
  ];
1265
1267
  var INTERNAL_IP_PATTERNS = [
@@ -1278,6 +1280,20 @@ function createIdCounter(start = 0) {
1278
1280
  let id = start;
1279
1281
  return () => ++id;
1280
1282
  }
1283
+ function dedupAndCapWarnings(warnings, max) {
1284
+ const seen = /* @__PURE__ */ new Set();
1285
+ const deduped = [];
1286
+ for (const w of warnings) {
1287
+ if (seen.has(w)) continue;
1288
+ seen.add(w);
1289
+ deduped.push(w);
1290
+ }
1291
+ if (deduped.length > max) {
1292
+ const truncated = deduped.length - max;
1293
+ return [...deduped.slice(0, max), `... and ${truncated} more warning(s) suppressed`];
1294
+ }
1295
+ return deduped;
1296
+ }
1281
1297
  var STDIO_INCOMPATIBLE_IDS = /* @__PURE__ */ new Set([
1282
1298
  // Lifecycle tests that use raw undici for HTTP-specific checks
1283
1299
  "lifecycle-string-id",
@@ -4030,12 +4046,11 @@ async function runComplianceSuite(target, options = {}) {
4030
4046
  return { passed: true, details: "Unknown method returned JSON-RPC error; subsequent ping succeeded" };
4031
4047
  }
4032
4048
  );
4033
- const MAX_WARNINGS = 100;
4034
- if (warnings.length > MAX_WARNINGS) {
4035
- const truncated = warnings.length - MAX_WARNINGS;
4036
- warnings.splice(MAX_WARNINGS, truncated, `... and ${truncated} more warning(s) suppressed`);
4037
- }
4038
4049
  if (inFlight.size > 0) await drainPool();
4050
+ const MAX_WARNINGS = 50;
4051
+ const capped = dedupAndCapWarnings(warnings, MAX_WARNINGS);
4052
+ warnings.length = 0;
4053
+ warnings.push(...capped);
4039
4054
  const { score, grade, overall, summary, categories } = computeScore(tests);
4040
4055
  const badge = generateBadge(displayUrl);
4041
4056
  return {
@@ -4075,6 +4090,7 @@ export {
4075
4090
  TEST_DEFINITIONS,
4076
4091
  SPEC_VERSION,
4077
4092
  SPEC_BASE,
4093
+ dedupAndCapWarnings,
4078
4094
  previewTests,
4079
4095
  runComplianceSuite
4080
4096
  };
package/dist/index.js CHANGED
@@ -16,6 +16,9 @@ var GRADE_COLORS = {
16
16
  F: "#e05d44"
17
17
  };
18
18
  var UNTESTED_COLOR = "#9f9f9f";
19
+ function escXml(s) {
20
+ return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
21
+ }
19
22
  function renderBadgeSvg(input) {
20
23
  let gradeLabel = "unknown";
21
24
  let color = UNTESTED_COLOR;
@@ -27,14 +30,16 @@ function renderBadgeSvg(input) {
27
30
  title = `MCP Compliant: Grade ${input.grade}${input.score != null ? ` (${input.score}%)` : ""} - tested ${date}`;
28
31
  }
29
32
  const leftText = "MCP Compliant";
30
- const rightText = gradeLabel;
33
+ const rightText = escXml(gradeLabel);
34
+ const ariaLabel = `${leftText}: ${escXml(gradeLabel)}`;
35
+ const titleEsc = escXml(title);
31
36
  const leftWidth = 95;
32
37
  const rightWidth = 40;
33
38
  const totalWidth = leftWidth + rightWidth;
34
39
  const leftX = leftWidth / 2;
35
40
  const rightX = leftWidth + rightWidth / 2;
36
- return `<svg xmlns="http://www.w3.org/2000/svg" width="${totalWidth}" height="20" role="img" aria-label="${leftText}: ${rightText}">
37
- <title>${title}</title>
41
+ return `<svg xmlns="http://www.w3.org/2000/svg" width="${totalWidth}" height="20" role="img" aria-label="${ariaLabel}">
42
+ <title>${titleEsc}</title>
38
43
  <linearGradient id="s" x2="0" y2="100%">
39
44
  <stop offset="0" stop-color="#bbb" stop-opacity=".1"/>
40
45
  <stop offset="1" stop-opacity=".1"/>
@@ -64,7 +69,7 @@ import { request } from "undici";
64
69
 
65
70
  // src/sse.ts
66
71
  function parseSSEResponse(text) {
67
- const lines = text.split("\n");
72
+ const lines = text.split(/\r?\n/);
68
73
  let firstJsonRpcResponse = null;
69
74
  let currentData = [];
70
75
  function flushEvent() {
@@ -107,7 +112,8 @@ function createHttpTransport(opts) {
107
112
  function normalizeHeaders(raw) {
108
113
  const out = {};
109
114
  for (const [k, v] of Object.entries(raw)) {
110
- if (typeof v === "string") out[k] = v;
115
+ if (v === void 0) continue;
116
+ out[k] = Array.isArray(v) ? v.join(", ") : v;
111
117
  }
112
118
  return out;
113
119
  }
@@ -251,6 +257,11 @@ function createStdioTransport(opts) {
251
257
  handleLine(line);
252
258
  }
253
259
  if (stdoutBuffer.length > stdoutBufferSize) {
260
+ stderrBuffer += `[mcp-compliance] stdout buffer exceeded ${stdoutBufferSize} bytes without a newline; discarding buffered data
261
+ `;
262
+ if (stderrBuffer.length > stderrBufferSize) {
263
+ stderrBuffer = stderrBuffer.slice(stderrBuffer.length - stderrBufferSize);
264
+ }
254
265
  stdoutBuffer = "";
255
266
  }
256
267
  });
@@ -579,6 +590,11 @@ function validateTarget(t, source) {
579
590
 
580
591
  // src/diff.ts
581
592
  function diffReports(baseline, current) {
593
+ if (baseline.specVersion && current.specVersion && baseline.specVersion !== current.specVersion) {
594
+ throw new Error(
595
+ `Spec version mismatch: baseline is ${baseline.specVersion}, current is ${current.specVersion}. Re-run the baseline with this tool version (or downgrade the tool to match) before diffing.`
596
+ );
597
+ }
582
598
  const baseById = new Map(baseline.tests.map((t) => [t.id, t]));
583
599
  const curById = new Map(current.tests.map((t) => [t.id, t]));
584
600
  const regressions = [];
@@ -676,7 +692,7 @@ function hasRegressions(summary) {
676
692
  }
677
693
 
678
694
  // src/mcp/server.ts
679
- import { existsSync as existsSync2, readFileSync as readFileSync2 } from "fs";
695
+ import { existsSync as existsSync2, readFileSync as readFileSync2, realpathSync } from "fs";
680
696
  import { dirname, join as join2, resolve as resolve2 } from "path";
681
697
  import { fileURLToPath } from "url";
682
698
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
@@ -748,7 +764,7 @@ function computeScore(tests) {
748
764
  // src/types.ts
749
765
  var REPORT_SCHEMA_VERSION = "1";
750
766
  var TEST_DEFINITIONS = [
751
- // ── Transport (13 tests) ─────────────────────────────────────────
767
+ // ── Transport (16 tests: 13 HTTP + 3 stdio) ──────────────────────
752
768
  {
753
769
  id: "transport-post",
754
770
  name: "HTTP POST accepted",
@@ -897,7 +913,7 @@ var TEST_DEFINITIONS = [
897
913
  recommendation: "Return JSON-RPC error -32601 (Method not found) for unknown methods. Do not exit the process or disconnect \u2014 the client should be able to keep using the session after an error.",
898
914
  transports: ["stdio"]
899
915
  },
900
- // ── Lifecycle (17 tests) ─────────────────────────────────────────
916
+ // ── Lifecycle (21 tests) ─────────────────────────────────────────
901
917
  {
902
918
  id: "lifecycle-init",
903
919
  name: "Initialize handshake",
@@ -1595,17 +1611,13 @@ var STACK_TRACE_PATTERNS = [
1595
1611
  // PHP
1596
1612
  /panicked\s+at\s+'/i,
1597
1613
  // Rust
1598
- /ENOENT|EACCES|EPERM/,
1599
- // Node.js system errors
1600
1614
  /node_modules\//,
1601
- // Node.js module paths
1602
- /\/usr\/local\/|\/home\//,
1603
- // Unix paths
1604
- /[A-Z]:\\.*\\/,
1605
- // Windows paths
1606
- /password|passwd|secret|credential/i,
1607
- // Sensitive terms
1608
- /jdbc:|mysql:|postgres:|mongodb:/i
1615
+ // Node.js module paths (filesystem layout leak)
1616
+ /\/usr\/local\/|\/home\/|\/root\//,
1617
+ // Unix absolute paths
1618
+ /[A-Z]:\\[\w\s.-]+\\[\w\s.-]+/,
1619
+ // Windows absolute paths (drive + 2+ segments)
1620
+ /jdbc:|mysql:\/\/|postgres(?:ql)?:\/\/|mongodb(?:\+srv)?:\/\//i
1609
1621
  // DB connection strings
1610
1622
  ];
1611
1623
  var INTERNAL_IP_PATTERNS = [
@@ -1624,6 +1636,20 @@ function createIdCounter(start = 0) {
1624
1636
  let id = start;
1625
1637
  return () => ++id;
1626
1638
  }
1639
+ function dedupAndCapWarnings(warnings, max) {
1640
+ const seen = /* @__PURE__ */ new Set();
1641
+ const deduped = [];
1642
+ for (const w of warnings) {
1643
+ if (seen.has(w)) continue;
1644
+ seen.add(w);
1645
+ deduped.push(w);
1646
+ }
1647
+ if (deduped.length > max) {
1648
+ const truncated = deduped.length - max;
1649
+ return [...deduped.slice(0, max), `... and ${truncated} more warning(s) suppressed`];
1650
+ }
1651
+ return deduped;
1652
+ }
1627
1653
  var STDIO_INCOMPATIBLE_IDS = /* @__PURE__ */ new Set([
1628
1654
  // Lifecycle tests that use raw undici for HTTP-specific checks
1629
1655
  "lifecycle-string-id",
@@ -4376,12 +4402,11 @@ async function runComplianceSuite(target, options = {}) {
4376
4402
  return { passed: true, details: "Unknown method returned JSON-RPC error; subsequent ping succeeded" };
4377
4403
  }
4378
4404
  );
4379
- const MAX_WARNINGS = 100;
4380
- if (warnings.length > MAX_WARNINGS) {
4381
- const truncated = warnings.length - MAX_WARNINGS;
4382
- warnings.splice(MAX_WARNINGS, truncated, `... and ${truncated} more warning(s) suppressed`);
4383
- }
4384
4405
  if (inFlight.size > 0) await drainPool();
4406
+ const MAX_WARNINGS = 50;
4407
+ const capped = dedupAndCapWarnings(warnings, MAX_WARNINGS);
4408
+ warnings.length = 0;
4409
+ warnings.push(...capped);
4385
4410
  const { score, grade, overall, summary, categories } = computeScore(tests);
4386
4411
  const badge = generateBadge(displayUrl);
4387
4412
  return {
@@ -4416,7 +4441,7 @@ async function runComplianceSuite(target, options = {}) {
4416
4441
  function registerTools(server) {
4417
4442
  server.tool(
4418
4443
  "mcp_compliance_test",
4419
- "Run the full MCP compliance test suite against a server URL. Returns grade (A-F), score, and detailed results for all 81 tests covering transport, lifecycle, tools, resources, prompts, errors, schema validation, and security.",
4444
+ "Run the full MCP compliance test suite against a server URL. Returns grade (A-F), score, and detailed results for all 88 tests covering transport, lifecycle, tools, resources, prompts, errors, schema validation, and security.",
4420
4445
  {
4421
4446
  url: z.string().url().describe("The MCP server URL to test (must be HTTP or HTTPS)"),
4422
4447
  auth: z.string().optional().describe('Authorization header value (e.g., "Bearer tok123")'),
@@ -4611,8 +4636,16 @@ async function startServer() {
4611
4636
  const transport = new StdioServerTransport();
4612
4637
  await server.connect(transport);
4613
4638
  }
4614
- var isDirectRun = process.argv[1]?.endsWith("mcp/server.js") || process.argv[1]?.endsWith("mcp\\server.js");
4615
- if (isDirectRun) {
4639
+ function isInvokedDirectly() {
4640
+ const argv1 = process.argv[1];
4641
+ if (!argv1) return false;
4642
+ try {
4643
+ return realpathSync(argv1) === realpathSync(fileURLToPath(import.meta.url));
4644
+ } catch {
4645
+ return false;
4646
+ }
4647
+ }
4648
+ if (isInvokedDirectly()) {
4616
4649
  startServer().catch((err) => {
4617
4650
  console.error("MCP server error:", err);
4618
4651
  process.exit(1);
@@ -5380,7 +5413,9 @@ Testing ${describeTarget(transportTarget)}...
5380
5413
  skip,
5381
5414
  onProgress: verbose ? (testId, passed, details) => {
5382
5415
  const icon = passed ? chalk2.green("PASS") : chalk2.red("FAIL");
5383
- console.log(` ${icon} ${testId} \u2014 ${details}`);
5416
+ const stream = opts.format === "terminal" ? process.stdout : process.stderr;
5417
+ stream.write(` ${icon} ${testId} \u2014 ${details}
5418
+ `);
5384
5419
  } : void 0
5385
5420
  });
5386
5421
  if (verbose && opts.format === "terminal") {
@@ -5414,6 +5449,16 @@ Badge SVG written to ${opts.output}`));
5414
5449
  console.error(chalk2.red("\nError: --watch only applies to stdio targets (HTTP servers are remote).\n"));
5415
5450
  process.exit(1);
5416
5451
  }
5452
+ if (opts.format !== "terminal" && opts.format !== "markdown" && opts.format !== "html") {
5453
+ console.error(
5454
+ chalk2.red(
5455
+ `
5456
+ Error: --watch is incompatible with --format=${opts.format} (multi-run output would be unparseable). Use --format=terminal.
5457
+ `
5458
+ )
5459
+ );
5460
+ process.exit(1);
5461
+ }
5417
5462
  await runOnce();
5418
5463
  let pending = null;
5419
5464
  let running = false;
@@ -5427,8 +5472,9 @@ Badge SVG written to ${opts.output}`));
5427
5472
  if (running) return;
5428
5473
  running = true;
5429
5474
  try {
5430
- console.log(chalk2.dim(`
5475
+ process.stderr.write(chalk2.dim(`
5431
5476
  [watch] ${f} changed \u2014 re-running...
5477
+
5432
5478
  `));
5433
5479
  await runOnce();
5434
5480
  } catch (err) {
@@ -5440,7 +5486,7 @@ Badge SVG written to ${opts.output}`));
5440
5486
  });
5441
5487
  process.on("SIGINT", () => {
5442
5488
  watcher.close();
5443
- console.log(chalk2.dim("\n[watch] stopped"));
5489
+ process.stderr.write(chalk2.dim("\n[watch] stopped\n"));
5444
5490
  process.exit(0);
5445
5491
  });
5446
5492
  await new Promise(() => {
package/dist/mcp/server.js CHANGED
@@ -2,10 +2,10 @@ import {
2
2
  SPEC_BASE,
3
3
  TEST_DEFINITIONS,
4
4
  runComplianceSuite
5
- } from "../chunk-G5K7CRWU.js";
5
+ } from "../chunk-BX22BHC5.js";
6
6
 
7
7
  // src/mcp/server.ts
8
- import { existsSync, readFileSync } from "fs";
8
+ import { existsSync, readFileSync, realpathSync } from "fs";
9
9
  import { dirname, join, resolve } from "path";
10
10
  import { fileURLToPath } from "url";
11
11
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
@@ -16,7 +16,7 @@ import { z } from "zod";
16
16
  function registerTools(server) {
17
17
  server.tool(
18
18
  "mcp_compliance_test",
19
- "Run the full MCP compliance test suite against a server URL. Returns grade (A-F), score, and detailed results for all 81 tests covering transport, lifecycle, tools, resources, prompts, errors, schema validation, and security.",
19
+ "Run the full MCP compliance test suite against a server URL. Returns grade (A-F), score, and detailed results for all 88 tests covering transport, lifecycle, tools, resources, prompts, errors, schema validation, and security.",
20
20
  {
21
21
  url: z.string().url().describe("The MCP server URL to test (must be HTTP or HTTPS)"),
22
22
  auth: z.string().optional().describe('Authorization header value (e.g., "Bearer tok123")'),
@@ -211,8 +211,16 @@ async function startServer() {
211
211
  const transport = new StdioServerTransport();
212
212
  await server.connect(transport);
213
213
  }
214
- var isDirectRun = process.argv[1]?.endsWith("mcp/server.js") || process.argv[1]?.endsWith("mcp\\server.js");
215
- if (isDirectRun) {
214
+ function isInvokedDirectly() {
215
+ const argv1 = process.argv[1];
216
+ if (!argv1) return false;
217
+ try {
218
+ return realpathSync(argv1) === realpathSync(fileURLToPath(import.meta.url));
219
+ } catch {
220
+ return false;
221
+ }
222
+ }
223
+ if (isInvokedDirectly()) {
216
224
  startServer().catch((err) => {
217
225
  console.error("MCP server error:", err);
218
226
  process.exit(1);
package/dist/runner.d.ts CHANGED
@@ -89,7 +89,7 @@ type TransportTarget = {
89
89
  cwd?: string;
90
90
  verbose?: boolean;
91
91
  };
92
- /** All 81 test IDs with descriptions for the explain command */
92
+ /** All 88 test IDs with descriptions for the explain command */
93
93
  declare const TEST_DEFINITIONS: TestDefinition[];
94
94
 
95
95
  declare function computeGrade(score: number): Grade;
@@ -142,6 +142,14 @@ declare function parseSSEResponse(text: string): any;
142
142
 
143
143
  declare const SPEC_VERSION = "2025-11-25";
144
144
  declare const SPEC_BASE = "https://modelcontextprotocol.io/specification/2025-11-25";
145
+ /**
146
+ * Dedupe and cap a list of warnings, preserving insertion order and
147
+ * appending a truncation sentinel when capped. Extracted so the cap
148
+ * semantics can be unit-tested without spinning up a suite run.
149
+ *
150
+ * @internal Exported for testing.
151
+ */
152
+ declare function dedupAndCapWarnings(warnings: readonly string[], max: number): string[];
145
153
 
146
154
  interface PreviewOptions {
147
155
  /** Transport to filter against. Defaults to "http". */
@@ -206,4 +214,4 @@ interface RunOptions {
206
214
  */
207
215
  declare function runComplianceSuite(target: string | TransportTarget, options?: RunOptions): Promise<ComplianceReport>;
208
216
 
209
- export { type ComplianceReport, type PreviewOptions, type RunOptions, SPEC_BASE, SPEC_VERSION, TEST_DEFINITIONS, type TestResult, computeGrade, computeScore, generateBadge, parseSSEResponse, previewTests, runComplianceSuite, urlHash };
217
+ export { type ComplianceReport, type PreviewOptions, type RunOptions, SPEC_BASE, SPEC_VERSION, TEST_DEFINITIONS, type TestResult, computeGrade, computeScore, dedupAndCapWarnings, generateBadge, parseSSEResponse, previewTests, runComplianceSuite, urlHash };
package/dist/runner.js CHANGED
@@ -4,18 +4,20 @@ import {
4
4
  TEST_DEFINITIONS,
5
5
  computeGrade,
6
6
  computeScore,
7
+ dedupAndCapWarnings,
7
8
  generateBadge,
8
9
  parseSSEResponse,
9
10
  previewTests,
10
11
  runComplianceSuite,
11
12
  urlHash
12
- } from "./chunk-G5K7CRWU.js";
13
+ } from "./chunk-BX22BHC5.js";
13
14
  export {
14
15
  SPEC_BASE,
15
16
  SPEC_VERSION,
16
17
  TEST_DEFINITIONS,
17
18
  computeGrade,
18
19
  computeScore,
20
+ dedupAndCapWarnings,
19
21
  generateBadge,
20
22
  parseSSEResponse,
21
23
  previewTests,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yawlabs/mcp-compliance",
3
- "version": "0.12.2",
3
+ "version": "0.13.1",
4
4
  "description": "CLI tool and MCP server that tests MCP servers for spec compliance",
5
5
  "license": "MIT",
6
6
  "author": "Yaw Labs <contact@yaw.sh> (https://yaw.sh)",
@@ -42,7 +42,7 @@
42
42
  "dependencies": {
43
43
  "@modelcontextprotocol/sdk": "^1.29.0",
44
44
  "chalk": "^5.4.1",
45
- "commander": "^13.1.0",
45
+ "commander": "^14.0.3",
46
46
  "undici": "^7.8.0",
47
47
  "zod": "^3.24.4"
48
48
  },
@@ -57,7 +57,7 @@
57
57
  "vitest": "^3.1.1"
58
58
  },
59
59
  "engines": {
60
- "node": ">=18"
60
+ "node": ">=20"
61
61
  },
62
62
  "keywords": [
63
63
  "mcp",