@yawlabs/mcp-compliance 0.12.2 → 0.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -11
- package/dist/{chunk-G5K7CRWU.js → chunk-BX22BHC5.js} +35 -19
- package/dist/index.js +75 -29
- package/dist/mcp/server.js +13 -5
- package/dist/runner.d.ts +10 -2
- package/dist/runner.js +3 -1
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -15,13 +15,13 @@ MCP servers are multiplying fast — but most ship without compliance testing. B
|
|
|
15
15
|
|
|
16
16
|
This tool solves that:
|
|
17
17
|
|
|
18
|
-
- **
|
|
18
|
+
- **88 tests across 8 categories** — transport, lifecycle, tools, resources, prompts, error handling, schema validation, and security. No gaps. (HTTP runs all 85 transport-applicable tests; stdio runs ~75 — HTTP-specific tests like CORS, TLS, session headers, and rate limiting are gated out.)
|
|
19
19
|
- **Capability-driven** — tests adapt to what the server declares. If it says it supports tools, tool tests become required. No false failures for features the server doesn't claim.
|
|
20
20
|
- **Graded scoring** — A-F letter grade with a weighted score (required tests 70%, optional 30%). One number to communicate compliance.
|
|
21
21
|
- **CI-ready** — `--strict` mode exits with code 1 on required test failures. Drop it into any pipeline.
|
|
22
22
|
- **Spec-referenced** — every test links to the exact section of the MCP specification it validates. No ambiguity about what's being tested or why.
|
|
23
23
|
- **Three interfaces** — CLI for humans, MCP server for AI assistants, programmatic API for integration.
|
|
24
|
-
- **Published
|
|
24
|
+
- **Published methodology** — the [testing methodology](./COMPLIANCE_RUBRIC.md) and [rule catalog](./mcp-compliance-rules.json) are open (CC BY 4.0) so anyone can build compatible tooling or fork the rules.
|
|
25
25
|
|
|
26
26
|
## Quick start
|
|
27
27
|
|
|
@@ -124,6 +124,7 @@ On Windows, `npx` and other `.cmd` shims are handled automatically by spawning t
|
|
|
124
124
|
| `--retries <n>` | both | Number of retries for failed tests (default: `0`) |
|
|
125
125
|
| `--only <items>` | both | Only run tests matching these categories or test IDs (comma-separated) |
|
|
126
126
|
| `--skip <items>` | both | Skip tests matching these categories or test IDs (comma-separated) |
|
|
127
|
+
| `--concurrency <n>` | both | Max parallel-safe tests in flight (default: `1`; raising reduces wall time but can perturb timing-sensitive servers) |
|
|
127
128
|
| `--verbose` | both | Print each test result as it runs (also forwards stdio stderr) |
|
|
128
129
|
|
|
129
130
|
### CI integration
|
|
@@ -258,11 +259,12 @@ Then embed it in your README:
|
|
|
258
259
|
|
|
259
260
|
The `test` command never publishes — use it for CI, debugging, and local iteration. `badge` is the only command that publishes to mcp.hosting.
|
|
260
261
|
|
|
261
|
-
## What the
|
|
262
|
+
## What the 88 tests check
|
|
262
263
|
|
|
263
264
|
<details>
|
|
264
|
-
<summary><strong>Transport (
|
|
265
|
+
<summary><strong>Transport (16 tests)</strong></summary>
|
|
265
266
|
|
|
267
|
+
HTTP-only (13):
|
|
266
268
|
- **transport-post** — Server accepts HTTP POST requests (required)
|
|
267
269
|
- **transport-content-type** — Responds with application/json or text/event-stream (required)
|
|
268
270
|
- **transport-notification-202** — Notifications return exactly 202 Accepted
|
|
@@ -277,10 +279,15 @@ The `test` command never publishes — use it for CI, debugging, and local itera
|
|
|
277
279
|
- **transport-concurrent** — Handles concurrent requests
|
|
278
280
|
- **transport-sse-event-field** — SSE responses include required event: message field
|
|
279
281
|
|
|
282
|
+
stdio-only (3):
|
|
283
|
+
- **stdio-framing** — Newline-delimited JSON framing (required)
|
|
284
|
+
- **stdio-unicode** — UTF-8 unicode roundtrip preserves non-ASCII payloads
|
|
285
|
+
- **stdio-unknown-method-recovers** — Returns -32601 for unknown methods and keeps serving
|
|
286
|
+
|
|
280
287
|
</details>
|
|
281
288
|
|
|
282
289
|
<details>
|
|
283
|
-
<summary><strong>Lifecycle (
|
|
290
|
+
<summary><strong>Lifecycle (21 tests)</strong></summary>
|
|
284
291
|
|
|
285
292
|
- **lifecycle-init** — Initialize handshake succeeds (required)
|
|
286
293
|
- **lifecycle-proto-version** — Returns valid YYYY-MM-DD protocol version (required)
|
|
@@ -299,6 +306,10 @@ The `test` command never publishes — use it for CI, debugging, and local itera
|
|
|
299
306
|
- **lifecycle-progress** — Handles progress notifications gracefully
|
|
300
307
|
- **lifecycle-list-changed** — Accepts listChanged notifications for declared capabilities
|
|
301
308
|
- **lifecycle-progress-token** — Supports progress tokens in requests via SSE
|
|
309
|
+
- **lifecycle-sampling-capability** — Advisory check for server-side use of the client sampling capability
|
|
310
|
+
- **lifecycle-roots-capability** — Advisory check for server-side use of the client roots capability
|
|
311
|
+
- **lifecycle-elicitation-capability** — Advisory check for the 2025-11-25 client elicitation capability
|
|
312
|
+
- **lifecycle-meta-tolerance** — Server ignores unknown `_meta` fields on incoming requests
|
|
302
313
|
|
|
303
314
|
</details>
|
|
304
315
|
|
|
@@ -399,7 +410,7 @@ The `test` command never publishes — use it for CI, debugging, and local itera
|
|
|
399
410
|
| D | 40-59 |
|
|
400
411
|
| F | 0-39 |
|
|
401
412
|
|
|
402
|
-
Required tests are worth 70% of the score, optional tests 30%. See the [full scoring algorithm](./
|
|
413
|
+
Required tests are worth 70% of the score, optional tests 30%. See the [full scoring algorithm](./COMPLIANCE_RUBRIC.md#2-scoring-algorithm) in the methodology doc.
|
|
403
414
|
|
|
404
415
|
## CI integration
|
|
405
416
|
|
|
@@ -536,11 +547,11 @@ Consumer guidance:
|
|
|
536
547
|
- Within a major version, additions are non-breaking. Renames, removals, or type changes bump the version.
|
|
537
548
|
- Two runs against the same server produce equivalent grade, score, and per-test pass/fail (modulo timings/timestamps).
|
|
538
549
|
|
|
539
|
-
##
|
|
550
|
+
## Methodology & docs
|
|
540
551
|
|
|
541
|
-
The
|
|
552
|
+
The testing methodology is published openly so the grading is auditable:
|
|
542
553
|
|
|
543
|
-
- **[
|
|
554
|
+
- **[Testing methodology](./COMPLIANCE_RUBRIC.md)** — test execution model, scoring algorithm, all 88 test rules with pass/fail criteria (CC BY 4.0)
|
|
544
555
|
- **[Machine-readable rule catalog](./mcp-compliance-rules.json)** — JSON Schema-compliant catalog for programmatic consumption
|
|
545
556
|
- **[Why `mcp-compliance`](./docs/WHY.md)** — the problem, existing alternatives, what this tool does differently
|
|
546
557
|
- **[Fixing common failures](./docs/FIXES.md)** — recipes for the most frequent test failures with code snippets
|
|
@@ -551,7 +562,7 @@ The compliance testing methodology is published as an open specification:
|
|
|
551
562
|
- **[Spec PR drafts](./docs/spec-prs/)** — our proposed MCP spec clarifications for ambiguous cases we've hit
|
|
552
563
|
- **[mcp.hosting integration spec](./docs/mcp-hosting-integration.md)** — the contract between this engine and the mcp.hosting platform: URL surfaces, data flow, storage model, badge API, leaderboard, router integration
|
|
553
564
|
|
|
554
|
-
|
|
565
|
+
The methodology is not an authoritative conformance standard — it's one tool's choices, published so they can be inspected, adopted, or forked. The [official MCP specification](https://modelcontextprotocol.io/specification/2025-11-25) defines what servers must do; this document describes how `@yawlabs/mcp-compliance` verifies it.
|
|
555
566
|
|
|
556
567
|
## Requirements
|
|
557
568
|
|
|
@@ -583,7 +594,7 @@ npm test
|
|
|
583
594
|
|
|
584
595
|
- [mcp.hosting](https://mcp.hosting) — Hosted MCP server infrastructure
|
|
585
596
|
- [MCP Specification](https://modelcontextprotocol.io/specification/2025-11-25)
|
|
586
|
-
- [
|
|
597
|
+
- [Testing methodology](./COMPLIANCE_RUBRIC.md)
|
|
587
598
|
- [Yaw Labs](https://yaw.sh)
|
|
588
599
|
|
|
589
600
|
## License
|
|
@@ -63,7 +63,7 @@ import { request } from "undici";
|
|
|
63
63
|
|
|
64
64
|
// src/sse.ts
|
|
65
65
|
function parseSSEResponse(text) {
|
|
66
|
-
const lines = text.split(
|
|
66
|
+
const lines = text.split(/\r?\n/);
|
|
67
67
|
let firstJsonRpcResponse = null;
|
|
68
68
|
let currentData = [];
|
|
69
69
|
function flushEvent() {
|
|
@@ -106,7 +106,8 @@ function createHttpTransport(opts) {
|
|
|
106
106
|
function normalizeHeaders(raw) {
|
|
107
107
|
const out = {};
|
|
108
108
|
for (const [k, v] of Object.entries(raw)) {
|
|
109
|
-
if (
|
|
109
|
+
if (v === void 0) continue;
|
|
110
|
+
out[k] = Array.isArray(v) ? v.join(", ") : v;
|
|
110
111
|
}
|
|
111
112
|
return out;
|
|
112
113
|
}
|
|
@@ -250,6 +251,11 @@ function createStdioTransport(opts) {
|
|
|
250
251
|
handleLine(line);
|
|
251
252
|
}
|
|
252
253
|
if (stdoutBuffer.length > stdoutBufferSize) {
|
|
254
|
+
stderrBuffer += `[mcp-compliance] stdout buffer exceeded ${stdoutBufferSize} bytes without a newline; discarding buffered data
|
|
255
|
+
`;
|
|
256
|
+
if (stderrBuffer.length > stderrBufferSize) {
|
|
257
|
+
stderrBuffer = stderrBuffer.slice(stderrBuffer.length - stderrBufferSize);
|
|
258
|
+
}
|
|
253
259
|
stdoutBuffer = "";
|
|
254
260
|
}
|
|
255
261
|
});
|
|
@@ -402,7 +408,7 @@ function createStdioTransport(opts) {
|
|
|
402
408
|
// src/types.ts
|
|
403
409
|
var REPORT_SCHEMA_VERSION = "1";
|
|
404
410
|
var TEST_DEFINITIONS = [
|
|
405
|
-
// ── Transport (13
|
|
411
|
+
// ── Transport (16 tests: 13 HTTP + 3 stdio) ──────────────────────
|
|
406
412
|
{
|
|
407
413
|
id: "transport-post",
|
|
408
414
|
name: "HTTP POST accepted",
|
|
@@ -551,7 +557,7 @@ var TEST_DEFINITIONS = [
|
|
|
551
557
|
recommendation: "Return JSON-RPC error -32601 (Method not found) for unknown methods. Do not exit the process or disconnect \u2014 the client should be able to keep using the session after an error.",
|
|
552
558
|
transports: ["stdio"]
|
|
553
559
|
},
|
|
554
|
-
// ── Lifecycle (
|
|
560
|
+
// ── Lifecycle (21 tests) ─────────────────────────────────────────
|
|
555
561
|
{
|
|
556
562
|
id: "lifecycle-init",
|
|
557
563
|
name: "Initialize handshake",
|
|
@@ -1249,17 +1255,13 @@ var STACK_TRACE_PATTERNS = [
|
|
|
1249
1255
|
// PHP
|
|
1250
1256
|
/panicked\s+at\s+'/i,
|
|
1251
1257
|
// Rust
|
|
1252
|
-
/ENOENT|EACCES|EPERM/,
|
|
1253
|
-
// Node.js system errors
|
|
1254
1258
|
/node_modules\//,
|
|
1255
|
-
// Node.js module paths
|
|
1256
|
-
/\/usr\/local\/|\/home\//,
|
|
1257
|
-
// Unix paths
|
|
1258
|
-
/[A-Z]
|
|
1259
|
-
// Windows paths
|
|
1260
|
-
/
|
|
1261
|
-
// Sensitive terms
|
|
1262
|
-
/jdbc:|mysql:|postgres:|mongodb:/i
|
|
1259
|
+
// Node.js module paths (filesystem layout leak)
|
|
1260
|
+
/\/usr\/local\/|\/home\/|\/root\//,
|
|
1261
|
+
// Unix absolute paths
|
|
1262
|
+
/[A-Z]:\\[\w\s.-]+\\[\w\s.-]+/,
|
|
1263
|
+
// Windows absolute paths (drive + 2+ segments)
|
|
1264
|
+
/jdbc:|mysql:\/\/|postgres(?:ql)?:\/\/|mongodb(?:\+srv)?:\/\//i
|
|
1263
1265
|
// DB connection strings
|
|
1264
1266
|
];
|
|
1265
1267
|
var INTERNAL_IP_PATTERNS = [
|
|
@@ -1278,6 +1280,20 @@ function createIdCounter(start = 0) {
|
|
|
1278
1280
|
let id = start;
|
|
1279
1281
|
return () => ++id;
|
|
1280
1282
|
}
|
|
1283
|
+
function dedupAndCapWarnings(warnings, max) {
|
|
1284
|
+
const seen = /* @__PURE__ */ new Set();
|
|
1285
|
+
const deduped = [];
|
|
1286
|
+
for (const w of warnings) {
|
|
1287
|
+
if (seen.has(w)) continue;
|
|
1288
|
+
seen.add(w);
|
|
1289
|
+
deduped.push(w);
|
|
1290
|
+
}
|
|
1291
|
+
if (deduped.length > max) {
|
|
1292
|
+
const truncated = deduped.length - max;
|
|
1293
|
+
return [...deduped.slice(0, max), `... and ${truncated} more warning(s) suppressed`];
|
|
1294
|
+
}
|
|
1295
|
+
return deduped;
|
|
1296
|
+
}
|
|
1281
1297
|
var STDIO_INCOMPATIBLE_IDS = /* @__PURE__ */ new Set([
|
|
1282
1298
|
// Lifecycle tests that use raw undici for HTTP-specific checks
|
|
1283
1299
|
"lifecycle-string-id",
|
|
@@ -4030,12 +4046,11 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
4030
4046
|
return { passed: true, details: "Unknown method returned JSON-RPC error; subsequent ping succeeded" };
|
|
4031
4047
|
}
|
|
4032
4048
|
);
|
|
4033
|
-
const MAX_WARNINGS = 100;
|
|
4034
|
-
if (warnings.length > MAX_WARNINGS) {
|
|
4035
|
-
const truncated = warnings.length - MAX_WARNINGS;
|
|
4036
|
-
warnings.splice(MAX_WARNINGS, truncated, `... and ${truncated} more warning(s) suppressed`);
|
|
4037
|
-
}
|
|
4038
4049
|
if (inFlight.size > 0) await drainPool();
|
|
4050
|
+
const MAX_WARNINGS = 50;
|
|
4051
|
+
const capped = dedupAndCapWarnings(warnings, MAX_WARNINGS);
|
|
4052
|
+
warnings.length = 0;
|
|
4053
|
+
warnings.push(...capped);
|
|
4039
4054
|
const { score, grade, overall, summary, categories } = computeScore(tests);
|
|
4040
4055
|
const badge = generateBadge(displayUrl);
|
|
4041
4056
|
return {
|
|
@@ -4075,6 +4090,7 @@ export {
|
|
|
4075
4090
|
TEST_DEFINITIONS,
|
|
4076
4091
|
SPEC_VERSION,
|
|
4077
4092
|
SPEC_BASE,
|
|
4093
|
+
dedupAndCapWarnings,
|
|
4078
4094
|
previewTests,
|
|
4079
4095
|
runComplianceSuite
|
|
4080
4096
|
};
|
package/dist/index.js
CHANGED
|
@@ -16,6 +16,9 @@ var GRADE_COLORS = {
|
|
|
16
16
|
F: "#e05d44"
|
|
17
17
|
};
|
|
18
18
|
var UNTESTED_COLOR = "#9f9f9f";
|
|
19
|
+
function escXml(s) {
|
|
20
|
+
return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
|
|
21
|
+
}
|
|
19
22
|
function renderBadgeSvg(input) {
|
|
20
23
|
let gradeLabel = "unknown";
|
|
21
24
|
let color = UNTESTED_COLOR;
|
|
@@ -27,14 +30,16 @@ function renderBadgeSvg(input) {
|
|
|
27
30
|
title = `MCP Compliant: Grade ${input.grade}${input.score != null ? ` (${input.score}%)` : ""} - tested ${date}`;
|
|
28
31
|
}
|
|
29
32
|
const leftText = "MCP Compliant";
|
|
30
|
-
const rightText = gradeLabel;
|
|
33
|
+
const rightText = escXml(gradeLabel);
|
|
34
|
+
const ariaLabel = `${leftText}: ${escXml(gradeLabel)}`;
|
|
35
|
+
const titleEsc = escXml(title);
|
|
31
36
|
const leftWidth = 95;
|
|
32
37
|
const rightWidth = 40;
|
|
33
38
|
const totalWidth = leftWidth + rightWidth;
|
|
34
39
|
const leftX = leftWidth / 2;
|
|
35
40
|
const rightX = leftWidth + rightWidth / 2;
|
|
36
|
-
return `<svg xmlns="http://www.w3.org/2000/svg" width="${totalWidth}" height="20" role="img" aria-label="${
|
|
37
|
-
<title>${
|
|
41
|
+
return `<svg xmlns="http://www.w3.org/2000/svg" width="${totalWidth}" height="20" role="img" aria-label="${ariaLabel}">
|
|
42
|
+
<title>${titleEsc}</title>
|
|
38
43
|
<linearGradient id="s" x2="0" y2="100%">
|
|
39
44
|
<stop offset="0" stop-color="#bbb" stop-opacity=".1"/>
|
|
40
45
|
<stop offset="1" stop-opacity=".1"/>
|
|
@@ -64,7 +69,7 @@ import { request } from "undici";
|
|
|
64
69
|
|
|
65
70
|
// src/sse.ts
|
|
66
71
|
function parseSSEResponse(text) {
|
|
67
|
-
const lines = text.split(
|
|
72
|
+
const lines = text.split(/\r?\n/);
|
|
68
73
|
let firstJsonRpcResponse = null;
|
|
69
74
|
let currentData = [];
|
|
70
75
|
function flushEvent() {
|
|
@@ -107,7 +112,8 @@ function createHttpTransport(opts) {
|
|
|
107
112
|
function normalizeHeaders(raw) {
|
|
108
113
|
const out = {};
|
|
109
114
|
for (const [k, v] of Object.entries(raw)) {
|
|
110
|
-
if (
|
|
115
|
+
if (v === void 0) continue;
|
|
116
|
+
out[k] = Array.isArray(v) ? v.join(", ") : v;
|
|
111
117
|
}
|
|
112
118
|
return out;
|
|
113
119
|
}
|
|
@@ -251,6 +257,11 @@ function createStdioTransport(opts) {
|
|
|
251
257
|
handleLine(line);
|
|
252
258
|
}
|
|
253
259
|
if (stdoutBuffer.length > stdoutBufferSize) {
|
|
260
|
+
stderrBuffer += `[mcp-compliance] stdout buffer exceeded ${stdoutBufferSize} bytes without a newline; discarding buffered data
|
|
261
|
+
`;
|
|
262
|
+
if (stderrBuffer.length > stderrBufferSize) {
|
|
263
|
+
stderrBuffer = stderrBuffer.slice(stderrBuffer.length - stderrBufferSize);
|
|
264
|
+
}
|
|
254
265
|
stdoutBuffer = "";
|
|
255
266
|
}
|
|
256
267
|
});
|
|
@@ -579,6 +590,11 @@ function validateTarget(t, source) {
|
|
|
579
590
|
|
|
580
591
|
// src/diff.ts
|
|
581
592
|
function diffReports(baseline, current) {
|
|
593
|
+
if (baseline.specVersion && current.specVersion && baseline.specVersion !== current.specVersion) {
|
|
594
|
+
throw new Error(
|
|
595
|
+
`Spec version mismatch: baseline is ${baseline.specVersion}, current is ${current.specVersion}. Re-run the baseline with this tool version (or downgrade the tool to match) before diffing.`
|
|
596
|
+
);
|
|
597
|
+
}
|
|
582
598
|
const baseById = new Map(baseline.tests.map((t) => [t.id, t]));
|
|
583
599
|
const curById = new Map(current.tests.map((t) => [t.id, t]));
|
|
584
600
|
const regressions = [];
|
|
@@ -676,7 +692,7 @@ function hasRegressions(summary) {
|
|
|
676
692
|
}
|
|
677
693
|
|
|
678
694
|
// src/mcp/server.ts
|
|
679
|
-
import { existsSync as existsSync2, readFileSync as readFileSync2 } from "fs";
|
|
695
|
+
import { existsSync as existsSync2, readFileSync as readFileSync2, realpathSync } from "fs";
|
|
680
696
|
import { dirname, join as join2, resolve as resolve2 } from "path";
|
|
681
697
|
import { fileURLToPath } from "url";
|
|
682
698
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
@@ -748,7 +764,7 @@ function computeScore(tests) {
|
|
|
748
764
|
// src/types.ts
|
|
749
765
|
var REPORT_SCHEMA_VERSION = "1";
|
|
750
766
|
var TEST_DEFINITIONS = [
|
|
751
|
-
// ── Transport (13
|
|
767
|
+
// ── Transport (16 tests: 13 HTTP + 3 stdio) ──────────────────────
|
|
752
768
|
{
|
|
753
769
|
id: "transport-post",
|
|
754
770
|
name: "HTTP POST accepted",
|
|
@@ -897,7 +913,7 @@ var TEST_DEFINITIONS = [
|
|
|
897
913
|
recommendation: "Return JSON-RPC error -32601 (Method not found) for unknown methods. Do not exit the process or disconnect \u2014 the client should be able to keep using the session after an error.",
|
|
898
914
|
transports: ["stdio"]
|
|
899
915
|
},
|
|
900
|
-
// ── Lifecycle (
|
|
916
|
+
// ── Lifecycle (21 tests) ─────────────────────────────────────────
|
|
901
917
|
{
|
|
902
918
|
id: "lifecycle-init",
|
|
903
919
|
name: "Initialize handshake",
|
|
@@ -1595,17 +1611,13 @@ var STACK_TRACE_PATTERNS = [
|
|
|
1595
1611
|
// PHP
|
|
1596
1612
|
/panicked\s+at\s+'/i,
|
|
1597
1613
|
// Rust
|
|
1598
|
-
/ENOENT|EACCES|EPERM/,
|
|
1599
|
-
// Node.js system errors
|
|
1600
1614
|
/node_modules\//,
|
|
1601
|
-
// Node.js module paths
|
|
1602
|
-
/\/usr\/local\/|\/home\//,
|
|
1603
|
-
// Unix paths
|
|
1604
|
-
/[A-Z]
|
|
1605
|
-
// Windows paths
|
|
1606
|
-
/
|
|
1607
|
-
// Sensitive terms
|
|
1608
|
-
/jdbc:|mysql:|postgres:|mongodb:/i
|
|
1615
|
+
// Node.js module paths (filesystem layout leak)
|
|
1616
|
+
/\/usr\/local\/|\/home\/|\/root\//,
|
|
1617
|
+
// Unix absolute paths
|
|
1618
|
+
/[A-Z]:\\[\w\s.-]+\\[\w\s.-]+/,
|
|
1619
|
+
// Windows absolute paths (drive + 2+ segments)
|
|
1620
|
+
/jdbc:|mysql:\/\/|postgres(?:ql)?:\/\/|mongodb(?:\+srv)?:\/\//i
|
|
1609
1621
|
// DB connection strings
|
|
1610
1622
|
];
|
|
1611
1623
|
var INTERNAL_IP_PATTERNS = [
|
|
@@ -1624,6 +1636,20 @@ function createIdCounter(start = 0) {
|
|
|
1624
1636
|
let id = start;
|
|
1625
1637
|
return () => ++id;
|
|
1626
1638
|
}
|
|
1639
|
+
function dedupAndCapWarnings(warnings, max) {
|
|
1640
|
+
const seen = /* @__PURE__ */ new Set();
|
|
1641
|
+
const deduped = [];
|
|
1642
|
+
for (const w of warnings) {
|
|
1643
|
+
if (seen.has(w)) continue;
|
|
1644
|
+
seen.add(w);
|
|
1645
|
+
deduped.push(w);
|
|
1646
|
+
}
|
|
1647
|
+
if (deduped.length > max) {
|
|
1648
|
+
const truncated = deduped.length - max;
|
|
1649
|
+
return [...deduped.slice(0, max), `... and ${truncated} more warning(s) suppressed`];
|
|
1650
|
+
}
|
|
1651
|
+
return deduped;
|
|
1652
|
+
}
|
|
1627
1653
|
var STDIO_INCOMPATIBLE_IDS = /* @__PURE__ */ new Set([
|
|
1628
1654
|
// Lifecycle tests that use raw undici for HTTP-specific checks
|
|
1629
1655
|
"lifecycle-string-id",
|
|
@@ -4376,12 +4402,11 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
4376
4402
|
return { passed: true, details: "Unknown method returned JSON-RPC error; subsequent ping succeeded" };
|
|
4377
4403
|
}
|
|
4378
4404
|
);
|
|
4379
|
-
const MAX_WARNINGS = 100;
|
|
4380
|
-
if (warnings.length > MAX_WARNINGS) {
|
|
4381
|
-
const truncated = warnings.length - MAX_WARNINGS;
|
|
4382
|
-
warnings.splice(MAX_WARNINGS, truncated, `... and ${truncated} more warning(s) suppressed`);
|
|
4383
|
-
}
|
|
4384
4405
|
if (inFlight.size > 0) await drainPool();
|
|
4406
|
+
const MAX_WARNINGS = 50;
|
|
4407
|
+
const capped = dedupAndCapWarnings(warnings, MAX_WARNINGS);
|
|
4408
|
+
warnings.length = 0;
|
|
4409
|
+
warnings.push(...capped);
|
|
4385
4410
|
const { score, grade, overall, summary, categories } = computeScore(tests);
|
|
4386
4411
|
const badge = generateBadge(displayUrl);
|
|
4387
4412
|
return {
|
|
@@ -4416,7 +4441,7 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
4416
4441
|
function registerTools(server) {
|
|
4417
4442
|
server.tool(
|
|
4418
4443
|
"mcp_compliance_test",
|
|
4419
|
-
"Run the full MCP compliance test suite against a server URL. Returns grade (A-F), score, and detailed results for all
|
|
4444
|
+
"Run the full MCP compliance test suite against a server URL. Returns grade (A-F), score, and detailed results for all 88 tests covering transport, lifecycle, tools, resources, prompts, errors, schema validation, and security.",
|
|
4420
4445
|
{
|
|
4421
4446
|
url: z.string().url().describe("The MCP server URL to test (must be HTTP or HTTPS)"),
|
|
4422
4447
|
auth: z.string().optional().describe('Authorization header value (e.g., "Bearer tok123")'),
|
|
@@ -4611,8 +4636,16 @@ async function startServer() {
|
|
|
4611
4636
|
const transport = new StdioServerTransport();
|
|
4612
4637
|
await server.connect(transport);
|
|
4613
4638
|
}
|
|
4614
|
-
|
|
4615
|
-
|
|
4639
|
+
function isInvokedDirectly() {
|
|
4640
|
+
const argv1 = process.argv[1];
|
|
4641
|
+
if (!argv1) return false;
|
|
4642
|
+
try {
|
|
4643
|
+
return realpathSync(argv1) === realpathSync(fileURLToPath(import.meta.url));
|
|
4644
|
+
} catch {
|
|
4645
|
+
return false;
|
|
4646
|
+
}
|
|
4647
|
+
}
|
|
4648
|
+
if (isInvokedDirectly()) {
|
|
4616
4649
|
startServer().catch((err) => {
|
|
4617
4650
|
console.error("MCP server error:", err);
|
|
4618
4651
|
process.exit(1);
|
|
@@ -5380,7 +5413,9 @@ Testing ${describeTarget(transportTarget)}...
|
|
|
5380
5413
|
skip,
|
|
5381
5414
|
onProgress: verbose ? (testId, passed, details) => {
|
|
5382
5415
|
const icon = passed ? chalk2.green("PASS") : chalk2.red("FAIL");
|
|
5383
|
-
|
|
5416
|
+
const stream = opts.format === "terminal" ? process.stdout : process.stderr;
|
|
5417
|
+
stream.write(` ${icon} ${testId} \u2014 ${details}
|
|
5418
|
+
`);
|
|
5384
5419
|
} : void 0
|
|
5385
5420
|
});
|
|
5386
5421
|
if (verbose && opts.format === "terminal") {
|
|
@@ -5414,6 +5449,16 @@ Badge SVG written to ${opts.output}`));
|
|
|
5414
5449
|
console.error(chalk2.red("\nError: --watch only applies to stdio targets (HTTP servers are remote).\n"));
|
|
5415
5450
|
process.exit(1);
|
|
5416
5451
|
}
|
|
5452
|
+
if (opts.format !== "terminal" && opts.format !== "markdown" && opts.format !== "html") {
|
|
5453
|
+
console.error(
|
|
5454
|
+
chalk2.red(
|
|
5455
|
+
`
|
|
5456
|
+
Error: --watch is incompatible with --format=${opts.format} (multi-run output would be unparseable). Use --format=terminal.
|
|
5457
|
+
`
|
|
5458
|
+
)
|
|
5459
|
+
);
|
|
5460
|
+
process.exit(1);
|
|
5461
|
+
}
|
|
5417
5462
|
await runOnce();
|
|
5418
5463
|
let pending = null;
|
|
5419
5464
|
let running = false;
|
|
@@ -5427,8 +5472,9 @@ Badge SVG written to ${opts.output}`));
|
|
|
5427
5472
|
if (running) return;
|
|
5428
5473
|
running = true;
|
|
5429
5474
|
try {
|
|
5430
|
-
|
|
5475
|
+
process.stderr.write(chalk2.dim(`
|
|
5431
5476
|
[watch] ${f} changed \u2014 re-running...
|
|
5477
|
+
|
|
5432
5478
|
`));
|
|
5433
5479
|
await runOnce();
|
|
5434
5480
|
} catch (err) {
|
|
@@ -5440,7 +5486,7 @@ Badge SVG written to ${opts.output}`));
|
|
|
5440
5486
|
});
|
|
5441
5487
|
process.on("SIGINT", () => {
|
|
5442
5488
|
watcher.close();
|
|
5443
|
-
|
|
5489
|
+
process.stderr.write(chalk2.dim("\n[watch] stopped\n"));
|
|
5444
5490
|
process.exit(0);
|
|
5445
5491
|
});
|
|
5446
5492
|
await new Promise(() => {
|
package/dist/mcp/server.js
CHANGED
|
@@ -2,10 +2,10 @@ import {
|
|
|
2
2
|
SPEC_BASE,
|
|
3
3
|
TEST_DEFINITIONS,
|
|
4
4
|
runComplianceSuite
|
|
5
|
-
} from "../chunk-
|
|
5
|
+
} from "../chunk-BX22BHC5.js";
|
|
6
6
|
|
|
7
7
|
// src/mcp/server.ts
|
|
8
|
-
import { existsSync, readFileSync } from "fs";
|
|
8
|
+
import { existsSync, readFileSync, realpathSync } from "fs";
|
|
9
9
|
import { dirname, join, resolve } from "path";
|
|
10
10
|
import { fileURLToPath } from "url";
|
|
11
11
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
@@ -16,7 +16,7 @@ import { z } from "zod";
|
|
|
16
16
|
function registerTools(server) {
|
|
17
17
|
server.tool(
|
|
18
18
|
"mcp_compliance_test",
|
|
19
|
-
"Run the full MCP compliance test suite against a server URL. Returns grade (A-F), score, and detailed results for all
|
|
19
|
+
"Run the full MCP compliance test suite against a server URL. Returns grade (A-F), score, and detailed results for all 88 tests covering transport, lifecycle, tools, resources, prompts, errors, schema validation, and security.",
|
|
20
20
|
{
|
|
21
21
|
url: z.string().url().describe("The MCP server URL to test (must be HTTP or HTTPS)"),
|
|
22
22
|
auth: z.string().optional().describe('Authorization header value (e.g., "Bearer tok123")'),
|
|
@@ -211,8 +211,16 @@ async function startServer() {
|
|
|
211
211
|
const transport = new StdioServerTransport();
|
|
212
212
|
await server.connect(transport);
|
|
213
213
|
}
|
|
214
|
-
|
|
215
|
-
|
|
214
|
+
function isInvokedDirectly() {
|
|
215
|
+
const argv1 = process.argv[1];
|
|
216
|
+
if (!argv1) return false;
|
|
217
|
+
try {
|
|
218
|
+
return realpathSync(argv1) === realpathSync(fileURLToPath(import.meta.url));
|
|
219
|
+
} catch {
|
|
220
|
+
return false;
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
if (isInvokedDirectly()) {
|
|
216
224
|
startServer().catch((err) => {
|
|
217
225
|
console.error("MCP server error:", err);
|
|
218
226
|
process.exit(1);
|
package/dist/runner.d.ts
CHANGED
|
@@ -89,7 +89,7 @@ type TransportTarget = {
|
|
|
89
89
|
cwd?: string;
|
|
90
90
|
verbose?: boolean;
|
|
91
91
|
};
|
|
92
|
-
/** All
|
|
92
|
+
/** All 88 test IDs with descriptions for the explain command */
|
|
93
93
|
declare const TEST_DEFINITIONS: TestDefinition[];
|
|
94
94
|
|
|
95
95
|
declare function computeGrade(score: number): Grade;
|
|
@@ -142,6 +142,14 @@ declare function parseSSEResponse(text: string): any;
|
|
|
142
142
|
|
|
143
143
|
declare const SPEC_VERSION = "2025-11-25";
|
|
144
144
|
declare const SPEC_BASE = "https://modelcontextprotocol.io/specification/2025-11-25";
|
|
145
|
+
/**
|
|
146
|
+
* Dedupe and cap a list of warnings, preserving insertion order and
|
|
147
|
+
* appending a truncation sentinel when capped. Extracted so the cap
|
|
148
|
+
* semantics can be unit-tested without spinning up a suite run.
|
|
149
|
+
*
|
|
150
|
+
* @internal Exported for testing.
|
|
151
|
+
*/
|
|
152
|
+
declare function dedupAndCapWarnings(warnings: readonly string[], max: number): string[];
|
|
145
153
|
|
|
146
154
|
interface PreviewOptions {
|
|
147
155
|
/** Transport to filter against. Defaults to "http". */
|
|
@@ -206,4 +214,4 @@ interface RunOptions {
|
|
|
206
214
|
*/
|
|
207
215
|
declare function runComplianceSuite(target: string | TransportTarget, options?: RunOptions): Promise<ComplianceReport>;
|
|
208
216
|
|
|
209
|
-
export { type ComplianceReport, type PreviewOptions, type RunOptions, SPEC_BASE, SPEC_VERSION, TEST_DEFINITIONS, type TestResult, computeGrade, computeScore, generateBadge, parseSSEResponse, previewTests, runComplianceSuite, urlHash };
|
|
217
|
+
export { type ComplianceReport, type PreviewOptions, type RunOptions, SPEC_BASE, SPEC_VERSION, TEST_DEFINITIONS, type TestResult, computeGrade, computeScore, dedupAndCapWarnings, generateBadge, parseSSEResponse, previewTests, runComplianceSuite, urlHash };
|
package/dist/runner.js
CHANGED
|
@@ -4,18 +4,20 @@ import {
|
|
|
4
4
|
TEST_DEFINITIONS,
|
|
5
5
|
computeGrade,
|
|
6
6
|
computeScore,
|
|
7
|
+
dedupAndCapWarnings,
|
|
7
8
|
generateBadge,
|
|
8
9
|
parseSSEResponse,
|
|
9
10
|
previewTests,
|
|
10
11
|
runComplianceSuite,
|
|
11
12
|
urlHash
|
|
12
|
-
} from "./chunk-
|
|
13
|
+
} from "./chunk-BX22BHC5.js";
|
|
13
14
|
export {
|
|
14
15
|
SPEC_BASE,
|
|
15
16
|
SPEC_VERSION,
|
|
16
17
|
TEST_DEFINITIONS,
|
|
17
18
|
computeGrade,
|
|
18
19
|
computeScore,
|
|
20
|
+
dedupAndCapWarnings,
|
|
19
21
|
generateBadge,
|
|
20
22
|
parseSSEResponse,
|
|
21
23
|
previewTests,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yawlabs/mcp-compliance",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.13.1",
|
|
4
4
|
"description": "CLI tool and MCP server that tests MCP servers for spec compliance",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Yaw Labs <contact@yaw.sh> (https://yaw.sh)",
|
|
@@ -42,7 +42,7 @@
|
|
|
42
42
|
"dependencies": {
|
|
43
43
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
44
44
|
"chalk": "^5.4.1",
|
|
45
|
-
"commander": "^
|
|
45
|
+
"commander": "^14.0.3",
|
|
46
46
|
"undici": "^7.8.0",
|
|
47
47
|
"zod": "^3.24.4"
|
|
48
48
|
},
|
|
@@ -57,7 +57,7 @@
|
|
|
57
57
|
"vitest": "^3.1.1"
|
|
58
58
|
},
|
|
59
59
|
"engines": {
|
|
60
|
-
"node": ">=
|
|
60
|
+
"node": ">=20"
|
|
61
61
|
},
|
|
62
62
|
"keywords": [
|
|
63
63
|
"mcp",
|