@yawlabs/mcp-compliance 0.9.1 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +68 -2
- package/dist/{chunk-CH2E27X5.js → chunk-7KISK3FS.js} +99 -9
- package/dist/index.js +700 -187
- package/dist/mcp/server.js +1 -1
- package/dist/runner.d.ts +10 -1
- package/dist/runner.js +1 -1
- package/package.json +4 -1
- package/schemas/report.v1.json +165 -0
package/README.md
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
[GitHub stars](https://github.com/YawLabs/mcp-compliance/stargazers)
|
|
6
6
|
[CI status](https://github.com/YawLabs/mcp-compliance/actions/workflows/ci.yml)
|
|
7
7
|
|
|
8
|
-
**Test any MCP server for spec compliance.**
|
|
8
|
+
**Test any MCP server for spec compliance.** 85-test suite covering transport, lifecycle, tools, resources, prompts, error handling, schema validation, and security against the [MCP specification](https://modelcontextprotocol.io/specification/2025-11-25). Works against **HTTP endpoints** (`https://my-server.com/mcp`) and **stdio servers** (`npx @modelcontextprotocol/server-filesystem /tmp`) alike. Available as a CLI, an MCP server, and a programmatic API.
|
|
9
9
|
|
|
10
10
|
Built and maintained by [Yaw Labs](https://yaw.sh).
|
|
11
11
|
|
|
@@ -128,6 +128,19 @@ On Windows, `npx` and other `.cmd` shims are handled automatically by spawning t
|
|
|
128
128
|
|
|
129
129
|
### CI integration
|
|
130
130
|
|
|
131
|
+
**GitHub Action** (drop into any `.github/workflows/*.yml`):
|
|
132
|
+
|
|
133
|
+
```yaml
|
|
134
|
+
- uses: YawLabs/mcp-compliance@v0
|
|
135
|
+
with:
|
|
136
|
+
target: 'node ./dist/server.js' # or a URL like https://my-server.com/mcp
|
|
137
|
+
format: github # ::error / ::warning annotations on the PR
|
|
138
|
+
strict: 'true' # exit non-zero if any required test fails
|
|
139
|
+
min-grade: 'A' # also exit non-zero if the grade drops below A
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
**Manual CLI invocation:**
|
|
143
|
+
|
|
131
144
|
```bash
|
|
132
145
|
# GitHub Actions: emits ::error / ::warning annotations inline on the PR
|
|
133
146
|
mcp-compliance test https://my-server.com/mcp --format github --strict
|
|
@@ -135,11 +148,30 @@ mcp-compliance test https://my-server.com/mcp --format github --strict
|
|
|
135
148
|
# Slack/Linear/PR comment: drop the body straight into a comment
|
|
136
149
|
mcp-compliance test https://my-server.com/mcp --format markdown > report.md
|
|
137
150
|
|
|
151
|
+
# HTML report (self-contained, share anywhere — issue comments, S3, GitHub Pages)
|
|
152
|
+
mcp-compliance test https://my-server.com/mcp --format html > report.html
|
|
153
|
+
|
|
138
154
|
# Block release if grade slips below B
|
|
139
155
|
mcp-compliance test https://my-server.com/mcp --min-grade B
|
|
140
156
|
|
|
141
157
|
# Preview which tests will run before connecting (handy for --only/--skip authoring)
|
|
142
158
|
mcp-compliance test --list --transport stdio --skip security
|
|
159
|
+
|
|
160
|
+
# Diff two runs — exit 1 if anything that was passing is now failing
|
|
161
|
+
mcp-compliance test https://my-server.com/mcp --format json > current.json
|
|
162
|
+
mcp-compliance diff baseline.json current.json
|
|
163
|
+
|
|
164
|
+
# Watch mode for stdio dev loop — re-runs on file changes in cwd
|
|
165
|
+
mcp-compliance test --watch -- node ./dist/server.js
|
|
166
|
+
|
|
167
|
+
# Latency benchmark
|
|
168
|
+
mcp-compliance benchmark -r 200 -c 4 -- node ./dist/server.js
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
**Docker:**
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
docker run --rm ghcr.io/yawlabs/mcp-compliance test https://my-server.com/mcp
|
|
143
175
|
```
|
|
144
176
|
|
|
145
177
|
### Scaffold a config
|
|
@@ -447,7 +479,7 @@ Restart your MCP client and approve the server when prompted.
|
|
|
447
479
|
|
|
448
480
|
### Tools
|
|
449
481
|
|
|
450
|
-
- **mcp_compliance_test** — Run the full
|
|
482
|
+
- **mcp_compliance_test** — Run the full 85-test suite against a URL or stdio command. Supports auth, custom headers, env vars, timeout, retries, and category/test filtering. Returns grade, score, and detailed results.
|
|
451
483
|
- **mcp_compliance_badge** — Get the badge markdown/HTML for a server. Supports auth and custom headers.
|
|
452
484
|
- **mcp_compliance_explain** — Explain what a specific test ID checks and why it matters.
|
|
453
485
|
|
|
@@ -468,8 +500,42 @@ const report2 = await runComplianceSuite('https://my-server.com/mcp', {
|
|
|
468
500
|
retries: 1,
|
|
469
501
|
only: ['transport', 'lifecycle'],
|
|
470
502
|
});
|
|
503
|
+
|
|
504
|
+
// Live progress for streaming UIs (e.g. server-sent-events to a browser)
|
|
505
|
+
await runComplianceSuite('https://my-server.com/mcp', {
|
|
506
|
+
onTestComplete: (result) => {
|
|
507
|
+
// result has the full TestResult: id, name, category, required,
|
|
508
|
+
// passed, details, durationMs, specRef. Push it to your client.
|
|
509
|
+
sendToClient(result);
|
|
510
|
+
},
|
|
511
|
+
});
|
|
512
|
+
```
|
|
513
|
+
|
|
514
|
+
## Report schema
|
|
515
|
+
|
|
516
|
+
The JSON output of the test suite is a stable, versioned contract. Every report includes a `schemaVersion` field at the top level. The full JSON Schema lives at [`schemas/report.v1.json`](./schemas/report.v1.json) and is shipped with the npm package.
|
|
517
|
+
|
|
518
|
+
```jsonc
|
|
519
|
+
{
|
|
520
|
+
"schemaVersion": "1", // bumped on breaking changes to the report shape
|
|
521
|
+
"specVersion": "2025-11-25", // MCP spec version tested against
|
|
522
|
+
"toolVersion": "0.10.0", // mcp-compliance version that produced the report
|
|
523
|
+
"url": "...",
|
|
524
|
+
"timestamp": "...",
|
|
525
|
+
"grade": "A",
|
|
526
|
+
"score": 92.5,
|
|
527
|
+
"tests": [ ... ],
|
|
528
|
+
// ...
|
|
529
|
+
}
|
|
471
530
|
```
|
|
472
531
|
|
|
532
|
+
Consumer guidance:
|
|
533
|
+
|
|
534
|
+
- Pin against `schemaVersion`. Reject reports with an unknown version rather than guessing at the shape.
|
|
535
|
+
- The schema validates with any Draft 2020-12 validator (e.g. `ajv`).
|
|
536
|
+
- Within a major version, additions are non-breaking. Renames, removals, or type changes bump the version.
|
|
537
|
+
- Two runs against the same server produce equivalent grade, score, and per-test pass/fail (modulo timings/timestamps).
|
|
538
|
+
|
|
473
539
|
## Specification
|
|
474
540
|
|
|
475
541
|
The compliance testing methodology is published as an open specification:
|
|
@@ -5,7 +5,7 @@ import { request as request2 } from "undici";
|
|
|
5
5
|
// src/badge.ts
|
|
6
6
|
import { createHash } from "crypto";
|
|
7
7
|
function urlHash(url) {
|
|
8
|
-
return createHash("sha256").update(url).digest("hex").slice(0,
|
|
8
|
+
return createHash("sha256").update(url).digest("hex").slice(0, 24);
|
|
9
9
|
}
|
|
10
10
|
function generateBadge(url) {
|
|
11
11
|
const hash = urlHash(url);
|
|
@@ -209,9 +209,21 @@ function createStdioTransport(opts) {
|
|
|
209
209
|
let exited = false;
|
|
210
210
|
let exitCode = null;
|
|
211
211
|
let spawnError = null;
|
|
212
|
+
let spawned = false;
|
|
212
213
|
const pending = /* @__PURE__ */ new Map();
|
|
213
214
|
let stdoutBuffer = "";
|
|
214
215
|
let stderrBuffer = "";
|
|
216
|
+
const spawnReady = new Promise((resolve, reject) => {
|
|
217
|
+
child.once("spawn", () => {
|
|
218
|
+
spawned = true;
|
|
219
|
+
resolve();
|
|
220
|
+
});
|
|
221
|
+
child.once("error", (err) => {
|
|
222
|
+
if (!spawned) reject(err);
|
|
223
|
+
});
|
|
224
|
+
});
|
|
225
|
+
spawnReady.catch(() => {
|
|
226
|
+
});
|
|
215
227
|
child.on("error", (err) => {
|
|
216
228
|
spawnError = err;
|
|
217
229
|
rejectAllPending(err);
|
|
@@ -281,6 +293,15 @@ function createStdioTransport(opts) {
|
|
|
281
293
|
${snippet.replace(/\n/g, "\n ")}`;
|
|
282
294
|
}
|
|
283
295
|
async function writeLine(line) {
|
|
296
|
+
if (!spawned && !spawnError) {
|
|
297
|
+
try {
|
|
298
|
+
await spawnReady;
|
|
299
|
+
} catch (err) {
|
|
300
|
+
throw new Error(
|
|
301
|
+
annotateWithStderr(`stdio transport: spawn failed \u2014 ${err instanceof Error ? err.message : String(err)}`)
|
|
302
|
+
);
|
|
303
|
+
}
|
|
304
|
+
}
|
|
284
305
|
if (exited) {
|
|
285
306
|
throw new Error(annotateWithStderr(`stdio transport: child has exited (code ${exitCode})`));
|
|
286
307
|
}
|
|
@@ -375,6 +396,7 @@ function createStdioTransport(opts) {
|
|
|
375
396
|
}
|
|
376
397
|
|
|
377
398
|
// src/types.ts
|
|
399
|
+
var REPORT_SCHEMA_VERSION = "1";
|
|
378
400
|
var TEST_DEFINITIONS = [
|
|
379
401
|
// ── Transport (13 tests) ─────────────────────────────────────────
|
|
380
402
|
{
|
|
@@ -679,6 +701,15 @@ var TEST_DEFINITIONS = [
|
|
|
679
701
|
description: "Sends a tools/call request with _meta.progressToken and checks if the server sends progress notifications via SSE. Progress support is optional but recommended for long-running operations.",
|
|
680
702
|
recommendation: "When a request includes _meta.progressToken, send notifications/progress events via SSE to report progress. Include progressToken, progress (current), and optionally total fields."
|
|
681
703
|
},
|
|
704
|
+
{
|
|
705
|
+
id: "lifecycle-meta-tolerance",
|
|
706
|
+
name: "Tolerates _meta field on requests",
|
|
707
|
+
category: "lifecycle",
|
|
708
|
+
required: false,
|
|
709
|
+
specRef: "basic/utilities#_meta",
|
|
710
|
+
description: "Sends a ping with params._meta = { extra: 'value' } and verifies the server doesn't error. The 2025-11-25 spec allows arbitrary _meta on any request; servers should ignore unknown _meta fields gracefully.",
|
|
711
|
+
recommendation: "Treat the _meta field as opaque \u2014 pass it through your request validator, but do not reject requests for unknown _meta keys. The MCP spec reserves _meta for protocol/transport metadata and forward-compat extensibility."
|
|
712
|
+
},
|
|
682
713
|
// ── Tools (4 tests) ──────────────────────────────────────────────
|
|
683
714
|
{
|
|
684
715
|
id: "tools-list",
|
|
@@ -1228,14 +1259,20 @@ var STDIO_INCOMPATIBLE_IDS = /* @__PURE__ */ new Set([
|
|
|
1228
1259
|
"error-parse-code",
|
|
1229
1260
|
"error-invalid-request-code",
|
|
1230
1261
|
// Security tests that are inherently HTTP-layer (auth headers,
|
|
1231
|
-
// sessions, CORS, TLS, rate limits, RFC 9728 metadata).
|
|
1262
|
+
// sessions, CORS, TLS, rate limits, RFC 9728 metadata). For stdio
|
|
1263
|
+
// servers these don't apply — the parent process owns the trust
|
|
1264
|
+
// boundary, not the server.
|
|
1232
1265
|
"security-tls-required",
|
|
1233
1266
|
"security-oauth-metadata",
|
|
1234
1267
|
"security-token-in-uri",
|
|
1235
1268
|
"security-rate-limiting",
|
|
1236
1269
|
"security-cors-headers",
|
|
1237
1270
|
"security-origin-validation",
|
|
1238
|
-
"security-session-not-auth"
|
|
1271
|
+
"security-session-not-auth",
|
|
1272
|
+
"security-auth-required",
|
|
1273
|
+
"security-auth-malformed",
|
|
1274
|
+
"security-www-authenticate",
|
|
1275
|
+
"security-session-entropy"
|
|
1239
1276
|
]);
|
|
1240
1277
|
function supportsTransport(def, kind) {
|
|
1241
1278
|
if (!def) return true;
|
|
@@ -1299,8 +1336,11 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
1299
1336
|
return !options.skip.includes(category) && !options.skip.includes(id);
|
|
1300
1337
|
}
|
|
1301
1338
|
return true;
|
|
1339
|
+
}, looksRejected2 = function(text, isErrorFlag) {
|
|
1340
|
+
if (isErrorFlag) return true;
|
|
1341
|
+
return REJECTION_PATTERNS.some((p) => p.test(text));
|
|
1302
1342
|
};
|
|
1303
|
-
var buildHeaders = buildHeaders2, shouldRun = shouldRun2;
|
|
1343
|
+
var buildHeaders = buildHeaders2, shouldRun = shouldRun2, looksRejected = looksRejected2;
|
|
1304
1344
|
const backendUrl = resolvedTarget.type === "http" ? resolvedTarget.url : "";
|
|
1305
1345
|
const userHeaders = resolvedTarget.type === "http" ? resolvedTarget.headers ?? options.headers ?? {} : {};
|
|
1306
1346
|
const displayUrl = resolvedTarget.type === "http" ? resolvedTarget.url : `stdio:${resolvedTarget.command}${resolvedTarget.args?.length ? ` ${resolvedTarget.args.join(" ")}` : ""}`;
|
|
@@ -1382,7 +1422,7 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
1382
1422
|
if (attempt < retries) await new Promise((r) => setTimeout(r, 1e3 * (attempt + 1)));
|
|
1383
1423
|
}
|
|
1384
1424
|
}
|
|
1385
|
-
|
|
1425
|
+
const result = {
|
|
1386
1426
|
id,
|
|
1387
1427
|
name,
|
|
1388
1428
|
category,
|
|
@@ -1391,8 +1431,10 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
1391
1431
|
details: lastResult.details,
|
|
1392
1432
|
durationMs: Date.now() - start,
|
|
1393
1433
|
specRef: `${SPEC_BASE}/${specRef}`
|
|
1394
|
-
}
|
|
1434
|
+
};
|
|
1435
|
+
tests.push(result);
|
|
1395
1436
|
options.onProgress?.(id, lastResult.passed, lastResult.details);
|
|
1437
|
+
options.onTestComplete?.(result);
|
|
1396
1438
|
}
|
|
1397
1439
|
await test(
|
|
1398
1440
|
"transport-post",
|
|
@@ -1984,6 +2026,28 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
1984
2026
|
}
|
|
1985
2027
|
}
|
|
1986
2028
|
);
|
|
2029
|
+
await test(
|
|
2030
|
+
"lifecycle-meta-tolerance",
|
|
2031
|
+
"Tolerates _meta field on requests",
|
|
2032
|
+
"lifecycle",
|
|
2033
|
+
false,
|
|
2034
|
+
"basic/utilities#_meta",
|
|
2035
|
+
async () => {
|
|
2036
|
+
try {
|
|
2037
|
+
const res = await rpc("ping", { _meta: { "mcp-compliance/probe": "1" } });
|
|
2038
|
+
const body = res.body;
|
|
2039
|
+
if (body.error) {
|
|
2040
|
+
return {
|
|
2041
|
+
passed: false,
|
|
2042
|
+
details: `Server rejected _meta on ping (code ${body.error.code}). _meta should be ignored, not error.`
|
|
2043
|
+
};
|
|
2044
|
+
}
|
|
2045
|
+
return { passed: true, details: "Server accepted ping with arbitrary _meta field" };
|
|
2046
|
+
} catch (err) {
|
|
2047
|
+
return { passed: false, details: `Error: ${err instanceof Error ? err.message : String(err)}` };
|
|
2048
|
+
}
|
|
2049
|
+
}
|
|
2050
|
+
);
|
|
1987
2051
|
await test(
|
|
1988
2052
|
"transport-content-type-init",
|
|
1989
2053
|
"Initialize response has valid content type",
|
|
@@ -3284,25 +3348,50 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
3284
3348
|
}
|
|
3285
3349
|
}
|
|
3286
3350
|
);
|
|
3351
|
+
const REJECTION_PATTERNS = [
|
|
3352
|
+
/access denied/i,
|
|
3353
|
+
/permission denied/i,
|
|
3354
|
+
/not allowed/i,
|
|
3355
|
+
/not permitted/i,
|
|
3356
|
+
/outside (allowed|permitted)/i,
|
|
3357
|
+
/forbidden/i,
|
|
3358
|
+
/unauthorized/i,
|
|
3359
|
+
/invalid (path|input|argument|parameter|request)/i,
|
|
3360
|
+
/(payload|request) (rejected|blocked|refused)/i,
|
|
3361
|
+
/enoent|eacces|eperm/i,
|
|
3362
|
+
/sandbox(ed)?/i,
|
|
3363
|
+
/(no such file|file not found)/i,
|
|
3364
|
+
/\binvalid\b.*\b(input|json|argument|parameter|character)/i
|
|
3365
|
+
];
|
|
3287
3366
|
async function runInjectionTest(toolName, paramName, payloads, detectPattern, label) {
|
|
3288
3367
|
const issues = [];
|
|
3368
|
+
let defended = 0;
|
|
3289
3369
|
for (const payload of payloads) {
|
|
3290
3370
|
try {
|
|
3291
3371
|
const res = await rpc("tools/call", { name: toolName, arguments: { [paramName]: payload } });
|
|
3292
|
-
const
|
|
3372
|
+
const result = res.body?.result;
|
|
3373
|
+
const content = result?.content;
|
|
3374
|
+
const isErrorFlag = result?.isError === true;
|
|
3293
3375
|
if (Array.isArray(content)) {
|
|
3294
3376
|
const text = content.map((c) => c.text || "").join(" ");
|
|
3295
3377
|
if (detectPattern.test(text)) {
|
|
3296
|
-
|
|
3378
|
+
if (looksRejected2(text, isErrorFlag)) {
|
|
3379
|
+
defended++;
|
|
3380
|
+
} else {
|
|
3381
|
+
issues.push(`Payload "${payload}" ${label} (output: ${text.substring(0, 100)})`);
|
|
3382
|
+
}
|
|
3383
|
+
} else {
|
|
3384
|
+
defended++;
|
|
3297
3385
|
}
|
|
3298
3386
|
}
|
|
3299
3387
|
} catch {
|
|
3388
|
+
defended++;
|
|
3300
3389
|
}
|
|
3301
3390
|
}
|
|
3302
3391
|
if (issues.length > 0) return { passed: false, details: issues.join("; ") };
|
|
3303
3392
|
return {
|
|
3304
3393
|
passed: true,
|
|
3305
|
-
details: `Tested ${payloads.length} payloads against ${toolName}.${paramName} \u2014 no ${label.split(" ")[0]} detected`
|
|
3394
|
+
details: defended === payloads.length ? `Tested ${payloads.length} payloads against ${toolName}.${paramName} \u2014 server defended (rejected or sanitized)` : `Tested ${payloads.length} payloads against ${toolName}.${paramName} \u2014 no ${label.split(" ")[0]} detected`
|
|
3306
3395
|
};
|
|
3307
3396
|
}
|
|
3308
3397
|
if (toolNames.length > 0) {
|
|
@@ -3841,6 +3930,7 @@ async function runComplianceSuite(target, options = {}) {
|
|
|
3841
3930
|
const { score, grade, overall, summary, categories } = computeScore(tests);
|
|
3842
3931
|
const badge = generateBadge(displayUrl);
|
|
3843
3932
|
return {
|
|
3933
|
+
schemaVersion: REPORT_SCHEMA_VERSION,
|
|
3844
3934
|
specVersion: SPEC_VERSION,
|
|
3845
3935
|
toolVersion: TOOL_VERSION,
|
|
3846
3936
|
url: displayUrl,
|