@bryan-thompson/inspector-assessment-cli 1.12.0 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/assess-full.js +56 -13
- package/build/validate-testbed.js +446 -0
- package/package.json +3 -2
package/build/assess-full.js
CHANGED
|
@@ -20,6 +20,8 @@ import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/
|
|
|
20
20
|
import { AssessmentOrchestrator, } from "../../client/lib/services/assessment/AssessmentOrchestrator.js";
|
|
21
21
|
import { DEFAULT_ASSESSMENT_CONFIG, } from "../../client/lib/lib/assessmentTypes.js";
|
|
22
22
|
import { FULL_CLAUDE_CODE_CONFIG } from "../../client/lib/services/assessment/lib/claudeCodeBridge.js";
|
|
23
|
+
import { createFormatter, } from "../../client/lib/lib/reportFormatters/index.js";
|
|
24
|
+
import { generatePolicyComplianceReport } from "../../client/lib/services/assessment/PolicyComplianceGenerator.js";
|
|
23
25
|
/**
|
|
24
26
|
* Load server configuration from Claude Code's MCP settings
|
|
25
27
|
*/
|
|
@@ -326,17 +328,41 @@ async function runFullAssessment(options) {
|
|
|
326
328
|
return results;
|
|
327
329
|
}
|
|
328
330
|
/**
|
|
329
|
-
* Save results to
|
|
331
|
+
* Save results to file with appropriate format
|
|
330
332
|
*/
|
|
331
|
-
function saveResults(serverName, results,
|
|
332
|
-
const
|
|
333
|
-
|
|
334
|
-
const
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
333
|
+
function saveResults(serverName, results, options) {
|
|
334
|
+
const format = options.format || "json";
|
|
335
|
+
// Generate policy compliance report if requested
|
|
336
|
+
const policyReport = options.includePolicy
|
|
337
|
+
? generatePolicyComplianceReport(results, serverName)
|
|
338
|
+
: undefined;
|
|
339
|
+
// Create formatter with options
|
|
340
|
+
const formatter = createFormatter({
|
|
341
|
+
format,
|
|
342
|
+
includePolicyMapping: options.includePolicy,
|
|
343
|
+
policyReport,
|
|
344
|
+
serverName,
|
|
345
|
+
includeDetails: true,
|
|
346
|
+
prettyPrint: true,
|
|
347
|
+
});
|
|
348
|
+
const fileExtension = formatter.getFileExtension();
|
|
349
|
+
const defaultPath = `/tmp/inspector-full-assessment-${serverName}${fileExtension}`;
|
|
350
|
+
const finalPath = options.outputPath || defaultPath;
|
|
351
|
+
// For JSON format, add metadata wrapper
|
|
352
|
+
if (format === "json") {
|
|
353
|
+
const output = {
|
|
354
|
+
timestamp: new Date().toISOString(),
|
|
355
|
+
assessmentType: "full",
|
|
356
|
+
...results,
|
|
357
|
+
...(policyReport ? { policyCompliance: policyReport } : {}),
|
|
358
|
+
};
|
|
359
|
+
fs.writeFileSync(finalPath, JSON.stringify(output, null, 2));
|
|
360
|
+
}
|
|
361
|
+
else {
|
|
362
|
+
// For other formats (markdown), use the formatter
|
|
363
|
+
const content = formatter.format(results);
|
|
364
|
+
fs.writeFileSync(finalPath, content);
|
|
365
|
+
}
|
|
340
366
|
return finalPath;
|
|
341
367
|
}
|
|
342
368
|
/**
|
|
@@ -467,6 +493,20 @@ function parseArgs() {
|
|
|
467
493
|
case "--json":
|
|
468
494
|
options.jsonOnly = true;
|
|
469
495
|
break;
|
|
496
|
+
case "--format":
|
|
497
|
+
case "-f":
|
|
498
|
+
const formatValue = args[++i];
|
|
499
|
+
if (formatValue !== "json" && formatValue !== "markdown") {
|
|
500
|
+
console.error(`Invalid format: ${formatValue}. Valid options: json, markdown`);
|
|
501
|
+
setTimeout(() => process.exit(1), 10);
|
|
502
|
+
options.helpRequested = true;
|
|
503
|
+
return options;
|
|
504
|
+
}
|
|
505
|
+
options.format = formatValue;
|
|
506
|
+
break;
|
|
507
|
+
case "--include-policy":
|
|
508
|
+
options.includePolicy = true;
|
|
509
|
+
break;
|
|
470
510
|
case "--help":
|
|
471
511
|
case "-h":
|
|
472
512
|
printHelp();
|
|
@@ -508,12 +548,14 @@ Run comprehensive MCP server assessment with all 11 assessor modules.
|
|
|
508
548
|
Options:
|
|
509
549
|
--server, -s <name> Server name (required, or pass as first positional arg)
|
|
510
550
|
--config, -c <path> Path to server config JSON
|
|
511
|
-
--output, -o <path> Output
|
|
551
|
+
--output, -o <path> Output path (default: /tmp/inspector-full-assessment-<server>.<ext>)
|
|
512
552
|
--source <path> Source code path for deep analysis (AUP, portability, etc.)
|
|
513
553
|
--pattern-config, -p <path> Path to custom annotation pattern JSON
|
|
554
|
+
--format, -f <type> Output format: json (default) or markdown
|
|
555
|
+
--include-policy Include policy compliance mapping in report (30 requirements)
|
|
514
556
|
--claude-enabled Enable Claude Code integration for intelligent analysis
|
|
515
557
|
--full Enable all assessment modules (default)
|
|
516
|
-
--json Output only JSON (no console summary)
|
|
558
|
+
--json Output only JSON path (no console summary)
|
|
517
559
|
--verbose, -v Enable verbose logging
|
|
518
560
|
--help, -h Show this help message
|
|
519
561
|
|
|
@@ -534,6 +576,7 @@ Examples:
|
|
|
534
576
|
mcp-assess-full my-server
|
|
535
577
|
mcp-assess-full --server broken-mcp --claude-enabled
|
|
536
578
|
mcp-assess-full --server my-server --source ./my-server --output ./results.json
|
|
579
|
+
mcp-assess-full --server my-server --format markdown --include-policy
|
|
537
580
|
`);
|
|
538
581
|
}
|
|
539
582
|
/**
|
|
@@ -549,7 +592,7 @@ async function main() {
|
|
|
549
592
|
if (!options.jsonOnly) {
|
|
550
593
|
displaySummary(results);
|
|
551
594
|
}
|
|
552
|
-
const outputPath = saveResults(options.serverName, results, options
|
|
595
|
+
const outputPath = saveResults(options.serverName, results, options);
|
|
553
596
|
if (options.jsonOnly) {
|
|
554
597
|
console.log(outputPath);
|
|
555
598
|
}
|
|
@@ -0,0 +1,446 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Testbed Validation CLI
|
|
4
|
+
*
|
|
5
|
+
* Validates the MCP Inspector SecurityAssessor against the reference
|
|
6
|
+
* Vulnerable Testbed implementation to ensure detection accuracy.
|
|
7
|
+
*
|
|
8
|
+
* Runs assessments against both vulnerable and hardened servers,
|
|
9
|
+
* then calculates recall, precision, and false positive rates.
|
|
10
|
+
*
|
|
11
|
+
* Prerequisites:
|
|
12
|
+
* cd /home/bryan/mcp-servers/mcp-vulnerable-testbed && docker-compose up -d
|
|
13
|
+
*
|
|
14
|
+
* Usage:
|
|
15
|
+
* npm run validate:testbed
|
|
16
|
+
* mcp-validate-testbed
|
|
17
|
+
* mcp-validate-testbed --verbose
|
|
18
|
+
*/
|
|
19
|
+
import * as fs from "fs";
|
|
20
|
+
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
|
|
21
|
+
import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js";
|
|
22
|
+
// Import from local client lib
|
|
23
|
+
import { SecurityAssessor } from "../../client/lib/services/assessment/modules/SecurityAssessor.js";
|
|
24
|
+
import { DEFAULT_ASSESSMENT_CONFIG, } from "../../client/lib/lib/assessmentTypes.js";
|
|
25
|
+
/**
|
|
26
|
+
* Testbed Configuration
|
|
27
|
+
*/
|
|
28
|
+
const TESTBED_CONFIG = {
|
|
29
|
+
vulnerable: {
|
|
30
|
+
name: "vulnerable-testbed",
|
|
31
|
+
url: "http://localhost:10900/mcp",
|
|
32
|
+
expectedVulnerabilities: 10,
|
|
33
|
+
},
|
|
34
|
+
hardened: {
|
|
35
|
+
name: "hardened-testbed",
|
|
36
|
+
url: "http://localhost:10901/mcp",
|
|
37
|
+
expectedVulnerabilities: 0,
|
|
38
|
+
},
|
|
39
|
+
};
|
|
40
|
+
/**
|
|
41
|
+
* Expected vulnerable tools (10 total)
|
|
42
|
+
*/
|
|
43
|
+
const VULNERABLE_TOOLS = [
|
|
44
|
+
"vulnerable_calculator_tool",
|
|
45
|
+
"vulnerable_system_exec_tool",
|
|
46
|
+
"vulnerable_data_leak_tool",
|
|
47
|
+
"vulnerable_config_modifier_tool",
|
|
48
|
+
"vulnerable_tool_override_tool",
|
|
49
|
+
"vulnerable_fetcher_tool",
|
|
50
|
+
"vulnerable_unicode_processor_tool",
|
|
51
|
+
"vulnerable_nested_parser_tool",
|
|
52
|
+
"vulnerable_package_installer_tool",
|
|
53
|
+
"vulnerable_rug_pull_tool",
|
|
54
|
+
];
|
|
55
|
+
/**
|
|
56
|
+
* Expected safe tools (should NOT be flagged)
|
|
57
|
+
*/
|
|
58
|
+
const SAFE_TOOLS = [
|
|
59
|
+
"safe_storage_tool_mcp",
|
|
60
|
+
"safe_search_tool_mcp",
|
|
61
|
+
"safe_list_tool_mcp",
|
|
62
|
+
"safe_info_tool_mcp",
|
|
63
|
+
"safe_echo_tool_mcp",
|
|
64
|
+
"safe_validate_tool_mcp",
|
|
65
|
+
];
|
|
66
|
+
/**
|
|
67
|
+
* Check if a server is available
|
|
68
|
+
*/
|
|
69
|
+
async function checkServerHealth(url) {
|
|
70
|
+
try {
|
|
71
|
+
const response = await fetch(url, {
|
|
72
|
+
method: "POST",
|
|
73
|
+
headers: {
|
|
74
|
+
"Content-Type": "application/json",
|
|
75
|
+
Accept: "application/json, text/event-stream",
|
|
76
|
+
},
|
|
77
|
+
body: JSON.stringify({
|
|
78
|
+
jsonrpc: "2.0",
|
|
79
|
+
method: "initialize",
|
|
80
|
+
params: {
|
|
81
|
+
protocolVersion: "2024-11-05",
|
|
82
|
+
capabilities: {},
|
|
83
|
+
clientInfo: { name: "testbed-validator", version: "1.0" },
|
|
84
|
+
},
|
|
85
|
+
id: 1,
|
|
86
|
+
}),
|
|
87
|
+
});
|
|
88
|
+
return response.ok;
|
|
89
|
+
}
|
|
90
|
+
catch {
|
|
91
|
+
return false;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
/**
|
|
95
|
+
* Connect to MCP server
|
|
96
|
+
*/
|
|
97
|
+
async function connectToServer(url) {
|
|
98
|
+
const transport = new StreamableHTTPClientTransport(new URL(url));
|
|
99
|
+
const client = new Client({
|
|
100
|
+
name: "mcp-validate-testbed",
|
|
101
|
+
version: "1.0.0",
|
|
102
|
+
}, {
|
|
103
|
+
capabilities: {},
|
|
104
|
+
});
|
|
105
|
+
await client.connect(transport);
|
|
106
|
+
return client;
|
|
107
|
+
}
|
|
108
|
+
/**
|
|
109
|
+
* Get tools from server
|
|
110
|
+
*/
|
|
111
|
+
async function getTools(client) {
|
|
112
|
+
const response = await client.listTools();
|
|
113
|
+
return response.tools || [];
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* Create callTool wrapper
|
|
117
|
+
*/
|
|
118
|
+
function createCallToolWrapper(client) {
|
|
119
|
+
return async (name, params) => {
|
|
120
|
+
try {
|
|
121
|
+
const response = await client.callTool({
|
|
122
|
+
name,
|
|
123
|
+
arguments: params,
|
|
124
|
+
});
|
|
125
|
+
return {
|
|
126
|
+
content: response.content,
|
|
127
|
+
isError: response.isError || false,
|
|
128
|
+
structuredContent: response
|
|
129
|
+
.structuredContent,
|
|
130
|
+
};
|
|
131
|
+
}
|
|
132
|
+
catch (error) {
|
|
133
|
+
return {
|
|
134
|
+
content: [
|
|
135
|
+
{
|
|
136
|
+
type: "text",
|
|
137
|
+
text: `Error: ${error instanceof Error ? error.message : String(error)}`,
|
|
138
|
+
},
|
|
139
|
+
],
|
|
140
|
+
isError: true,
|
|
141
|
+
};
|
|
142
|
+
}
|
|
143
|
+
};
|
|
144
|
+
}
|
|
145
|
+
/**
|
|
146
|
+
* Run assessment on a server
|
|
147
|
+
*/
|
|
148
|
+
async function assessServer(serverName, url, verbose) {
|
|
149
|
+
if (verbose) {
|
|
150
|
+
console.log(`\n Connecting to ${url}...`);
|
|
151
|
+
}
|
|
152
|
+
const client = await connectToServer(url);
|
|
153
|
+
const tools = await getTools(client);
|
|
154
|
+
if (verbose) {
|
|
155
|
+
console.log(` Found ${tools.length} tools`);
|
|
156
|
+
}
|
|
157
|
+
const config = {
|
|
158
|
+
...DEFAULT_ASSESSMENT_CONFIG,
|
|
159
|
+
securityPatternsToTest: 17,
|
|
160
|
+
reviewerMode: false,
|
|
161
|
+
testTimeout: 30000,
|
|
162
|
+
};
|
|
163
|
+
const context = {
|
|
164
|
+
serverName,
|
|
165
|
+
tools,
|
|
166
|
+
callTool: createCallToolWrapper(client),
|
|
167
|
+
config,
|
|
168
|
+
};
|
|
169
|
+
if (verbose) {
|
|
170
|
+
console.log(` Running security assessment...`);
|
|
171
|
+
}
|
|
172
|
+
const assessor = new SecurityAssessor(config);
|
|
173
|
+
const assessment = await assessor.assess(context);
|
|
174
|
+
await client.close();
|
|
175
|
+
return {
|
|
176
|
+
serverName,
|
|
177
|
+
totalTools: tools.length,
|
|
178
|
+
testsRun: assessment.promptInjectionTests.length,
|
|
179
|
+
vulnerabilitiesFound: assessment.vulnerabilities.length,
|
|
180
|
+
assessment,
|
|
181
|
+
};
|
|
182
|
+
}
|
|
183
|
+
/**
|
|
184
|
+
* Calculate metrics for vulnerable server assessment
|
|
185
|
+
*/
|
|
186
|
+
function calculateVulnerableServerMetrics(result) {
|
|
187
|
+
// Get unique tools flagged as vulnerable
|
|
188
|
+
const flaggedTools = [
|
|
189
|
+
...new Set(result.assessment.promptInjectionTests
|
|
190
|
+
.filter((t) => t.vulnerable)
|
|
191
|
+
.map((t) => t.toolName)),
|
|
192
|
+
];
|
|
193
|
+
// True positives: Vulnerable tools correctly flagged
|
|
194
|
+
const detected = flaggedTools.filter((t) => VULNERABLE_TOOLS.includes(t));
|
|
195
|
+
// False negatives: Vulnerable tools not flagged
|
|
196
|
+
const missed = VULNERABLE_TOOLS.filter((t) => !flaggedTools.includes(t));
|
|
197
|
+
// False positives: Safe tools incorrectly flagged
|
|
198
|
+
const falsePositives = flaggedTools.filter((t) => SAFE_TOOLS.includes(t));
|
|
199
|
+
// True negatives: Safe tools correctly not flagged
|
|
200
|
+
const trueNegatives = SAFE_TOOLS.filter((t) => !flaggedTools.includes(t));
|
|
201
|
+
const tp = detected.length;
|
|
202
|
+
const fn = missed.length;
|
|
203
|
+
const fp = falsePositives.length;
|
|
204
|
+
const tn = trueNegatives.length;
|
|
205
|
+
return {
|
|
206
|
+
recall: tp / (tp + fn) || 0,
|
|
207
|
+
precision: tp / (tp + fp) || 0,
|
|
208
|
+
falsePositiveRate: fp / (fp + tn) || 0,
|
|
209
|
+
falseNegativeRate: fn / (fn + tp) || 0,
|
|
210
|
+
detected,
|
|
211
|
+
missed,
|
|
212
|
+
falsePositives,
|
|
213
|
+
};
|
|
214
|
+
}
|
|
215
|
+
/**
|
|
216
|
+
* Display validation results
|
|
217
|
+
*/
|
|
218
|
+
function displayResults(vulnResult, hardResult, vulnMetrics) {
|
|
219
|
+
console.log("\n" + "=".repeat(70));
|
|
220
|
+
console.log("MCP INSPECTOR TESTBED VALIDATION RESULTS");
|
|
221
|
+
console.log("=".repeat(70));
|
|
222
|
+
if (vulnResult && vulnMetrics) {
|
|
223
|
+
console.log("\nš VULNERABLE SERVER (localhost:10900)");
|
|
224
|
+
console.log("-".repeat(50));
|
|
225
|
+
console.log(`Tools Tested: ${vulnResult.totalTools}`);
|
|
226
|
+
console.log(`Tests Run: ${vulnResult.testsRun}`);
|
|
227
|
+
console.log(`Vulnerabilities Found: ${vulnResult.vulnerabilitiesFound}`);
|
|
228
|
+
console.log("");
|
|
229
|
+
console.log(`Recall: ${(vulnMetrics.recall * 100).toFixed(1)}% (${vulnMetrics.detected.length}/${VULNERABLE_TOOLS.length} vulnerabilities detected)`);
|
|
230
|
+
console.log(`Precision: ${(vulnMetrics.precision * 100).toFixed(1)}% (${vulnMetrics.falsePositives.length} false positives)`);
|
|
231
|
+
console.log(`False Positive Rate: ${(vulnMetrics.falsePositiveRate * 100).toFixed(1)}%`);
|
|
232
|
+
console.log(`False Negative Rate: ${(vulnMetrics.falseNegativeRate * 100).toFixed(1)}%`);
|
|
233
|
+
if (vulnMetrics.missed.length > 0) {
|
|
234
|
+
console.log(`\nā ļø Missed Vulnerabilities (${vulnMetrics.missed.length}):`);
|
|
235
|
+
for (const tool of vulnMetrics.missed) {
|
|
236
|
+
console.log(` - ${tool}`);
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
if (vulnMetrics.falsePositives.length > 0) {
|
|
240
|
+
console.log(`\nā False Positives (${vulnMetrics.falsePositives.length}):`);
|
|
241
|
+
for (const tool of vulnMetrics.falsePositives) {
|
|
242
|
+
console.log(` - ${tool}`);
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
if (vulnMetrics.detected.length > 0) {
|
|
246
|
+
console.log(`\nā
Correctly Detected (${vulnMetrics.detected.length}):`);
|
|
247
|
+
for (const tool of vulnMetrics.detected) {
|
|
248
|
+
console.log(` - ${tool}`);
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
if (hardResult) {
|
|
253
|
+
console.log("\nš HARDENED SERVER (localhost:10901)");
|
|
254
|
+
console.log("-".repeat(50));
|
|
255
|
+
console.log(`Tools Tested: ${hardResult.totalTools}`);
|
|
256
|
+
console.log(`Tests Run: ${hardResult.testsRun}`);
|
|
257
|
+
console.log(`Vulnerabilities Found: ${hardResult.vulnerabilitiesFound}`);
|
|
258
|
+
console.log(`Expected: 0`);
|
|
259
|
+
const hardenedFalsePositives = hardResult.assessment.promptInjectionTests
|
|
260
|
+
.filter((t) => t.vulnerable)
|
|
261
|
+
.map((t) => t.toolName);
|
|
262
|
+
if (hardResult.vulnerabilitiesFound > 0) {
|
|
263
|
+
console.log(`\nā False Positives on Hardened Server:`);
|
|
264
|
+
const uniqueFPs = [...new Set(hardenedFalsePositives)];
|
|
265
|
+
for (const tool of uniqueFPs) {
|
|
266
|
+
console.log(` - ${tool}`);
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
else {
|
|
270
|
+
console.log(`\nā
No false positives on hardened server`);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
// Overall Status
|
|
274
|
+
console.log("\n" + "=".repeat(70));
|
|
275
|
+
console.log("OVERALL VALIDATION STATUS");
|
|
276
|
+
console.log("=".repeat(70));
|
|
277
|
+
const vulnPassed = vulnMetrics &&
|
|
278
|
+
vulnMetrics.recall >= 0.8 &&
|
|
279
|
+
vulnMetrics.falsePositives.length === 0;
|
|
280
|
+
const hardPassed = hardResult && hardResult.vulnerabilitiesFound === 0;
|
|
281
|
+
if (vulnPassed && hardPassed) {
|
|
282
|
+
console.log("\nā
PASS - Inspector meets accuracy targets\n");
|
|
283
|
+
console.log(" Target: 80%+ recall, 0 false positives");
|
|
284
|
+
console.log(` Actual: ${vulnMetrics ? (vulnMetrics.recall * 100).toFixed(1) : 0}% recall, ${vulnMetrics?.falsePositives.length || 0} false positives`);
|
|
285
|
+
}
|
|
286
|
+
else {
|
|
287
|
+
console.log("\nā FAIL - Inspector needs improvement\n");
|
|
288
|
+
if (vulnMetrics && vulnMetrics.recall < 0.8) {
|
|
289
|
+
console.log(` Recall too low: ${(vulnMetrics.recall * 100).toFixed(1)}% (target: 80%+)`);
|
|
290
|
+
}
|
|
291
|
+
if (vulnMetrics && vulnMetrics.falsePositives.length > 0) {
|
|
292
|
+
console.log(` Has false positives: ${vulnMetrics.falsePositives.length} (target: 0)`);
|
|
293
|
+
}
|
|
294
|
+
if (hardResult && hardResult.vulnerabilitiesFound > 0) {
|
|
295
|
+
console.log(` Hardened server flagged: ${hardResult.vulnerabilitiesFound} (target: 0)`);
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
console.log("=".repeat(70));
|
|
299
|
+
console.log("");
|
|
300
|
+
return vulnPassed && hardPassed;
|
|
301
|
+
}
|
|
302
|
+
/**
|
|
303
|
+
* Save results to JSON
|
|
304
|
+
*/
|
|
305
|
+
function saveResults(vulnResult, hardResult, vulnMetrics, outputPath) {
|
|
306
|
+
const finalPath = outputPath || "/tmp/testbed-validation-results.json";
|
|
307
|
+
const output = {
|
|
308
|
+
timestamp: new Date().toISOString(),
|
|
309
|
+
validationType: "testbed-validation",
|
|
310
|
+
vulnerable: vulnResult
|
|
311
|
+
? {
|
|
312
|
+
serverName: vulnResult.serverName,
|
|
313
|
+
tools: vulnResult.totalTools,
|
|
314
|
+
tests: vulnResult.testsRun,
|
|
315
|
+
vulnerabilities: vulnResult.vulnerabilitiesFound,
|
|
316
|
+
metrics: vulnMetrics,
|
|
317
|
+
}
|
|
318
|
+
: null,
|
|
319
|
+
hardened: hardResult
|
|
320
|
+
? {
|
|
321
|
+
serverName: hardResult.serverName,
|
|
322
|
+
tools: hardResult.totalTools,
|
|
323
|
+
tests: hardResult.testsRun,
|
|
324
|
+
vulnerabilities: hardResult.vulnerabilitiesFound,
|
|
325
|
+
}
|
|
326
|
+
: null,
|
|
327
|
+
};
|
|
328
|
+
fs.writeFileSync(finalPath, JSON.stringify(output, null, 2));
|
|
329
|
+
return finalPath;
|
|
330
|
+
}
|
|
331
|
+
/**
|
|
332
|
+
* Parse arguments
|
|
333
|
+
*/
|
|
334
|
+
function parseArgs() {
|
|
335
|
+
const args = process.argv.slice(2);
|
|
336
|
+
const options = {
|
|
337
|
+
verbose: false,
|
|
338
|
+
};
|
|
339
|
+
for (const arg of args) {
|
|
340
|
+
switch (arg) {
|
|
341
|
+
case "--verbose":
|
|
342
|
+
case "-v":
|
|
343
|
+
options.verbose = true;
|
|
344
|
+
break;
|
|
345
|
+
case "--help":
|
|
346
|
+
case "-h":
|
|
347
|
+
printHelp();
|
|
348
|
+
process.exit(0);
|
|
349
|
+
break;
|
|
350
|
+
default:
|
|
351
|
+
if (arg.startsWith("--output=")) {
|
|
352
|
+
options.outputPath = arg.split("=")[1];
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
return options;
|
|
357
|
+
}
|
|
358
|
+
/**
|
|
359
|
+
* Print help
|
|
360
|
+
*/
|
|
361
|
+
function printHelp() {
|
|
362
|
+
console.log(`
|
|
363
|
+
Usage: mcp-validate-testbed [options]
|
|
364
|
+
|
|
365
|
+
Validate MCP Inspector SecurityAssessor against the reference Vulnerable Testbed.
|
|
366
|
+
|
|
367
|
+
Options:
|
|
368
|
+
--verbose, -v Enable verbose logging
|
|
369
|
+
--output=<path> Output JSON path (default: /tmp/testbed-validation-results.json)
|
|
370
|
+
--help, -h Show this help message
|
|
371
|
+
|
|
372
|
+
Prerequisites:
|
|
373
|
+
Start the testbed containers before running:
|
|
374
|
+
cd /home/bryan/mcp-servers/mcp-vulnerable-testbed && docker-compose up -d
|
|
375
|
+
|
|
376
|
+
Validation Targets:
|
|
377
|
+
- Recall: 80%+ (at least 8/10 vulnerabilities detected)
|
|
378
|
+
- Precision: 100% (0 false positives on safe tools)
|
|
379
|
+
- Hardened: 0 vulnerabilities detected
|
|
380
|
+
|
|
381
|
+
Examples:
|
|
382
|
+
mcp-validate-testbed
|
|
383
|
+
mcp-validate-testbed --verbose
|
|
384
|
+
mcp-validate-testbed --output=./validation-results.json
|
|
385
|
+
`);
|
|
386
|
+
}
|
|
387
|
+
/**
|
|
388
|
+
* Main execution
|
|
389
|
+
*/
|
|
390
|
+
async function main() {
|
|
391
|
+
const options = parseArgs();
|
|
392
|
+
console.log("\nš MCP Inspector Testbed Validation");
|
|
393
|
+
console.log("=".repeat(50));
|
|
394
|
+
// Check server availability
|
|
395
|
+
console.log("\nChecking testbed servers...");
|
|
396
|
+
const vulnAvailable = await checkServerHealth(TESTBED_CONFIG.vulnerable.url);
|
|
397
|
+
const hardAvailable = await checkServerHealth(TESTBED_CONFIG.hardened.url);
|
|
398
|
+
if (!vulnAvailable && !hardAvailable) {
|
|
399
|
+
console.error("\nā Testbed containers not running!");
|
|
400
|
+
console.error("\nStart them with:");
|
|
401
|
+
console.error(" cd /home/bryan/mcp-servers/mcp-vulnerable-testbed");
|
|
402
|
+
console.error(" docker-compose up -d");
|
|
403
|
+
process.exit(1);
|
|
404
|
+
}
|
|
405
|
+
let vulnResult = null;
|
|
406
|
+
let hardResult = null;
|
|
407
|
+
let vulnMetrics = null;
|
|
408
|
+
// Test vulnerable server
|
|
409
|
+
if (vulnAvailable) {
|
|
410
|
+
console.log("\nš” Testing VULNERABLE server...");
|
|
411
|
+
try {
|
|
412
|
+
vulnResult = await assessServer(TESTBED_CONFIG.vulnerable.name, TESTBED_CONFIG.vulnerable.url, options.verbose);
|
|
413
|
+
vulnMetrics = calculateVulnerableServerMetrics(vulnResult);
|
|
414
|
+
}
|
|
415
|
+
catch (error) {
|
|
416
|
+
console.error(" Error:", error instanceof Error ? error.message : String(error));
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
else {
|
|
420
|
+
console.log("ā ļø Vulnerable server not available - skipping");
|
|
421
|
+
}
|
|
422
|
+
// Test hardened server
|
|
423
|
+
if (hardAvailable) {
|
|
424
|
+
console.log("\nš” Testing HARDENED server...");
|
|
425
|
+
try {
|
|
426
|
+
hardResult = await assessServer(TESTBED_CONFIG.hardened.name, TESTBED_CONFIG.hardened.url, options.verbose);
|
|
427
|
+
}
|
|
428
|
+
catch (error) {
|
|
429
|
+
console.error(" Error:", error instanceof Error ? error.message : String(error));
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
else {
|
|
433
|
+
console.log("ā ļø Hardened server not available - skipping");
|
|
434
|
+
}
|
|
435
|
+
// Display results
|
|
436
|
+
const passed = displayResults(vulnResult, hardResult, vulnMetrics);
|
|
437
|
+
// Save results
|
|
438
|
+
const outputPath = saveResults(vulnResult, hardResult, vulnMetrics, options.outputPath);
|
|
439
|
+
console.log(`š Results saved to: ${outputPath}\n`);
|
|
440
|
+
// Exit with appropriate code
|
|
441
|
+
process.exit(passed ? 0 : 1);
|
|
442
|
+
}
|
|
443
|
+
main().catch((error) => {
|
|
444
|
+
console.error("\nā Fatal error:", error instanceof Error ? error.message : String(error));
|
|
445
|
+
process.exit(1);
|
|
446
|
+
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bryan-thompson/inspector-assessment-cli",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.13.0",
|
|
4
4
|
"description": "CLI for the Enhanced MCP Inspector with assessment capabilities",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Bryan Thompson <bryan@triepod.ai>",
|
|
@@ -18,7 +18,8 @@
|
|
|
18
18
|
"bin": {
|
|
19
19
|
"mcp-inspector-assess-cli": "build/cli.js",
|
|
20
20
|
"mcp-assess-full": "build/assess-full.js",
|
|
21
|
-
"mcp-assess-security": "build/assess-security.js"
|
|
21
|
+
"mcp-assess-security": "build/assess-security.js",
|
|
22
|
+
"mcp-validate-testbed": "build/validate-testbed.js"
|
|
22
23
|
},
|
|
23
24
|
"publishConfig": {
|
|
24
25
|
"access": "public"
|