@bryan-thompson/inspector-assessment-cli 1.11.1 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/assess-full.js +65 -13
- package/build/validate-testbed.js +446 -0
- package/package.json +3 -2
package/build/assess-full.js
CHANGED
|
@@ -20,6 +20,8 @@ import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/
|
|
|
20
20
|
import { AssessmentOrchestrator, } from "../../client/lib/services/assessment/AssessmentOrchestrator.js";
|
|
21
21
|
import { DEFAULT_ASSESSMENT_CONFIG, } from "../../client/lib/lib/assessmentTypes.js";
|
|
22
22
|
import { FULL_CLAUDE_CODE_CONFIG } from "../../client/lib/services/assessment/lib/claudeCodeBridge.js";
|
|
23
|
+
import { createFormatter, } from "../../client/lib/lib/reportFormatters/index.js";
|
|
24
|
+
import { generatePolicyComplianceReport } from "../../client/lib/services/assessment/PolicyComplianceGenerator.js";
|
|
23
25
|
/**
|
|
24
26
|
* Load server configuration from Claude Code's MCP settings
|
|
25
27
|
*/
|
|
@@ -259,6 +261,10 @@ function buildConfig(options) {
|
|
|
259
261
|
},
|
|
260
262
|
};
|
|
261
263
|
}
|
|
264
|
+
// Pass custom annotation pattern config path
|
|
265
|
+
if (options.patternConfigPath) {
|
|
266
|
+
config.patternConfigPath = options.patternConfigPath;
|
|
267
|
+
}
|
|
262
268
|
return config;
|
|
263
269
|
}
|
|
264
270
|
/**
|
|
@@ -322,17 +328,41 @@ async function runFullAssessment(options) {
|
|
|
322
328
|
return results;
|
|
323
329
|
}
|
|
324
330
|
/**
|
|
325
|
-
* Save results to
|
|
331
|
+
* Save results to file with appropriate format
|
|
326
332
|
*/
|
|
327
|
-
function saveResults(serverName, results,
|
|
328
|
-
const
|
|
329
|
-
|
|
330
|
-
const
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
333
|
+
function saveResults(serverName, results, options) {
    // Output format falls back to JSON when the caller did not pick one.
    const format = options.format || "json";
    // The policy compliance report is only generated when requested.
    let policyReport;
    if (options.includePolicy) {
        policyReport = generatePolicyComplianceReport(results, serverName);
    }
    const formatter = createFormatter({
        format,
        includePolicyMapping: options.includePolicy,
        policyReport,
        serverName,
        includeDetails: true,
        prettyPrint: true,
    });
    // The default file name takes its extension from the formatter.
    const extension = formatter.getFileExtension();
    const finalPath = options.outputPath ||
        `/tmp/inspector-full-assessment-${serverName}${extension}`;
    let serialized;
    if (format === "json") {
        // JSON output is the raw results wrapped with metadata fields.
        const envelope = {
            timestamp: new Date().toISOString(),
            assessmentType: "full",
            ...results,
        };
        if (policyReport) {
            envelope.policyCompliance = policyReport;
        }
        serialized = JSON.stringify(envelope, null, 2);
    }
    else {
        // Every other format (markdown) is rendered by the formatter.
        serialized = formatter.format(results);
    }
    fs.writeFileSync(finalPath, serialized);
    return finalPath;
}
|
|
338
368
|
/**
|
|
@@ -446,6 +476,10 @@ function parseArgs() {
|
|
|
446
476
|
case "--source":
|
|
447
477
|
options.sourceCodePath = args[++i];
|
|
448
478
|
break;
|
|
479
|
+
case "--pattern-config":
|
|
480
|
+
case "-p":
|
|
481
|
+
options.patternConfigPath = args[++i];
|
|
482
|
+
break;
|
|
449
483
|
case "--claude-enabled":
|
|
450
484
|
options.claudeEnabled = true;
|
|
451
485
|
break;
|
|
@@ -459,6 +493,20 @@ function parseArgs() {
|
|
|
459
493
|
case "--json":
|
|
460
494
|
options.jsonOnly = true;
|
|
461
495
|
break;
|
|
496
|
+
case "--format":
|
|
497
|
+
case "-f":
|
|
498
|
+
const formatValue = args[++i];
|
|
499
|
+
if (formatValue !== "json" && formatValue !== "markdown") {
|
|
500
|
+
console.error(`Invalid format: ${formatValue}. Valid options: json, markdown`);
|
|
501
|
+
setTimeout(() => process.exit(1), 10);
|
|
502
|
+
options.helpRequested = true;
|
|
503
|
+
return options;
|
|
504
|
+
}
|
|
505
|
+
options.format = formatValue;
|
|
506
|
+
break;
|
|
507
|
+
case "--include-policy":
|
|
508
|
+
options.includePolicy = true;
|
|
509
|
+
break;
|
|
462
510
|
case "--help":
|
|
463
511
|
case "-h":
|
|
464
512
|
printHelp();
|
|
@@ -500,11 +548,14 @@ Run comprehensive MCP server assessment with all 11 assessor modules.
|
|
|
500
548
|
Options:
|
|
501
549
|
--server, -s <name> Server name (required, or pass as first positional arg)
|
|
502
550
|
--config, -c <path> Path to server config JSON
|
|
503
|
-
--output, -o <path> Output
|
|
551
|
+
--output, -o <path> Output path (default: /tmp/inspector-full-assessment-<server>.<ext>)
|
|
504
552
|
--source <path> Source code path for deep analysis (AUP, portability, etc.)
|
|
553
|
+
--pattern-config, -p <path> Path to custom annotation pattern JSON
|
|
554
|
+
--format, -f <type> Output format: json (default) or markdown
|
|
555
|
+
--include-policy Include policy compliance mapping in report (30 requirements)
|
|
505
556
|
--claude-enabled Enable Claude Code integration for intelligent analysis
|
|
506
557
|
--full Enable all assessment modules (default)
|
|
507
|
-
--json Output only JSON (no console summary)
|
|
558
|
+
--json Output only JSON path (no console summary)
|
|
508
559
|
--verbose, -v Enable verbose logging
|
|
509
560
|
--help, -h Show this help message
|
|
510
561
|
|
|
@@ -525,6 +576,7 @@ Examples:
|
|
|
525
576
|
mcp-assess-full my-server
|
|
526
577
|
mcp-assess-full --server broken-mcp --claude-enabled
|
|
527
578
|
mcp-assess-full --server my-server --source ./my-server --output ./results.json
|
|
579
|
+
mcp-assess-full --server my-server --format markdown --include-policy
|
|
528
580
|
`);
|
|
529
581
|
}
|
|
530
582
|
/**
|
|
@@ -540,7 +592,7 @@ async function main() {
|
|
|
540
592
|
if (!options.jsonOnly) {
|
|
541
593
|
displaySummary(results);
|
|
542
594
|
}
|
|
543
|
-
const outputPath = saveResults(options.serverName, results, options
|
|
595
|
+
const outputPath = saveResults(options.serverName, results, options);
|
|
544
596
|
if (options.jsonOnly) {
|
|
545
597
|
console.log(outputPath);
|
|
546
598
|
}
|
|
@@ -0,0 +1,446 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Testbed Validation CLI
|
|
4
|
+
*
|
|
5
|
+
* Validates the MCP Inspector SecurityAssessor against the reference
|
|
6
|
+
* Vulnerable Testbed implementation to ensure detection accuracy.
|
|
7
|
+
*
|
|
8
|
+
* Runs assessments against both vulnerable and hardened servers,
|
|
9
|
+
* then calculates recall, precision, and false positive rates.
|
|
10
|
+
*
|
|
11
|
+
* Prerequisites:
|
|
12
|
+
* cd /home/bryan/mcp-servers/mcp-vulnerable-testbed && docker-compose up -d
|
|
13
|
+
*
|
|
14
|
+
* Usage:
|
|
15
|
+
* npm run validate:testbed
|
|
16
|
+
* mcp-validate-testbed
|
|
17
|
+
* mcp-validate-testbed --verbose
|
|
18
|
+
*/
|
|
19
|
+
import * as fs from "fs";
|
|
20
|
+
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
|
|
21
|
+
import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js";
|
|
22
|
+
// Import from local client lib
|
|
23
|
+
import { SecurityAssessor } from "../../client/lib/services/assessment/modules/SecurityAssessor.js";
|
|
24
|
+
import { DEFAULT_ASSESSMENT_CONFIG, } from "../../client/lib/lib/assessmentTypes.js";
|
|
25
|
+
/**
 * Testbed targets, keyed by variant. Each entry carries the server name,
 * its local MCP endpoint URL and the number of vulnerabilities expected.
 */
const TESTBED_CONFIG = Object.fromEntries([
    ["vulnerable", 10900, 10],
    ["hardened", 10901, 0],
].map(([variant, port, expectedVulnerabilities]) => [
    variant,
    {
        name: `${variant}-testbed`,
        url: `http://localhost:${port}/mcp`,
        expectedVulnerabilities,
    },
]));
/**
 * Expected vulnerable tools (10 total).
 * Full tool names follow the pattern `vulnerable_<stem>_tool`.
 */
const VULNERABLE_TOOLS = [
    "calculator",
    "system_exec",
    "data_leak",
    "config_modifier",
    "tool_override",
    "fetcher",
    "unicode_processor",
    "nested_parser",
    "package_installer",
    "rug_pull",
].map((stem) => `vulnerable_${stem}_tool`);
/**
 * Expected safe tools (should NOT be flagged).
 * Full tool names follow the pattern `safe_<stem>_tool_mcp`.
 */
const SAFE_TOOLS = [
    "storage",
    "search",
    "list",
    "info",
    "echo",
    "validate",
].map((stem) => `safe_${stem}_tool_mcp`);
|
|
66
|
+
/**
 * Check if a server is available.
 *
 * Sends a JSON-RPC `initialize` POST to the endpoint and reports whether the
 * HTTP response status was OK. Any thrown error (invalid URL, connection
 * failure, timeout) is treated as "not available" rather than propagated.
 *
 * @param {string} url - MCP endpoint to probe.
 * @param {number} [timeoutMs=5000] - Abort the probe after this many
 *   milliseconds so an unresponsive server cannot hang the CLI.
 * @returns {Promise<boolean>} `true` when the response is OK, else `false`.
 */
async function checkServerHealth(url, timeoutMs = 5000) {
    const initializeRequest = {
        jsonrpc: "2.0",
        method: "initialize",
        params: {
            protocolVersion: "2024-11-05",
            capabilities: {},
            clientInfo: { name: "testbed-validator", version: "1.0" },
        },
        id: 1,
    };
    try {
        const response = await fetch(url, {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                Accept: "application/json, text/event-stream",
            },
            body: JSON.stringify(initializeRequest),
            // Without a signal the probe waits indefinitely on a silent server.
            signal: AbortSignal.timeout(timeoutMs),
        });
        return response.ok;
    }
    catch {
        // Best-effort probe: unreachable, aborted or invalid all mean "down".
        return false;
    }
}
|
|
94
|
+
/**
 * Connect to MCP server.
 *
 * Builds a streamable-HTTP transport for the given URL, creates a client
 * identified as "mcp-validate-testbed", connects it and returns the client.
 */
async function connectToServer(url) {
    const endpoint = new URL(url);
    const transport = new StreamableHTTPClientTransport(endpoint);
    const identity = { name: "mcp-validate-testbed", version: "1.0.0" };
    const clientOptions = { capabilities: {} };
    const client = new Client(identity, clientOptions);
    await client.connect(transport);
    return client;
}
|
|
108
|
+
/**
 * Get tools from server.
 *
 * Returns the `tools` field of the client's `listTools()` response, or an
 * empty array when that field is missing or falsy.
 */
async function getTools(client) {
    const listing = await client.listTools();
    const available = listing.tools;
    return available ? available : [];
}
|
|
115
|
+
/**
 * Create callTool wrapper.
 *
 * The returned async function invokes `client.callTool` and never rejects:
 * on success it returns the response's content, error flag and structured
 * content; on any thrown error it returns an error result whose single text
 * item carries the failure message.
 */
function createCallToolWrapper(client) {
    const describeFailure = (error) => {
        if (error instanceof Error) {
            return error.message;
        }
        return String(error);
    };
    return async (name, params) => {
        try {
            const reply = await client.callTool({ name, arguments: params });
            const result = {
                content: reply.content,
                isError: reply.isError || false,
                structuredContent: reply.structuredContent,
            };
            return result;
        }
        catch (error) {
            const message = { type: "text", text: `Error: ${describeFailure(error)}` };
            return { content: [message], isError: true };
        }
    };
}
|
|
145
|
+
/**
 * Run assessment on a server.
 *
 * Connects to the server, lists its tools, runs the SecurityAssessor against
 * them and returns a summary. The client connection is closed whether the
 * assessment succeeds or throws.
 *
 * @param {string} serverName - Name recorded in the assessment context and result.
 * @param {string} url - MCP endpoint to connect to.
 * @param {boolean} verbose - When truthy, progress lines are logged.
 * @returns {Promise<object>} `{ serverName, totalTools, testsRun,
 *   vulnerabilitiesFound, assessment }`.
 * @throws Propagates connection, tool-listing and assessment errors.
 */
async function assessServer(serverName, url, verbose) {
    if (verbose) {
        console.log(`\n Connecting to ${url}...`);
    }
    const client = await connectToServer(url);
    try {
        const tools = await getTools(client);
        if (verbose) {
            console.log(` Found ${tools.length} tools`);
        }
        const config = {
            ...DEFAULT_ASSESSMENT_CONFIG,
            securityPatternsToTest: 17,
            reviewerMode: false,
            testTimeout: 30000, // presumably milliseconds — TODO confirm
        };
        const context = {
            serverName,
            tools,
            callTool: createCallToolWrapper(client),
            config,
        };
        if (verbose) {
            console.log(` Running security assessment...`);
        }
        const assessor = new SecurityAssessor(config);
        const assessment = await assessor.assess(context);
        return {
            serverName,
            totalTools: tools.length,
            testsRun: assessment.promptInjectionTests.length,
            vulnerabilitiesFound: assessment.vulnerabilities.length,
            assessment,
        };
    }
    finally {
        // Close on every path so a failed assessment does not leak the connection.
        await client.close();
    }
}
|
|
183
|
+
/**
 * Calculate metrics for vulnerable server assessment.
 *
 * Compares the set of tools with at least one vulnerable test result against
 * VULNERABLE_TOOLS and SAFE_TOOLS and derives recall, precision, false
 * positive rate and false negative rate. A ratio with a zero denominator is
 * reported as 0.
 */
function calculateVulnerableServerMetrics(result) {
    // Unique names of tools flagged as vulnerable, in first-seen order.
    const flaggedNames = new Set();
    for (const test of result.assessment.promptInjectionTests) {
        if (test.vulnerable) {
            flaggedNames.add(test.toolName);
        }
    }
    const flaggedList = Array.from(flaggedNames);
    const wasFlagged = (tool) => flaggedNames.has(tool);
    // NaN (0 / 0) collapses to 0.
    const ratio = (part, whole) => part / whole || 0;
    // True positives: vulnerable tools correctly flagged.
    const detected = flaggedList.filter((tool) => VULNERABLE_TOOLS.includes(tool));
    // False negatives: vulnerable tools not flagged.
    const missed = VULNERABLE_TOOLS.filter((tool) => !wasFlagged(tool));
    // False positives: safe tools incorrectly flagged.
    const falsePositives = flaggedList.filter((tool) => SAFE_TOOLS.includes(tool));
    // True negatives: safe tools correctly left alone.
    const trueNegativeCount = SAFE_TOOLS.filter((tool) => !wasFlagged(tool)).length;
    const truePositiveCount = detected.length;
    const falseNegativeCount = missed.length;
    const falsePositiveCount = falsePositives.length;
    return {
        recall: ratio(truePositiveCount, truePositiveCount + falseNegativeCount),
        precision: ratio(truePositiveCount, truePositiveCount + falsePositiveCount),
        falsePositiveRate: ratio(falsePositiveCount, falsePositiveCount + trueNegativeCount),
        falseNegativeRate: ratio(falseNegativeCount, falseNegativeCount + truePositiveCount),
        detected,
        missed,
        falsePositives,
    };
}
|
|
215
|
+
/**
 * Display validation results.
 *
 * Prints the vulnerable-server section (when both a result and metrics are
 * present), the hardened-server section (when a result is present) and the
 * overall verdict. The run passes only when recall is at least 80%, no safe
 * tool was flagged, and the hardened server reported zero vulnerabilities.
 *
 * NOTE(review): in this copy the status glyphs were mis-encoded and three
 * string literals were split by a stray line break. The check mark, cross
 * mark and warning sign below are reconstructed from the byte pattern and
 * context; the glyph shown as "š" could not be recovered and is left as
 * found. Confirm all of them against the upstream source.
 *
 * @param {object|null} vulnResult - Summary for the vulnerable server.
 * @param {object|null} hardResult - Summary for the hardened server.
 * @param {object|null} vulnMetrics - Metrics for the vulnerable server.
 * @returns {boolean} `true` when every accuracy target is met.
 */
function displayResults(vulnResult, hardResult, vulnMetrics) {
    const banner = "=".repeat(70);
    const rule = "-".repeat(50);
    const asPercent = (fraction) => (fraction * 100).toFixed(1);
    // Prints a heading followed by one line per tool; silent for empty lists.
    const printToolList = (heading, tools) => {
        if (tools.length === 0) {
            return;
        }
        console.log(heading);
        for (const tool of tools) {
            console.log(` - ${tool}`);
        }
    };
    console.log(`\n${banner}`);
    console.log("MCP INSPECTOR TESTBED VALIDATION RESULTS");
    console.log(banner);
    if (vulnResult && vulnMetrics) {
        const { detected, missed, falsePositives } = vulnMetrics;
        console.log("\nš VULNERABLE SERVER (localhost:10900)");
        console.log(rule);
        console.log(`Tools Tested: ${vulnResult.totalTools}`);
        console.log(`Tests Run: ${vulnResult.testsRun}`);
        console.log(`Vulnerabilities Found: ${vulnResult.vulnerabilitiesFound}`);
        console.log("");
        console.log(`Recall: ${asPercent(vulnMetrics.recall)}% (${detected.length}/${VULNERABLE_TOOLS.length} vulnerabilities detected)`);
        console.log(`Precision: ${asPercent(vulnMetrics.precision)}% (${falsePositives.length} false positives)`);
        console.log(`False Positive Rate: ${asPercent(vulnMetrics.falsePositiveRate)}%`);
        console.log(`False Negative Rate: ${asPercent(vulnMetrics.falseNegativeRate)}%`);
        printToolList(`\n⚠️ Missed Vulnerabilities (${missed.length}):`, missed);
        printToolList(`\n❌ False Positives (${falsePositives.length}):`, falsePositives);
        printToolList(`\n✅ Correctly Detected (${detected.length}):`, detected);
    }
    if (hardResult) {
        console.log("\nš HARDENED SERVER (localhost:10901)");
        console.log(rule);
        console.log(`Tools Tested: ${hardResult.totalTools}`);
        console.log(`Tests Run: ${hardResult.testsRun}`);
        console.log(`Vulnerabilities Found: ${hardResult.vulnerabilitiesFound}`);
        console.log(`Expected: 0`);
        if (hardResult.vulnerabilitiesFound > 0) {
            // Every flagged tool on the hardened server is a false positive.
            const flaggedTools = new Set(hardResult.assessment.promptInjectionTests
                .filter((t) => t.vulnerable)
                .map((t) => t.toolName));
            console.log(`\n❌ False Positives on Hardened Server:`);
            for (const tool of flaggedTools) {
                console.log(` - ${tool}`);
            }
        }
        else {
            console.log(`\n✅ No false positives on hardened server`);
        }
    }
    // Overall Status
    console.log(`\n${banner}`);
    console.log("OVERALL VALIDATION STATUS");
    console.log(banner);
    const vulnPassed = Boolean(vulnMetrics) &&
        vulnMetrics.recall >= 0.8 &&
        vulnMetrics.falsePositives.length === 0;
    const hardPassed = Boolean(hardResult) && hardResult.vulnerabilitiesFound === 0;
    const passed = vulnPassed && hardPassed;
    if (passed) {
        console.log("\n✅ PASS - Inspector meets accuracy targets\n");
        console.log(" Target: 80%+ recall, 0 false positives");
        console.log(` Actual: ${asPercent(vulnMetrics.recall)}% recall, ${vulnMetrics.falsePositives.length} false positives`);
    }
    else {
        console.log("\n❌ FAIL - Inspector needs improvement\n");
        // A missing result fails the run; say so instead of failing silently.
        if (!vulnMetrics) {
            console.log(" Vulnerable server was not assessed");
        }
        if (!hardResult) {
            console.log(" Hardened server was not assessed");
        }
        if (vulnMetrics && vulnMetrics.recall < 0.8) {
            console.log(` Recall too low: ${asPercent(vulnMetrics.recall)}% (target: 80%+)`);
        }
        if (vulnMetrics && vulnMetrics.falsePositives.length > 0) {
            console.log(` Has false positives: ${vulnMetrics.falsePositives.length} (target: 0)`);
        }
        if (hardResult && hardResult.vulnerabilitiesFound > 0) {
            console.log(` Hardened server flagged: ${hardResult.vulnerabilitiesFound} (target: 0)`);
        }
    }
    console.log(banner);
    console.log("");
    return passed;
}
|
|
302
|
+
/**
 * Save results to JSON.
 *
 * Writes a timestamped summary of both server runs (null for a run that did
 * not produce a result) to `outputPath`, or to
 * /tmp/testbed-validation-results.json when no path is given, and returns
 * the path written.
 */
function saveResults(vulnResult, hardResult, vulnMetrics, outputPath) {
    // Fields shared by both server entries.
    const summarize = (result) => ({
        serverName: result.serverName,
        tools: result.totalTools,
        tests: result.testsRun,
        vulnerabilities: result.vulnerabilitiesFound,
    });
    let vulnerable = null;
    if (vulnResult) {
        vulnerable = { ...summarize(vulnResult), metrics: vulnMetrics };
    }
    let hardened = null;
    if (hardResult) {
        hardened = summarize(hardResult);
    }
    const report = {
        timestamp: new Date().toISOString(),
        validationType: "testbed-validation",
        vulnerable,
        hardened,
    };
    const finalPath = outputPath || "/tmp/testbed-validation-results.json";
    fs.writeFileSync(finalPath, JSON.stringify(report, null, 2));
    return finalPath;
}
|
|
331
|
+
/**
 * Parse arguments.
 *
 * Reads the command-line arguments after the script path and recognises
 * `--verbose`/`-v`, `--help`/`-h` (prints help and exits with code 0) and
 * `--output=<path>`. Unrecognised arguments are ignored.
 *
 * @returns {{verbose: boolean, outputPath?: string}} Parsed options.
 */
function parseArgs() {
    const OUTPUT_PREFIX = "--output=";
    const options = {
        verbose: false,
    };
    for (const arg of process.argv.slice(2)) {
        switch (arg) {
            case "--verbose":
            case "-v":
                options.verbose = true;
                break;
            case "--help":
            case "-h":
                printHelp();
                process.exit(0);
                break;
            default:
                if (arg.startsWith(OUTPUT_PREFIX)) {
                    // Keep everything after the first "=" so that paths which
                    // themselves contain "=" are not truncated.
                    options.outputPath = arg.slice(OUTPUT_PREFIX.length);
                }
        }
    }
    return options;
}
|
|
358
|
+
/**
 * Print help.
 *
 * Writes the usage text to stdout in a single console.log call.
 * NOTE(review): any leading indentation or column padding inside the help
 * rows is not visible in this copy of the file; confirm against upstream.
 */
function printHelp() {
    const helpLines = [
        "",
        "Usage: mcp-validate-testbed [options]",
        "",
        "Validate MCP Inspector SecurityAssessor against the reference Vulnerable Testbed.",
        "",
        "Options:",
        "--verbose, -v Enable verbose logging",
        "--output=<path> Output JSON path (default: /tmp/testbed-validation-results.json)",
        "--help, -h Show this help message",
        "",
        "Prerequisites:",
        "Start the testbed containers before running:",
        "cd /home/bryan/mcp-servers/mcp-vulnerable-testbed && docker-compose up -d",
        "",
        "Validation Targets:",
        "- Recall: 80%+ (at least 8/10 vulnerabilities detected)",
        "- Precision: 100% (0 false positives on safe tools)",
        "- Hardened: 0 vulnerabilities detected",
        "",
        "Examples:",
        "mcp-validate-testbed",
        "mcp-validate-testbed --verbose",
        "mcp-validate-testbed --output=./validation-results.json",
        "",
    ];
    console.log(helpLines.join("\n"));
}
|
|
387
|
+
/**
 * Main execution.
 *
 * Probes both testbed servers, assesses each one that responds, prints the
 * report, saves it to disk and exits with code 0 when validation passed or
 * 1 otherwise. If neither server responds the process exits with code 1
 * before any assessment is attempted.
 *
 * NOTE(review): the status glyphs in this copy were mis-encoded. The cross
 * mark, warning sign and antenna below are reconstructed from the byte
 * pattern and context; the glyph shown as "š" could not be recovered and
 * is left as found. Confirm all of them against the upstream source.
 */
async function main() {
    const options = parseArgs();
    console.log("\nš MCP Inspector Testbed Validation");
    console.log("=".repeat(50));
    // Check server availability
    console.log("\nChecking testbed servers...");
    const { vulnerable, hardened } = TESTBED_CONFIG;
    // The two probes are independent, so run them concurrently.
    const [vulnAvailable, hardAvailable] = await Promise.all([
        checkServerHealth(vulnerable.url),
        checkServerHealth(hardened.url),
    ]);
    if (!vulnAvailable && !hardAvailable) {
        console.error("\n❌ Testbed containers not running!");
        console.error("\nStart them with:");
        console.error(" cd /home/bryan/mcp-servers/mcp-vulnerable-testbed");
        console.error(" docker-compose up -d");
        process.exit(1);
    }
    let vulnResult = null;
    let hardResult = null;
    let vulnMetrics = null;
    // Test vulnerable server
    if (vulnAvailable) {
        console.log("\n📡 Testing VULNERABLE server...");
        try {
            vulnResult = await assessServer(vulnerable.name, vulnerable.url, options.verbose);
            vulnMetrics = calculateVulnerableServerMetrics(vulnResult);
        }
        catch (error) {
            // Reported, not rethrown: the missing result fails validation below.
            console.error(" Error:", error instanceof Error ? error.message : String(error));
        }
    }
    else {
        console.log("⚠️ Vulnerable server not available - skipping");
    }
    // Test hardened server
    if (hardAvailable) {
        console.log("\n📡 Testing HARDENED server...");
        try {
            hardResult = await assessServer(hardened.name, hardened.url, options.verbose);
        }
        catch (error) {
            console.error(" Error:", error instanceof Error ? error.message : String(error));
        }
    }
    else {
        console.log("⚠️ Hardened server not available - skipping");
    }
    // Display results
    const passed = displayResults(vulnResult, hardResult, vulnMetrics);
    // Save results
    const outputPath = saveResults(vulnResult, hardResult, vulnMetrics, options.outputPath);
    console.log(`š Results saved to: ${outputPath}\n`);
    // Exit with appropriate code
    process.exit(passed ? 0 : 1);
}
|
|
443
|
+
// Entry point: any error that escapes main() is reported and the process
// exits with code 1.
// NOTE(review): the leading glyph was mis-encoded in this copy; the cross
// mark is reconstructed from context — confirm against upstream.
main().catch((error) => {
    console.error("\n❌ Fatal error:", error instanceof Error ? error.message : String(error));
    process.exit(1);
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bryan-thompson/inspector-assessment-cli",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.13.0",
|
|
4
4
|
"description": "CLI for the Enhanced MCP Inspector with assessment capabilities",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Bryan Thompson <bryan@triepod.ai>",
|
|
@@ -18,7 +18,8 @@
|
|
|
18
18
|
"bin": {
|
|
19
19
|
"mcp-inspector-assess-cli": "build/cli.js",
|
|
20
20
|
"mcp-assess-full": "build/assess-full.js",
|
|
21
|
-
"mcp-assess-security": "build/assess-security.js"
|
|
21
|
+
"mcp-assess-security": "build/assess-security.js",
|
|
22
|
+
"mcp-validate-testbed": "build/validate-testbed.js"
|
|
22
23
|
},
|
|
23
24
|
"publishConfig": {
|
|
24
25
|
"access": "public"
|