@bryan-thompson/inspector-assessment-cli 1.12.0 → 1.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/assess-full.js +115 -13
- package/build/validate-testbed.js +446 -0
- package/package.json +3 -2
package/build/assess-full.js
CHANGED
|
@@ -20,6 +20,8 @@ import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/
|
|
|
20
20
|
import { AssessmentOrchestrator, } from "../../client/lib/services/assessment/AssessmentOrchestrator.js";
|
|
21
21
|
import { DEFAULT_ASSESSMENT_CONFIG, } from "../../client/lib/lib/assessmentTypes.js";
|
|
22
22
|
import { FULL_CLAUDE_CODE_CONFIG } from "../../client/lib/services/assessment/lib/claudeCodeBridge.js";
|
|
23
|
+
import { createFormatter, } from "../../client/lib/lib/reportFormatters/index.js";
|
|
24
|
+
import { generatePolicyComplianceReport } from "../../client/lib/services/assessment/PolicyComplianceGenerator.js";
|
|
23
25
|
/**
|
|
24
26
|
* Load server configuration from Claude Code's MCP settings
|
|
25
27
|
*/
|
|
@@ -230,6 +232,7 @@ function buildConfig(options) {
|
|
|
230
232
|
enableExtendedAssessment: options.fullAssessment !== false,
|
|
231
233
|
parallelTesting: true,
|
|
232
234
|
testTimeout: 30000,
|
|
235
|
+
enableSourceCodeAnalysis: !!options.sourceCodePath,
|
|
233
236
|
};
|
|
234
237
|
if (options.fullAssessment !== false) {
|
|
235
238
|
config.assessmentCategories = {
|
|
@@ -244,6 +247,7 @@ function buildConfig(options) {
|
|
|
244
247
|
prohibitedLibraries: true,
|
|
245
248
|
manifestValidation: true,
|
|
246
249
|
portability: true,
|
|
250
|
+
externalAPIScanner: !!options.sourceCodePath,
|
|
247
251
|
};
|
|
248
252
|
}
|
|
249
253
|
if (options.claudeEnabled) {
|
|
@@ -292,6 +296,55 @@ async function runFullAssessment(options) {
|
|
|
292
296
|
if (!options.jsonOnly) {
|
|
293
297
|
console.log(`🔧 Found ${tools.length} tool${tools.length !== 1 ? "s" : ""}`);
|
|
294
298
|
}
|
|
299
|
+
// Pre-flight validation checks
|
|
300
|
+
if (options.preflightOnly) {
|
|
301
|
+
const preflightResult = {
|
|
302
|
+
passed: true,
|
|
303
|
+
toolCount: tools.length,
|
|
304
|
+
errors: [],
|
|
305
|
+
};
|
|
306
|
+
// Check 1: Tools exist
|
|
307
|
+
if (tools.length === 0) {
|
|
308
|
+
preflightResult.passed = false;
|
|
309
|
+
preflightResult.errors.push("No tools discovered from server");
|
|
310
|
+
}
|
|
311
|
+
// Check 2: Manifest valid (if source path provided)
|
|
312
|
+
if (options.sourceCodePath) {
|
|
313
|
+
const manifestPath = path.join(options.sourceCodePath, "manifest.json");
|
|
314
|
+
if (fs.existsSync(manifestPath)) {
|
|
315
|
+
try {
|
|
316
|
+
JSON.parse(fs.readFileSync(manifestPath, "utf-8"));
|
|
317
|
+
preflightResult.manifestValid = true;
|
|
318
|
+
}
|
|
319
|
+
catch {
|
|
320
|
+
preflightResult.passed = false;
|
|
321
|
+
preflightResult.manifestValid = false;
|
|
322
|
+
preflightResult.errors.push("Invalid manifest.json (JSON parse error)");
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
// Check 3: First tool responds (basic connectivity)
|
|
327
|
+
if (tools.length > 0) {
|
|
328
|
+
try {
|
|
329
|
+
const callTool = createCallToolWrapper(client);
|
|
330
|
+
const firstToolResult = await callTool(tools[0].name, {});
|
|
331
|
+
preflightResult.serverResponsive = !firstToolResult.isError;
|
|
332
|
+
if (firstToolResult.isError) {
|
|
333
|
+
preflightResult.errors.push(`First tool (${tools[0].name}) returned error - server may not be fully functional`);
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
catch (e) {
|
|
337
|
+
preflightResult.serverResponsive = false;
|
|
338
|
+
preflightResult.errors.push(`First tool call failed: ${e instanceof Error ? e.message : String(e)}`);
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
await client.close();
|
|
342
|
+
// Output pre-flight result
|
|
343
|
+
console.log(JSON.stringify(preflightResult, null, 2));
|
|
344
|
+
setTimeout(() => process.exit(preflightResult.passed ? 0 : 1), 10);
|
|
345
|
+
// Return empty result (won't be used due to process.exit)
|
|
346
|
+
return {};
|
|
347
|
+
}
|
|
295
348
|
const config = buildConfig(options);
|
|
296
349
|
const orchestrator = new AssessmentOrchestrator(config);
|
|
297
350
|
if (!options.jsonOnly) {
|
|
@@ -326,17 +379,41 @@ async function runFullAssessment(options) {
|
|
|
326
379
|
return results;
|
|
327
380
|
}
|
|
328
381
|
/**
|
|
329
|
-
* Save results to
|
|
382
|
+
* Save results to file with appropriate format
|
|
330
383
|
*/
|
|
331
|
-
function saveResults(serverName, results,
|
|
332
|
-
const
|
|
333
|
-
|
|
334
|
-
const
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
384
|
+
function saveResults(serverName, results, options) {
|
|
385
|
+
const format = options.format || "json";
|
|
386
|
+
// Generate policy compliance report if requested
|
|
387
|
+
const policyReport = options.includePolicy
|
|
388
|
+
? generatePolicyComplianceReport(results, serverName)
|
|
389
|
+
: undefined;
|
|
390
|
+
// Create formatter with options
|
|
391
|
+
const formatter = createFormatter({
|
|
392
|
+
format,
|
|
393
|
+
includePolicyMapping: options.includePolicy,
|
|
394
|
+
policyReport,
|
|
395
|
+
serverName,
|
|
396
|
+
includeDetails: true,
|
|
397
|
+
prettyPrint: true,
|
|
398
|
+
});
|
|
399
|
+
const fileExtension = formatter.getFileExtension();
|
|
400
|
+
const defaultPath = `/tmp/inspector-full-assessment-${serverName}${fileExtension}`;
|
|
401
|
+
const finalPath = options.outputPath || defaultPath;
|
|
402
|
+
// For JSON format, add metadata wrapper
|
|
403
|
+
if (format === "json") {
|
|
404
|
+
const output = {
|
|
405
|
+
timestamp: new Date().toISOString(),
|
|
406
|
+
assessmentType: "full",
|
|
407
|
+
...results,
|
|
408
|
+
...(policyReport ? { policyCompliance: policyReport } : {}),
|
|
409
|
+
};
|
|
410
|
+
fs.writeFileSync(finalPath, JSON.stringify(output, null, 2));
|
|
411
|
+
}
|
|
412
|
+
else {
|
|
413
|
+
// For other formats (markdown), use the formatter
|
|
414
|
+
const content = formatter.format(results);
|
|
415
|
+
fs.writeFileSync(finalPath, content);
|
|
416
|
+
}
|
|
340
417
|
return finalPath;
|
|
341
418
|
}
|
|
342
419
|
/**
|
|
@@ -467,6 +544,23 @@ function parseArgs() {
|
|
|
467
544
|
case "--json":
|
|
468
545
|
options.jsonOnly = true;
|
|
469
546
|
break;
|
|
547
|
+
case "--format":
|
|
548
|
+
case "-f":
|
|
549
|
+
const formatValue = args[++i];
|
|
550
|
+
if (formatValue !== "json" && formatValue !== "markdown") {
|
|
551
|
+
console.error(`Invalid format: ${formatValue}. Valid options: json, markdown`);
|
|
552
|
+
setTimeout(() => process.exit(1), 10);
|
|
553
|
+
options.helpRequested = true;
|
|
554
|
+
return options;
|
|
555
|
+
}
|
|
556
|
+
options.format = formatValue;
|
|
557
|
+
break;
|
|
558
|
+
case "--include-policy":
|
|
559
|
+
options.includePolicy = true;
|
|
560
|
+
break;
|
|
561
|
+
case "--preflight":
|
|
562
|
+
options.preflightOnly = true;
|
|
563
|
+
break;
|
|
470
564
|
case "--help":
|
|
471
565
|
case "-h":
|
|
472
566
|
printHelp();
|
|
@@ -508,12 +602,15 @@ Run comprehensive MCP server assessment with all 11 assessor modules.
|
|
|
508
602
|
Options:
|
|
509
603
|
--server, -s <name> Server name (required, or pass as first positional arg)
|
|
510
604
|
--config, -c <path> Path to server config JSON
|
|
511
|
-
--output, -o <path> Output
|
|
605
|
+
--output, -o <path> Output path (default: /tmp/inspector-full-assessment-<server>.<ext>)
|
|
512
606
|
--source <path> Source code path for deep analysis (AUP, portability, etc.)
|
|
513
607
|
--pattern-config, -p <path> Path to custom annotation pattern JSON
|
|
608
|
+
--format, -f <type> Output format: json (default) or markdown
|
|
609
|
+
--include-policy Include policy compliance mapping in report (30 requirements)
|
|
610
|
+
--preflight Run quick validation only (tools exist, manifest valid, server responds)
|
|
514
611
|
--claude-enabled Enable Claude Code integration for intelligent analysis
|
|
515
612
|
--full Enable all assessment modules (default)
|
|
516
|
-
--json Output only JSON (no console summary)
|
|
613
|
+
--json Output only JSON path (no console summary)
|
|
517
614
|
--verbose, -v Enable verbose logging
|
|
518
615
|
--help, -h Show this help message
|
|
519
616
|
|
|
@@ -534,6 +631,7 @@ Examples:
|
|
|
534
631
|
mcp-assess-full my-server
|
|
535
632
|
mcp-assess-full --server broken-mcp --claude-enabled
|
|
536
633
|
mcp-assess-full --server my-server --source ./my-server --output ./results.json
|
|
634
|
+
mcp-assess-full --server my-server --format markdown --include-policy
|
|
537
635
|
`);
|
|
538
636
|
}
|
|
539
637
|
/**
|
|
@@ -546,10 +644,14 @@ async function main() {
|
|
|
546
644
|
return;
|
|
547
645
|
}
|
|
548
646
|
const results = await runFullAssessment(options);
|
|
647
|
+
// Pre-flight mode handles its own output and exit
|
|
648
|
+
if (options.preflightOnly) {
|
|
649
|
+
return;
|
|
650
|
+
}
|
|
549
651
|
if (!options.jsonOnly) {
|
|
550
652
|
displaySummary(results);
|
|
551
653
|
}
|
|
552
|
-
const outputPath = saveResults(options.serverName, results, options
|
|
654
|
+
const outputPath = saveResults(options.serverName, results, options);
|
|
553
655
|
if (options.jsonOnly) {
|
|
554
656
|
console.log(outputPath);
|
|
555
657
|
}
|
|
@@ -0,0 +1,446 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Testbed Validation CLI
|
|
4
|
+
*
|
|
5
|
+
* Validates the MCP Inspector SecurityAssessor against the reference
|
|
6
|
+
* Vulnerable Testbed implementation to ensure detection accuracy.
|
|
7
|
+
*
|
|
8
|
+
* Runs assessments against both vulnerable and hardened servers,
|
|
9
|
+
* then calculates recall, precision, and false positive rates.
|
|
10
|
+
*
|
|
11
|
+
* Prerequisites:
|
|
12
|
+
* cd /home/bryan/mcp-servers/mcp-vulnerable-testbed && docker-compose up -d
|
|
13
|
+
*
|
|
14
|
+
* Usage:
|
|
15
|
+
* npm run validate:testbed
|
|
16
|
+
* mcp-validate-testbed
|
|
17
|
+
* mcp-validate-testbed --verbose
|
|
18
|
+
*/
|
|
19
|
+
import * as fs from "fs";
|
|
20
|
+
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
|
|
21
|
+
import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js";
|
|
22
|
+
// Import from local client lib
|
|
23
|
+
import { SecurityAssessor } from "../../client/lib/services/assessment/modules/SecurityAssessor.js";
|
|
24
|
+
import { DEFAULT_ASSESSMENT_CONFIG, } from "../../client/lib/lib/assessmentTypes.js";
|
|
25
|
+
/**
|
|
26
|
+
* Testbed Configuration
|
|
27
|
+
*/
|
|
28
|
+
const TESTBED_CONFIG = {
|
|
29
|
+
vulnerable: {
|
|
30
|
+
name: "vulnerable-testbed",
|
|
31
|
+
url: "http://localhost:10900/mcp",
|
|
32
|
+
expectedVulnerabilities: 10,
|
|
33
|
+
},
|
|
34
|
+
hardened: {
|
|
35
|
+
name: "hardened-testbed",
|
|
36
|
+
url: "http://localhost:10901/mcp",
|
|
37
|
+
expectedVulnerabilities: 0,
|
|
38
|
+
},
|
|
39
|
+
};
|
|
40
|
+
/**
|
|
41
|
+
* Expected vulnerable tools (10 total)
|
|
42
|
+
*/
|
|
43
|
+
const VULNERABLE_TOOLS = [
|
|
44
|
+
"vulnerable_calculator_tool",
|
|
45
|
+
"vulnerable_system_exec_tool",
|
|
46
|
+
"vulnerable_data_leak_tool",
|
|
47
|
+
"vulnerable_config_modifier_tool",
|
|
48
|
+
"vulnerable_tool_override_tool",
|
|
49
|
+
"vulnerable_fetcher_tool",
|
|
50
|
+
"vulnerable_unicode_processor_tool",
|
|
51
|
+
"vulnerable_nested_parser_tool",
|
|
52
|
+
"vulnerable_package_installer_tool",
|
|
53
|
+
"vulnerable_rug_pull_tool",
|
|
54
|
+
];
|
|
55
|
+
/**
|
|
56
|
+
* Expected safe tools (should NOT be flagged)
|
|
57
|
+
*/
|
|
58
|
+
const SAFE_TOOLS = [
|
|
59
|
+
"safe_storage_tool_mcp",
|
|
60
|
+
"safe_search_tool_mcp",
|
|
61
|
+
"safe_list_tool_mcp",
|
|
62
|
+
"safe_info_tool_mcp",
|
|
63
|
+
"safe_echo_tool_mcp",
|
|
64
|
+
"safe_validate_tool_mcp",
|
|
65
|
+
];
|
|
66
|
+
/**
|
|
67
|
+
* Check if a server is available
|
|
68
|
+
*/
|
|
69
|
+
async function checkServerHealth(url) {
|
|
70
|
+
try {
|
|
71
|
+
const response = await fetch(url, {
|
|
72
|
+
method: "POST",
|
|
73
|
+
headers: {
|
|
74
|
+
"Content-Type": "application/json",
|
|
75
|
+
Accept: "application/json, text/event-stream",
|
|
76
|
+
},
|
|
77
|
+
body: JSON.stringify({
|
|
78
|
+
jsonrpc: "2.0",
|
|
79
|
+
method: "initialize",
|
|
80
|
+
params: {
|
|
81
|
+
protocolVersion: "2024-11-05",
|
|
82
|
+
capabilities: {},
|
|
83
|
+
clientInfo: { name: "testbed-validator", version: "1.0" },
|
|
84
|
+
},
|
|
85
|
+
id: 1,
|
|
86
|
+
}),
|
|
87
|
+
});
|
|
88
|
+
return response.ok;
|
|
89
|
+
}
|
|
90
|
+
catch {
|
|
91
|
+
return false;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
/**
|
|
95
|
+
* Connect to MCP server
|
|
96
|
+
*/
|
|
97
|
+
async function connectToServer(url) {
|
|
98
|
+
const transport = new StreamableHTTPClientTransport(new URL(url));
|
|
99
|
+
const client = new Client({
|
|
100
|
+
name: "mcp-validate-testbed",
|
|
101
|
+
version: "1.0.0",
|
|
102
|
+
}, {
|
|
103
|
+
capabilities: {},
|
|
104
|
+
});
|
|
105
|
+
await client.connect(transport);
|
|
106
|
+
return client;
|
|
107
|
+
}
|
|
108
|
+
/**
|
|
109
|
+
* Get tools from server
|
|
110
|
+
*/
|
|
111
|
+
async function getTools(client) {
|
|
112
|
+
const response = await client.listTools();
|
|
113
|
+
return response.tools || [];
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* Create callTool wrapper
|
|
117
|
+
*/
|
|
118
|
+
function createCallToolWrapper(client) {
|
|
119
|
+
return async (name, params) => {
|
|
120
|
+
try {
|
|
121
|
+
const response = await client.callTool({
|
|
122
|
+
name,
|
|
123
|
+
arguments: params,
|
|
124
|
+
});
|
|
125
|
+
return {
|
|
126
|
+
content: response.content,
|
|
127
|
+
isError: response.isError || false,
|
|
128
|
+
structuredContent: response
|
|
129
|
+
.structuredContent,
|
|
130
|
+
};
|
|
131
|
+
}
|
|
132
|
+
catch (error) {
|
|
133
|
+
return {
|
|
134
|
+
content: [
|
|
135
|
+
{
|
|
136
|
+
type: "text",
|
|
137
|
+
text: `Error: ${error instanceof Error ? error.message : String(error)}`,
|
|
138
|
+
},
|
|
139
|
+
],
|
|
140
|
+
isError: true,
|
|
141
|
+
};
|
|
142
|
+
}
|
|
143
|
+
};
|
|
144
|
+
}
|
|
145
|
+
/**
|
|
146
|
+
* Run assessment on a server
|
|
147
|
+
*/
|
|
148
|
+
async function assessServer(serverName, url, verbose) {
|
|
149
|
+
if (verbose) {
|
|
150
|
+
console.log(`\n Connecting to ${url}...`);
|
|
151
|
+
}
|
|
152
|
+
const client = await connectToServer(url);
|
|
153
|
+
const tools = await getTools(client);
|
|
154
|
+
if (verbose) {
|
|
155
|
+
console.log(` Found ${tools.length} tools`);
|
|
156
|
+
}
|
|
157
|
+
const config = {
|
|
158
|
+
...DEFAULT_ASSESSMENT_CONFIG,
|
|
159
|
+
securityPatternsToTest: 17,
|
|
160
|
+
reviewerMode: false,
|
|
161
|
+
testTimeout: 30000,
|
|
162
|
+
};
|
|
163
|
+
const context = {
|
|
164
|
+
serverName,
|
|
165
|
+
tools,
|
|
166
|
+
callTool: createCallToolWrapper(client),
|
|
167
|
+
config,
|
|
168
|
+
};
|
|
169
|
+
if (verbose) {
|
|
170
|
+
console.log(` Running security assessment...`);
|
|
171
|
+
}
|
|
172
|
+
const assessor = new SecurityAssessor(config);
|
|
173
|
+
const assessment = await assessor.assess(context);
|
|
174
|
+
await client.close();
|
|
175
|
+
return {
|
|
176
|
+
serverName,
|
|
177
|
+
totalTools: tools.length,
|
|
178
|
+
testsRun: assessment.promptInjectionTests.length,
|
|
179
|
+
vulnerabilitiesFound: assessment.vulnerabilities.length,
|
|
180
|
+
assessment,
|
|
181
|
+
};
|
|
182
|
+
}
|
|
183
|
+
/**
|
|
184
|
+
* Calculate metrics for vulnerable server assessment
|
|
185
|
+
*/
|
|
186
|
+
function calculateVulnerableServerMetrics(result) {
|
|
187
|
+
// Get unique tools flagged as vulnerable
|
|
188
|
+
const flaggedTools = [
|
|
189
|
+
...new Set(result.assessment.promptInjectionTests
|
|
190
|
+
.filter((t) => t.vulnerable)
|
|
191
|
+
.map((t) => t.toolName)),
|
|
192
|
+
];
|
|
193
|
+
// True positives: Vulnerable tools correctly flagged
|
|
194
|
+
const detected = flaggedTools.filter((t) => VULNERABLE_TOOLS.includes(t));
|
|
195
|
+
// False negatives: Vulnerable tools not flagged
|
|
196
|
+
const missed = VULNERABLE_TOOLS.filter((t) => !flaggedTools.includes(t));
|
|
197
|
+
// False positives: Safe tools incorrectly flagged
|
|
198
|
+
const falsePositives = flaggedTools.filter((t) => SAFE_TOOLS.includes(t));
|
|
199
|
+
// True negatives: Safe tools correctly not flagged
|
|
200
|
+
const trueNegatives = SAFE_TOOLS.filter((t) => !flaggedTools.includes(t));
|
|
201
|
+
const tp = detected.length;
|
|
202
|
+
const fn = missed.length;
|
|
203
|
+
const fp = falsePositives.length;
|
|
204
|
+
const tn = trueNegatives.length;
|
|
205
|
+
return {
|
|
206
|
+
recall: tp / (tp + fn) || 0,
|
|
207
|
+
precision: tp / (tp + fp) || 0,
|
|
208
|
+
falsePositiveRate: fp / (fp + tn) || 0,
|
|
209
|
+
falseNegativeRate: fn / (fn + tp) || 0,
|
|
210
|
+
detected,
|
|
211
|
+
missed,
|
|
212
|
+
falsePositives,
|
|
213
|
+
};
|
|
214
|
+
}
|
|
215
|
+
/**
|
|
216
|
+
* Display validation results
|
|
217
|
+
*/
|
|
218
|
+
function displayResults(vulnResult, hardResult, vulnMetrics) {
|
|
219
|
+
console.log("\n" + "=".repeat(70));
|
|
220
|
+
console.log("MCP INSPECTOR TESTBED VALIDATION RESULTS");
|
|
221
|
+
console.log("=".repeat(70));
|
|
222
|
+
if (vulnResult && vulnMetrics) {
|
|
223
|
+
console.log("\nš VULNERABLE SERVER (localhost:10900)");
|
|
224
|
+
console.log("-".repeat(50));
|
|
225
|
+
console.log(`Tools Tested: ${vulnResult.totalTools}`);
|
|
226
|
+
console.log(`Tests Run: ${vulnResult.testsRun}`);
|
|
227
|
+
console.log(`Vulnerabilities Found: ${vulnResult.vulnerabilitiesFound}`);
|
|
228
|
+
console.log("");
|
|
229
|
+
console.log(`Recall: ${(vulnMetrics.recall * 100).toFixed(1)}% (${vulnMetrics.detected.length}/${VULNERABLE_TOOLS.length} vulnerabilities detected)`);
|
|
230
|
+
console.log(`Precision: ${(vulnMetrics.precision * 100).toFixed(1)}% (${vulnMetrics.falsePositives.length} false positives)`);
|
|
231
|
+
console.log(`False Positive Rate: ${(vulnMetrics.falsePositiveRate * 100).toFixed(1)}%`);
|
|
232
|
+
console.log(`False Negative Rate: ${(vulnMetrics.falseNegativeRate * 100).toFixed(1)}%`);
|
|
233
|
+
if (vulnMetrics.missed.length > 0) {
|
|
234
|
+
console.log(`\n⚠️ Missed Vulnerabilities (${vulnMetrics.missed.length}):`);
|
|
235
|
+
for (const tool of vulnMetrics.missed) {
|
|
236
|
+
console.log(` - ${tool}`);
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
if (vulnMetrics.falsePositives.length > 0) {
|
|
240
|
+
console.log(`\nā False Positives (${vulnMetrics.falsePositives.length}):`);
|
|
241
|
+
for (const tool of vulnMetrics.falsePositives) {
|
|
242
|
+
console.log(` - ${tool}`);
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
if (vulnMetrics.detected.length > 0) {
|
|
246
|
+
console.log(`\n✅ Correctly Detected (${vulnMetrics.detected.length}):`);
|
|
247
|
+
for (const tool of vulnMetrics.detected) {
|
|
248
|
+
console.log(` - ${tool}`);
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
if (hardResult) {
|
|
253
|
+
console.log("\nš HARDENED SERVER (localhost:10901)");
|
|
254
|
+
console.log("-".repeat(50));
|
|
255
|
+
console.log(`Tools Tested: ${hardResult.totalTools}`);
|
|
256
|
+
console.log(`Tests Run: ${hardResult.testsRun}`);
|
|
257
|
+
console.log(`Vulnerabilities Found: ${hardResult.vulnerabilitiesFound}`);
|
|
258
|
+
console.log(`Expected: 0`);
|
|
259
|
+
const hardenedFalsePositives = hardResult.assessment.promptInjectionTests
|
|
260
|
+
.filter((t) => t.vulnerable)
|
|
261
|
+
.map((t) => t.toolName);
|
|
262
|
+
if (hardResult.vulnerabilitiesFound > 0) {
|
|
263
|
+
console.log(`\nā False Positives on Hardened Server:`);
|
|
264
|
+
const uniqueFPs = [...new Set(hardenedFalsePositives)];
|
|
265
|
+
for (const tool of uniqueFPs) {
|
|
266
|
+
console.log(` - ${tool}`);
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
else {
|
|
270
|
+
console.log(`\n✅ No false positives on hardened server`);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
// Overall Status
|
|
274
|
+
console.log("\n" + "=".repeat(70));
|
|
275
|
+
console.log("OVERALL VALIDATION STATUS");
|
|
276
|
+
console.log("=".repeat(70));
|
|
277
|
+
const vulnPassed = vulnMetrics &&
|
|
278
|
+
vulnMetrics.recall >= 0.8 &&
|
|
279
|
+
vulnMetrics.falsePositives.length === 0;
|
|
280
|
+
const hardPassed = hardResult && hardResult.vulnerabilitiesFound === 0;
|
|
281
|
+
if (vulnPassed && hardPassed) {
|
|
282
|
+
console.log("\n✅ PASS - Inspector meets accuracy targets\n");
|
|
283
|
+
console.log(" Target: 80%+ recall, 0 false positives");
|
|
284
|
+
console.log(` Actual: ${vulnMetrics ? (vulnMetrics.recall * 100).toFixed(1) : 0}% recall, ${vulnMetrics?.falsePositives.length || 0} false positives`);
|
|
285
|
+
}
|
|
286
|
+
else {
|
|
287
|
+
console.log("\nā FAIL - Inspector needs improvement\n");
|
|
288
|
+
if (vulnMetrics && vulnMetrics.recall < 0.8) {
|
|
289
|
+
console.log(` Recall too low: ${(vulnMetrics.recall * 100).toFixed(1)}% (target: 80%+)`);
|
|
290
|
+
}
|
|
291
|
+
if (vulnMetrics && vulnMetrics.falsePositives.length > 0) {
|
|
292
|
+
console.log(` Has false positives: ${vulnMetrics.falsePositives.length} (target: 0)`);
|
|
293
|
+
}
|
|
294
|
+
if (hardResult && hardResult.vulnerabilitiesFound > 0) {
|
|
295
|
+
console.log(` Hardened server flagged: ${hardResult.vulnerabilitiesFound} (target: 0)`);
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
console.log("=".repeat(70));
|
|
299
|
+
console.log("");
|
|
300
|
+
return vulnPassed && hardPassed;
|
|
301
|
+
}
|
|
302
|
+
/**
|
|
303
|
+
* Save results to JSON
|
|
304
|
+
*/
|
|
305
|
+
function saveResults(vulnResult, hardResult, vulnMetrics, outputPath) {
|
|
306
|
+
const finalPath = outputPath || "/tmp/testbed-validation-results.json";
|
|
307
|
+
const output = {
|
|
308
|
+
timestamp: new Date().toISOString(),
|
|
309
|
+
validationType: "testbed-validation",
|
|
310
|
+
vulnerable: vulnResult
|
|
311
|
+
? {
|
|
312
|
+
serverName: vulnResult.serverName,
|
|
313
|
+
tools: vulnResult.totalTools,
|
|
314
|
+
tests: vulnResult.testsRun,
|
|
315
|
+
vulnerabilities: vulnResult.vulnerabilitiesFound,
|
|
316
|
+
metrics: vulnMetrics,
|
|
317
|
+
}
|
|
318
|
+
: null,
|
|
319
|
+
hardened: hardResult
|
|
320
|
+
? {
|
|
321
|
+
serverName: hardResult.serverName,
|
|
322
|
+
tools: hardResult.totalTools,
|
|
323
|
+
tests: hardResult.testsRun,
|
|
324
|
+
vulnerabilities: hardResult.vulnerabilitiesFound,
|
|
325
|
+
}
|
|
326
|
+
: null,
|
|
327
|
+
};
|
|
328
|
+
fs.writeFileSync(finalPath, JSON.stringify(output, null, 2));
|
|
329
|
+
return finalPath;
|
|
330
|
+
}
|
|
331
|
+
/**
|
|
332
|
+
* Parse arguments
|
|
333
|
+
*/
|
|
334
|
+
function parseArgs() {
|
|
335
|
+
const args = process.argv.slice(2);
|
|
336
|
+
const options = {
|
|
337
|
+
verbose: false,
|
|
338
|
+
};
|
|
339
|
+
for (const arg of args) {
|
|
340
|
+
switch (arg) {
|
|
341
|
+
case "--verbose":
|
|
342
|
+
case "-v":
|
|
343
|
+
options.verbose = true;
|
|
344
|
+
break;
|
|
345
|
+
case "--help":
|
|
346
|
+
case "-h":
|
|
347
|
+
printHelp();
|
|
348
|
+
process.exit(0);
|
|
349
|
+
break;
|
|
350
|
+
default:
|
|
351
|
+
if (arg.startsWith("--output=")) {
|
|
352
|
+
options.outputPath = arg.split("=")[1];
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
return options;
|
|
357
|
+
}
|
|
358
|
+
/**
|
|
359
|
+
* Print help
|
|
360
|
+
*/
|
|
361
|
+
function printHelp() {
|
|
362
|
+
console.log(`
|
|
363
|
+
Usage: mcp-validate-testbed [options]
|
|
364
|
+
|
|
365
|
+
Validate MCP Inspector SecurityAssessor against the reference Vulnerable Testbed.
|
|
366
|
+
|
|
367
|
+
Options:
|
|
368
|
+
--verbose, -v Enable verbose logging
|
|
369
|
+
--output=<path> Output JSON path (default: /tmp/testbed-validation-results.json)
|
|
370
|
+
--help, -h Show this help message
|
|
371
|
+
|
|
372
|
+
Prerequisites:
|
|
373
|
+
Start the testbed containers before running:
|
|
374
|
+
cd /home/bryan/mcp-servers/mcp-vulnerable-testbed && docker-compose up -d
|
|
375
|
+
|
|
376
|
+
Validation Targets:
|
|
377
|
+
- Recall: 80%+ (at least 8/10 vulnerabilities detected)
|
|
378
|
+
- Precision: 100% (0 false positives on safe tools)
|
|
379
|
+
- Hardened: 0 vulnerabilities detected
|
|
380
|
+
|
|
381
|
+
Examples:
|
|
382
|
+
mcp-validate-testbed
|
|
383
|
+
mcp-validate-testbed --verbose
|
|
384
|
+
mcp-validate-testbed --output=./validation-results.json
|
|
385
|
+
`);
|
|
386
|
+
}
|
|
387
|
+
/**
|
|
388
|
+
* Main execution
|
|
389
|
+
*/
|
|
390
|
+
async function main() {
|
|
391
|
+
const options = parseArgs();
|
|
392
|
+
console.log("\nš MCP Inspector Testbed Validation");
|
|
393
|
+
console.log("=".repeat(50));
|
|
394
|
+
// Check server availability
|
|
395
|
+
console.log("\nChecking testbed servers...");
|
|
396
|
+
const vulnAvailable = await checkServerHealth(TESTBED_CONFIG.vulnerable.url);
|
|
397
|
+
const hardAvailable = await checkServerHealth(TESTBED_CONFIG.hardened.url);
|
|
398
|
+
if (!vulnAvailable && !hardAvailable) {
|
|
399
|
+
console.error("\nā Testbed containers not running!");
|
|
400
|
+
console.error("\nStart them with:");
|
|
401
|
+
console.error(" cd /home/bryan/mcp-servers/mcp-vulnerable-testbed");
|
|
402
|
+
console.error(" docker-compose up -d");
|
|
403
|
+
process.exit(1);
|
|
404
|
+
}
|
|
405
|
+
let vulnResult = null;
|
|
406
|
+
let hardResult = null;
|
|
407
|
+
let vulnMetrics = null;
|
|
408
|
+
// Test vulnerable server
|
|
409
|
+
if (vulnAvailable) {
|
|
410
|
+
console.log("\n📡 Testing VULNERABLE server...");
|
|
411
|
+
try {
|
|
412
|
+
vulnResult = await assessServer(TESTBED_CONFIG.vulnerable.name, TESTBED_CONFIG.vulnerable.url, options.verbose);
|
|
413
|
+
vulnMetrics = calculateVulnerableServerMetrics(vulnResult);
|
|
414
|
+
}
|
|
415
|
+
catch (error) {
|
|
416
|
+
console.error(" Error:", error instanceof Error ? error.message : String(error));
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
else {
|
|
420
|
+
console.log("⚠️ Vulnerable server not available - skipping");
|
|
421
|
+
}
|
|
422
|
+
// Test hardened server
|
|
423
|
+
if (hardAvailable) {
|
|
424
|
+
console.log("\n📡 Testing HARDENED server...");
|
|
425
|
+
try {
|
|
426
|
+
hardResult = await assessServer(TESTBED_CONFIG.hardened.name, TESTBED_CONFIG.hardened.url, options.verbose);
|
|
427
|
+
}
|
|
428
|
+
catch (error) {
|
|
429
|
+
console.error(" Error:", error instanceof Error ? error.message : String(error));
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
else {
|
|
433
|
+
console.log("⚠️ Hardened server not available - skipping");
|
|
434
|
+
}
|
|
435
|
+
// Display results
|
|
436
|
+
const passed = displayResults(vulnResult, hardResult, vulnMetrics);
|
|
437
|
+
// Save results
|
|
438
|
+
const outputPath = saveResults(vulnResult, hardResult, vulnMetrics, options.outputPath);
|
|
439
|
+
console.log(`š Results saved to: ${outputPath}\n`);
|
|
440
|
+
// Exit with appropriate code
|
|
441
|
+
process.exit(passed ? 0 : 1);
|
|
442
|
+
}
|
|
443
|
+
main().catch((error) => {
|
|
444
|
+
console.error("\nā Fatal error:", error instanceof Error ? error.message : String(error));
|
|
445
|
+
process.exit(1);
|
|
446
|
+
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bryan-thompson/inspector-assessment-cli",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.13.1",
|
|
4
4
|
"description": "CLI for the Enhanced MCP Inspector with assessment capabilities",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Bryan Thompson <bryan@triepod.ai>",
|
|
@@ -18,7 +18,8 @@
|
|
|
18
18
|
"bin": {
|
|
19
19
|
"mcp-inspector-assess-cli": "build/cli.js",
|
|
20
20
|
"mcp-assess-full": "build/assess-full.js",
|
|
21
|
-
"mcp-assess-security": "build/assess-security.js"
|
|
21
|
+
"mcp-assess-security": "build/assess-security.js",
|
|
22
|
+
"mcp-validate-testbed": "build/validate-testbed.js"
|
|
22
23
|
},
|
|
23
24
|
"publishConfig": {
|
|
24
25
|
"access": "public"
|