@bryan-thompson/inspector-assessment-cli 1.25.9 → 1.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,355 @@
1
+ /**
2
+ * Testbed A/B Comparison Integration Tests
3
+ *
4
+ * Tests the inspector's pure behavior-based detection by comparing two servers
5
+ * with IDENTICAL tool names but different implementations:
6
+ * - vulnerable-mcp: Exploitable implementations (10 vulnerable + 6 safe tools)
7
+ * - hardened-mcp: Safe implementations (same tool names, all safe)
8
+ *
9
+ * This proves the inspector detects vulnerabilities based on tool behavior,
10
+ * not name-based heuristics.
11
+ *
12
+ * Note: Tests skip gracefully when testbed servers are unavailable.
13
+ */
14
+ import { describe, it, expect, beforeAll } from "@jest/globals";
// Endpoints of the two testbed servers that are compared against each other.
const VULNERABLE_URL = "http://localhost:10900/mcp";
const HARDENED_URL = "http://localhost:10901/mcp";

/**
 * Headers attached to every request this suite sends to an MCP HTTP server.
 * The Accept value lists both plain JSON and Server-Sent Events, the two
 * body formats the response parser in this file understands.
 */
const DEFAULT_HEADERS = {
  "Content-Type": "application/json",
  "Accept": "application/json, text/event-stream",
};
/**
 * Probe an MCP endpoint by POSTing an `initialize` request to it.
 *
 * Any HTTP status below 500 counts as "available": the probe only needs to
 * know that something is answering on the port, not that the handshake
 * succeeded. Network failures and timeouts are reported as unavailable
 * rather than thrown, so callers can skip tests instead of failing them.
 *
 * @param {string} url - Full URL of the MCP endpoint.
 * @param {number} [timeoutMs=3000] - Abort the probe after this many
 *   milliseconds. Without a bound, a peer that accepts the connection but
 *   never responds would stall the caller indefinitely.
 * @returns {Promise<boolean>} `true` when the server answered with a status
 *   below 500, `false` otherwise.
 */
async function checkServerAvailable(url, timeoutMs = 3000) {
  const probeBody = JSON.stringify({
    jsonrpc: "2.0",
    method: "initialize",
    params: {
      protocolVersion: "2024-11-05",
      capabilities: {},
      clientInfo: { name: "testbed-test", version: "1.0.0" },
    },
    id: 1,
  });
  try {
    const response = await fetch(url, {
      method: "POST",
      headers: DEFAULT_HEADERS,
      body: probeBody,
      // Rejects the fetch once the deadline passes; handled by the catch below.
      signal: AbortSignal.timeout(timeoutMs),
    });
    return response.status < 500;
  } catch {
    // Connection refused, DNS failure, or timeout: treat all as "not running".
    return false;
  }
}
/**
 * Decode the accumulated `data:` lines of one SSE event as JSON.
 *
 * @param {string[]} dataLines - Trimmed values of the event's `data:` fields.
 * @returns {*} The parsed JSON value, or `undefined` when the event carried
 *   no data at all.
 * @throws {SyntaxError} When the data is not valid JSON.
 */
function decodeSseEvent(dataLines) {
  // The SSE format joins multiple data fields of one event with newlines.
  const payload = dataLines.join("\n").trim();
  if (payload === "") {
    return undefined;
  }
  try {
    return JSON.parse(payload);
  } catch (err) {
    // Fallback for streams that omit the blank line between events: parse
    // the first non-empty data line by itself, as earlier versions did.
    const firstLine = dataLines.find((entry) => entry !== "");
    if (firstLine === payload) {
      throw err;
    }
    return JSON.parse(firstLine);
  }
}

/**
 * Extract the JSON message from an MCP HTTP response body.
 *
 * The body is either plain JSON (detected by a leading `{`) or a
 * Server-Sent Events stream such as "event: message\ndata: {...}\n\n".
 * For SSE, the first event that carries data is decoded and returned;
 * an event's JSON may be spread over several consecutive `data:` lines.
 *
 * @param {{ text: () => Promise<string> }} response - Response whose body
 *   has not been consumed yet.
 * @returns {Promise<*>} The parsed JSON value.
 * @throws {SyntaxError} When the located payload is not valid JSON.
 * @throws {Error} When the body contains no `data:` payload at all.
 */
async function parseSSEResponse(response) {
  const text = await response.text();
  // Plain JSON body: no SSE framing to strip.
  if (text.trim().startsWith("{")) {
    return JSON.parse(text);
  }
  let pendingData = [];
  // SSE permits CRLF, CR or LF line endings.
  for (const line of text.split(/\r\n|\r|\n/)) {
    if (line.startsWith("data:")) {
      pendingData.push(line.slice(5).trim());
      continue;
    }
    if (line !== "") {
      // Other fields (event:, id:, retry:) and comments carry no payload.
      continue;
    }
    // A blank line terminates the current event.
    const message = decodeSseEvent(pendingData);
    if (message !== undefined) {
      return message;
    }
    pendingData = [];
  }
  // Accept a final event that is missing its terminating blank line.
  const trailing = decodeSseEvent(pendingData);
  if (trailing !== undefined) {
    return trailing;
  }
  throw new Error(`Unable to parse SSE response: ${text.slice(0, 100)}`);
}
// Source of JSON-RPC request ids. A counter guarantees that requests issued
// in the same millisecond (e.g. through Promise.all) never share an id,
// which a timestamp-based id could not.
let nextMcpRequestId = 0;

/**
 * POST a JSON-RPC 2.0 request to an MCP endpoint and decode the reply.
 *
 * @param {string} url - Full URL of the MCP endpoint.
 * @param {string} method - JSON-RPC method name, e.g. "tools/list".
 * @param {object} [params={}] - JSON-RPC params object.
 * @returns {Promise<{ response: Response, data: * }>} The raw fetch response
 *   (body still unread, because decoding works on a clone) and the decoded
 *   message. `data` is `null` when the status is not 2xx or the body could
 *   not be decoded.
 * @throws Whatever `fetch` rejects with (network failure).
 */
async function sendMcpRequest(url, method, params = {}) {
  nextMcpRequestId += 1;
  const payload = {
    jsonrpc: "2.0",
    method,
    params,
    id: nextMcpRequestId,
  };
  const response = await fetch(url, {
    method: "POST",
    headers: DEFAULT_HEADERS,
    body: JSON.stringify(payload),
  });
  let data = null;
  if (response.ok) {
    try {
      // Decode a clone so the caller can still read the original body.
      data = await parseSSEResponse(response.clone());
    } catch {
      // Deliberate best effort: an undecodable body is reported as
      // `data === null`, which callers use as their "skip" signal.
    }
  }
  return { response, data };
}
/**
 * Fetch the names of all tools a server advertises via "tools/list".
 *
 * @param {string} url - Full URL of the MCP endpoint.
 * @returns {Promise<string[]>} Tool names in the order the server returned
 *   them. An empty array is returned when there is no usable tool list:
 *   no decodable reply, a JSON-RPC error reply (which has no `result`),
 *   or a result without a `tools` array.
 */
async function getToolList(url) {
  const { data } = await sendMcpRequest(url, "tools/list");
  // Error replies carry `error` instead of `result`; never dereference blindly.
  const tools = data?.result?.tools;
  if (!Array.isArray(tools)) {
    return [];
  }
  return tools.map((tool) => tool.name);
}
/**
 * Invoke a single tool through "tools/call".
 *
 * @param {string} url - Full URL of the MCP endpoint.
 * @param {string} toolName - Name of the tool to invoke.
 * @param {object} args - Arguments forwarded to the tool unchanged.
 * @returns {Promise<*>} The `data` field produced by `sendMcpRequest`.
 */
async function callTool(url, toolName, args) {
  const invocation = { name: toolName, arguments: args };
  const outcome = await sendMcpRequest(url, "tools/call", invocation);
  return outcome.data;
}
118
+ describe("Testbed A/B Comparison", () => {
119
+ let bothServersAvailable = false;
120
+ let vulnerableAvailable = false;
121
+ let hardenedAvailable = false;
// Probe both servers once, in parallel, and record the outcome in the
// availability flags that the individual tests consult before running.
beforeAll(async () => {
  const probes = [VULNERABLE_URL, HARDENED_URL].map((url) => checkServerAvailable(url));
  const availability = await Promise.all(probes);
  vulnerableAvailable = availability[0];
  hardenedAvailable = availability[1];
  bothServersAvailable = vulnerableAvailable && hardenedAvailable;
  if (bothServersAvailable) {
    return;
  }
  // Tell the operator why the comparison tests are about to be skipped.
  console.log("\n⚠️ Skipping testbed A/B comparison tests - servers not available");
  console.log(" Start servers with:");
  console.log(" - vulnerable-mcp: http://localhost:10900/mcp");
  console.log(" - hardened-mcp: http://localhost:10901/mcp\n");
});
describe("Health Check Tests", () => {
  // Shared body of the two connect tests: run the initialize handshake
  // against one server and check the shape of the reply.
  const expectInitializeHandshake = async (url) => {
    const { response, data } = await sendMcpRequest(url, "initialize", {
      protocolVersion: "2024-11-05",
      capabilities: {},
      clientInfo: {
        name: "testbed-test",
        version: "1.0.0",
      },
    });
    expect(response.ok).toBe(true);
    expect(data).toHaveProperty("jsonrpc", "2.0");
    expect(data).toHaveProperty("result");
    expect(data.result).toHaveProperty("serverInfo");
  };

  it("should connect to vulnerable-mcp server", async () => {
    if (vulnerableAvailable) {
      await expectInitializeHandshake(VULNERABLE_URL);
    } else {
      console.log("⏩ Skipping: vulnerable-mcp not available");
    }
  });

  it("should connect to hardened-mcp server", async () => {
    if (hardenedAvailable) {
      await expectInitializeHandshake(HARDENED_URL);
    } else {
      console.log("⏩ Skipping: hardened-mcp not available");
    }
  });

  it("should list tools on both servers", async () => {
    if (!bothServersAvailable) {
      console.log("⏩ Skipping: both servers not available");
      return;
    }
    const toolLists = await Promise.all(
      [VULNERABLE_URL, HARDENED_URL].map((url) => getToolList(url)),
    );
    // Skipped only when BOTH lists are empty (server may require session
    // state); a single empty list still fails the assertions below.
    if (toolLists.every((names) => names.length === 0)) {
      console.log("⏩ Skipping: servers returned empty tool lists");
      return;
    }
    for (const names of toolLists) {
      expect(names.length).toBeGreaterThan(0);
    }
  });
});
describe("Tool List Parity", () => {
  const SERVERS_DOWN_NOTICE = "⏩ Skipping: both servers not available";
  const EMPTY_LIST_NOTICE = "⏩ Skipping: server returned empty tool list";

  // Fetch the tool names of both servers in parallel. Resolves to null
  // (after logging) when either list is empty, since the server may
  // require session state; the calling test then returns early.
  const loadBothToolLists = async () => {
    const lists = await Promise.all([
      getToolList(VULNERABLE_URL),
      getToolList(HARDENED_URL),
    ]);
    if (lists.some((names) => names.length === 0)) {
      console.log(EMPTY_LIST_NOTICE);
      return null;
    }
    return lists;
  };

  // Same idea for tests that only inspect the vulnerable server's list.
  const loadVulnerableToolList = async () => {
    const names = await getToolList(VULNERABLE_URL);
    if (names.length === 0) {
      console.log(EMPTY_LIST_NOTICE);
      return null;
    }
    return names;
  };

  it("should have identical tool names on both servers", async () => {
    if (!bothServersAvailable) {
      console.log(SERVERS_DOWN_NOTICE);
      return;
    }
    const lists = await loadBothToolLists();
    if (lists === null) {
      return;
    }
    // Compare order-independently by sorting copies of both lists.
    const [vulnerableSorted, hardenedSorted] = lists.map((names) => [...names].sort());
    expect(vulnerableSorted).toEqual(hardenedSorted);
  });

  it("should have 16 tools (10 vulnerable + 6 safe)", async () => {
    if (!bothServersAvailable) {
      console.log(SERVERS_DOWN_NOTICE);
      return;
    }
    const lists = await loadBothToolLists();
    if (lists === null) {
      return;
    }
    const [vulnerableTools, hardenedTools] = lists;
    expect(vulnerableTools.length).toBe(16);
    expect(hardenedTools.length).toBe(16);
  });

  it("should include expected vulnerable tool names", async () => {
    if (!bothServersAvailable) {
      console.log(SERVERS_DOWN_NOTICE);
      return;
    }
    const vulnerableTools = await loadVulnerableToolList();
    if (vulnerableTools === null) {
      return;
    }
    for (const toolName of ["vulnerable_calculator_tool", "vulnerable_system_exec_tool"]) {
      expect(vulnerableTools).toContain(toolName);
    }
  });

  it("should include expected safe tool names", async () => {
    if (!bothServersAvailable) {
      console.log(SERVERS_DOWN_NOTICE);
      return;
    }
    const tools = await loadVulnerableToolList();
    if (tools === null) {
      return;
    }
    for (const toolName of ["safe_storage_tool", "safe_calculator_tool", "safe_validation_tool"]) {
      expect(tools).toContain(toolName);
    }
  });
});
describe("Tool Behavior Comparison", () => {
  // Strip the JSON-RPC `id` before comparing two replies. The id belongs to
  // the request envelope, not the tool's behaviour; leaving it in would let
  // a "responses differ" assertion pass on an id mismatch alone.
  const withoutRequestId = ({ id: _ignoredId, ...payload }) => payload;

  it("should show different behavior for calculator tool", async () => {
    if (!bothServersAvailable) {
      console.log("⏩ Skipping: both servers not available");
      return;
    }
    // Same tool name and same payload on both servers; only the
    // implementation behind the name differs.
    const testInput = { expression: "__import__('os').system('id')" };
    const [vulnerableResponse, hardenedResponse] = await Promise.all([
      callTool(VULNERABLE_URL, "vulnerable_calculator_tool", testInput),
      callTool(HARDENED_URL, "vulnerable_calculator_tool", testInput),
    ]);
    // Skip if either response is null (server may require session state)
    if (!vulnerableResponse || !hardenedResponse) {
      console.log("⏩ Skipping: tool call returned null (session state required)");
      return;
    }
    // Vulnerable server might show exploitable behavior, hardened server
    // should show safe behavior (error or sanitized). No specific behavior
    // is asserted, only that the two payloads differ.
    expect(withoutRequestId(vulnerableResponse)).not.toEqual(
      withoutRequestId(hardenedResponse),
    );
  });

  it("should show identical behavior for safe tools", async () => {
    if (!bothServersAvailable) {
      console.log("⏩ Skipping: both servers not available");
      return;
    }
    const testInput = { value: "test_data" };
    const [vulnerableResponse, hardenedResponse] = await Promise.all([
      callTool(VULNERABLE_URL, "safe_storage_tool", testInput),
      callTool(HARDENED_URL, "safe_storage_tool", testInput),
    ]);
    // Skip if either response is null (server may require session state)
    if (!vulnerableResponse || !hardenedResponse) {
      console.log("⏩ Skipping: tool call returned null (session state required)");
      return;
    }
    // Both servers must answer with a JSON-RPC message. (A bare
    // toBeDefined() could never fail after the null guard above.)
    // NOTE(review): despite the test title, the payloads are not compared
    // for equality; confirm whether the testbed guarantees identical
    // output before tightening this.
    expect(vulnerableResponse).toHaveProperty("jsonrpc", "2.0");
    expect(hardenedResponse).toHaveProperty("jsonrpc", "2.0");
  });
});
describe("Session Management", () => {
  it("should return response headers from vulnerable server", async () => {
    if (!vulnerableAvailable) {
      console.log("⏩ Skipping: vulnerable-mcp not available");
      return;
    }
    const { response } = await sendMcpRequest(VULNERABLE_URL, "tools/list");
    expect(response.headers.get("content-type")).toBeTruthy();
  });

  it("should return response headers from hardened server", async () => {
    if (!hardenedAvailable) {
      console.log("⏩ Skipping: hardened-mcp not available");
      return;
    }
    const { response } = await sendMcpRequest(HARDENED_URL, "tools/list");
    expect(response.headers.get("content-type")).toBeTruthy();
  });

  it("should handle protocol version negotiation", async () => {
    if (!bothServersAvailable) {
      console.log("⏩ Skipping: both servers not available");
      return;
    }
    const initParams = {
      protocolVersion: "2024-11-05",
      capabilities: {},
      clientInfo: { name: "testbed-test", version: "1.0.0" },
    };
    const [vulnerableResult, hardenedResult] = await Promise.all([
      sendMcpRequest(VULNERABLE_URL, "initialize", initParams),
      sendMcpRequest(HARDENED_URL, "initialize", initParams),
    ]);
    const vulnerableData = vulnerableResult.data;
    const hardenedData = hardenedResult.data;
    // `data` is null (not undefined) when no reply could be decoded, so
    // toBeDefined() would pass and the failure would only surface as a
    // TypeError on `.result`. Assert non-null explicitly instead.
    expect(vulnerableData).not.toBeNull();
    expect(hardenedData).not.toBeNull();
    // Dotted key path: fails as an assertion when `result` is missing
    // (e.g. a JSON-RPC error reply) rather than throwing on the access.
    expect(vulnerableData).toHaveProperty("result.protocolVersion");
    expect(hardenedData).toHaveProperty("result.protocolVersion");
  });
});
355
+ });
@@ -384,10 +384,22 @@ function buildConfig(options) {
384
384
  config.temporalInvocations = options.temporalInvocations;
385
385
  }
386
386
  if (options.claudeEnabled) {
387
+ // Check for HTTP transport via --claude-http flag or environment variables
388
+ const useHttpTransport = options.claudeHttp || process.env.INSPECTOR_CLAUDE === "true";
389
+ const auditorUrl = options.mcpAuditorUrl ||
390
+ process.env.INSPECTOR_MCP_AUDITOR_URL ||
391
+ "http://localhost:8085";
387
392
  config.claudeCode = {
388
393
  enabled: true,
389
394
  timeout: FULL_CLAUDE_CODE_CONFIG.timeout || 60000,
390
395
  maxRetries: FULL_CLAUDE_CODE_CONFIG.maxRetries || 2,
396
+ // Use HTTP transport when --claude-http flag or INSPECTOR_CLAUDE env is set
397
+ ...(useHttpTransport && {
398
+ transport: "http",
399
+ httpConfig: {
400
+ baseUrl: auditorUrl,
401
+ },
402
+ }),
391
403
  features: {
392
404
  intelligentTestGeneration: true,
393
405
  aupSemanticAnalysis: true,
@@ -395,6 +407,9 @@ function buildConfig(options) {
395
407
  documentationQuality: true,
396
408
  },
397
409
  };
410
+ if (useHttpTransport) {
411
+ console.log(`🔗 Claude Bridge HTTP transport: ${auditorUrl}`);
412
+ }
398
413
  }
399
414
  // Pass custom annotation pattern config path
400
415
  if (options.patternConfigPath) {
@@ -890,6 +905,32 @@ function parseArgs() {
890
905
  case "--claude-enabled":
891
906
  options.claudeEnabled = true;
892
907
  break;
908
+ case "--claude-http":
909
+ // Enable Claude Bridge with HTTP transport (connects to mcp-auditor)
910
+ options.claudeEnabled = true;
911
+ options.claudeHttp = true;
912
+ break;
913
+ case "--mcp-auditor-url": {
914
+ const urlValue = args[++i];
915
+ if (!urlValue || urlValue.startsWith("-")) {
916
+ console.error("Error: --mcp-auditor-url requires a URL argument");
917
+ setTimeout(() => process.exit(1), 10);
918
+ options.helpRequested = true;
919
+ return options;
920
+ }
921
+ try {
922
+ new URL(urlValue); // Validate URL format
923
+ options.mcpAuditorUrl = urlValue;
924
+ }
925
+ catch {
926
+ console.error(`Error: Invalid URL for --mcp-auditor-url: ${urlValue}`);
927
+ console.error(" Expected format: http://hostname:port or https://hostname:port");
928
+ setTimeout(() => process.exit(1), 10);
929
+ options.helpRequested = true;
930
+ return options;
931
+ }
932
+ break;
933
+ }
893
934
  case "--full":
894
935
  options.fullAssessment = true;
895
936
  break;
@@ -1047,6 +1088,23 @@ function parseArgs() {
1047
1088
  options.helpRequested = true;
1048
1089
  return options;
1049
1090
  }
1091
+ // Environment variable fallbacks (matches run-security-assessment.ts behavior)
1092
+ // INSPECTOR_CLAUDE=true enables Claude with HTTP transport
1093
+ if (process.env.INSPECTOR_CLAUDE === "true" && !options.claudeEnabled) {
1094
+ options.claudeEnabled = true;
1095
+ options.claudeHttp = true; // HTTP transport when enabled via env var
1096
+ }
1097
+ // INSPECTOR_MCP_AUDITOR_URL overrides default URL (only if not set via CLI)
1098
+ if (process.env.INSPECTOR_MCP_AUDITOR_URL && !options.mcpAuditorUrl) {
1099
+ const envUrl = process.env.INSPECTOR_MCP_AUDITOR_URL;
1100
+ try {
1101
+ new URL(envUrl);
1102
+ options.mcpAuditorUrl = envUrl;
1103
+ }
1104
+ catch {
1105
+ console.warn(`Warning: Invalid INSPECTOR_MCP_AUDITOR_URL: ${envUrl}, using default`);
1106
+ }
1107
+ }
1050
1108
  return options;
1051
1109
  }
1052
1110
  /**
@@ -1072,7 +1130,9 @@ Options:
1072
1130
  --diff-only Output only the comparison diff (requires --compare)
1073
1131
  --resume Resume from previous interrupted assessment
1074
1132
  --no-resume Force fresh start, clear any existing state
1075
- --claude-enabled Enable Claude Code integration for intelligent analysis
1133
+ --claude-enabled Enable Claude Code integration (CLI transport: requires 'claude' binary)
1134
+ --claude-http Enable Claude Code via HTTP transport (connects to mcp-auditor proxy)
1135
+ --mcp-auditor-url <url> mcp-auditor URL for HTTP transport (default: http://localhost:8085)
1076
1136
  --full Enable all assessment modules (default)
1077
1137
  --profile <name> Use predefined module profile (quick, security, compliance, full)
1078
1138
  --temporal-invocations <n> Number of invocations per tool for rug pull detection (default: 25)
@@ -1086,6 +1146,11 @@ Options:
1086
1146
  Also supports LOG_LEVEL environment variable
1087
1147
  --help, -h Show this help message
1088
1148
 
1149
+ Environment Variables:
1150
+ INSPECTOR_CLAUDE=true Enable Claude with HTTP transport (same as --claude-http)
1151
+ INSPECTOR_MCP_AUDITOR_URL Override default mcp-auditor URL (default: http://localhost:8085)
1152
+ LOG_LEVEL Set log level (overridden by --log-level flag)
1153
+
1089
1154
  ${getProfileHelpText()}
1090
1155
  Module Selection:
1091
1156
  --profile, --skip-modules, and --only-modules are mutually exclusive.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bryan-thompson/inspector-assessment-cli",
3
- "version": "1.25.9",
3
+ "version": "1.26.0",
4
4
  "description": "CLI for the Enhanced MCP Inspector with assessment capabilities",
5
5
  "license": "MIT",
6
6
  "author": "Bryan Thompson <bryan@triepod.ai>",