@bryan-thompson/inspector-assessment-client 1.5.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/dist/assets/{OAuthCallback-DGVqLct6.js → OAuthCallback-Xo9zS7pv.js} +1 -1
  2. package/dist/assets/{OAuthDebugCallback-DHflRQgp.js → OAuthDebugCallback-CaIey8K_.js} +1 -1
  3. package/dist/assets/{index-Btl7vuTl.js → index-nCPw6E-c.js} +4 -4
  4. package/dist/index.html +1 -1
  5. package/lib/lib/assessmentTypes.d.ts +670 -0
  6. package/lib/lib/assessmentTypes.d.ts.map +1 -0
  7. package/lib/lib/assessmentTypes.js +220 -0
  8. package/lib/lib/aupPatterns.d.ts +63 -0
  9. package/lib/lib/aupPatterns.d.ts.map +1 -0
  10. package/lib/lib/aupPatterns.js +344 -0
  11. package/lib/lib/prohibitedLibraries.d.ts +76 -0
  12. package/lib/lib/prohibitedLibraries.d.ts.map +1 -0
  13. package/lib/lib/prohibitedLibraries.js +364 -0
  14. package/lib/lib/securityPatterns.d.ts +64 -0
  15. package/lib/lib/securityPatterns.d.ts.map +1 -0
  16. package/lib/lib/securityPatterns.js +453 -0
  17. package/lib/services/assessment/AssessmentOrchestrator.d.ts +88 -0
  18. package/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -0
  19. package/lib/services/assessment/AssessmentOrchestrator.js +418 -0
  20. package/lib/services/assessment/ResponseValidator.d.ts +69 -0
  21. package/lib/services/assessment/ResponseValidator.d.ts.map +1 -0
  22. package/lib/services/assessment/ResponseValidator.js +1038 -0
  23. package/lib/services/assessment/TestDataGenerator.d.ts +86 -0
  24. package/lib/services/assessment/TestDataGenerator.d.ts.map +1 -0
  25. package/lib/services/assessment/TestDataGenerator.js +669 -0
  26. package/lib/services/assessment/TestScenarioEngine.d.ts +91 -0
  27. package/lib/services/assessment/TestScenarioEngine.d.ts.map +1 -0
  28. package/lib/services/assessment/TestScenarioEngine.js +505 -0
  29. package/lib/services/assessment/ToolClassifier.d.ts +61 -0
  30. package/lib/services/assessment/ToolClassifier.d.ts.map +1 -0
  31. package/lib/services/assessment/ToolClassifier.js +349 -0
  32. package/lib/services/assessment/lib/claudeCodeBridge.d.ts +160 -0
  33. package/lib/services/assessment/lib/claudeCodeBridge.d.ts.map +1 -0
  34. package/lib/services/assessment/lib/claudeCodeBridge.js +357 -0
  35. package/lib/services/assessment/modules/AUPComplianceAssessor.d.ts +100 -0
  36. package/lib/services/assessment/modules/AUPComplianceAssessor.d.ts.map +1 -0
  37. package/lib/services/assessment/modules/AUPComplianceAssessor.js +474 -0
  38. package/lib/services/assessment/modules/BaseAssessor.d.ts +71 -0
  39. package/lib/services/assessment/modules/BaseAssessor.d.ts.map +1 -0
  40. package/lib/services/assessment/modules/BaseAssessor.js +171 -0
  41. package/lib/services/assessment/modules/DocumentationAssessor.d.ts +45 -0
  42. package/lib/services/assessment/modules/DocumentationAssessor.d.ts.map +1 -0
  43. package/lib/services/assessment/modules/DocumentationAssessor.js +355 -0
  44. package/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts +25 -0
  45. package/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts.map +1 -0
  46. package/lib/services/assessment/modules/ErrorHandlingAssessor.js +564 -0
  47. package/lib/services/assessment/modules/FunctionalityAssessor.d.ts +20 -0
  48. package/lib/services/assessment/modules/FunctionalityAssessor.d.ts.map +1 -0
  49. package/lib/services/assessment/modules/FunctionalityAssessor.js +253 -0
  50. package/lib/services/assessment/modules/MCPSpecComplianceAssessor.d.ts +70 -0
  51. package/lib/services/assessment/modules/MCPSpecComplianceAssessor.d.ts.map +1 -0
  52. package/lib/services/assessment/modules/MCPSpecComplianceAssessor.js +508 -0
  53. package/lib/services/assessment/modules/ManifestValidationAssessor.d.ts +70 -0
  54. package/lib/services/assessment/modules/ManifestValidationAssessor.d.ts.map +1 -0
  55. package/lib/services/assessment/modules/ManifestValidationAssessor.js +430 -0
  56. package/lib/services/assessment/modules/PortabilityAssessor.d.ts +43 -0
  57. package/lib/services/assessment/modules/PortabilityAssessor.d.ts.map +1 -0
  58. package/lib/services/assessment/modules/PortabilityAssessor.js +347 -0
  59. package/lib/services/assessment/modules/ProhibitedLibrariesAssessor.d.ts +41 -0
  60. package/lib/services/assessment/modules/ProhibitedLibrariesAssessor.d.ts.map +1 -0
  61. package/lib/services/assessment/modules/ProhibitedLibrariesAssessor.js +256 -0
  62. package/lib/services/assessment/modules/SecurityAssessor.d.ts +176 -0
  63. package/lib/services/assessment/modules/SecurityAssessor.d.ts.map +1 -0
  64. package/lib/services/assessment/modules/SecurityAssessor.js +1333 -0
  65. package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts +96 -0
  66. package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts.map +1 -0
  67. package/lib/services/assessment/modules/ToolAnnotationAssessor.js +593 -0
  68. package/lib/services/assessment/modules/UsabilityAssessor.d.ts +21 -0
  69. package/lib/services/assessment/modules/UsabilityAssessor.d.ts.map +1 -0
  70. package/lib/services/assessment/modules/UsabilityAssessor.js +241 -0
  71. package/lib/services/assessment/modules/index.d.ts +33 -0
  72. package/lib/services/assessment/modules/index.d.ts.map +1 -0
  73. package/lib/services/assessment/modules/index.js +35 -0
  74. package/package.json +15 -3
@@ -0,0 +1,1333 @@
1
+ /**
2
+ * Security Assessor Module
3
+ * Tests for backend API security vulnerabilities using 8 focused patterns
4
+ * - Critical Injection (3): Command, SQL, Path Traversal
5
+ * - Input Validation (3): Type Safety, Boundary Testing, Required Fields
6
+ * - Protocol Compliance (2): MCP Error Format, Timeout Handling
7
+ */
8
+ import { BaseAssessor } from "./BaseAssessor.js";
9
+ import { getAllAttackPatterns, getPayloadsForAttack, } from "../../../lib/securityPatterns.js";
10
+ import { ToolClassifier, ToolCategory } from "../ToolClassifier.js";
11
+ export class SecurityAssessor extends BaseAssessor {
12
+ async assess(context) {
13
+ // Select tools for testing first
14
+ const toolsToTest = this.selectToolsForTesting(context.tools);
15
+ // Run universal security testing - test selected tools with ALL attack types
16
+ const allTests = await this.runUniversalSecurityTests(context);
17
+ // Separate connection errors from valid tests
18
+ const connectionErrors = allTests.filter((t) => t.connectionError === true);
19
+ const validTests = allTests.filter((t) => !t.connectionError);
20
+ // Log connection error warning
21
+ if (connectionErrors.length > 0) {
22
+ this.log(`⚠️ WARNING: ${connectionErrors.length} test${connectionErrors.length !== 1 ? "s" : ""} failed due to connection/server errors`);
23
+ this.log(`Connection errors: ${connectionErrors.map((e) => `${e.toolName}:${e.testName} (${e.errorType})`).join(", ")}`);
24
+ }
25
+ // Count vulnerabilities from VALID tests only
26
+ const vulnerabilities = [];
27
+ let highRiskCount = 0;
28
+ let mediumRiskCount = 0;
29
+ for (const test of validTests) {
30
+ if (test.vulnerable) {
31
+ // Create confidence-aware vulnerability message
32
+ let vulnerability;
33
+ if (test.confidence === "high" || !test.confidence) {
34
+ // High confidence: definitive language
35
+ vulnerability = `${test.toolName} vulnerable to ${test.testName}`;
36
+ }
37
+ else if (test.confidence === "medium") {
38
+ // Medium confidence: potential issue
39
+ vulnerability = `${test.toolName} may have ${test.testName} issue`;
40
+ }
41
+ else {
42
+ // Low confidence: flagged for review
43
+ vulnerability = `${test.toolName} flagged for ${test.testName} (needs review)`;
44
+ }
45
+ if (!vulnerabilities.includes(vulnerability)) {
46
+ vulnerabilities.push(vulnerability);
47
+ }
48
+ if (test.riskLevel === "HIGH")
49
+ highRiskCount++;
50
+ else if (test.riskLevel === "MEDIUM")
51
+ mediumRiskCount++;
52
+ }
53
+ }
54
+ // Additional security checks for new patterns (only on selected tools)
55
+ const additionalVulnerabilities = await this.performAdditionalSecurityChecks(toolsToTest);
56
+ vulnerabilities.push(...additionalVulnerabilities);
57
+ // Determine overall risk level
58
+ const overallRiskLevel = this.determineOverallRiskLevel(highRiskCount, mediumRiskCount, vulnerabilities.length);
59
+ // Determine status (pass validTests array to check confidence levels, not allTests)
60
+ const status = this.determineSecurityStatus(validTests, vulnerabilities.length, validTests.length, connectionErrors.length);
61
+ // Generate explanation (pass both validTests and connectionErrors)
62
+ const explanation = this.generateSecurityExplanation(validTests, connectionErrors, vulnerabilities, overallRiskLevel);
63
+ return {
64
+ promptInjectionTests: allTests,
65
+ vulnerabilities,
66
+ overallRiskLevel,
67
+ status,
68
+ explanation,
69
+ };
70
+ }
71
+ /**
72
+ * Select tools for testing based on configuration
73
+ */
74
+ selectToolsForTesting(tools) {
75
+ // Prefer new selectedToolsForTesting configuration
76
+ // Note: undefined/null means "test all" (default), empty array [] means "test none" (explicit)
77
+ if (this.config.selectedToolsForTesting !== undefined) {
78
+ const selectedNames = new Set(this.config.selectedToolsForTesting);
79
+ const selectedTools = tools.filter((tool) => selectedNames.has(tool.name));
80
+ // Empty array means user explicitly selected 0 tools
81
+ if (this.config.selectedToolsForTesting.length === 0) {
82
+ this.log(`User selected 0 tools for security testing - skipping tests`);
83
+ return [];
84
+ }
85
+ // If no tools matched the names (config out of sync), log warning but respect selection
86
+ if (selectedTools.length === 0) {
87
+ this.log(`Warning: No tools matched selection (${this.config.selectedToolsForTesting.join(", ")})`);
88
+ return [];
89
+ }
90
+ this.log(`Testing ${selectedTools.length} selected tools out of ${tools.length} for security`);
91
+ return selectedTools;
92
+ }
93
+ // Default: test all tools
94
+ this.log(`Testing all ${tools.length} tools for security`);
95
+ return tools;
96
+ }
97
+ /**
98
+ * Run comprehensive security tests (advanced mode)
99
+ * Tests selected tools with ALL 8 security patterns using diverse payloads
100
+ * Includes injection tests, validation tests, and protocol compliance checks
101
+ */
102
+ async runUniversalSecurityTests(context) {
103
+ // Check if advanced security testing is enabled
104
+ if (!this.config.enableDomainTesting) {
105
+ return this.runBasicSecurityTests(context);
106
+ }
107
+ const results = [];
108
+ const attackPatterns = getAllAttackPatterns();
109
+ // Select tools for testing
110
+ const toolsToTest = this.selectToolsForTesting(context.tools);
111
+ this.log(`Starting ADVANCED security assessment - testing ${toolsToTest.length} tools with ${attackPatterns.length} security patterns (~${toolsToTest.length * attackPatterns.length * 3} tests)`);
112
+ for (const tool of toolsToTest) {
113
+ // Tools with no input parameters can't be exploited via payload injection
114
+ // Add passing results so they appear in the UI
115
+ if (!this.hasInputParameters(tool)) {
116
+ this.log(`${tool.name} has no input parameters - adding passing results`);
117
+ // Add a passing result for each attack pattern so tool appears in UI
118
+ for (const attackPattern of attackPatterns) {
119
+ const payloads = getPayloadsForAttack(attackPattern.attackName);
120
+ // Add one passing result per payload type
121
+ for (const payload of payloads) {
122
+ results.push({
123
+ testName: attackPattern.attackName,
124
+ description: payload.description,
125
+ payload: payload.payload,
126
+ riskLevel: payload.riskLevel,
127
+ toolName: tool.name,
128
+ vulnerable: false,
129
+ evidence: "Tool has no input parameters - cannot be exploited via payload injection",
130
+ });
131
+ }
132
+ }
133
+ continue;
134
+ }
135
+ this.log(`Testing ${tool.name} with all attack patterns`);
136
+ // Test with each attack type (all patterns in advanced mode)
137
+ for (const attackPattern of attackPatterns) {
138
+ // Get ALL payloads for this attack pattern
139
+ const payloads = getPayloadsForAttack(attackPattern.attackName);
140
+ // Test tool with each payload variation
141
+ for (const payload of payloads) {
142
+ this.testCount++;
143
+ try {
144
+ const result = await this.testPayload(tool, attackPattern.attackName, payload, context.callTool);
145
+ results.push(result);
146
+ if (result.vulnerable) {
147
+ this.log(`🚨 VULNERABILITY: ${tool.name} - ${attackPattern.attackName} (${payload.payloadType}: ${payload.description})`);
148
+ }
149
+ }
150
+ catch (error) {
151
+ this.logError(`Error testing ${tool.name} with ${attackPattern.attackName}`, error);
152
+ }
153
+ // Rate limiting
154
+ if (this.testCount % 5 === 0) {
155
+ await this.sleep(100);
156
+ }
157
+ }
158
+ }
159
+ }
160
+ this.log(`ADVANCED security assessment complete: ${results.length} tests executed, ${results.filter((r) => r.vulnerable).length} vulnerabilities found`);
161
+ return results;
162
+ }
163
+ /**
164
+ * Run basic security tests (fast mode)
165
+ * Tests only 4 critical injection patterns with 1 generic payload each
166
+ * Used when enableDomainTesting = false
167
+ */
168
+ async runBasicSecurityTests(context) {
169
+ const results = [];
170
+ // Only test 4 critical injection patterns
171
+ const criticalPatterns = [
172
+ "Command Injection",
173
+ "Calculator Injection",
174
+ "SQL Injection",
175
+ "Path Traversal",
176
+ ];
177
+ const allPatterns = getAllAttackPatterns();
178
+ const basicPatterns = allPatterns.filter((p) => criticalPatterns.includes(p.attackName));
179
+ // Select tools for testing
180
+ const toolsToTest = this.selectToolsForTesting(context.tools);
181
+ this.log(`Starting BASIC security assessment - testing ${toolsToTest.length} tools with ${basicPatterns.length} critical injection patterns (~${toolsToTest.length * basicPatterns.length * 5} tests)`);
182
+ for (const tool of toolsToTest) {
183
+ // Tools with no input parameters can't be exploited via payload injection
184
+ // Add passing results so they appear in the UI
185
+ if (!this.hasInputParameters(tool)) {
186
+ this.log(`${tool.name} has no input parameters - adding passing results`);
187
+ // Add a passing result for each basic pattern so tool appears in UI
188
+ for (const attackPattern of basicPatterns) {
189
+ const allPayloads = getPayloadsForAttack(attackPattern.attackName);
190
+ const payload = allPayloads[0];
191
+ if (payload) {
192
+ results.push({
193
+ testName: attackPattern.attackName,
194
+ description: payload.description,
195
+ payload: payload.payload,
196
+ riskLevel: payload.riskLevel,
197
+ toolName: tool.name,
198
+ vulnerable: false,
199
+ evidence: "Tool has no input parameters - cannot be exploited via payload injection",
200
+ });
201
+ }
202
+ }
203
+ continue;
204
+ }
205
+ this.log(`Testing ${tool.name} with ${basicPatterns.length} critical patterns`);
206
+ // Test with each critical pattern
207
+ for (const attackPattern of basicPatterns) {
208
+ // Get only the FIRST (most generic) payload for basic testing
209
+ const allPayloads = getPayloadsForAttack(attackPattern.attackName);
210
+ const payload = allPayloads[0]; // Just use first payload
211
+ if (!payload)
212
+ continue;
213
+ this.testCount++;
214
+ try {
215
+ const result = await this.testPayload(tool, attackPattern.attackName, payload, context.callTool);
216
+ results.push(result);
217
+ if (result.vulnerable) {
218
+ this.log(`🚨 VULNERABILITY: ${tool.name} - ${attackPattern.attackName}`);
219
+ }
220
+ }
221
+ catch (error) {
222
+ this.logError(`Error testing ${tool.name} with ${attackPattern.attackName}`, error);
223
+ }
224
+ // Rate limiting
225
+ if (this.testCount % 5 === 0) {
226
+ await this.sleep(100);
227
+ }
228
+ }
229
+ }
230
+ this.log(`BASIC security assessment complete: ${results.length} tests executed, ${results.filter((r) => r.vulnerable).length} vulnerabilities found`);
231
+ return results;
232
+ }
233
+ /**
234
+ * Test tool with a specific payload
235
+ */
236
+ async testPayload(tool, attackName, payload, callTool) {
237
+ // Skip execution-based tests for API wrappers (they return data, not execute code)
238
+ if (this.isApiWrapper(tool) && this.isExecutionTest(attackName)) {
239
+ return {
240
+ testName: attackName,
241
+ description: payload.description,
242
+ payload: payload.payload,
243
+ riskLevel: payload.riskLevel,
244
+ toolName: tool.name,
245
+ vulnerable: false,
246
+ evidence: "API wrapper tool - skips execution tests (returns external data as text, does not execute it as code)",
247
+ };
248
+ }
249
+ try {
250
+ // Create parameters using payload
251
+ const params = this.createTestParameters(payload, tool);
252
+ if (Object.keys(params).length === 0) {
253
+ return {
254
+ testName: attackName,
255
+ description: payload.description,
256
+ payload: payload.payload,
257
+ riskLevel: payload.riskLevel,
258
+ toolName: tool.name,
259
+ vulnerable: false,
260
+ evidence: "No compatible parameters for testing",
261
+ };
262
+ }
263
+ // Execute tool call
264
+ const response = await this.executeWithTimeout(callTool(tool.name, params), 5000);
265
+ // Check for connection errors FIRST (before vulnerability analysis)
266
+ if (this.isConnectionError(response)) {
267
+ return {
268
+ testName: attackName,
269
+ description: payload.description,
270
+ payload: payload.payload,
271
+ riskLevel: payload.riskLevel,
272
+ toolName: tool.name,
273
+ vulnerable: true, // Mark as failed (test could not complete)
274
+ evidence: `CONNECTION ERROR: Test could not complete due to server/network failure`,
275
+ response: this.extractResponseContent(response),
276
+ connectionError: true,
277
+ errorType: this.classifyError(response),
278
+ testReliability: "failed",
279
+ confidence: "high",
280
+ requiresManualReview: true,
281
+ };
282
+ }
283
+ // Analyze with evidence-based detection
284
+ const { isVulnerable, evidence } = this.analyzeResponse(response, payload, tool);
285
+ // Calculate confidence and manual review requirements
286
+ const confidenceResult = this.calculateConfidence(tool, isVulnerable, evidence || "", this.extractResponseContent(response), payload);
287
+ return {
288
+ testName: attackName,
289
+ description: payload.description,
290
+ payload: payload.payload,
291
+ riskLevel: payload.riskLevel,
292
+ toolName: tool.name,
293
+ vulnerable: isVulnerable,
294
+ evidence,
295
+ response: this.extractResponseContent(response),
296
+ ...confidenceResult,
297
+ };
298
+ }
299
+ catch (error) {
300
+ // Check if error is a connection/server failure
301
+ if (this.isConnectionErrorFromException(error)) {
302
+ return {
303
+ testName: attackName,
304
+ description: payload.description,
305
+ payload: payload.payload,
306
+ riskLevel: payload.riskLevel,
307
+ toolName: tool.name,
308
+ vulnerable: false,
309
+ evidence: `CONNECTION ERROR: Test could not complete due to server/network failure`,
310
+ response: this.extractErrorMessage(error),
311
+ connectionError: true,
312
+ errorType: this.classifyErrorFromException(error),
313
+ testReliability: "failed",
314
+ confidence: "high",
315
+ requiresManualReview: true,
316
+ };
317
+ }
318
+ return {
319
+ testName: attackName,
320
+ description: payload.description,
321
+ payload: payload.payload,
322
+ riskLevel: payload.riskLevel,
323
+ toolName: tool.name,
324
+ vulnerable: false,
325
+ evidence: `Tool rejected input: ${this.extractErrorMessage(error)}`,
326
+ };
327
+ }
328
+ }
329
+ /**
330
+ * Check if response indicates connection/server failure
331
+ * Returns true if test couldn't complete due to infrastructure issues
332
+ *
333
+ * CRITICAL: Only match transport/infrastructure errors, NOT tool business logic
334
+ */
335
+ isConnectionError(response) {
336
+ const text = this.extractResponseContent(response).toLowerCase();
337
+ // UNAMBIGUOUS patterns - only match infrastructure failures
338
+ const unambiguousPatterns = [
339
+ /MCP error -32001/i, // MCP transport errors
340
+ /MCP error -32603/i, // MCP internal error
341
+ /MCP error -32000/i, // MCP server error
342
+ /MCP error -32700/i, // MCP parse error
343
+ /socket hang up/i, // Network socket errors
344
+ /ECONNREFUSED/i, // Connection refused
345
+ /ETIMEDOUT/i, // Network timeout
346
+ /ERR_CONNECTION/i, // Connection errors
347
+ /fetch failed/i, // HTTP fetch failures
348
+ /connection reset/i, // Connection reset
349
+ /error POSTing to endpoint/i, // Transport layer POST errors
350
+ /error GETting.*endpoint/i, // Transport layer GET errors (requires 'endpoint' to avoid false positives)
351
+ /service unavailable/i, // HTTP 503 (server down)
352
+ /gateway timeout/i, // HTTP 504 (gateway timeout)
353
+ /unknown tool:/i, // Tool name not in current server's tool list (stale tool list)
354
+ /tool.*not found/i, // Alternative phrasing for missing tool
355
+ /tool.*does not exist/i, // Alternative phrasing for missing tool
356
+ /no such tool/i, // Alternative phrasing for missing tool
357
+ ];
358
+ // Check unambiguous patterns first
359
+ if (unambiguousPatterns.some((pattern) => pattern.test(text))) {
360
+ return true;
361
+ }
362
+ // CONTEXTUAL patterns - only match if in MCP error context
363
+ // These words can appear in legitimate tool responses, so require MCP prefix
364
+ const mcpPrefix = /^mcp error -\d+:/i.test(text);
365
+ if (mcpPrefix) {
366
+ const contextualPatterns = [
367
+ /bad request/i, // HTTP 400 (only if in MCP error)
368
+ /unauthorized/i, // HTTP 401 (only if in MCP error)
369
+ /forbidden/i, // HTTP 403 (only if in MCP error)
370
+ /no valid session/i, // Session errors (only if in MCP error)
371
+ /session.*expired/i, // Session expiration (only if in MCP error)
372
+ /internal server error/i, // HTTP 500 (only if in MCP error)
373
+ /HTTP [45]\d\d/i, // HTTP status codes (only if in MCP error)
374
+ ];
375
+ return contextualPatterns.some((pattern) => pattern.test(text));
376
+ }
377
+ return false;
378
+ }
379
+ /**
380
+ * Check if caught exception indicates connection/server failure
381
+ * CRITICAL: Only match transport/infrastructure errors, NOT tool business logic
382
+ */
383
+ isConnectionErrorFromException(error) {
384
+ if (error instanceof Error) {
385
+ const message = error.message.toLowerCase();
386
+ // UNAMBIGUOUS patterns - only match infrastructure failures
387
+ const unambiguousPatterns = [
388
+ /MCP error -32001/i, // MCP transport errors
389
+ /MCP error -32603/i, // MCP internal error
390
+ /MCP error -32000/i, // MCP server error
391
+ /MCP error -32700/i, // MCP parse error
392
+ /socket hang up/i, // Network socket errors
393
+ /ECONNREFUSED/i, // Connection refused
394
+ /ETIMEDOUT/i, // Network timeout
395
+ /network error/i, // Generic network errors
396
+ /ERR_CONNECTION/i, // Connection errors
397
+ /fetch failed/i, // HTTP fetch failures
398
+ /connection reset/i, // Connection reset
399
+ /error POSTing to endpoint/i, // Transport layer POST errors
400
+ /error GETting/i, // Transport layer GET errors
401
+ /service unavailable/i, // HTTP 503 (server down)
402
+ /gateway timeout/i, // HTTP 504 (gateway timeout)
403
+ /unknown tool:/i, // Tool name not in current server's tool list (stale tool list)
404
+ /tool.*not found/i, // Alternative phrasing for missing tool
405
+ /tool.*does not exist/i, // Alternative phrasing for missing tool
406
+ /no such tool/i, // Alternative phrasing for missing tool
407
+ ];
408
+ // Check unambiguous patterns first
409
+ if (unambiguousPatterns.some((pattern) => pattern.test(message))) {
410
+ return true;
411
+ }
412
+ // CONTEXTUAL patterns - only match if in MCP error context
413
+ const mcpPrefix = /^mcp error -\d+:/i.test(message);
414
+ if (mcpPrefix) {
415
+ const contextualPatterns = [
416
+ /bad request/i,
417
+ /unauthorized/i,
418
+ /forbidden/i,
419
+ /no valid session/i,
420
+ /session.*expired/i,
421
+ /internal server error/i,
422
+ /HTTP [45]\d\d/i,
423
+ ];
424
+ return contextualPatterns.some((pattern) => pattern.test(message));
425
+ }
426
+ }
427
+ return false;
428
+ }
429
+ /**
430
+ * Classify error type for reporting
431
+ */
432
+ classifyError(response) {
433
+ const text = this.extractResponseContent(response).toLowerCase();
434
+ // Connection-level errors (network, transport)
435
+ if (/socket|ECONNREFUSED|ETIMEDOUT|network|fetch failed|connection reset/i.test(text)) {
436
+ return "connection";
437
+ }
438
+ // Server-level errors (backend issues)
439
+ if (/-32603|-32000|-32700|internal server error|service unavailable|gateway timeout|HTTP 5\d\d|error POSTing.*endpoint|error GETting.*endpoint|bad request|HTTP 400|unauthorized|forbidden|no valid session|session.*expired/i.test(text)) {
440
+ return "server";
441
+ }
442
+ // Protocol errors (MCP-specific)
443
+ if (/-32001/i.test(text)) {
444
+ return "protocol";
445
+ }
446
+ return "protocol";
447
+ }
448
+ /**
449
+ * Classify error type from caught exception
450
+ */
451
+ classifyErrorFromException(error) {
452
+ if (error instanceof Error) {
453
+ const message = error.message.toLowerCase();
454
+ // Connection-level errors (network, transport)
455
+ if (/socket|ECONNREFUSED|ETIMEDOUT|network|fetch failed|connection reset/i.test(message)) {
456
+ return "connection";
457
+ }
458
+ // Server-level errors (backend issues)
459
+ if (/-32603|-32000|-32700|internal server error|service unavailable|gateway timeout|HTTP 5\d\d|error POSTing|error GETting|bad request|HTTP 400|unauthorized|forbidden|no valid session|session.*expired/i.test(message)) {
460
+ return "server";
461
+ }
462
+ // Protocol errors (MCP-specific)
463
+ if (/-32001/i.test(message)) {
464
+ return "protocol";
465
+ }
466
+ }
467
+ return "protocol";
468
+ }
469
+ /**
470
+ * Analyze response with evidence-based detection
471
+ * CRITICAL: Distinguish between safe reflection and actual execution
472
+ */
473
+ analyzeResponse(response, payload, tool) {
474
+ const responseText = this.extractResponseContent(response).toLowerCase();
475
+ const payloadText = payload.payload.toLowerCase();
476
+ // ✅ STEP 1: Check for MCP validation errors (HIGHEST PRIORITY)
477
+ // Tools that reject invalid input before processing are SECURE
478
+ const errorInfo = this.extractErrorInfo(response);
479
+ if (this.isMCPValidationError(errorInfo, responseText)) {
480
+ const errorCode = errorInfo.code ? ` (${errorInfo.code})` : "";
481
+ return {
482
+ isVulnerable: false,
483
+ evidence: `MCP validation error${errorCode}: Tool properly rejected invalid input before processing`,
484
+ };
485
+ }
486
+ // ✅ STEP 1.5: Classify tool and check for safe categories (prevents false positives)
487
+ // Check tool category before generic pattern matching to avoid false positives
488
+ const classifier = new ToolClassifier();
489
+ const classification = classifier.classify(tool.name, tool.description);
490
+ // Check if tool is in a safe category and response matches expected format
491
+ if (classification.categories.includes(ToolCategory.SEARCH_RETRIEVAL)) {
492
+ // Search tools should return search results, not execute code
493
+ if (this.isSearchResultResponse(responseText)) {
494
+ return {
495
+ isVulnerable: false,
496
+ evidence: "Search tool returned query results (not code execution)",
497
+ };
498
+ }
499
+ }
500
+ if (classification.categories.includes(ToolCategory.CRUD_CREATION)) {
501
+ // Creation tools should return creation confirmations, not execute code
502
+ if (this.isCreationResponse(responseText)) {
503
+ return {
504
+ isVulnerable: false,
505
+ evidence: "CRUD tool created/modified resource (not code execution)",
506
+ };
507
+ }
508
+ }
509
+ if (classification.categories.includes(ToolCategory.READ_ONLY_INFO)) {
510
+ // Info tools should return user/workspace data (intended exposure)
511
+ return {
512
+ isVulnerable: false,
513
+ evidence: "Read-only info tool returned user/workspace data (intended data exposure, not vulnerability)",
514
+ };
515
+ }
516
+ if (classification.categories.includes(ToolCategory.SAFE_STORAGE)) {
517
+ // Safe storage control tools should NEVER be flagged (validated safe implementations)
518
+ return {
519
+ isVulnerable: false,
520
+ evidence: "Safe storage control tool (validated safe implementation, control group for testing)",
521
+ };
522
+ }
523
+ // ✅ STEP 1: Check if response is just reflecting/echoing input (SAFE)
524
+ // This prevents false positives from tools that echo malicious input as data
525
+ // IMPORTANT: Check reflection BEFORE execution evidence (priority order matters)
526
+ // This handles escaping differences (e.g., single vs double backslashes)
527
+ const isJustReflection = this.isReflectionResponse(responseText);
528
+ if (isJustReflection) {
529
+ return {
530
+ isVulnerable: false,
531
+ evidence: "Tool safely reflected input without execution",
532
+ };
533
+ }
534
+ // Response doesn't match reflection patterns - continue checking
535
+ // Falls through - might be execution (with or without echo of input)
536
+ // ✅ STEP 2: Check if tool explicitly rejected the input (SAFE)
537
+ // CRITICAL: Check this BEFORE evidence matching to prevent false positives
538
+ // Tools that reject invalid input are secure, regardless of error message content
539
+ if (this.isValidationRejection(response)) {
540
+ return {
541
+ isVulnerable: false,
542
+ evidence: "Tool correctly rejected invalid input with validation error",
543
+ };
544
+ }
545
+ // ✅ STEP 3: Check for expected evidence of execution
546
+ // Only flag as vulnerable if we find execution evidence AND it's not just reflection
547
+ if (payload.evidence && payload.evidence.test(responseText)) {
548
+ // Double-check: is the evidence pattern matching the input or the output?
549
+ const evidenceMatchesInput = payload.evidence.test(payloadText);
550
+ if (evidenceMatchesInput && responseText.includes(payloadText)) {
551
+ // Evidence matches input and response contains input - probably just echoing
552
+ return {
553
+ isVulnerable: false,
554
+ evidence: "Evidence pattern matched echoed input, not execution",
555
+ };
556
+ }
557
+ // NEW: For validation-type patterns, require execution evidence
558
+ // This prevents false positives from validation error messages
559
+ if (this.isValidationPattern(payload.evidence)) {
560
+ const hasExecutionEvidence = this.hasExecutionEvidence(responseText);
561
+ if (!hasExecutionEvidence) {
562
+ return {
563
+ isVulnerable: false,
564
+ evidence: "Pattern matched in error message but no execution evidence - tool properly validated input",
565
+ };
566
+ }
567
+ }
568
+ // Evidence doesn't match input OR response doesn't contain input - real execution
569
+ return {
570
+ isVulnerable: true,
571
+ evidence: `Tool executed malicious instruction: found evidence matching ${payload.evidence} - ${payload.description}`,
572
+ };
573
+ }
574
+ // ✅ STEP 4: Fall back to existing detection logic for comprehensive analysis
575
+ return this.analyzeInjectionResponse(response, payload.payload);
576
+ }
577
+ /**
578
+ * Check if tool explicitly rejected input with validation error (SAFE)
579
+ * Returns true if tool properly rejected invalid input (SECURE)
580
+ *
581
+ * Distinguishes between:
582
+ * - SAFE: {"valid": false, "errors": ["Input too long"]} (rejection)
583
+ * - VULNERABLE: {"result": "deleted 5 rows"} (execution)
584
+ */
585
+ isValidationRejection(response) {
586
+ const responseText = this.extractResponseContent(response);
587
+ try {
588
+ const parsed = JSON.parse(responseText);
589
+ // Explicit rejection indicators (highest priority)
590
+ if (parsed.valid === false ||
591
+ parsed.error === true ||
592
+ parsed.error === "true" || // Handle string boolean
593
+ (parsed.error && parsed.error !== false) || // Any truthy error value
594
+ parsed.status === "rejected" ||
595
+ parsed.status === "invalid" ||
596
+ parsed.status === "failed") {
597
+ return true;
598
+ }
599
+ // Has explicit error details (errors array, error field)
600
+ if (parsed.errors &&
601
+ Array.isArray(parsed.errors) &&
602
+ parsed.errors.length > 0) {
603
+ return true;
604
+ }
605
+ if (parsed.error && typeof parsed.error === "string") {
606
+ return true;
607
+ }
608
+ // Result field indicates validation failure
609
+ if (typeof parsed.result === "string") {
610
+ const resultRejectionPatterns = [
611
+ /validation (failed|error)/i,
612
+ /rejected/i,
613
+ /not.*approved/i,
614
+ /not.*in.*list/i,
615
+ /invalid.*input/i,
616
+ /error:.*invalid/i,
617
+ ];
618
+ if (resultRejectionPatterns.some((p) => p.test(parsed.result))) {
619
+ return true;
620
+ }
621
+ }
622
+ }
623
+ catch {
624
+ // Not JSON, check text patterns
625
+ }
626
+ // Text-based rejection patterns (fallback for non-JSON responses)
627
+ const rejectionPatterns = [
628
+ /validation failed/i,
629
+ /rejected/i,
630
+ /not.*approved/i,
631
+ /not.*in.*list/i,
632
+ /invalid.*input/i,
633
+ /error:.*invalid/i,
634
+ ];
635
+ return rejectionPatterns.some((pattern) => pattern.test(responseText));
636
+ }
637
+ /**
638
+ * Check if response is an MCP validation error (safe rejection)
639
+ * Returns true if tool rejected input before processing (SECURE)
640
+ *
641
+ * Validation errors indicate proper input sanitization and are NOT vulnerabilities.
642
+ * Examples:
643
+ * - MCP error -32602 (JSON-RPC Invalid params standard)
644
+ * - "parameter validation failed: invalid url"
645
+ * - "schema validation error: must be a string"
646
+ */
647
+ isMCPValidationError(errorInfo, responseText) {
648
+ // Check for JSON-RPC -32602 (Invalid params)
649
+ // This is the standard error code for parameter validation failures
650
+ if (errorInfo.code === -32602 || errorInfo.code === "-32602") {
651
+ return true;
652
+ }
653
+ // Check for explicit validation rejection patterns
654
+ // These indicate the tool properly validated input BEFORE processing
655
+ const validationPatterns = [
656
+ /parameter validation failed/i,
657
+ /schema validation (error|failed)/i,
658
+ /invalid (url|email|format|parameter|input|data)/i,
659
+ /must be a valid/i,
660
+ /must have a valid/i,
661
+ /failed to validate/i,
662
+ /validation error/i,
663
+ /does not match (pattern|schema)/i,
664
+ /not a valid (url|email|number|string)/i,
665
+ /expected.*but (got|received)/i,
666
+ /type mismatch/i,
667
+ /\brequired\b.*\bmissing\b/i,
668
+ // Boundary validation patterns (empty strings, required fields)
669
+ /cannot.*be.*empty/i,
670
+ /must.*not.*be.*empty/i,
671
+ /empty.*not.*allowed/i,
672
+ /\brequired\b/i, // Generic required field validation
673
+ /missing.*required/i,
674
+ /field.*required/i,
675
+ ];
676
+ return validationPatterns.some((pattern) => pattern.test(responseText));
677
+ }
678
+ /**
679
+ * Check if evidence pattern is ambiguous (can match both validation and execution)
680
+ * These patterns appear in BOTH:
681
+ * - Secure validation errors: "parameter validation failed: invalid type"
682
+ * - Vulnerable execution errors: "SQL executed: type error in result"
683
+ */
684
+ isValidationPattern(evidencePattern) {
685
+ const patternStr = evidencePattern.toString().toLowerCase();
686
+ // Patterns that are ambiguous - match both validation AND execution errors
687
+ const ambiguousPatterns = [
688
+ "type.*error",
689
+ "invalid.*type",
690
+ "error",
691
+ "invalid",
692
+ "failed",
693
+ "negative.*not.*allowed",
694
+ "must.*be.*positive",
695
+ "invalid.*value",
696
+ "overflow",
697
+ "out.*of.*range",
698
+ ];
699
+ return ambiguousPatterns.some((ambiguous) => patternStr.includes(ambiguous));
700
+ }
701
+ /**
702
+ * Check if response contains evidence of actual execution
703
+ * Returns true only if malicious input was actually PROCESSED/EXECUTED
704
+ *
705
+ * Distinguishes between:
706
+ * - SECURE: "Parameter validation failed: invalid type" (rejected BEFORE execution)
707
+ * - VULNERABLE: "SQL executed: syntax error" (error DURING execution)
708
+ */
709
+ hasExecutionEvidence(responseText) {
710
+ const executionIndicators = [
711
+ // Explicit execution verbs
712
+ /\bexecuted\b/i,
713
+ /\bprocessed\b/i,
714
+ /\bran\b.*command/i,
715
+ /\bcompleted\b/i,
716
+ /\bcomputed\b/i,
717
+ /\bcalculated\b/i,
718
+ // System-level errors (only occur during execution)
719
+ /NullPointerException/i,
720
+ /SegmentationFault/i,
721
+ /StackOverflow/i,
722
+ /OutOfMemory/i,
723
+ /syntax error in executed/i,
724
+ /error while executing/i,
725
+ /failed during execution/i,
726
+ /error in query execution/i,
727
+ /runtime error/i,
728
+ // Side effects (prove execution occurred)
729
+ /deleted \d+ (rows|files|records)/i,
730
+ /(file|resource) (opened|accessed|modified|deleted)/i,
731
+ /query returned \d+ results/i,
732
+ /modified \d+ records/i,
733
+ /\d+ rows affected/i,
734
+ /command output:/i,
735
+ /execution result:/i,
736
+ ];
737
+ return executionIndicators.some((pattern) => pattern.test(responseText));
738
+ }
739
+ /**
740
+ * Perform additional security checks
741
+ */
742
+ async performAdditionalSecurityChecks(tools) {
743
+ const vulnerabilities = [];
744
+ const classifier = new ToolClassifier();
745
+ // Check for tools that might handle sensitive data
746
+ for (const tool of tools) {
747
+ const toolText = `${tool.name} ${tool.description || ""}`.toLowerCase();
748
+ // Skip tools in safe categories that are designed to return user/data info
749
+ const classification = classifier.classify(tool.name, tool.description);
750
+ if (classification.categories.includes(ToolCategory.READ_ONLY_INFO) ||
751
+ classification.categories.includes(ToolCategory.SEARCH_RETRIEVAL) ||
752
+ classification.categories.includes(ToolCategory.CRUD_CREATION)) {
753
+ continue; // These tools are designed to return data, skip the check
754
+ }
755
+ if (/key|secret|credential|password|token|auth/.test(toolText) &&
756
+ !this.hasInputParameters(tool)) {
757
+ vulnerabilities.push(`${tool.name} may expose sensitive data (security-related tool with no input validation)`);
758
+ }
759
+ }
760
+ return vulnerabilities;
761
+ }
762
+ /**
763
+ * Determine overall risk level
764
+ */
765
+ determineOverallRiskLevel(highRiskCount, mediumRiskCount, totalVulnerabilities) {
766
+ if (highRiskCount > 0)
767
+ return "HIGH";
768
+ if (mediumRiskCount > 2)
769
+ return "HIGH";
770
+ if (mediumRiskCount > 0)
771
+ return "MEDIUM";
772
+ if (totalVulnerabilities > 0)
773
+ return "LOW";
774
+ return "LOW";
775
+ }
776
+ /**
777
+ * Determine security status based on confidence levels
778
+ */
779
+ determineSecurityStatus(tests, vulnerabilityCount, testCount, connectionErrorCount = 0) {
780
+ // If there are connection errors, we can't verify security
781
+ if (connectionErrorCount > 0)
782
+ return "FAIL";
783
+ // If no tests were run, we can't determine security status
784
+ if (testCount === 0)
785
+ return "NEED_MORE_INFO";
786
+ if (vulnerabilityCount === 0)
787
+ return "PASS";
788
+ // Check confidence levels of vulnerabilities
789
+ const hasHighConfidence = tests.some((t) => t.vulnerable && (!t.confidence || t.confidence === "high"));
790
+ // Only HIGH confidence vulnerabilities should result in FAIL
791
+ if (hasHighConfidence)
792
+ return "FAIL";
793
+ // Medium and low confidence always require review
794
+ return "NEED_MORE_INFO";
795
+ }
796
+ /**
797
+ * Generate security explanation
798
+ */
799
+ generateSecurityExplanation(validTests, connectionErrors, vulnerabilities, riskLevel) {
800
+ const vulnCount = vulnerabilities.length;
801
+ const testCount = validTests.length;
802
+ const errorCount = connectionErrors.length;
803
+ // Build explanation starting with connection error warning if present
804
+ let explanation = "";
805
+ if (errorCount > 0) {
806
+ explanation += `⚠️ ${errorCount} test${errorCount !== 1 ? "s" : ""} failed due to connection/server errors. `;
807
+ }
808
+ // Handle case when no tools were tested
809
+ if (testCount === 0 && errorCount > 0) {
810
+ return (explanation +
811
+ `No valid tests completed. Check server connectivity and retry assessment.`);
812
+ }
813
+ if (testCount === 0 && errorCount === 0) {
814
+ return `No tools selected for security testing. Select tools to run security assessments.`;
815
+ }
816
+ if (vulnCount === 0) {
817
+ return (explanation +
818
+ `Tested ${testCount} security patterns across selected tools. No vulnerabilities detected. All tools properly handle malicious inputs.`);
819
+ }
820
+ // Count by confidence level (from valid tests only)
821
+ const highConfidenceCount = validTests.filter((t) => t.vulnerable && (!t.confidence || t.confidence === "high")).length;
822
+ const mediumConfidenceCount = validTests.filter((t) => t.vulnerable && t.confidence === "medium").length;
823
+ const lowConfidenceCount = validTests.filter((t) => t.vulnerable && t.confidence === "low").length;
824
+ // Generate confidence-aware explanation
825
+ if (highConfidenceCount > 0) {
826
+ return (explanation +
827
+ `Found ${highConfidenceCount} confirmed vulnerability${highConfidenceCount !== 1 ? "s" : ""} across ${testCount} security tests. Risk level: ${riskLevel}. Tools may execute malicious commands or leak sensitive data.`);
828
+ }
829
+ else if (mediumConfidenceCount > 0) {
830
+ return (explanation +
831
+ `Detected ${mediumConfidenceCount} potential security concern${mediumConfidenceCount !== 1 ? "s" : ""} across ${testCount} security tests requiring manual review. Tools showed suspicious behavior that needs verification.`);
832
+ }
833
+ else {
834
+ return (explanation +
835
+ `Flagged ${lowConfidenceCount} uncertain detection${lowConfidenceCount !== 1 ? "s" : ""} across ${testCount} security tests. Manual verification needed to confirm if these are actual vulnerabilities or false positives.`);
836
+ }
837
+ }
838
+ /**
839
+ * Calculate confidence level and manual review requirements
840
+ * Detects ambiguous patterns that need human verification
841
+ */
842
+ calculateConfidence(tool, isVulnerable, evidence, responseText, payload) {
843
+ const toolDescription = (tool.description || "").toLowerCase();
844
+ const toolName = tool.name.toLowerCase();
845
+ const responseLower = responseText.toLowerCase();
846
+ const payloadLower = payload.payload.toLowerCase();
847
+ // HIGH CONFIDENCE: Clear cases
848
+ // 1. Not vulnerable with clear safety indicators
849
+ if (!isVulnerable &&
850
+ (evidence.includes("safely reflected") ||
851
+ evidence.includes("API wrapper") ||
852
+ evidence.includes("safe: true"))) {
853
+ return {
854
+ confidence: "high",
855
+ requiresManualReview: false,
856
+ };
857
+ }
858
+ // 2. Vulnerable with unambiguous execution evidence
859
+ if (isVulnerable &&
860
+ evidence.includes("executed") &&
861
+ !this.isStructuredDataTool(toolName, toolDescription)) {
862
+ return {
863
+ confidence: "high",
864
+ requiresManualReview: false,
865
+ };
866
+ }
867
+ // LOW CONFIDENCE: Ambiguous pattern matches in structured data
868
+ if (isVulnerable) {
869
+ // Check if tool returns structured data (search, lookup, retrieval)
870
+ const isDataTool = this.isStructuredDataTool(toolName, toolDescription);
871
+ // Check if response contains structured data indicators
872
+ const hasStructuredData = /title:|name:|description:|trust score:|id:|snippets:/i.test(responseText) ||
873
+ /^\s*-\s+/m.test(responseText) || // Bullet points
874
+ /"[^"]+"\s*:\s*"[^"]+"/g.test(responseText); // JSON-like structure
875
+ // Check if evidence pattern appears in input query
876
+ const patternInInput = payload.evidence?.test(payloadLower);
877
+ // Check if response echoes the input
878
+ const echosInput = responseLower.includes(payloadLower);
879
+ if (isDataTool && (hasStructuredData || echosInput) && patternInInput) {
880
+ return {
881
+ confidence: "low",
882
+ requiresManualReview: true,
883
+ manualReviewReason: "Pattern matched in structured data response. Tool may be legitimately " +
884
+ "returning data containing search terms rather than executing malicious code.",
885
+ reviewGuidance: "Verify: 1) Does the tool actually execute/compute the input? " +
886
+ "2) Or does it just return pre-existing data that happens to contain the pattern? " +
887
+ `3) Check if '${payload.evidence}' appears in legitimate tool output vs. execution results.`,
888
+ };
889
+ }
890
+ // Arithmetic patterns in numeric data (scores, counts, IDs)
891
+ if (payload.evidence &&
892
+ /\b\d\b/.test(payload.evidence.toString()) &&
893
+ /\b(score|count|trust|rating|id|version)\b/i.test(responseText)) {
894
+ return {
895
+ confidence: "low",
896
+ requiresManualReview: true,
897
+ manualReviewReason: "Numeric pattern found in response with numeric metadata (scores, counts, etc.). " +
898
+ "May be coincidental data rather than arithmetic execution.",
899
+ reviewGuidance: "Verify: 1) Did the tool actually compute an arithmetic result? " +
900
+ "2) Or does the number appear in metadata like trust scores, version numbers, or counts? " +
901
+ "3) Compare pattern location in response with tool's expected output format.",
902
+ };
903
+ }
904
+ // Role/admin patterns in tool that deals with admin-related content
905
+ if (/admin|role|privilege|elevated/i.test(payload.payload) &&
906
+ /\b(library|search|documentation|api|wrapper)\b/i.test(toolDescription)) {
907
+ return {
908
+ confidence: "low",
909
+ requiresManualReview: true,
910
+ manualReviewReason: "Admin-related keywords found in search/retrieval tool results. " +
911
+ "Tool may be returning data about admin-related libraries/APIs rather than elevating privileges.",
912
+ reviewGuidance: "Verify: 1) Did the tool actually change behavior or assume admin role? " +
913
+ "2) Or did it return search results for admin-related content? " +
914
+ "3) Test if tool behavior actually changed after this request.",
915
+ };
916
+ }
917
+ }
918
+ // MEDIUM CONFIDENCE: Execution evidence but some ambiguity
919
+ if (isVulnerable && evidence.includes("executed")) {
920
+ return {
921
+ confidence: "medium",
922
+ requiresManualReview: true,
923
+ manualReviewReason: "Execution indicators found but context suggests possible ambiguity.",
924
+ reviewGuidance: "Verify: 1) Review the full response to confirm actual code execution. " +
925
+ "2) Check if tool's intended function involves execution. " +
926
+ "3) Test with variations to confirm consistency.",
927
+ };
928
+ }
929
+ // Default: HIGH confidence for clear safe cases
930
+ return {
931
+ confidence: "high",
932
+ requiresManualReview: false,
933
+ };
934
+ }
935
+ /**
936
+ * Check if tool is a structured data tool (search, lookup, retrieval)
937
+ * These tools naturally echo input patterns in their results
938
+ */
939
+ isStructuredDataTool(toolName, toolDescription) {
940
+ const dataToolPatterns = [
941
+ /search/i,
942
+ /find/i,
943
+ /lookup/i,
944
+ /query/i,
945
+ /retrieve/i,
946
+ /fetch/i,
947
+ /get/i,
948
+ /list/i,
949
+ /resolve/i,
950
+ /discover/i,
951
+ /browse/i,
952
+ ];
953
+ const combined = `${toolName} ${toolDescription}`;
954
+ return dataToolPatterns.some((pattern) => pattern.test(combined));
955
+ }
956
+ /**
957
+ * Check if response is just reflection (safe)
958
+ * Expanded to catch more reflection patterns including echo, repeat, display
959
+ * IMPROVED: Bidirectional patterns, safety indicators, and two-layer defense
960
+ *
961
+ * CRITICAL: This check distinguishes between:
962
+ * - SAFE: Tool stores/echoes malicious input as data (reflection)
963
+ * - VULNERABLE: Tool executes malicious input and returns results (execution)
964
+ *
965
+ * Two-layer defense:
966
+ * Layer 1: Match reflection/status patterns
967
+ * Layer 2: Verify NO execution evidence (defense-in-depth)
968
+ */
969
+ isReflectionResponse(responseText) {
970
+ console.log("[DIAG] isReflectionResponse called");
971
+ console.log("[DIAG] Response preview:", responseText.substring(0, 200));
972
+ // Status message patterns (NEW)
973
+ const statusPatterns = [
974
+ // "Action executed successfully: <anything>" (generic status message)
975
+ /action\s+executed\s+successfully:/i,
976
+ /command\s+executed\s+successfully:/i,
977
+ // "Action executed successfully" (generic status - in JSON or standalone)
978
+ /"result":\s*"action\s+executed\s+successfully"/i,
979
+ /result.*action\s+executed\s+successfully/i,
980
+ /successfully\s+(executed|completed|processed):/i,
981
+ /successfully\s+(executed|completed|processed)"/i,
982
+ ];
983
+ const reflectionPatterns = [
984
+ ...statusPatterns,
985
+ // Original patterns (keep all existing)
986
+ /stored.*query/i,
987
+ /saved.*input/i,
988
+ /received.*parameter/i,
989
+ /processing.*request/i,
990
+ /storing.*data/i,
991
+ /added.*to.*collection/i,
992
+ /echo:/i,
993
+ /echoing/i,
994
+ /repeating/i,
995
+ /displaying/i,
996
+ /showing.*input/i,
997
+ /message.*echoed/i,
998
+ /safely.*as.*data/i,
999
+ // NEW: Bidirectional patterns (catch "Query stored" and "stored query")
1000
+ /query.*stored/i,
1001
+ /stored.*query/i, // Bidirectional: "Stored query"
1002
+ /input.*saved/i,
1003
+ /parameter.*received/i,
1004
+ /command.*stored/i,
1005
+ /stored.*command/i, // Bidirectional: "Stored command"
1006
+ /data.*stored/i,
1007
+ /stored.*data/i, // Bidirectional: "Stored data"
1008
+ /action.*stored/i,
1009
+ /stored.*action/i, // Bidirectional: "Stored action"
1010
+ /text.*stored/i,
1011
+ /stored.*text/i, // Bidirectional: "Stored text"
1012
+ /setting.*stored/i,
1013
+ /stored.*setting/i, // Bidirectional: "Stored setting"
1014
+ /instruction.*stored/i,
1015
+ /stored.*instruction/i, // Bidirectional: "Stored instruction"
1016
+ /url.*stored/i,
1017
+ /stored.*url/i, // Bidirectional: "Stored URL"
1018
+ /package.*stored/i,
1019
+ /stored.*package/i, // Bidirectional: "Stored package"
1020
+ // NEW: Safety indicators (common in hardened implementations)
1021
+ /stored.*safely/i,
1022
+ /safely.*stored/i,
1023
+ /without\s+execut/i,
1024
+ /not\s+executed/i,
1025
+ /never\s+executed/i,
1026
+ /stored.*as.*data/i,
1027
+ /treated.*as.*data/i,
1028
+ /stored\s+in\s+(collection|database)/i,
1029
+ // NEW: Common safe storage responses
1030
+ /stored.*successfully/i,
1031
+ /saved.*to/i,
1032
+ /recorded\s+in/i,
1033
+ /added\s+to/i,
1034
+ // NEW: Storage/logging confirmations (high confidence)
1035
+ /logged successfully:/i,
1036
+ /queued for processing:/i,
1037
+ /saved (for|successfully)/i,
1038
+ /stored for (admin review|configuration|processing)/i,
1039
+ // NEW: Processing confirmations (high confidence)
1040
+ /processed successfully/i,
1041
+ /validated successfully/i,
1042
+ /parsed successfully/i,
1043
+ /(validation|processing) (passed|completed)/i,
1044
+ // NEW: Error messages with input reflection (common safe pattern)
1045
+ /error:.*not (found|in approved list|recognized)/i,
1046
+ /error getting info for ['"].*['"]/i,
1047
+ /invalid .* format.*stored as text/i,
1048
+ /error:.*too (long|short|large)/i,
1049
+ ];
1050
+ // LAYER 1: Check for reflection/status patterns
1051
+ const matchedPatterns = [];
1052
+ const hasReflection = reflectionPatterns.some((pattern) => {
1053
+ const matches = pattern.test(responseText);
1054
+ if (matches) {
1055
+ matchedPatterns.push(pattern.source.substring(0, 50));
1056
+ }
1057
+ return matches;
1058
+ });
1059
+ console.log("[DIAG] Has reflection:", hasReflection);
1060
+ if (matchedPatterns.length > 0) {
1061
+ console.log("[DIAG] Matched reflection patterns:", matchedPatterns.join(", "));
1062
+ }
1063
+ if (hasReflection) {
1064
+ // LAYER 2: Defense-in-depth - verify NO execution evidence
1065
+ // For JSON responses, check execution artifacts only in result/output fields
1066
+ try {
1067
+ const parsed = JSON.parse(responseText);
1068
+ const resultText = String(parsed.result || "");
1069
+ const outputFields = [
1070
+ parsed.stdout,
1071
+ parsed.stderr,
1072
+ parsed.output,
1073
+ parsed.contents,
1074
+ parsed.execution_log,
1075
+ parsed.command_output,
1076
+ ]
1077
+ .filter(Boolean)
1078
+ .join(" ");
1079
+ // Only check resultText for execution if it's NOT purely a status message
1080
+ // Status messages like "Action executed successfully: X" just echo the payload
1081
+ const resultIsStatusOnly = statusPatterns.some((pattern) => pattern.test(resultText));
1082
+ const hasExecutionInOutput = resultIsStatusOnly
1083
+ ? this.detectExecutionArtifacts(outputFields) // Skip result, check only output fields
1084
+ : this.detectExecutionArtifacts(resultText) ||
1085
+ this.detectExecutionArtifacts(outputFields);
1086
+ console.log("[DIAG] JSON mode - checking execution in result/output fields only");
1087
+ console.log("[DIAG] Has execution in output:", hasExecutionInOutput);
1088
+ if (hasExecutionInOutput) {
1089
+ console.log("[DIAG] RESULT: Reflection + Execution in output = VULNERABLE (false)");
1090
+ return false;
1091
+ }
1092
+ console.log("[DIAG] RESULT: Reflection + clean output = SAFE (true)");
1093
+ return true;
1094
+ }
1095
+ catch {
1096
+ // Not JSON, check entire response for execution
1097
+ const hasExecution = this.detectExecutionArtifacts(responseText);
1098
+ console.log("[DIAG] Text mode - Has execution artifacts:", hasExecution);
1099
+ if (hasExecution) {
1100
+ console.log("[DIAG] RESULT: Reflection + Execution = VULNERABLE (false)");
1101
+ return false;
1102
+ }
1103
+ console.log("[DIAG] RESULT: Reflection only = SAFE (true)");
1104
+ return true;
1105
+ }
1106
+ }
1107
+ // JSON Structural Analysis with execution verification
1108
+ try {
1109
+ const parsed = JSON.parse(responseText);
1110
+ // Check placeholder action with safe result
1111
+ if (parsed.action === "test" || parsed.action === "placeholder") {
1112
+ const resultText = String(parsed.result || "");
1113
+ if (!this.detectExecutionArtifacts(resultText)) {
1114
+ return true; // Placeholder action with clean result
1115
+ }
1116
+ }
1117
+ // Check generic status without execution
1118
+ if (parsed.status && /(completed|success|ok|done)/.test(parsed.status)) {
1119
+ if (!this.detectExecutionArtifacts(responseText)) {
1120
+ return true; // Status indicator with no execution
1121
+ }
1122
+ }
1123
+ }
1124
+ catch {
1125
+ // Not JSON, continue with text-only analysis
1126
+ }
1127
+ return false;
1128
+ }
1129
+ /**
1130
+ * Detect execution artifacts in response
1131
+ * Returns true if response contains evidence of actual code execution
1132
+ *
1133
+ * HIGH confidence: System files, commands, directory listings
1134
+ * MEDIUM confidence: Contextual patterns (root alone, paths)
1135
+ */
1136
+ detectExecutionArtifacts(responseText) {
1137
+ console.log("[DIAG] detectExecutionArtifacts called");
1138
+ const executionIndicators = [
1139
+ // HIGH CONFIDENCE - System files (requires format)
1140
+ /[a-z]+:x:\d+:\d+:/i, // passwd: "root:x:0:0:"
1141
+ /uid=\d+\([^)]+\)\s+gid=\d+/i, // id: "uid=0(root) gid=0(root)"
1142
+ // HIGH CONFIDENCE - Directory listings (full format)
1143
+ /[d-][rwx-]{9}\s+\d+\s+[a-z]+/i, // ls -la: "drwxr-xr-x 2 root"
1144
+ /total\s+\d+\s*$/m, // ls total line
1145
+ // HIGH CONFIDENCE - Command execution fields
1146
+ /command_executed:\s*[^"\s]/i, // "command_executed: whoami"
1147
+ /stdout:\s*["']?[^"'\s]/i, // "stdout: root"
1148
+ /(execution|output)_log:/i, // Log fields
1149
+ // HIGH CONFIDENCE - Shell paths
1150
+ /\/bin\/(bash|sh|zsh|dash)/i, // Shell executables
1151
+ // MEDIUM CONFIDENCE - System identity (contextual only)
1152
+ /\b(root|administrator)\s*$/im, // "root" alone on line (whoami)
1153
+ /\/root\//i, // Path: "/root/"
1154
+ /\/etc\/passwd/i, // Sensitive file
1155
+ // MEDIUM CONFIDENCE - Process info
1156
+ /PID:\s*\d{3,}/i, // Process ID
1157
+ ];
1158
+ const matchedExecutionPatterns = [];
1159
+ const found = executionIndicators.some((pattern) => {
1160
+ const matches = pattern.test(responseText);
1161
+ if (matches) {
1162
+ matchedExecutionPatterns.push(pattern.source.substring(0, 50));
1163
+ }
1164
+ return matches;
1165
+ });
1166
+ if (matchedExecutionPatterns.length > 0) {
1167
+ console.log("[DIAG] Matched execution patterns:", matchedExecutionPatterns.join(", "));
1168
+ }
1169
+ console.log("[DIAG] Execution artifacts found:", found);
1170
+ return found;
1171
+ }
1172
+ /**
1173
+ * Analyze injection response (existing logic)
1174
+ * Note: payload parameter unused after refactoring to two-layer defense
1175
+ */
1176
+ analyzeInjectionResponse(response, _payload) {
1177
+ const responseText = this.extractResponseContent(response);
1178
+ // Look for signs of execution vs reflection
1179
+ const executionPatterns = [
1180
+ /executed/i,
1181
+ /command.*ran/i,
1182
+ /result.*is/i,
1183
+ /output.*:/i,
1184
+ /returned.*value/i,
1185
+ ];
1186
+ const hasExecution = executionPatterns.some((pattern) => pattern.test(responseText));
1187
+ // CRITICAL: Check reflection BEFORE flagging as vulnerable
1188
+ if (hasExecution && !this.isReflectionResponse(responseText)) {
1189
+ return {
1190
+ isVulnerable: true,
1191
+ evidence: "Tool executed instruction: found execution keywords",
1192
+ };
1193
+ }
1194
+ return { isVulnerable: false };
1195
+ }
1196
+ /**
1197
+ * Extract response content
1198
+ */
1199
+ extractResponseContent(response) {
1200
+ if (response.content && Array.isArray(response.content)) {
1201
+ return response.content
1202
+ .map((c) => (c.type === "text" ? c.text : ""))
1203
+ .join(" ");
1204
+ }
1205
+ return String(response.content || "");
1206
+ }
1207
+ /**
1208
+ * Check if tool has input parameters
1209
+ */
1210
+ hasInputParameters(tool) {
1211
+ const schema = tool.inputSchema?.type === "object" ? tool.inputSchema : tool.inputSchema;
1212
+ return schema?.properties && Object.keys(schema.properties).length > 0;
1213
+ }
1214
+ createTestParameters(payload, tool) {
1215
+ // Extract tool schema
1216
+ const schema = tool.inputSchema?.type === "object" ? tool.inputSchema : tool.inputSchema;
1217
+ if (!schema?.properties) {
1218
+ return {};
1219
+ }
1220
+ const params = {};
1221
+ const targetParamTypes = payload.parameterTypes || [];
1222
+ let payloadInjected = false;
1223
+ // Try to match payload to appropriate parameter by name
1224
+ if (targetParamTypes.length > 0) {
1225
+ // Payload is parameter-specific (e.g., URLs only for "url" params)
1226
+ for (const [key, prop] of Object.entries(schema.properties)) {
1227
+ const propSchema = prop;
1228
+ const paramNameLower = key.toLowerCase();
1229
+ // Check if parameter name matches expected types
1230
+ if (propSchema.type === "string" &&
1231
+ targetParamTypes.some((type) => paramNameLower.includes(type))) {
1232
+ params[key] = payload.payload;
1233
+ payloadInjected = true;
1234
+ break;
1235
+ }
1236
+ }
1237
+ }
1238
+ else {
1239
+ // Generic payload - inject into first string parameter (original behavior)
1240
+ for (const [key, prop] of Object.entries(schema.properties)) {
1241
+ const propSchema = prop;
1242
+ if (propSchema.type === "string" && !payloadInjected) {
1243
+ params[key] = payload.payload;
1244
+ payloadInjected = true;
1245
+ break;
1246
+ }
1247
+ }
1248
+ }
1249
+ // Fill required parameters with safe defaults
1250
+ for (const [key, prop] of Object.entries(schema.properties)) {
1251
+ const propSchema = prop;
1252
+ if (schema.required?.includes(key) && !(key in params)) {
1253
+ if (propSchema.type === "string") {
1254
+ params[key] = "test";
1255
+ }
1256
+ else if (propSchema.type === "number") {
1257
+ params[key] = 1;
1258
+ }
1259
+ else if (propSchema.type === "boolean") {
1260
+ params[key] = true;
1261
+ }
1262
+ else if (propSchema.type === "object") {
1263
+ params[key] = {};
1264
+ }
1265
+ else if (propSchema.type === "array") {
1266
+ params[key] = [];
1267
+ }
1268
+ }
1269
+ }
1270
+ return params;
1271
+ }
1272
+ /**
1273
+ * Check if tool is an API wrapper (safe data-passing tool)
1274
+ */
1275
+ isApiWrapper(tool) {
1276
+ const classifier = new ToolClassifier();
1277
+ const classification = classifier.classify(tool.name, tool.description || "");
1278
+ return classification.categories.includes(ToolCategory.API_WRAPPER);
1279
+ }
1280
+ /**
1281
+ * Check if attack is an execution-based test
1282
+ * These tests assume the tool executes input as code, which doesn't apply to API wrappers
1283
+ */
1284
+ isExecutionTest(attackName) {
1285
+ const executionTests = [
1286
+ "Command Injection",
1287
+ "SQL Injection",
1288
+ "Path Traversal",
1289
+ ];
1290
+ return executionTests.includes(attackName);
1291
+ }
1292
+ /**
1293
+ * Check if response is returning search results
1294
+ * Search tools return query results as data, not execute them
1295
+ */
1296
+ isSearchResultResponse(responseText) {
1297
+ const searchResultPatterns = [
1298
+ /"results"\s*:\s*\[/i, // JSON results array
1299
+ /"type"\s*:\s*"search"/i, // Type indicator
1300
+ /"object"\s*:\s*"list"/i, // Notion list format
1301
+ /\bhighlight\b/i, // Search highlighting
1302
+ /search\s+results/i,
1303
+ /found\s+\d+\s+(results?|pages?|items?)/i,
1304
+ /query\s+(returned|matched)/i,
1305
+ /\d+\s+(results?|matches?|hits?)\s+for/i,
1306
+ /"has_more"\s*:/i, // Pagination indicator
1307
+ /next_cursor/i, // Pagination cursor
1308
+ ];
1309
+ return searchResultPatterns.some((pattern) => pattern.test(responseText));
1310
+ }
1311
+ /**
1312
+ * Check if response is from a creation/modification operation
1313
+ * CRUD tools create/modify resources, not execute code
1314
+ */
1315
+ isCreationResponse(responseText) {
1316
+ const creationPatterns = [
1317
+ /successfully\s+created/i,
1318
+ /database\s+created/i,
1319
+ /page\s+created/i,
1320
+ /resource\s+created/i,
1321
+ /\bcreate\s+table\b/i, // SQL creation
1322
+ /\binsert\s+into\b/i, // SQL insertion
1323
+ /"id"\s*:\s*"[a-f0-9-]{36}"/i, // UUID response (created resource)
1324
+ /"object"\s*:\s*"(page|database)"/i, // Notion object types
1325
+ /collection:\/\//i, // Collection URI
1326
+ /successfully\s+(added|inserted|updated|modified)/i,
1327
+ /resource\s+id:\s*[a-f0-9-]/i,
1328
+ /"created_time"/i, // Timestamp from creation
1329
+ /"last_edited_time"/i, // Timestamp from modification
1330
+ ];
1331
+ return creationPatterns.some((pattern) => pattern.test(responseText));
1332
+ }
1333
+ }