agent-security-scanner-mcp 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,14 @@
 
  A powerful MCP (Model Context Protocol) server for real-time security vulnerability scanning. Integrates with Claude Desktop, Claude Code, OpenCode.ai, Kilo Code, and any MCP-compatible client to automatically detect and fix security issues as you code.
 
- **275 Semgrep-aligned security rules | 105 auto-fix templates | 100% fix coverage | Package hallucination detection**
+ **275+ Semgrep-aligned security rules | 105 auto-fix templates | 100% fix coverage | Package hallucination detection | AI Agent prompt security**
+
+ ## What's New in v1.3.0
+
+ - **AI Agent Prompt Security** - New `scan_agent_prompt` tool to detect malicious prompts before execution
+ - **56 prompt attack detection rules** - Exfiltration, backdoor requests, social engineering, jailbreaks
+ - **Risk scoring engine** - BLOCK/WARN/LOG/ALLOW actions with 0-100 risk scores
+ - **Prompt injection detection** - 39 rules for LLM prompt injection patterns
 
  ## What's New in v1.2.0
 
@@ -215,6 +222,89 @@ List all 105 available auto-fix templates.
 
  ---
 
+ ## AI Agent Prompt Security
+
+ Protect AI coding agents (Claude Code, Cursor, Copilot, etc.) from malicious prompts before execution. Detects exfiltration attempts, backdoor requests, social engineering, and obfuscated attacks.
+
+ ### `scan_agent_prompt`
+
+ Scan a prompt for malicious intent before allowing an AI agent to execute it.
+
+ ```
+ Parameters:
+ prompt_text (string): The prompt text to analyze
+ context (object, optional):
+ - sensitivity_level: "high" | "medium" | "low" (default: "medium")
+
+ Returns:
+ - action: "BLOCK" | "WARN" | "LOG" | "ALLOW"
+ - risk_score: 0-100
+ - risk_level: "CRITICAL" | "HIGH" | "MEDIUM" | "LOW" | "NONE"
+ - findings: Array of detected issues
+ - explanation: Human-readable summary
+ - recommendations: Suggested actions
+ ```
+
+ **Risk Thresholds:**
+
+ | Risk Level | Score Range | Action |
+ |------------|-------------|--------|
+ | CRITICAL | 85-100 | BLOCK |
+ | HIGH | 70-84 | BLOCK |
+ | MEDIUM | 50-69 | WARN |
+ | LOW | 25-49 | LOG |
+ | NONE | 0-24 | ALLOW |
+
+ **Example - Malicious prompt (BLOCKED):**
+ ```json
+ {
+ "action": "BLOCK",
+ "risk_score": 100,
+ "risk_level": "CRITICAL",
+ "findings": [
+ {
+ "rule_id": "agent.injection.security.backdoor-request",
+ "category": "malicious-injection",
+ "severity": "error",
+ "message": "Request to add backdoor or hidden access mechanism",
+ "matched_text": "add a hidden backdoor",
+ "confidence": "high"
+ }
+ ],
+ "explanation": "Detected 1 potential security issue(s) in prompt",
+ "recommendations": [
+ "Do not execute this prompt",
+ "Review the flagged patterns",
+ "Report if this appears to be an attack attempt"
+ ]
+ }
+ ```
+
+ **Example - Safe prompt (ALLOWED):**
+ ```json
+ {
+ "action": "ALLOW",
+ "risk_score": 0,
+ "risk_level": "NONE",
+ "findings": [],
+ "explanation": "No security issues detected in prompt",
+ "recommendations": []
+ }
+ ```
+
+ **Attack Categories Detected (56 rules):**
+
+ | Category | Rules | Examples |
+ |----------|-------|----------|
+ | Exfiltration | 10 | Send code to webhook, read .env files, push to external repo |
+ | Malicious Injection | 11 | Add backdoor, create reverse shell, disable authentication |
+ | System Manipulation | 9 | rm -rf /, modify /etc/passwd, add cron persistence |
+ | Social Engineering | 6 | Fake authorization claims, fake debug mode, urgency pressure |
+ | Obfuscation | 4 | Base64 encoded commands, ROT13, fragmented instructions |
+ | Agent Manipulation | 3 | Ignore previous instructions, override safety, DAN jailbreaks |
+
+ ---
+
  ## Package Hallucination Detection
 
  Detect AI-hallucinated package names that don't exist in official registries. Prevents supply chain attacks where attackers register fake package names suggested by AI.
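
The `scan_agent_prompt` contract documented above is enough to gate an agent loop on the scanner's verdict. The sketch below is illustrative only: `scanAgentPrompt` stands in for whatever MCP client call reaches this server and `runAgent` is a placeholder for the host agent; only the response fields (`action`, `risk_score`, `findings`, `explanation`) come from the README.

```js
// Hypothetical gating wrapper around the scan_agent_prompt tool documented above.
// scanAgentPrompt and runAgent are placeholders for the host agent's own plumbing.
async function executeIfSafe(promptText, scanAgentPrompt, runAgent) {
  const verdict = await scanAgentPrompt({
    prompt_text: promptText,
    context: { sensitivity_level: "high" },
  });

  if (verdict.action === "BLOCK") {
    // CRITICAL/HIGH risk: refuse to execute and surface the findings.
    throw new Error(`Prompt blocked (risk ${verdict.risk_score}): ${verdict.explanation}`);
  }
  if (verdict.action === "WARN") {
    // MEDIUM risk: log the rule hits and let a human decide before proceeding.
    console.warn("Prompt flagged:", verdict.findings.map(f => f.rule_id).join(", "));
  }
  return runAgent(promptText);
}
```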
package/analyzer.py CHANGED
@@ -32,6 +32,13 @@ EXTENSION_MAP = {
  '.json': 'json',
  '.tf': 'terraform',
  '.hcl': 'terraform',
+ # Prompt/text file extensions for prompt injection scanning
+ '.txt': 'generic',
+ '.md': 'generic',
+ '.prompt': 'generic',
+ '.jinja': 'generic',
+ '.jinja2': 'generic',
+ '.j2': 'generic',
  }
 
  def detect_language(file_path):
package/index.js CHANGED
@@ -606,6 +606,109 @@ const FIX_TEMPLATES = {
  }
  },
 
+ // ===========================================
+ // PROMPT INJECTION - LLM SECURITY
+ // ===========================================
+ "prompt-injection": {
+ description: "Sanitize user input before including in LLM prompts",
+ fix: (line, lang) => {
+ if (lang === 'python') {
+ return line
+ .replace(/f["']([^"']*)\{([^}]+)\}([^"']*)["']/, '"$1{sanitized}$3".format(sanitized=sanitize_prompt_input($2))')
+ .replace(/\+\s*(\w+)/, '+ sanitize_prompt_input($1)');
+ }
+ return line
+ .replace(/`([^`]*)\$\{([^}]+)\}([^`]*)`/, '`$1${sanitizePromptInput($2)}$3`')
+ .replace(/\+\s*(\w+)/, '+ sanitizePromptInput($1)');
+ }
+ },
+ "openai-unsafe-fstring": {
+ description: "Sanitize user input before including in OpenAI prompts",
+ fix: (line, lang) => {
+ if (lang === 'python') {
+ return line.replace(
+ /content\s*:\s*f["']([^"']*)["']/,
+ 'content: sanitize_llm_input(f"$1")'
+ );
+ }
+ return line.replace(/content\s*:\s*`([^`]*)`/, 'content: sanitizePromptInput(`$1`)');
+ }
+ },
+ "anthropic-unsafe-fstring": {
+ description: "Sanitize user input before including in Anthropic prompts",
+ fix: (line, lang) => {
+ if (lang === 'python') {
+ return line.replace(
+ /content\s*=\s*f["']([^"']*)["']/,
+ 'content=sanitize_llm_input(f"$1")'
+ );
+ }
+ return line.replace(/content\s*:\s*`([^`]*)`/, 'content: sanitizePromptInput(`$1`)');
+ }
+ },
+ "langchain-unsafe-template": {
+ description: "Use input validation for LangChain template variables",
+ fix: (line) => '# TODO: Sanitize template variables before use\n' + line
+ },
+ "langchain-chain-unsafe": {
+ description: "Validate user input before LangChain chain execution",
+ fix: (line, lang) => {
+ if (lang === 'python') {
+ return line.replace(/\.run\s*\(\s*(\w+)/, '.run(sanitize_chain_input($1)');
+ }
+ return line.replace(/\.invoke\s*\(\s*(\w+)/, '.invoke(sanitizeChainInput($1)');
+ }
+ },
+ "langchain-agent-unsafe": {
+ description: "Validate user input before LangChain agent execution",
+ fix: (line) => '# SECURITY: Validate and sanitize user input before agent execution\n' + line
+ },
+ "eval-llm-response": {
+ description: "CRITICAL: Never eval() LLM responses - use JSON parsing or ast.literal_eval for safe subset",
+ fix: (line, lang) => {
+ if (lang === 'python') {
+ return line.replace(/eval\s*\(\s*(\w+)/, 'ast.literal_eval($1 # SECURITY: Use safe parsing only');
+ }
+ return line.replace(/eval\s*\(\s*(\w+)/, 'JSON.parse($1 /* SECURITY: Use safe JSON parsing */');
+ }
+ },
+ "exec-llm-response": {
+ description: "CRITICAL: Never exec() LLM responses - remove or use sandboxed execution",
+ fix: (line) => '# SECURITY CRITICAL: Removed dangerous exec() of LLM response\n# ' + line
+ },
+ "function-constructor": {
+ description: "CRITICAL: Never use new Function() with LLM responses",
+ fix: (line) => '// SECURITY CRITICAL: Removed dangerous Function constructor with LLM response\n// ' + line
+ },
+ "pickle-llm-response": {
+ description: "Use JSON instead of pickle for LLM response deserialization",
+ fix: (line) => line.replace(/pickle\.(loads?)\s*\(/, 'json.$1(')
+ },
+ "ignore-previous-instructions": {
+ description: "Detected prompt injection pattern - sanitize or reject this input",
+ fix: (line) => '# SECURITY: Detected prompt injection attempt - INPUT SHOULD BE REJECTED\n# ' + line
+ },
+ "jailbreak-dan": {
+ description: "Detected DAN jailbreak attempt - reject this input",
+ fix: (line) => '# SECURITY: Detected jailbreak attempt - INPUT REJECTED\n# ' + line
+ },
+ "jailbreak-roleplay": {
+ description: "Detected role-play jailbreak attempt - sanitize or reject",
+ fix: (line) => '# SECURITY: Potential jailbreak via role-play - validate input\n# ' + line
+ },
+ "system-prompt-extraction": {
+ description: "Detected system prompt extraction attempt - reject this input",
+ fix: (line) => '# SECURITY: System prompt extraction attempt blocked\n# ' + line
+ },
+ "delimiter-injection": {
+ description: "Detected delimiter injection - escape special characters or reject",
+ fix: (line) => '# SECURITY: Delimiter injection blocked - escape special tokens\n# ' + line
+ },
+ "context-manipulation": {
+ description: "Detected context manipulation attempt - validate input",
+ fix: (line) => '# SECURITY: Context manipulation detected - validate user input\n# ' + line
+ },
+
  // ===========================================
  // ADDITIONAL SECURITY FIXES
  // ===========================================
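
Each template above is a pure line rewrite: `fix(line, lang)` applies one or two regex replacements to the flagged line and returns the result. The snippet below illustrates the JavaScript branch of the `prompt-injection` template; the regex and replacement string are copied from the diff above, while the input line and the `sanitizePromptInput` helper it references are invented for the example.

```js
// Illustration of the "prompt-injection" template's JavaScript branch.
// The input line and the sanitizePromptInput helper are examples only.
const line = "const prompt = `Summarize this issue: ${userInput}`;";

const fixed = line.replace(
  /`([^`]*)\$\{([^}]+)\}([^`]*)`/,
  '`$1${sanitizePromptInput($2)}$3`'
);

console.log(fixed);
// -> const prompt = `Summarize this issue: ${sanitizePromptInput(userInput)}`;
```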
@@ -629,7 +732,10 @@ function detectLanguage(filePath) {
  const langMap = {
  'py': 'python', 'js': 'javascript', 'ts': 'typescript',
  'tsx': 'typescript', 'jsx': 'javascript', 'java': 'java',
- 'go': 'go', 'rb': 'ruby', 'php': 'php'
+ 'go': 'go', 'rb': 'ruby', 'php': 'php',
+ // Prompt/text file extensions for prompt injection scanning
+ 'txt': 'generic', 'md': 'generic', 'prompt': 'generic',
+ 'jinja': 'generic', 'jinja2': 'generic', 'j2': 'generic'
  };
  return langMap[ext] || 'generic';
  }
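
Both this `langMap` change and the Python `EXTENSION_MAP` change in analyzer.py route prompt and template files to the `generic` language, so the content-level prompt-injection rules still run on them. A minimal sketch of that routing, using the same extension keys; `detectLanguageSketch` is a stand-in, not the module's own function.

```js
// Stand-in for the extension routing shown above: text-like and unknown
// extensions fall through to 'generic' so prompt content rules still apply.
const langMap = {
  py: "python", js: "javascript", ts: "typescript",
  txt: "generic", md: "generic", prompt: "generic",
  jinja: "generic", jinja2: "generic", j2: "generic",
};

function detectLanguageSketch(filePath) {
  const ext = filePath.split(".").pop().toLowerCase();
  return langMap[ext] || "generic";
}

console.log(detectLanguageSketch("prompts/system.prompt")); // generic
console.log(detectLanguageSketch("src/app.py"));            // python
```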
@@ -1050,6 +1156,407 @@ server.tool(
  }
  );
 
+ // ===========================================
+ // AGENT PROMPT SECURITY SCANNING
+ // ===========================================
+
+ // Risk thresholds for action determination
+ const RISK_THRESHOLDS = {
+ CRITICAL: 85,
+ HIGH: 70,
+ MEDIUM: 50,
+ LOW: 25
+ };
+
+ // Category weights for risk calculation
+ const CATEGORY_WEIGHTS = {
+ "exfiltration": 1.0,
+ "malicious-injection": 1.0,
+ "system-manipulation": 1.0,
+ "social-engineering": 0.8,
+ "obfuscation": 0.7,
+ "agent-manipulation": 0.9,
+ "prompt-injection": 0.9,
+ "prompt-injection-content": 0.9,
+ "prompt-injection-jailbreak": 0.85,
+ "prompt-injection-extraction": 0.9,
+ "prompt-injection-delimiter": 0.8
+ };
+
+ // Confidence multipliers
+ const CONFIDENCE_MULTIPLIERS = {
+ "HIGH": 1.0,
+ "MEDIUM": 0.7,
+ "LOW": 0.4
+ };
+
+ // Load agent attack rules from YAML
+ function loadAgentAttackRules() {
+ try {
+ const rulesPath = join(__dirname, 'rules', 'agent-attacks.security.yaml');
+ if (!existsSync(rulesPath)) {
+ console.error("Agent attack rules file not found");
+ return [];
+ }
+
+ const yaml = readFileSync(rulesPath, 'utf-8');
+ const rules = [];
+
+ // Simple YAML parsing for rules
+ const ruleBlocks = yaml.split(/^ - id:/m).slice(1);
+
+ for (const block of ruleBlocks) {
+ const lines = (' - id:' + block).split('\n');
+ const rule = {
+ id: '',
+ severity: 'WARNING',
+ message: '',
+ patterns: [],
+ metadata: {}
+ };
+
+ let inPatterns = false;
+ let inMetadata = false;
+
+ for (const line of lines) {
+ if (line.match(/^\s+- id:\s*/)) {
+ rule.id = line.replace(/^\s+- id:\s*/, '').trim();
+ } else if (line.match(/^\s+severity:\s*/)) {
+ rule.severity = line.replace(/^\s+severity:\s*/, '').trim();
+ } else if (line.match(/^\s+message:\s*/)) {
+ rule.message = line.replace(/^\s+message:\s*["']?/, '').replace(/["']$/, '').trim();
+ } else if (line.match(/^\s+patterns:\s*$/)) {
+ inPatterns = true;
+ inMetadata = false;
+ } else if (line.match(/^\s+metadata:\s*$/)) {
+ inPatterns = false;
+ inMetadata = true;
+ } else if (inPatterns && line.match(/^\s+- /)) {
+ let pattern = line.replace(/^\s+- /, '').trim();
+ pattern = pattern.replace(/^["']|["']$/g, '');
+ // Strip Python-style inline flags - JS doesn't support them
+ pattern = pattern.replace(/^\(\?i\)/, '');
+ // Unescape double backslashes from YAML (\\s -> \s)
+ pattern = pattern.replace(/\\\\/g, '\\');
+ if (pattern) rule.patterns.push(pattern);
+ } else if (inMetadata && line.match(/^\s+\w+:/)) {
+ const match = line.match(/^\s+(\w+):\s*["']?([^"'\n]+)["']?/);
+ if (match) {
+ rule.metadata[match[1]] = match[2].trim();
+ }
+ } else if (line.match(/^\s+languages:/)) {
+ inPatterns = false;
+ inMetadata = false;
+ }
+ }
+
+ if (rule.id && rule.patterns.length > 0) {
+ rules.push(rule);
+ }
+ }
+
+ return rules;
+ } catch (error) {
+ console.error("Error loading agent attack rules:", error.message);
+ return [];
+ }
+ }
+
+ // Also load prompt injection rules
+ function loadPromptInjectionRules() {
+ try {
+ const rulesPath = join(__dirname, 'rules', 'prompt-injection.security.yaml');
+ if (!existsSync(rulesPath)) {
+ return [];
+ }
+
+ const yaml = readFileSync(rulesPath, 'utf-8');
+ const rules = [];
+
+ const ruleBlocks = yaml.split(/^ - id:/m).slice(1);
+
+ for (const block of ruleBlocks) {
+ const lines = (' - id:' + block).split('\n');
+ const rule = {
+ id: '',
+ severity: 'WARNING',
+ message: '',
+ patterns: [],
+ metadata: {}
+ };
+
+ let inPatterns = false;
+ let inMetadata = false;
+
+ for (const line of lines) {
+ if (line.match(/^\s+- id:\s*/)) {
+ rule.id = line.replace(/^\s+- id:\s*/, '').trim();
+ } else if (line.match(/^\s+severity:\s*/)) {
+ rule.severity = line.replace(/^\s+severity:\s*/, '').trim();
+ } else if (line.match(/^\s+message:\s*/)) {
+ rule.message = line.replace(/^\s+message:\s*["']?/, '').replace(/["']$/, '').trim();
+ } else if (line.match(/^\s+patterns:\s*$/)) {
+ inPatterns = true;
+ inMetadata = false;
+ } else if (line.match(/^\s+metadata:\s*$/)) {
+ inPatterns = false;
+ inMetadata = true;
+ } else if (inPatterns && line.match(/^\s+- /)) {
+ let pattern = line.replace(/^\s+- /, '').trim();
+ pattern = pattern.replace(/^["']|["']$/g, '');
+ // Strip Python-style inline flags - JS doesn't support them
+ pattern = pattern.replace(/^\(\?i\)/, '');
+ // Unescape double backslashes from YAML (\\s -> \s)
+ pattern = pattern.replace(/\\\\/g, '\\');
+ if (pattern) rule.patterns.push(pattern);
+ } else if (inMetadata && line.match(/^\s+\w+:/)) {
+ const match = line.match(/^\s+(\w+):\s*["']?([^"'\n]+)["']?/);
+ if (match) {
+ rule.metadata[match[1]] = match[2].trim();
+ }
+ }
+ }
+
+ // Only include generic rules (content patterns, not code patterns)
+ if (rule.id && rule.patterns.length > 0 && rule.id.startsWith('generic.prompt')) {
+ rules.push(rule);
+ }
+ }
+
+ return rules;
+ } catch (error) {
+ console.error("Error loading prompt injection rules:", error.message);
+ return [];
+ }
+ }
+
+ // Calculate risk score from findings
+ function calculateRiskScore(findings, context) {
+ if (findings.length === 0) return 0;
+
+ let totalScore = 0;
+
+ for (const finding of findings) {
+ const riskScore = parseInt(finding.risk_score) || 50;
+ const category = finding.category || 'unknown';
+ const confidence = finding.confidence || 'MEDIUM';
+
+ const categoryWeight = CATEGORY_WEIGHTS[category] || 0.5;
+ const confidenceMultiplier = CONFIDENCE_MULTIPLIERS[confidence] || 0.7;
+
+ totalScore += (riskScore / 100) * categoryWeight * confidenceMultiplier * 100;
+ }
+
+ // Average the scores but boost for multiple findings
+ let avgScore = totalScore / findings.length;
+
+ // Boost score if multiple findings (compound risk)
+ if (findings.length > 1) {
+ avgScore = Math.min(100, avgScore * (1 + (findings.length - 1) * 0.1));
+ }
+
+ // Apply sensitivity adjustment
+ if (context?.sensitivity_level === 'high') {
+ avgScore = Math.min(100, avgScore * 1.2);
+ } else if (context?.sensitivity_level === 'low') {
+ avgScore = avgScore * 0.8;
+ }
+
+ return Math.round(avgScore);
+ }
+
+ // Determine action based on risk score and findings
+ function determineAction(riskScore, findings) {
+ // Check for any BLOCK action findings
+ const hasBlockFinding = findings.some(f => f.action === 'BLOCK');
+ if (hasBlockFinding || riskScore >= RISK_THRESHOLDS.CRITICAL) {
+ return 'BLOCK';
+ }
+
+ if (riskScore >= RISK_THRESHOLDS.HIGH) {
+ return 'BLOCK';
+ }
+
+ const hasWarnFinding = findings.some(f => f.action === 'WARN');
+ if (hasWarnFinding || riskScore >= RISK_THRESHOLDS.MEDIUM) {
+ return 'WARN';
+ }
+
+ const hasLogFinding = findings.some(f => f.action === 'LOG');
+ if (hasLogFinding || riskScore >= RISK_THRESHOLDS.LOW) {
+ return 'LOG';
+ }
+
+ return 'ALLOW';
+ }
+
+ // Determine risk level from score
+ function getRiskLevel(score) {
+ if (score >= RISK_THRESHOLDS.CRITICAL) return 'CRITICAL';
+ if (score >= RISK_THRESHOLDS.HIGH) return 'HIGH';
+ if (score >= RISK_THRESHOLDS.MEDIUM) return 'MEDIUM';
+ if (score >= RISK_THRESHOLDS.LOW) return 'LOW';
+ return 'NONE';
+ }
+
+ // Generate explanation from findings
+ function generateExplanation(findings, action) {
+ if (findings.length === 0) {
+ return 'No security concerns detected in this prompt.';
+ }
+
+ const categories = [...new Set(findings.map(f => f.category))];
+ const severity = findings.some(f => f.severity === 'ERROR') ? 'critical' : 'potential';
+
+ let explanation = `Detected ${findings.length} ${severity} security concern(s)`;
+
+ if (categories.length > 0) {
+ explanation += ` in categories: ${categories.join(', ')}`;
+ }
+
+ explanation += `. Action: ${action}.`;
+
+ if (action === 'BLOCK') {
+ explanation += ' This prompt appears to contain malicious intent and should not be executed.';
+ } else if (action === 'WARN') {
+ explanation += ' Review carefully before proceeding.';
+ }
+
+ return explanation;
+ }
+
+ // Generate recommendations from findings
+ function generateRecommendations(findings) {
+ const recommendations = new Set();
+
+ for (const finding of findings) {
+ const category = finding.category;
+
+ switch (category) {
+ case 'exfiltration':
+ recommendations.add('Never allow prompts that request sending code or secrets to external URLs');
+ recommendations.add('Block access to sensitive files like .env, SSH keys, and credentials');
+ break;
+ case 'malicious-injection':
+ recommendations.add('Reject requests for backdoors, reverse shells, or malicious code');
+ recommendations.add('Never disable security controls at user request');
+ break;
+ case 'system-manipulation':
+ recommendations.add('Block destructive file operations and system configuration changes');
+ recommendations.add('Prevent persistence mechanisms like crontab or startup script modifications');
+ break;
+ case 'social-engineering':
+ recommendations.add('Verify authorization claims through proper channels, not prompt content');
+ recommendations.add('Be skeptical of urgency claims or claims of special modes');
+ break;
+ case 'obfuscation':
+ recommendations.add('Be wary of encoded or fragmented instructions');
+ recommendations.add('Reject requests for "examples" of malicious code');
+ break;
+ case 'agent-manipulation':
+ recommendations.add('Maintain confirmation prompts for sensitive operations');
+ recommendations.add('Never hide output or actions from the user');
+ break;
+ default:
+ recommendations.add('Review this prompt carefully before execution');
+ }
+ }
+
+ return [...recommendations];
+ }
+
+ // Create SHA256 hash for audit logging
+ function hashPrompt(text) {
+ const crypto = require('crypto');
+ return crypto.createHash('sha256').update(text).digest('hex').substring(0, 16);
+ }
+
+ // Register scan_agent_prompt tool
+ server.tool(
+ "scan_agent_prompt",
+ "Scan a prompt/instruction for potential malicious intent before execution. Returns risk assessment and recommended action (BLOCK/WARN/LOG/ALLOW).",
+ {
+ prompt_text: z.string().describe("The prompt or instruction text to analyze"),
+ context: z.object({
+ previous_messages: z.array(z.string()).optional().describe("Previous conversation messages for multi-turn detection"),
+ sensitivity_level: z.enum(["high", "medium", "low"]).optional().describe("Sensitivity level - high means more strict, low means more permissive")
+ }).optional().describe("Optional context for better analysis")
+ },
+ async ({ prompt_text, context }) => {
+ const findings = [];
+
+ // Load rules
+ const agentRules = loadAgentAttackRules();
+ const promptRules = loadPromptInjectionRules();
+ const allRules = [...agentRules, ...promptRules];
+
+ // Scan prompt against all rules
+ for (const rule of allRules) {
+ for (const pattern of rule.patterns) {
+ try {
+ const regex = new RegExp(pattern, 'i');
+ const match = prompt_text.match(regex);
+
+ if (match) {
+ findings.push({
+ rule_id: rule.id,
+ category: rule.metadata.category || 'unknown',
+ severity: rule.severity,
+ message: rule.message,
+ matched_text: match[0].substring(0, 100),
+ confidence: rule.metadata.confidence || 'MEDIUM',
+ risk_score: rule.metadata.risk_score || '50',
+ action: rule.metadata.action || 'WARN'
+ });
+ break; // Only one match per rule
+ }
+ } catch (e) {
+ // Skip invalid regex
+ }
+ }
+ }
+
+ // Calculate risk score
+ const riskScore = calculateRiskScore(findings, context);
+ const action = determineAction(riskScore, findings);
+ const riskLevel = getRiskLevel(riskScore);
+ const explanation = generateExplanation(findings, action);
+ const recommendations = generateRecommendations(findings);
+
+ // Create audit info
+ const audit = {
+ timestamp: new Date().toISOString(),
+ prompt_hash: hashPrompt(prompt_text),
+ prompt_length: prompt_text.length,
+ rules_checked: allRules.length,
+ context_provided: !!context
+ };
+
+ return {
+ content: [{
+ type: "text",
+ text: JSON.stringify({
+ action,
+ risk_score: riskScore,
+ risk_level: riskLevel,
+ findings_count: findings.length,
+ findings: findings.map(f => ({
+ rule_id: f.rule_id,
+ category: f.category,
+ severity: f.severity,
+ message: f.message,
+ matched_text: f.matched_text,
+ confidence: f.confidence
+ })),
+ explanation,
+ recommendations,
+ audit
+ }, null, 2)
+ }]
+ };
+ }
+ );
+
  // Load package lists on module initialization
  loadPackageLists();
 
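To make the scoring path concrete, here is a small worked example that applies the same arithmetic as `calculateRiskScore`, `getRiskLevel`, and `determineAction` above to one invented finding: an exfiltration rule hit with HIGH confidence, a per-rule `risk_score` of 90, and `sensitivity_level: "high"`.

```js
// Worked example of the risk arithmetic above; the finding values are invented.
const CATEGORY_WEIGHTS = { exfiltration: 1.0 };   // matches the diff above
const CONFIDENCE_MULTIPLIERS = { HIGH: 1.0 };     // matches the diff above

// Single finding: (risk_score / 100) * categoryWeight * confidenceMultiplier * 100
const base = (90 / 100) * CATEGORY_WEIGHTS.exfiltration * CONFIDENCE_MULTIPLIERS.HIGH * 100; // 90

// Only one finding, so no compound boost; high sensitivity multiplies by 1.2, capped at 100.
const riskScore = Math.round(Math.min(100, base * 1.2)); // 100

console.log(riskScore); // 100 -> risk_level CRITICAL (>= 85) -> action BLOCK
```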
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "agent-security-scanner-mcp",
- "version": "1.2.0",
- "description": "MCP server for security scanning & package hallucination detection. Works with Claude Desktop, Claude Code, OpenCode, Kilo Code. Detects SQL injection, XSS, secrets, and AI-invented packages.",
+ "version": "1.3.0",
+ "description": "MCP server for security scanning, AI agent prompt security & package hallucination detection. Works with Claude Desktop, Claude Code, OpenCode, Kilo Code. Detects SQL injection, XSS, secrets, prompt attacks, and AI-invented packages.",
  "main": "index.js",
  "type": "module",
  "bin": {
@@ -26,7 +26,11 @@
  "secrets-detection",
  "hallucination-detection",
  "package-verification",
- "supply-chain-security"
+ "supply-chain-security",
+ "prompt-injection",
+ "agent-security",
+ "llm-security",
+ "ai-safety"
  ],
  "author": "",
  "license": "MIT",