agent-security-scanner-mcp 3.14.0 → 3.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/analyzer.py CHANGED
@@ -12,6 +12,7 @@ import warnings
12
12
 
13
13
  # Suppress regex deprecation warnings for patterns with inline flags
14
14
  warnings.filterwarnings('ignore', category=DeprecationWarning)
15
+ import fnmatch
15
16
  import json
16
17
  import os
17
18
  import re
@@ -70,12 +71,54 @@ def detect_language(file_path):
70
71
  return EXTENSION_MAP.get(ext, 'generic')
71
72
 
72
73
 
74
def _matches_path_filter(file_path, rule):
    """Check whether a rule's paths.include/exclude filter admits file_path.

    The filter is read from ``metadata['paths']`` and may contain ``include``
    and/or ``exclude`` lists of glob patterns. Every pattern is tried with
    ``fnmatch`` against both the file's basename and the path as given.

    Args:
        file_path: Path of the file being analyzed.
        rule: Either an object exposing a ``metadata`` attribute or a plain
            dict carrying a ``'metadata'`` key.

    Returns:
        True if the rule should be applied to this file, False if the filter
        rules it out. Rules without usable paths metadata apply to all files.
    """
    # Attribute access keeps precedence; dict-shaped rules are supported too,
    # so they are no longer silently treated as "no filter".
    metadata = getattr(rule, 'metadata', None)
    if metadata is None and isinstance(rule, dict):
        metadata = rule.get('metadata')

    # metadata may be absent or None; only mapping-like values carry a filter.
    paths = metadata.get('paths') if hasattr(metadata, 'get') else None
    if not paths or not isinstance(paths, dict):
        return True

    basename = os.path.basename(file_path)

    def _matches_any(patterns):
        # Non-string entries (e.g. YAML numbers) can never match; skipping
        # them avoids a TypeError inside fnmatch.
        return any(
            isinstance(p, str)
            and (fnmatch.fnmatch(basename, p) or fnmatch.fnmatch(file_path, p))
            for p in patterns
        )

    # If include patterns exist, the file must match at least one.
    include = paths.get('include')
    if include and isinstance(include, list) and not _matches_any(include):
        return False

    # If exclude patterns exist, the file must NOT match any.
    exclude = paths.get('exclude')
    if exclude and isinstance(exclude, list) and _matches_any(exclude):
        return False

    return True
99
+
100
+
73
101
  def analyze_file_regex(file_path):
74
102
  """Original regex-based analysis (fallback when tree-sitter unavailable)."""
75
103
  issues = []
76
104
  try:
77
105
  language = detect_language(file_path)
78
- rules = get_rules_for_language(language)
106
+ all_rules = get_rules_for_language(language)
107
+ # Filter out rules whose paths.include/exclude don't match this file
108
+ rules = {}
109
+ basename = os.path.basename(file_path)
110
+ for rid, r in all_rules.items():
111
+ rule_paths = r.get('metadata', {}).get('paths')
112
+ if rule_paths and isinstance(rule_paths, dict):
113
+ include = rule_paths.get('include')
114
+ if include and isinstance(include, list):
115
+ if not any(fnmatch.fnmatch(basename, p) or fnmatch.fnmatch(file_path, p) for p in include):
116
+ continue
117
+ exclude = rule_paths.get('exclude')
118
+ if exclude and isinstance(exclude, list):
119
+ if any(fnmatch.fnmatch(basename, p) or fnmatch.fnmatch(file_path, p) for p in exclude):
120
+ continue
121
+ rules[rid] = r
79
122
  print(f"[REGEX] Language: {language}, rules loaded: {len(rules)}", file=sys.stderr)
80
123
  with open(file_path, 'r', encoding='utf-8') as f:
81
124
  lines = f.readlines()
@@ -170,6 +213,7 @@ def analyze_file_ast(file_path):
170
213
  if (parse_result.language in r.languages or 'generic' in r.languages)
171
214
  and r.metadata.get('category', 'unknown') in SECURITY_CATEGORIES
172
215
  and r.id not in NOISY_RULES
216
+ and _matches_path_filter(file_path, r)
173
217
  ]
174
218
 
175
219
  findings = engine.apply_rules(applicable_rules, ast)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-security-scanner-mcp",
3
- "version": "3.14.0",
3
+ "version": "3.16.0",
4
4
  "mcpName": "io.github.sinewaveai/agent-security-scanner-mcp",
5
5
  "description": "Security scanner MCP server for AI coding agents. Prompt injection firewall, package hallucination detection (4.3M+ packages), 1000+ vulnerability rules with AST & taint analysis, auto-fix. For Claude Code, Cursor, Windsurf, Cline, OpenClaw.",
6
6
  "main": "index.js",
@@ -64,7 +64,7 @@
64
64
  "type": "git",
65
65
  "url": "git+https://github.com/sinewaveai/agent-security-scanner-mcp.git"
66
66
  },
67
- "homepage": "https://github.com/sinewaveai/agent-security-scanner-mcp#readme",
67
+ "homepage": "https://www.proof-layer.com/",
68
68
  "bugs": {
69
69
  "url": "https://github.com/sinewaveai/agent-security-scanner-mcp/issues"
70
70
  },
package/rules/__init__.py CHANGED
@@ -109,6 +109,12 @@ def load_yaml_rules():
109
109
  if not extracted:
110
110
  continue
111
111
 
112
+ metadata = rule.get('metadata', {})
113
+ # Preserve paths.include/exclude for file-level filtering
114
+ paths = rule.get('paths')
115
+ if paths and isinstance(paths, dict):
116
+ metadata['paths'] = paths
117
+
112
118
  rules[rule_id] = {
113
119
  'id': rule_id,
114
120
  'name': rule_id.split('.')[-1].replace('-', ' ').title(),
@@ -116,7 +122,7 @@ def load_yaml_rules():
116
122
  'message': rule.get('message', ''),
117
123
  'severity': rule.get('severity', 'WARNING').lower(),
118
124
  'languages': rule.get('languages', ['generic']),
119
- 'metadata': rule.get('metadata', {})
125
+ 'metadata': metadata
120
126
  }
121
127
  except Exception as e:
122
128
  print(f"Error loading {filepath}: {e}")
package/semgrep_loader.py CHANGED
@@ -252,7 +252,12 @@ class SemgrepRuleLoader:
252
252
  languages.append('generic')
253
253
 
254
254
  metadata = rule_data.get('metadata', {})
255
-
255
+
256
+ # Preserve paths.include/exclude for file-level filtering
257
+ paths = rule_data.get('paths')
258
+ if paths and isinstance(paths, dict):
259
+ metadata['paths'] = paths
260
+
256
261
  # Convert Semgrep patterns to our AST patterns
257
262
  patterns = self._extract_patterns(rule_data)
258
263
  pattern_nots = self._extract_pattern_nots(rule_data)
@@ -21,6 +21,17 @@ const MANIFEST_INJECTION_PHRASES = /ignore\s+previous|exfiltrat|override\s+.*ins
21
21
  const MANIFEST_ZERO_WIDTH = /[\u200B\u200C\u200D\uFEFF\u2060]/;
22
22
  const MANIFEST_BIDI = /[\u202A-\u202E\u2066-\u2069\u200E\u200F\u061C]/;
23
23
 
24
// Suspicious default values in inputSchema (shell commands, URLs, injection phrases)
const SUSPICIOUS_DEFAULT = /\b(curl|wget|nc|bash|sh|powershell|cmd)\b.*[|>]|https?:\/\/[^\s'"]+|ignore\s+previous|exfiltrat|override\s+.*instruction|do\s+not\s+tell|hidden\s+instruction|bypass\s+.*filter/i;

// URL patterns for tool description scanning
const URL_IN_DESCRIPTION = /https?:\/\/[^\s'"<>]+/gi;
// Allowlisted hosts must end at a host boundary (optional numeric port, then
// "/", "?", "#" or end of string). Without that anchor, look-alike hosts such
// as "github.com.evil.io" or "github.com:443@evil.io" would be treated as safe.
// NOTE(review): the "docs." / "api." prefixes still accept any host that merely
// starts with that label — kept as-is, confirm this breadth is intended.
const SAFE_URL_DOMAINS = /^https?:\/\/(?:(?:github\.com|npmjs\.com|pypi\.org)(?::\d+)?(?:[\/?#]|$)|docs\.|api\.)/i;
const TUNNELING_URL = /https?:\/\/[^\s'"]*\b(ngrok|serveo|localtunnel|localhost|127\.0\.0\.1|webhook\.site|requestbin|pipedream|interact\.sh|burp|oast)\b/i;

// Cross-tool priority/exclusivity patterns
const PRIORITY_PATTERNS = /\b(before\s+calling\s+any\s+other\s+tool|do\s+not\s+use\s+any\s+other\s+tool|replaces?\s+the\s+function\s+of|must\s+be\s+(called|used|run|invoked)\s+(first|before)|always\s+(call|use|run|invoke)\s+this\s+(first|before)|instead\s+of\s+(using|calling))\b/i;
34
+
24
35
  // Directories to skip when walking
25
36
  const SKIP_DIRS = new Set([
26
37
  'node_modules', '.git', 'dist', 'build', '__pycache__',
@@ -567,6 +578,14 @@ function generateRecommendations(findings) {
567
578
  recommendations.push('Tool schema changed since baseline. Run with update_baseline:true only after manually verifying all changes. Rug pull attacks modify tool behavior after initial user approval (Adversa TOP25 #6).');
568
579
  }
569
580
 
581
+ if (categories.has('schema-manipulation')) {
582
+ recommendations.push('Inspect all inputSchema property descriptions, defaults, and enum values for hidden instructions. Attackers embed injection in schema metadata that reaches the LLM but is invisible to users.');
583
+ }
584
+
585
+ if (categories.has('cross-tool-manipulation')) {
586
+ recommendations.push('Tool descriptions must not direct the LLM to invoke other tools or claim execution priority. This is a cross-tool manipulation attack that can chain tool calls without user consent.');
587
+ }
588
+
570
589
  if (recommendations.length === 0) {
571
590
  recommendations.push('No critical issues found. Continue following security best practices.');
572
591
  }
@@ -744,6 +763,139 @@ function checkRugPull(manifestPath, serverDir) {
744
763
  return findings;
745
764
  }
746
765
 
766
+ // ============================================================
767
+ // Schema-level inspection (Task 1)
768
+ // ============================================================
769
+
770
/**
 * Backslash-escape every regular-expression metacharacter in `str` so the
 * result can be embedded verbatim inside a `new RegExp(...)` source string.
 */
function escapeRegex(str) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  // "$&" re-inserts the matched metacharacter after the added backslash.
  const escapedText = str.replace(metaChars, '\\$&');
  return escapedText;
}
773
+
774
/**
 * Inspect one tool's `inputSchema` for content that could smuggle instructions
 * to an LLM: an open schema with no declared properties, injection language or
 * hidden characters in property descriptions, and suspicious default or enum
 * values.
 *
 * @param {object} tool - Tool entry; `name` and `inputSchema` are read.
 * @param {string} manifestPath - Reported as `file` on every finding.
 * @returns {Array<object>} Findings in the 'schema-manipulation' category;
 *   empty when the tool has no object-valued inputSchema.
 */
function checkSchemaManipulation(tool, manifestPath) {
  const findings = [];
  const name = tool.name || '';
  const schema = tool.inputSchema;
  if (!schema || typeof schema !== 'object') return findings;

  const properties = schema.properties || {};

  // Flatten any JSON value to scannable text. Plain String() renders objects
  // as "[object Object]", which would hide payloads nested inside object
  // defaults or enum entries. Assumes acyclic data (schema parsed from JSON).
  const toText = (value) => {
    if (value === null || value === undefined) return '';
    if (typeof value !== 'object') return String(value);
    if (Array.isArray(value)) return value.map(toText).join(',');
    return Object.entries(value).map(([key, inner]) => `${key}:${toText(inner)}`).join(',');
  };

  // All findings from this check share category, file and line.
  const report = (rule, severity, message, match) => {
    findings.push({
      rule,
      severity,
      category: 'schema-manipulation',
      message,
      file: manifestPath,
      line: 1,
      match
    });
  };

  // Flag additionalProperties: true with no defined properties
  if (schema.additionalProperties === true && Object.keys(properties).length === 0) {
    report(
      'mcp.schema-open-additionalProperties',
      'WARNING',
      `Tool "${name}" has additionalProperties:true with no defined properties — accepts arbitrary hidden parameters.`,
      name
    );
  }

  // Walk each property
  for (const [propName, propDef] of Object.entries(properties)) {
    if (!propDef || typeof propDef !== 'object') continue;

    // Normalise to strings up front so the .substring() calls below cannot
    // throw on non-string schema values.
    const desc = propDef.description ? toText(propDef.description) : '';
    const defaultVal = propDef.default !== undefined ? toText(propDef.default) : '';
    const enumValues = Array.isArray(propDef.enum) ? propDef.enum.map(toText) : [];

    // Check description for injection phrases or hidden chars
    if (desc && (MANIFEST_INJECTION_PHRASES.test(desc) || MANIFEST_ZERO_WIDTH.test(desc) || MANIFEST_BIDI.test(desc))) {
      report(
        'mcp.schema-description-injection',
        'ERROR',
        `Tool "${name}" property "${propName}" description contains injection language or hidden characters.`,
        desc.substring(0, 100)
      );
    }

    // Check default for suspicious content
    if (defaultVal && SUSPICIOUS_DEFAULT.test(defaultVal)) {
      report(
        'mcp.schema-suspicious-default',
        'ERROR',
        `Tool "${name}" property "${propName}" has a suspicious default value containing shell commands, URLs, or injection patterns.`,
        defaultVal.substring(0, 100)
      );
    }

    // Check enum values for injection/suspicious content; at most one finding
    // per property, for the first offending value.
    const badEnum = enumValues.find(
      (val) => MANIFEST_INJECTION_PHRASES.test(val) || SUSPICIOUS_DEFAULT.test(val)
    );
    if (badEnum !== undefined) {
      report(
        'mcp.schema-suspicious-default',
        'ERROR',
        `Tool "${name}" property "${propName}" has a suspicious enum value.`,
        badEnum.substring(0, 100)
      );
    }
  }

  return findings;
}
848
+
849
+ // ============================================================
850
+ // Cross-tool manipulation detection (Task 2)
851
+ // ============================================================
852
+
853
/**
 * Detect tool descriptions that try to steer the LLM toward, or away from,
 * other tools in the same manifest.
 *
 * Two checks run per tool: (1) an action directive ("always call", "instead
 * of", "... first") that references another tool's name, and (2) generic
 * priority/exclusivity wording matched by PRIORITY_PATTERNS.
 *
 * @param {Array<object>} tools - Tool entries; `name` and `description` are read.
 * @param {string} manifestPath - Reported as `file` on every finding.
 * @returns {Array<object>} Findings in the 'cross-tool-manipulation' category.
 */
function checkCrossToolManipulation(tools, manifestPath) {
  const findings = [];

  // Non-string names/descriptions are treated as absent so that
  // .toLowerCase() / .substring() cannot throw on malformed manifests.
  const nameOf = (tool) => (typeof tool.name === 'string' ? tool.name : '');
  const toolNames = new Set(tools.map(t => nameOf(t).toLowerCase()).filter(Boolean));

  // The directive patterns depend only on the referenced tool name, so they
  // are compiled once here instead of once per (tool, otherName) pair.
  const references = [...toolNames].map((otherName) => {
    const escaped = escapeRegex(otherName);
    return {
      otherName,
      patterns: [
        new RegExp(`\\b(before\\s+using|always\\s+(call|use|run|invoke)|after\\s+calling|instead\\s+of)\\s+\\w*${escaped}\\b`, 'i'),
        new RegExp(`\\b(call|use|invoke|run|execute|trigger)\\s+\\w*${escaped}\\b.*\\b(first|before|always)\\b`, 'i')
      ]
    };
  });

  for (const tool of tools) {
    const name = tool.name || '';
    const ownName = nameOf(tool).toLowerCase();
    const description = typeof tool.description === 'string' ? tool.description : '';
    if (!description) continue;

    // Check for cross-tool reference with action directives; report only the
    // first other tool that is referenced.
    const hit = references.find(
      ({ otherName, patterns }) =>
        otherName !== ownName && patterns.some((pattern) => pattern.test(description))
    );
    if (hit) {
      findings.push({
        rule: 'mcp.cross-tool-reference',
        severity: 'ERROR',
        category: 'cross-tool-manipulation',
        message: `Tool "${name}" description contains action directive referencing tool "${hit.otherName}". This may be a cross-tool manipulation attack.`,
        file: manifestPath,
        line: 1,
        match: description.substring(0, 100)
      });
    }

    // Check for generic priority/exclusivity patterns
    if (PRIORITY_PATTERNS.test(description)) {
      findings.push({
        rule: 'mcp.cross-tool-priority-override',
        severity: 'ERROR',
        category: 'cross-tool-manipulation',
        message: `Tool "${name}" description demands execution priority or exclusivity over other tools.`,
        file: manifestPath,
        line: 1,
        match: description.substring(0, 100)
      });
    }
  }

  return findings;
}
898
+
747
899
  // ============================================================
748
900
  // Manifest scanning (server.json)
749
901
  // ============================================================
@@ -793,6 +945,40 @@ function scanManifest(manifestPath) {
793
945
  if (description.length > 500) {
794
946
  findings.push({ rule: 'mcp.manifest-description-too-long', severity: 'WARNING', category: 'description-injection', message: `Tool "${name}" description is ${description.length} chars — unusually long descriptions often contain hidden instructions.`, file: manifestPath, line: 1, match: description.substring(0, 100) });
795
947
  }
948
+
949
+ // Schema-level inspection (Task 1)
950
+ findings.push(...checkSchemaManipulation(tool, manifestPath));
951
+
952
+ // URL detection in descriptions (Task 4)
953
+ const urls = description.match(URL_IN_DESCRIPTION);
954
+ if (urls) {
955
+ for (const url of urls) {
956
+ if (TUNNELING_URL.test(url)) {
957
+ findings.push({ rule: 'mcp.description-tunneling-url', severity: 'ERROR', category: 'description-injection', message: `Tool "${name}" description contains a dev/tunneling URL. No legitimate production tool should reference tunneling services.`, file: manifestPath, line: 1, match: url.substring(0, 100) });
958
+ } else if (!SAFE_URL_DOMAINS.test(url)) {
959
+ findings.push({ rule: 'mcp.description-suspicious-url', severity: 'WARNING', category: 'description-injection', message: `Tool "${name}" description contains an external URL that the LLM might follow.`, file: manifestPath, line: 1, match: url.substring(0, 100) });
960
+ }
961
+ }
962
+ }
963
+ }
964
+
965
+ // Cross-tool manipulation detection (Task 2)
966
+ findings.push(...checkCrossToolManipulation(tools, manifestPath));
967
+
968
+ // Z-score anomaly detection for description length (Task 3)
969
+ if (tools.length >= 5) {
970
+ const lengths = tools.map(t => (t.description || '').length);
971
+ const mean = lengths.reduce((a, b) => a + b, 0) / lengths.length;
972
+ const stddev = Math.sqrt(lengths.reduce((sum, l) => sum + (l - mean) ** 2, 0) / lengths.length);
973
+ if (stddev > 0) {
974
+ for (const tool of tools) {
975
+ const len = (tool.description || '').length;
976
+ const zScore = (len - mean) / stddev;
977
+ if (zScore > 2.5) {
978
+ findings.push({ rule: 'mcp.description-length-anomaly', severity: 'WARNING', category: 'description-injection', message: `Tool "${tool.name}" description length (${len} chars) is a statistical outlier (z-score: ${zScore.toFixed(1)}) compared to other tools. May hide injected instructions.`, file: manifestPath, line: 1, match: (tool.description || '').substring(0, 100) });
979
+ }
980
+ }
981
+ }
796
982
  }
797
983
 
798
984
  return findings;
@@ -622,6 +622,55 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
622
622
  }
623
623
  }
624
624
  }
625
+
626
+ // 2.8b: Nested base64 detection (double encoding)
627
+ const innerB64Matches = decoded.match(base64Regex);
628
+ if (innerB64Matches) {
629
+ for (const innerB64 of innerB64Matches) {
630
+ try {
631
+ const innerDecoded = Buffer.from(innerB64, 'base64').toString('utf-8');
632
+ const innerPrintable = innerDecoded.split('').filter(c => c.charCodeAt(0) >= 32 && c.charCodeAt(0) <= 126).length;
633
+ if (innerPrintable / innerDecoded.length > 0.5) {
634
+ findings.push({
635
+ rule_id: 'nested-base64',
636
+ category: 'obfuscation',
637
+ severity: 'ERROR',
638
+ message: 'Nested base64 encoding detected — double encoding is a strong indicator of intentional obfuscation.',
639
+ matched_text: innerDecoded.substring(0, 100),
640
+ confidence: 'HIGH',
641
+ risk_score: '85',
642
+ action: 'BLOCK'
643
+ });
644
+ // Re-scan doubly-decoded text
645
+ for (const rule of allRules) {
646
+ for (const pattern of rule.patterns) {
647
+ try {
648
+ const regex = new RegExp(pattern, 'i');
649
+ const match = innerDecoded.match(regex);
650
+ if (match) {
651
+ findings.push({
652
+ rule_id: rule.id + '.nested-base64-decoded',
653
+ category: rule.metadata.category || 'unknown',
654
+ severity: rule.severity,
655
+ message: rule.message + ' (detected in nested base64-decoded content)',
656
+ matched_text: match[0].substring(0, 100),
657
+ confidence: 'HIGH',
658
+ risk_score: rule.metadata.risk_score || '50',
659
+ action: rule.metadata.action || 'BLOCK'
660
+ });
661
+ break;
662
+ }
663
+ } catch (e) {
664
+ // Skip invalid regex
665
+ }
666
+ }
667
+ }
668
+ }
669
+ } catch (e) {
670
+ // Skip invalid inner base64
671
+ }
672
+ }
673
+ }
625
674
  }
626
675
  } catch (e) {
627
676
  // Skip invalid base64