isnad-scan 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isnad_scan/__init__.py +2 -0
- isnad_scan/ast_analyzer.py +374 -0
- isnad_scan/binary_scanner.py +230 -0
- isnad_scan/cli.py +221 -0
- isnad_scan/cve_checker.py +238 -0
- isnad_scan/js_analyzer.py +154 -0
- isnad_scan/patterns.py +573 -0
- isnad_scan/scanner.py +342 -0
- isnad_scan-0.3.0.dist-info/METADATA +186 -0
- isnad_scan-0.3.0.dist-info/RECORD +12 -0
- isnad_scan-0.3.0.dist-info/WHEEL +4 -0
- isnad_scan-0.3.0.dist-info/entry_points.txt +2 -0
isnad_scan/patterns.py
ADDED
|
@@ -0,0 +1,573 @@
|
|
|
1
|
+
"""Pattern definitions for security scanning."""
|
|
2
|
+
import re
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import List, Tuple, Pattern
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Severity(Enum):
    """Severity level attached to every pattern and every reported finding."""

    # Used by the pattern group labelled "Immediate threats"; scan_content
    # downgrades it to INFO for documentation / string-literal hits.
    DANGER = "danger"
    # Used by the pattern group labelled "Suspicious but context-dependent".
    WARN = "warn"
    # Used by the pattern group labelled "Notable but usually fine".
    INFO = "info"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class Finding:
    """One scanner hit: which pattern fired, where it fired, and on what text."""

    # Severity reported for this hit; may be lower than the pattern's own
    # severity because scan_content downgrades documentation/string hits.
    severity: Severity
    # Identifier of the pattern (or dependency rule) that produced the hit.
    pattern_id: str
    # Human-readable explanation, possibly with a context note appended.
    description: str
    # Filename exactly as it was passed to the scan function.
    file: str
    # 1-based line number of the hit.
    line: int
    # Matched text; the scan functions in this module cap it at 100 characters.
    match: str
    # Stripped source line on which the hit starts (empty when not supplied).
    context: str = ""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class PatternDef:
    """A compiled detection rule as stored in ``PATTERNS``."""

    # Compiled regular expression (built by _compile).
    pattern: Pattern
    # Stable identifier copied onto each Finding the rule produces.
    pattern_id: str
    # Default severity of the rule before any context-based downgrade.
    severity: Severity
    # Human-readable explanation copied onto each Finding.
    description: str
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _compile(patterns: List[Tuple[str, str, Severity, str]]) -> List[PatternDef]:
    """Build ``PatternDef`` objects from raw rule tuples.

    Args:
        patterns: ``(regex source, pattern id, severity, description)`` tuples.

    Returns:
        One ``PatternDef`` per input tuple, in the same order. Every regex is
        compiled with ``re.IGNORECASE | re.MULTILINE``.
    """
    flags = re.IGNORECASE | re.MULTILINE
    compiled: List[PatternDef] = []
    for regex_source, identifier, level, text in patterns:
        compiled.append(
            PatternDef(
                pattern=re.compile(regex_source, flags),
                pattern_id=identifier,
                severity=level,
                description=text,
            )
        )
    return compiled
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Master rule table. Each entry is (regex source, pattern id, severity,
# description). _compile() compiles every regex with IGNORECASE | MULTILINE,
# so all rules are case-insensitive and ``^`` anchors at each line start.
PATTERNS = _compile([
    # ============================================================
    # DANGER: Immediate threats - high confidence malicious
    # ============================================================

    # --- Hidden content (skill injection vectors) ---

    (r'<!--[\s\S]*?(https?://(?!example\.com|localhost|127\.0\.0\.1)[^\s<>]+)[\s\S]*?-->',
     'hidden_url_html', Severity.DANGER,
     'Hidden URL in HTML comment - classic skill injection vector'),

    (r'<!--[\s\S]*?(curl|wget|fetch|exec|eval|system)[\s\S]*?-->',
     'hidden_command_html', Severity.DANGER,
     'Hidden command in HTML comment - skill injection vector'),

    # Prompt injection patterns in markdown/docs
    (r'<!--[\s\S]*?(ignore|bypass|override|disable|skip)[\s\S]*?(security|warning|scan|check|filter|restrict)[\s\S]*?-->',
     'prompt_injection_html', Severity.DANGER,
     'Potential prompt injection - attempts to override security'),

    (r'(IMPORTANT|SYSTEM|OVERRIDE|INSTRUCTION)[:\s]*(ignore|bypass|skip|disable)',
     'prompt_injection_keyword', Severity.DANGER,
     'Prompt injection keyword pattern - attempts to manipulate LLM'),

    # Prompt injection suppression - require hidden context (HTML comments, etc.)
    (r'<!--[^>]*(do not|don\'t|never)\s+(report|flag|warn|alert|scan|check)[^>]*-->',
     'prompt_injection_suppress', Severity.DANGER,
     'Hidden instruction to suppress security reporting'),

    # --- Code execution ---

    # eval/exec - function calls
    (r'\beval\s*\([^)]+\)',
     'eval_usage', Severity.DANGER,
     'eval() usage - arbitrary code execution risk'),

    (r'\bexec\s*\([^)]+\)',
     'exec_usage', Severity.DANGER,
     'exec() usage - arbitrary code execution risk'),

    (r'\bcompile\s*\([^)]+["\'][^"\']*["\']\s*,\s*["\']exec["\']\s*\)',
     'compile_exec', Severity.DANGER,
     'compile() with exec mode - code execution'),

    # Evasion-resistant: getattr-based eval/exec
    (r'getattr\s*\([^)]*["\']+(eval|exec|compile|system)["\']',
     'getattr_dangerous', Severity.DANGER,
     'Dynamic access to dangerous function via getattr'),

    (r'__builtins__\s*[\.\[]',
     'builtins_access', Severity.DANGER,
     'Direct __builtins__ access - likely evasion attempt'),

    # String concatenation to build dangerous calls
    (r'["\']ev["\']\s*\+\s*["\']al["\']\s*|["\']ex["\']\s*\+\s*["\']ec["\']',
     'string_concat_evasion', Severity.DANGER,
     'String concatenation building eval/exec - evasion attempt'),

    # Nested getattr (double evasion)
    (r'getattr\s*\(\s*getattr\s*\(',
     'nested_getattr', Severity.DANGER,
     'Nested getattr - likely evasion attempt'),

    # Lambda with dangerous calls inside
    (r'lambda[^:]*:\s*(eval|exec|compile|system|popen)\s*\(',
     'lambda_dangerous', Severity.DANGER,
     'Lambda containing dangerous function call'),

    # __dict__ access to builtins
    (r'__dict__\s*\[\s*["\']+(eval|exec|system|compile)["\']',
     'dict_dangerous_access', Severity.DANGER,
     'Dictionary access to dangerous function'),

    # Unicode homoglyph evasion (common lookalikes for 'eval' / 'exec').
    # The character classes deliberately include the ASCII letters so that a
    # name mixing ASCII and lookalike characters is caught; the leading
    # negative lookahead rejects the all-ASCII spellings, which are handled by
    # eval_usage / exec_usage. Without it, plain "eval(" and words such as
    # "retrieval(" were reported as homoglyph evasion.
    (r'(?!eval|exec)(?:[ℯеe][vѵν][aаɑ][lⅼ]|[eеℯ][xхχ][eеℯ][cсς])\s*\(',
     'unicode_homoglyph', Severity.DANGER,
     'Unicode homoglyph evasion - looks like eval/exec'),

    # --- Shell injection ---

    (r'subprocess\.(run|call|Popen)\s*\([^)]*shell\s*=\s*True',
     'shell_injection', Severity.DANGER,
     'Shell=True in subprocess - command injection risk'),

    (r'os\.system\s*\(',
     'os_system', Severity.DANGER,
     'os.system() - command injection risk'),

    (r'os\.popen\s*\(',
     'os_popen', Severity.DANGER,
     'os.popen() - command injection risk'),

    (r'commands\.(getoutput|getstatusoutput)\s*\(',
     'commands_exec', Severity.DANGER,
     'commands module execution - deprecated and dangerous'),

    # Node.js
    (r'child_process\.(exec|execSync|spawn|spawnSync)\s*\(',
     'node_exec', Severity.DANGER,
     'child_process execution - command injection risk'),

    (r'new\s+Function\s*\(',
     'js_function_constructor', Severity.DANGER,
     'Function constructor - equivalent to eval()'),

    (r'setTimeout\s*\(\s*["\']',
     'settimeout_string', Severity.DANGER,
     'setTimeout with string argument - implicit eval'),

    (r'setInterval\s*\(\s*["\']',
     'setinterval_string', Severity.DANGER,
     'setInterval with string argument - implicit eval'),

    # Shell backtick command substitution (in actual shell scripts, not docs)
    # Only flag in .sh files or when clearly executing
    (r'^\s*[A-Z_]+=`[^`]*\$\([^)]+\)[^`]*`',
     'shell_backtick_subst', Severity.DANGER,
     'Shell variable assignment with nested command substitution'),

    # --- Data exfiltration (require variable interpolation, not placeholders) ---

    # Curl/wget with actual variable expansion (not documentation examples)
    (r'(curl|wget)\s+[^|&;\n]*?(\$\{?[A-Z_]*?(API.?KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|PRIVATE)[A-Z_]*\}?|\$\([^)]*?(api.?key|secret|token|password)\))',
     'credential_exfil_curl', Severity.DANGER,
     'Potential credential exfiltration via curl/wget with variable expansion'),

    # Python requests with variable (not string literal placeholder)
    (r'requests\.(get|post|put)\s*\([^)]*?[\+\{].*?(api.?key|secret|token|password)',
     'credential_exfil_requests', Severity.DANGER,
     'Potential credential exfiltration via requests with dynamic credential'),

    # Any network call sending os.environ values
    (r'(curl|wget|requests|fetch|http).*?os\.environ',
     'credential_exfil_env', Severity.DANGER,
     'Network call with environment variable access'),

    # DNS exfiltration
    (r'socket\.gethostbyname\s*\([^)]*\+',
     'dns_exfil_gethostbyname', Severity.DANGER,
     'Dynamic DNS lookup - potential data exfiltration'),

    (r'dns\.resolver\.(resolve|query)\s*\([^)]*\+',
     'dns_exfil_resolver', Severity.DANGER,
     'Dynamic DNS resolution - potential data exfiltration'),

    # Tor hidden service in actual URL (not regex pattern or documentation)
    (r'https?://[a-z0-9]+\.onion',
     'tor_hidden_service', Severity.DANGER,
     'Tor hidden service URL - suspicious'),

    # --- Obfuscation ---

    (r'base64\.(b64decode|decodebytes)\s*\(',
     'base64_decode', Severity.DANGER,
     'Base64 decoding - often used to hide malicious payloads'),

    (r'codecs\.(decode|encode)\s*\([^)]*rot',
     'rot_encoding', Severity.DANGER,
     'ROT encoding - obfuscation technique'),

    (r'\\x[0-9a-fA-F]{2}(\\x[0-9a-fA-F]{2}){10,}',
     'hex_encoded_string', Severity.DANGER,
     'Long hex-encoded string - potential obfuscation'),

    (r'\\u[0-9a-fA-F]{4}(\\u[0-9a-fA-F]{4}){10,}',
     'unicode_encoded_string', Severity.DANGER,
     'Long unicode-encoded string - potential obfuscation'),

    (r'bytes\.fromhex\s*\(',
     'bytes_fromhex', Severity.DANGER,
     'bytes.fromhex() - often used for obfuscation'),

    (r'chr\s*\(\s*\d+\s*\)\s*\+\s*chr\s*\(',
     'chr_concatenation', Severity.DANGER,
     'chr() concatenation - string obfuscation'),

    # --- Path traversal (only in actual code contexts) ---

    # Path traversal in function calls (open, read, etc.)
    (r'(open|read|Path|os\.path|shutil)\s*\([^)]*\.\./\.\.',
     'path_traversal_code', Severity.DANGER,
     'Path traversal in code - directory escape attempt'),

    # Path traversal in string that's being used (assigned or passed)
    (r'=\s*["\'][^"\']*\.\./\.\.[^"\']*["\']',
     'path_traversal_assigned', Severity.DANGER,
     'Path traversal string assignment'),

    # --- Raw network (unusual for skills) ---

    (r'socket\.socket\s*\(',
     'raw_socket', Severity.DANGER,
     'Raw socket creation - unusual for agent skills'),

    # --- Deserialization ---

    (r'pickle\.(load|loads)\s*\(',
     'pickle_load', Severity.DANGER,
     'Pickle deserialization - RCE if untrusted data'),

    (r'marshal\.(load|loads)\s*\(',
     'marshal_load', Severity.DANGER,
     'Marshal deserialization - code execution risk'),

    # yaml.unsafe_load is always flagged; yaml.load is flagged unless a safe
    # loader (SafeLoader / CSafeLoader / BaseLoader / CBaseLoader) appears in
    # the argument list. The lookahead must sit directly after the opening
    # parenthesis: placed after a greedy ``[^)]*`` it can always be satisfied
    # by backtracking and therefore never excluded anything.
    (r'yaml\.(?:unsafe_load\s*\(|load\s*\((?![^)]*\bC?(?:Safe|Base)Loader\b))[^)]*',
     'yaml_unsafe_load', Severity.DANGER,
     'Unsafe YAML load - potential code execution'),

    # Reverse shell patterns
    (r'(bash|sh|nc|ncat|netcat)\s+.*?(-e|exec)\s+.*?(bash|sh|\/bin)',
     'reverse_shell', Severity.DANGER,
     'Potential reverse shell command'),

    (r'python[23]?\s+.*?-c\s+["\'].*?(socket|subprocess|pty)',
     'python_reverse_shell', Severity.DANGER,
     'Python one-liner with network/shell - possible reverse shell'),

    # Attempt to disable security features
    # NOTE: this rule is WARN even though it sits in the DANGER section.
    (r'PYTHONDONTWRITEBYTECODE|PYTHONNOUSERSITE|--trusted-host|--disable-pip-version-check',
     'security_bypass_env', Severity.WARN,
     'Environment variable that may weaken security'),

    # Steganography libraries (hiding data in images)
    (r'(steganography|stegano|stepic)\.',
     'steganography', Severity.DANGER,
     'Steganography library - may hide malicious data in images'),

    # ============================================================
    # WARN: Suspicious but context-dependent
    # ============================================================

    # --- Network access ---

    (r'requests\.(get|post|put|delete|patch)\s*\(',
     'network_request', Severity.WARN,
     'HTTP request - verify destination is expected'),

    (r'urllib\.request\.(urlopen|urlretrieve)',
     'urllib_request', Severity.WARN,
     'URL fetch - verify destination is expected'),

    (r'http\.client\.(HTTPConnection|HTTPSConnection)',
     'http_client', Severity.WARN,
     'HTTP client connection - verify destination'),

    (r'aiohttp\.ClientSession',
     'aiohttp_session', Severity.WARN,
     'Async HTTP client - verify destination'),

    (r'websockets?\.(connect|serve)',
     'websocket', Severity.WARN,
     'WebSocket connection - verify destination'),

    (r'fetch\s*\(\s*["\']https?://',
     'fetch_request', Severity.WARN,
     'Fetch request - verify destination is expected'),

    # --- File system ---

    (r'open\s*\([^)]*["\']w',
     'file_write', Severity.WARN,
     'File write operation - verify path is expected'),

    (r'\.write_text\s*\(|\.write_bytes\s*\(',
     'pathlib_write', Severity.WARN,
     'File write via pathlib - verify path is expected'),

    (r'shutil\.(copy|move|rmtree)',
     'shutil_operations', Severity.WARN,
     'File system operations - verify paths'),

    (r'os\.(remove|unlink|rmdir)\s*\(',
     'file_deletion', Severity.WARN,
     'File deletion - verify path is expected'),

    # --- Environment ---

    (r'os\.environ\s*[\[\.]|os\.getenv\s*\(',
     'env_access', Severity.WARN,
     'Environment variable access - verify which vars'),

    (r'process\.env\.',
     'node_env_access', Severity.WARN,
     'Node.js environment access - verify which vars'),

    (r'dotenv|load_dotenv',
     'dotenv_usage', Severity.WARN,
     'dotenv usage - loads environment from file'),

    # --- Dynamic behavior ---

    (r'__import__\s*\(',
     'dynamic_import', Severity.WARN,
     'Dynamic import - harder to audit statically'),

    (r'importlib\.import_module\s*\(',
     'importlib_import', Severity.WARN,
     'Dynamic import via importlib'),

    (r'globals\s*\(\s*\)\s*\[|locals\s*\(\s*\)\s*\[',
     'globals_locals_access', Severity.WARN,
     'Dynamic access via globals()/locals()'),

    (r'setattr\s*\([^)]*,\s*[^"\']+,',
     'dynamic_setattr', Severity.WARN,
     'Dynamic setattr with variable name'),

    # --- Crypto (not dangerous but notable) ---

    (r'Crypto\.|cryptography\.',
     'crypto_library', Severity.WARN,
     'Cryptographic operations - verify implementation'),

    # --- Path traversal (warn level - needs context) ---

    # Path traversal in string literals (might be documentation)
    (r'["\'][^"\']*\.\./\.\.[^"\']*["\']',
     'path_traversal_string', Severity.WARN,
     'Path traversal in string - verify if documentation or actual code'),

    # ============================================================
    # INFO: Notable but usually fine
    # ============================================================

    (r'subprocess\.(run|call|Popen)\s*\(\s*\[',
     'subprocess_list', Severity.INFO,
     'Subprocess with list args (safer than shell=True)'),

    (r'open\s*\([^)]*["\']r["\']',
     'file_read', Severity.INFO,
     'File read operation'),

    (r'hashlib\.',
     'hashlib_usage', Severity.INFO,
     'Hashing operations'),

    (r'logging\.(debug|info|warning|error)',
     'logging_usage', Severity.INFO,
     'Logging statements'),
])
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
# Dependency vulnerability patterns (for requirements.txt, package.json)
|
|
389
|
+
# Exact package names that scan_dependencies reports at DANGER severity.
# scan_dependencies lower-cases each dependency line before the lookup, so
# every entry here must be lower-case.
DANGEROUS_PACKAGES = {
    # Known typosquats (real attacks from PyPI history)
    'reqeusts', 'requets', 'request', 'requsts', 'reequests',  # requests
    'crytpography', 'cryptograpy', 'crypotgraphy',  # cryptography
    'djago', 'djnago', 'dajngo',  # django
    'flaask', 'flassk',  # flask
    'coloarama', 'colorma',  # colorama
    'urllib', 'urllib2',  # stdlib impersonation
    'setup-tools', 'setuptols',  # setuptools

    # Explicitly malicious package names
    'keylogger', 'rat', 'trojan', 'backdoor', 'exploit', 'malware',
    'credential-stealer', 'password-stealer', 'token-grabber',

    # Offensive security tools (may indicate malicious skill)
    'pwntools', 'impacket', 'mitmproxy', 'scapy',
}
|
|
406
|
+
|
|
407
|
+
# Regexes that scan_dependencies applies with re.search to each stripped,
# lower-cased dependency line; the first one that matches yields a single
# WARN finding for that line.
SUSPICIOUS_PACKAGE_PATTERNS = [
    r'^git\+https?://(?!github\.com/(python|pypa|psf)/)',  # Non-standard git deps
    r'^git\+ssh://',  # SSH git deps
    r'@[a-f0-9]{40}$',  # Pinned to specific commit (could be malicious)
]
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def is_documentation_context(line: str, filename: str) -> bool:
    """Heuristically decide whether ``line`` is documentation rather than code.

    Args:
        line: The source line on which a pattern matched.
        filename: Path of the file being scanned. Both ``/`` and ``\\`` are
            accepted as directory separators.

    Returns:
        True when the line looks like documentation (markdown/rst/txt prose,
        a rule definition in a YAML file, or an explanatory ``#`` comment),
        False otherwise.
    """
    line_lower = line.lower().strip()

    # Normalise Windows separators so that the directory checks below
    # ('/docs/', '/rules/', ...) also work for paths such as
    # ``skill\docs\usage.md``; previously they only matched POSIX paths.
    path = filename.replace('\\', '/')

    # Markdown and documentation files - be more lenient
    if path.endswith(('.md', '.markdown', '.rst', '.txt')):
        # All findings in markdown reference files are likely documentation
        if any(part in path for part in ('/references/', '/examples/', '/docs/')):
            return True

        # Bullet point (or block quote) describing a pattern
        if line_lower.startswith(('-', '*', '•', '>')):
            return True
        # Inline code accompanied by explanatory wording
        if '`' in line and any(word in line_lower for word in ('pattern', 'example', 'detect')):
            return True
        # API documentation with placeholder values
        if any(token in line_lower for token in ('your_api_key', 'your_token', '<api')):
            return True
        # Curl examples in documentation (common pattern)
        if line_lower.startswith('curl ') and 'authorization' in line_lower:
            return True
        # Security documentation showing bad examples
        if any(word in line_lower for word in ('vulnerable', 'dangerous', 'insecure')):
            return True
        if 'bad' in line_lower and ('example' in line_lower or 'practice' in line_lower):
            return True
        # Table rows (|...|): two or more pipes on the line
        if line.count('|') >= 2:
            return True

    # YAML config files (semgrep rules, etc.) - rule definitions, not code
    if path.endswith(('.yaml', '.yml')):
        if line_lower.startswith('- pattern'):
            return True
        path_lower = path.lower()
        if 'semgrep' in path_lower or '/rules/' in path_lower:
            return True

    # Comment explaining what to look for (applies to every file type)
    if line_lower.startswith('#') and any(
        word in line_lower for word in ('detect', 'pattern', 'check')
    ):
        return True

    return False
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
def is_in_code_block(content: str, position: int, filename: str) -> bool:
    """Tell whether ``position`` lies inside a ``` fenced block of a markdown file.

    Args:
        content: Full text of the file.
        position: Character offset into ``content``.
        filename: Path of the file; only ``.md`` / ``.markdown`` files are
            considered, everything else returns False.

    Returns:
        True when an odd number of ``` fences occurs before ``position``.
    """
    if filename.endswith(('.md', '.markdown')):
        # An unmatched opening fence before the offset means we are inside it.
        fences_seen = content.count('```', 0, position)
        return fences_seen % 2 != 0
    return False
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def is_inside_string_literal(context: str, match: str) -> bool:
    """Check if a match appears to be inside a string literal (dict key, etc.).

    Only the first 10 characters of ``match`` are searched for, so long
    matches are recognised by their prefix.

    Args:
        context: The source line on which the match was found.
        match: The matched text.

    Returns:
        True when the match prefix occurs inside a quoted dict key
        (``"eval(": ...``) or inside a quoted value that follows a colon
        (``desc: "uses eval(x)"``); False otherwise, including for an empty
        ``match``.
    """
    if not match:
        # An empty needle would make both searches match any quoted text.
        return False

    # Escape exactly once. The earlier code escaped parentheses by hand and
    # then ran re.escape over the result, which produced a pattern looking for
    # a literal backslash; matches such as "eval(x)" or "os.system(" could
    # therefore never be recognised.
    needle = re.escape(match[:10])

    # Dict key pattern: "function_name":
    if re.search(rf'["\'][^"\']*?{needle}[^"\']*?["\']\s*:', context):
        return True

    # String containing the function name (description, etc.)
    if re.search(rf':\s*["\'][^"\']*?{needle}', context):
        return True

    return False
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
def scan_content(content: str, filename: str) -> List[Finding]:
    """Scan content against all patterns.

    Args:
        content: Full text of the file.
        filename: Path of the file; used for the documentation heuristics and
            copied onto every finding.

    Returns:
        One ``Finding`` per regex match, grouped by pattern in ``PATTERNS``
        order. DANGER hits that sit in documentation or inside a string
        literal are downgraded to INFO; hits of any severity get a note
        appended to their description in those cases.
    """
    findings: List[Finding] = []
    lines = content.split('\n')

    for pattern_def in PATTERNS:
        # finditer yields matches in increasing start order, so the line
        # number can be advanced incrementally instead of recounting every
        # newline from the beginning of the file for each match.
        line_no = 1
        cursor = 0

        for match in pattern_def.pattern.finditer(content):
            start = match.start()
            line_no += content.count('\n', cursor, start)
            cursor = start

            # ``lines`` always has one more element than there are newlines,
            # so ``line_no - 1`` is a valid index. For multi-line matches the
            # context is the line on which the match starts.
            context = lines[line_no - 1].strip()
            matched_text = match.group(0)

            is_docs = (
                is_documentation_context(context, filename)
                or is_in_code_block(content, start, filename)
            )
            is_string = is_inside_string_literal(context, matched_text)

            # Downgrade documentation/string mentions of dangerous constructs
            severity = pattern_def.severity
            if severity == Severity.DANGER and (is_docs or is_string):
                severity = Severity.INFO

            if is_docs:
                note = " (in documentation)"
            elif is_string:
                note = " (in string literal)"
            else:
                note = ""

            findings.append(Finding(
                severity=severity,
                pattern_id=pattern_def.pattern_id,
                description=pattern_def.description + note,
                file=filename,
                line=line_no,
                match=matched_text[:100],
                context=context[:200],
            ))

    return findings
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
def scan_dependencies(content: str, filename: str) -> List[Finding]:
    """Scan dependency files for suspicious packages.

    Each non-empty, non-comment line is checked twice: its package name is
    looked up in ``DANGEROUS_PACKAGES`` (DANGER finding), and the whole line
    is tested against ``SUSPICIOUS_PACKAGE_PATTERNS`` (at most one WARN
    finding per line).

    Args:
        content: Full text of the dependency file.
        filename: Path of the file; copied onto every finding.

    Returns:
        Findings in line order.
    """
    findings: List[Finding] = []

    for line_no, raw_line in enumerate(content.split('\n'), 1):
        stripped = raw_line.strip()
        line_clean = stripped.lower()

        # Skip comments and empty lines
        if not line_clean or line_clean.startswith(('#', '//')):
            continue

        # Extract package name: everything before the first version operator
        # (==, >=, <=, ~=, !=, <, >), extras bracket, environment marker (;),
        # direct reference (@), inline comment (#) or whitespace. Splitting
        # only on ==, >=, <= and [ let lines such as "reqeusts~=2.0" or
        # "reqeusts  # pinned" slip past the lookup.
        pkg_name = re.split(r'[\s=<>!~;\[@#]', line_clean, maxsplit=1)[0]

        # Package indexes treat runs of '-', '_' and '.' as equivalent, so
        # compare the normalised spelling (entries in DANGEROUS_PACKAGES use
        # hyphens).
        normalized = re.sub(r'[-_.]+', '-', pkg_name)

        # Check against dangerous packages
        if normalized in DANGEROUS_PACKAGES:
            findings.append(Finding(
                severity=Severity.DANGER,
                pattern_id='dangerous_package',
                description=f'Potentially dangerous or typosquatted package: {pkg_name}',
                file=filename,
                line=line_no,
                match=stripped[:100],
                context=stripped,
            ))

        # Check suspicious patterns (one finding per line, whichever hits first)
        if any(re.search(pattern, line_clean) for pattern in SUSPICIOUS_PACKAGE_PATTERNS):
            findings.append(Finding(
                severity=Severity.WARN,
                pattern_id='suspicious_dependency',
                description='Suspicious dependency source',
                file=filename,
                line=line_no,
                match=stripped[:100],
                context=stripped,
            ))

    return findings
|