isnad-scan 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isnad_scan/__init__.py +2 -0
- isnad_scan/ast_analyzer.py +374 -0
- isnad_scan/binary_scanner.py +230 -0
- isnad_scan/cli.py +221 -0
- isnad_scan/cve_checker.py +238 -0
- isnad_scan/js_analyzer.py +154 -0
- isnad_scan/patterns.py +573 -0
- isnad_scan/scanner.py +342 -0
- isnad_scan-0.3.0.dist-info/METADATA +186 -0
- isnad_scan-0.3.0.dist-info/RECORD +12 -0
- isnad_scan-0.3.0.dist-info/WHEEL +4 -0
- isnad_scan-0.3.0.dist-info/entry_points.txt +2 -0
isnad_scan/__init__.py
ADDED
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
"""AST-based analysis for Python files.
|
|
2
|
+
|
|
3
|
+
Catches evasion techniques that regex cannot:
|
|
4
|
+
- Import aliasing (from os import system as s)
|
|
5
|
+
- Nested/chained attribute access
|
|
6
|
+
- Dynamic function construction
|
|
7
|
+
- Control flow to dangerous calls
|
|
8
|
+
"""
|
|
9
|
+
import ast
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import List, Set, Dict, Optional, Tuple
|
|
13
|
+
|
|
14
|
+
from .patterns import Finding, Severity
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Builtin names that the AST analysis treats as security-relevant.
DANGEROUS_FUNCTIONS = {
    # Dynamic code execution and dynamic import
    'eval',
    'exec',
    'compile',
    '__import__',
    # Reflective attribute access
    'getattr',
    'setattr',
    'delattr',
    # File access (tracked for path traversal)
    'open',
}
|
|
23
|
+
|
|
24
|
+
# Set of (module, function) pairs considered dangerous when imported or called.
# Built from a per-module table so each module's entries read as one group.
DANGEROUS_CALLABLES = {
    (module_name, callable_name)
    for module_name, callable_names in (
        ('builtins', ('eval', 'exec', 'compile', '__import__', 'open')),
        ('os', (
            # Shell / process creation
            'system', 'popen', 'spawn',
            'spawnl', 'spawnle', 'spawnlp', 'spawnlpe',
            'spawnv', 'spawnve', 'spawnvp', 'spawnvpe',
            # Process replacement
            'execl', 'execle', 'execlp', 'execlpe',
            'execv', 'execve', 'execvp', 'execvpe',
            # Filesystem deletion
            'remove', 'unlink', 'rmdir',
        )),
        ('subprocess', ('run', 'call', 'Popen', 'check_call', 'check_output')),
        ('pickle', ('load', 'loads')),
        ('_pickle', ('load', 'loads')),
        ('marshal', ('load', 'loads')),
        ('socket', ('socket', 'create_connection')),
        ('requests', ('get', 'post', 'put', 'delete', 'patch')),
        ('urllib.request', ('urlopen', 'urlretrieve')),
    )
    for callable_name in callable_names
}
|
|
51
|
+
|
|
52
|
+
# Top-level modules whose mere import is reported as suspicious.
SUSPICIOUS_MODULES = {
    'ctypes',    # memory manipulation
    'mmap',      # memory mapping
    'pty',       # pseudo-terminals (reverse shells)
    'fcntl',     # file control
    'resource',  # resource-limit manipulation
}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
class ImportedName:
    """Record of one name bound by a ``from <module> import <name>`` statement."""

    # Name the code uses locally (the alias when ``as`` is present), e.g. 's'.
    name: str
    # Module the name was imported from, e.g. 'os'.
    module: str
    # Name as defined in the source module, e.g. 'system'.
    original: str
    # Line number of the import statement.
    line: int

    @property
    def qualified(self) -> Tuple[str, str]:
        """Return the ``(module, original)`` pair that identifies the import's origin."""
        origin = (self.module, self.original)
        return origin
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class DangerousCallVisitor(ast.NodeVisitor):
    """AST visitor that tracks imports and finds dangerous calls.

    While walking a tree it records ``import`` / ``from ... import`` bindings
    and simple aliasing assignments, then reports:

    - imports of names listed in ``DANGEROUS_CALLABLES`` and of modules listed
      in ``SUSPICIOUS_MODULES``;
    - calls that resolve (through those bindings) to a dangerous callable;
    - subprocess calls that enable the shell;
    - ``eval`` / ``exec`` / ``compile`` calls whose first argument is not a
      literal.

    Results accumulate in ``self.findings``.
    """

    def __init__(self, filename: str):
        """Create a visitor that attributes its findings to *filename*."""
        self.filename = filename
        self.findings: List[Finding] = []

        # Names bound by ``from X import Y [as Z]``: local name -> ImportedName
        self.imports: Dict[str, ImportedName] = {}

        # Names bound by ``import X [as Z]``: local name -> module name
        self.module_aliases: Dict[str, str] = {}

        # Variables assigned a dangerous reference: name -> (reason, line)
        self.dangerous_vars: Dict[str, Tuple[str, int]] = {}

    def visit_Import(self, node: ast.Import):
        """Handle ``import os`` and ``import subprocess as sp``."""
        for alias in node.names:
            module = alias.name
            local_name = alias.asname or alias.name

            self.module_aliases[local_name] = module

            # Only the top-level package decides whether a module is suspicious.
            base_module = module.split('.')[0]
            if base_module in SUSPICIOUS_MODULES:
                self.findings.append(Finding(
                    severity=Severity.WARN,
                    pattern_id='suspicious_module_import',
                    description=f'Import of suspicious module: {module}',
                    file=self.filename,
                    line=node.lineno,
                    match=f'import {module}',
                    context=f'import {module}' + (f' as {alias.asname}' if alias.asname else ''),
                ))

        self.generic_visit(node)

    def visit_ImportFrom(self, node: ast.ImportFrom):
        """Handle ``from os import system`` and ``from os import system as s``."""
        module = node.module or ''

        for alias in node.names:
            original_name = alias.name
            local_name = alias.asname or alias.name

            # Remember the binding so later calls through it can be resolved.
            self.imports[local_name] = ImportedName(
                name=local_name,
                module=module,
                original=original_name,
                line=node.lineno,
            )

            if (module, original_name) in DANGEROUS_CALLABLES:
                severity = Severity.DANGER if original_name in ('system', 'popen', 'eval', 'exec') else Severity.WARN

                # An alias hides the dangerous name at the call site, so it is
                # always escalated to DANGER.
                if alias.asname:
                    severity = Severity.DANGER
                    desc = f'Dangerous function imported with alias: {module}.{original_name} as {local_name}'
                else:
                    desc = f'Dangerous function imported: {module}.{original_name}'

                self.findings.append(Finding(
                    severity=severity,
                    pattern_id='dangerous_import',
                    description=desc,
                    file=self.filename,
                    line=node.lineno,
                    match=f'from {module} import {original_name}',
                    # ast.unparse is only available on newer Python versions.
                    context=ast.unparse(node) if hasattr(ast, 'unparse') else '',
                ))

            base_module = module.split('.')[0] if module else ''
            if base_module in SUSPICIOUS_MODULES:
                self.findings.append(Finding(
                    severity=Severity.WARN,
                    pattern_id='suspicious_module_import',
                    description=f'Import from suspicious module: {module}',
                    file=self.filename,
                    line=node.lineno,
                    match=f'from {module} import {original_name}',
                ))

        self.generic_visit(node)

    def visit_Assign(self, node: ast.Assign):
        """Track assignments that might store dangerous references."""
        # Plain aliasing of a dangerous name: ``x = eval``
        if isinstance(node.value, ast.Name):
            value_name = node.value.id
            if value_name in ('eval', 'exec', 'compile', 'system', 'open'):
                for target in node.targets:
                    if isinstance(target, ast.Name):
                        self.dangerous_vars[target.id] = (f'alias for {value_name}', node.lineno)
                        self.findings.append(Finding(
                            severity=Severity.DANGER,
                            pattern_id='dangerous_alias',
                            description=f'Variable assigned dangerous builtin: {target.id} = {value_name}',
                            file=self.filename,
                            line=node.lineno,
                            match=f'{target.id} = {value_name}',
                        ))

        # Reflective lookup: ``x = getattr(module, 'system')``. Only the
        # variable is recorded here; a finding is emitted when it is called.
        if isinstance(node.value, ast.Call):
            call = node.value
            if isinstance(call.func, ast.Name) and call.func.id == 'getattr':
                if len(call.args) >= 2 and isinstance(call.args[1], ast.Constant):
                    attr_name = call.args[1].value
                    if attr_name in ('eval', 'exec', 'compile', 'system', 'popen'):
                        for target in node.targets:
                            if isinstance(target, ast.Name):
                                self.dangerous_vars[target.id] = (f'getattr result for {attr_name}', node.lineno)

        self.generic_visit(node)

    def visit_Call(self, node: ast.Call):
        """Check function calls for dangerous patterns."""
        if isinstance(node.func, ast.Name):
            func_name = node.func.id

            # Call through a name bound by ``from X import Y [as Z]``
            if func_name in self.imports:
                imp = self.imports[func_name]
                if imp.qualified in DANGEROUS_CALLABLES:
                    self.findings.append(Finding(
                        severity=Severity.DANGER,
                        pattern_id='dangerous_call_aliased',
                        description=f'Call to aliased dangerous function: {func_name} (imported from {imp.module}.{imp.original})',
                        file=self.filename,
                        line=node.lineno,
                        match=f'{func_name}(...)',
                    ))

            # Call through a variable recorded by visit_Assign
            if func_name in self.dangerous_vars:
                reason, _ = self.dangerous_vars[func_name]
                self.findings.append(Finding(
                    severity=Severity.DANGER,
                    pattern_id='dangerous_var_call',
                    description=f'Call via dangerous variable: {func_name} ({reason})',
                    file=self.filename,
                    line=node.lineno,
                    match=f'{func_name}(...)',
                ))

        # Attribute call: os.system(...), subprocess.run(..., shell=True)
        if isinstance(node.func, ast.Attribute):
            self._check_attribute_call(node)

        self._check_subprocess_shell(node)
        self._check_dynamic_code_exec(node)

        self.generic_visit(node)

    def _check_attribute_call(self, node: ast.Call):
        """Check attribute-based calls like ``os.system()`` or ``pkg.mod.func()``.

        The module part of the call is resolved through the recorded imports.
        Besides the module bound to the first name of the chain, the full
        dotted path is also tried, so that ``urllib.request.urlopen(...)`` and
        ``from urllib import request; request.urlopen(...)`` match the
        ``('urllib.request', ...)`` entries of ``DANGEROUS_CALLABLES``.
        At most one finding is emitted per call.
        """
        attr = node.func
        if not isinstance(attr, ast.Attribute):
            return

        # Collect the attribute chain innermost-last: a.b.c -> ['a', 'b', 'c']
        parts = []
        current = attr
        while isinstance(current, ast.Attribute):
            parts.append(current.attr)
            current = current.value

        if isinstance(current, ast.Name):
            parts.append(current.id)

        parts.reverse()

        if len(parts) < 2:
            return

        base = parts[0]
        func = parts[-1]
        inner = parts[1:-1]

        if base in self.module_aliases:
            module = self.module_aliases[base]
            qualified = module
        elif base in self.imports:
            imp = self.imports[base]
            module = imp.module
            # ``from pkg import sub`` binds the submodule ``pkg.sub``.
            qualified = f'{imp.module}.{imp.original}' if imp.module else imp.original
        else:
            module = base
            qualified = base

        # Try the originally used module name first so existing findings keep
        # their wording, then the more precise dotted forms.
        candidates = [module]
        for extra in (qualified, '.'.join([qualified, *inner])):
            if extra not in candidates:
                candidates.append(extra)

        for candidate in candidates:
            if (candidate, func) in DANGEROUS_CALLABLES:
                self.findings.append(Finding(
                    severity=Severity.WARN,  # WARN because might be legitimate
                    pattern_id='dangerous_module_call',
                    description=f'Call to potentially dangerous function: {candidate}.{func}',
                    file=self.filename,
                    line=node.lineno,
                    match='.'.join(parts) + '(...)',
                ))
                break

    def _check_subprocess_shell(self, node: ast.Call):
        """Check for subprocess calls that pass a ``shell`` keyword.

        Any truthy constant (``True``, ``1``, a non-empty string, ...) is
        reported as ``shell=True``, since subprocess treats them alike; a
        non-constant value is reported as a dynamic shell parameter.
        """
        # Resolve which subprocess function (if any) is being called.
        func_name = None
        if isinstance(node.func, ast.Name):
            if node.func.id in self.imports:
                imp = self.imports[node.func.id]
                if imp.module == 'subprocess':
                    func_name = imp.original
        elif isinstance(node.func, ast.Attribute):
            if isinstance(node.func.value, ast.Name):
                base = node.func.value.id
                module = self.module_aliases.get(base, base)
                if module == 'subprocess':
                    func_name = node.func.attr

        if func_name not in ('run', 'call', 'Popen', 'check_call', 'check_output'):
            return

        for kw in node.keywords:
            if kw.arg != 'shell':
                continue

            if isinstance(kw.value, ast.Constant):
                if kw.value.value:
                    self.findings.append(Finding(
                        severity=Severity.DANGER,
                        pattern_id='subprocess_shell_true',
                        description='subprocess call with shell=True - command injection risk',
                        file=self.filename,
                        line=node.lineno,
                        match=f'subprocess.{func_name}(..., shell=True)',
                    ))
            else:
                # shell= with a variable or expression - suspicious
                self.findings.append(Finding(
                    severity=Severity.WARN,
                    pattern_id='subprocess_shell_dynamic',
                    description='subprocess call with dynamic shell parameter',
                    file=self.filename,
                    line=node.lineno,
                    match=f'subprocess.{func_name}(..., shell=?)',
                ))

    def _check_dynamic_code_exec(self, node: ast.Call):
        """Check if ``eval`` / ``exec`` / ``compile`` is called with non-literal input."""
        func_name = None
        if isinstance(node.func, ast.Name):
            func_name = node.func.id

        if func_name not in ('eval', 'exec', 'compile'):
            return
        if not node.args:
            return

        # Anything other than a constant as the first argument is dynamic.
        first_arg = node.args[0]
        if not isinstance(first_arg, ast.Constant):
            self.findings.append(Finding(
                severity=Severity.DANGER,
                pattern_id='dynamic_code_execution',
                description=f'{func_name}() called with dynamic input - high risk',
                file=self.filename,
                line=node.lineno,
                match=f'{func_name}(<dynamic>)',
            ))
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def analyze_python_ast(content: str, filename: str) -> List[Finding]:
    """Analyze Python code using AST for dangerous patterns.

    Args:
        content: Python source text to analyze.
        filename: Name attached to every finding (and passed to the parser).

    Returns:
        The findings collected by ``DangerousCallVisitor``. If the source
        cannot be parsed, a single finding describing the parse failure is
        returned instead. If the tree is too deeply nested to walk, the
        findings gathered so far are returned plus a warning.
    """
    try:
        tree = ast.parse(content, filename=filename)
    except SyntaxError as e:
        # A syntax error is itself worth reporting (might be obfuscation).
        return [Finding(
            severity=Severity.WARN,
            pattern_id='python_syntax_error',
            description=f'Python syntax error (might be obfuscated or Python 2): {e}',
            file=filename,
            line=e.lineno or 1,
            match=str(e),
        )]
    except Exception as e:
        return [Finding(
            severity=Severity.INFO,
            pattern_id='ast_parse_error',
            description=f'Could not parse Python AST: {e}',
            file=filename,
            line=1,
            match=str(e),
        )]

    visitor = DangerousCallVisitor(filename)
    try:
        visitor.visit(tree)
    except RecursionError:
        # NodeVisitor recurses once per nesting level, so a pathologically
        # deep expression would otherwise abort the scan with a traceback.
        # Keep what was found and flag the file instead.
        visitor.findings.append(Finding(
            severity=Severity.WARN,
            pattern_id='ast_recursion_limit',
            description='Python AST too deeply nested to analyze fully (might be obfuscated)',
            file=filename,
            line=1,
            match='<recursion limit exceeded>',
        ))

    return visitor.findings
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def is_python_file(filename: str) -> bool:
    """Check if a file is a Python file.

    The extension is compared case-insensitively so that names such as
    ``SETUP.PY`` are not skipped by the scanner.

    Args:
        filename: File name or path as a string.

    Returns:
        True if the name ends in ``.py``, ``.pyw`` or ``.pyi``.
    """
    return filename.lower().endswith(('.py', '.pyw', '.pyi'))
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""Binary file scanning for embedded scripts and suspicious content.
|
|
2
|
+
|
|
3
|
+
Scans:
|
|
4
|
+
- .pyc files (compiled Python)
|
|
5
|
+
- Executables for embedded scripts
|
|
6
|
+
- Images for steganography indicators
|
|
7
|
+
- Archives for suspicious contents
|
|
8
|
+
"""
|
|
9
|
+
import re
|
|
10
|
+
import struct
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import List, Optional, Tuple
|
|
13
|
+
|
|
14
|
+
from .patterns import Finding, Severity
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Byte-regex signatures searched for in binary content.
# Each entry is (pattern, pattern_id, severity); entries are tried in order.
SUSPICIOUS_BINARY_PATTERNS = [
    # --- Network indicators (potential C2 / exfiltration) ---
    (
        rb'https?://[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}(?:/[^\s\x00]*)?',
        'embedded_url',
        Severity.WARN,
    ),
    (
        rb'\b(?:\d{1,3}\.){3}\d{1,3}\b',
        'embedded_ip',
        Severity.INFO,
    ),

    # --- Shell commands ---
    (
        rb'/bin/(?:ba)?sh',
        'embedded_shell_path',
        Severity.WARN,
    ),
    (
        rb'(?:curl|wget|nc|netcat)\s+',
        'embedded_network_cmd',
        Severity.DANGER,
    ),
    (
        rb'rm\s+-rf',
        'embedded_rm_rf',
        Severity.DANGER,
    ),

    # --- Python source fragments ---
    (
        rb'import\s+(?:os|subprocess|socket|requests)',
        'embedded_python_import',
        Severity.WARN,
    ),
    (
        rb'eval\s*\(|exec\s*\(',
        'embedded_eval_exec',
        Severity.DANGER,
    ),
    (
        rb'__import__\s*\(',
        'embedded_dynamic_import',
        Severity.DANGER,
    ),

    # --- JavaScript source fragments ---
    (
        rb'require\s*\(\s*["\']child_process',
        'embedded_child_process',
        Severity.DANGER,
    ),
    (
        rb'new\s+Function\s*\(',
        'embedded_function_constructor',
        Severity.DANGER,
    ),

    # --- Base64-encoded shell commands (common obfuscation) ---
    # Base64 of the leading bytes of: bash, /bin, curl, wget, netcat
    (
        rb'(?:YmFz|L2Jpbi|Y3Vy|d2dl|bmV0Y2F0)',
        'embedded_b64_cmd',
        Severity.WARN,
    ),

    # --- Crypto wallet addresses (exfiltration targets) ---
    (
        rb'(?:bc1|[13])[a-zA-HJ-NP-Z0-9]{25,39}',
        'embedded_btc_address',
        Severity.WARN,
    ),
    (
        rb'0x[a-fA-F0-9]{40}',
        'embedded_eth_address',
        Severity.INFO,
    ),

    # --- Credentials ---
    (
        rb'-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----',
        'embedded_private_key',
        Severity.DANGER,
    ),
    (
        rb'AKIA[0-9A-Z]{16}',
        'embedded_aws_key',
        Severity.DANGER,
    ),
]
|
|
50
|
+
|
|
51
|
+
# Lower-case file suffixes that are handled as binary files.
BINARY_EXTENSIONS = {
    # Compiled Python
    '.pyc',
    '.pyo',
    # Shared libraries
    '.so',
    '.dll',
    '.dylib',
    # Executables
    '.exe',
    '.bin',
    # Python packages
    '.whl',
    '.egg',
}
|
|
58
|
+
|
|
59
|
+
# Lower-case image suffixes (used to select the steganography check).
IMAGE_EXTENSIONS = {
    '.png',
    '.jpg',
    '.jpeg',
    '.gif',
    '.bmp',
    '.webp',
}
|
|
61
|
+
|
|
62
|
+
# Lower-case archive suffixes.
ARCHIVE_EXTENSIONS = {
    '.zip',
    '.tar',
    '.gz',
    '.tgz',
    '.bz2',
    '.7z',
    '.rar',
}
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def is_binary_file(path: Path) -> bool:
    """Return True when *path*'s suffix, lower-cased, is in BINARY_EXTENSIONS."""
    extension = path.suffix.lower()
    return extension in BINARY_EXTENSIONS
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def is_image_file(path: Path) -> bool:
    """Return True when *path*'s suffix, lower-cased, is in IMAGE_EXTENSIONS."""
    extension = path.suffix.lower()
    return extension in IMAGE_EXTENSIONS
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def scan_binary_content(content: bytes, filename: str) -> List[Finding]:
    """Scan binary content for suspicious patterns.

    Every match of every entry in ``SUSPICIOUS_BINARY_PATTERNS`` yields one
    finding, in pattern order and then match order.

    Args:
        content: Raw bytes to search.
        filename: Name attached to each finding.

    Returns:
        A list of findings; empty when nothing matches. ``line`` is always 0
        because binary data has no line numbers.
    """
    findings = []
    total = len(content)

    for pattern, pattern_id, severity in SUSPICIOUS_BINARY_PATTERNS:
        for match in re.finditer(pattern, content):
            pos = match.start()

            # Context window: 20 bytes before the match start, 100 after,
            # rendered with non-printable bytes replaced by '.'.
            window = content[max(0, pos - 20):min(total, pos + 100)]
            context = ''.join(chr(b) if 32 <= b < 127 else '.' for b in window)

            # errors='replace' never raises, so no fallback path is needed.
            match_str = match.group(0).decode('utf-8', errors='replace')[:100]

            findings.append(Finding(
                severity=severity,
                pattern_id=f'binary_{pattern_id}',
                description=f'Suspicious content in binary file: {pattern_id}',
                file=filename,
                line=0,  # No line numbers in binary
                match=match_str,
                context=context,  # at most 120 characters by construction
            ))

    return findings
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def check_pyc_file(content: bytes, filename: str) -> List[Finding]:
    """Inspect the bytes of a compiled Python file.

    Content shorter than 16 bytes is reported as invalid/truncated and not
    examined further. Otherwise an informational finding notes the presence
    of bytecode, followed by whatever ``scan_binary_content`` reports for the
    same bytes. The magic number is not validated.
    """
    if len(content) < 16:
        # Too short to be examined: report and stop.
        return [Finding(
            severity=Severity.WARN,
            pattern_id='binary_invalid_pyc',
            description='Invalid or truncated .pyc file',
            file=filename,
            line=0,
            match='<invalid pyc>',
        )]

    bytecode_note = Finding(
        severity=Severity.INFO,
        pattern_id='binary_pyc_file',
        description='Compiled Python bytecode - harder to audit than source',
        file=filename,
        line=0,
        match=f'.pyc file ({len(content)} bytes)',
    )

    # Embedded strings in the bytecode are scanned like any other binary.
    return [bytecode_note, *scan_binary_content(content, filename)]
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def check_image_for_stego(content: bytes, filename: str) -> List[Finding]:
    """Basic check for steganography indicators in images.

    Two heuristics are applied:

    - For ``.png`` and ``.jpg``/``.jpeg`` names, more than 100 bytes following
      the end-of-image marker are reported as possible hidden data.
    - For any image, the presence of a known script marker anywhere in the
      bytes is reported.

    Args:
        content: Raw image bytes.
        filename: File name; its extension (case-insensitive) selects the
            format-specific check and it is attached to each finding.

    Returns:
        A list of findings; empty when nothing looks suspicious.
    """
    findings = []
    lowered = filename.lower()
    total = len(content)

    # PNG: data appended after the IEND chunk.
    if lowered.endswith('.png'):
        iend_pos = content.find(b'IEND')
        if iend_pos != -1:
            # find() points at the 4-byte chunk type; IEND carries no data,
            # so the file should end after the type plus the 4-byte CRC.
            extra_data = total - (iend_pos + 8)
            if extra_data > 100:
                findings.append(Finding(
                    severity=Severity.WARN,
                    pattern_id='binary_image_extra_data',
                    description=f'PNG has {extra_data} bytes after IEND marker - possible hidden data',
                    file=filename,
                    line=0,
                    match=f'{extra_data} bytes extra',
                ))

    # JPEG: data appended after the last EOI marker (FF D9).
    if lowered.endswith(('.jpg', '.jpeg')):
        eoi_pos = content.rfind(b'\xff\xd9')
        if eoi_pos != -1:
            extra_data = total - (eoi_pos + 2)
            if extra_data > 100:
                findings.append(Finding(
                    severity=Severity.WARN,
                    pattern_id='binary_image_extra_data',
                    description=f'JPEG has {extra_data} bytes after EOI marker - possible hidden data',
                    file=filename,
                    line=0,
                    match=f'{extra_data} bytes extra',
                ))

    # Script markers embedded anywhere in the image bytes.
    script_patterns = [
        b'<script', b'<?php', b'<%', b'#!/',
        b'import ', b'eval(', b'exec(',
    ]
    for pattern in script_patterns:
        if pattern in content:
            marker = pattern.decode('utf-8', errors='replace')
            findings.append(Finding(
                severity=Severity.DANGER,
                pattern_id='binary_image_embedded_script',
                description=f'Image contains embedded script pattern: {marker}',
                file=filename,
                line=0,
                match=marker,
            ))

    return findings
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def scan_binary_file(path: Path) -> Tuple[List[Finding], Optional[str]]:
    """Scan a binary file for security issues.

    The file size is checked before the file is read, so files above the
    5 MB limit are never loaded into memory.

    Args:
        path: File to scan.

    Returns:
        ``(findings, error)``. ``error`` is a message when the file could not
        be read (``findings`` is then empty), otherwise ``None``. Files larger
        than the limit produce a single informational finding and are not
        scanned in detail.
    """
    max_size = 5 * 1024 * 1024  # size limit for binary files (5MB)

    try:
        size = path.stat().st_size
        content = b'' if size > max_size else path.read_bytes()
    except Exception as e:
        # Best effort: report the problem to the caller instead of raising.
        return [], f"Could not read binary file: {e}"

    # The reported size can be stale or zero (e.g. special files), so also
    # honour the number of bytes actually read.
    size = max(size, len(content))
    if size > max_size:
        return [Finding(
            severity=Severity.INFO,
            pattern_id='binary_large_file',
            description='Large binary file - skipped detailed scan',
            file=str(path),
            line=0,
            match=f'{size} bytes',
        )], None

    findings = []
    filename = str(path)

    # Type-specific checks
    if path.suffix.lower() in ('.pyc', '.pyo'):
        findings.extend(check_pyc_file(content, filename))
    elif is_image_file(path):
        findings.extend(check_image_for_stego(content, filename))
    else:
        # General binary scan
        findings.extend(scan_binary_content(content, filename))

    return findings, None
|