@intentsolutionsio/penetration-tester 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +19 -0
- package/LICENSE +21 -0
- package/README.md +160 -0
- package/commands/pentest.md +84 -0
- package/commands/scan-headers.md +43 -0
- package/package.json +40 -0
- package/skills/performing-penetration-testing/SKILL.md +266 -0
- package/skills/performing-penetration-testing/references/OWASP_TOP_10.md +284 -0
- package/skills/performing-penetration-testing/references/REMEDIATION_PLAYBOOK.md +452 -0
- package/skills/performing-penetration-testing/references/SECURITY_HEADERS.md +365 -0
- package/skills/performing-penetration-testing/scripts/code_security_scanner.py +780 -0
- package/skills/performing-penetration-testing/scripts/dependency_auditor.py +777 -0
- package/skills/performing-penetration-testing/scripts/requirements.txt +4 -0
- package/skills/performing-penetration-testing/scripts/security_scanner.py +1166 -0
- package/skills/performing-penetration-testing/scripts/setup_pentest_env.sh +199 -0
|
@@ -0,0 +1,780 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Static analysis security scanner combining Bandit and custom regex pattern detection.
|
|
3
|
+
|
|
4
|
+
Scans source code for common security vulnerabilities including hardcoded secrets,
|
|
5
|
+
SQL injection, command injection, insecure deserialization, and weak cryptography.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python3 code_security_scanner.py /path/to/code [options]
|
|
9
|
+
|
|
10
|
+
Options:
|
|
11
|
+
--tools bandit,regex Comma-separated list of scan engines (default: both)
|
|
12
|
+
--output findings.json Write JSON report to file
|
|
13
|
+
--severity low Minimum severity threshold (critical, high, medium, low)
|
|
14
|
+
--exclude "test_*" Comma-separated glob patterns to exclude
|
|
15
|
+
--verbose Print detailed progress information
|
|
16
|
+
|
|
17
|
+
Exit codes:
|
|
18
|
+
0 - No critical or high severity findings
|
|
19
|
+
1 - Critical or high severity findings detected
|
|
20
|
+
2 - Scanner error (invalid arguments, e.g. the target is not a directory); a missing Bandit install is skipped with a notice, not treated as an error
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import argparse
|
|
26
|
+
import fnmatch
|
|
27
|
+
import json
|
|
28
|
+
import os
|
|
29
|
+
import re
|
|
30
|
+
import subprocess
|
|
31
|
+
import sys
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
from typing import Any, Optional
|
|
34
|
+
|
|
35
|
+
# ---------------------------------------------------------------------------
|
|
36
|
+
# Constants
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
|
|
39
|
+
# Rank of each severity label: the smaller the number, the more severe the finding.
SEVERITY_ORDER: dict[str, int] = {
    label: rank
    for rank, label in enumerate(("critical", "high", "medium", "low"))
}

# File suffixes (lower-case, with the leading dot) inspected by the regex engine.
SCANNABLE_EXTENSIONS: set[str] = {
    ".py",
    ".js",
    ".ts",
    ".jsx",
    ".tsx",
    ".java",
    ".rb",
    ".go",
    ".php",
    ".sh",
}

# Directory names that the regex engine never descends into.
SKIP_DIRS: set[str] = {
    ".git",
    "node_modules",
    "__pycache__",
    ".venv",
    "venv",
    ".tox",
    ".mypy_cache",
    ".pytest_cache",
    "dist",
    "build",
}

# Upper bound, in seconds, on how long the Bandit subprocess may run.
BANDIT_TIMEOUT_SECONDS: int = 120
|
|
57
|
+
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
# Compiled regex patterns
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
|
|
62
|
+
# Each entry: (compiled_pattern, category, severity, confidence, title, remediation, cwe)
|
|
63
|
+
|
|
64
|
+
_HARDCODED_SECRET_PATTERNS: list[tuple[re.Pattern[str], str, str, str, str, str, str | None]] = [
|
|
65
|
+
(
|
|
66
|
+
re.compile(r"""api[_\-]?key\s*[=:]\s*["'][A-Za-z0-9]{20,}""", re.IGNORECASE),
|
|
67
|
+
"hardcoded-secret", "high", "medium",
|
|
68
|
+
"Hardcoded API key detected",
|
|
69
|
+
"Move API keys to environment variables or a secrets manager.",
|
|
70
|
+
"CWE-798",
|
|
71
|
+
),
|
|
72
|
+
(
|
|
73
|
+
re.compile(r"""AKIA[0-9A-Z]{16}"""),
|
|
74
|
+
"hardcoded-secret", "critical", "high",
|
|
75
|
+
"AWS Access Key ID detected",
|
|
76
|
+
"Rotate the exposed key immediately and use IAM roles or environment variables.",
|
|
77
|
+
"CWE-798",
|
|
78
|
+
),
|
|
79
|
+
(
|
|
80
|
+
re.compile(r"""password\s*[=:]\s*["'](?!["']$)(?!\s*$)(?!<%=)(?!\$\{)(?!\{\{)[^"']+["']""", re.IGNORECASE),
|
|
81
|
+
"hardcoded-secret", "high", "medium",
|
|
82
|
+
"Hardcoded password detected",
|
|
83
|
+
"Use environment variables or a secrets manager instead of hardcoded passwords.",
|
|
84
|
+
"CWE-798",
|
|
85
|
+
),
|
|
86
|
+
(
|
|
87
|
+
re.compile(r"""-----BEGIN\s+(?:RSA\s+|EC\s+|DSA\s+)?PRIVATE\s+KEY-----"""),
|
|
88
|
+
"hardcoded-secret", "critical", "high",
|
|
89
|
+
"Private key embedded in source code",
|
|
90
|
+
"Remove the private key from source and store it in a secure vault.",
|
|
91
|
+
"CWE-321",
|
|
92
|
+
),
|
|
93
|
+
(
|
|
94
|
+
re.compile(
|
|
95
|
+
r"""(?:secret|token|bearer)\s*[=:]\s*["'][A-Za-z0-9+/=]{20,}""",
|
|
96
|
+
re.IGNORECASE,
|
|
97
|
+
),
|
|
98
|
+
"hardcoded-secret", "high", "medium",
|
|
99
|
+
"Hardcoded secret or token detected",
|
|
100
|
+
"Store secrets in environment variables or a dedicated secrets manager.",
|
|
101
|
+
"CWE-798",
|
|
102
|
+
),
|
|
103
|
+
]
|
|
104
|
+
|
|
105
|
+
_SQL_INJECTION_PATTERNS: list[tuple[re.Pattern[str], str, str, str, str, str, str | None]] = [
|
|
106
|
+
(
|
|
107
|
+
re.compile(
|
|
108
|
+
r"""(?:execute|cursor|query)\s*\(\s*f["'].*(?:%s|%d|\{)""",
|
|
109
|
+
re.IGNORECASE,
|
|
110
|
+
),
|
|
111
|
+
"sql-injection", "high", "high",
|
|
112
|
+
"Potential SQL injection via string formatting",
|
|
113
|
+
"Use parameterized queries or prepared statements instead of string formatting.",
|
|
114
|
+
"CWE-89",
|
|
115
|
+
),
|
|
116
|
+
(
|
|
117
|
+
re.compile(r"""["']SELECT\s+.*["']\s*\+\s*""", re.IGNORECASE),
|
|
118
|
+
"sql-injection", "high", "medium",
|
|
119
|
+
"SQL query built with string concatenation (SELECT)",
|
|
120
|
+
"Use parameterized queries instead of string concatenation.",
|
|
121
|
+
"CWE-89",
|
|
122
|
+
),
|
|
123
|
+
(
|
|
124
|
+
re.compile(r"""["']INSERT\s+.*["']\s*\+\s*""", re.IGNORECASE),
|
|
125
|
+
"sql-injection", "high", "medium",
|
|
126
|
+
"SQL query built with string concatenation (INSERT)",
|
|
127
|
+
"Use parameterized queries instead of string concatenation.",
|
|
128
|
+
"CWE-89",
|
|
129
|
+
),
|
|
130
|
+
]
|
|
131
|
+
|
|
132
|
+
_COMMAND_INJECTION_PATTERNS: list[tuple[re.Pattern[str], str, str, str, str, str, str | None]] = [
|
|
133
|
+
(
|
|
134
|
+
re.compile(r"""os\.system\("""),
|
|
135
|
+
"command-injection", "high", "high",
|
|
136
|
+
"Use of os.system() allows shell command injection",
|
|
137
|
+
"Use subprocess.run() with a list of arguments and shell=False.",
|
|
138
|
+
"CWE-78",
|
|
139
|
+
),
|
|
140
|
+
(
|
|
141
|
+
re.compile(r"""subprocess\.(?:call|run|Popen)\(.*shell\s*=\s*True"""),
|
|
142
|
+
"command-injection", "high", "high",
|
|
143
|
+
"Subprocess call with shell=True enables command injection",
|
|
144
|
+
"Pass commands as a list with shell=False instead of shell=True.",
|
|
145
|
+
"CWE-78",
|
|
146
|
+
),
|
|
147
|
+
(
|
|
148
|
+
re.compile(r"""\beval\("""),
|
|
149
|
+
"command-injection", "medium", "medium",
|
|
150
|
+
"Use of eval() can execute arbitrary code",
|
|
151
|
+
"Avoid eval(). Use ast.literal_eval() for data parsing or refactor logic.",
|
|
152
|
+
"CWE-95",
|
|
153
|
+
),
|
|
154
|
+
(
|
|
155
|
+
re.compile(r"""\bexec\("""),
|
|
156
|
+
"command-injection", "medium", "medium",
|
|
157
|
+
"Use of exec() can execute arbitrary code",
|
|
158
|
+
"Avoid exec(). Refactor to use safer alternatives.",
|
|
159
|
+
"CWE-95",
|
|
160
|
+
),
|
|
161
|
+
]
|
|
162
|
+
|
|
163
|
+
_DESERIALIZATION_PATTERNS: list[tuple[re.Pattern[str], str, str, str, str, str, str | None]] = [
|
|
164
|
+
(
|
|
165
|
+
re.compile(r"""pickle\.loads?\("""),
|
|
166
|
+
"insecure-deserialization", "high", "high",
|
|
167
|
+
"Insecure deserialization with pickle",
|
|
168
|
+
"Avoid pickle for untrusted data. Use JSON or a safe serialization format.",
|
|
169
|
+
"CWE-502",
|
|
170
|
+
),
|
|
171
|
+
(
|
|
172
|
+
re.compile(r"""yaml\.load\((?!.*Loader\s*=\s*(?:Safe|Base)Loader)"""),
|
|
173
|
+
"insecure-deserialization", "high", "high",
|
|
174
|
+
"Unsafe YAML loading without SafeLoader",
|
|
175
|
+
"Use yaml.safe_load() or pass Loader=SafeLoader to yaml.load().",
|
|
176
|
+
"CWE-502",
|
|
177
|
+
),
|
|
178
|
+
(
|
|
179
|
+
re.compile(r"""marshal\.loads?\("""),
|
|
180
|
+
"insecure-deserialization", "high", "medium",
|
|
181
|
+
"Insecure deserialization with marshal",
|
|
182
|
+
"Avoid marshal for untrusted data. Use JSON or a safe serialization format.",
|
|
183
|
+
"CWE-502",
|
|
184
|
+
),
|
|
185
|
+
]
|
|
186
|
+
|
|
187
|
+
_CRYPTO_NETWORK_PATTERNS: list[tuple[re.Pattern[str], str, str, str, str, str, str | None]] = [
|
|
188
|
+
(
|
|
189
|
+
re.compile(r"""verify\s*=\s*False"""),
|
|
190
|
+
"insecure-transport", "medium", "high",
|
|
191
|
+
"SSL/TLS certificate verification disabled",
|
|
192
|
+
"Enable certificate verification. Set verify=True or provide a CA bundle.",
|
|
193
|
+
"CWE-295",
|
|
194
|
+
),
|
|
195
|
+
(
|
|
196
|
+
re.compile(r"""\bMD5\b|\.md5\(""", re.IGNORECASE),
|
|
197
|
+
"weak-crypto", "medium", "medium",
|
|
198
|
+
"Use of weak MD5 hashing algorithm",
|
|
199
|
+
"Use SHA-256 or stronger hashing. For passwords, use bcrypt or Argon2.",
|
|
200
|
+
"CWE-328",
|
|
201
|
+
),
|
|
202
|
+
(
|
|
203
|
+
re.compile(r"""\bSHA1\b|\.sha1\(""", re.IGNORECASE),
|
|
204
|
+
"weak-crypto", "medium", "medium",
|
|
205
|
+
"Use of weak SHA-1 hashing algorithm",
|
|
206
|
+
"Use SHA-256 or stronger hashing. For passwords, use bcrypt or Argon2.",
|
|
207
|
+
"CWE-328",
|
|
208
|
+
),
|
|
209
|
+
(
|
|
210
|
+
re.compile(r"""http://(?!localhost|127\.0\.0\.1|0\.0\.0\.0|\[::1\])"""),
|
|
211
|
+
"insecure-transport", "medium", "low",
|
|
212
|
+
"Insecure HTTP URL (not HTTPS)",
|
|
213
|
+
"Use HTTPS for all external communications.",
|
|
214
|
+
"CWE-319",
|
|
215
|
+
),
|
|
216
|
+
]
|
|
217
|
+
|
|
218
|
+
# Single flat list consumed by the regex engine; the per-category tables are
# concatenated in declaration order.
ALL_REGEX_PATTERNS = [
    *_HARDCODED_SECRET_PATTERNS,
    *_SQL_INJECTION_PATTERNS,
    *_COMMAND_INJECTION_PATTERNS,
    *_DESERIALIZATION_PATTERNS,
    *_CRYPTO_NETWORK_PATTERNS,
]
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
# ---------------------------------------------------------------------------
|
|
228
|
+
# Utility helpers
|
|
229
|
+
# ---------------------------------------------------------------------------
|
|
230
|
+
|
|
231
|
+
def _log(message: str, verbose: bool = True) -> None:
|
|
232
|
+
"""Print a progress message to stderr."""
|
|
233
|
+
if verbose:
|
|
234
|
+
print(f"[scanner] {message}", file=sys.stderr)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def _is_binary_file(filepath: Path) -> bool:
|
|
238
|
+
"""Return True if file appears to be binary (contains null bytes in first 1KB)."""
|
|
239
|
+
try:
|
|
240
|
+
with open(filepath, "rb") as fh:
|
|
241
|
+
chunk = fh.read(1024)
|
|
242
|
+
return b"\x00" in chunk
|
|
243
|
+
except (OSError, PermissionError):
|
|
244
|
+
return True
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _should_exclude(filepath: Path, exclude_patterns: list[str] | None) -> bool:
|
|
248
|
+
"""Check if a file matches any exclusion glob pattern."""
|
|
249
|
+
if not exclude_patterns:
|
|
250
|
+
return False
|
|
251
|
+
name = filepath.name
|
|
252
|
+
rel = str(filepath)
|
|
253
|
+
for pattern in exclude_patterns:
|
|
254
|
+
if fnmatch.fnmatch(name, pattern) or fnmatch.fnmatch(rel, pattern):
|
|
255
|
+
return True
|
|
256
|
+
return False
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _severity_at_or_above(severity: str, threshold: str) -> bool:
    """
    Tell whether ``severity`` is at least as severe as ``threshold``.

    Both labels are looked up in ``SEVERITY_ORDER`` (lower rank = more
    severe); a label missing from the table is given rank 99.
    """
    unknown_rank = 99
    rank = SEVERITY_ORDER.get(severity, unknown_rank)
    limit = SEVERITY_ORDER.get(threshold, unknown_rank)
    return rank <= limit
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def _normalize_bandit_severity(raw: str) -> str:
|
|
265
|
+
"""Map Bandit severity strings to our canonical levels."""
|
|
266
|
+
mapping = {
|
|
267
|
+
"HIGH": "high",
|
|
268
|
+
"MEDIUM": "medium",
|
|
269
|
+
"LOW": "low",
|
|
270
|
+
"UNDEFINED": "low",
|
|
271
|
+
}
|
|
272
|
+
return mapping.get(raw.upper(), "low")
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _normalize_bandit_confidence(raw: str) -> str:
|
|
276
|
+
"""Map Bandit confidence strings to canonical levels."""
|
|
277
|
+
return raw.lower() if raw.lower() in ("high", "medium", "low") else "low"
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
# ---------------------------------------------------------------------------
|
|
281
|
+
# Bandit scanning
|
|
282
|
+
# ---------------------------------------------------------------------------
|
|
283
|
+
|
|
284
|
+
def _bandit_issue_to_finding(issue: dict[str, Any]) -> dict[str, Any]:
    """Convert one entry of Bandit's JSON ``results`` array into a scanner finding dict."""
    # The CWE block may be absent (or null); only read it when it is a mapping.
    cwe_block = issue.get("issue_cwe")
    cwe_id = cwe_block.get("id") if isinstance(cwe_block, dict) else None

    return {
        "tool": "bandit",
        "file": str(Path(issue.get("filename") or "unknown").resolve()),
        "line": issue.get("line_number", 0),
        # "or" guards against an explicit null, which .get()'s default would not replace.
        "severity": _normalize_bandit_severity(str(issue.get("issue_severity") or "LOW")),
        "confidence": _normalize_bandit_confidence(str(issue.get("issue_confidence") or "LOW")),
        "category": issue.get("test_id", "unknown"),
        "title": issue.get("test_name", "Unknown issue"),
        "detail": issue.get("issue_text", ""),
        "remediation": "",
        "cwe": f"CWE-{cwe_id}" if cwe_id else None,
    }


def run_bandit_scan(
    directory: Path,
    exclude_patterns: list[str] | None = None,
    verbose: bool = False,
) -> list[dict[str, Any]]:
    """
    Run Bandit static analysis on a directory and return structured findings.

    Bandit is invoked as ``bandit -r <directory> -f json -q`` (plus ``-x`` with
    the comma-joined exclusion patterns, when given) and its stdout is parsed
    as JSON.

    Args:
        directory: Root of the tree handed to ``bandit -r``.
        exclude_patterns: Paths/globs forwarded to Bandit's ``-x`` option.
        verbose: When true, progress messages are written to stderr.

    Returns:
        One finding dict per Bandit result. An empty list is returned, never
        an exception, when Bandit is not installed, times out after
        ``BANDIT_TIMEOUT_SECONDS``, prints nothing, or prints output that is
        not the expected JSON shape.
    """
    cmd: list[str] = ["bandit", "-r", str(directory), "-f", "json", "-q"]

    if exclude_patterns:
        # Bandit's -x flag accepts comma-separated paths/globs
        cmd.extend(["-x", ",".join(exclude_patterns)])

    _log(f"Running bandit on {directory} ...", verbose)

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=BANDIT_TIMEOUT_SECONDS,
        )
    except FileNotFoundError:
        print(
            "[scanner] Bandit is not installed.\n"
            "  Install with: pip install bandit\n"
            "  Or: pipx install bandit\n"
            "  Skipping bandit scan.",
            file=sys.stderr,
        )
        return []
    except subprocess.TimeoutExpired:
        print(
            f"[scanner] Bandit scan timed out after {BANDIT_TIMEOUT_SECONDS}s. "
            "Consider narrowing the scan scope.",
            file=sys.stderr,
        )
        return []

    # Bandit returns exit code 1 when it finds issues, which is expected.
    # Only treat missing JSON output as an error.
    stdout = result.stdout.strip()
    if not stdout:
        _log("Bandit produced no output (no Python files or no findings).", verbose)
        return []

    try:
        data = json.loads(stdout)
    except json.JSONDecodeError as exc:
        print(
            f"[scanner] Failed to parse bandit JSON output: {exc}",
            file=sys.stderr,
        )
        return []

    # Valid JSON of the wrong shape must not crash the scan: a non-object
    # payload or a non-list "results" value is reported and ignored.
    raw_issues = data.get("results", []) if isinstance(data, dict) else None
    if not isinstance(raw_issues, list):
        print(
            "[scanner] Bandit JSON output has an unexpected structure; ignoring it.",
            file=sys.stderr,
        )
        return []

    findings = [
        _bandit_issue_to_finding(issue)
        for issue in raw_issues
        if isinstance(issue, dict)
    ]

    _log(f"Bandit found {len(findings)} issue(s).", verbose)
    return findings
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
# ---------------------------------------------------------------------------
|
|
367
|
+
# Regex-based scanning
|
|
368
|
+
# ---------------------------------------------------------------------------
|
|
369
|
+
|
|
370
|
+
def run_regex_scan(
    directory: Path,
    exclude_patterns: list[str] | None = None,
    verbose: bool = False,
) -> list[dict[str, Any]]:
    """
    Walk the directory tree and scan source files against the compiled regex
    patterns in ``ALL_REGEX_PATTERNS``.

    A file is scanned when its suffix is in ``SCANNABLE_EXTENSIONS``, it does
    not match an exclusion glob, and it is not binary/unreadable. Directories
    listed in ``SKIP_DIRS`` and directories whose name starts with "." are not
    entered. Lines whose first non-blank characters are ``#`` or ``//`` are
    treated as comments and skipped.

    Args:
        directory: Root of the tree to walk.
        exclude_patterns: Globs matched against the file name and the path
            relative to ``directory``.
        verbose: When true, progress messages are written to stderr.

    Returns:
        One finding dict per (line, pattern) match, in walk order.
    """
    findings: list[dict[str, Any]] = []
    files_scanned = 0

    _log(f"Running regex scan on {directory} ...", verbose)

    for root, dirs, files in os.walk(directory):
        # Prune directories we never want to enter (modifying dirs in-place
        # is how os.walk is told not to descend).
        dirs[:] = [d for d in dirs if d not in SKIP_DIRS and not d.startswith(".")]

        for filename in files:
            filepath = Path(root) / filename

            # Extension filter
            if filepath.suffix.lower() not in SCANNABLE_EXTENSIONS:
                continue

            # Exclusion filter
            rel_path = filepath.relative_to(directory)
            if _should_exclude(rel_path, exclude_patterns):
                continue

            # Skip binary files
            if _is_binary_file(filepath):
                continue

            try:
                lines = filepath.read_text(encoding="utf-8", errors="replace").splitlines()
            except OSError:
                continue

            files_scanned += 1
            is_test_file = _is_test_path(str(rel_path))
            resolved_path = str(filepath.resolve())

            for line_num, line in enumerate(lines, start=1):
                stripped = line.strip()
                # Skip comments (basic heuristic across languages)
                if stripped.startswith(("#", "//")):
                    continue

                for pattern, category, severity, confidence, title, remediation, cwe in ALL_REGEX_PATTERNS:
                    # Plain-HTTP URLs are not reported in test files. The rule
                    # is identified by its CWE: the previous check looked for
                    # "http://" in the rule title, which never contains it, so
                    # the skip never fired.
                    if is_test_file and category == "insecure-transport" and cwe == "CWE-319":
                        continue

                    if not pattern.search(line):
                        continue

                    # Extra validation for password pattern: skip placeholders
                    if "password" in title.lower() and _is_password_placeholder(line):
                        continue

                    findings.append({
                        "tool": "regex",
                        "file": resolved_path,
                        "line": line_num,
                        "severity": severity,
                        "confidence": confidence,
                        "category": category,
                        "title": title,
                        # Offending line, trimmed and capped at 200 characters.
                        "detail": stripped[:200],
                        "remediation": remediation,
                        "cwe": cwe,
                    })

    _log(f"Regex scan complete: {files_scanned} file(s) scanned, {len(findings)} issue(s) found.", verbose)
    return findings
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
def _is_test_path(rel_path: str) -> bool:
|
|
450
|
+
"""Heuristic to detect test files and directories."""
|
|
451
|
+
parts = rel_path.lower().replace("\\", "/")
|
|
452
|
+
return (
|
|
453
|
+
"/test/" in parts
|
|
454
|
+
or "/tests/" in parts
|
|
455
|
+
or parts.startswith("test/")
|
|
456
|
+
or parts.startswith("tests/")
|
|
457
|
+
or parts.endswith("_test.py")
|
|
458
|
+
or parts.endswith("_test.js")
|
|
459
|
+
or parts.endswith("_test.ts")
|
|
460
|
+
or "test_" in Path(rel_path).name.lower()
|
|
461
|
+
or ".test." in Path(rel_path).name.lower()
|
|
462
|
+
or ".spec." in Path(rel_path).name.lower()
|
|
463
|
+
)
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
def _is_password_placeholder(line: str) -> bool:
|
|
467
|
+
"""
|
|
468
|
+
Return True if a password assignment looks like a placeholder, empty
|
|
469
|
+
string, environment variable reference, or template variable rather
|
|
470
|
+
than a real hardcoded credential.
|
|
471
|
+
"""
|
|
472
|
+
lower = line.lower()
|
|
473
|
+
placeholders = [
|
|
474
|
+
'password = ""', "password = ''",
|
|
475
|
+
'password: ""', "password: ''",
|
|
476
|
+
"password = os.environ", "password = os.getenv",
|
|
477
|
+
"password = env(", "password = config",
|
|
478
|
+
"password = settings",
|
|
479
|
+
"password = none", "password = null",
|
|
480
|
+
"password_hash", "password_field",
|
|
481
|
+
"password_input", "password_reset",
|
|
482
|
+
"${", "<%=", "{{",
|
|
483
|
+
"placeholder", "changeme", "xxx", "example",
|
|
484
|
+
"your_password", "your-password",
|
|
485
|
+
"password_here", "<password>",
|
|
486
|
+
]
|
|
487
|
+
for p in placeholders:
|
|
488
|
+
if p in lower:
|
|
489
|
+
return True
|
|
490
|
+
return False
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
# ---------------------------------------------------------------------------
|
|
494
|
+
# Merge and deduplicate findings
|
|
495
|
+
# ---------------------------------------------------------------------------
|
|
496
|
+
|
|
497
|
+
def merge_findings(
    bandit_results: list[dict[str, Any]],
    regex_results: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """
    Merge findings from the bandit and regex scanners into a unified list.

    Deduplication: two findings are duplicates when they share file, line and
    weakness class, where the weakness class is the finding's CWE or, when no
    CWE is set, its category. Keying on file and line alone would silently
    discard unrelated vulnerabilities that sit on the same line.

    For duplicates, the more severe finding is kept; at equal severity the
    one with the longer detail text wins; at a full tie the first one seen
    (bandit before regex) is kept. A finding is therefore never replaced by
    a less severe one.

    Returns:
        The surviving findings sorted by severity (critical > high > medium >
        low, unknown labels last), then by file path, then by line number.
    """
    unknown_rank = 99

    def _rank(finding: dict[str, Any]) -> int:
        return SEVERITY_ORDER.get(finding["severity"], unknown_rank)

    kept: dict[tuple[str, int, str], dict[str, Any]] = {}

    for finding in [*bandit_results, *regex_results]:
        weakness = finding.get("cwe") or finding.get("category") or ""
        key = (finding["file"], finding["line"], str(weakness))

        current = kept.get(key)
        if current is None:
            kept[key] = finding
            continue

        new_rank, old_rank = _rank(finding), _rank(current)
        more_severe = new_rank < old_rank
        more_detailed = (
            new_rank == old_rank
            and len(finding.get("detail", "")) > len(current.get("detail", ""))
        )
        if more_severe or more_detailed:
            kept[key] = finding

    return sorted(
        kept.values(),
        key=lambda f: (_rank(f), f["file"], f["line"]),
    )
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
# ---------------------------------------------------------------------------
|
|
542
|
+
# Reporting
|
|
543
|
+
# ---------------------------------------------------------------------------
|
|
544
|
+
|
|
545
|
+
def generate_report(
    directory: Path,
    findings: list[dict[str, Any]],
    output_path: Path | None = None,
) -> None:
    """
    Print the scan report to stdout and, when ``output_path`` is given, also
    write the JSON report there.

    With no findings only a short "no issues" notice is printed. Otherwise
    the report holds counts per severity and per category, the five files
    with the most findings, and every finding in the order received, with a
    severity heading printed each time the severity changes.
    File paths are shown relative to the resolved ``directory`` when possible.
    """
    if not findings:
        print("\n=== Security Scan Report ===\n")
        print(f"Target: {directory.resolve()}\n")
        print("No security issues found.\n")
    else:
        # -- Tally findings per severity, category and file --
        severity_totals: dict[str, int] = {}
        category_totals: dict[str, int] = {}
        file_totals: dict[str, int] = {}
        for entry in findings:
            sev_key, cat_key, file_key = entry["severity"], entry["category"], entry["file"]
            severity_totals[sev_key] = severity_totals.get(sev_key, 0) + 1
            category_totals[cat_key] = category_totals.get(cat_key, 0) + 1
            file_totals[file_key] = file_totals.get(file_key, 0) + 1

        print("\n=== Security Scan Report ===\n")
        target_root = directory.resolve()
        print(f"Target: {target_root}")
        print(f"Total findings: {len(findings)}\n")

        def _shown_path(raw_path: str) -> Any:
            # Relative to the scan root when the file lives under it,
            # otherwise the path exactly as recorded.
            try:
                return Path(raw_path).relative_to(target_root)
            except ValueError:
                return raw_path

        # Severity summary (only labels that actually occur)
        print("## Findings by Severity\n")
        for level in ("critical", "high", "medium", "low"):
            occurrences = severity_totals.get(level, 0)
            if occurrences > 0:
                print(f"  {level.upper()}: {occurrences}")
        print()

        # Category summary, most frequent first (ties keep first-seen order)
        print("## Findings by Category\n")
        ranked_categories = sorted(
            category_totals.items(), key=lambda pair: pair[1], reverse=True
        )
        for category, occurrences in ranked_categories:
            print(f"  {category}: {occurrences}")
        print()

        # Top 5 most affected files
        ranked_files = sorted(
            file_totals.items(), key=lambda pair: pair[1], reverse=True
        )
        worst_files = ranked_files[:5]
        if worst_files:
            print("## Top Affected Files\n")
            for raw_path, occurrences in worst_files:
                print(f"  {_shown_path(raw_path)} ({occurrences} finding(s))")
            print()

        # Detailed findings; a heading is emitted whenever the severity changes
        print("## Detailed Findings\n")
        last_heading = None
        for entry in findings:
            level = entry["severity"]
            if level != last_heading:
                last_heading = level
                print(f"### {level.upper()}\n")

            shown = _shown_path(entry["file"])

            print(f"- **{entry['title']}**")
            print(f"  File: {shown}:{entry['line']}")
            print(f"  Tool: {entry['tool']} | Confidence: {entry['confidence']}")
            if entry.get("cwe"):
                print(f"  CWE: {entry['cwe']}")
            if entry.get("detail"):
                print(f"  Detail: {entry['detail'][:200]}")
            if entry.get("remediation"):
                print(f"  Remediation: {entry['remediation']}")
            print()

    # JSON output
    if output_path:
        _write_json_report(directory, findings, output_path)
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
def _write_json_report(
|
|
639
|
+
directory: Path,
|
|
640
|
+
findings: list[dict[str, Any]],
|
|
641
|
+
output_path: Path,
|
|
642
|
+
) -> None:
|
|
643
|
+
"""Write the findings to a JSON file."""
|
|
644
|
+
report = {
|
|
645
|
+
"scanner": "code_security_scanner",
|
|
646
|
+
"target": str(directory.resolve()),
|
|
647
|
+
"total_findings": len(findings),
|
|
648
|
+
"summary": {
|
|
649
|
+
"by_severity": {},
|
|
650
|
+
"by_category": {},
|
|
651
|
+
},
|
|
652
|
+
"findings": findings,
|
|
653
|
+
}
|
|
654
|
+
|
|
655
|
+
for f in findings:
|
|
656
|
+
sev = f["severity"]
|
|
657
|
+
cat = f["category"]
|
|
658
|
+
report["summary"]["by_severity"][sev] = report["summary"]["by_severity"].get(sev, 0) + 1
|
|
659
|
+
report["summary"]["by_category"][cat] = report["summary"]["by_category"].get(cat, 0) + 1
|
|
660
|
+
|
|
661
|
+
try:
|
|
662
|
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
663
|
+
with open(output_path, "w", encoding="utf-8") as fh:
|
|
664
|
+
json.dump(report, fh, indent=2, default=str)
|
|
665
|
+
print(f"\nJSON report written to: {output_path}", file=sys.stderr)
|
|
666
|
+
except OSError as exc:
|
|
667
|
+
print(f"[scanner] Failed to write JSON report: {exc}", file=sys.stderr)
|
|
668
|
+
|
|
669
|
+
|
|
670
|
+
# ---------------------------------------------------------------------------
|
|
671
|
+
# CLI entry point
|
|
672
|
+
# ---------------------------------------------------------------------------
|
|
673
|
+
|
|
674
|
+
def main(argv: list[str] | None = None) -> None:
    """
    Parse arguments, run the selected scan engines and exit with a status code.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``; ``None``
            (the default) keeps the normal command-line behaviour.

    Exit status (always raised via ``sys.exit``):
        0 - no critical or high severity finding at or above the threshold
        1 - at least one critical or high severity finding
        2 - invalid invocation: the target is not a directory, or ``--tools``
            names no valid engine
    """
    parser = argparse.ArgumentParser(
        prog="code_security_scanner",
        description="Static analysis security scanner combining Bandit and custom regex patterns.",
        epilog="Exit code 0 if no critical/high findings, 1 otherwise, 2 on scanner error.",
    )
    parser.add_argument(
        "directory",
        type=Path,
        help="Path to the source code directory to scan.",
    )
    parser.add_argument(
        "--tools",
        type=str,
        default="bandit,regex",
        help="Comma-separated list of scan engines to use (default: bandit,regex).",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=None,
        help="Path to write JSON report (optional).",
    )
    parser.add_argument(
        "--severity",
        type=str,
        default="low",
        choices=["critical", "high", "medium", "low"],
        help="Minimum severity threshold to report (default: low).",
    )
    parser.add_argument(
        "--exclude",
        type=str,
        default=None,
        help='Comma-separated glob patterns to exclude (e.g. "test_*,*_test.py").',
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Print detailed progress information to stderr.",
    )

    args = parser.parse_args(argv)

    # Validate directory
    if not args.directory.is_dir():
        print(f"[scanner] Error: '{args.directory}' is not a valid directory.", file=sys.stderr)
        sys.exit(2)

    directory = args.directory.resolve()
    severity_threshold = args.severity.lower()
    verbose = args.verbose

    # Empty tokens (e.g. from a trailing comma) are dropped so they are
    # neither reported as unknown tools nor forwarded as empty globs.
    exclude_patterns = None
    if args.exclude:
        exclude_patterns = [p.strip() for p in args.exclude.split(",") if p.strip()] or None

    valid_tools = {"bandit", "regex"}
    tools: list[str] = []
    for token in args.tools.split(","):
        tool = token.strip().lower()
        if not tool:
            continue
        if tool not in valid_tools:
            print(
                f"[scanner] Warning: Unknown tool '{tool}'. Valid tools: {', '.join(sorted(valid_tools))}",
                file=sys.stderr,
            )
        elif tool not in tools:
            tools.append(tool)

    # Without a valid engine nothing would run and the report would claim
    # "No security issues found"; treat that as an invalid invocation.
    if not tools:
        print(
            f"[scanner] Error: no valid scan engine selected. Valid tools: {', '.join(sorted(valid_tools))}",
            file=sys.stderr,
        )
        sys.exit(2)

    _log(f"Scanning: {directory}", verbose)
    _log(f"Tools: {', '.join(tools)}", verbose)
    _log(f"Severity threshold: {severity_threshold}", verbose)
    if exclude_patterns:
        _log(f"Exclude patterns: {', '.join(exclude_patterns)}", verbose)

    # Run selected scan engines
    bandit_results: list[dict[str, Any]] = []
    regex_results: list[dict[str, Any]] = []

    if "bandit" in tools:
        bandit_results = run_bandit_scan(directory, exclude_patterns, verbose)

    if "regex" in tools:
        regex_results = run_regex_scan(directory, exclude_patterns, verbose)

    # Merge and deduplicate
    all_findings = merge_findings(bandit_results, regex_results)

    # Apply severity filter
    filtered_findings = [
        f for f in all_findings
        if _severity_at_or_above(f["severity"], severity_threshold)
    ]

    _log(
        f"Total: {len(all_findings)} finding(s), "
        f"{len(filtered_findings)} at or above '{severity_threshold}' severity.",
        verbose,
    )

    # Generate report
    generate_report(directory, filtered_findings, args.output)

    # Exit code based on critical/high findings
    has_critical_or_high = any(
        f["severity"] in ("critical", "high") for f in filtered_findings
    )
    sys.exit(1 if has_critical_or_high else 0)


if __name__ == "__main__":
    main()
|