@intentsolutionsio/penetration-tester 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,780 @@
1
+ """
2
+ Static analysis security scanner combining Bandit and custom regex pattern detection.
3
+
4
+ Scans source code for common security vulnerabilities including hardcoded secrets,
5
+ SQL injection, command injection, insecure deserialization, and weak cryptography.
6
+
7
+ Usage:
8
+ python3 code_security_scanner.py /path/to/code [options]
9
+
10
+ Options:
11
+ --tools bandit,regex Comma-separated list of scan engines (default: both)
12
+ --output findings.json Write JSON report to file
13
+ --severity low Minimum severity threshold (critical, high, medium, low)
14
+ --exclude "test_*" Comma-separated glob patterns to exclude
15
+ --verbose Print detailed progress information
16
+
17
+ Exit codes:
18
+ 0 - No critical or high severity findings
19
+ 1 - Critical or high severity findings detected
20
+ 2 - Scanner error (missing tools, invalid arguments, etc.)
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import argparse
26
+ import fnmatch
27
+ import json
28
+ import os
29
+ import re
30
+ import subprocess
31
+ import sys
32
+ from pathlib import Path
33
+ from typing import Any, Optional
34
+
35
+ # ---------------------------------------------------------------------------
36
+ # Constants
37
+ # ---------------------------------------------------------------------------
38
+
39
# Rank of every canonical severity label; a smaller rank means more severe.
SEVERITY_ORDER: dict[str, int] = {
    label: rank
    for rank, label in enumerate(("critical", "high", "medium", "low"))
}

# File suffixes (lower-case, with the dot) that the regex engine inspects.
SCANNABLE_EXTENSIONS: set[str] = {
    # Python
    ".py",
    # JavaScript / TypeScript
    ".js",
    ".ts",
    ".jsx",
    ".tsx",
    # Other languages
    ".java",
    ".rb",
    ".go",
    ".php",
    ".sh",
}

# Directory names the regex engine never descends into.
SKIP_DIRS: set[str] = {
    # Version control and dependencies
    ".git",
    "node_modules",
    # Virtual environments
    ".venv",
    "venv",
    # Tool caches
    "__pycache__",
    ".tox",
    ".mypy_cache",
    ".pytest_cache",
    # Build artefacts
    "dist",
    "build",
}

# Upper bound, in seconds, on how long the Bandit subprocess may run.
BANDIT_TIMEOUT_SECONDS: int = 2 * 60
57
+
58
+ # ---------------------------------------------------------------------------
59
+ # Compiled regex patterns
60
+ # ---------------------------------------------------------------------------
61
+
62
+ # Each entry: (compiled_pattern, category, severity, confidence, title, remediation, cwe)
63
+
64
+ _HARDCODED_SECRET_PATTERNS: list[tuple[re.Pattern[str], str, str, str, str, str, str | None]] = [
65
+ (
66
+ re.compile(r"""api[_\-]?key\s*[=:]\s*["'][A-Za-z0-9]{20,}""", re.IGNORECASE),
67
+ "hardcoded-secret", "high", "medium",
68
+ "Hardcoded API key detected",
69
+ "Move API keys to environment variables or a secrets manager.",
70
+ "CWE-798",
71
+ ),
72
+ (
73
+ re.compile(r"""AKIA[0-9A-Z]{16}"""),
74
+ "hardcoded-secret", "critical", "high",
75
+ "AWS Access Key ID detected",
76
+ "Rotate the exposed key immediately and use IAM roles or environment variables.",
77
+ "CWE-798",
78
+ ),
79
+ (
80
+ re.compile(r"""password\s*[=:]\s*["'](?!["']$)(?!\s*$)(?!<%=)(?!\$\{)(?!\{\{)[^"']+["']""", re.IGNORECASE),
81
+ "hardcoded-secret", "high", "medium",
82
+ "Hardcoded password detected",
83
+ "Use environment variables or a secrets manager instead of hardcoded passwords.",
84
+ "CWE-798",
85
+ ),
86
+ (
87
+ re.compile(r"""-----BEGIN\s+(?:RSA\s+|EC\s+|DSA\s+)?PRIVATE\s+KEY-----"""),
88
+ "hardcoded-secret", "critical", "high",
89
+ "Private key embedded in source code",
90
+ "Remove the private key from source and store it in a secure vault.",
91
+ "CWE-321",
92
+ ),
93
+ (
94
+ re.compile(
95
+ r"""(?:secret|token|bearer)\s*[=:]\s*["'][A-Za-z0-9+/=]{20,}""",
96
+ re.IGNORECASE,
97
+ ),
98
+ "hardcoded-secret", "high", "medium",
99
+ "Hardcoded secret or token detected",
100
+ "Store secrets in environment variables or a dedicated secrets manager.",
101
+ "CWE-798",
102
+ ),
103
+ ]
104
+
105
+ _SQL_INJECTION_PATTERNS: list[tuple[re.Pattern[str], str, str, str, str, str, str | None]] = [
106
+ (
107
+ re.compile(
108
+ r"""(?:execute|cursor|query)\s*\(\s*f["'].*(?:%s|%d|\{)""",
109
+ re.IGNORECASE,
110
+ ),
111
+ "sql-injection", "high", "high",
112
+ "Potential SQL injection via string formatting",
113
+ "Use parameterized queries or prepared statements instead of string formatting.",
114
+ "CWE-89",
115
+ ),
116
+ (
117
+ re.compile(r"""["']SELECT\s+.*["']\s*\+\s*""", re.IGNORECASE),
118
+ "sql-injection", "high", "medium",
119
+ "SQL query built with string concatenation (SELECT)",
120
+ "Use parameterized queries instead of string concatenation.",
121
+ "CWE-89",
122
+ ),
123
+ (
124
+ re.compile(r"""["']INSERT\s+.*["']\s*\+\s*""", re.IGNORECASE),
125
+ "sql-injection", "high", "medium",
126
+ "SQL query built with string concatenation (INSERT)",
127
+ "Use parameterized queries instead of string concatenation.",
128
+ "CWE-89",
129
+ ),
130
+ ]
131
+
132
+ _COMMAND_INJECTION_PATTERNS: list[tuple[re.Pattern[str], str, str, str, str, str, str | None]] = [
133
+ (
134
+ re.compile(r"""os\.system\("""),
135
+ "command-injection", "high", "high",
136
+ "Use of os.system() allows shell command injection",
137
+ "Use subprocess.run() with a list of arguments and shell=False.",
138
+ "CWE-78",
139
+ ),
140
+ (
141
+ re.compile(r"""subprocess\.(?:call|run|Popen)\(.*shell\s*=\s*True"""),
142
+ "command-injection", "high", "high",
143
+ "Subprocess call with shell=True enables command injection",
144
+ "Pass commands as a list with shell=False instead of shell=True.",
145
+ "CWE-78",
146
+ ),
147
+ (
148
+ re.compile(r"""\beval\("""),
149
+ "command-injection", "medium", "medium",
150
+ "Use of eval() can execute arbitrary code",
151
+ "Avoid eval(). Use ast.literal_eval() for data parsing or refactor logic.",
152
+ "CWE-95",
153
+ ),
154
+ (
155
+ re.compile(r"""\bexec\("""),
156
+ "command-injection", "medium", "medium",
157
+ "Use of exec() can execute arbitrary code",
158
+ "Avoid exec(). Refactor to use safer alternatives.",
159
+ "CWE-95",
160
+ ),
161
+ ]
162
+
163
+ _DESERIALIZATION_PATTERNS: list[tuple[re.Pattern[str], str, str, str, str, str, str | None]] = [
164
+ (
165
+ re.compile(r"""pickle\.loads?\("""),
166
+ "insecure-deserialization", "high", "high",
167
+ "Insecure deserialization with pickle",
168
+ "Avoid pickle for untrusted data. Use JSON or a safe serialization format.",
169
+ "CWE-502",
170
+ ),
171
+ (
172
+ re.compile(r"""yaml\.load\((?!.*Loader\s*=\s*(?:Safe|Base)Loader)"""),
173
+ "insecure-deserialization", "high", "high",
174
+ "Unsafe YAML loading without SafeLoader",
175
+ "Use yaml.safe_load() or pass Loader=SafeLoader to yaml.load().",
176
+ "CWE-502",
177
+ ),
178
+ (
179
+ re.compile(r"""marshal\.loads?\("""),
180
+ "insecure-deserialization", "high", "medium",
181
+ "Insecure deserialization with marshal",
182
+ "Avoid marshal for untrusted data. Use JSON or a safe serialization format.",
183
+ "CWE-502",
184
+ ),
185
+ ]
186
+
187
+ _CRYPTO_NETWORK_PATTERNS: list[tuple[re.Pattern[str], str, str, str, str, str, str | None]] = [
188
+ (
189
+ re.compile(r"""verify\s*=\s*False"""),
190
+ "insecure-transport", "medium", "high",
191
+ "SSL/TLS certificate verification disabled",
192
+ "Enable certificate verification. Set verify=True or provide a CA bundle.",
193
+ "CWE-295",
194
+ ),
195
+ (
196
+ re.compile(r"""\bMD5\b|\.md5\(""", re.IGNORECASE),
197
+ "weak-crypto", "medium", "medium",
198
+ "Use of weak MD5 hashing algorithm",
199
+ "Use SHA-256 or stronger hashing. For passwords, use bcrypt or Argon2.",
200
+ "CWE-328",
201
+ ),
202
+ (
203
+ re.compile(r"""\bSHA1\b|\.sha1\(""", re.IGNORECASE),
204
+ "weak-crypto", "medium", "medium",
205
+ "Use of weak SHA-1 hashing algorithm",
206
+ "Use SHA-256 or stronger hashing. For passwords, use bcrypt or Argon2.",
207
+ "CWE-328",
208
+ ),
209
+ (
210
+ re.compile(r"""http://(?!localhost|127\.0\.0\.1|0\.0\.0\.0|\[::1\])"""),
211
+ "insecure-transport", "medium", "low",
212
+ "Insecure HTTP URL (not HTTPS)",
213
+ "Use HTTPS for all external communications.",
214
+ "CWE-319",
215
+ ),
216
+ ]
217
+
218
# Every regex rule in evaluation order: secrets, SQL injection, command
# injection, deserialization, then crypto / transport.
ALL_REGEX_PATTERNS = [
    *_HARDCODED_SECRET_PATTERNS,
    *_SQL_INJECTION_PATTERNS,
    *_COMMAND_INJECTION_PATTERNS,
    *_DESERIALIZATION_PATTERNS,
    *_CRYPTO_NETWORK_PATTERNS,
]
225
+
226
+
227
+ # ---------------------------------------------------------------------------
228
+ # Utility helpers
229
+ # ---------------------------------------------------------------------------
230
+
231
+ def _log(message: str, verbose: bool = True) -> None:
232
+ """Print a progress message to stderr."""
233
+ if verbose:
234
+ print(f"[scanner] {message}", file=sys.stderr)
235
+
236
+
237
+ def _is_binary_file(filepath: Path) -> bool:
238
+ """Return True if file appears to be binary (contains null bytes in first 1KB)."""
239
+ try:
240
+ with open(filepath, "rb") as fh:
241
+ chunk = fh.read(1024)
242
+ return b"\x00" in chunk
243
+ except (OSError, PermissionError):
244
+ return True
245
+
246
+
247
+ def _should_exclude(filepath: Path, exclude_patterns: list[str] | None) -> bool:
248
+ """Check if a file matches any exclusion glob pattern."""
249
+ if not exclude_patterns:
250
+ return False
251
+ name = filepath.name
252
+ rel = str(filepath)
253
+ for pattern in exclude_patterns:
254
+ if fnmatch.fnmatch(name, pattern) or fnmatch.fnmatch(rel, pattern):
255
+ return True
256
+ return False
257
+
258
+
259
def _severity_at_or_above(severity: str, threshold: str) -> bool:
    """Tell whether ``severity`` is at least as severe as ``threshold``.

    Both labels are ranked through ``SEVERITY_ORDER`` (a smaller rank is more
    severe); a label missing from that table is given rank 99.
    """
    rank = SEVERITY_ORDER.get(severity, 99)
    limit = SEVERITY_ORDER.get(threshold, 99)
    return rank <= limit
262
+
263
+
264
+ def _normalize_bandit_severity(raw: str) -> str:
265
+ """Map Bandit severity strings to our canonical levels."""
266
+ mapping = {
267
+ "HIGH": "high",
268
+ "MEDIUM": "medium",
269
+ "LOW": "low",
270
+ "UNDEFINED": "low",
271
+ }
272
+ return mapping.get(raw.upper(), "low")
273
+
274
+
275
+ def _normalize_bandit_confidence(raw: str) -> str:
276
+ """Map Bandit confidence strings to canonical levels."""
277
+ return raw.lower() if raw.lower() in ("high", "medium", "low") else "low"
278
+
279
+
280
+ # ---------------------------------------------------------------------------
281
+ # Bandit scanning
282
+ # ---------------------------------------------------------------------------
283
+
284
def _bandit_cwe_label(issue: dict[str, Any]) -> str | None:
    """Return ``"CWE-<id>"`` for a Bandit issue, or None without a usable id."""
    cwe = issue.get("issue_cwe")
    # The field may be absent, null or (defensively) not an object at all.
    if not isinstance(cwe, dict):
        return None
    cwe_id = cwe.get("id")
    return f"CWE-{cwe_id}" if cwe_id else None


def run_bandit_scan(
    directory: Path,
    exclude_patterns: list[str] | None = None,
    verbose: bool = False,
) -> list[dict[str, Any]]:
    """
    Run Bandit static analysis on a directory and return structured findings.

    Args:
        directory: Root of the tree handed to ``bandit -r``.
        exclude_patterns: Optional paths/globs, joined with commas and passed
            to Bandit's ``-x`` flag.
        verbose: When true, progress messages are written to stderr.

    Returns:
        One dict per Bandit result with the keys ``tool``, ``file``, ``line``,
        ``severity``, ``confidence``, ``category``, ``title``, ``detail``,
        ``remediation`` and ``cwe``. An empty list is returned, after a
        message on stderr, when Bandit is not installed, times out, exits
        with an error without producing a report, or emits output that is
        not a JSON object. None of these cases raises.
    """
    cmd: list[str] = ["bandit", "-r", str(directory), "-f", "json", "-q"]

    if exclude_patterns:
        # Bandit's -x flag accepts comma-separated paths/globs
        cmd.extend(["-x", ",".join(exclude_patterns)])

    _log(f"Running bandit on {directory} ...", verbose)

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=BANDIT_TIMEOUT_SECONDS,
        )
    except FileNotFoundError:
        print(
            "[scanner] Bandit is not installed.\n"
            " Install with: pip install bandit\n"
            " Or: pipx install bandit\n"
            " Skipping bandit scan.",
            file=sys.stderr,
        )
        return []
    except subprocess.TimeoutExpired:
        print(
            f"[scanner] Bandit scan timed out after {BANDIT_TIMEOUT_SECONDS}s. "
            "Consider narrowing the scan scope.",
            file=sys.stderr,
        )
        return []

    # Bandit returns exit code 1 when it finds issues, which is expected, and
    # in that case it still prints its JSON report.
    stdout = result.stdout.strip()
    if not stdout:
        if result.returncode != 0:
            # A non-zero exit with no report means the run itself failed;
            # say so instead of presenting it as a clean scan.
            reason = (result.stderr or "").strip() or "no error output"
            print(
                f"[scanner] Bandit exited with code {result.returncode} "
                f"without producing a report: {reason}",
                file=sys.stderr,
            )
        else:
            _log("Bandit produced no output (no Python files or no findings).", verbose)
        return []

    try:
        data = json.loads(stdout)
    except json.JSONDecodeError as exc:
        print(
            f"[scanner] Failed to parse bandit JSON output: {exc}",
            file=sys.stderr,
        )
        return []

    if not isinstance(data, dict):
        print(
            "[scanner] Failed to parse bandit JSON output: "
            f"expected an object, got {type(data).__name__}",
            file=sys.stderr,
        )
        return []

    findings: list[dict[str, Any]] = []
    for issue in data.get("results") or []:
        if not isinstance(issue, dict):
            continue
        findings.append({
            "tool": "bandit",
            "file": str(Path(issue.get("filename", "unknown")).resolve()),
            "line": issue.get("line_number", 0),
            "severity": _normalize_bandit_severity(issue.get("issue_severity", "LOW")),
            "confidence": _normalize_bandit_confidence(issue.get("issue_confidence", "LOW")),
            "category": issue.get("test_id", "unknown"),
            "title": issue.get("test_name", "Unknown issue"),
            "detail": issue.get("issue_text", ""),
            "remediation": "",
            "cwe": _bandit_cwe_label(issue),
        })

    _log(f"Bandit found {len(findings)} issue(s).", verbose)
    return findings
364
+
365
+
366
+ # ---------------------------------------------------------------------------
367
+ # Regex-based scanning
368
+ # ---------------------------------------------------------------------------
369
+
370
def run_regex_scan(
    directory: Path,
    exclude_patterns: list[str] | None = None,
    verbose: bool = False,
) -> list[dict[str, Any]]:
    """
    Walk the directory tree and scan source files against compiled regex
    patterns for common security vulnerabilities.

    Skips directories listed in ``SKIP_DIRS`` or starting with a dot, files
    whose suffix is not in ``SCANNABLE_EXTENSIONS``, files matching an
    exclusion pattern, binary files and unreadable files. Lines whose first
    non-blank characters are ``#`` or ``//`` are treated as comments and
    ignored.

    Args:
        directory: Root of the tree to scan.
        exclude_patterns: Optional globs matched against each file's name and
            its path relative to ``directory``.
        verbose: When true, progress messages are written to stderr.

    Returns:
        One dict per (line, rule) match with the keys ``tool``, ``file``,
        ``line``, ``severity``, ``confidence``, ``category``, ``title``,
        ``detail`` (the stripped line, capped at 200 characters),
        ``remediation`` and ``cwe``.
    """
    findings: list[dict[str, Any]] = []
    files_scanned = 0

    _log(f"Running regex scan on {directory} ...", verbose)

    for root, dirs, files in os.walk(directory):
        # Prune directories we never want to enter (modifying dirs in-place)
        dirs[:] = [d for d in dirs if d not in SKIP_DIRS and not d.startswith(".")]

        for filename in files:
            filepath = Path(root) / filename

            # Extension filter
            if filepath.suffix.lower() not in SCANNABLE_EXTENSIONS:
                continue

            # Exclusion filter
            rel_path = filepath.relative_to(directory)
            if _should_exclude(rel_path, exclude_patterns):
                continue

            # Skip binary files
            if _is_binary_file(filepath):
                continue

            try:
                lines = filepath.read_text(encoding="utf-8", errors="replace").splitlines()
            except OSError:
                continue

            files_scanned += 1
            is_test_file = _is_test_path(str(rel_path))
            resolved_path = str(filepath.resolve())

            for line_num, line in enumerate(lines, start=1):
                stripped = line.strip()
                # Skip comments (basic heuristic across languages)
                if stripped.startswith(("#", "//")):
                    continue

                for pattern, category, severity, confidence, title, remediation, cwe in ALL_REGEX_PATTERNS:
                    # Skip the plain-HTTP URL rule in test files. The rule is
                    # identified by its CWE: its title does not contain the
                    # text "http://", so a title check can never match.
                    if is_test_file and category == "insecure-transport" and cwe == "CWE-319":
                        continue

                    if not pattern.search(line):
                        continue

                    # Extra validation for password pattern: skip placeholders
                    if "password" in title.lower() and _is_password_placeholder(line):
                        continue

                    findings.append({
                        "tool": "regex",
                        "file": resolved_path,
                        "line": line_num,
                        "severity": severity,
                        "confidence": confidence,
                        "category": category,
                        "title": title,
                        "detail": stripped[:200],
                        "remediation": remediation,
                        "cwe": cwe,
                    })

    _log(f"Regex scan complete: {files_scanned} file(s) scanned, {len(findings)} issue(s) found.", verbose)
    return findings
447
+
448
+
449
+ def _is_test_path(rel_path: str) -> bool:
450
+ """Heuristic to detect test files and directories."""
451
+ parts = rel_path.lower().replace("\\", "/")
452
+ return (
453
+ "/test/" in parts
454
+ or "/tests/" in parts
455
+ or parts.startswith("test/")
456
+ or parts.startswith("tests/")
457
+ or parts.endswith("_test.py")
458
+ or parts.endswith("_test.js")
459
+ or parts.endswith("_test.ts")
460
+ or "test_" in Path(rel_path).name.lower()
461
+ or ".test." in Path(rel_path).name.lower()
462
+ or ".spec." in Path(rel_path).name.lower()
463
+ )
464
+
465
+
466
+ def _is_password_placeholder(line: str) -> bool:
467
+ """
468
+ Return True if a password assignment looks like a placeholder, empty
469
+ string, environment variable reference, or template variable rather
470
+ than a real hardcoded credential.
471
+ """
472
+ lower = line.lower()
473
+ placeholders = [
474
+ 'password = ""', "password = ''",
475
+ 'password: ""', "password: ''",
476
+ "password = os.environ", "password = os.getenv",
477
+ "password = env(", "password = config",
478
+ "password = settings",
479
+ "password = none", "password = null",
480
+ "password_hash", "password_field",
481
+ "password_input", "password_reset",
482
+ "${", "<%=", "{{",
483
+ "placeholder", "changeme", "xxx", "example",
484
+ "your_password", "your-password",
485
+ "password_here", "<password>",
486
+ ]
487
+ for p in placeholders:
488
+ if p in lower:
489
+ return True
490
+ return False
491
+
492
+
493
+ # ---------------------------------------------------------------------------
494
+ # Merge and deduplicate findings
495
+ # ---------------------------------------------------------------------------
496
+
497
def merge_findings(
    bandit_results: list[dict[str, Any]],
    regex_results: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """
    Combine Bandit and regex findings into one de-duplicated, ordered list.

    At most one finding is kept per (file, line) pair. A later finding
    replaces the stored one when its ``detail`` text is longer, or when the
    lengths are equal and its severity ranks strictly higher.

    The result is ordered by severity (critical > high > medium > low),
    then by file path, then by line number.
    """
    best: dict[tuple[str, int], dict[str, Any]] = {}

    for candidate in [*bandit_results, *regex_results]:
        location = (candidate["file"], candidate["line"])
        if location not in best:
            best[location] = candidate
            continue

        incumbent = best[location]
        new_len = len(candidate.get("detail", ""))
        old_len = len(incumbent.get("detail", ""))

        if new_len > old_len:
            # More detail wins outright.
            best[location] = candidate
        elif new_len == old_len:
            # Same amount of detail: only a strictly higher severity wins.
            new_rank = SEVERITY_ORDER.get(candidate["severity"], 99)
            old_rank = SEVERITY_ORDER.get(incumbent["severity"], 99)
            if new_rank < old_rank:
                best[location] = candidate

    def _sort_key(item: dict[str, Any]) -> tuple[int, str, int]:
        return (SEVERITY_ORDER.get(item["severity"], 99), item["file"], item["line"])

    return sorted(best.values(), key=_sort_key)
539
+
540
+
541
+ # ---------------------------------------------------------------------------
542
+ # Reporting
543
+ # ---------------------------------------------------------------------------
544
+
545
def generate_report(
    directory: Path,
    findings: list[dict[str, Any]],
    output_path: Path | None = None,
) -> None:
    """
    Print the security report to stdout and, when ``output_path`` is given,
    also write the JSON report there.

    With no findings only a short "no issues" notice is printed. Otherwise
    the report lists counts by severity and by category, the five files with
    the most findings, and every finding in the order supplied, with a
    heading printed each time the severity changes.
    """
    target = directory.resolve()
    banner = "\n=== Security Scan Report ===\n"

    if not findings:
        print(banner)
        print(f"Target: {target}\n")
        print("No security issues found.\n")
        if output_path:
            _write_json_report(directory, findings, output_path)
        return

    def _display_path(raw: str) -> Any:
        # Prefer a path relative to the scan root; fall back to the raw value.
        try:
            return Path(raw).relative_to(target)
        except ValueError:
            return raw

    # -- Summary statistics --
    severity_counts: dict[str, int] = {}
    category_counts: dict[str, int] = {}
    file_counts: dict[str, int] = {}
    for entry in findings:
        tallies = (
            (severity_counts, entry["severity"]),
            (category_counts, entry["category"]),
            (file_counts, entry["file"]),
        )
        for counts, bucket in tallies:
            counts[bucket] = counts.get(bucket, 0) + 1

    print(banner)
    print(f"Target: {target}")
    print(f"Total findings: {len(findings)}\n")

    # Severity summary (fixed order, zero counts omitted)
    print("## Findings by Severity\n")
    for level in ("critical", "high", "medium", "low"):
        total = severity_counts.get(level, 0)
        if total > 0:
            print(f" {level.upper()}: {total}")
    print()

    # Category summary, most frequent first
    print("## Findings by Category\n")
    ranked_categories = sorted(category_counts.items(), key=lambda kv: kv[1], reverse=True)
    for name, total in ranked_categories:
        print(f" {name}: {total}")
    print()

    # Top 5 most affected files
    top_files = sorted(file_counts.items(), key=lambda kv: kv[1], reverse=True)[:5]
    if top_files:
        print("## Top Affected Files\n")
        for raw_path, total in top_files:
            print(f" {_display_path(raw_path)} ({total} finding(s))")
        print()

    # Detailed findings grouped by severity
    print("## Detailed Findings\n")
    heading = None
    for entry in findings:
        level = entry["severity"]
        if level != heading:
            heading = level
            print(f"### {level.upper()}\n")

        location = _display_path(entry["file"])

        print(f"- **{entry['title']}**")
        print(f" File: {location}:{entry['line']}")
        print(f" Tool: {entry['tool']} | Confidence: {entry['confidence']}")
        if entry.get("cwe"):
            print(f" CWE: {entry['cwe']}")
        if entry.get("detail"):
            print(f" Detail: {entry['detail'][:200]}")
        if entry.get("remediation"):
            print(f" Remediation: {entry['remediation']}")
        print()

    # JSON output
    if output_path:
        _write_json_report(directory, findings, output_path)
636
+
637
+
638
+ def _write_json_report(
639
+ directory: Path,
640
+ findings: list[dict[str, Any]],
641
+ output_path: Path,
642
+ ) -> None:
643
+ """Write the findings to a JSON file."""
644
+ report = {
645
+ "scanner": "code_security_scanner",
646
+ "target": str(directory.resolve()),
647
+ "total_findings": len(findings),
648
+ "summary": {
649
+ "by_severity": {},
650
+ "by_category": {},
651
+ },
652
+ "findings": findings,
653
+ }
654
+
655
+ for f in findings:
656
+ sev = f["severity"]
657
+ cat = f["category"]
658
+ report["summary"]["by_severity"][sev] = report["summary"]["by_severity"].get(sev, 0) + 1
659
+ report["summary"]["by_category"][cat] = report["summary"]["by_category"].get(cat, 0) + 1
660
+
661
+ try:
662
+ output_path.parent.mkdir(parents=True, exist_ok=True)
663
+ with open(output_path, "w", encoding="utf-8") as fh:
664
+ json.dump(report, fh, indent=2, default=str)
665
+ print(f"\nJSON report written to: {output_path}", file=sys.stderr)
666
+ except OSError as exc:
667
+ print(f"[scanner] Failed to write JSON report: {exc}", file=sys.stderr)
668
+
669
+
670
+ # ---------------------------------------------------------------------------
671
+ # CLI entry point
672
+ # ---------------------------------------------------------------------------
673
+
674
def main() -> None:
    """Parse arguments, run the selected scan engines and exit with a status.

    Exit status:
        0 - no critical or high severity findings among those reported
        1 - at least one critical or high severity finding reported
        2 - the target is not a directory, or ``--tools`` names no valid
            scan engine
    """
    parser = argparse.ArgumentParser(
        prog="code_security_scanner",
        description="Static analysis security scanner combining Bandit and custom regex patterns.",
        epilog="Exit code 0 if no critical/high findings, 1 otherwise, 2 on scanner error.",
    )
    parser.add_argument(
        "directory",
        type=Path,
        help="Path to the source code directory to scan.",
    )
    parser.add_argument(
        "--tools",
        type=str,
        default="bandit,regex",
        help="Comma-separated list of scan engines to use (default: bandit,regex).",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=None,
        help="Path to write JSON report (optional).",
    )
    parser.add_argument(
        "--severity",
        type=str,
        default="low",
        choices=["critical", "high", "medium", "low"],
        help="Minimum severity threshold to report (default: low).",
    )
    parser.add_argument(
        "--exclude",
        type=str,
        default=None,
        help='Comma-separated glob patterns to exclude (e.g. "test_*,*_test.py").',
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Print detailed progress information to stderr.",
    )

    args = parser.parse_args()

    # Validate directory
    if not args.directory.is_dir():
        print(f"[scanner] Error: '{args.directory}' is not a valid directory.", file=sys.stderr)
        sys.exit(2)

    directory = args.directory.resolve()
    # Empty entries (e.g. from a trailing comma) are dropped rather than
    # reported as unknown tools.
    tools = [t for t in (part.strip().lower() for part in args.tools.split(",")) if t]
    exclude_patterns = [p.strip() for p in args.exclude.split(",")] if args.exclude else None
    severity_threshold = args.severity.lower()
    verbose = args.verbose

    valid_tools = {"bandit", "regex"}
    for tool in tools:
        if tool not in valid_tools:
            print(
                f"[scanner] Warning: Unknown tool '{tool}'. Valid tools: {', '.join(sorted(valid_tools))}",
                file=sys.stderr,
            )

    # With no valid engine nothing would be scanned, and an empty report
    # would look like a clean result. Treat that as an argument error.
    if not any(tool in valid_tools for tool in tools):
        print(
            "[scanner] Error: no valid scan engine selected. "
            f"Valid tools: {', '.join(sorted(valid_tools))}",
            file=sys.stderr,
        )
        sys.exit(2)

    _log(f"Scanning: {directory}", verbose)
    _log(f"Tools: {', '.join(tools)}", verbose)
    _log(f"Severity threshold: {severity_threshold}", verbose)
    if exclude_patterns:
        _log(f"Exclude patterns: {', '.join(exclude_patterns)}", verbose)

    # Run selected scan engines
    bandit_results: list[dict[str, Any]] = []
    regex_results: list[dict[str, Any]] = []

    if "bandit" in tools:
        bandit_results = run_bandit_scan(directory, exclude_patterns, verbose)

    if "regex" in tools:
        regex_results = run_regex_scan(directory, exclude_patterns, verbose)

    # Merge and deduplicate
    all_findings = merge_findings(bandit_results, regex_results)

    # Apply severity filter
    filtered_findings = [
        f for f in all_findings
        if _severity_at_or_above(f["severity"], severity_threshold)
    ]

    _log(
        f"Total: {len(all_findings)} finding(s), "
        f"{len(filtered_findings)} at or above '{severity_threshold}' severity.",
        verbose,
    )

    # Generate report
    generate_report(directory, filtered_findings, args.output)

    # Exit code based on critical/high findings
    has_critical_or_high = any(
        f["severity"] in ("critical", "high") for f in filtered_findings
    )
    sys.exit(1 if has_critical_or_high else 0)


# Run the CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()