scan4secrets 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. scan4secrets/__init__.py +3 -0
  2. scan4secrets/__main__.py +4 -0
  3. scan4secrets/cli.py +277 -0
  4. scan4secrets/config/rules.yaml +1256 -0
  5. scan4secrets/config/wordlist/CloudProvider-Service.txt +108 -0
  6. scan4secrets/config/wordlist/Docker-Compose-Kubernetes.txt +87 -0
  7. scan4secrets/config/wordlist/Keys-SSH-Certificate.txt +107 -0
  8. scan4secrets/config/wordlist/Node.js-Express-JS.txt +80 -0
  9. scan4secrets/config/wordlist/OtherConfig-CI-DevOps.txt +141 -0
  10. scan4secrets/config/wordlist/Python-Django-Flask.txt +88 -0
  11. scan4secrets/config/wordlist/React-Next.js-Vite-Frontend.txt +94 -0
  12. scan4secrets/config/wordlist/admin-panels.txt +130 -0
  13. scan4secrets/config/wordlist/api-paths.txt +129 -0
  14. scan4secrets/config/wordlist/backup-files.txt +111 -0
  15. scan4secrets/config/wordlist/common.txt +163 -0
  16. scan4secrets/config/wordlist/database-dumps.txt +120 -0
  17. scan4secrets/config/wordlist/env.txt +59 -0
  18. scan4secrets/config/wordlist/php-laravel-symfony-drupal.txt +107 -0
  19. scan4secrets/config/wordlist/wordpress.txt +53 -0
  20. scan4secrets/engine/__init__.py +1 -0
  21. scan4secrets/engine/crawler.py +327 -0
  22. scan4secrets/engine/entropy.py +11 -0
  23. scan4secrets/engine/findings.py +67 -0
  24. scan4secrets/engine/rules.py +139 -0
  25. scan4secrets/engine/scanner.py +126 -0
  26. scan4secrets/engine/sourcemap.py +19 -0
  27. scan4secrets/engine/verifier.py +61 -0
  28. scan4secrets/engine/wordlists.py +54 -0
  29. scan4secrets/reporters/__init__.py +47 -0
  30. scan4secrets/reporters/csv_.py +21 -0
  31. scan4secrets/reporters/excel.py +16 -0
  32. scan4secrets/reporters/html.py +114 -0
  33. scan4secrets/reporters/json_.py +9 -0
  34. scan4secrets/reporters/jsonl.py +12 -0
  35. scan4secrets/reporters/pdf.py +52 -0
  36. scan4secrets/reporters/sarif.py +74 -0
  37. scan4secrets-2.1.0.dist-info/METADATA +293 -0
  38. scan4secrets-2.1.0.dist-info/RECORD +42 -0
  39. scan4secrets-2.1.0.dist-info/WHEEL +5 -0
  40. scan4secrets-2.1.0.dist-info/entry_points.txt +2 -0
  41. scan4secrets-2.1.0.dist-info/licenses/LICENSE +21 -0
  42. scan4secrets-2.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,3 @@
1
+ """scan4secrets — DAST + SAST secret scanner."""
2
+
3
# Package version string; the CLI imports it to build its ``--version`` output.
__version__ = "2.1.0"
@@ -0,0 +1,4 @@
1
+ from scan4secrets.cli import main
2
+
3
# Entry point when this module is executed as a script (e.g. ``python -m scan4secrets``):
# whatever main() returns is used as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
scan4secrets/cli.py ADDED
@@ -0,0 +1,277 @@
1
+ """scan4secrets v2 CLI.
2
+
3
+ Usage examples:
4
+ scan4secrets --path /code --report sarif json --output reports/run1
5
+ scan4secrets --url https://target.com --threads 32 --verify --report html
6
+ scan4secrets --path /code --rules custom-rules.yaml --fail-on high
7
+ """
8
+
9
+ from __future__ import annotations
10
+ import argparse
11
+ import logging
12
+ import sys
13
+ from pathlib import Path
14
+ from collections import Counter
15
+
16
+ from rich.console import Console
17
+ from rich.table import Table
18
+ from rich import box
19
+
20
+ from scan4secrets import __version__
21
+ from scan4secrets.engine.rules import load_rules
22
+ from scan4secrets.engine.scanner import scan_path, DEFAULT_SKIP_DIRS, DEFAULT_MAX_BYTES
23
+ from scan4secrets.engine.crawler import crawl_and_scan, build_session, normalize_url
24
+ from scan4secrets.engine.wordlists import seed_urls_from_wordlists, seed_urls_from_files, load_wordlists
25
+ from scan4secrets.engine.verifier import verify_findings
26
+ from scan4secrets.engine.findings import Finding, severity_at_least, suppress_generic_when_specific
27
+ from scan4secrets.reporters import write_reports
28
+
29
+
30
def _parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``scan4secrets`` command line.

    Options are registered in themed groups (input, rules, SAST, DAST,
    verification, output, logging/CI) so that ``--help`` lists them in
    that order.

    Returns:
        A fully configured :class:`argparse.ArgumentParser`.
    """
    parser = argparse.ArgumentParser(
        prog="scan4secrets",
        description="DAST + SAST secret scanner — verify findings against vendor APIs.",
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"scan4secrets {__version__}",
    )

    # --- where the content to scan comes from ---------------------------
    input_group = parser.add_argument_group("Input")
    input_group.add_argument("--path", help="local directory or file (SAST)")
    input_group.add_argument("--url", help="target URL (DAST)")
    input_group.add_argument(
        "--stdin",
        action="store_true",
        help="read content from stdin",
    )

    # --- rule selection and tuning --------------------------------------
    rule_group = parser.add_argument_group("Rules")
    rule_group.add_argument("--rules", help="path to custom rules.yaml")
    rule_group.add_argument("--rule-id", nargs="+", help="only run these rule IDs")
    rule_group.add_argument(
        "--disable-rule",
        nargs="+",
        default=[],
        help="rule IDs to disable",
    )
    rule_group.add_argument(
        "--entropy-min",
        type=float,
        help="override per-rule entropy floor",
    )

    # --- filesystem scanning --------------------------------------------
    sast_group = parser.add_argument_group("SAST (filesystem)")
    sast_group.add_argument(
        "--exclude",
        nargs="+",
        default=[],
        help="extra glob patterns to exclude",
    )
    sast_group.add_argument(
        "--exclude-dir",
        nargs="+",
        default=[],
        help="extra directory names to skip",
    )
    sast_group.add_argument(
        "--max-size",
        default=str(DEFAULT_MAX_BYTES),
        help="skip files larger than (bytes or e.g. 10M)",
    )

    # --- web crawling ---------------------------------------------------
    dast_group = parser.add_argument_group("DAST (web)")
    dast_group.add_argument("--threads", type=int, default=16)
    dast_group.add_argument("--timeout", type=int, default=10)
    dast_group.add_argument(
        "--max-urls",
        type=int,
        default=2000,
        help="max URLs to fetch per run (default 2000; large enough to crawl bundled wordlists + discovered links)",
    )
    dast_group.add_argument("--max-depth", type=int, default=3)
    dast_group.add_argument("--user-agent", help="override User-Agent")
    dast_group.add_argument(
        "--header",
        action="append",
        default=[],
        metavar="K:V",
        help="add request header (repeatable)",
    )
    dast_group.add_argument("--cookie", help="cookie string")
    dast_group.add_argument("--proxy", help="http(s) proxy URL")
    dast_group.add_argument(
        "--insecure",
        action="store_true",
        help="skip TLS verification",
    )
    dast_group.add_argument(
        "--strict-host",
        action="store_true",
        help="restrict crawl to exact start host (default: same eTLD+1)",
    )
    dast_group.add_argument(
        "--no-sourcemaps",
        action="store_true",
        help="skip parsing .js.map files",
    )
    dast_group.add_argument(
        "--no-js-endpoints",
        action="store_true",
        help="skip extracting endpoints from JS sources",
    )
    dast_group.add_argument(
        "--wordlist",
        nargs="+",
        metavar="FILE",
        help="path(s) to user wordlist file(s). When given, REPLACES the bundled wordlists.",
    )
    dast_group.add_argument(
        "--wordlist-only",
        nargs="+",
        metavar="NAME",
        help="restrict bundled wordlists to these stems (e.g. common env wordpress). Ignored if --wordlist is given.",
    )
    dast_group.add_argument(
        "--no-wordlist",
        action="store_true",
        help="disable wordlist seeding entirely",
    )

    # --- live verification ----------------------------------------------
    verify_group = parser.add_argument_group("Verification")
    verify_group.add_argument(
        "--verify",
        action="store_true",
        help="live-verify findings against vendor APIs",
    )
    verify_group.add_argument("--verify-timeout", type=int, default=5)

    # --- report generation ----------------------------------------------
    output_group = parser.add_argument_group("Output")
    output_group.add_argument(
        "--report",
        nargs="+",
        default=["html", "json"],
        choices=["sarif", "json", "jsonl", "csv", "html", "excel", "pdf"],
    )
    output_group.add_argument(
        "--output",
        default="scan4secrets-report",
        help="output base path (extension added per format)",
    )
    output_group.add_argument(
        "--unsafe-show",
        action="store_true",
        help="include raw secrets in reports (DANGEROUS)",
    )

    # --- console verbosity and CI gating --------------------------------
    ci_group = parser.add_argument_group("Logging / CI")
    for plain_flag in ("--quiet", "--verbose", "--debug", "--no-color"):
        ci_group.add_argument(plain_flag, action="store_true")
    ci_group.add_argument(
        "--fail-on",
        choices=["info", "low", "medium", "high", "critical"],
        default=None,
        help="exit 1 if any finding >= this severity",
    )
    ci_group.add_argument(
        "--keep-generic",
        action="store_true",
        help="don't suppress generic-rule duplicates when a vendor-specific rule fired on the same value",
    )
    return parser
95
+
96
+
97
+ def _parse_size(s: str) -> int:
98
+ s = str(s).strip().upper()
99
+ units = {"K": 1024, "M": 1024**2, "G": 1024**3}
100
+ if s and s[-1] in units:
101
+ return int(float(s[:-1]) * units[s[-1]])
102
+ return int(s)
103
+
104
+
105
+ def _parse_headers(items):
106
+ out = {}
107
+ for it in items:
108
+ if ":" in it:
109
+ k, _, v = it.partition(":")
110
+ out[k.strip()] = v.strip()
111
+ return out
112
+
113
+
114
+ def _filter_rules(rules, ids, disabled, entropy_override):
115
+ if ids:
116
+ wanted = set(ids)
117
+ rules = [r for r in rules if r.id in wanted]
118
+ if disabled:
119
+ bad = set(disabled)
120
+ rules = [r for r in rules if r.id not in bad]
121
+ if entropy_override is not None:
122
+ for r in rules:
123
+ r.entropy_min = entropy_override
124
+ return rules
125
+
126
+
127
def _print_summary(findings, console: Console):
    """Print a table of finding counts per severity, plus a total row.

    Rows are always emitted for critical, high, medium, low and info (in
    that order), showing 0 where nothing was found. The final row shows
    ``len(findings)``.

    Args:
        findings: Sized collection of objects exposing ``severity``.
        console: Rich console the table is printed to.
    """
    per_severity = Counter()
    for finding in findings:
        per_severity[finding.severity] += 1

    summary = Table(title="Scan summary", box=box.SQUARE, show_header=True)
    summary.add_column("Severity")
    summary.add_column("Count", justify="right")

    severity_order = ("critical", "high", "medium", "low", "info")
    for level in severity_order:
        # A Counter yields 0 for severities that never occurred.
        summary.add_row(level, str(per_severity[level]))

    total = len(findings)
    summary.add_row("[bold]TOTAL", f"[bold]{total}")
    console.print(summary)
136
+
137
+
138
def _print_findings(findings, console: Console):
    """Print one table row per finding; print nothing when there are none.

    Columns: severity, rule ID, verification mark (``Y`` when
    ``verified is True``, ``-`` when it is ``None``, ``n`` otherwise),
    ``file:line`` location, and the redacted secret.

    Cell text that comes from scanned content (paths/URLs, rule IDs,
    redacted secrets, severities) is escaped before it reaches rich, so
    square brackets in it are shown literally rather than parsed as
    console markup.

    Args:
        findings: Iterable of objects exposing ``severity``, ``rule_id``,
            ``verified``, ``file``, ``line`` and ``secret_redacted``.
        console: Rich console the table is printed to.
    """
    if not findings:
        return

    # Local import: rich is already a dependency of this module.
    from rich.markup import escape

    def _literal(value):
        # Only strings are parsed as markup; pass anything else through untouched.
        return escape(value) if isinstance(value, str) else value

    tbl = Table(box=box.SIMPLE, show_lines=False, header_style="bold")
    tbl.add_column("SEV", width=8)
    tbl.add_column("Rule", width=28)
    tbl.add_column("V", width=3, justify="center")
    tbl.add_column("Where", overflow="fold")
    tbl.add_column("Secret (redacted)", overflow="fold")

    style = {"critical": "bold red", "high": "red", "medium": "yellow", "low": "cyan", "info": "dim"}
    for f in findings:
        if f.verified is True:
            vmark = "[green]Y[/green]"
        elif f.verified is None:
            vmark = "[dim]-[/dim]"
        else:
            vmark = "[dim]n[/dim]"

        sev_text = escape(str(f.severity))
        sev_style = style.get(f.severity)
        # With no known style, emit plain text: a bare "[/]" has no tag to close.
        sev_cell = f"[{sev_style}]{sev_text}[/]" if sev_style else sev_text

        tbl.add_row(
            sev_cell,
            _literal(f.rule_id),
            vmark,
            escape(f"{f.file}:{f.line}"),
            _literal(f.secret_redacted),
        )
    console.print(tbl)
158
+
159
+
160
def _wordlist_seeds(args, start_url):
    """Return ``(seed_urls, scope_label)`` for the wordlist options in *args*."""
    if args.no_wordlist:
        return [], "disabled"
    if args.wordlist:
        # User-supplied files replace the bundled wordlists.
        return seed_urls_from_files(start_url, args.wordlist), f"user:{','.join(args.wordlist)}"
    if args.wordlist_only:
        seeds = seed_urls_from_wordlists(start_url, only=args.wordlist_only)
        return seeds, f"bundled:{','.join(args.wordlist_only)}"
    return seed_urls_from_wordlists(start_url), "bundled:all"


def _dedupe_findings(findings):
    """Return *findings* without repeats of the same ``dedup_key()``, keeping first occurrences in order."""
    seen = set()
    unique = []
    for f in findings:
        key = f.dedup_key()
        if key in seen:
            continue
        seen.add(key)
        unique.append(f)
    return unique


def main(argv=None) -> int:
    """Run the scan4secrets command line.

    Scans stdin, a filesystem path and/or a URL (whichever were requested),
    merges and de-duplicates the findings, optionally verifies them, prints
    a summary and writes the requested reports.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv``.

    Returns:
        ``2`` for a usage error (no input selected, or an unparsable
        ``--max-size``), ``1`` when ``--fail-on`` is set and at least one
        finding reaches that severity, ``0`` otherwise.
    """
    # Local import: rich is already a dependency of this module. ``escape``
    # keeps user-supplied text (paths, URLs) from being parsed as markup.
    from rich.markup import escape

    args = _parser().parse_args(argv)

    level = logging.DEBUG if args.debug else logging.INFO if args.verbose else logging.WARNING
    logging.basicConfig(level=level, format="%(levelname)s %(name)s %(message)s")

    if not (args.path or args.url or args.stdin):
        print("error: --path, --url, or --stdin required", file=sys.stderr)
        return 2

    # Validate --max-size up front so a typo yields a usage error, not a traceback
    # part-way through the run. It is only used by the filesystem scan.
    max_bytes = None
    if args.path:
        try:
            max_bytes = _parse_size(args.max_size)
        except (ValueError, OverflowError):
            print(f"error: invalid --max-size value: {args.max_size!r}", file=sys.stderr)
            return 2

    console = Console(quiet=args.quiet, no_color=args.no_color)

    rules = load_rules(args.rules)
    rules = _filter_rules(rules, args.rule_id, args.disable_rule, args.entropy_min)
    if not args.quiet:
        console.print(f"[bold]scan4secrets v{__version__}[/] — {len(rules)} rules loaded")

    findings = []

    if args.stdin:
        from scan4secrets.engine.scanner import scan_text
        from scan4secrets.engine.rules import KeywordIndex
        idx = KeywordIndex(rules)
        text = sys.stdin.read()
        findings.extend(scan_text(text, "<stdin>", rules, idx))

    if args.path:
        if not args.quiet:
            console.print(f"[bold cyan]SAST[/] scanning {escape(str(args.path))}")
        exclude_dirs = DEFAULT_SKIP_DIRS | set(args.exclude_dir)
        findings.extend(scan_path(
            Path(args.path), rules,
            exclude_dirs=exclude_dirs,
            exclude_globs=args.exclude,
            max_bytes=max_bytes,
        ))

    if args.url:
        if not args.quiet:
            console.print(f"[bold cyan]DAST[/] crawling {escape(str(args.url))}")
        session = build_session(
            user_agent=args.user_agent or None,
            headers=_parse_headers(args.header),
            cookie=args.cookie,
            proxy=args.proxy,
            verify_tls=not args.insecure,
            timeout=args.timeout,
        )
        # Normalise once and reuse; assumes normalize_url is deterministic
        # for a given input — TODO confirm.
        start_url = normalize_url(args.url)
        extra_seeds, scope_label = _wordlist_seeds(args, start_url)
        if not args.quiet and not args.no_wordlist:
            console.print(
                f"[dim]wordlist ({escape(scope_label)}) seeded {len(extra_seeds)} candidate URLs[/]"
            )
        findings.extend(crawl_and_scan(
            start_url, rules,
            session=session,
            max_urls=args.max_urls,
            max_depth=args.max_depth,
            threads=args.threads,
            timeout=args.timeout,
            strict_host=args.strict_host,
            parse_sourcemaps=not args.no_sourcemaps,
            extract_js_endpoints=not args.no_js_endpoints,
            extra_seeds=extra_seeds,
        ))

    # dedupe across SAST + DAST
    findings = _dedupe_findings(findings)

    # noise reduction: suppress generic rules when a specific vendor rule fired on the same value
    if not args.keep_generic:
        before = len(findings)
        findings = suppress_generic_when_specific(findings)
        if before > len(findings) and not args.quiet:
            console.print(f"[dim]suppressed {before - len(findings)} generic-rule duplicates[/]")

    if args.verify and findings:
        if not args.quiet:
            # Build the set of verifiable rule IDs once instead of scanning
            # every rule for every finding.
            verifiable = {r.id for r in rules if r.verify}
            candidates = sum(1 for f in findings if f.rule_id in verifiable)
            console.print(f"[bold]Verifying[/] {candidates} candidates...")
        verify_findings(findings, rules, timeout=args.verify_timeout)

    if not args.quiet:
        _print_findings(findings, console)
        _print_summary(findings, console)

    # NOTE: reports are written only when there is at least one finding.
    if findings:
        out_base = Path(args.output)
        out_base.parent.mkdir(parents=True, exist_ok=True)
        written = write_reports(findings, out_base, args.report, unsafe_show=args.unsafe_show)
        if not args.quiet:
            for fmt, p in written.items():
                console.print(f"[green]+[/] {fmt.upper():6s} -> {escape(str(p))}")
    elif not args.quiet:
        console.print("[green]No secrets found.[/]")

    if args.fail_on and any(severity_at_least(f.severity, args.fail_on) for f in findings):
        return 1
    return 0
274
+
275
+
276
# Script entry point: hand main()'s return value to the interpreter as the exit status.
if __name__ == "__main__":
    sys.exit(main())