scan4secrets 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. scan4secrets-2.1.0/LICENSE +21 -0
  2. scan4secrets-2.1.0/PKG-INFO +293 -0
  3. scan4secrets-2.1.0/README.md +244 -0
  4. scan4secrets-2.1.0/pyproject.toml +64 -0
  5. scan4secrets-2.1.0/scan4secrets/__init__.py +3 -0
  6. scan4secrets-2.1.0/scan4secrets/__main__.py +4 -0
  7. scan4secrets-2.1.0/scan4secrets/cli.py +277 -0
  8. scan4secrets-2.1.0/scan4secrets/config/rules.yaml +1256 -0
  9. scan4secrets-2.1.0/scan4secrets/config/wordlist/CloudProvider-Service.txt +108 -0
  10. scan4secrets-2.1.0/scan4secrets/config/wordlist/Docker-Compose-Kubernetes.txt +87 -0
  11. scan4secrets-2.1.0/scan4secrets/config/wordlist/Keys-SSH-Certificate.txt +107 -0
  12. scan4secrets-2.1.0/scan4secrets/config/wordlist/Node.js-Express-JS.txt +80 -0
  13. scan4secrets-2.1.0/scan4secrets/config/wordlist/OtherConfig-CI-DevOps.txt +141 -0
  14. scan4secrets-2.1.0/scan4secrets/config/wordlist/Python-Django-Flask.txt +88 -0
  15. scan4secrets-2.1.0/scan4secrets/config/wordlist/React-Next.js-Vite-Frontend.txt +94 -0
  16. scan4secrets-2.1.0/scan4secrets/config/wordlist/admin-panels.txt +130 -0
  17. scan4secrets-2.1.0/scan4secrets/config/wordlist/api-paths.txt +129 -0
  18. scan4secrets-2.1.0/scan4secrets/config/wordlist/backup-files.txt +111 -0
  19. scan4secrets-2.1.0/scan4secrets/config/wordlist/common.txt +163 -0
  20. scan4secrets-2.1.0/scan4secrets/config/wordlist/database-dumps.txt +120 -0
  21. scan4secrets-2.1.0/scan4secrets/config/wordlist/env.txt +59 -0
  22. scan4secrets-2.1.0/scan4secrets/config/wordlist/php-laravel-symfony-drupal.txt +107 -0
  23. scan4secrets-2.1.0/scan4secrets/config/wordlist/wordpress.txt +53 -0
  24. scan4secrets-2.1.0/scan4secrets/engine/__init__.py +1 -0
  25. scan4secrets-2.1.0/scan4secrets/engine/crawler.py +327 -0
  26. scan4secrets-2.1.0/scan4secrets/engine/entropy.py +11 -0
  27. scan4secrets-2.1.0/scan4secrets/engine/findings.py +67 -0
  28. scan4secrets-2.1.0/scan4secrets/engine/rules.py +139 -0
  29. scan4secrets-2.1.0/scan4secrets/engine/scanner.py +126 -0
  30. scan4secrets-2.1.0/scan4secrets/engine/sourcemap.py +19 -0
  31. scan4secrets-2.1.0/scan4secrets/engine/verifier.py +61 -0
  32. scan4secrets-2.1.0/scan4secrets/engine/wordlists.py +54 -0
  33. scan4secrets-2.1.0/scan4secrets/reporters/__init__.py +47 -0
  34. scan4secrets-2.1.0/scan4secrets/reporters/csv_.py +21 -0
  35. scan4secrets-2.1.0/scan4secrets/reporters/excel.py +16 -0
  36. scan4secrets-2.1.0/scan4secrets/reporters/html.py +114 -0
  37. scan4secrets-2.1.0/scan4secrets/reporters/json_.py +9 -0
  38. scan4secrets-2.1.0/scan4secrets/reporters/jsonl.py +12 -0
  39. scan4secrets-2.1.0/scan4secrets/reporters/pdf.py +52 -0
  40. scan4secrets-2.1.0/scan4secrets/reporters/sarif.py +74 -0
  41. scan4secrets-2.1.0/scan4secrets.egg-info/PKG-INFO +293 -0
  42. scan4secrets-2.1.0/scan4secrets.egg-info/SOURCES.txt +46 -0
  43. scan4secrets-2.1.0/scan4secrets.egg-info/dependency_links.txt +1 -0
  44. scan4secrets-2.1.0/scan4secrets.egg-info/entry_points.txt +2 -0
  45. scan4secrets-2.1.0/scan4secrets.egg-info/requires.txt +15 -0
  46. scan4secrets-2.1.0/scan4secrets.egg-info/top_level.txt +1 -0
  47. scan4secrets-2.1.0/setup.cfg +4 -0
  48. scan4secrets-2.1.0/tests/test_rules.py +182 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Madhurendra kumar
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,293 @@
1
+ Metadata-Version: 2.4
2
+ Name: scan4secrets
3
+ Version: 2.1.0
4
+ Summary: DAST + SAST secret scanner with live verification, source-map parsing, and CI-native reporting
5
+ Author: M14R41
6
+ License: MIT
7
+ Project-URL: Homepage, https://scan4secrets.m14r41.in
8
+ Project-URL: Documentation, https://scan4secrets.m14r41.in/docs/intro
9
+ Project-URL: Repository, https://github.com/m14r41/scan4secrets
10
+ Project-URL: Issues, https://github.com/m14r41/scan4secrets/issues
11
+ Project-URL: Changelog, https://scan4secrets.m14r41.in/docs/changelog
12
+ Project-URL: Downloads, https://github.com/m14r41/scan4secrets/releases
13
+ Keywords: security,secrets,scanner,sast,dast,credentials,bug-bounty
14
+ Classifier: Development Status :: 4 - Beta
15
+ Classifier: Environment :: Console
16
+ Classifier: Intended Audience :: Information Technology
17
+ Classifier: Intended Audience :: System Administrators
18
+ Classifier: Intended Audience :: Developers
19
+ Classifier: License :: OSI Approved :: MIT License
20
+ Classifier: Operating System :: OS Independent
21
+ Classifier: Programming Language :: Python
22
+ Classifier: Programming Language :: Python :: 3
23
+ Classifier: Programming Language :: Python :: 3.9
24
+ Classifier: Programming Language :: Python :: 3.10
25
+ Classifier: Programming Language :: Python :: 3.11
26
+ Classifier: Programming Language :: Python :: 3.12
27
+ Classifier: Programming Language :: Python :: 3.13
28
+ Classifier: Topic :: Security
29
+ Classifier: Topic :: Software Development :: Quality Assurance
30
+ Classifier: Topic :: System :: Monitoring
31
+ Requires-Python: >=3.9
32
+ Description-Content-Type: text/markdown
33
+ License-File: LICENSE
34
+ Requires-Dist: requests>=2.31
35
+ Requires-Dist: beautifulsoup4>=4.12
36
+ Requires-Dist: PyYAML>=6.0
37
+ Requires-Dist: pyahocorasick>=2.0
38
+ Requires-Dist: rich>=13.0
39
+ Requires-Dist: colorama>=0.4
40
+ Requires-Dist: pandas>=2.0
41
+ Requires-Dist: openpyxl>=3.1
42
+ Requires-Dist: fpdf2>=2.7
43
+ Requires-Dist: tldextract>=5.0
44
+ Provides-Extra: dev
45
+ Requires-Dist: pytest>=7; extra == "dev"
46
+ Requires-Dist: ruff>=0.4; extra == "dev"
47
+ Requires-Dist: mypy>=1.8; extra == "dev"
48
+ Dynamic: license-file
49
+
50
+ # scan4secrets
51
+
52
+ **DAST + SAST secret scanner with live verification, source-map parsing, and CI-native reporting.**
53
+
54
+ Find leaked credentials in source trees, running web apps, and CI logs. Verify them live against vendor APIs. Output SARIF for code-scanning dashboards, JSONL for SOAR pipelines, or Excel/PDF/HTML for client reports.
55
+
56
+ ---
57
+
58
+ ## Why scan4secrets
59
+
60
+ The crowded landscape (`gitleaks`, `trufflehog`, `detect-secrets`) is great at SAST on git trees but stops there. **scan4secrets fills the gaps they don't cover**:
61
+
62
+ | Capability | gitleaks | trufflehog | detect-secrets | **scan4secrets** |
63
+ |---|:---:|:---:|:---:|:---:|
64
+ | SAST secret detection | Y | Y | Y | Y |
65
+ | **DAST live web crawl** | - | - | - | Y |
66
+ | **JS source-map parsing** | - | - | - | Y |
67
+ | **JS endpoint extraction** | - | - | - | Y |
68
+ | **HTTP-header secret scan** | - | - | - | Y |
69
+ | Live token verification | - | Y | - | Y |
70
+ | SARIF output | Y | - | - | Y |
71
+ | **Excel / PDF / HTML reports** | - | - | - | Y |
72
+ | Entropy gate + allowlist | Y | Y | Y | Y |
73
+ | YAML rules schema | - (TOML) | - | - | Y |
74
+ | Authenticated DAST (cookie/header/proxy) | n/a | n/a | n/a | Y |
75
+
76
+ It is a **complement to gitleaks**, not a replacement. Use both: gitleaks in pre-commit + CI for git-history SAST, scan4secrets for live DAST against staging/production.
77
+
78
+ ---
79
+
80
+ ## Install
81
+
82
+ ```bash
83
+ # from source
84
+ git clone https://github.com/m14r41/scan4secrets
85
+ cd scan4secrets
86
+ pip install -e .
87
+
88
+ # OR via pipx
89
+ pipx install git+https://github.com/m14r41/scan4secrets
90
+
91
+ # OR Docker
92
+ docker run --rm -v $(pwd):/scan ghcr.io/m14r41/scan4secrets:latest --path /scan
93
+ ```
94
+
95
+ After install, the `scan4secrets` command is on your PATH.
96
+
97
+ ---
98
+
99
+ ## Quick start
100
+
101
+ ```bash
102
+ # SAST: scan a local directory
103
+ scan4secrets --path /code
104
+
105
+ # DAST: crawl a live target
106
+ scan4secrets --url https://staging.example.com --threads 32
107
+
108
+ # DAST runs ALL bundled wordlists by default (1279 paths: /.env, /wp-config.php, /backup.zip, ...)
109
+ scan4secrets --url https://target.com
110
+
111
+ # Use YOUR OWN wordlist file (replaces the bundled set)
112
+ scan4secrets --url https://target.com --wordlist /path/to/my-paths.txt
113
+
114
+ # Combine multiple custom wordlist files
115
+ scan4secrets --url https://target.com --wordlist seclists/Common.txt internal-paths.txt
116
+
117
+ # Restrict to specific bundled wordlists by stem
118
+ scan4secrets --url https://wp.example.com --wordlist-only wordpress common env
119
+
120
+ # Turn wordlist seeding off entirely (only follow live links)
121
+ scan4secrets --url https://target.com --no-wordlist
122
+
123
+ # Full audit with verification + HTML report
124
+ scan4secrets --path . --url https://app.example.com \
125
+ --verify --report html sarif json \
126
+ --output reports/audit-$(date +%F)
127
+
128
+ # Authenticated DAST with proxy (works with Burp / ZAP)
129
+ scan4secrets --url https://app.example.com \
130
+ --cookie "session=abc123" \
131
+ --header "X-Tenant: acme" \
132
+ --proxy http://127.0.0.1:8080
133
+
134
+ # CI gate (exit 1 if anything >= high)
135
+ scan4secrets --path . --report sarif --fail-on high \
136
+ --output reports/scan
137
+ ```
138
+
139
+ ---
140
+
141
+ ## What it detects
142
+
143
+ 170+ rules covering:
144
+
145
+ - **Cloud:** AWS, GCP, Azure, DigitalOcean, Heroku, Linode, Vultr, Hetzner, Alibaba, IBM Cloud, Oracle Cloud, Render, Vercel, Netlify, Fly.io
146
+ - **CDN / edge:** Cloudflare (API token + Origin CA), Fastly, Cloudinary, Akamai EdgeGrid, BunnyCDN
147
+ - **Source control:** GitHub (classic / fine-grained / OAuth / App / refresh / deploy key), GitLab, Bitbucket
148
+ - **CI/CD:** CircleCI, Travis, Buildkite, Jenkins, ArgoCD, Pulumi, Snyk, Doppler
149
+ - **Payments:** Stripe, Square, PayPal/Braintree, Razorpay, Plaid, Adyen, Paddle, LemonSqueezy, Coinbase, Binance
150
+ - **E-commerce:** Shopify (private app / shared secret / custom app / partner), WooCommerce REST
151
+ - **Messaging:** Slack (5 token types + webhook), Discord (bot + webhook), Twilio, Telegram, Microsoft Teams webhook, Zoom JWT, Vonage/Nexmo
152
+ - **SMS / carriers:** MessageBird, Plivo
153
+ - **AI/ML:** OpenAI, Anthropic, Hugging Face, Replicate, Cohere, Pinecone, Mistral, Groq, Perplexity, DeepL, AssemblyAI, ElevenLabs, Stability AI
154
+ - **Email / marketing:** SendGrid, Mailgun, Mailchimp, Postmark, Resend, Mailjet, Klaviyo, ConvertKit, Customer.io
155
+ - **Monitoring:** Datadog, Sentry (DSN + org-auth-token), New Relic, Grafana (service-account + Cloud), LaunchDarkly (SDK + mobile), Honeycomb, Rollbar, Bugsnag, Splunk HEC, PagerDuty
156
+ - **DevOps / registries:** Docker Hub, Docker registry auth, NPM, PyPI, RubyGems, crates.io, JFrog Artifactory, Terraform Cloud, HashiCorp Vault, HashiCorp Cloud
157
+ - **Auth / identity:** Auth0, Okta, Clerk, WorkOS, Stytch, Atlassian / Jira, Frontegg, Keycloak
158
+ - **Productivity SaaS:** Notion, Linear, Airtable, Asana, ClickUp, Typeform, Calendly, Zendesk, Intercom
159
+ - **Mobile / push:** Firebase Cloud Messaging, Expo, OneSignal, Microsoft AppCenter
160
+ - **Data / ML platforms:** Databricks, Snowflake, Algolia
161
+ - **Mapping:** Mapbox (pk / sk), HERE Maps
162
+ - **Blockchain / Web3:** Infura, Alchemy, Etherscan, WalletConnect, QuickNode
163
+ - **Storage:** Backblaze B2 (KeyID + appKey)
164
+ - **Networking / VPN:** Tailscale (auth + API)
165
+ - **QA / browser testing:** BrowserStack, Sauce Labs, Percy
166
+ - **Connection strings:** PostgreSQL, MySQL, MongoDB (incl. srv), Redis, AMQP
167
+ - **Webhooks:** Zapier, IFTTT, Meta / Facebook Graph
168
+ - **Auth tokens:** JWT, HTTP Basic in URLs
169
+ - **Crypto:** RSA / EC / OPENSSH / PGP private keys, SSH public keys, Cloudflare Origin CA, GitHub deploy keys
170
+ - **Contextual fallbacks:** quoted/unquoted high-entropy strings, hex tokens, UUIDs near credential names
171
+
172
+ See [docs/RULES.md](docs/RULES.md) for the full reference and how to add custom rules.
173
+
174
+ ---
175
+
176
+ ## Live verification
177
+
178
+ With `--verify`, scan4secrets makes one HTTP request per detected token to the vendor API to confirm whether the credential is still **live**:
179
+
180
+ | Rule | Probe | Success |
181
+ |---|---|---|
182
+ | `github-pat-classic` / `github-pat-fine-grained` | `GET https://api.github.com/user` | HTTP 200 |
183
+ | `stripe-secret-live` | `GET https://api.stripe.com/v1/charges?limit=1` | HTTP 200 |
184
+ | `slack-bot-token` | `POST https://slack.com/api/auth.test` | HTTP 200 |
185
+ | `openai-key` | `GET https://api.openai.com/v1/models` | HTTP 200 |
186
+
187
+ Each finding gets `verified=true|false|null` in every output format. A verified token is incident-grade evidence; an unverified one is a hypothesis.
188
+
189
+ See [docs/VERIFICATION.md](docs/VERIFICATION.md) for the full vendor list and how to add probes.
190
+
191
+ ---
192
+
193
+ ## Reports
194
+
195
+ ```bash
196
+ scan4secrets --path . --report sarif json jsonl csv html excel pdf --output reports/run
197
+ ```
198
+
199
+ | Format | Best for |
200
+ |---|---|
201
+ | `sarif` | GitHub Code Scanning, GitLab Security Dashboard, Sonar, Defect Dojo |
202
+ | `json` | Tooling integrations, post-processing |
203
+ | `jsonl` | SIEM/SOAR pipelines (Splunk, Datadog, Sentinel) |
204
+ | `csv` | Spreadsheet triage |
205
+ | `html` | Sortable / filterable / colored UI for client review |
206
+ | `excel` | Pivot tables and exec summaries |
207
+ | `pdf` | Compliance evidence packets |
208
+
209
+ Secrets are **redacted by default** (`abcd****wxyz`). Use `--unsafe-show` only when reports are stored securely.
210
+
211
+ ---
212
+
213
+ ## DAST details
214
+
215
+ The crawler:
216
+
217
+ 1. Honors **scope** (same eTLD+1 by default; `--strict-host` for exact host)
218
+ 2. Runs **concurrently** (`--threads N`, default 16)
219
+ 3. Sends a custom **User-Agent**, optional **headers**, **cookies**, and routes through your **proxy** (Burp / ZAP friendly)
220
+ 4. Parses **`.js.map`** files and scans every embedded source (catches secrets hidden inside production source maps that no SAST sees)
221
+ 5. Extracts **string-literal endpoints** from `.js` files and probes them
222
+ 6. Scans **response headers** as well as body
223
+ 7. **Path-guess wordlists are ON by default** — every DAST run seeds 1279 sensitive paths (`.env`, `.git/config`, `wp-config.php`, `phpinfo.php`, `backup.zip`, `composer.json`, source maps, admin panels, API docs, ...). Restrict with `--wordlist-only NAME ...` or disable with `--no-wordlist`.
224
+ 8. Caps at `--max-urls` and `--max-depth` so you can't accidentally DoS a target
225
+
226
+ Wordlists are stack-specific: `common`, `env`, `wordpress`, `php-laravel-symfony-drupal`, `Python-Django-Flask`, `Node.js-Express-JS`, `React-Next.js-Vite-Frontend`, `Docker-Compose-Kubernetes`, `CloudProvider-Service`, `Keys-SSH-Certificate`, `OtherConfig-CI-DevOps`, `backup-files`, `admin-panels`, `api-paths`, `database-dumps`. Use `--wordlist-only NAME ...` to restrict to specific stems.
227
+
228
+ ---
229
+
230
+ ## CI / pre-commit
231
+
232
+ `.pre-commit-hooks.yaml` is shipped:
233
+
234
+ ```yaml
235
+ repos:
236
+   - repo: https://github.com/m14r41/scan4secrets
237
+     rev: v2.1.0
238
+     hooks:
239
+       - id: scan4secrets
240
+ ```
241
+
242
+ GitHub Actions:
243
+
244
+ ```yaml
245
+ - uses: actions/checkout@v4
246
+ - run: pip install scan4secrets
247
+ - run: scan4secrets --path . --report sarif --output results --fail-on high
248
+ - uses: github/codeql-action/upload-sarif@v3
249
+   if: always()
250
+   with: { sarif_file: results.sarif }
251
+ ```
252
+
253
+ ---
254
+
255
+ ## Documentation
256
+
257
+ - [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) — package layout, data flow, extension points
258
+ - [docs/RULES.md](docs/RULES.md) — rule schema, examples, writing custom rules
259
+ - [docs/VERIFICATION.md](docs/VERIFICATION.md) — how live verification works, adding new vendors
260
+ - [docs/CHANGELOG.md](docs/CHANGELOG.md) — what's new in v2 vs v1
261
+ - [docs/GAP_ANALYSIS.md](docs/GAP_ANALYSIS.md) — empirical comparison vs v1 and gitleaks
262
+
263
+ ---
264
+
265
+ ## Benchmark
266
+
267
+ Tested on [Plazmaz/leaky-repo](https://github.com/Plazmaz/leaky-repo) (seeded with real-format secrets) and on [expressjs/express](https://github.com/expressjs/express) (clean OSS code).
268
+
269
+ | Tool | leaky-repo (findings) | benign express (FPs) |
270
+ |---|---:|---:|
271
+ | scan4secrets v1 | 35 (~22 TPs, ~13 FPs) | **27** |
272
+ | gitleaks | 22 | 0 |
273
+ | **scan4secrets v2** | **23** (all TPs, incl. SSH/PEM/Docker keys v1 missed) | **0** |
274
+
275
+ v2 has a 0% FP rate on benign code (vs. v1's ~13% per-file rate) and captures the high-value secret classes (private keys, Docker registry auth) that v1 was structurally incapable of detecting.
276
+
277
+ ---
278
+
279
+ ## Contributing
280
+
281
+ - Add a rule: edit `scan4secrets/config/rules.yaml`
282
+ - Add a verifier: extend the `verify:` block in the rule
283
+ - Add a reporter: drop a module under `scan4secrets/reporters/` and register in `__init__.py`
284
+
285
+ Run tests: `pytest -q` (planted-secret fixtures under `tests/fixtures/`)
286
+
287
+ ---
288
+
289
+ ## License
290
+
291
+ MIT — see [LICENSE](LICENSE).
292
+
293
+ Built by [@M14R41](https://github.com/m14r41).
@@ -0,0 +1,244 @@
1
+ # scan4secrets
2
+
3
+ **DAST + SAST secret scanner with live verification, source-map parsing, and CI-native reporting.**
4
+
5
+ Find leaked credentials in source trees, running web apps, and CI logs. Verify them live against vendor APIs. Output SARIF for code-scanning dashboards, JSONL for SOAR pipelines, or Excel/PDF/HTML for client reports.
6
+
7
+ ---
8
+
9
+ ## Why scan4secrets
10
+
11
+ The crowded landscape (`gitleaks`, `trufflehog`, `detect-secrets`) is great at SAST on git trees but stops there. **scan4secrets fills the gaps they don't cover**:
12
+
13
+ | Capability | gitleaks | trufflehog | detect-secrets | **scan4secrets** |
14
+ |---|:---:|:---:|:---:|:---:|
15
+ | SAST secret detection | Y | Y | Y | Y |
16
+ | **DAST live web crawl** | - | - | - | Y |
17
+ | **JS source-map parsing** | - | - | - | Y |
18
+ | **JS endpoint extraction** | - | - | - | Y |
19
+ | **HTTP-header secret scan** | - | - | - | Y |
20
+ | Live token verification | - | Y | - | Y |
21
+ | SARIF output | Y | - | - | Y |
22
+ | **Excel / PDF / HTML reports** | - | - | - | Y |
23
+ | Entropy gate + allowlist | Y | Y | Y | Y |
24
+ | YAML rules schema | - (TOML) | - | - | Y |
25
+ | Authenticated DAST (cookie/header/proxy) | n/a | n/a | n/a | Y |
26
+
27
+ It is a **complement to gitleaks**, not a replacement. Use both: gitleaks in pre-commit + CI for git-history SAST, scan4secrets for live DAST against staging/production.
28
+
29
+ ---
30
+
31
+ ## Install
32
+
33
+ ```bash
34
+ # from source
35
+ git clone https://github.com/m14r41/scan4secrets
36
+ cd scan4secrets
37
+ pip install -e .
38
+
39
+ # OR via pipx
40
+ pipx install git+https://github.com/m14r41/scan4secrets
41
+
42
+ # OR Docker
43
+ docker run --rm -v $(pwd):/scan ghcr.io/m14r41/scan4secrets:latest --path /scan
44
+ ```
45
+
46
+ After install, the `scan4secrets` command is on your PATH.
47
+
48
+ ---
49
+
50
+ ## Quick start
51
+
52
+ ```bash
53
+ # SAST: scan a local directory
54
+ scan4secrets --path /code
55
+
56
+ # DAST: crawl a live target
57
+ scan4secrets --url https://staging.example.com --threads 32
58
+
59
+ # DAST runs ALL bundled wordlists by default (1279 paths: /.env, /wp-config.php, /backup.zip, ...)
60
+ scan4secrets --url https://target.com
61
+
62
+ # Use YOUR OWN wordlist file (replaces the bundled set)
63
+ scan4secrets --url https://target.com --wordlist /path/to/my-paths.txt
64
+
65
+ # Combine multiple custom wordlist files
66
+ scan4secrets --url https://target.com --wordlist seclists/Common.txt internal-paths.txt
67
+
68
+ # Restrict to specific bundled wordlists by stem
69
+ scan4secrets --url https://wp.example.com --wordlist-only wordpress common env
70
+
71
+ # Turn wordlist seeding off entirely (only follow live links)
72
+ scan4secrets --url https://target.com --no-wordlist
73
+
74
+ # Full audit with verification + HTML report
75
+ scan4secrets --path . --url https://app.example.com \
76
+ --verify --report html sarif json \
77
+ --output reports/audit-$(date +%F)
78
+
79
+ # Authenticated DAST with proxy (works with Burp / ZAP)
80
+ scan4secrets --url https://app.example.com \
81
+ --cookie "session=abc123" \
82
+ --header "X-Tenant: acme" \
83
+ --proxy http://127.0.0.1:8080
84
+
85
+ # CI gate (exit 1 if anything >= high)
86
+ scan4secrets --path . --report sarif --fail-on high \
87
+ --output reports/scan
88
+ ```
89
+
90
+ ---
91
+
92
+ ## What it detects
93
+
94
+ 170+ rules covering:
95
+
96
+ - **Cloud:** AWS, GCP, Azure, DigitalOcean, Heroku, Linode, Vultr, Hetzner, Alibaba, IBM Cloud, Oracle Cloud, Render, Vercel, Netlify, Fly.io
97
+ - **CDN / edge:** Cloudflare (API token + Origin CA), Fastly, Cloudinary, Akamai EdgeGrid, BunnyCDN
98
+ - **Source control:** GitHub (classic / fine-grained / OAuth / App / refresh / deploy key), GitLab, Bitbucket
99
+ - **CI/CD:** CircleCI, Travis, Buildkite, Jenkins, ArgoCD, Pulumi, Snyk, Doppler
100
+ - **Payments:** Stripe, Square, PayPal/Braintree, Razorpay, Plaid, Adyen, Paddle, LemonSqueezy, Coinbase, Binance
101
+ - **E-commerce:** Shopify (private app / shared secret / custom app / partner), WooCommerce REST
102
+ - **Messaging:** Slack (5 token types + webhook), Discord (bot + webhook), Twilio, Telegram, Microsoft Teams webhook, Zoom JWT, Vonage/Nexmo
103
+ - **SMS / carriers:** MessageBird, Plivo
104
+ - **AI/ML:** OpenAI, Anthropic, Hugging Face, Replicate, Cohere, Pinecone, Mistral, Groq, Perplexity, DeepL, AssemblyAI, ElevenLabs, Stability AI
105
+ - **Email / marketing:** SendGrid, Mailgun, Mailchimp, Postmark, Resend, Mailjet, Klaviyo, ConvertKit, Customer.io
106
+ - **Monitoring:** Datadog, Sentry (DSN + org-auth-token), New Relic, Grafana (service-account + Cloud), LaunchDarkly (SDK + mobile), Honeycomb, Rollbar, Bugsnag, Splunk HEC, PagerDuty
107
+ - **DevOps / registries:** Docker Hub, Docker registry auth, NPM, PyPI, RubyGems, crates.io, JFrog Artifactory, Terraform Cloud, HashiCorp Vault, HashiCorp Cloud
108
+ - **Auth / identity:** Auth0, Okta, Clerk, WorkOS, Stytch, Atlassian / Jira, Frontegg, Keycloak
109
+ - **Productivity SaaS:** Notion, Linear, Airtable, Asana, ClickUp, Typeform, Calendly, Zendesk, Intercom
110
+ - **Mobile / push:** Firebase Cloud Messaging, Expo, OneSignal, Microsoft AppCenter
111
+ - **Data / ML platforms:** Databricks, Snowflake, Algolia
112
+ - **Mapping:** Mapbox (pk / sk), HERE Maps
113
+ - **Blockchain / Web3:** Infura, Alchemy, Etherscan, WalletConnect, QuickNode
114
+ - **Storage:** Backblaze B2 (KeyID + appKey)
115
+ - **Networking / VPN:** Tailscale (auth + API)
116
+ - **QA / browser testing:** BrowserStack, Sauce Labs, Percy
117
+ - **Connection strings:** PostgreSQL, MySQL, MongoDB (incl. srv), Redis, AMQP
118
+ - **Webhooks:** Zapier, IFTTT, Meta / Facebook Graph
119
+ - **Auth tokens:** JWT, HTTP Basic in URLs
120
+ - **Crypto:** RSA / EC / OPENSSH / PGP private keys, SSH public keys, Cloudflare Origin CA, GitHub deploy keys
121
+ - **Contextual fallbacks:** quoted/unquoted high-entropy strings, hex tokens, UUIDs near credential names
122
+
123
+ See [docs/RULES.md](docs/RULES.md) for the full reference and how to add custom rules.
124
+
125
+ ---
126
+
127
+ ## Live verification
128
+
129
+ With `--verify`, scan4secrets makes one HTTP request per detected token to the vendor API to confirm whether the credential is still **live**:
130
+
131
+ | Rule | Probe | Success |
132
+ |---|---|---|
133
+ | `github-pat-classic` / `github-pat-fine-grained` | `GET https://api.github.com/user` | HTTP 200 |
134
+ | `stripe-secret-live` | `GET https://api.stripe.com/v1/charges?limit=1` | HTTP 200 |
135
+ | `slack-bot-token` | `POST https://slack.com/api/auth.test` | HTTP 200 |
136
+ | `openai-key` | `GET https://api.openai.com/v1/models` | HTTP 200 |
137
+
138
+ Each finding gets `verified=true|false|null` in every output format. A verified token is incident-grade evidence; an unverified one is a hypothesis.
139
+
140
+ See [docs/VERIFICATION.md](docs/VERIFICATION.md) for the full vendor list and how to add probes.
141
+
142
+ ---
143
+
144
+ ## Reports
145
+
146
+ ```bash
147
+ scan4secrets --path . --report sarif json jsonl csv html excel pdf --output reports/run
148
+ ```
149
+
150
+ | Format | Best for |
151
+ |---|---|
152
+ | `sarif` | GitHub Code Scanning, GitLab Security Dashboard, Sonar, Defect Dojo |
153
+ | `json` | Tooling integrations, post-processing |
154
+ | `jsonl` | SIEM/SOAR pipelines (Splunk, Datadog, Sentinel) |
155
+ | `csv` | Spreadsheet triage |
156
+ | `html` | Sortable / filterable / colored UI for client review |
157
+ | `excel` | Pivot tables and exec summaries |
158
+ | `pdf` | Compliance evidence packets |
159
+
160
+ Secrets are **redacted by default** (`abcd****wxyz`). Use `--unsafe-show` only when reports are stored securely.
161
+
162
+ ---
163
+
164
+ ## DAST details
165
+
166
+ The crawler:
167
+
168
+ 1. Honors **scope** (same eTLD+1 by default; `--strict-host` for exact host)
169
+ 2. Runs **concurrently** (`--threads N`, default 16)
170
+ 3. Sends a custom **User-Agent**, optional **headers**, **cookies**, and routes through your **proxy** (Burp / ZAP friendly)
171
+ 4. Parses **`.js.map`** files and scans every embedded source (catches secrets hidden inside production source maps that no SAST sees)
172
+ 5. Extracts **string-literal endpoints** from `.js` files and probes them
173
+ 6. Scans **response headers** as well as body
174
+ 7. **Path-guess wordlists are ON by default** — every DAST run seeds 1279 sensitive paths (`.env`, `.git/config`, `wp-config.php`, `phpinfo.php`, `backup.zip`, `composer.json`, source maps, admin panels, API docs, ...). Restrict with `--wordlist-only NAME ...` or disable with `--no-wordlist`.
175
+ 8. Caps at `--max-urls` and `--max-depth` so you can't accidentally DoS a target
176
+
177
+ Wordlists are stack-specific: `common`, `env`, `wordpress`, `php-laravel-symfony-drupal`, `Python-Django-Flask`, `Node.js-Express-JS`, `React-Next.js-Vite-Frontend`, `Docker-Compose-Kubernetes`, `CloudProvider-Service`, `Keys-SSH-Certificate`, `OtherConfig-CI-DevOps`, `backup-files`, `admin-panels`, `api-paths`, `database-dumps`. Use `--wordlist-only NAME ...` to restrict to specific stems.
178
+
179
+ ---
180
+
181
+ ## CI / pre-commit
182
+
183
+ `.pre-commit-hooks.yaml` is shipped:
184
+
185
+ ```yaml
186
+ repos:
187
+   - repo: https://github.com/m14r41/scan4secrets
188
+     rev: v2.1.0
189
+     hooks:
190
+       - id: scan4secrets
191
+ ```
192
+
193
+ GitHub Actions:
194
+
195
+ ```yaml
196
+ - uses: actions/checkout@v4
197
+ - run: pip install scan4secrets
198
+ - run: scan4secrets --path . --report sarif --output results --fail-on high
199
+ - uses: github/codeql-action/upload-sarif@v3
200
+   if: always()
201
+   with: { sarif_file: results.sarif }
202
+ ```
203
+
204
+ ---
205
+
206
+ ## Documentation
207
+
208
+ - [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) — package layout, data flow, extension points
209
+ - [docs/RULES.md](docs/RULES.md) — rule schema, examples, writing custom rules
210
+ - [docs/VERIFICATION.md](docs/VERIFICATION.md) — how live verification works, adding new vendors
211
+ - [docs/CHANGELOG.md](docs/CHANGELOG.md) — what's new in v2 vs v1
212
+ - [docs/GAP_ANALYSIS.md](docs/GAP_ANALYSIS.md) — empirical comparison vs v1 and gitleaks
213
+
214
+ ---
215
+
216
+ ## Benchmark
217
+
218
+ Tested on [Plazmaz/leaky-repo](https://github.com/Plazmaz/leaky-repo) (seeded with real-format secrets) and on [expressjs/express](https://github.com/expressjs/express) (clean OSS code).
219
+
220
+ | Tool | leaky-repo (findings) | benign express (FPs) |
221
+ |---|---:|---:|
222
+ | scan4secrets v1 | 35 (~22 TPs, ~13 FPs) | **27** |
223
+ | gitleaks | 22 | 0 |
224
+ | **scan4secrets v2** | **23** (all TPs, incl. SSH/PEM/Docker keys v1 missed) | **0** |
225
+
226
+ v2 has a 0% FP rate on benign code (vs. v1's ~13% per-file rate) and captures the high-value secret classes (private keys, Docker registry auth) that v1 was structurally incapable of detecting.
227
+
228
+ ---
229
+
230
+ ## Contributing
231
+
232
+ - Add a rule: edit `scan4secrets/config/rules.yaml`
233
+ - Add a verifier: extend the `verify:` block in the rule
234
+ - Add a reporter: drop a module under `scan4secrets/reporters/` and register in `__init__.py`
235
+
236
+ Run tests: `pytest -q` (planted-secret fixtures under `tests/fixtures/`)
237
+
238
+ ---
239
+
240
+ ## License
241
+
242
+ MIT — see [LICENSE](LICENSE).
243
+
244
+ Built by [@M14R41](https://github.com/m14r41).
@@ -0,0 +1,64 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "scan4secrets"
7
+ version = "2.1.0"
8
+ description = "DAST + SAST secret scanner with live verification, source-map parsing, and CI-native reporting"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "M14R41" }]
13
+ keywords = ["security", "secrets", "scanner", "sast", "dast", "credentials", "bug-bounty"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Environment :: Console",
17
+ "Intended Audience :: Information Technology",
18
+ "Intended Audience :: System Administrators",
19
+ "Intended Audience :: Developers",
20
+ "License :: OSI Approved :: MIT License",
21
+ "Operating System :: OS Independent",
22
+ "Programming Language :: Python",
23
+ "Programming Language :: Python :: 3",
24
+ "Programming Language :: Python :: 3.9",
25
+ "Programming Language :: Python :: 3.10",
26
+ "Programming Language :: Python :: 3.11",
27
+ "Programming Language :: Python :: 3.12",
28
+ "Programming Language :: Python :: 3.13",
29
+ "Topic :: Security",
30
+ "Topic :: Software Development :: Quality Assurance",
31
+ "Topic :: System :: Monitoring",
32
+ ]
33
+ dependencies = [
34
+ "requests>=2.31",
35
+ "beautifulsoup4>=4.12",
36
+ "PyYAML>=6.0",
37
+ "pyahocorasick>=2.0",
38
+ "rich>=13.0",
39
+ "colorama>=0.4",
40
+ "pandas>=2.0",
41
+ "openpyxl>=3.1",
42
+ "fpdf2>=2.7",
43
+ "tldextract>=5.0",
44
+ ]
45
+
46
+ [project.optional-dependencies]
47
+ dev = ["pytest>=7", "ruff>=0.4", "mypy>=1.8"]
48
+
49
+ [project.scripts]
50
+ scan4secrets = "scan4secrets.cli:main"
51
+
52
+ [project.urls]
53
+ Homepage = "https://scan4secrets.m14r41.in"
54
+ Documentation = "https://scan4secrets.m14r41.in/docs/intro"
55
+ Repository = "https://github.com/m14r41/scan4secrets"
56
+ Issues = "https://github.com/m14r41/scan4secrets/issues"
57
+ Changelog = "https://scan4secrets.m14r41.in/docs/changelog"
58
+ Downloads = "https://github.com/m14r41/scan4secrets/releases"
59
+
60
+ [tool.setuptools.packages.find]
61
+ include = ["scan4secrets*"]
62
+
63
+ [tool.setuptools.package-data]
64
+ scan4secrets = ["config/*.yaml", "config/*.json", "config/wordlist/*.txt"]
@@ -0,0 +1,3 @@
1
+ """scan4secrets — DAST + SAST secret scanner."""
2
+
3
+ __version__ = "2.1.0"
@@ -0,0 +1,4 @@
1
+ from scan4secrets.cli import main
2
+
3
+ if __name__ == "__main__":
4
+ raise SystemExit(main())