compliance-scanner 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Naufal
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,134 @@
1
+ Metadata-Version: 2.4
2
+ Name: compliance-scanner
3
+ Version: 1.0.0
4
+ Summary: Scan codebases for leaked secrets, API keys, and credentials. Compliance-grade reporting with online report sharing.
5
+ Author: Naufal
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/naufal/compliance-scanner
8
+ Project-URL: Repository, https://github.com/naufal/compliance-scanner
9
+ Project-URL: Issues, https://github.com/naufal/compliance-scanner/issues
10
+ Keywords: security,compliance,secrets,scanner,credentials,api-keys
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: System Administrators
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Security
22
+ Classifier: Topic :: Software Development :: Quality Assurance
23
+ Requires-Python: >=3.9
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Dynamic: license-file
27
+
28
+ # compliance-scanner
29
+
30
+ Scan codebases for leaked secrets, API keys, and credentials. Compliance-grade reporting with online report sharing.
31
+
32
+ ## Installation
33
+
34
+ ```bash
35
+ # From PyPI (when published)
36
+ pip install compliance-scanner
37
+
38
+ # From source
39
+ git clone https://github.com/naufal/compliance-scanner.git
40
+ cd compliance-scanner
41
+ pip install .
42
+ ```
43
+
44
+ ## Usage
45
+
46
+ ```bash
47
+ # Scan current directory
48
+ compliance-scanner .
49
+
50
+ # Scan specific path
51
+ compliance-scanner /path/to/project
52
+
53
+ # Local only (no online upload)
54
+ compliance-scanner . --no-upload
55
+
56
+ # JSON output
57
+ compliance-scanner . --json --output report.json
58
+
59
+ # Custom expiry for online report
60
+ compliance-scanner . --expiry 7
61
+
62
+ # Ignore additional directories
63
+ compliance-scanner . --ignore .env,.secrets
64
+
65
+ # Scan only Python files
66
+ compliance-scanner . --file-glob "*.py"
67
+
68
+ # Quiet mode
69
+ compliance-scanner . -q
70
+ ```
71
+
72
+ ## What It Detects
73
+
74
+ - High-entropy strings (potential API keys, tokens)
75
+ - Variable assignments with secrets (`API_KEY=...`, `SECRET=...`)
76
+ - JWT tokens (`eyJ...`)
77
+ - AWS access keys (`AKIA...`)
78
+ - GitHub tokens (`ghp_...`, `gho_...`, `ghs_...`)
79
+ - Slack tokens (`xoxb-...`, `xoxp-...`)
80
+ - Stripe keys (`sk_live_...`, `pk_live_...`)
81
+ - Google API keys (`AIza...`)
82
+ - Base64-encoded strings
83
+ - Generic long alphanumeric strings
84
+
85
+ ## Output
86
+
87
+ ### Terminal
88
+ ```
89
+ ============================================================
90
+ COMPLIANCE SCAN — SECRET/CREDENTIAL DETECTION REPORT
91
+ ============================================================
92
+
93
+ Scan root : /path/to/project
94
+ Files scanned : 42
95
+
96
+ STATUS: 3 POTENTIAL FINDINGS
97
+ Showing top 3 by score
98
+
99
+ Rank : 1
100
+ Score : 200
101
+ Variable : API_KEY
102
+ Value : sk_live_abc123...
103
+ File : /path/to/config.py
104
+ Line : 15
105
+ ------------------------------------------------------------
106
+ ```
107
+
108
+ ### Online Report
109
+ Reports are uploaded to [dpaste.org](https://dpaste.org) with configurable expiry (default: 30 days). Share the URL with your team for compliance reviews.
110
+
111
+ ### JSON
112
+ ```json
113
+ {
114
+ "scan_root": "/path/to/project",
115
+ "files_scanned": 42,
116
+ "total_findings": 3,
117
+ "status": "findings_detected",
118
+ "findings": [...]
119
+ }
120
+ ```
121
+
122
+ ## Exit Codes
123
+
124
+ - `0` — Clean (no findings)
125
+ - `1` — Findings detected
126
+
127
+ Use in CI/CD:
128
+ ```bash
129
+ compliance-scanner . --no-upload --quiet || echo "SECRETS DETECTED"
130
+ ```
131
+
132
+ ## License
133
+
134
+ MIT
@@ -0,0 +1,107 @@
1
+ # compliance-scanner
2
+
3
+ Scan codebases for leaked secrets, API keys, and credentials. Compliance-grade reporting with online report sharing.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ # From PyPI
9
+ pip install compliance-scanner
10
+
11
+ # From source
12
+ git clone https://github.com/naufal/compliance-scanner.git
13
+ cd compliance-scanner
14
+ pip install .
15
+ ```
16
+
17
+ ## Usage
18
+
19
+ ```bash
20
+ # Scan current directory
21
+ compliance-scanner .
22
+
23
+ # Scan specific path
24
+ compliance-scanner /path/to/project
25
+
26
+ # Local only (no online upload)
27
+ compliance-scanner . --no-upload
28
+
29
+ # JSON output
30
+ compliance-scanner . --json --output report.json
31
+
32
+ # Custom expiry for online report
33
+ compliance-scanner . --expiry 7
34
+
35
+ # Ignore additional directories
36
+ compliance-scanner . --ignore .env,.secrets
37
+
38
+ # Scan only Python files
39
+ compliance-scanner . --file-glob "*.py"
40
+
41
+ # Quiet mode
42
+ compliance-scanner . -q
43
+ ```
44
+
45
+ ## What It Detects
46
+
47
+ - High-entropy strings (potential API keys, tokens)
48
+ - Variable assignments with secrets (`API_KEY=...`, `SECRET=...`)
49
+ - JWT tokens (`eyJ...`)
50
+ - AWS access keys (`AKIA...`)
51
+ - GitHub tokens (`ghp_...`, `gho_...`, `ghs_...`)
52
+ - Slack tokens (`xoxb-...`, `xoxp-...`)
53
+ - Stripe keys (`sk_live_...`, `pk_live_...`)
54
+ - Google API keys (`AIza...`)
55
+ - Base64-encoded strings
56
+ - Generic long alphanumeric strings
57
+
58
+ ## Output
59
+
60
+ ### Terminal
61
+ ```
62
+ ============================================================
63
+ COMPLIANCE SCAN — SECRET/CREDENTIAL DETECTION REPORT
64
+ ============================================================
65
+
66
+ Scan root : /path/to/project
67
+ Files scanned : 42
68
+
69
+ STATUS: 3 POTENTIAL FINDINGS
70
+ Showing top 3 by score
71
+
72
+ Rank : 1
73
+ Score : 200
74
+ Variable : API_KEY
75
+ Value : sk_live_abc123...
76
+ File : /path/to/config.py
77
+ Line : 15
78
+ ------------------------------------------------------------
79
+ ```
80
+
81
+ ### Online Report
82
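+ Unless `--no-upload` or `--json` is given, the text report is uploaded to [dpaste.org](https://dpaste.org) with configurable expiry (default: 30 days). The uploaded report lists every finding with its variable name, file path, and line number, and anyone who has the URL can read it — use `--no-upload` for sensitive repositories. Share the URL with your team for compliance reviews.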
83
+
84
+ ### JSON
85
+ ```json
86
+ {
87
+ "scan_root": "/path/to/project",
88
+ "files_scanned": 42,
89
+ "total_findings": 3,
90
+ "status": "findings_detected",
91
+ "findings": [...]
92
+ }
93
+ ```
94
+
95
+ ## Exit Codes
96
+
97
+ - `0` — Clean (no findings)
98
+ - `1` — Findings detected
99
+
100
+ Use in CI/CD:
101
+ ```bash
102
+ compliance-scanner . --no-upload --quiet || echo "SECRETS DETECTED"
103
+ ```
104
+
105
+ ## License
106
+
107
+ MIT
@@ -0,0 +1,3 @@
1
+ """Compliance Scanner — scan codebases for leaked secrets and credentials."""
2
+
3
+ __version__ = "1.0.0"
@@ -0,0 +1,152 @@
1
+ #!/usr/bin/env python3
2
+ """CLI entry point for compliance-scanner."""
3
+
4
+ import argparse
5
+ import sys
6
+ from compliance_scanner.scanner import scan_directory, DEFAULT_IGNORE_DIRS
7
+ from compliance_scanner.report import print_report, print_and_upload
8
+ from compliance_scanner import __version__
9
+
10
+
11
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for ``compliance-scanner``.

    Returns:
        An ``argparse.ArgumentParser`` exposing the positional ``directory``
        and the ``--version``, ``--no-upload``, ``--json``, ``--output``,
        ``--expiry``, ``--ignore``, ``--max-items``, ``--file-glob`` and
        ``--quiet`` options. ``--expiry`` must be >= 1 and ``--max-items``
        must be >= 0; other values are rejected as usage errors.
    """

    def int_at_least(minimum: int):
        """Return an argparse ``type`` callable accepting integers >= ``minimum``."""

        def convert(raw: str) -> int:
            try:
                number = int(raw)
            except ValueError:
                raise argparse.ArgumentTypeError(f"invalid int value: {raw!r}") from None
            if number < minimum:
                raise argparse.ArgumentTypeError(f"must be >= {minimum}, got {number}")
            return number

        return convert

    parser = argparse.ArgumentParser(
        prog="compliance-scanner",
        description="Scan codebases for leaked secrets, API keys, and credentials.",
        epilog="Examples:\n"
               "  compliance-scanner .\n"
               "  compliance-scanner /path/to/project --no-upload\n"
               "  compliance-scanner . --json --output report.json\n"
               "  compliance-scanner . --expiry 7 --ignore .env,.secrets",
        # Keep the hand-formatted epilog line breaks intact.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        "directory",
        nargs="?",
        default=".",
        help="Directory to scan (default: current directory)",
    )

    parser.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
    )

    parser.add_argument(
        "--no-upload",
        action="store_true",
        help="Don't upload report to dpaste.org (local print only)",
    )

    parser.add_argument(
        "--json",
        action="store_true",
        help="Output report as JSON",
    )

    parser.add_argument(
        "--output", "-o",
        type=str,
        help="Save report to file (default: stdout only)",
    )

    parser.add_argument(
        "--expiry",
        type=int_at_least(1),
        default=30,
        help="Days until online report expires (default: 30)",
    )

    parser.add_argument(
        "--ignore",
        type=str,
        help="Comma-separated directory names to ignore (in addition to defaults)",
    )

    parser.add_argument(
        "--max-items",
        type=int_at_least(0),
        default=200,
        help="Maximum findings to show (default: 200)",
    )

    parser.add_argument(
        "--file-glob",
        type=str,
        help="Only scan files matching glob pattern (e.g., '*.py')",
    )

    parser.add_argument(
        "--quiet", "-q",
        action="store_true",
        help="Suppress progress output",
    )

    return parser
87
+
88
+
89
def _mask_secret(value: str, visible: int = 4) -> str:
    """Return ``value`` with everything after the first ``visible`` characters starred out."""
    text = str(value)
    return text[:visible] + "*" * max(len(text) - visible, 0)


def main():
    """Run the scanner from the command line.

    Parses arguments, scans the requested directory, prints the report,
    optionally writes it to ``--output``, and uploads the text report unless
    ``--no-upload`` or ``--json`` was given. Terminates the process with exit
    code 0 when there are no findings and 1 otherwise.
    """
    from dataclasses import replace
    from compliance_scanner.report import (
        format_json_report,
        format_report,
        upload_to_dpaste,
    )

    parser = build_parser()
    args = parser.parse_args()

    # Build ignore set: defaults plus any non-empty names from --ignore.
    ignore_dirs = DEFAULT_IGNORE_DIRS.copy()
    if args.ignore:
        for d in args.ignore.split(","):
            d = d.strip()
            if d:
                ignore_dirs.add(d)

    if not args.quiet:
        print(f"Compliance Scanner v{__version__}")
        print(f"Scanning: {args.directory}")
        print("...")

    # Run scan
    result = scan_directory(
        root_dir=args.directory,
        ignore_dirs=ignore_dirs,
        file_glob=args.file_glob,
    )

    ranked = result.unique_findings

    if not args.quiet:
        print(f"Scanned {result.files_scanned} files")

    # Output. --max-items only applies to the text report; the JSON
    # formatter has no item limit.
    if args.json:
        report = format_json_report(
            ranked, root_dir=args.directory, files_scanned=result.files_scanned
        )
    else:
        report = format_report(
            ranked,
            max_items=args.max_items,
            root_dir=args.directory,
            files_scanned=result.files_scanned,
        )

    print("\n" + report)

    # Save to file if requested. The text report contains a non-ASCII dash,
    # so the encoding is pinned instead of relying on the platform default.
    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(report)
        if not args.quiet:
            print(f"\nReport saved to: {args.output}")

    # Upload unless disabled (JSON reports are never uploaded).
    if not args.no_upload and not args.json:
        print("\n" + "=" * 60)
        print("Uploading report to dpaste.org ...")
        try:
            # The upload leaves the machine, so send a copy whose secret
            # values are masked; the local report above keeps full values.
            redacted = [
                replace(item, value=_mask_secret(item.value)) for item in ranked
            ]
            upload_text = format_report(
                redacted,
                max_items=args.max_items,
                root_dir=args.directory,
                files_scanned=result.files_scanned,
            )
            link = upload_to_dpaste(upload_text, args.expiry)
            print(f"Report URL: {link}")
            print(f"(Expires in {args.expiry} days)")
        except Exception as e:
            # Upload is best-effort; a failure must not change the exit code.
            print(f"Upload failed: {e}")

    # Exit code: 0 if clean, 1 if findings
    sys.exit(0 if not ranked else 1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,121 @@
1
+ """Report formatting and online upload (dpaste.org)."""
2
+
3
+ import os
4
+ import urllib.request
5
+ import urllib.parse
6
+ from typing import Optional
7
+
8
+ DPASTE_URL = "https://dpaste.org/api/"
9
+ DPASTE_EXPIRY_DAYS = 30
10
+
11
+
12
def format_report(
    ranked: list,
    max_items: int = 200,
    root_dir: str = ".",
    files_scanned: int = 0,
) -> str:
    """Format scan results into a plain-text report.

    Args:
        ranked: Findings in display order; each item must expose ``score``,
            ``variable``, ``value``, ``file``, ``line`` and ``pattern_name``.
        max_items: Maximum number of findings to list. Negative values are
            treated as 0.
        root_dir: Scanned directory; shown as an absolute path.
        files_scanned: File count shown in the header.

    Returns:
        The report as a single newline-joined string.
    """
    # A negative limit would otherwise slice from the tail of the list and
    # print a negative "Showing top" count.
    limit = max(max_items, 0)
    shown = ranked[:limit]

    lines = []
    lines.append("=" * 60)
    lines.append("COMPLIANCE SCAN — SECRET/CREDENTIAL DETECTION REPORT")
    lines.append("=" * 60)
    lines.append("")
    lines.append(f"Scan root     : {os.path.abspath(root_dir)}")
    lines.append(f"Files scanned : {files_scanned}")
    lines.append("")

    if not ranked:
        lines.append("STATUS: CLEAN")
        lines.append("No potential secrets or credentials detected.")
    else:
        lines.append(f"STATUS: {len(ranked)} POTENTIAL FINDINGS")
        lines.append(f"Showing top {len(shown)} by score")
        lines.append("")

        for i, item in enumerate(shown, start=1):
            lines.append(f"Rank     : {i}")
            lines.append(f"Score    : {item.score}")
            lines.append(f"Variable : {item.variable}")
            lines.append(f"Value    : {item.value}")
            lines.append(f"File     : {item.file}")
            lines.append(f"Line     : {item.line}")
            lines.append(f"Pattern  : {item.pattern_name}")
            lines.append("-" * 60)

    lines.append("")
    lines.append("Generated by compliance-scanner v1.0.0")
    return "\n".join(lines)
49
+
50
+
51
def format_json_report(ranked: list, root_dir: str = ".", files_scanned: int = 0) -> str:
    """Serialize scan results as a JSON document indented by two spaces.

    Each item in ``ranked`` must provide ``to_dict()``; its result is placed
    verbatim in the ``findings`` array.
    """
    import json

    serialized = []
    for finding in ranked:
        serialized.append(finding.to_dict())

    status = "findings_detected" if ranked else "clean"
    payload = {
        "scan_root": os.path.abspath(root_dir),
        "files_scanned": files_scanned,
        "total_findings": len(ranked),
        "status": status,
        "findings": serialized,
    }
    return json.dumps(payload, indent=2)
64
+
65
+
66
def upload_to_dpaste(text: str, expiry_days: int = 30) -> str:
    """Upload ``text`` to the paste service at ``DPASTE_URL`` and return its URL.

    Args:
        text: Report body to upload. It is sent as-is, so anything it
            contains becomes readable by whoever has the returned URL.
        expiry_days: Requested lifetime of the paste in days; must be >= 1.

    Returns:
        The response body with surrounding whitespace and any wrapping
        double quotes removed.

    Raises:
        ValueError: If ``expiry_days`` is less than 1.
        urllib.error.URLError: If the HTTP request fails.
    """
    if expiry_days < 1:
        raise ValueError(f"expiry_days must be >= 1, got {expiry_days}")

    # NOTE(review): the `syntax` / `expiry_days` field names should be checked
    # against the API of the service behind DPASTE_URL — confirm they are
    # honoured rather than silently ignored.
    data = urllib.parse.urlencode({
        "content": text,
        "syntax": "text",
        "expiry_days": str(expiry_days),
    }).encode()

    req = urllib.request.Request(
        DPASTE_URL,
        data=data,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        method="POST",
    )

    with urllib.request.urlopen(req, timeout=30) as resp:
        body = resp.read().decode()

    # Some paste APIs return the URL wrapped in double quotes; strip them so
    # callers always get a bare URL.
    return body.strip().strip('"')
83
+
84
+
85
def print_report(ranked: list, root_dir: str = ".", files_scanned: int = 0) -> str:
    """Render the text report, write it to stdout after a blank line, and return it."""
    text = format_report(ranked, root_dir=root_dir, files_scanned=files_scanned)
    print(f"\n{text}")
    return text
90
+
91
+
92
def print_and_upload(
    ranked: list,
    root_dir: str = ".",
    files_scanned: int = 0,
    expiry_days: int = 30,
    upload: bool = True,
    json_output: bool = False,
) -> Optional[str]:
    """Print the report and, when ``upload`` is true, send it to dpaste.

    The uploaded text is exactly the report that was printed (JSON or plain
    text), including the finding values it contains.

    Returns:
        The value returned by ``upload_to_dpaste`` on success; ``None`` when
        uploading is disabled or the upload raised.
    """
    formatter = format_json_report if json_output else format_report
    text = formatter(ranked, root_dir=root_dir, files_scanned=files_scanned)

    print("\n" + text)

    if upload:
        print("\n" + "=" * 60)
        print("Uploading report to dpaste.org ...")
        try:
            url = upload_to_dpaste(text, expiry_days)
            print(f"Report URL: {url}")
            print(f"(Expires in {expiry_days} days)")
        except Exception as exc:
            # Best-effort: report the failure and fall through to None.
            print(f"Upload failed: {exc}")
        else:
            return url

    return None
@@ -0,0 +1,231 @@
1
+ """Core scanning engine — pattern matching, entropy scoring, file walking."""
2
+
3
+ import os
4
+ import re
5
+ import math
6
+ from dataclasses import dataclass, field
7
+ from typing import Optional
8
+
9
# Directory names that are pruned from the walk unless the caller supplies
# its own ignore set.
DEFAULT_IGNORE_DIRS = {
    # version control / editors
    ".git", ".idea", ".vscode",
    # dependency and virtual-environment trees
    "node_modules", ".venv", "venv",
    # build output
    "dist", "build",
    # caches and tool state
    "__pycache__", ".tox", ".mypy_cache", ".pytest_cache", ".sass-cache",
    # coverage artefacts
    "coverage", ".coverage", ".nyc_output",
}

# File extensions (lower-case, with leading dot) that are never opened.
BINARY_EXTENSIONS = {
    # images
    ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg", ".webp",
    # audio / video
    ".mp3", ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm",
    # archives
    ".zip", ".tar", ".gz", ".bz2", ".7z", ".rar",
    # executables and native libraries
    ".exe", ".dll", ".so", ".dylib", ".bin",
    # office documents
    ".pdf", ".doc", ".docx", ".xls", ".xlsx",
    # compiled objects
    ".pyc", ".pyo", ".class", ".o", ".obj",
}

# Regular expressions applied to every line, in this order.
PATTERNS = [
    r'[A-Za-z0-9_\-]{32,}',  # 32+ chars of letters, digits, "_" or "-"
    r'[A-Za-z0-9+/]{40,}={0,2}',  # 40+ base64 alphabet chars, optional padding
    r'eyJ[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+',  # JWT: three dot-separated segments
    r'AKIA[0-9A-Z]{16}',  # AWS access key ID
    r'ghp_[A-Za-z0-9]{36}',  # GitHub personal access token
    r'gho_[A-Za-z0-9]{36}',  # GitHub OAuth token
    r'(ghu|ghs|ghr)_[A-Za-z0-9]{36}',  # GitHub App tokens
    r'xox[bprs]-[A-Za-z0-9\-]+',  # Slack token
    r'sk_live_[A-Za-z0-9]+',  # Stripe live secret key
    r'pk_live_[A-Za-z0-9]+',  # Stripe live publishable key
    r'AIza[0-9A-Za-z_\-]{35}',  # Google API key
    r'[hH]eroku[0-9a-fA-F]{32}',  # Heroku API key
    # Generic "key/secret/token/password" assignment with a 16+ char value
    r'(?:key|secret|token|password|api_key|apikey|api_secret)\s*[:=]\s*["\']?[A-Za-z0-9_\-]{16,}',
]
67
+
68
+
69
@dataclass
class Finding:
    """One potential secret located in a scanned file."""
    variable: str  # assigned name, or "(unknown)" for bare pattern hits
    value: str  # the matched text
    score: int  # heuristic score; higher ranks first
    file: str  # path of the file containing the match
    line: int  # 1-based line number
    pattern_name: str = "generic"

    def to_dict(self) -> dict:
        """Return the fields as a plain dict, in declaration order."""
        keys = ("variable", "value", "score", "file", "line", "pattern_name")
        return {key: getattr(self, key) for key in keys}
88
+
89
+
90
@dataclass
class ScanResult:
    """Everything collected while scanning one directory tree."""
    findings: list = field(default_factory=list)
    files_scanned: int = 0
    errors: list = field(default_factory=list)
    root_dir: str = ""

    @property
    def unique_findings(self) -> list:
        """Findings deduplicated by ``value``, highest score first.

        For each distinct value the finding with the highest score is kept
        (the earliest one wins a tie). Equal-scored survivors keep the order
        in which their values were first seen.
        """
        best = {}
        for candidate in self.findings:
            current = best.get(candidate.value)
            if current is None or candidate.score > current.score:
                best[candidate.value] = candidate
        ordered = list(best.values())
        ordered.sort(key=lambda finding: finding.score, reverse=True)
        return ordered

    @property
    def has_findings(self) -> bool:
        """True when at least one unique finding exists."""
        return bool(self.unique_findings)
110
+
111
+
112
def entropy(s: str) -> float:
    """Return the Shannon entropy of ``s`` in bits per character (0.0 for empty input)."""
    if not s:
        return 0.0
    # Keys are created in first-occurrence order, then counted.
    counts = dict.fromkeys(s, 0)
    for ch in s:
        counts[ch] += 1
    total = len(s)
    shares = [n / total for n in counts.values()]
    return -sum(p * math.log2(p) for p in shares)
121
+
122
+
123
def score_candidate(value: str) -> int:
    """Score a candidate string; higher means more likely to be a secret.

    The score is the length (capped at 100), plus 50 for each entropy
    threshold exceeded (4.0 and 4.5), plus 20 for each character class
    present: upper-case, lower-case, digit, and one of ``_-+=/``.
    """
    points = min(len(value), 100)

    randomness = entropy(value)
    for threshold in (4.0, 4.5):
        if randomness > threshold:
            points += 50

    for char_class in (r"[A-Z]", r"[a-z]", r"\d", r"[_\-+=/]"):
        if re.search(char_class, value):
            points += 20

    return points
144
+
145
+
146
def extract_variables(line: str) -> list:
    """Return ``(name, value)`` pairs for assignments on ``line``.

    An assignment is an identifier followed by ``:`` or ``=`` and a value of
    16 or more characters, optionally wrapped in quotes.
    """
    assignment = r'([A-Za-z_][A-Za-z0-9_]*)\s*[:=]\s*[\'"]?([A-Za-z0-9_\-+=/.]{16,})[\'"]?'
    return [(hit.group(1), hit.group(2)) for hit in re.finditer(assignment, line)]
156
+
157
+
158
def is_binary_file(path: str) -> bool:
    """Return True when the extension of ``path`` is in ``BINARY_EXTENSIONS``.

    Only the file name is examined (case-insensitively); the file is not opened.
    """
    extension = os.path.splitext(path)[1]
    return extension.lower() in BINARY_EXTENSIONS
162
+
163
+
164
def _scan_file_checked(path: str) -> tuple:
    """Scan one text file; return ``(findings, error)``.

    ``error`` is ``None`` on success, otherwise the exception that stopped
    the scan. Findings collected before the failure are still returned.
    """
    findings = []
    try:
        # errors="ignore" drops undecodable bytes instead of raising.
        with open(path, "r", encoding="utf-8", errors="ignore") as f:
            for lineno, line in enumerate(f, 1):
                # Check variable assignments
                for var_name, value in extract_variables(line):
                    findings.append(Finding(
                        variable=var_name,
                        value=value,
                        score=score_candidate(value),
                        file=path,
                        line=lineno,
                        pattern_name="variable_assignment",
                    ))

                # Check regex patterns
                for pattern in PATTERNS:
                    for match in re.finditer(pattern, line):
                        value = match.group(0)
                        findings.append(Finding(
                            variable="(unknown)",
                            value=value,
                            score=score_candidate(value),
                            file=path,
                            line=lineno,
                            pattern_name="pattern_match",
                        ))
    except Exception as exc:
        # Scanning is best-effort: hand the error back instead of raising.
        return findings, exc

    return findings, None


def scan_file(path: str) -> list:
    """Scan a single file for potential secrets.

    Files with a binary extension yield an empty list. Errors while reading
    are swallowed; whatever was found before the error is returned.
    """
    if is_binary_file(path):
        return []

    findings, _error = _scan_file_checked(path)
    return findings


def scan_directory(
    root_dir: str = ".",
    ignore_dirs: Optional[set] = None,
    file_glob: Optional[str] = None,
) -> ScanResult:
    """Walk a directory tree and scan its files for secrets.

    Args:
        root_dir: Directory to walk.
        ignore_dirs: Directory names to prune from the walk; defaults to a
            copy of ``DEFAULT_IGNORE_DIRS``.
        file_glob: If given, only file names matching this ``fnmatch``
            pattern are scanned.

    Returns:
        A ``ScanResult`` whose ``files_scanned`` counts only files that were
        read without error, and whose ``errors`` holds one
        ``"<path>: <error>"`` string per file that could not be fully read.
    """
    import fnmatch

    if ignore_dirs is None:
        ignore_dirs = DEFAULT_IGNORE_DIRS.copy()

    result = ScanResult(root_dir=os.path.abspath(root_dir))

    for root, dirs, files in os.walk(root_dir):
        # In-place assignment so os.walk does not descend into ignored dirs.
        dirs[:] = [d for d in dirs if d not in ignore_dirs]

        for filename in files:
            # Optional file glob filter
            if file_glob and not fnmatch.fnmatch(filename, file_glob):
                continue

            filepath = os.path.join(root, filename)

            # Binary files are never opened, so they are not counted.
            if is_binary_file(filepath):
                continue

            findings, error = _scan_file_checked(filepath)
            result.findings.extend(findings)
            if error is None:
                result.files_scanned += 1
            else:
                result.errors.append(f"{filepath}: {error}")

    return result
@@ -0,0 +1,134 @@
1
+ Metadata-Version: 2.4
2
+ Name: compliance-scanner
3
+ Version: 1.0.0
4
+ Summary: Scan codebases for leaked secrets, API keys, and credentials. Compliance-grade reporting with online report sharing.
5
+ Author: Naufal
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/naufal/compliance-scanner
8
+ Project-URL: Repository, https://github.com/naufal/compliance-scanner
9
+ Project-URL: Issues, https://github.com/naufal/compliance-scanner/issues
10
+ Keywords: security,compliance,secrets,scanner,credentials,api-keys
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: System Administrators
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Security
22
+ Classifier: Topic :: Software Development :: Quality Assurance
23
+ Requires-Python: >=3.9
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Dynamic: license-file
27
+
28
+ # compliance-scanner
29
+
30
+ Scan codebases for leaked secrets, API keys, and credentials. Compliance-grade reporting with online report sharing.
31
+
32
+ ## Installation
33
+
34
+ ```bash
35
+ # From PyPI (when published)
36
+ pip install compliance-scanner
37
+
38
+ # From source
39
+ git clone https://github.com/naufal/compliance-scanner.git
40
+ cd compliance-scanner
41
+ pip install .
42
+ ```
43
+
44
+ ## Usage
45
+
46
+ ```bash
47
+ # Scan current directory
48
+ compliance-scanner .
49
+
50
+ # Scan specific path
51
+ compliance-scanner /path/to/project
52
+
53
+ # Local only (no online upload)
54
+ compliance-scanner . --no-upload
55
+
56
+ # JSON output
57
+ compliance-scanner . --json --output report.json
58
+
59
+ # Custom expiry for online report
60
+ compliance-scanner . --expiry 7
61
+
62
+ # Ignore additional directories
63
+ compliance-scanner . --ignore .env,.secrets
64
+
65
+ # Scan only Python files
66
+ compliance-scanner . --file-glob "*.py"
67
+
68
+ # Quiet mode
69
+ compliance-scanner . -q
70
+ ```
71
+
72
+ ## What It Detects
73
+
74
+ - High-entropy strings (potential API keys, tokens)
75
+ - Variable assignments with secrets (`API_KEY=...`, `SECRET=...`)
76
+ - JWT tokens (`eyJ...`)
77
+ - AWS access keys (`AKIA...`)
78
+ - GitHub tokens (`ghp_...`, `gho_...`, `ghs_...`)
79
+ - Slack tokens (`xoxb-...`, `xoxp-...`)
80
+ - Stripe keys (`sk_live_...`, `pk_live_...`)
81
+ - Google API keys (`AIza...`)
82
+ - Base64-encoded strings
83
+ - Generic long alphanumeric strings
84
+
85
+ ## Output
86
+
87
+ ### Terminal
88
+ ```
89
+ ============================================================
90
+ COMPLIANCE SCAN — SECRET/CREDENTIAL DETECTION REPORT
91
+ ============================================================
92
+
93
+ Scan root : /path/to/project
94
+ Files scanned : 42
95
+
96
+ STATUS: 3 POTENTIAL FINDINGS
97
+ Showing top 3 by score
98
+
99
+ Rank : 1
100
+ Score : 200
101
+ Variable : API_KEY
102
+ Value : sk_live_abc123...
103
+ File : /path/to/config.py
104
+ Line : 15
105
+ ------------------------------------------------------------
106
+ ```
107
+
108
+ ### Online Report
109
+ Reports are uploaded to [dpaste.org](https://dpaste.org) with configurable expiry (default: 30 days). Share the URL with your team for compliance reviews.
110
+
111
+ ### JSON
112
+ ```json
113
+ {
114
+ "scan_root": "/path/to/project",
115
+ "files_scanned": 42,
116
+ "total_findings": 3,
117
+ "status": "findings_detected",
118
+ "findings": [...]
119
+ }
120
+ ```
121
+
122
+ ## Exit Codes
123
+
124
+ - `0` — Clean (no findings)
125
+ - `1` — Findings detected
126
+
127
+ Use in CI/CD:
128
+ ```bash
129
+ compliance-scanner . --no-upload --quiet || echo "SECRETS DETECTED"
130
+ ```
131
+
132
+ ## License
133
+
134
+ MIT
@@ -0,0 +1,13 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ setup.py
5
+ compliance_scanner/__init__.py
6
+ compliance_scanner/cli.py
7
+ compliance_scanner/report.py
8
+ compliance_scanner/scanner.py
9
+ compliance_scanner.egg-info/PKG-INFO
10
+ compliance_scanner.egg-info/SOURCES.txt
11
+ compliance_scanner.egg-info/dependency_links.txt
12
+ compliance_scanner.egg-info/entry_points.txt
13
+ compliance_scanner.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ compliance-scanner = compliance_scanner.cli:main
@@ -0,0 +1,2 @@
1
+ compliance_scanner
2
+ dist
@@ -0,0 +1,40 @@
1
[build-system]
# The [project] table below is only understood by setuptools 61.0 and later.
requires = ["setuptools>=61", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "compliance-scanner"
version = "1.0.0"
description = "Scan codebases for leaked secrets, API keys, and credentials. Compliance-grade reporting with online report sharing."
readme = "README.md"
license = {text = "MIT"}
requires-python = ">=3.9"
authors = [
    {name = "Naufal"},
]
keywords = ["security", "compliance", "secrets", "scanner", "credentials", "api-keys"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: System Administrators",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Security",
    "Topic :: Software Development :: Quality Assurance",
]

[project.scripts]
compliance-scanner = "compliance_scanner.cli:main"

[project.urls]
Homepage = "https://github.com/naufal/compliance-scanner"
Repository = "https://github.com/naufal/compliance-scanner"
Issues = "https://github.com/naufal/compliance-scanner/issues"

[tool.setuptools.packages.find]
where = ["."]
# Ship only the real package; unrestricted discovery also picks up stray
# top-level directories such as a local dist/ folder.
include = ["compliance_scanner*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,12 @@
1
# Legacy entry point for tools that still invoke setup.py directly.
from setuptools import setup, find_packages

setup(
    name="compliance-scanner",
    version="1.0.0",
    # Restrict discovery to the real package so unrelated top-level
    # directories (for example a local dist/ folder) are never shipped.
    packages=find_packages(include=["compliance_scanner", "compliance_scanner.*"]),
    entry_points={
        "console_scripts": [
            "compliance-scanner=compliance_scanner.cli:main",
        ],
    },
)