secret-scan 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 amitu314, harshahemanth
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1,187 @@
1
+ Metadata-Version: 2.4
2
+ Name: secret-scan
3
+ Version: 0.1.2
4
+ Summary: A simple secret/credential scanner for source code repositories.
5
+ Author-email: Your Name <you@example.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 amitu314, harshahemanth
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+
29
+ Project-URL: Homepage, https://github.com/harshahemanth/secret-scan
30
+ Project-URL: Repository, https://github.com/harshahemanth/secret-scan
31
+ Project-URL: Issues, https://github.com/harshahemanth/secret-scan/issues
32
+ Project-URL: Documentation, https://github.com/harshahemanth/secret-scan#readme
33
+ Keywords: security,secrets,credentials,scanner
34
+ Classifier: Programming Language :: Python :: 3
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Environment :: Console
37
+ Classifier: Intended Audience :: Developers
38
+ Classifier: Topic :: Security
39
+ Requires-Python: >=3.9
40
+ Description-Content-Type: text/markdown
41
+ License-File: LICENSE
42
+ Dynamic: license-file
43
+
44
+ # secret-scan
45
+
46
+ A fast, lightweight CLI tool to detect secrets in source code.
47
+
48
+ `secret-scan` scans directories for sensitive data such as:
49
+
50
+ - AWS Access Keys and Secret Keys
51
+ - OpenAI API keys (sk-...)
52
+ - Password assignments
53
+ - Bearer tokens
54
+ - SSH private keys
55
+ - Azure storage keys
56
+ - Generic API keys and tokens
57
+ - JWT tokens
58
+
59
+ It skips binary files, ignores common junk directories (node_modules, .git, venv, etc.), avoids scanning large files, and supports extensible regular expressions.
60
+
61
+ ## Installation
62
+
63
+ pip install secret-scan
64
+
65
+ To upgrade:
66
+
67
+ pip install --upgrade secret-scan
68
+
69
+ ## Basic Usage
70
+
71
+ Scan the current directory:
72
+
73
+ secret-scan .
74
+
75
+ Scan a specific directory:
76
+
77
+ secret-scan ~/projects/my-repo
78
+
79
+ Write results to a file (default: docsCred.txt):
80
+
81
+ secret-scan . --output secrets.txt
82
+
83
+ ## JSON Output
84
+
85
+ Generate JSON output (useful for CI pipelines):
86
+
87
+ secret-scan . --json
88
+
89
+ Example output:
90
+
91
+ [
92
+ {
93
+ "file": "config/settings.py",
94
+ "line": 20,
95
+ "match": "AWS_ACCESS_KEY_ID=AKIA1234567890ABCD12"
96
+ },
97
+ {
98
+ "file": "service/api.py",
99
+ "line": 42,
100
+ "match": "sk-ABCDEFGHIJKLMNOPQRSTUV123456"
101
+ }
102
+ ]
103
+
104
+ ## Command-Line Options
105
+
106
+ | Flag | Description |
107
+ |------------------|--------------------------------------------|
108
+ | --output <file> | Save text results (default: docsCred.txt) |
109
+ | --skip-ext .log | Skip specific file extensions |
110
+ | --skip-dir <dir> | Skip specific directories |
111
+ | --max-size-mb N | Scan only files smaller than N MB |
112
+ | --json | Print JSON results to stdout |
113
+
114
+ Example:
115
+
116
+ secret-scan . --skip-ext .log --skip-dir build --json
117
+
118
+ ## What It Detects
119
+
120
+ ### AWS
121
+ - Access Key IDs (AKIA...)
122
+ - Secret Access Keys
123
+ - Environment variable forms such as AWS_ACCESS_KEY_ID=...
124
+
125
+ ### OpenAI
126
+ - Keys beginning with sk-
127
+
128
+ ### Passwords and Tokens
129
+ - password=...
130
+ - api_key=...
131
+ - Bearer tokens
132
+ - JWT tokens (xxx.yyy.zzz)
133
+
134
+ ### Private Keys
135
+ - -----BEGIN PRIVATE KEY-----
136
+
137
+ ### Cloud Provider Keys
138
+ - Azure storage account keys
139
+ - Redis/MySQL/Postgres/Mongo/FTP/SMTP connection strings
140
+
141
+ ## Automatic Skips
142
+
143
+ The scanner automatically ignores:
144
+
145
+ - .git, .hg, .svn
146
+ - node_modules
147
+ - Python virtual environments (venv, .venv, env)
148
+ - Binary files (null-byte detection)
149
+ - Large files (over 5 MB by default)
150
+ - Common non-text extensions (images, archives, executables)
151
+
152
+ ## Extending Detection Patterns
153
+
154
+ Detection patterns are defined in:
155
+
156
+ src/secret_scanner/patterns.py
157
+
158
+ You may extend or modify these patterns to detect additional token types.
159
+
160
+ ## Programmatic Usage
161
+
162
+ Example using the Python API:
163
+
164
+ from pathlib import Path
165
+ from secret_scanner.scanner import scan_directory
166
+
167
+ matches = scan_directory(Path("."), output_path=None)
168
+ for m in matches:
169
+ print(m["file"], m["line"], m["match"])
170
+
171
+ ## Running Tests
172
+
173
+ pytest -q
174
+
175
+ ## Contributing
176
+
177
+ Contributions are welcome.
178
+
179
+ 1. Fork the repository
180
+ 2. Create a feature branch
181
+ 3. Add tests for new functionality
182
+ 4. Open a pull request
183
+
184
+ ## License
185
+
186
+ This project is licensed under the MIT License. See the LICENSE file for full details.
187
+
@@ -0,0 +1,144 @@
1
+ # secret-scan
2
+
3
+ A fast, lightweight CLI tool to detect secrets in source code.
4
+
5
+ `secret-scan` scans directories for sensitive data such as:
6
+
7
+ - AWS Access Keys and Secret Keys
8
+ - OpenAI API keys (sk-...)
9
+ - Password assignments
10
+ - Bearer tokens
11
+ - SSH private keys
12
+ - Azure storage keys
13
+ - Generic API keys and tokens
14
+ - JWT tokens
15
+
16
+ It skips binary files, ignores common junk directories (node_modules, .git, venv, etc.), avoids scanning large files, and supports extensible regular expressions.
17
+
18
+ ## Installation
19
+
20
+ pip install secret-scan
21
+
22
+ To upgrade:
23
+
24
+ pip install --upgrade secret-scan
25
+
26
+ ## Basic Usage
27
+
28
+ Scan the current directory:
29
+
30
+ secret-scan .
31
+
32
+ Scan a specific directory:
33
+
34
+ secret-scan ~/projects/my-repo
35
+
36
+ Write results to a file (default: docsCred.txt):
37
+
38
+ secret-scan . --output secrets.txt
39
+
40
+ ## JSON Output
41
+
42
+ Generate JSON output (useful for CI pipelines):
43
+
44
+ secret-scan . --json
45
+
46
+ Example output:
47
+
48
+ [
49
+ {
50
+ "file": "config/settings.py",
51
+ "line": 20,
52
+ "match": "AWS_ACCESS_KEY_ID=AKIA1234567890ABCD12"
53
+ },
54
+ {
55
+ "file": "service/api.py",
56
+ "line": 42,
57
+ "match": "sk-ABCDEFGHIJKLMNOPQRSTUV123456"
58
+ }
59
+ ]
60
+
61
+ ## Command-Line Options
62
+
63
+ | Flag | Description |
64
+ |------------------|--------------------------------------------|
65
+ | --output <file> | Save text results (default: docsCred.txt) |
66
+ | --skip-ext .log | Skip specific file extensions |
67
+ | --skip-dir <dir> | Skip specific directories |
68
+ | --max-size-mb N | Skip files larger than N MB (default: 5; 0 or a negative value disables the limit) |
69
+ | --json | Print JSON results to stdout |
70
+
71
+ Example:
72
+
73
+ secret-scan . --skip-ext .log --skip-dir build --json
74
+
75
+ ## What It Detects
76
+
77
+ ### AWS
78
+ - Access Key IDs (AKIA...)
79
+ - Secret Access Keys
80
+ - Environment variable forms such as AWS_ACCESS_KEY_ID=...
81
+
82
+ ### OpenAI
83
+ - Keys beginning with sk-
84
+
85
+ ### Passwords and Tokens
86
+ - password=...
87
+ - api_key=...
88
+ - Bearer tokens
89
+ - JWT tokens (xxx.yyy.zzz)
90
+
91
+ ### Private Keys
92
+ - -----BEGIN PRIVATE KEY-----
93
+
94
+ ### Cloud Provider Keys
95
+ - Azure storage account keys
96
+ - Redis/MySQL/Postgres/Mongo/FTP/SMTP connection strings
97
+
98
+ ## Automatic Skips
99
+
100
+ The scanner automatically ignores:
101
+
102
+ - .git, .hg, .svn
103
+ - node_modules
104
+ - Python virtual environments (venv, .venv, env)
105
+ - Binary files (null-byte detection)
106
+ - Large files (over 5 MB by default)
107
+ - Common non-text extensions (images, archives, executables)
108
+
109
+ ## Extending Detection Patterns
110
+
111
+ Detection patterns are defined in:
112
+
113
+ src/secret_scanner/patterns.py
114
+
115
+ You may extend or modify these patterns to detect additional token types.
116
+
117
+ ## Programmatic Usage
118
+
119
+ Example using the Python API:
120
+
121
+ from pathlib import Path
122
+ from secret_scanner.scanner import scan_directory
123
+
124
+ matches = scan_directory(Path("."), output_path=None)
125
+ for m in matches:
126
+ print(m["file"], m["line"], m["match"])
127
+
128
+ ## Running Tests
129
+
130
+ pytest -q
131
+
132
+ ## Contributing
133
+
134
+ Contributions are welcome.
135
+
136
+ 1. Fork the repository
137
+ 2. Create a feature branch
138
+ 3. Add tests for new functionality
139
+ 4. Open a pull request
140
+
141
+ ## License
142
+
143
+ This project is licensed under the MIT License. See the LICENSE file for full details.
144
+
@@ -0,0 +1,39 @@
1
+ [build-system]
2
+ requires = ["setuptools>=64", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "secret-scan"
7
+ version = "0.1.2"
8
+ description = "A simple secret/credential scanner for source code repositories."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { file = "LICENSE" }
12
+ authors = [
13
+ { name = "amitu314" }, { name = "harshahemanth" }
14
+ ]
15
+ keywords = ["security", "secrets", "credentials", "scanner"]
16
+ classifiers = [
17
+ "Programming Language :: Python :: 3",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Environment :: Console",
20
+ "Intended Audience :: Developers",
21
+ "Topic :: Security",
22
+ ]
23
+
24
+ dependencies = [] # stdlib only right now
25
+
26
+ [project.urls]
27
+ Homepage = "https://github.com/harshahemanth/secret-scan"
28
+ Repository = "https://github.com/harshahemanth/secret-scan"
29
+ Issues = "https://github.com/harshahemanth/secret-scan/issues"
30
+ Documentation = "https://github.com/harshahemanth/secret-scan#readme"
31
+
32
+ [project.scripts]
33
+ secret-scan = "secret_scanner.cli:main"
34
+
35
+ [tool.setuptools]
36
+ package-dir = {"" = "src"}
37
+
38
+ [tool.setuptools.packages.find]
39
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,187 @@
1
+ Metadata-Version: 2.4
2
+ Name: secret-scan
3
+ Version: 0.1.2
4
+ Summary: A simple secret/credential scanner for source code repositories.
5
+ Author-email: Your Name <you@example.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 amitu314, harshahemanth
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+
29
+ Project-URL: Homepage, https://github.com/harshahemanth/secret-scan
30
+ Project-URL: Repository, https://github.com/harshahemanth/secret-scan
31
+ Project-URL: Issues, https://github.com/harshahemanth/secret-scan/issues
32
+ Project-URL: Documentation, https://github.com/harshahemanth/secret-scan#readme
33
+ Keywords: security,secrets,credentials,scanner
34
+ Classifier: Programming Language :: Python :: 3
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Environment :: Console
37
+ Classifier: Intended Audience :: Developers
38
+ Classifier: Topic :: Security
39
+ Requires-Python: >=3.9
40
+ Description-Content-Type: text/markdown
41
+ License-File: LICENSE
42
+ Dynamic: license-file
43
+
44
+ # secret-scan
45
+
46
+ A fast, lightweight CLI tool to detect secrets in source code.
47
+
48
+ `secret-scan` scans directories for sensitive data such as:
49
+
50
+ - AWS Access Keys and Secret Keys
51
+ - OpenAI API keys (sk-...)
52
+ - Password assignments
53
+ - Bearer tokens
54
+ - SSH private keys
55
+ - Azure storage keys
56
+ - Generic API keys and tokens
57
+ - JWT tokens
58
+
59
+ It skips binary files, ignores common junk directories (node_modules, .git, venv, etc.), avoids scanning large files, and supports extensible regular expressions.
60
+
61
+ ## Installation
62
+
63
+ pip install secret-scan
64
+
65
+ To upgrade:
66
+
67
+ pip install --upgrade secret-scan
68
+
69
+ ## Basic Usage
70
+
71
+ Scan the current directory:
72
+
73
+ secret-scan .
74
+
75
+ Scan a specific directory:
76
+
77
+ secret-scan ~/projects/my-repo
78
+
79
+ Write results to a file (default: docsCred.txt):
80
+
81
+ secret-scan . --output secrets.txt
82
+
83
+ ## JSON Output
84
+
85
+ Generate JSON output (useful for CI pipelines):
86
+
87
+ secret-scan . --json
88
+
89
+ Example output:
90
+
91
+ [
92
+ {
93
+ "file": "config/settings.py",
94
+ "line": 20,
95
+ "match": "AWS_ACCESS_KEY_ID=AKIA1234567890ABCD12"
96
+ },
97
+ {
98
+ "file": "service/api.py",
99
+ "line": 42,
100
+ "match": "sk-ABCDEFGHIJKLMNOPQRSTUV123456"
101
+ }
102
+ ]
103
+
104
+ ## Command-Line Options
105
+
106
+ | Flag | Description |
107
+ |------------------|--------------------------------------------|
108
+ | --output <file> | Save text results (default: docsCred.txt) |
109
+ | --skip-ext .log | Skip specific file extensions |
110
+ | --skip-dir <dir> | Skip specific directories |
111
+ | --max-size-mb N | Scan only files smaller than N MB |
112
+ | --json | Print JSON results to stdout |
113
+
114
+ Example:
115
+
116
+ secret-scan . --skip-ext .log --skip-dir build --json
117
+
118
+ ## What It Detects
119
+
120
+ ### AWS
121
+ - Access Key IDs (AKIA...)
122
+ - Secret Access Keys
123
+ - Environment variable forms such as AWS_ACCESS_KEY_ID=...
124
+
125
+ ### OpenAI
126
+ - Keys beginning with sk-
127
+
128
+ ### Passwords and Tokens
129
+ - password=...
130
+ - api_key=...
131
+ - Bearer tokens
132
+ - JWT tokens (xxx.yyy.zzz)
133
+
134
+ ### Private Keys
135
+ - -----BEGIN PRIVATE KEY-----
136
+
137
+ ### Cloud Provider Keys
138
+ - Azure storage account keys
139
+ - Redis/MySQL/Postgres/Mongo/FTP/SMTP connection strings
140
+
141
+ ## Automatic Skips
142
+
143
+ The scanner automatically ignores:
144
+
145
+ - .git, .hg, .svn
146
+ - node_modules
147
+ - Python virtual environments (venv, .venv, env)
148
+ - Binary files (null-byte detection)
149
+ - Large files (over 5 MB by default)
150
+ - Common non-text extensions (images, archives, executables)
151
+
152
+ ## Extending Detection Patterns
153
+
154
+ Detection patterns are defined in:
155
+
156
+ src/secret_scanner/patterns.py
157
+
158
+ You may extend or modify these patterns to detect additional token types.
159
+
160
+ ## Programmatic Usage
161
+
162
+ Example using the Python API:
163
+
164
+ from pathlib import Path
165
+ from secret_scanner.scanner import scan_directory
166
+
167
+ matches = scan_directory(Path("."), output_path=None)
168
+ for m in matches:
169
+ print(m["file"], m["line"], m["match"])
170
+
171
+ ## Running Tests
172
+
173
+ pytest -q
174
+
175
+ ## Contributing
176
+
177
+ Contributions are welcome.
178
+
179
+ 1. Fork the repository
180
+ 2. Create a feature branch
181
+ 3. Add tests for new functionality
182
+ 4. Open a pull request
183
+
184
+ ## License
185
+
186
+ This project is licensed under the MIT License. See the LICENSE file for full details.
187
+
@@ -0,0 +1,12 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ src/secret_scan.egg-info/PKG-INFO
5
+ src/secret_scan.egg-info/SOURCES.txt
6
+ src/secret_scan.egg-info/dependency_links.txt
7
+ src/secret_scan.egg-info/entry_points.txt
8
+ src/secret_scan.egg-info/top_level.txt
9
+ src/secret_scanner/cli.py
10
+ src/secret_scanner/patterns.py
11
+ src/secret_scanner/scanner.py
12
+ tests/test_secret_scanner.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ secret-scan = secret_scanner.cli:main
@@ -0,0 +1 @@
1
+ secret_scanner
@@ -0,0 +1,89 @@
1
+ # src/secret_scanner/cli.py
2
+
3
+ import argparse
4
+ import json
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ from .scanner import scan_directory
9
+
10
+
11
def parse_args(argv=None):
    """
    Build the scanner's command-line parser and parse ``argv``.

    ``argv`` is passed straight to ``ArgumentParser.parse_args``; the
    populated ``argparse.Namespace`` is returned. Repeatable options
    (``--skip-dir``, ``--skip-ext``) accumulate into lists.
    """
    cli = argparse.ArgumentParser(
        description="Scan a directory for potential credentials/secrets."
    )

    # (flags, keyword options), listed in the order they are registered.
    option_table = [
        (("path",), {"help": "Directory to scan."}),
        (
            ("-o", "--output"),
            {
                "default": "docsCred.txt",
                "help": "Output file path for text results (default: docsCred.txt)",
            },
        ),
        (
            ("--max-size-mb",),
            {
                "type": int,
                "default": 5,
                "help": (
                    "Maximum file size in megabytes to scan (default: 5). "
                    "Use 0 or a negative value to disable the size limit."
                ),
            },
        ),
        (
            ("--skip-dir",),
            {
                "action": "append",
                "default": [],
                "help": "Additional directory name to skip. Can be passed multiple times.",
            },
        ),
        (
            ("--skip-ext",),
            {
                "action": "append",
                "default": [],
                "help": (
                    "Additional file extension to skip (e.g. .log). "
                    "Can be passed multiple times."
                ),
            },
        ),
        (
            ("--json",),
            {
                "action": "store_true",
                "help": "Print results as JSON to stdout.",
            },
        ),
    ]
    for flags, options in option_table:
        cli.add_argument(*flags, **options)

    return cli.parse_args(argv)
51
+
52
+
53
def main(argv=None):
    """
    Command-line entry point: parse options, run the scan, report results.

    Progress and summary messages go to stderr so that stdout carries only
    the JSON document when ``--json`` is given.

    Returns 0 after a completed scan and 2 when ``path`` is not an existing
    directory, so the value can be handed to ``sys.exit``.
    """
    args = parse_args(argv)

    root = Path(args.path).expanduser()
    if not root.is_dir():
        # os.walk() yields nothing for a missing path or a plain file, which
        # would otherwise be reported as a clean "0 potential secret(s)" scan.
        print(f"error: not a directory: {root}", file=sys.stderr)
        return 2

    output = Path(args.output).expanduser() if args.output else None

    # --max-size-mb is parsed as an int; 0 or a negative value disables the limit.
    max_bytes = args.max_size_mb * 1024 * 1024 if args.max_size_mb > 0 else None

    # None means "no additions"; the scanner always applies its defaults.
    extra_dirs = set(args.skip_dir) if args.skip_dir else None
    extra_exts = set(args.skip_ext) if args.skip_ext else None

    print(f"Scanning directory: {root}", file=sys.stderr)
    if output is not None:
        print(f"Writing text results to: {output}", file=sys.stderr)

    matches = scan_directory(
        root_path=root,
        output_path=output,
        skip_dirs=extra_dirs,
        skip_exts=extra_exts,
        max_file_size_bytes=max_bytes,
    )

    print(f"Scan complete. {len(matches)} potential secret(s) found.", file=sys.stderr)

    if args.json:
        # Pretty JSON to stdout, followed by a terminating newline.
        json.dump(matches, sys.stdout, indent=2)
        print()

    return 0
85
+
86
+
87
if __name__ == "__main__":
    # Hand main()'s return value to sys.exit so a status code it returns
    # reaches the shell; sys.exit(None) exits with status 0, so a main()
    # that returns nothing still exits cleanly.
    sys.exit(main(sys.argv[1:]))
89
+
@@ -0,0 +1,43 @@
1
+ # src/secret_scanner/patterns.py
2
+
3
+ import re
4
+
5
# Source text of the detection regex: one capturing group holding an ordered
# alternation. It is written for re.VERBOSE (whitespace is insignificant and
# "#" starts a comment) and build_pattern() compiles it with
# re.IGNORECASE | re.VERBOSE, so every alternative matches case-insensitively.
PATTERN_SOURCE = r"""
(
    # Connection URIs with embedded credentials: scheme://user:password@host
    # (the user part may be empty, as in redis://:password@host).
    (?:mongodb(?:\+srv)?|postgres(?:ql)?|mysql|rediss?|ftps?|smtps?)://[^\s:/@]*:[^\s/@]+@[^\s"']+|

    # Service keyword followed by a separator and a long token
    (?:mongodb|postgres|mysql|jdbc|redis|ftp|smtp)[\s_\-=:][A-Za-z0-9+=._-]{10,}|
    Azure_Storage_(?:AccountName|AccountKey|key|Key|KEY|AccessKey|ACCESSKEY|SasToken)[^\n]+|
    ClientSecret"\svalue=.+|
    (?:AccessKey|ACCESSKEY|ACCESS_KEY|Access_key)=\S{10,}|
    AccountKey=\S{10,}|
    secret_key_base:\s.[A-Za-z0-9_.-]{12,}|
    secret(?:\s|:|=).+[A-Za-z0-9_.-]{12,}|
    Bearer\s.\S{11,}|
    api[_-](?:key|token)(?::|=).[A-Za-z0-9_.-]{10,}|
    ssh-rsa\s+[A-Za-z0-9+/=]+|

    # PEM / PGP private key headers. The algorithm word is optional so the
    # PKCS#8 header "-----BEGIN PRIVATE KEY-----" matches, and the optional
    # " BLOCK" suffix covers "-----BEGIN PGP PRIVATE KEY BLOCK-----".
    -----BEGIN\s(?:(?:RSA|DSA|EC|PGP|OPENSSH|ENCRYPTED)\s)?PRIVATE\sKEY(?:\sBLOCK)?-----|

    (?:password|passwd|pwd|Password|PASSWORD)\s*[:=]\s*["']?[^\s"']{8,}|

    # JWT-shaped tokens: three base64url segments, the first starting with eyJ
    eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}|

    # AWS access key IDs (AKIA..., etc.)
    (?:AWS|aws)_?(?:ACCESS_KEY_ID|ACCESS_KEY|ACCESSKEY)\s*[:=]\s*["']?(?:AKIA|ASIA|AGPA|AIDA|AROA|ANPA)[0-9A-Z]{16}["']?|
    (?:AKIA|ASIA|AGPA|AIDA|AROA|ANPA)[0-9A-Z]{16}|  # standalone

    # AWS secret access keys
    (?:aws_secret_access_key|AWS_SECRET_ACCESS_KEY)\s*[:=]\s*["']?[A-Za-z0-9/+=]{40}["']?|
    aws_?(?:secret|access)?_?key\s*[:=]\s*["']?[A-Za-z0-9/+=]{16,}["']?|

    # OpenAI API keys (sk-)
    (?:OPENAI_API_KEY|openai_api_key)\s*[:=]\s*["']?sk-[A-Za-z0-9]{20,}["']?|
    sk-[A-Za-z0-9]{20,}
)
"""
39
+
40
+
41
def build_pattern() -> re.Pattern:
    """Compile PATTERN_SOURCE as a case-insensitive, verbose-mode regex."""
    flags = re.VERBOSE | re.IGNORECASE
    compiled = re.compile(PATTERN_SOURCE, flags)
    return compiled
43
+
@@ -0,0 +1,124 @@
1
+ # src/secret_scanner/scanner.py
2
+
3
+ import os
4
+ from pathlib import Path
5
+ import re
6
+
7
+ from .patterns import build_pattern
8
+
9
# Directory names scan_directory() prunes from the walk wherever they appear.
DEFAULT_SKIP_DIRS = {
    # version-control metadata
    ".git",
    ".hg",
    ".svn",
    # editor / IDE settings
    ".idea",
    ".vscode",
    # dependency trees and virtual environments
    "node_modules",
    ".venv",
    "venv",
    "env",
    # caches and build output
    "__pycache__",
    "dist",
    "build",
}

# Lower-case file extensions (leading dot included) that are never opened.
DEFAULT_SKIP_EXTS = {
    # images
    ".jpg",
    ".jpeg",
    ".png",
    ".gif",
    ".bmp",
    ".ico",
    # documents
    ".pdf",
    # archives
    ".zip",
    ".tar",
    ".gz",
    ".7z",
    ".rar",
    # native binaries
    ".exe",
    ".dll",
    ".so",
    ".dylib",
    # JVM artifacts
    ".class",
    ".jar",
}
25
+
26
+
27
def is_binary_file(path: Path, blocksize: int = 1024) -> bool:
    """
    Report whether ``path`` looks binary.

    Only the first ``blocksize`` bytes are inspected; the file counts as
    binary when that prefix contains a NUL byte. A file that cannot be
    opened or read is also reported as binary.
    """
    try:
        with path.open("rb") as stream:
            head = stream.read(blocksize)
    except OSError:
        # Unreadable: answer True, the same result as for binary content.
        return True
    return head.find(b"\0") >= 0
34
+
35
+
36
def scan_directory(
    root_path: Path,
    output_path: "Path | None" = None,
    skip_dirs=None,
    skip_exts=None,
    max_file_size_bytes: "int | None" = 5 * 1024 * 1024,
    pattern: "re.Pattern | None" = None,
):
    """
    Walk ``root_path`` and report every fragment matching the secret pattern.

    Parameters:
        root_path: directory to scan (a ``Path`` or a path string).
        output_path: when given, a text report is written there, one
            ``<file>:<line> | <match>`` line per hit. The file is truncated
            first and is itself excluded from the scan.
        skip_dirs: extra directory names to prune, added to DEFAULT_SKIP_DIRS.
        skip_exts: extra extensions to skip, added to DEFAULT_SKIP_EXTS;
            they are lower-cased and a leading dot is added when missing.
        max_file_size_bytes: files larger than this are skipped; ``None``
            disables the limit.
        pattern: compiled regex to apply; defaults to ``build_pattern()``.

    Returns:
        A list of dicts ``{"file": str, "line": int, "match": str}`` in the
        order the hits were found. Line numbers start at 1.

    Files that cannot be stat'ed or opened, and files whose first bytes
    contain a NUL, are skipped. Read errors are reported on stderr and the
    scan continues with the next file.
    """
    # The union annotations above are strings on purpose: the package
    # declares requires-python >=3.9, where evaluating ``Path | None`` when
    # the function is defined raises TypeError.
    import sys  # function-scope: this module has no file-level ``import sys``

    effective_skip_dirs = set(DEFAULT_SKIP_DIRS).union(skip_dirs or ())
    effective_skip_exts = set(DEFAULT_SKIP_EXTS).union(
        ext.lower() if ext.startswith(".") else f".{ext.lower()}"
        for ext in (skip_exts or ())
    )

    if pattern is None:
        pattern = build_pattern()

    matches_found: list[dict] = []
    root_path = Path(root_path).resolve()
    # Resolved so it can be compared with paths produced under the resolved root.
    report_path = Path(output_path).resolve() if output_path is not None else None

    # Opened once for the whole walk and closed in the ``finally`` below.
    report_file = (
        open(report_path, "w", encoding="utf-8")
        if report_path is not None
        else None
    )

    try:
        for current_root, dirnames, filenames in os.walk(root_path):
            # Prune in place so os.walk never descends into skipped directories.
            dirnames[:] = [d for d in dirnames if d not in effective_skip_dirs]

            for filename in filenames:
                file_path = Path(current_root) / filename

                # Never scan the report that is being written: it would
                # re-report its own lines as new findings.
                if file_path == report_path:
                    continue

                # Extension is the text after the last dot, lower-cased.
                ext = (
                    "." + filename.rsplit(".", 1)[-1].lower()
                    if "." in filename
                    else ""
                )
                if ext in effective_skip_exts:
                    continue

                if max_file_size_bytes is not None:
                    try:
                        if file_path.stat().st_size > max_file_size_bytes:
                            continue
                    except OSError:
                        continue

                if is_binary_file(file_path):
                    continue

                try:
                    # Undecodable bytes are dropped rather than aborting the file.
                    with file_path.open("r", encoding="utf-8", errors="ignore") as f:
                        for lineno, line in enumerate(f, start=1):
                            for m in pattern.finditer(line):
                                match_text = m.group(0)
                                matches_found.append(
                                    {
                                        "file": str(file_path),
                                        "line": lineno,
                                        "match": match_text,
                                    }
                                )
                                if report_file is not None:
                                    report_file.write(
                                        f"{file_path}:{lineno} | {match_text}\n"
                                    )
                except (OSError, UnicodeError) as e:
                    # stderr, not stdout: the CLI prints its JSON document on
                    # stdout and a stray message there would corrupt it.
                    print(f"Error reading file {file_path}: {e}", file=sys.stderr)
    finally:
        if report_file is not None:
            report_file.close()

    return matches_found
124
+
@@ -0,0 +1,137 @@
1
+ # tests/test_secret_scanner.py
2
+
3
+ from pathlib import Path
4
+
5
+ from secret_scanner.scanner import scan_directory, DEFAULT_SKIP_DIRS
6
+ from secret_scanner.patterns import build_pattern
7
+
8
+
9
def _write_text(path: Path, content: str):
    """Create any missing parent directories, then write ``content`` as UTF-8."""
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(content)
12
+
13
+
14
def _write_binary(path: Path, content: bytes):
    """Create any missing parent directories, then write ``content`` verbatim."""
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)
    path.write_bytes(content)
18
+
19
+
20
def test_skips_default_junk_dirs(tmp_path: Path):
    """
    A secret below node_modules must be ignored, while the same kind of
    secret in an ordinary application directory is reported.
    """
    ignored_source = tmp_path / "node_modules" / "secret.js"
    scanned_source = tmp_path / "app" / "config.py"
    _write_text(ignored_source, "password=supersecretjunk")
    _write_text(scanned_source, "password=mygoodsecret")

    found = scan_directory(tmp_path, tmp_path / "out.txt")
    hits = [entry["match"] for entry in found]

    # The regular application file is scanned ...
    assert any("mygoodsecret" in text for text in hits)
    # ... and nothing from node_modules shows up.
    assert not any("supersecretjunk" in text for text in hits)
43
+
44
+
45
def test_skips_binary_files(tmp_path: Path):
    """
    A file containing a NUL byte is treated as binary and skipped, even
    when it also contains something that looks like a credential.
    """
    _write_text(tmp_path / "config.txt", "password=plaintextsecret")
    # NUL byte up front, followed by a password-looking string.
    _write_binary(tmp_path / "binary.dat", b"\x00\x01\x02password=binarysecret")

    found = scan_directory(tmp_path, tmp_path / "out.txt")
    hits = [entry["match"] for entry in found]

    # The plain-text file is reported ...
    assert any("plaintextsecret" in text for text in hits)
    # ... the binary one is not.
    assert not any("binarysecret" in text for text in hits)
65
+
66
+
67
def test_respects_extra_skip_ext(tmp_path: Path):
    """
    Extensions passed through ``skip_exts`` are skipped in addition to the
    defaults; without them a .log file is scanned like any other text file.
    """
    log_file = tmp_path / "app.log"
    txt_file = tmp_path / "config.txt"
    _write_text(log_file, "password=logsecret")
    _write_text(txt_file, "password=txtsecret")

    def reported(results, needle, source):
        # True when some hit containing ``needle`` is attributed to ``source``.
        wanted = str(source)
        return any(needle in r["match"] and r["file"] == wanted for r in results)

    # Default behavior: both files are scanned.
    baseline = scan_directory(tmp_path, tmp_path / "out1.txt")
    assert reported(baseline, "logsecret", log_file)
    assert reported(baseline, "txtsecret", txt_file)

    # With .log excluded only the .txt hit remains.
    filtered = scan_directory(tmp_path, tmp_path / "out2.txt", skip_exts={".log"})
    assert reported(filtered, "txtsecret", txt_file)
    assert not reported(filtered, "logsecret", log_file)
94
+
95
+
96
def test_uses_default_regex_pattern(tmp_path: Path):
    """
    Passing the pattern from build_pattern() explicitly still recognizes a
    plain ``password=...`` assignment.
    """
    _write_text(tmp_path / "test.txt", "password=mydefaultpatternsecret")

    compiled = build_pattern()
    found = scan_directory(tmp_path, tmp_path / "out.txt", pattern=compiled)

    hits = [entry["match"] for entry in found]
    assert any("mydefaultpatternsecret" in text for text in hits)
108
+
109
+
110
def test_detects_aws_access_key(tmp_path: Path):
    """
    AWS access key IDs are detected.
    The fake ID used here is "AKIA" followed by 16 upper-case alphanumerics.
    """
    fake_key = "AKIA" + "1234567890ABCD12"  # 4 + 16 = 20 chars
    _write_text(tmp_path / "aws.txt", f"AWS_ACCESS_KEY_ID={fake_key}")

    found = scan_directory(tmp_path, tmp_path / "out.txt")

    hits = [entry["match"] for entry in found]
    assert any(fake_key in text for text in hits)
123
+
124
+
125
def test_detects_openai_key(tmp_path: Path):
    """
    OpenAI-style API keys (``sk-`` prefix) are detected when assigned in
    double quotes.
    """
    fake_key = "sk-ABCDEFGHIJKLMNOPQRSTUV123456"
    _write_text(tmp_path / "openai.txt", f'OPENAI_API_KEY="{fake_key}"')

    found = scan_directory(tmp_path, tmp_path / "out.txt")

    hits = [entry["match"] for entry in found]
    assert any(fake_key in text for text in hits)
137
+