secret-scan 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- secret_scan-0.1.2/LICENSE +22 -0
- secret_scan-0.1.2/PKG-INFO +187 -0
- secret_scan-0.1.2/README.md +144 -0
- secret_scan-0.1.2/pyproject.toml +39 -0
- secret_scan-0.1.2/setup.cfg +4 -0
- secret_scan-0.1.2/src/secret_scan.egg-info/PKG-INFO +187 -0
- secret_scan-0.1.2/src/secret_scan.egg-info/SOURCES.txt +12 -0
- secret_scan-0.1.2/src/secret_scan.egg-info/dependency_links.txt +1 -0
- secret_scan-0.1.2/src/secret_scan.egg-info/entry_points.txt +2 -0
- secret_scan-0.1.2/src/secret_scan.egg-info/top_level.txt +1 -0
- secret_scan-0.1.2/src/secret_scanner/cli.py +89 -0
- secret_scan-0.1.2/src/secret_scanner/patterns.py +43 -0
- secret_scan-0.1.2/src/secret_scanner/scanner.py +124 -0
- secret_scan-0.1.2/tests/test_secret_scanner.py +137 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 amitu314, harshahemanth
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: secret-scan
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: A simple secret/credential scanner for source code repositories.
|
|
5
|
+
Author-email: Your Name <you@example.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025 amitu314, harshahemanth
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
Project-URL: Homepage, https://github.com/harshahemanth/secret-scan
|
|
30
|
+
Project-URL: Repository, https://github.com/harshahemanth/secret-scan
|
|
31
|
+
Project-URL: Issues, https://github.com/harshahemanth/secret-scan/issues
|
|
32
|
+
Project-URL: Documentation, https://github.com/harshahemanth/secret-scan#readme
|
|
33
|
+
Keywords: security,secrets,credentials,scanner
|
|
34
|
+
Classifier: Programming Language :: Python :: 3
|
|
35
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
36
|
+
Classifier: Environment :: Console
|
|
37
|
+
Classifier: Intended Audience :: Developers
|
|
38
|
+
Classifier: Topic :: Security
|
|
39
|
+
Requires-Python: >=3.9
|
|
40
|
+
Description-Content-Type: text/markdown
|
|
41
|
+
License-File: LICENSE
|
|
42
|
+
Dynamic: license-file
|
|
43
|
+
|
|
44
|
+
# secret-scan
|
|
45
|
+
|
|
46
|
+
A fast, lightweight CLI tool to detect secrets in source code.
|
|
47
|
+
|
|
48
|
+
`secret-scan` scans directories for sensitive data such as:
|
|
49
|
+
|
|
50
|
+
- AWS Access Keys and Secret Keys
|
|
51
|
+
- OpenAI API keys (sk-...)
|
|
52
|
+
- Password assignments
|
|
53
|
+
- Bearer tokens
|
|
54
|
+
- SSH private keys
|
|
55
|
+
- Azure storage keys
|
|
56
|
+
- Generic API keys and tokens
|
|
57
|
+
- JWT tokens
|
|
58
|
+
|
|
59
|
+
It skips binary files, ignores common junk directories (node_modules, .git, venv, etc.), avoids scanning large files, and supports extensible regular expressions.
|
|
60
|
+
|
|
61
|
+
## Installation
|
|
62
|
+
|
|
63
|
+
pip install secret-scan
|
|
64
|
+
|
|
65
|
+
To upgrade:
|
|
66
|
+
|
|
67
|
+
pip install --upgrade secret-scan
|
|
68
|
+
|
|
69
|
+
## Basic Usage
|
|
70
|
+
|
|
71
|
+
Scan the current directory:
|
|
72
|
+
|
|
73
|
+
secret-scan .
|
|
74
|
+
|
|
75
|
+
Scan a specific directory:
|
|
76
|
+
|
|
77
|
+
secret-scan ~/projects/my-repo
|
|
78
|
+
|
|
79
|
+
Write results to a file (default: docsCred.txt):
|
|
80
|
+
|
|
81
|
+
secret-scan . --output secrets.txt
|
|
82
|
+
|
|
83
|
+
## JSON Output
|
|
84
|
+
|
|
85
|
+
Generate JSON output (useful for CI pipelines):
|
|
86
|
+
|
|
87
|
+
secret-scan . --json
|
|
88
|
+
|
|
89
|
+
Example output:
|
|
90
|
+
|
|
91
|
+
[
|
|
92
|
+
{
|
|
93
|
+
"file": "config/settings.py",
|
|
94
|
+
"line": 20,
|
|
95
|
+
"match": "AWS_ACCESS_KEY_ID=AKIA1234567890ABCD12"
|
|
96
|
+
},
|
|
97
|
+
{
|
|
98
|
+
"file": "service/api.py",
|
|
99
|
+
"line": 42,
|
|
100
|
+
"match": "sk-ABCDEFGHIJKLMNOPQRSTUV123456"
|
|
101
|
+
}
|
|
102
|
+
]
|
|
103
|
+
|
|
104
|
+
## Command-Line Options
|
|
105
|
+
|
|
106
|
+
| Flag | Description |
|
|
107
|
+
|------------------|--------------------------------------------|
|
|
108
|
+
| --output <file> | Save text results (default: docsCred.txt) |
|
|
109
|
+
| --skip-ext .log | Skip specific file extensions |
|
|
110
|
+
| --skip-dir <dir> | Skip specific directories |
|
|
111
|
+
| --max-size-mb N | Scan only files smaller than N MB |
|
|
112
|
+
| --json | Print JSON results to stdout |
|
|
113
|
+
|
|
114
|
+
Example:
|
|
115
|
+
|
|
116
|
+
secret-scan . --skip-ext .log --skip-dir build --json
|
|
117
|
+
|
|
118
|
+
## What It Detects
|
|
119
|
+
|
|
120
|
+
### AWS
|
|
121
|
+
- Access Key IDs (AKIA...)
|
|
122
|
+
- Secret Access Keys
|
|
123
|
+
- Environment variable forms such as AWS_ACCESS_KEY_ID=...
|
|
124
|
+
|
|
125
|
+
### OpenAI
|
|
126
|
+
- Keys beginning with sk-
|
|
127
|
+
|
|
128
|
+
### Passwords and Tokens
|
|
129
|
+
- password=...
|
|
130
|
+
- api_key=...
|
|
131
|
+
- Bearer tokens
|
|
132
|
+
- JWT tokens (xxx.yyy.zzz)
|
|
133
|
+
|
|
134
|
+
### Private Keys
|
|
135
|
+
- -----BEGIN PRIVATE KEY-----
|
|
136
|
+
|
|
137
|
+
### Cloud Provider Keys
|
|
138
|
+
- Azure storage account keys
|
|
139
|
+
- Redis/MySQL/Postgres/Mongo/FTP/SMTP connection strings
|
|
140
|
+
|
|
141
|
+
## Automatic Skips
|
|
142
|
+
|
|
143
|
+
The scanner automatically ignores:
|
|
144
|
+
|
|
145
|
+
- .git, .hg, .svn
|
|
146
|
+
- node_modules
|
|
147
|
+
- Python virtual environments (venv, .venv, env)
|
|
148
|
+
- Binary files (null-byte detection)
|
|
149
|
+
- Large files (over 5 MB by default)
|
|
150
|
+
- Common non-text extensions (images, archives, executables)
|
|
151
|
+
|
|
152
|
+
## Extending Detection Patterns
|
|
153
|
+
|
|
154
|
+
Detection patterns are defined in:
|
|
155
|
+
|
|
156
|
+
src/secret_scanner/patterns.py
|
|
157
|
+
|
|
158
|
+
You may extend or modify these patterns to detect additional token types.
|
|
159
|
+
|
|
160
|
+
## Programmatic Usage
|
|
161
|
+
|
|
162
|
+
Example using the Python API:
|
|
163
|
+
|
|
164
|
+
from pathlib import Path
|
|
165
|
+
from secret_scanner.scanner import scan_directory
|
|
166
|
+
|
|
167
|
+
matches = scan_directory(Path("."), output_path=None)
|
|
168
|
+
for m in matches:
|
|
169
|
+
print(m["file"], m["line"], m["match"])
|
|
170
|
+
|
|
171
|
+
## Running Tests
|
|
172
|
+
|
|
173
|
+
pytest -q
|
|
174
|
+
|
|
175
|
+
## Contributing
|
|
176
|
+
|
|
177
|
+
Contributions are welcome.
|
|
178
|
+
|
|
179
|
+
1. Fork the repository
|
|
180
|
+
2. Create a feature branch
|
|
181
|
+
3. Add tests for new functionality
|
|
182
|
+
4. Open a pull request
|
|
183
|
+
|
|
184
|
+
## License
|
|
185
|
+
|
|
186
|
+
This project is licensed under the MIT License. See the LICENSE file for full details.
|
|
187
|
+
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# secret-scan
|
|
2
|
+
|
|
3
|
+
A fast, lightweight CLI tool to detect secrets in source code.
|
|
4
|
+
|
|
5
|
+
`secret-scan` scans directories for sensitive data such as:
|
|
6
|
+
|
|
7
|
+
- AWS Access Keys and Secret Keys
|
|
8
|
+
- OpenAI API keys (sk-...)
|
|
9
|
+
- Password assignments
|
|
10
|
+
- Bearer tokens
|
|
11
|
+
- SSH private keys
|
|
12
|
+
- Azure storage keys
|
|
13
|
+
- Generic API keys and tokens
|
|
14
|
+
- JWT tokens
|
|
15
|
+
|
|
16
|
+
It skips binary files, ignores common junk directories (node_modules, .git, venv, etc.), avoids scanning large files, and supports extensible regular expressions.
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
pip install secret-scan
|
|
21
|
+
|
|
22
|
+
To upgrade:
|
|
23
|
+
|
|
24
|
+
pip install --upgrade secret-scan
|
|
25
|
+
|
|
26
|
+
## Basic Usage
|
|
27
|
+
|
|
28
|
+
Scan the current directory:
|
|
29
|
+
|
|
30
|
+
secret-scan .
|
|
31
|
+
|
|
32
|
+
Scan a specific directory:
|
|
33
|
+
|
|
34
|
+
secret-scan ~/projects/my-repo
|
|
35
|
+
|
|
36
|
+
Write results to a file (default: docsCred.txt):
|
|
37
|
+
|
|
38
|
+
secret-scan . --output secrets.txt
|
|
39
|
+
|
|
40
|
+
## JSON Output
|
|
41
|
+
|
|
42
|
+
Generate JSON output (useful for CI pipelines):
|
|
43
|
+
|
|
44
|
+
secret-scan . --json
|
|
45
|
+
|
|
46
|
+
Example output:
|
|
47
|
+
|
|
48
|
+
[
|
|
49
|
+
{
|
|
50
|
+
"file": "config/settings.py",
|
|
51
|
+
"line": 20,
|
|
52
|
+
"match": "AWS_ACCESS_KEY_ID=AKIA1234567890ABCD12"
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"file": "service/api.py",
|
|
56
|
+
"line": 42,
|
|
57
|
+
"match": "sk-ABCDEFGHIJKLMNOPQRSTUV123456"
|
|
58
|
+
}
|
|
59
|
+
]
|
|
60
|
+
|
|
61
|
+
## Command-Line Options
|
|
62
|
+
|
|
63
|
+
| Flag | Description |
|
|
64
|
+
|------------------|--------------------------------------------|
|
|
65
|
+
| --output <file> | Save text results (default: docsCred.txt) |
|
|
66
|
+
| --skip-ext .log | Skip specific file extensions |
|
|
67
|
+
| --skip-dir <dir> | Skip specific directories |
|
|
68
|
+
| --max-size-mb N | Scan only files smaller than N MB |
|
|
69
|
+
| --json | Print JSON results to stdout |
|
|
70
|
+
|
|
71
|
+
Example:
|
|
72
|
+
|
|
73
|
+
secret-scan . --skip-ext .log --skip-dir build --json
|
|
74
|
+
|
|
75
|
+
## What It Detects
|
|
76
|
+
|
|
77
|
+
### AWS
|
|
78
|
+
- Access Key IDs (AKIA...)
|
|
79
|
+
- Secret Access Keys
|
|
80
|
+
- Environment variable forms such as AWS_ACCESS_KEY_ID=...
|
|
81
|
+
|
|
82
|
+
### OpenAI
|
|
83
|
+
- Keys beginning with sk-
|
|
84
|
+
|
|
85
|
+
### Passwords and Tokens
|
|
86
|
+
- password=...
|
|
87
|
+
- api_key=...
|
|
88
|
+
- Bearer tokens
|
|
89
|
+
- JWT tokens (xxx.yyy.zzz)
|
|
90
|
+
|
|
91
|
+
### Private Keys
|
|
92
|
+
- -----BEGIN PRIVATE KEY-----
|
|
93
|
+
|
|
94
|
+
### Cloud Provider Keys
|
|
95
|
+
- Azure storage account keys
|
|
96
|
+
- Redis/MySQL/Postgres/Mongo/FTP/SMTP connection strings
|
|
97
|
+
|
|
98
|
+
## Automatic Skips
|
|
99
|
+
|
|
100
|
+
The scanner automatically ignores:
|
|
101
|
+
|
|
102
|
+
- .git, .hg, .svn
|
|
103
|
+
- node_modules
|
|
104
|
+
- Python virtual environments (venv, .venv, env)
|
|
105
|
+
- Binary files (null-byte detection)
|
|
106
|
+
- Large files (over 5 MB by default)
|
|
107
|
+
- Common non-text extensions (images, archives, executables)
|
|
108
|
+
|
|
109
|
+
## Extending Detection Patterns
|
|
110
|
+
|
|
111
|
+
Detection patterns are defined in:
|
|
112
|
+
|
|
113
|
+
src/secret_scanner/patterns.py
|
|
114
|
+
|
|
115
|
+
You may extend or modify these patterns to detect additional token types.
|
|
116
|
+
|
|
117
|
+
## Programmatic Usage
|
|
118
|
+
|
|
119
|
+
Example using the Python API:
|
|
120
|
+
|
|
121
|
+
from pathlib import Path
|
|
122
|
+
from secret_scanner.scanner import scan_directory
|
|
123
|
+
|
|
124
|
+
matches = scan_directory(Path("."), output_path=None)
|
|
125
|
+
for m in matches:
|
|
126
|
+
print(m["file"], m["line"], m["match"])
|
|
127
|
+
|
|
128
|
+
## Running Tests
|
|
129
|
+
|
|
130
|
+
pytest -q
|
|
131
|
+
|
|
132
|
+
## Contributing
|
|
133
|
+
|
|
134
|
+
Contributions are welcome.
|
|
135
|
+
|
|
136
|
+
1. Fork the repository
|
|
137
|
+
2. Create a feature branch
|
|
138
|
+
3. Add tests for new functionality
|
|
139
|
+
4. Open a pull request
|
|
140
|
+
|
|
141
|
+
## License
|
|
142
|
+
|
|
143
|
+
This project is licensed under the MIT License. See the LICENSE file for full details.
|
|
144
|
+
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=64", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "secret-scan"
|
|
7
|
+
version = "0.1.2"
|
|
8
|
+
description = "A simple secret/credential scanner for source code repositories."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { file = "LICENSE" }
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Your Name", email = "you@example.com" }
|
|
14
|
+
]
|
|
15
|
+
keywords = ["security", "secrets", "credentials", "scanner"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Environment :: Console",
|
|
20
|
+
"Intended Audience :: Developers",
|
|
21
|
+
"Topic :: Security",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
dependencies = [] # stdlib only right now
|
|
25
|
+
|
|
26
|
+
[project.urls]
|
|
27
|
+
Homepage = "https://github.com/harshahemanth/secret-scan"
|
|
28
|
+
Repository = "https://github.com/harshahemanth/secret-scan"
|
|
29
|
+
Issues = "https://github.com/harshahemanth/secret-scan/issues"
|
|
30
|
+
Documentation = "https://github.com/harshahemanth/secret-scan#readme"
|
|
31
|
+
|
|
32
|
+
[project.scripts]
|
|
33
|
+
secret-scan = "secret_scanner.cli:main"
|
|
34
|
+
|
|
35
|
+
[tool.setuptools]
|
|
36
|
+
package-dir = {"" = "src"}
|
|
37
|
+
|
|
38
|
+
[tool.setuptools.packages.find]
|
|
39
|
+
where = ["src"]
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: secret-scan
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: A simple secret/credential scanner for source code repositories.
|
|
5
|
+
Author-email: Your Name <you@example.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025 amitu314, harshahemanth
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
Project-URL: Homepage, https://github.com/harshahemanth/secret-scan
|
|
30
|
+
Project-URL: Repository, https://github.com/harshahemanth/secret-scan
|
|
31
|
+
Project-URL: Issues, https://github.com/harshahemanth/secret-scan/issues
|
|
32
|
+
Project-URL: Documentation, https://github.com/harshahemanth/secret-scan#readme
|
|
33
|
+
Keywords: security,secrets,credentials,scanner
|
|
34
|
+
Classifier: Programming Language :: Python :: 3
|
|
35
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
36
|
+
Classifier: Environment :: Console
|
|
37
|
+
Classifier: Intended Audience :: Developers
|
|
38
|
+
Classifier: Topic :: Security
|
|
39
|
+
Requires-Python: >=3.9
|
|
40
|
+
Description-Content-Type: text/markdown
|
|
41
|
+
License-File: LICENSE
|
|
42
|
+
Dynamic: license-file
|
|
43
|
+
|
|
44
|
+
# secret-scan
|
|
45
|
+
|
|
46
|
+
A fast, lightweight CLI tool to detect secrets in source code.
|
|
47
|
+
|
|
48
|
+
`secret-scan` scans directories for sensitive data such as:
|
|
49
|
+
|
|
50
|
+
- AWS Access Keys and Secret Keys
|
|
51
|
+
- OpenAI API keys (sk-...)
|
|
52
|
+
- Password assignments
|
|
53
|
+
- Bearer tokens
|
|
54
|
+
- SSH private keys
|
|
55
|
+
- Azure storage keys
|
|
56
|
+
- Generic API keys and tokens
|
|
57
|
+
- JWT tokens
|
|
58
|
+
|
|
59
|
+
It skips binary files, ignores common junk directories (node_modules, .git, venv, etc.), avoids scanning large files, and supports extensible regular expressions.
|
|
60
|
+
|
|
61
|
+
## Installation
|
|
62
|
+
|
|
63
|
+
pip install secret-scan
|
|
64
|
+
|
|
65
|
+
To upgrade:
|
|
66
|
+
|
|
67
|
+
pip install --upgrade secret-scan
|
|
68
|
+
|
|
69
|
+
## Basic Usage
|
|
70
|
+
|
|
71
|
+
Scan the current directory:
|
|
72
|
+
|
|
73
|
+
secret-scan .
|
|
74
|
+
|
|
75
|
+
Scan a specific directory:
|
|
76
|
+
|
|
77
|
+
secret-scan ~/projects/my-repo
|
|
78
|
+
|
|
79
|
+
Write results to a file (default: docsCred.txt):
|
|
80
|
+
|
|
81
|
+
secret-scan . --output secrets.txt
|
|
82
|
+
|
|
83
|
+
## JSON Output
|
|
84
|
+
|
|
85
|
+
Generate JSON output (useful for CI pipelines):
|
|
86
|
+
|
|
87
|
+
secret-scan . --json
|
|
88
|
+
|
|
89
|
+
Example output:
|
|
90
|
+
|
|
91
|
+
[
|
|
92
|
+
{
|
|
93
|
+
"file": "config/settings.py",
|
|
94
|
+
"line": 20,
|
|
95
|
+
"match": "AWS_ACCESS_KEY_ID=AKIA1234567890ABCD12"
|
|
96
|
+
},
|
|
97
|
+
{
|
|
98
|
+
"file": "service/api.py",
|
|
99
|
+
"line": 42,
|
|
100
|
+
"match": "sk-ABCDEFGHIJKLMNOPQRSTUV123456"
|
|
101
|
+
}
|
|
102
|
+
]
|
|
103
|
+
|
|
104
|
+
## Command-Line Options
|
|
105
|
+
|
|
106
|
+
| Flag | Description |
|
|
107
|
+
|------------------|--------------------------------------------|
|
|
108
|
+
| --output <file> | Save text results (default: docsCred.txt) |
|
|
109
|
+
| --skip-ext .log | Skip specific file extensions |
|
|
110
|
+
| --skip-dir <dir> | Skip specific directories |
|
|
111
|
+
| --max-size-mb N | Scan only files smaller than N MB |
|
|
112
|
+
| --json | Print JSON results to stdout |
|
|
113
|
+
|
|
114
|
+
Example:
|
|
115
|
+
|
|
116
|
+
secret-scan . --skip-ext .log --skip-dir build --json
|
|
117
|
+
|
|
118
|
+
## What It Detects
|
|
119
|
+
|
|
120
|
+
### AWS
|
|
121
|
+
- Access Key IDs (AKIA...)
|
|
122
|
+
- Secret Access Keys
|
|
123
|
+
- Environment variable forms such as AWS_ACCESS_KEY_ID=...
|
|
124
|
+
|
|
125
|
+
### OpenAI
|
|
126
|
+
- Keys beginning with sk-
|
|
127
|
+
|
|
128
|
+
### Passwords and Tokens
|
|
129
|
+
- password=...
|
|
130
|
+
- api_key=...
|
|
131
|
+
- Bearer tokens
|
|
132
|
+
- JWT tokens (xxx.yyy.zzz)
|
|
133
|
+
|
|
134
|
+
### Private Keys
|
|
135
|
+
- -----BEGIN PRIVATE KEY-----
|
|
136
|
+
|
|
137
|
+
### Cloud Provider Keys
|
|
138
|
+
- Azure storage account keys
|
|
139
|
+
- Redis/MySQL/Postgres/Mongo/FTP/SMTP connection strings
|
|
140
|
+
|
|
141
|
+
## Automatic Skips
|
|
142
|
+
|
|
143
|
+
The scanner automatically ignores:
|
|
144
|
+
|
|
145
|
+
- .git, .hg, .svn
|
|
146
|
+
- node_modules
|
|
147
|
+
- Python virtual environments (venv, .venv, env)
|
|
148
|
+
- Binary files (null-byte detection)
|
|
149
|
+
- Large files (over 5 MB by default)
|
|
150
|
+
- Common non-text extensions (images, archives, executables)
|
|
151
|
+
|
|
152
|
+
## Extending Detection Patterns
|
|
153
|
+
|
|
154
|
+
Detection patterns are defined in:
|
|
155
|
+
|
|
156
|
+
src/secret_scanner/patterns.py
|
|
157
|
+
|
|
158
|
+
You may extend or modify these patterns to detect additional token types.
|
|
159
|
+
|
|
160
|
+
## Programmatic Usage
|
|
161
|
+
|
|
162
|
+
Example using the Python API:
|
|
163
|
+
|
|
164
|
+
from pathlib import Path
|
|
165
|
+
from secret_scanner.scanner import scan_directory
|
|
166
|
+
|
|
167
|
+
matches = scan_directory(Path("."), output_path=None)
|
|
168
|
+
for m in matches:
|
|
169
|
+
print(m["file"], m["line"], m["match"])
|
|
170
|
+
|
|
171
|
+
## Running Tests
|
|
172
|
+
|
|
173
|
+
pytest -q
|
|
174
|
+
|
|
175
|
+
## Contributing
|
|
176
|
+
|
|
177
|
+
Contributions are welcome.
|
|
178
|
+
|
|
179
|
+
1. Fork the repository
|
|
180
|
+
2. Create a feature branch
|
|
181
|
+
3. Add tests for new functionality
|
|
182
|
+
4. Open a pull request
|
|
183
|
+
|
|
184
|
+
## License
|
|
185
|
+
|
|
186
|
+
This project is licensed under the MIT License. See the LICENSE file for full details.
|
|
187
|
+
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/secret_scan.egg-info/PKG-INFO
|
|
5
|
+
src/secret_scan.egg-info/SOURCES.txt
|
|
6
|
+
src/secret_scan.egg-info/dependency_links.txt
|
|
7
|
+
src/secret_scan.egg-info/entry_points.txt
|
|
8
|
+
src/secret_scan.egg-info/top_level.txt
|
|
9
|
+
src/secret_scanner/cli.py
|
|
10
|
+
src/secret_scanner/patterns.py
|
|
11
|
+
src/secret_scanner/scanner.py
|
|
12
|
+
tests/test_secret_scanner.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
secret_scanner
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# src/secret_scanner/cli.py
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from .scanner import scan_directory
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def parse_args(argv=None):
    """Parse the command-line arguments of the secret scanner.

    ``argv`` is the list of argument strings to parse; when it is ``None``
    argparse falls back to ``sys.argv[1:]``. Returns the populated
    ``argparse.Namespace`` with the attributes ``path``, ``output``,
    ``max_size_mb``, ``skip_dir``, ``skip_ext`` and ``json``.
    """
    cli = argparse.ArgumentParser(
        description="Scan a directory for potential credentials/secrets."
    )

    # Positional: the directory tree to walk.
    cli.add_argument("path", help="Directory to scan.")

    # Destination of the plain-text report.
    cli.add_argument(
        "-o",
        "--output",
        default="docsCred.txt",
        help="Output file path for text results (default: docsCred.txt)",
    )

    size_help = (
        "Maximum file size in megabytes to scan (default: 5). "
        "Use 0 or a negative value to disable the size limit."
    )
    cli.add_argument("--max-size-mb", type=int, default=5, help=size_help)

    # Repeatable options: every occurrence is appended to a list.
    repeatable = (
        (
            "--skip-dir",
            "Additional directory name to skip. Can be passed multiple times.",
        ),
        (
            "--skip-ext",
            "Additional file extension to skip (e.g. .log). "
            "Can be passed multiple times.",
        ),
    )
    for flag, help_text in repeatable:
        cli.add_argument(flag, action="append", default=[], help=help_text)

    cli.add_argument(
        "--json",
        action="store_true",
        help="Print results as JSON to stdout.",
    )

    return cli.parse_args(argv)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def main(argv=None):
    """Command-line entry point: scan a directory and report potential secrets.

    ``argv`` is forwarded to ``parse_args``. Progress and summary messages go
    to stderr so that stdout carries only the JSON document when ``--json``
    is given. The text report itself is written by ``scan_directory`` to the
    ``--output`` path.

    Raises:
        SystemExit: if the requested path is not an existing directory.
    """
    args = parse_args(argv)

    root = Path(args.path).expanduser()
    # os.walk() ignores errors by default, so a missing path (or a regular
    # file) would silently produce a "0 potential secret(s) found" report.
    # For a security tool that is a false all-clear, so fail loudly instead.
    if not root.is_dir():
        raise SystemExit(f"error: not a directory: {root}")

    output = Path(args.output).expanduser() if args.output else None

    # Zero or a negative --max-size-mb disables the size limit.
    if args.max_size_mb and args.max_size_mb > 0:
        max_bytes = args.max_size_mb * 1024 * 1024
    else:
        max_bytes = None

    # None (rather than an empty set) means "defaults only" to the scanner.
    extra_dirs = set(args.skip_dir) if args.skip_dir else None
    extra_exts = set(args.skip_ext) if args.skip_ext else None

    print(f"Scanning directory: {root}", file=sys.stderr)
    if output is not None:
        print(f"Writing text results to: {output}", file=sys.stderr)

    matches = scan_directory(
        root_path=root,
        output_path=output,
        skip_dirs=extra_dirs,
        skip_exts=extra_exts,
        max_file_size_bytes=max_bytes,
    )

    print(f"Scan complete. {len(matches)} potential secret(s) found.", file=sys.stderr)

    if args.json:
        # Pretty JSON to stdout, followed by a trailing newline.
        json.dump(matches, sys.stdout, indent=2)
        print()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# Script entry: hand the process arguments (without the program name) to main().
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)
|
|
89
|
+
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# src/secret_scanner/patterns.py
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
# Source of the detection regex. It is compiled with re.VERBOSE, so the
# whitespace and "#" comments below are ignored by the regex engine, and with
# re.IGNORECASE, so every alternative matches regardless of letter case.
PATTERN_SOURCE = r"""
(
    # Existing patterns ...

    (?:mongodb|postgres|mysql|jdbc|redis|ftp|smtp)[\s_\-=:][A-Za-z0-9+=._-]{10,}|
    Azure_Storage_(?:AccountName|AccountKey|key|Key|KEY|AccessKey|ACCESSKEY|SasToken)[^\n]+|
    ClientSecret"\svalue=.+|
    (?:AccessKey|ACCESSKEY|ACCESS_KEY|Access_key)=\S{10,}|
    AccountKey=\S{10,}|
    secret_key_base:\s.[A-Za-z0-9_.-]{12,}|
    secret(?:\s|:|=).+[A-Za-z0-9_.-]{12,}|
    Bearer\s.\S{11,}|
    api[_-](?:key|token)(?::|=).[A-Za-z0-9_.-]{10,}|
    ssh-rsa\s+[A-Za-z0-9+/=]+|

    # PEM / armored private key headers. The algorithm word is optional so the
    # plain PKCS8 header (BEGIN PRIVATE KEY) is detected too, and an optional
    # trailing BLOCK covers the PGP armor header.
    -----BEGIN\s(?:(?:RSA|DSA|EC|PGP|OPENSSH|ENCRYPTED)\s)?PRIVATE\sKEY(?:\sBLOCK)?-----|

    (?:password|passwd|pwd|Password|PASSWORD)\s*[:=]\s*["']?[^\s"']{8,}|
    eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}|


    # AWS access key IDs (AKIA..., etc.)
    (?:AWS|aws)_?(?:ACCESS_KEY_ID|ACCESS_KEY|ACCESSKEY)\s*[:=]\s*["']?(?:AKIA|ASIA|AGPA|AIDA|AROA|ANPA)[0-9A-Z]{16}["']?|
    (?:AKIA|ASIA|AGPA|AIDA|AROA|ANPA)[0-9A-Z]{16}|   # standalone


    # AWS secret access keys
    (?:aws_secret_access_key|AWS_SECRET_ACCESS_KEY)\s*[:=]\s*["']?[A-Za-z0-9/+=]{40}["']?|
    aws_?(?:secret|access)?_?key\s*[:=]\s*["']?[A-Za-z0-9/+=]{16,}["']?|


    # OpenAI API keys (sk-)
    (?:OPENAI_API_KEY|openai_api_key)\s*[:=]\s*["']?sk-[A-Za-z0-9]{20,}["']?|
    sk-[A-Za-z0-9]{20,}
)
"""


def build_pattern() -> re.Pattern:
    """Compile ``PATTERN_SOURCE`` into the regex used to detect secrets.

    The pattern is compiled case-insensitively and in verbose mode, which is
    what allows ``PATTERN_SOURCE`` to be laid out one alternative per line
    with inline comments.
    """
    return re.compile(PATTERN_SOURCE, re.IGNORECASE | re.VERBOSE)
|
|
43
|
+
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# src/secret_scanner/scanner.py
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from .patterns import build_pattern
|
|
8
|
+
|
|
9
|
+
# Directory names that the scanner never descends into.
DEFAULT_SKIP_DIRS = {
    # version-control metadata
    ".git",
    ".hg",
    ".svn",
    # editor / IDE settings
    ".idea",
    ".vscode",
    # dependencies and virtual environments
    "node_modules",
    ".venv",
    "venv",
    "env",
    # caches and build output
    "__pycache__",
    "dist",
    "build",
}

# Lower-case file extensions (with leading dot) that are skipped without
# opening the file.
DEFAULT_SKIP_EXTS = {
    # images
    ".jpg",
    ".jpeg",
    ".png",
    ".gif",
    ".bmp",
    ".ico",
    # documents
    ".pdf",
    # archives
    ".zip",
    ".tar",
    ".gz",
    ".7z",
    ".rar",
    # native binaries
    ".exe",
    ".dll",
    ".so",
    ".dylib",
    # JVM artifacts
    ".class",
    ".jar",
}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def is_binary_file(path: Path, blocksize: int = 1024) -> bool:
    """Report whether ``path`` looks like a binary file.

    Only the first ``blocksize`` bytes are inspected; the file counts as
    binary when that prefix contains a NUL byte. A file that cannot be
    opened or read is also reported as binary so that callers skip it.
    """
    try:
        with path.open("rb") as stream:
            head = stream.read(blocksize)
    except OSError:
        # Unreadable: treat as binary so the scanner leaves it alone.
        return True
    return head.find(b"\0") >= 0
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def scan_directory(
    root_path: Path,
    output_path: "Path | None" = None,
    skip_dirs=None,
    skip_exts=None,
    max_file_size_bytes: "int | None" = 5 * 1024 * 1024,
    pattern: "re.Pattern | None" = None,
):
    """Walk ``root_path`` and collect every line fragment matching ``pattern``.

    Args:
        root_path: Directory to scan; it is resolved to an absolute path.
        output_path: When given, the file is (re)created and one
            ``"<file>:<line> | <match>"`` line is written per match.
        skip_dirs: Extra directory names to prune, added to
            ``DEFAULT_SKIP_DIRS``.
        skip_exts: Extra extensions to skip, added to ``DEFAULT_SKIP_EXTS``.
            They are lower-cased and given a leading dot if missing.
        max_file_size_bytes: Files larger than this are skipped; ``None``
            disables the limit.
        pattern: Compiled regex to search with; defaults to
            ``build_pattern()``.

    Returns:
        A list of dicts of the form
        ``{"file": str, "line": int, "match": str}``.
    """
    # Annotations above are quoted on purpose: ``X | None`` is only evaluable
    # at definition time on Python 3.10+.
    import sys  # function-scope import: only needed for stderr diagnostics

    effective_skip_dirs = set(DEFAULT_SKIP_DIRS)
    if skip_dirs is not None:
        effective_skip_dirs.update(skip_dirs)

    effective_skip_exts = set(DEFAULT_SKIP_EXTS)
    if skip_exts is not None:
        effective_skip_exts.update(
            e.lower() if e.startswith(".") else f".{e.lower()}"
            for e in skip_exts
        )

    if pattern is None:
        pattern = build_pattern()

    matches_found = []
    root_path = Path(root_path).resolve()

    # Open the report once and reuse the handle for the whole walk.
    cred_file = (
        open(output_path, "w", encoding="utf-8")
        if output_path is not None
        else None
    )
    # The report often lives inside the scanned tree (e.g. ./docsCred.txt).
    # Remember where it is so its own lines are never scanned and re-reported.
    report_path = Path(output_path).resolve() if output_path is not None else None

    try:
        for current_root, dirnames, filenames in os.walk(root_path):
            # Prune in place so os.walk does not descend into skipped dirs.
            dirnames[:] = [d for d in dirnames if d not in effective_skip_dirs]

            for filename in filenames:
                file_path = Path(current_root) / filename

                # Cheap name comparison first; resolve only on a candidate.
                if (
                    report_path is not None
                    and filename == report_path.name
                    and file_path.resolve() == report_path
                ):
                    continue

                # Extension is the text after the last dot, so ".gitignore"
                # yields ".gitignore" and "a.tar.gz" yields ".gz".
                ext = (
                    "." + filename.split(".")[-1].lower()
                    if "." in filename
                    else ""
                )
                if ext in effective_skip_exts:
                    continue

                if max_file_size_bytes is not None:
                    try:
                        if file_path.stat().st_size > max_file_size_bytes:
                            continue
                    except OSError:
                        # Vanished or unreadable entry: nothing to scan.
                        continue

                if is_binary_file(file_path):
                    continue

                try:
                    with file_path.open("r", encoding="utf-8", errors="ignore") as f:
                        for lineno, line in enumerate(f, start=1):
                            for m in pattern.finditer(line):
                                match_text = m.group(0)
                                matches_found.append(
                                    {
                                        "file": str(file_path),
                                        "line": lineno,
                                        "match": match_text,
                                    }
                                )
                                if cred_file is not None:
                                    cred_file.write(
                                        f"{file_path}:{lineno} | {match_text}\n"
                                    )
                except OSError as e:
                    # Best effort per file. Only I/O failures are swallowed so
                    # programming errors (e.g. a bad ``pattern``) still surface.
                    # Diagnostics go to stderr to keep stdout clean for JSON.
                    print(f"Error reading file {file_path}: {e}", file=sys.stderr)
    finally:
        if cred_file is not None:
            cred_file.close()

    return matches_found
|
|
124
|
+
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# tests/test_secret_scanner.py
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from secret_scanner.scanner import scan_directory, DEFAULT_SKIP_DIRS
|
|
6
|
+
from secret_scanner.patterns import build_pattern
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _write_text(path: Path, content: str):
    """Write ``content`` to ``path`` as UTF-8 text, creating parent folders first."""
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(content)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _write_binary(path: Path, content: bytes):
    """Write raw ``content`` bytes to ``path``, creating parent folders first."""
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    path.write_bytes(content)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_skips_default_junk_dirs(tmp_path: Path):
    """
    A credential-looking line under ``node_modules`` must not be reported,
    while the same kind of line in an ordinary app file must be.
    """
    ignored_source = tmp_path / "node_modules" / "secret.js"
    scanned_source = tmp_path / "app" / "config.py"

    # Plant one "secret" in the junk directory and one in regular app code.
    _write_text(ignored_source, "password=supersecretjunk")
    _write_text(scanned_source, "password=mygoodsecret")

    results = scan_directory(tmp_path, tmp_path / "out.txt")
    matched_texts = [hit["match"] for hit in results]

    # The app/config.py secret is expected in the results.
    assert any("mygoodsecret" in text for text in matched_texts)

    # The node_modules secret must be absent from every match.
    assert all("supersecretjunk" not in text for text in matched_texts)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_skips_binary_files(tmp_path: Path):
    """
    A file containing null bytes must be ignored even when it embeds a
    credential-looking string; a plain text file must still be scanned.
    """
    plain_source = tmp_path / "config.txt"
    binary_source = tmp_path / "binary.dat"

    _write_text(plain_source, "password=plaintextsecret")
    # Leading null byte followed by a password-looking payload.
    _write_binary(binary_source, b"\x00\x01\x02password=binarysecret")

    results = scan_directory(tmp_path, tmp_path / "out.txt")
    matched_texts = [hit["match"] for hit in results]

    # The text file's secret is reported.
    assert any("plaintextsecret" in text for text in matched_texts)

    # Nothing from the binary file is reported.
    assert all("binarysecret" not in text for text in matched_texts)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def test_respects_extra_skip_ext(tmp_path: Path):
    """
    Extensions supplied through ``skip_exts`` must be excluded from the scan.

    The same tree is scanned twice: once with defaults, where both files are
    expected in the results, and once with ``.log`` skipped, where only the
    ``.txt`` file is expected.
    """

    def _reported(results, needle, source):
        # True when some record coming from ``source`` has ``needle`` in its match.
        wanted = str(source)
        return any(
            needle in hit["match"] and hit["file"] == wanted for hit in results
        )

    log_file = tmp_path / "app.log"
    txt_file = tmp_path / "config.txt"
    _write_text(log_file, "password=logsecret")
    _write_text(txt_file, "password=txtsecret")

    # Baseline scan: no extra extensions, so the .log file is still read.
    baseline = scan_directory(tmp_path, tmp_path / "out1.txt")
    assert _reported(baseline, "logsecret", log_file)
    assert _reported(baseline, "txtsecret", txt_file)

    # Filtered scan: .log files are explicitly skipped.
    filtered = scan_directory(tmp_path, tmp_path / "out2.txt", skip_exts={".log"})

    # config.txt is unaffected by the extra skip extension.
    assert _reported(filtered, "txtsecret", txt_file)

    # app.log must no longer contribute any match.
    assert not _reported(filtered, "logsecret", log_file)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_uses_default_regex_pattern(tmp_path: Path):
    """
    Sanity check: the pattern returned by ``build_pattern()`` with no
    arguments matches a common ``password=...`` assignment.
    """
    sample = tmp_path / "test.txt"
    _write_text(sample, "password=mydefaultpatternsecret")

    default_pattern = build_pattern()
    results = scan_directory(tmp_path, tmp_path / "out.txt", pattern=default_pattern)

    matched_texts = [hit["match"] for hit in results]
    assert any("mydefaultpatternsecret" in text for text in matched_texts)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def test_detects_aws_access_key(tmp_path: Path):
    """
    AWS access key IDs must be detected.

    The fixture uses the ``AKIA`` prefix followed by 16 uppercase
    alphanumeric characters, 20 characters in total.
    """
    fake_key = "AKIA1234567890ABCD12"  # 4 + 16 = 20 chars
    sample = tmp_path / "aws.txt"
    _write_text(sample, f"AWS_ACCESS_KEY_ID={fake_key}")

    results = scan_directory(tmp_path, tmp_path / "out.txt")

    matched_texts = [hit["match"] for hit in results]
    assert any(fake_key in text for text in matched_texts)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def test_detects_openai_key(tmp_path: Path):
    """
    OpenAI-style API keys (``sk-...``) must be detected, including when the
    value is wrapped in double quotes.
    """
    fake_key = "sk-ABCDEFGHIJKLMNOPQRSTUV123456"
    sample = tmp_path / "openai.txt"
    _write_text(sample, f'OPENAI_API_KEY="{fake_key}"')

    results = scan_directory(tmp_path, tmp_path / "out.txt")

    matched_texts = [hit["match"] for hit in results]
    assert any(fake_key in text for text in matched_texts)
|
|
137
|
+
|