credsweeper 1.11.5__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- credsweeper/__init__.py +21 -15
- credsweeper/__main__.py +158 -42
- credsweeper/app.py +18 -13
- credsweeper/common/keyword_pattern.py +19 -18
- credsweeper/common/morpheme_checklist.txt +28 -6
- credsweeper/config/__init__.py +0 -1
- credsweeper/config/config.py +4 -3
- credsweeper/credentials/__init__.py +0 -5
- credsweeper/credentials/augment_candidates.py +1 -1
- credsweeper/credentials/candidate.py +1 -1
- credsweeper/credentials/credential_manager.py +1 -1
- credsweeper/credentials/line_data.py +43 -8
- credsweeper/deep_scanner/__init__.py +0 -1
- credsweeper/deep_scanner/abstract_scanner.py +4 -3
- credsweeper/deep_scanner/byte_scanner.py +1 -1
- credsweeper/deep_scanner/bzip2_scanner.py +2 -2
- credsweeper/deep_scanner/csv_scanner.py +71 -0
- credsweeper/deep_scanner/deb_scanner.py +1 -1
- credsweeper/deep_scanner/deep_scanner.py +22 -12
- credsweeper/deep_scanner/docx_scanner.py +1 -1
- credsweeper/deep_scanner/eml_scanner.py +1 -1
- credsweeper/deep_scanner/encoder_scanner.py +1 -1
- credsweeper/deep_scanner/gzip_scanner.py +2 -2
- credsweeper/deep_scanner/html_scanner.py +1 -1
- credsweeper/deep_scanner/jclass_scanner.py +1 -1
- credsweeper/deep_scanner/jks_scanner.py +12 -3
- credsweeper/deep_scanner/lang_scanner.py +1 -1
- credsweeper/deep_scanner/lzma_scanner.py +2 -2
- credsweeper/deep_scanner/mxfile_scanner.py +1 -1
- credsweeper/deep_scanner/pdf_scanner.py +1 -1
- credsweeper/deep_scanner/pkcs_scanner.py +6 -2
- credsweeper/deep_scanner/pptx_scanner.py +1 -1
- credsweeper/deep_scanner/rpm_scanner.py +1 -1
- credsweeper/deep_scanner/rtf_scanner.py +41 -0
- credsweeper/deep_scanner/strings_scanner.py +52 -0
- credsweeper/deep_scanner/tar_scanner.py +2 -2
- credsweeper/deep_scanner/tmx_scanner.py +2 -2
- credsweeper/deep_scanner/xlsx_scanner.py +2 -2
- credsweeper/deep_scanner/xml_scanner.py +1 -1
- credsweeper/deep_scanner/zip_scanner.py +2 -2
- credsweeper/file_handler/__init__.py +0 -15
- credsweeper/file_handler/abstract_provider.py +3 -4
- credsweeper/file_handler/byte_content_provider.py +11 -2
- credsweeper/file_handler/content_provider.py +1 -1
- credsweeper/file_handler/data_content_provider.py +1 -1
- credsweeper/file_handler/diff_content_provider.py +133 -3
- credsweeper/file_handler/file_path_extractor.py +4 -2
- credsweeper/file_handler/files_provider.py +4 -4
- credsweeper/file_handler/patches_provider.py +7 -8
- credsweeper/file_handler/text_content_provider.py +8 -2
- credsweeper/filters/__init__.py +3 -4
- credsweeper/filters/filter.py +5 -3
- credsweeper/filters/group/__init__.py +0 -2
- credsweeper/filters/group/general_keyword.py +2 -2
- credsweeper/filters/group/general_pattern.py +2 -2
- credsweeper/filters/group/group.py +38 -36
- credsweeper/filters/group/password_keyword.py +9 -8
- credsweeper/filters/group/token_pattern.py +5 -5
- credsweeper/filters/group/url_credentials_group.py +8 -8
- credsweeper/filters/group/weird_base36_token.py +6 -6
- credsweeper/filters/group/weird_base64_token.py +5 -5
- credsweeper/filters/line_git_binary_check.py +5 -4
- credsweeper/filters/line_specific_key_check.py +6 -5
- credsweeper/filters/line_uue_part_check.py +5 -4
- credsweeper/filters/value_allowlist_check.py +6 -5
- credsweeper/filters/value_array_dictionary_check.py +8 -6
- credsweeper/filters/value_atlassian_token_check.py +6 -5
- credsweeper/filters/value_azure_token_check.py +6 -5
- credsweeper/filters/value_base32_data_check.py +8 -5
- credsweeper/filters/value_base64_data_check.py +6 -5
- credsweeper/filters/value_base64_encoded_pem_check.py +6 -5
- credsweeper/filters/value_base64_key_check.py +6 -5
- credsweeper/filters/value_base64_part_check.py +6 -5
- credsweeper/filters/value_basic_auth_check.py +37 -0
- credsweeper/filters/value_blocklist_check.py +6 -4
- credsweeper/filters/value_camel_case_check.py +8 -7
- credsweeper/filters/value_dictionary_keyword_check.py +6 -4
- credsweeper/filters/value_discord_bot_check.py +6 -5
- credsweeper/filters/value_entropy_base_check.py +6 -5
- credsweeper/filters/value_file_path_check.py +13 -8
- credsweeper/filters/value_github_check.py +8 -6
- credsweeper/filters/value_grafana_check.py +6 -5
- credsweeper/filters/value_grafana_service_check.py +5 -4
- credsweeper/filters/value_hex_number_check.py +5 -4
- credsweeper/filters/value_jfrog_token_check.py +6 -5
- credsweeper/filters/value_json_web_key_check.py +6 -5
- credsweeper/filters/value_json_web_token_check.py +6 -5
- credsweeper/filters/value_last_word_check.py +6 -4
- credsweeper/filters/{value_dictionary_value_length_check.py → value_length_check.py} +12 -6
- credsweeper/filters/value_method_check.py +5 -4
- credsweeper/filters/value_morphemes_check.py +43 -0
- credsweeper/filters/value_not_allowed_pattern_check.py +6 -5
- credsweeper/filters/value_not_part_encoded_check.py +4 -4
- credsweeper/filters/value_number_check.py +5 -4
- credsweeper/filters/value_pattern_check.py +61 -41
- credsweeper/filters/value_similarity_check.py +6 -4
- credsweeper/filters/value_split_keyword_check.py +5 -4
- credsweeper/filters/value_string_type_check.py +10 -7
- credsweeper/filters/value_token_base_check.py +5 -4
- credsweeper/filters/value_token_check.py +6 -5
- credsweeper/logger/__init__.py +0 -1
- credsweeper/logger/logger.py +1 -1
- credsweeper/ml_model/__init__.py +0 -1
- credsweeper/ml_model/features/__init__.py +1 -0
- credsweeper/ml_model/features/entropy_evaluation.py +1 -1
- credsweeper/ml_model/features/feature.py +2 -19
- credsweeper/ml_model/features/file_extension.py +2 -2
- credsweeper/ml_model/features/has_html_tag.py +12 -10
- credsweeper/ml_model/features/is_secret_numeric.py +5 -4
- credsweeper/ml_model/features/length_of_attribute.py +1 -1
- credsweeper/ml_model/features/morpheme_dense.py +15 -8
- credsweeper/ml_model/features/rule_name.py +2 -2
- credsweeper/ml_model/features/rule_severity.py +21 -0
- credsweeper/ml_model/features/search_in_attribute.py +1 -1
- credsweeper/ml_model/features/word_in.py +10 -33
- credsweeper/ml_model/features/word_in_path.py +6 -4
- credsweeper/ml_model/features/word_in_postamble.py +2 -5
- credsweeper/ml_model/features/word_in_preamble.py +2 -5
- credsweeper/ml_model/features/word_in_transition.py +2 -5
- credsweeper/ml_model/features/word_in_value.py +3 -4
- credsweeper/ml_model/features/word_in_variable.py +3 -4
- credsweeper/ml_model/ml_config.json +140 -27
- credsweeper/ml_model/ml_model.onnx +0 -0
- credsweeper/ml_model/ml_validator.py +4 -3
- credsweeper/rules/__init__.py +0 -1
- credsweeper/rules/config.yaml +329 -239
- credsweeper/rules/rule.py +4 -3
- credsweeper/scanner/__init__.py +0 -1
- credsweeper/scanner/scan_type/__init__.py +0 -5
- credsweeper/scanner/scan_type/multi_pattern.py +4 -4
- credsweeper/scanner/scan_type/pem_key_pattern.py +4 -4
- credsweeper/scanner/scan_type/scan_type.py +4 -4
- credsweeper/scanner/scan_type/single_pattern.py +4 -4
- credsweeper/scanner/scanner.py +24 -15
- credsweeper/secret/config.json +19 -6
- credsweeper/utils/__init__.py +0 -1
- credsweeper/utils/pem_key_detector.py +3 -3
- credsweeper/utils/util.py +24 -150
- {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/METADATA +7 -7
- credsweeper-1.13.3.dist-info/RECORD +164 -0
- credsweeper/filters/value_couple_keyword_check.py +0 -26
- credsweeper-1.11.5.dist-info/RECORD +0 -159
- {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/WHEEL +0 -0
- {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/entry_points.txt +0 -0
- {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,9 +1,10 @@
|
|
|
1
|
+
import os.path
|
|
1
2
|
from pathlib import Path
|
|
2
3
|
from typing import List, Any
|
|
3
4
|
|
|
4
5
|
import numpy as np
|
|
5
6
|
|
|
6
|
-
from credsweeper.credentials import Candidate
|
|
7
|
+
from credsweeper.credentials.candidate import Candidate
|
|
7
8
|
from credsweeper.ml_model.features.word_in import WordIn
|
|
8
9
|
|
|
9
10
|
|
|
@@ -16,9 +17,10 @@ class WordInPath(WordIn):
|
|
|
16
17
|
path = Path(file_path)
|
|
17
18
|
# apply ./ for normalised path to detect "/src" for relative path
|
|
18
19
|
posix_lower_path = path.as_posix().lower() if path.is_absolute() else f"./{path.as_posix().lower()}"
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
return
|
|
20
|
+
# prevent extra confusion from the same word in extension
|
|
21
|
+
path_without_extension, _ = os.path.splitext(posix_lower_path)
|
|
22
|
+
return self.word_in_(path_without_extension)
|
|
23
|
+
return np.array([self.zero])
|
|
22
24
|
|
|
23
25
|
def extract(self, candidate: Candidate) -> Any:
|
|
24
26
|
raise NotImplementedError
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import numpy as np
|
|
2
2
|
|
|
3
3
|
from credsweeper.common.constants import ML_HUNK
|
|
4
|
-
from credsweeper.credentials import Candidate
|
|
4
|
+
from credsweeper.credentials.candidate import Candidate
|
|
5
5
|
from credsweeper.ml_model.features.word_in import WordIn
|
|
6
6
|
|
|
7
7
|
|
|
@@ -15,7 +15,4 @@ class WordInPostamble(WordIn):
|
|
|
15
15
|
else candidate.line_data_list[0].value_end + ML_HUNK
|
|
16
16
|
postamble = candidate.line_data_list[0].line[candidate.line_data_list[0].value_end:postamble_end].strip()
|
|
17
17
|
|
|
18
|
-
if postamble
|
|
19
|
-
return self.word_in_str(postamble.lower())
|
|
20
|
-
else:
|
|
21
|
-
return np.array([np.zeros(shape=[self.dimension], dtype=np.int8)])
|
|
18
|
+
return self.word_in_(postamble.lower()) if postamble else np.array([self.zero])
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import numpy as np
|
|
2
2
|
|
|
3
3
|
from credsweeper.common.constants import ML_HUNK
|
|
4
|
-
from credsweeper.credentials import Candidate
|
|
4
|
+
from credsweeper.credentials.candidate import Candidate
|
|
5
5
|
from credsweeper.ml_model.features.word_in import WordIn
|
|
6
6
|
|
|
7
7
|
|
|
@@ -20,7 +20,4 @@ class WordInPreamble(WordIn):
|
|
|
20
20
|
else candidate.line_data_list[0].value_start - ML_HUNK
|
|
21
21
|
preamble = candidate.line_data_list[0].line[preamble_start:candidate.line_data_list[0].value_start].strip()
|
|
22
22
|
|
|
23
|
-
if preamble
|
|
24
|
-
return self.word_in_str(preamble.lower())
|
|
25
|
-
else:
|
|
26
|
-
return np.array([np.zeros(shape=[self.dimension], dtype=np.int8)])
|
|
23
|
+
return self.word_in_(preamble.lower()) if preamble else np.array([self.zero])
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import numpy as np
|
|
2
2
|
|
|
3
|
-
from credsweeper.credentials import Candidate
|
|
3
|
+
from credsweeper.credentials.candidate import Candidate
|
|
4
4
|
from credsweeper.ml_model.features.word_in import WordIn
|
|
5
5
|
|
|
6
6
|
|
|
@@ -15,7 +15,4 @@ class WordInTransition(WordIn):
|
|
|
15
15
|
else:
|
|
16
16
|
transition = ''
|
|
17
17
|
|
|
18
|
-
if transition
|
|
19
|
-
return self.word_in_str(transition.lower())
|
|
20
|
-
else:
|
|
21
|
-
return np.array([np.zeros(shape=[self.dimension], dtype=np.int8)])
|
|
18
|
+
return self.word_in_(transition.lower()) if transition else np.array([self.zero])
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import numpy as np
|
|
2
2
|
|
|
3
|
-
from credsweeper.credentials import Candidate
|
|
3
|
+
from credsweeper.credentials.candidate import Candidate
|
|
4
4
|
from credsweeper.ml_model.features.word_in import WordIn
|
|
5
5
|
|
|
6
6
|
|
|
@@ -10,6 +10,5 @@ class WordInValue(WordIn):
|
|
|
10
10
|
def extract(self, candidate: Candidate) -> np.ndarray:
|
|
11
11
|
"""Returns array of matching words for first line"""
|
|
12
12
|
if value := candidate.line_data_list[0].value:
|
|
13
|
-
return self.
|
|
14
|
-
|
|
15
|
-
return np.array([np.zeros(shape=[self.dimension], dtype=np.int8)])
|
|
13
|
+
return self.word_in_(value.lower())
|
|
14
|
+
return np.array([self.zero])
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import numpy as np
|
|
2
2
|
|
|
3
|
-
from credsweeper.credentials import Candidate
|
|
3
|
+
from credsweeper.credentials.candidate import Candidate
|
|
4
4
|
from credsweeper.ml_model.features.word_in import WordIn
|
|
5
5
|
|
|
6
6
|
|
|
@@ -10,6 +10,5 @@ class WordInVariable(WordIn):
|
|
|
10
10
|
def extract(self, candidate: Candidate) -> np.ndarray:
|
|
11
11
|
"""Returns array of matching words for first line"""
|
|
12
12
|
if variable := candidate.line_data_list[0].variable:
|
|
13
|
-
return self.
|
|
14
|
-
|
|
15
|
-
return np.zeros(shape=[self.dimension], dtype=np.int8)
|
|
13
|
+
return self.word_in_(variable.lower())
|
|
14
|
+
return np.array([self.zero])
|
|
@@ -8,6 +8,11 @@
|
|
|
8
8
|
"highest": 0.92996
|
|
9
9
|
},
|
|
10
10
|
"features": [
|
|
11
|
+
{
|
|
12
|
+
"type": "RuleSeverity",
|
|
13
|
+
"comment": "INFO=0.0, LOW=0.25, MEDIUM=0.5, HIGH=0.75, CRITICAL=1.0",
|
|
14
|
+
"kwargs": {}
|
|
15
|
+
},
|
|
11
16
|
{
|
|
12
17
|
"type": "EntropyEvaluation",
|
|
13
18
|
"kwargs": {}
|
|
@@ -58,7 +63,7 @@
|
|
|
58
63
|
"type": "SearchInAttribute",
|
|
59
64
|
"comment": "Repeated symbol",
|
|
60
65
|
"kwargs": {
|
|
61
|
-
"pattern": "
|
|
66
|
+
"pattern": "(?:(\\S)(\\S))((\\1.)|(.\\2)){7,}",
|
|
62
67
|
"attribute": "value"
|
|
63
68
|
}
|
|
64
69
|
},
|
|
@@ -66,7 +71,15 @@
|
|
|
66
71
|
"type": "SearchInAttribute",
|
|
67
72
|
"comment": "SHA marker",
|
|
68
73
|
"kwargs": {
|
|
69
|
-
"pattern": "
|
|
74
|
+
"pattern": "(?i:sha)[_-]?(224|256|384|512)",
|
|
75
|
+
"attribute": "value"
|
|
76
|
+
}
|
|
77
|
+
},
|
|
78
|
+
{
|
|
79
|
+
"type": "SearchInAttribute",
|
|
80
|
+
"comment": "ASN1 prefix for PEM keys",
|
|
81
|
+
"kwargs": {
|
|
82
|
+
"pattern": "MII",
|
|
70
83
|
"attribute": "value"
|
|
71
84
|
}
|
|
72
85
|
},
|
|
@@ -114,7 +127,7 @@
|
|
|
114
127
|
"type": "SearchInAttribute",
|
|
115
128
|
"comment": "VariableNotAllowedNameCheck",
|
|
116
129
|
"kwargs": {
|
|
117
|
-
"pattern": "(?i:(filters?|pub(lic)?)_?key)",
|
|
130
|
+
"pattern": "(?i:(sha[_-]?(224|256|384|512)|projects?|filters?|pub(lic)?)_?key)",
|
|
118
131
|
"attribute": "variable"
|
|
119
132
|
}
|
|
120
133
|
},
|
|
@@ -122,7 +135,7 @@
|
|
|
122
135
|
"type": "SearchInAttribute",
|
|
123
136
|
"comment": "VariableNotAllowedNameCheck",
|
|
124
137
|
"kwargs": {
|
|
125
|
-
"pattern": "(?i:(id|size|name|type|manager)$)",
|
|
138
|
+
"pattern": "(?i:(id|sum|size|name|type|manager|algorithm|pattern|view|error|date(time)?|time(stamp)?|tag|version|hash|rate)$)",
|
|
126
139
|
"attribute": "variable"
|
|
127
140
|
}
|
|
128
141
|
},
|
|
@@ -139,16 +152,30 @@
|
|
|
139
152
|
"kwargs": {
|
|
140
153
|
"words": [
|
|
141
154
|
" ",
|
|
155
|
+
".",
|
|
156
|
+
",",
|
|
157
|
+
"]",
|
|
158
|
+
"#",
|
|
142
159
|
"/",
|
|
143
|
-
"
|
|
160
|
+
"\\",
|
|
144
161
|
"_id",
|
|
162
|
+
"_at",
|
|
145
163
|
"_len",
|
|
164
|
+
"256",
|
|
165
|
+
"512",
|
|
146
166
|
"access",
|
|
167
|
+
"assert",
|
|
147
168
|
"cache",
|
|
148
169
|
"client",
|
|
149
170
|
"control",
|
|
150
|
-
"
|
|
171
|
+
"crypt",
|
|
172
|
+
"crypted",
|
|
173
|
+
"decrypt",
|
|
174
|
+
"encrypt",
|
|
175
|
+
"dummy",
|
|
176
|
+
"disable",
|
|
151
177
|
"example",
|
|
178
|
+
"expect",
|
|
152
179
|
"expire",
|
|
153
180
|
"fake",
|
|
154
181
|
"file",
|
|
@@ -173,6 +200,7 @@
|
|
|
173
200
|
"pw",
|
|
174
201
|
"secret",
|
|
175
202
|
"size",
|
|
203
|
+
"sha",
|
|
176
204
|
"space",
|
|
177
205
|
"status",
|
|
178
206
|
"sword",
|
|
@@ -195,6 +223,8 @@
|
|
|
195
223
|
"kwargs": {
|
|
196
224
|
"words": [
|
|
197
225
|
" ",
|
|
226
|
+
":",
|
|
227
|
+
"=",
|
|
198
228
|
"$(",
|
|
199
229
|
"${",
|
|
200
230
|
"(",
|
|
@@ -207,25 +237,40 @@
|
|
|
207
237
|
"[",
|
|
208
238
|
"_id",
|
|
209
239
|
"abc",
|
|
240
|
+
"aaaa",
|
|
210
241
|
"allow",
|
|
242
|
+
"arn:aws:",
|
|
211
243
|
"bar",
|
|
212
244
|
"disable",
|
|
213
245
|
"changeme",
|
|
246
|
+
"crypt",
|
|
247
|
+
"crypted",
|
|
248
|
+
"decrypt",
|
|
249
|
+
"edited",
|
|
250
|
+
"encrypt",
|
|
214
251
|
"example",
|
|
252
|
+
"expire",
|
|
215
253
|
"fake",
|
|
216
254
|
"file",
|
|
217
255
|
"foo",
|
|
256
|
+
"hash",
|
|
257
|
+
"hex",
|
|
258
|
+
"key",
|
|
218
259
|
"min",
|
|
219
260
|
"mock",
|
|
220
261
|
"my",
|
|
221
262
|
"nil",
|
|
222
263
|
"pass",
|
|
223
|
-
"passwd",
|
|
224
|
-
"password",
|
|
225
|
-
"pswd",
|
|
226
264
|
"public",
|
|
227
265
|
"pwd",
|
|
266
|
+
"redacted",
|
|
267
|
+
"rsa",
|
|
268
|
+
"salt",
|
|
269
|
+
"secret",
|
|
270
|
+
"sha",
|
|
271
|
+
"ssh",
|
|
228
272
|
"test",
|
|
273
|
+
"word",
|
|
229
274
|
"xxx",
|
|
230
275
|
"xyz"
|
|
231
276
|
]
|
|
@@ -248,9 +293,12 @@
|
|
|
248
293
|
"@",
|
|
249
294
|
"[",
|
|
250
295
|
"approval",
|
|
296
|
+
"arn:aws:",
|
|
251
297
|
"assert",
|
|
252
298
|
"case",
|
|
253
299
|
"circle",
|
|
300
|
+
"color",
|
|
301
|
+
"e.g.",
|
|
254
302
|
"equal",
|
|
255
303
|
"example",
|
|
256
304
|
"expect",
|
|
@@ -263,7 +311,6 @@
|
|
|
263
311
|
"none",
|
|
264
312
|
"null",
|
|
265
313
|
"pass",
|
|
266
|
-
"password",
|
|
267
314
|
"path",
|
|
268
315
|
"pwd",
|
|
269
316
|
"sqa",
|
|
@@ -271,7 +318,8 @@
|
|
|
271
318
|
"true",
|
|
272
319
|
"undefined",
|
|
273
320
|
"unit",
|
|
274
|
-
"width"
|
|
321
|
+
"width",
|
|
322
|
+
"word"
|
|
275
323
|
]
|
|
276
324
|
}
|
|
277
325
|
},
|
|
@@ -284,12 +332,18 @@
|
|
|
284
332
|
"&",
|
|
285
333
|
"(",
|
|
286
334
|
"->",
|
|
335
|
+
"=>",
|
|
287
336
|
".",
|
|
337
|
+
",",
|
|
288
338
|
"?",
|
|
289
339
|
"@",
|
|
290
340
|
"[",
|
|
341
|
+
"{",
|
|
291
342
|
"bearer",
|
|
343
|
+
"get",
|
|
344
|
+
"e.g.",
|
|
292
345
|
"equal",
|
|
346
|
+
"env",
|
|
293
347
|
"example",
|
|
294
348
|
"expect",
|
|
295
349
|
"line",
|
|
@@ -318,9 +372,13 @@
|
|
|
318
372
|
"?",
|
|
319
373
|
"@",
|
|
320
374
|
"[",
|
|
375
|
+
"}",
|
|
376
|
+
"\\",
|
|
321
377
|
"assert",
|
|
322
378
|
"case",
|
|
323
379
|
"circle",
|
|
380
|
+
"color",
|
|
381
|
+
"e.g.",
|
|
324
382
|
"equal",
|
|
325
383
|
"example",
|
|
326
384
|
"expect",
|
|
@@ -333,8 +391,6 @@
|
|
|
333
391
|
"none",
|
|
334
392
|
"null",
|
|
335
393
|
"pass",
|
|
336
|
-
"passwd",
|
|
337
|
-
"password",
|
|
338
394
|
"path",
|
|
339
395
|
"pwd",
|
|
340
396
|
"sqa",
|
|
@@ -342,7 +398,8 @@
|
|
|
342
398
|
"true",
|
|
343
399
|
"undefined",
|
|
344
400
|
"unit",
|
|
345
|
-
"width"
|
|
401
|
+
"width",
|
|
402
|
+
"word"
|
|
346
403
|
]
|
|
347
404
|
}
|
|
348
405
|
},
|
|
@@ -350,17 +407,62 @@
|
|
|
350
407
|
"type": "WordInPath",
|
|
351
408
|
"kwargs": {
|
|
352
409
|
"words": [
|
|
353
|
-
"
|
|
354
|
-
"
|
|
355
|
-
"/
|
|
410
|
+
"test",
|
|
411
|
+
"mock",
|
|
412
|
+
"/src",
|
|
413
|
+
"code",
|
|
414
|
+
"/include",
|
|
415
|
+
"internal",
|
|
416
|
+
"tool",
|
|
417
|
+
"util",
|
|
418
|
+
"example",
|
|
419
|
+
"sample",
|
|
420
|
+
"conf",
|
|
421
|
+
"secret",
|
|
422
|
+
"setting",
|
|
423
|
+
"security",
|
|
424
|
+
"secure",
|
|
425
|
+
"resource",
|
|
426
|
+
"fixture",
|
|
427
|
+
"docker",
|
|
428
|
+
"/docs",
|
|
429
|
+
"/doc/",
|
|
430
|
+
"document",
|
|
431
|
+
"/lang",
|
|
432
|
+
"/local",
|
|
433
|
+
"/lib",
|
|
434
|
+
"/spec",
|
|
435
|
+
"/pkg",
|
|
436
|
+
"/api",
|
|
437
|
+
"/rest",
|
|
438
|
+
"/opt",
|
|
439
|
+
"/sys",
|
|
440
|
+
"kube",
|
|
441
|
+
"kafka",
|
|
442
|
+
"cluster",
|
|
443
|
+
"template",
|
|
444
|
+
"other",
|
|
445
|
+
"public",
|
|
446
|
+
"init",
|
|
447
|
+
"client",
|
|
448
|
+
"server",
|
|
449
|
+
"/model",
|
|
450
|
+
"/modul",
|
|
451
|
+
"browser",
|
|
452
|
+
"/env/",
|
|
453
|
+
"/app",
|
|
454
|
+
"/assets/",
|
|
455
|
+
"vendor",
|
|
456
|
+
"readme",
|
|
457
|
+
"build",
|
|
458
|
+
"/dist-packages",
|
|
356
459
|
"/record",
|
|
357
460
|
"/script",
|
|
358
|
-
"/site-packages
|
|
359
|
-
"
|
|
360
|
-
"/
|
|
361
|
-
"/
|
|
362
|
-
"/
|
|
363
|
-
"/assets/"
|
|
461
|
+
"/site-packages",
|
|
462
|
+
"python",
|
|
463
|
+
"/usr",
|
|
464
|
+
"/etc",
|
|
465
|
+
"/fuzz"
|
|
364
466
|
]
|
|
365
467
|
}
|
|
366
468
|
},
|
|
@@ -378,6 +480,7 @@
|
|
|
378
480
|
"kwargs": {
|
|
379
481
|
"extensions": [
|
|
380
482
|
"",
|
|
483
|
+
".04",
|
|
381
484
|
".1",
|
|
382
485
|
".adoc",
|
|
383
486
|
".asciidoc",
|
|
@@ -391,12 +494,12 @@
|
|
|
391
494
|
".bundle",
|
|
392
495
|
".bzl",
|
|
393
496
|
".c",
|
|
497
|
+
".cast",
|
|
394
498
|
".cc",
|
|
395
499
|
".cf",
|
|
396
500
|
".cjs",
|
|
397
501
|
".cljc",
|
|
398
502
|
".cmd",
|
|
399
|
-
".cmm",
|
|
400
503
|
".cnf",
|
|
401
504
|
".coffee",
|
|
402
505
|
".conf",
|
|
@@ -407,8 +510,8 @@
|
|
|
407
510
|
".csp",
|
|
408
511
|
".csv",
|
|
409
512
|
".dist",
|
|
410
|
-
".doc",
|
|
411
513
|
".dockerfile",
|
|
514
|
+
".edited",
|
|
412
515
|
".eex",
|
|
413
516
|
".env",
|
|
414
517
|
".erb",
|
|
@@ -424,9 +527,13 @@
|
|
|
424
527
|
".go",
|
|
425
528
|
".golden",
|
|
426
529
|
".gradle",
|
|
530
|
+
".graphql",
|
|
427
531
|
".groovy",
|
|
532
|
+
".gtpl",
|
|
428
533
|
".h",
|
|
429
534
|
".haml",
|
|
535
|
+
".har",
|
|
536
|
+
".hpp",
|
|
430
537
|
".hs",
|
|
431
538
|
".html",
|
|
432
539
|
".idl",
|
|
@@ -456,6 +563,7 @@
|
|
|
456
563
|
".lua",
|
|
457
564
|
".m",
|
|
458
565
|
".manifest",
|
|
566
|
+
".markdown",
|
|
459
567
|
".markerb",
|
|
460
568
|
".md",
|
|
461
569
|
".mdx",
|
|
@@ -471,6 +579,7 @@
|
|
|
471
579
|
".odd",
|
|
472
580
|
".onnx",
|
|
473
581
|
".oracle",
|
|
582
|
+
".original",
|
|
474
583
|
".pan",
|
|
475
584
|
".patch",
|
|
476
585
|
".php",
|
|
@@ -488,17 +597,20 @@
|
|
|
488
597
|
".purs",
|
|
489
598
|
".pxd",
|
|
490
599
|
".py",
|
|
600
|
+
".pyi",
|
|
491
601
|
".pyx",
|
|
492
602
|
".r",
|
|
493
603
|
".rake",
|
|
494
604
|
".rb",
|
|
495
605
|
".re",
|
|
496
606
|
".red",
|
|
607
|
+
".response",
|
|
497
608
|
".resx",
|
|
498
609
|
".rexx",
|
|
499
610
|
".rnh",
|
|
500
611
|
".rrc",
|
|
501
612
|
".rs",
|
|
613
|
+
".rsa",
|
|
502
614
|
".rsp",
|
|
503
615
|
".rst",
|
|
504
616
|
".rules",
|
|
@@ -512,6 +624,7 @@
|
|
|
512
624
|
".storyboard",
|
|
513
625
|
".strings",
|
|
514
626
|
".sty",
|
|
627
|
+
".swift",
|
|
515
628
|
".t",
|
|
516
629
|
".td",
|
|
517
630
|
".tdf",
|
|
@@ -530,6 +643,7 @@
|
|
|
530
643
|
".tsx",
|
|
531
644
|
".txt",
|
|
532
645
|
".var",
|
|
646
|
+
".vsmdi",
|
|
533
647
|
".vue",
|
|
534
648
|
".xaml",
|
|
535
649
|
".xib",
|
|
@@ -550,9 +664,8 @@
|
|
|
550
664
|
"CMD Password",
|
|
551
665
|
"CMD Secret",
|
|
552
666
|
"CMD Token",
|
|
553
|
-
"
|
|
667
|
+
"CURL User Password",
|
|
554
668
|
"Credential",
|
|
555
|
-
"Github Old Token",
|
|
556
669
|
"Key",
|
|
557
670
|
"Nonce",
|
|
558
671
|
"Password",
|
|
Binary file
|
|
@@ -9,8 +9,9 @@ from onnxruntime import InferenceSession
|
|
|
9
9
|
|
|
10
10
|
import credsweeper.ml_model.features as features
|
|
11
11
|
from credsweeper.common.constants import ThresholdPreset, ML_HUNK
|
|
12
|
-
from credsweeper.credentials import Candidate
|
|
13
|
-
from credsweeper.
|
|
12
|
+
from credsweeper.credentials.candidate import Candidate
|
|
13
|
+
from credsweeper.credentials.candidate_key import CandidateKey
|
|
14
|
+
from credsweeper.utils.util import Util
|
|
14
15
|
|
|
15
16
|
logger = logging.getLogger(__name__)
|
|
16
17
|
|
|
@@ -271,7 +272,7 @@ class MlValidator:
|
|
|
271
272
|
if head != tail:
|
|
272
273
|
probability[head:tail] = self._batch_call_model(line_input_list, variable_input_list, value_input_list,
|
|
273
274
|
features_list)
|
|
274
|
-
is_cred =
|
|
275
|
+
is_cred = self.threshold <= probability
|
|
275
276
|
if logger.isEnabledFor(logging.DEBUG):
|
|
276
277
|
for i, decision in enumerate(is_cred):
|
|
277
278
|
logger.debug("ML decision: %s with prediction: %s for value: %s", decision, probability[i],
|
credsweeper/rules/__init__.py
CHANGED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from credsweeper.rules.rule import Rule
|