credsweeper 1.11.2__py3-none-any.whl → 1.11.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- credsweeper/__init__.py +1 -1
- credsweeper/__main__.py +7 -5
- credsweeper/app.py +28 -47
- credsweeper/common/constants.py +2 -5
- credsweeper/common/keyword_pattern.py +15 -9
- credsweeper/common/morpheme_checklist.txt +4 -2
- credsweeper/credentials/candidate_key.py +1 -1
- credsweeper/credentials/credential_manager.py +4 -3
- credsweeper/credentials/line_data.py +16 -15
- credsweeper/deep_scanner/abstract_scanner.py +10 -1
- credsweeper/deep_scanner/deb_scanner.py +48 -0
- credsweeper/deep_scanner/deep_scanner.py +65 -43
- credsweeper/deep_scanner/docx_scanner.py +1 -1
- credsweeper/deep_scanner/encoder_scanner.py +2 -2
- credsweeper/deep_scanner/gzip_scanner.py +1 -1
- credsweeper/deep_scanner/html_scanner.py +3 -3
- credsweeper/deep_scanner/jks_scanner.py +2 -4
- credsweeper/deep_scanner/lang_scanner.py +2 -2
- credsweeper/deep_scanner/lzma_scanner.py +40 -0
- credsweeper/deep_scanner/pkcs12_scanner.py +3 -5
- credsweeper/deep_scanner/xml_scanner.py +2 -2
- credsweeper/file_handler/byte_content_provider.py +2 -2
- credsweeper/file_handler/content_provider.py +1 -1
- credsweeper/file_handler/data_content_provider.py +23 -14
- credsweeper/file_handler/diff_content_provider.py +2 -2
- credsweeper/file_handler/file_path_extractor.py +1 -1
- credsweeper/file_handler/files_provider.py +2 -4
- credsweeper/file_handler/patches_provider.py +1 -1
- credsweeper/file_handler/string_content_provider.py +2 -2
- credsweeper/file_handler/struct_content_provider.py +1 -1
- credsweeper/file_handler/text_content_provider.py +2 -2
- credsweeper/filters/value_array_dictionary_check.py +3 -1
- credsweeper/filters/value_azure_token_check.py +1 -2
- credsweeper/filters/value_base64_encoded_pem_check.py +1 -1
- credsweeper/filters/value_base64_part_check.py +30 -21
- credsweeper/filters/value_discord_bot_check.py +1 -2
- credsweeper/filters/value_entropy_base32_check.py +11 -31
- credsweeper/filters/value_entropy_base36_check.py +11 -34
- credsweeper/filters/value_entropy_base64_check.py +15 -48
- credsweeper/filters/value_entropy_base_check.py +37 -0
- credsweeper/filters/value_file_path_check.py +1 -1
- credsweeper/filters/value_hex_number_check.py +3 -3
- credsweeper/filters/value_json_web_token_check.py +4 -5
- credsweeper/filters/value_pattern_check.py +64 -16
- credsweeper/filters/value_string_type_check.py +11 -3
- credsweeper/filters/value_token_base32_check.py +0 -4
- credsweeper/filters/value_token_base36_check.py +0 -4
- credsweeper/filters/value_token_base64_check.py +0 -4
- credsweeper/filters/value_token_check.py +1 -1
- credsweeper/ml_model/features/file_extension.py +2 -2
- credsweeper/ml_model/features/morpheme_dense.py +0 -4
- credsweeper/ml_model/features/rule_name.py +1 -1
- credsweeper/ml_model/features/word_in_path.py +0 -9
- credsweeper/ml_model/features/word_in_postamble.py +0 -11
- credsweeper/ml_model/features/word_in_preamble.py +0 -11
- credsweeper/ml_model/features/word_in_transition.py +0 -11
- credsweeper/ml_model/features/word_in_value.py +0 -11
- credsweeper/ml_model/features/word_in_variable.py +0 -11
- credsweeper/ml_model/ml_validator.py +45 -22
- credsweeper/rules/config.yaml +238 -208
- credsweeper/rules/rule.py +3 -3
- credsweeper/scanner/scan_type/scan_type.py +2 -3
- credsweeper/scanner/scanner.py +7 -1
- credsweeper/secret/config.json +16 -5
- credsweeper/utils/hop_stat.py +3 -3
- credsweeper/utils/pem_key_detector.py +8 -7
- credsweeper/utils/util.py +76 -146
- {credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/METADATA +1 -1
- {credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/RECORD +72 -70
- credsweeper/utils/entropy_validator.py +0 -72
- {credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/WHEEL +0 -0
- {credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/entry_points.txt +0 -0
- {credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
import contextlib
|
|
1
2
|
import datetime
|
|
2
3
|
import logging
|
|
3
4
|
from typing import List, Optional, Any, Tuple, Union
|
|
4
5
|
|
|
5
|
-
from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION
|
|
6
|
+
from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN, MIN_VALUE_LENGTH
|
|
6
7
|
from credsweeper.config import Config
|
|
7
8
|
from credsweeper.credentials import Candidate
|
|
8
9
|
from credsweeper.credentials.augment_candidates import augment_candidates
|
|
@@ -16,6 +17,7 @@ from credsweeper.scanner import Scanner
|
|
|
16
17
|
from credsweeper.utils import Util
|
|
17
18
|
from .byte_scanner import ByteScanner
|
|
18
19
|
from .bzip2_scanner import Bzip2Scanner
|
|
20
|
+
from .deb_scanner import DebScanner
|
|
19
21
|
from .docx_scanner import DocxScanner
|
|
20
22
|
from .eml_scanner import EmlScanner
|
|
21
23
|
from .encoder_scanner import EncoderScanner
|
|
@@ -23,6 +25,7 @@ from .gzip_scanner import GzipScanner
|
|
|
23
25
|
from .html_scanner import HtmlScanner
|
|
24
26
|
from .jks_scanner import JksScanner
|
|
25
27
|
from .lang_scanner import LangScanner
|
|
28
|
+
from .lzma_scanner import LzmaScanner
|
|
26
29
|
from .mxfile_scanner import MxfileScanner
|
|
27
30
|
from .pdf_scanner import PdfScanner
|
|
28
31
|
from .pkcs12_scanner import Pkcs12Scanner
|
|
@@ -48,10 +51,12 @@ class DeepScanner(
|
|
|
48
51
|
HtmlScanner, #
|
|
49
52
|
JksScanner, #
|
|
50
53
|
LangScanner, #
|
|
54
|
+
LzmaScanner, #
|
|
51
55
|
PdfScanner, #
|
|
52
56
|
Pkcs12Scanner, #
|
|
53
57
|
PptxScanner, #
|
|
54
58
|
TarScanner, #
|
|
59
|
+
DebScanner, #
|
|
55
60
|
XmlScanner, #
|
|
56
61
|
XlsxScanner, #
|
|
57
62
|
ZipScanner
|
|
@@ -106,9 +111,15 @@ class DeepScanner(
|
|
|
106
111
|
elif Util.is_bzip2(data):
|
|
107
112
|
if 0 < depth:
|
|
108
113
|
deep_scanners.append(Bzip2Scanner)
|
|
114
|
+
elif Util.is_lzma(data):
|
|
115
|
+
if 0 < depth:
|
|
116
|
+
deep_scanners.append(LzmaScanner)
|
|
109
117
|
elif Util.is_tar(data):
|
|
110
118
|
if 0 < depth:
|
|
111
119
|
deep_scanners.append(TarScanner)
|
|
120
|
+
elif Util.is_deb(data):
|
|
121
|
+
if 0 < depth:
|
|
122
|
+
deep_scanners.append(DebScanner)
|
|
112
123
|
elif Util.is_gzip(data):
|
|
113
124
|
if 0 < depth:
|
|
114
125
|
deep_scanners.append(GzipScanner)
|
|
@@ -140,13 +151,16 @@ class DeepScanner(
|
|
|
140
151
|
else:
|
|
141
152
|
fallback_scanners.append(EmlScanner)
|
|
142
153
|
fallback_scanners.append(ByteScanner)
|
|
154
|
+
elif Util.is_known(data):
|
|
155
|
+
# the format is known but cannot be scanned
|
|
156
|
+
pass
|
|
143
157
|
elif not Util.is_binary(data):
|
|
144
158
|
if 0 < depth:
|
|
145
159
|
deep_scanners.append(EncoderScanner)
|
|
146
160
|
deep_scanners.append(LangScanner)
|
|
147
161
|
deep_scanners.append(ByteScanner)
|
|
148
162
|
else:
|
|
149
|
-
logger.warning("Cannot apply a deep scanner for type %s", file_type)
|
|
163
|
+
logger.warning("Cannot apply a deep scanner for type %s prefix %s", file_type, str(data[:MIN_DATA_LEN]))
|
|
150
164
|
return deep_scanners, fallback_scanners
|
|
151
165
|
|
|
152
166
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
@@ -175,7 +189,7 @@ class DeepScanner(
|
|
|
175
189
|
# this scan is successful, so fallback is not necessary
|
|
176
190
|
fallback = False
|
|
177
191
|
if fallback:
|
|
178
|
-
for scan_class in
|
|
192
|
+
for scan_class in fallback_scanners:
|
|
179
193
|
fallback_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
|
|
180
194
|
if fallback_candidates is None:
|
|
181
195
|
continue
|
|
@@ -201,10 +215,10 @@ class DeepScanner(
|
|
|
201
215
|
int) else RECURSIVE_SCAN_LIMITATION
|
|
202
216
|
candidates: List[Candidate] = []
|
|
203
217
|
data: Optional[bytes] = None
|
|
204
|
-
if isinstance(content_provider, TextContentProvider
|
|
218
|
+
if isinstance(content_provider, (TextContentProvider, ByteContentProvider)):
|
|
205
219
|
# Feature to scan files which might be containers
|
|
206
220
|
data = content_provider.data
|
|
207
|
-
info = "FILE"
|
|
221
|
+
info = f"FILE:{content_provider.file_path}"
|
|
208
222
|
elif isinstance(content_provider, DiffContentProvider) and content_provider.diff:
|
|
209
223
|
candidates = self.scanner.scan(content_provider)
|
|
210
224
|
# Feature to scan binary diffs
|
|
@@ -212,7 +226,7 @@ class DeepScanner(
|
|
|
212
226
|
# the check for legal fix mypy issue
|
|
213
227
|
if isinstance(diff, bytes):
|
|
214
228
|
data = diff
|
|
215
|
-
info = "DIFF"
|
|
229
|
+
info = f"DIFF:{content_provider.file_path}"
|
|
216
230
|
else:
|
|
217
231
|
logger.warning(f"Content provider {type(content_provider)} does not support deep scan")
|
|
218
232
|
info = "NA"
|
|
@@ -239,15 +253,18 @@ class DeepScanner(
|
|
|
239
253
|
recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
|
|
240
254
|
"""
|
|
241
255
|
candidates: List[Candidate] = []
|
|
242
|
-
logger.debug("Start data_scan: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data), depth,
|
|
243
|
-
recursive_limit_size, data_provider.file_path, data_provider.info)
|
|
244
|
-
|
|
245
256
|
if 0 > depth:
|
|
246
257
|
# break recursion if maximal depth is reached
|
|
247
|
-
logger.debug("
|
|
258
|
+
logger.debug("Bottom reached %s recursive_limit_size:%d", data_provider.file_path, recursive_limit_size)
|
|
248
259
|
return candidates
|
|
249
|
-
|
|
250
260
|
depth -= 1
|
|
261
|
+
if MIN_DATA_LEN > len(data_provider.data):
|
|
262
|
+
# break recursion for minimal data size
|
|
263
|
+
logger.debug("Too small data: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data),
|
|
264
|
+
depth, recursive_limit_size, data_provider.file_path, data_provider.info)
|
|
265
|
+
return candidates
|
|
266
|
+
logger.debug("Start data_scan: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data), depth,
|
|
267
|
+
recursive_limit_size, data_provider.file_path, data_provider.info)
|
|
251
268
|
|
|
252
269
|
if FilePathExtractor.is_find_by_ext_file(self.config, data_provider.file_type):
|
|
253
270
|
# Skip scanning file and makes fake candidate due the extension is suspicious
|
|
@@ -287,7 +304,7 @@ class DeepScanner(
|
|
|
287
304
|
items: List[Tuple[Union[int, str], Any]] = []
|
|
288
305
|
struct_key: Optional[str] = None
|
|
289
306
|
struct_value: Optional[str] = None
|
|
290
|
-
|
|
307
|
+
lines_for_keyword_rules = []
|
|
291
308
|
if isinstance(struct_provider.struct, dict):
|
|
292
309
|
for key, value in struct_provider.struct.items():
|
|
293
310
|
if isinstance(value, (list, tuple)) and 1 == len(value):
|
|
@@ -298,13 +315,13 @@ class DeepScanner(
|
|
|
298
315
|
# for transformation {"key": "api_key", "value": "XXXXXXX"} -> {"api_key": "XXXXXXX"}
|
|
299
316
|
struct_key = struct_provider.struct.get("key")
|
|
300
317
|
struct_value = struct_provider.struct.get("value")
|
|
301
|
-
elif isinstance(struct_provider.struct, list
|
|
318
|
+
elif isinstance(struct_provider.struct, (list, tuple)):
|
|
302
319
|
items = list(enumerate(struct_provider.struct))
|
|
303
320
|
else:
|
|
304
321
|
logger.error("Not supported type:%s val:%s", str(type(struct_provider.struct)), str(struct_provider.struct))
|
|
305
322
|
|
|
306
323
|
for key, value in items:
|
|
307
|
-
if isinstance(value, dict) or isinstance(value, (list, tuple)) and 1
|
|
324
|
+
if isinstance(value, dict) or isinstance(value, (list, tuple)) and 1 <= len(value):
|
|
308
325
|
val_struct_provider = StructContentProvider(struct=value,
|
|
309
326
|
file_path=struct_provider.file_path,
|
|
310
327
|
file_type=struct_provider.file_type,
|
|
@@ -313,52 +330,57 @@ class DeepScanner(
|
|
|
313
330
|
candidates.extend(new_candidates)
|
|
314
331
|
|
|
315
332
|
elif isinstance(value, bytes):
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
333
|
+
if MIN_DATA_LEN <= len(value):
|
|
334
|
+
bytes_struct_provider = DataContentProvider(data=value,
|
|
335
|
+
file_path=struct_provider.file_path,
|
|
336
|
+
file_type=struct_provider.file_type,
|
|
337
|
+
info=f"{struct_provider.info}|BYTES:{key}")
|
|
338
|
+
new_limit = recursive_limit_size - len(value)
|
|
339
|
+
new_candidates = self.recursive_scan(bytes_struct_provider, depth, new_limit)
|
|
340
|
+
candidates.extend(new_candidates)
|
|
341
|
+
if MIN_VALUE_LENGTH <= len(value) and isinstance(key, str) \
|
|
342
|
+
and self.scanner.keywords_required_substrings_check(key.lower()):
|
|
343
|
+
str_val = str(value)
|
|
344
|
+
lines_for_keyword_rules.append(f"{key} = '{str_val}'" if '"' in str_val else f'{key} = "{str_val}"')
|
|
323
345
|
|
|
324
346
|
elif isinstance(value, str):
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
347
|
+
if MIN_DATA_LEN <= len(value):
|
|
348
|
+
# recursive scan only for data which may be decoded at least
|
|
349
|
+
with contextlib.suppress(UnicodeError):
|
|
350
|
+
data = value.encode(encoding=DEFAULT_ENCODING, errors='strict')
|
|
351
|
+
str_struct_provider = DataContentProvider(data=data,
|
|
352
|
+
file_path=struct_provider.file_path,
|
|
353
|
+
file_type=struct_provider.file_type,
|
|
354
|
+
info=f"{struct_provider.info}|STRING:{key}")
|
|
355
|
+
new_limit = recursive_limit_size - len(str_struct_provider.data)
|
|
356
|
+
new_candidates = self.recursive_scan(str_struct_provider, depth, new_limit)
|
|
357
|
+
candidates.extend(new_candidates)
|
|
334
358
|
# use key = "value" scan for common cases like in TOML
|
|
335
|
-
if isinstance(key, str)
|
|
336
|
-
|
|
359
|
+
if MIN_VALUE_LENGTH <= len(value) and isinstance(key, str) \
|
|
360
|
+
and self.scanner.keywords_required_substrings_check(key.lower()):
|
|
361
|
+
lines_for_keyword_rules.append(f"{key} = '{value}'" if '"' in value else f'{key} = "{value}"')
|
|
337
362
|
|
|
338
363
|
elif isinstance(value, (int, float, datetime.date, datetime.datetime)):
|
|
339
|
-
#
|
|
340
|
-
|
|
341
|
-
line_for_keyword_rules += f"{key} = \"{value}\"; "
|
|
342
|
-
|
|
364
|
+
# skip useless types
|
|
365
|
+
pass
|
|
343
366
|
else:
|
|
344
367
|
logger.warning("Not supported type:%s value(%s)", str(type(value)), str(value))
|
|
345
368
|
|
|
346
|
-
if
|
|
347
|
-
str_provider = StringContentProvider(
|
|
369
|
+
if lines_for_keyword_rules:
|
|
370
|
+
str_provider = StringContentProvider(lines_for_keyword_rules,
|
|
348
371
|
file_path=struct_provider.file_path,
|
|
349
|
-
file_type=".
|
|
350
|
-
info=f"{struct_provider.info}|KEYWORD:`{
|
|
372
|
+
file_type=".py",
|
|
373
|
+
info=f"{struct_provider.info}|KEYWORD:`{lines_for_keyword_rules}`")
|
|
351
374
|
new_candidates = self.scanner.scan(str_provider)
|
|
352
375
|
augment_candidates(candidates, new_candidates)
|
|
353
376
|
|
|
354
377
|
# last check when dictionary is {"key": "api_key", "value": "XXXXXXX"} -> {"api_key": "XXXXXXX"}
|
|
355
378
|
if isinstance(struct_key, str) and isinstance(struct_value, str):
|
|
356
|
-
line_for_keyword_rules = f"{struct_key} = \"{struct_value}\""
|
|
357
379
|
key_value_provider = StringContentProvider(
|
|
358
|
-
[
|
|
380
|
+
[f"{struct_key} = '{struct_value}'" if '"' in struct_value else f'{struct_key} = "{struct_value}"'],
|
|
359
381
|
file_path=struct_provider.file_path,
|
|
360
382
|
file_type=".toml",
|
|
361
|
-
info=f"{struct_provider.info}|KEY_VALUE:`{
|
|
383
|
+
info=f"{struct_provider.info}|KEY_VALUE:`{lines_for_keyword_rules}`")
|
|
362
384
|
new_candidates = self.scanner.scan(key_value_provider)
|
|
363
385
|
augment_candidates(candidates, new_candidates)
|
|
364
386
|
return candidates
|
|
@@ -42,7 +42,7 @@ class DocxScanner(AbstractScanner, ABC):
|
|
|
42
42
|
yield from DocxScanner._iter_block_items(block.footer)
|
|
43
43
|
return
|
|
44
44
|
elif isinstance(block, _Cell):
|
|
45
|
-
parent_elm = block._tc
|
|
45
|
+
parent_elm = block._tc # pylint: disable=W0212
|
|
46
46
|
else:
|
|
47
47
|
raise ValueError(f"unrecognised:{type(block)}")
|
|
48
48
|
|
|
@@ -18,11 +18,11 @@ class EncoderScanner(AbstractScanner, ABC):
|
|
|
18
18
|
depth: int, #
|
|
19
19
|
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
20
20
|
"""Tries to decode data from base64 encode to bytes and scan as bytes again"""
|
|
21
|
-
if data_provider.represent_as_encoded():
|
|
21
|
+
if result := data_provider.represent_as_encoded():
|
|
22
22
|
decoded_data_provider = DataContentProvider(data=data_provider.decoded,
|
|
23
23
|
file_path=data_provider.file_path,
|
|
24
24
|
file_type=data_provider.file_type,
|
|
25
25
|
info=f"{data_provider.info}|BASE64")
|
|
26
26
|
new_limit = recursive_limit_size - len(decoded_data_provider.data)
|
|
27
27
|
return self.recursive_scan(decoded_data_provider, depth, new_limit)
|
|
28
|
-
return None
|
|
28
|
+
return None if result is None else []
|
|
@@ -31,7 +31,7 @@ class GzipScanner(AbstractScanner, ABC):
|
|
|
31
31
|
gzip_content_provider = DataContentProvider(data=f.read(),
|
|
32
32
|
file_path=new_path,
|
|
33
33
|
file_type=Util.get_extension(new_path),
|
|
34
|
-
info=f"{data_provider.info}|GZIP:{
|
|
34
|
+
info=f"{data_provider.info}|GZIP:{new_path}")
|
|
35
35
|
new_limit = recursive_limit_size - len(gzip_content_provider.data)
|
|
36
36
|
gzip_candidates = self.recursive_scan(gzip_content_provider, depth, new_limit)
|
|
37
37
|
return gzip_candidates
|
|
@@ -19,12 +19,12 @@ class HtmlScanner(AbstractScanner, ABC):
|
|
|
19
19
|
depth: int, #
|
|
20
20
|
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
21
21
|
"""Tries to represent data as html text and scan as text lines"""
|
|
22
|
-
if data_provider.represent_as_html(depth, recursive_limit_size,
|
|
23
|
-
|
|
22
|
+
if result := data_provider.represent_as_html(depth, recursive_limit_size,
|
|
23
|
+
self.scanner.keywords_required_substrings_check):
|
|
24
24
|
string_data_provider = StringContentProvider(lines=data_provider.lines,
|
|
25
25
|
line_numbers=data_provider.line_numbers,
|
|
26
26
|
file_path=data_provider.file_path,
|
|
27
27
|
file_type=data_provider.file_type,
|
|
28
28
|
info=f"{data_provider.info}|HTML")
|
|
29
29
|
return self.scanner.scan(string_data_provider)
|
|
30
|
-
return None
|
|
30
|
+
return None if result is None else []
|
|
@@ -20,7 +20,6 @@ class JksScanner(AbstractScanner, ABC):
|
|
|
20
20
|
depth: int, #
|
|
21
21
|
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
22
22
|
"""Tries to scan JKS to open with standard password"""
|
|
23
|
-
candidates = []
|
|
24
23
|
for pw_probe in self.config.bruteforce_list:
|
|
25
24
|
try:
|
|
26
25
|
keystore = jks.KeyStore.loads(data_provider.data, pw_probe, try_decrypt_keys=True)
|
|
@@ -38,8 +37,7 @@ class JksScanner(AbstractScanner, ABC):
|
|
|
38
37
|
candidate.line_data_list[0].value = pw_probe or "<EMPTY PASSWORD>"
|
|
39
38
|
candidate.line_data_list[0].value_start = 1
|
|
40
39
|
candidate.line_data_list[0].value_end = 1 + len(candidate.line_data_list[0].value)
|
|
41
|
-
|
|
42
|
-
break
|
|
40
|
+
return [candidate]
|
|
43
41
|
except Exception as jks_exc:
|
|
44
42
|
logger.debug(f"{data_provider.file_path}:{pw_probe}:{jks_exc}")
|
|
45
|
-
return
|
|
43
|
+
return None
|
|
@@ -19,10 +19,10 @@ class LangScanner(AbstractScanner, ABC):
|
|
|
19
19
|
depth: int, #
|
|
20
20
|
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
21
21
|
"""Tries to represent data as markup language and scan as structure"""
|
|
22
|
-
if data_provider.represent_as_structure():
|
|
22
|
+
if result := data_provider.represent_as_structure():
|
|
23
23
|
struct_data_provider = StructContentProvider(struct=data_provider.structure,
|
|
24
24
|
file_path=data_provider.file_path,
|
|
25
25
|
file_type=data_provider.file_type,
|
|
26
26
|
info=f"{data_provider.info}|STRUCT")
|
|
27
27
|
return self.structure_scan(struct_data_provider, depth, recursive_limit_size)
|
|
28
|
-
return None
|
|
28
|
+
return None if result is None else []
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import lzma
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import List, Optional
|
|
6
|
+
|
|
7
|
+
from credsweeper.credentials import Candidate
|
|
8
|
+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
9
|
+
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
10
|
+
from credsweeper.utils import Util
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class LzmaScanner(AbstractScanner, ABC):
|
|
16
|
+
"""Implements lzma scanning"""
|
|
17
|
+
|
|
18
|
+
def data_scan(
|
|
19
|
+
self, #
|
|
20
|
+
data_provider: DataContentProvider, #
|
|
21
|
+
depth: int, #
|
|
22
|
+
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
23
|
+
"""Extracts data from lzma archive and launches data_scan"""
|
|
24
|
+
try:
|
|
25
|
+
file_path = Path(data_provider.file_path)
|
|
26
|
+
new_path = file_path.as_posix()
|
|
27
|
+
if ".xz" == file_path.suffix:
|
|
28
|
+
new_path = new_path[:-3]
|
|
29
|
+
elif ".lzma" == file_path.suffix:
|
|
30
|
+
new_path = new_path[:-5]
|
|
31
|
+
lzma_content_provider = DataContentProvider(data=lzma.decompress(data_provider.data),
|
|
32
|
+
file_path=new_path,
|
|
33
|
+
file_type=Util.get_extension(new_path),
|
|
34
|
+
info=f"{data_provider.info}|LZMA:{file_path}")
|
|
35
|
+
new_limit = recursive_limit_size - len(lzma_content_provider.data)
|
|
36
|
+
lzma_candidates = self.recursive_scan(lzma_content_provider, depth, new_limit)
|
|
37
|
+
return lzma_candidates
|
|
38
|
+
except Exception as lzma_exc:
|
|
39
|
+
logger.error(f"{data_provider.file_path}:{lzma_exc}")
|
|
40
|
+
return None
|
|
@@ -20,10 +20,9 @@ class Pkcs12Scanner(AbstractScanner, ABC):
|
|
|
20
20
|
depth: int, #
|
|
21
21
|
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
22
22
|
"""Tries to scan PKCS12 to open with standard password"""
|
|
23
|
-
candidates = []
|
|
24
23
|
for pw_probe in self.config.bruteforce_list:
|
|
25
24
|
try:
|
|
26
|
-
(private_key,
|
|
25
|
+
(private_key, _certificate, _additional_certificates) \
|
|
27
26
|
= cryptography.hazmat.primitives.serialization.pkcs12.load_key_and_certificates(data_provider.data,
|
|
28
27
|
pw_probe.encode())
|
|
29
28
|
# the password probe has passed, it will be the value
|
|
@@ -40,8 +39,7 @@ class Pkcs12Scanner(AbstractScanner, ABC):
|
|
|
40
39
|
candidate.line_data_list[0].value = value
|
|
41
40
|
candidate.line_data_list[0].value_start = 1
|
|
42
41
|
candidate.line_data_list[0].value_end = 1 + len(candidate.line_data_list[0].value)
|
|
43
|
-
|
|
44
|
-
break
|
|
42
|
+
return [candidate]
|
|
45
43
|
except Exception as pkcs_exc:
|
|
46
44
|
logger.debug(f"{data_provider.file_path}:{pw_probe}:{pkcs_exc}")
|
|
47
|
-
return
|
|
45
|
+
return None
|
|
@@ -19,11 +19,11 @@ class XmlScanner(AbstractScanner, ABC):
|
|
|
19
19
|
depth: int, #
|
|
20
20
|
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
21
21
|
"""Tries to represent data as xml text and scan as text lines"""
|
|
22
|
-
if data_provider.represent_as_xml():
|
|
22
|
+
if result := data_provider.represent_as_xml():
|
|
23
23
|
string_data_provider = StringContentProvider(lines=data_provider.lines,
|
|
24
24
|
line_numbers=data_provider.line_numbers,
|
|
25
25
|
file_path=data_provider.file_path,
|
|
26
26
|
file_type=data_provider.file_type,
|
|
27
27
|
info=f"{data_provider.info}|XML")
|
|
28
28
|
return self.scanner.scan(string_data_provider)
|
|
29
|
-
return None
|
|
29
|
+
return None if result is None else []
|
|
@@ -32,10 +32,10 @@ class ByteContentProvider(ContentProvider):
|
|
|
32
32
|
def free(self) -> None:
|
|
33
33
|
"""free data after scan to reduce memory usage"""
|
|
34
34
|
self.__data = None
|
|
35
|
-
if
|
|
35
|
+
if "data" in self.__dict__:
|
|
36
36
|
delattr(self, "data")
|
|
37
37
|
self.__lines = None
|
|
38
|
-
if
|
|
38
|
+
if "lines" in self.__dict__:
|
|
39
39
|
delattr(self, "lines")
|
|
40
40
|
|
|
41
41
|
@cached_property
|
|
@@ -93,7 +93,7 @@ class ContentProvider(ABC):
|
|
|
93
93
|
if min_len > len(line.strip()):
|
|
94
94
|
# Ignore target if stripped part is too short for all types
|
|
95
95
|
continue
|
|
96
|
-
|
|
96
|
+
if MAX_LINE_LENGTH < len(line):
|
|
97
97
|
for chunk_start, chunk_end in Util.get_chunks(len(line)):
|
|
98
98
|
target = AnalysisTarget(
|
|
99
99
|
line_pos=line_pos, #
|
|
@@ -54,10 +54,10 @@ class DataContentProvider(ContentProvider):
|
|
|
54
54
|
def free(self) -> None:
|
|
55
55
|
"""free data after scan to reduce memory usage"""
|
|
56
56
|
self.__data = None
|
|
57
|
-
if
|
|
57
|
+
if "data" in self.__dict__:
|
|
58
58
|
delattr(self, "data")
|
|
59
59
|
self.__text = None
|
|
60
|
-
if
|
|
60
|
+
if "text" in self.__dict__:
|
|
61
61
|
delattr(self, "text")
|
|
62
62
|
self.structure = None
|
|
63
63
|
self.decoded = None
|
|
@@ -76,9 +76,14 @@ class DataContentProvider(ContentProvider):
|
|
|
76
76
|
return self.structure is not None and (isinstance(self.structure, dict) and 0 < len(self.structure.keys())
|
|
77
77
|
or isinstance(self.structure, list) and 0 < len(self.structure))
|
|
78
78
|
|
|
79
|
-
def represent_as_structure(self) -> bool:
|
|
79
|
+
def represent_as_structure(self) -> Optional[bool]:
|
|
80
80
|
"""Tries to convert data with many parsers. Stores result to internal structure
|
|
81
|
-
|
|
81
|
+
|
|
82
|
+
Return:
|
|
83
|
+
True if some structure found
|
|
84
|
+
False if no data found
|
|
85
|
+
None if the format is not acceptable
|
|
86
|
+
|
|
82
87
|
"""
|
|
83
88
|
if MIN_DATA_LEN > len(self.text):
|
|
84
89
|
return False
|
|
@@ -134,13 +139,15 @@ class DataContentProvider(ContentProvider):
|
|
|
134
139
|
if self.__is_structure():
|
|
135
140
|
return True
|
|
136
141
|
# # # None of above
|
|
137
|
-
return
|
|
142
|
+
return None
|
|
138
143
|
|
|
139
|
-
def represent_as_xml(self) -> bool:
|
|
144
|
+
def represent_as_xml(self) -> Optional[bool]:
|
|
140
145
|
"""Tries to read data as xml
|
|
141
146
|
|
|
142
147
|
Return:
|
|
143
148
|
True if reading was successful
|
|
149
|
+
False if no data found
|
|
150
|
+
None if the format is not acceptable
|
|
144
151
|
|
|
145
152
|
"""
|
|
146
153
|
if MIN_XML_LEN > len(self.text):
|
|
@@ -150,14 +157,12 @@ class DataContentProvider(ContentProvider):
|
|
|
150
157
|
xml_text = self.text.splitlines()
|
|
151
158
|
self.lines, self.line_numbers = Util.get_xml_from_lines(xml_text)
|
|
152
159
|
logger.debug("CONVERTED from xml")
|
|
160
|
+
return bool(self.lines and self.line_numbers)
|
|
153
161
|
else:
|
|
154
162
|
logger.debug("Weak data to parse as XML")
|
|
155
|
-
return False
|
|
156
163
|
except Exception as exc:
|
|
157
164
|
logger.debug("Cannot parse as XML:%s %s", exc, self.data)
|
|
158
|
-
|
|
159
|
-
return bool(self.lines and self.line_numbers)
|
|
160
|
-
return False
|
|
165
|
+
return None
|
|
161
166
|
|
|
162
167
|
def _check_multiline_cell(self, cell: Tag) -> Optional[Tuple[int, str]]:
|
|
163
168
|
"""multiline cell will be analysed as text or return single line from cell
|
|
@@ -336,11 +341,13 @@ class DataContentProvider(ContentProvider):
|
|
|
336
341
|
self, #
|
|
337
342
|
depth: int, #
|
|
338
343
|
recursive_limit_size: int, #
|
|
339
|
-
keywords_required_substrings_check: Callable[[str], bool]) -> bool:
|
|
344
|
+
keywords_required_substrings_check: Callable[[str], bool]) -> Optional[bool]:
|
|
340
345
|
"""Tries to read data as html
|
|
341
346
|
|
|
342
347
|
Return:
|
|
343
348
|
True if reading was successful
|
|
349
|
+
False if no data found
|
|
350
|
+
None if the format is not acceptable
|
|
344
351
|
|
|
345
352
|
"""
|
|
346
353
|
try:
|
|
@@ -361,13 +368,15 @@ class DataContentProvider(ContentProvider):
|
|
|
361
368
|
logger.debug("Cannot parse as HTML:%s %s", exc, self.data)
|
|
362
369
|
else:
|
|
363
370
|
return bool(self.lines and self.line_numbers)
|
|
364
|
-
return
|
|
371
|
+
return None
|
|
365
372
|
|
|
366
|
-
def represent_as_encoded(self) -> bool:
|
|
373
|
+
def represent_as_encoded(self) -> Optional[bool]:
|
|
367
374
|
"""Decodes data from base64. Stores result in decoded
|
|
368
375
|
|
|
369
376
|
Return:
|
|
370
377
|
True if the data correctly parsed and verified
|
|
378
|
+
False if no data found
|
|
379
|
+
None if the format is not acceptable
|
|
371
380
|
|
|
372
381
|
"""
|
|
373
382
|
if len(self.data) < MIN_ENCODED_DATA_LEN \
|
|
@@ -383,7 +392,7 @@ class DataContentProvider(ContentProvider):
|
|
|
383
392
|
logger.debug("Cannot decoded as base64:%s %s", exc, self.data)
|
|
384
393
|
else:
|
|
385
394
|
return self.decoded is not None and 0 < len(self.decoded)
|
|
386
|
-
return
|
|
395
|
+
return None
|
|
387
396
|
|
|
388
397
|
def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTarget, None, None]:
|
|
389
398
|
"""Return nothing. The class provides only data storage.
|
|
@@ -48,8 +48,8 @@ class DiffContentProvider(ContentProvider):
|
|
|
48
48
|
|
|
49
49
|
def free(self) -> None:
|
|
50
50
|
"""free data after scan to reduce memory usage"""
|
|
51
|
-
self.__diff =
|
|
52
|
-
if
|
|
51
|
+
self.__diff = []
|
|
52
|
+
if "diff" in self.__dict__:
|
|
53
53
|
delattr(self, "diff")
|
|
54
54
|
|
|
55
55
|
@staticmethod
|
|
@@ -162,7 +162,7 @@ class FilePathExtractor:
|
|
|
162
162
|
True when the file is oversize or less than MIN_DATA_LEN, or unsupported
|
|
163
163
|
"""
|
|
164
164
|
path = reference[1] if isinstance(reference, tuple) else reference
|
|
165
|
-
if isinstance(path, str
|
|
165
|
+
if isinstance(path, (str, Path)):
|
|
166
166
|
file_size = os.path.getsize(path)
|
|
167
167
|
elif isinstance(path, io.BytesIO):
|
|
168
168
|
current_pos = path.tell()
|
|
@@ -42,7 +42,7 @@ class FilesProvider(AbstractProvider):
|
|
|
42
42
|
"""
|
|
43
43
|
text_content_provider_list: List[Union[DiffContentProvider, TextContentProvider]] = []
|
|
44
44
|
for path in self.paths:
|
|
45
|
-
if isinstance(path, str
|
|
45
|
+
if isinstance(path, (str, Path)):
|
|
46
46
|
new_files = FilePathExtractor.get_file_paths(config, path)
|
|
47
47
|
if self.skip_ignored:
|
|
48
48
|
new_files = FilePathExtractor.apply_gitignore(new_files)
|
|
@@ -50,9 +50,7 @@ class FilesProvider(AbstractProvider):
|
|
|
50
50
|
text_content_provider_list.append(TextContentProvider(_file))
|
|
51
51
|
elif isinstance(path, io.BytesIO):
|
|
52
52
|
text_content_provider_list.append(TextContentProvider((":memory:", path)))
|
|
53
|
-
elif isinstance(path, tuple)
|
|
54
|
-
and (isinstance(path[0], str) or isinstance(path[0], Path)) \
|
|
55
|
-
and isinstance(path[1], io.BytesIO):
|
|
53
|
+
elif isinstance(path, tuple) and (isinstance(path[0], (str, Path))) and isinstance(path[1], io.BytesIO):
|
|
56
54
|
# suppose, all the files must be scanned
|
|
57
55
|
text_content_provider_list.append(TextContentProvider(path))
|
|
58
56
|
else:
|
|
@@ -37,7 +37,7 @@ class PatchesProvider(AbstractProvider):
|
|
|
37
37
|
for file_path in self.paths:
|
|
38
38
|
if FilePathExtractor.check_file_size(config, file_path):
|
|
39
39
|
continue
|
|
40
|
-
if isinstance(file_path, str
|
|
40
|
+
if isinstance(file_path, (str, Path)):
|
|
41
41
|
raw_patches.append(Util.read_file(file_path))
|
|
42
42
|
elif isinstance(file_path, io.BytesIO):
|
|
43
43
|
the_patch = Util.decode_bytes(file_path.read())
|
|
@@ -38,10 +38,10 @@ class StringContentProvider(ContentProvider):
|
|
|
38
38
|
def free(self) -> None:
|
|
39
39
|
"""free data after scan to reduce memory usage"""
|
|
40
40
|
self.__lines = []
|
|
41
|
-
if
|
|
41
|
+
if "lines" in self.__dict__:
|
|
42
42
|
delattr(self, "lines")
|
|
43
43
|
self.__line_numbers = []
|
|
44
|
-
if
|
|
44
|
+
if "line_numbers" in self.__dict__:
|
|
45
45
|
delattr(self, "line_numbers")
|
|
46
46
|
|
|
47
47
|
@cached_property
|
|
@@ -38,7 +38,7 @@ class StructContentProvider(ContentProvider):
|
|
|
38
38
|
def free(self) -> None:
|
|
39
39
|
"""free data after scan to reduce memory usage"""
|
|
40
40
|
self.__struct = None
|
|
41
|
-
if
|
|
41
|
+
if "struct" in self.__dict__:
|
|
42
42
|
delattr(self, "struct")
|
|
43
43
|
|
|
44
44
|
def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTarget, None, None]:
|
|
@@ -42,10 +42,10 @@ class TextContentProvider(ContentProvider):
|
|
|
42
42
|
def free(self) -> None:
|
|
43
43
|
"""free data after scan to reduce memory usage"""
|
|
44
44
|
self.__data = None
|
|
45
|
-
if
|
|
45
|
+
if "data" in self.__dict__:
|
|
46
46
|
delattr(self, "data")
|
|
47
47
|
self.__lines = None
|
|
48
|
-
if
|
|
48
|
+
if "lines" in self.__dict__:
|
|
49
49
|
delattr(self, "lines")
|
|
50
50
|
if isinstance(self.__io, io.BytesIO) and self.__io and not self.__io.closed:
|
|
51
51
|
self.__io.close()
|