credsweeper 1.11.4__py3-none-any.whl → 1.11.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- credsweeper/__init__.py +21 -15
- credsweeper/__main__.py +141 -35
- credsweeper/app.py +11 -11
- credsweeper/config/__init__.py +0 -1
- credsweeper/config/config.py +1 -1
- credsweeper/credentials/__init__.py +0 -5
- credsweeper/credentials/augment_candidates.py +1 -1
- credsweeper/credentials/candidate.py +1 -1
- credsweeper/credentials/credential_manager.py +1 -1
- credsweeper/credentials/line_data.py +2 -2
- credsweeper/deep_scanner/__init__.py +0 -1
- credsweeper/deep_scanner/abstract_scanner.py +272 -17
- credsweeper/deep_scanner/byte_scanner.py +1 -1
- credsweeper/deep_scanner/bzip2_scanner.py +2 -2
- credsweeper/deep_scanner/deb_scanner.py +34 -27
- credsweeper/deep_scanner/deep_scanner.py +37 -250
- credsweeper/deep_scanner/docx_scanner.py +1 -1
- credsweeper/deep_scanner/eml_scanner.py +1 -1
- credsweeper/deep_scanner/encoder_scanner.py +1 -1
- credsweeper/deep_scanner/gzip_scanner.py +2 -2
- credsweeper/deep_scanner/html_scanner.py +1 -1
- credsweeper/deep_scanner/jclass_scanner.py +74 -0
- credsweeper/deep_scanner/jks_scanner.py +1 -1
- credsweeper/deep_scanner/lang_scanner.py +1 -1
- credsweeper/deep_scanner/lzma_scanner.py +2 -2
- credsweeper/deep_scanner/mxfile_scanner.py +1 -1
- credsweeper/deep_scanner/patch_scanner.py +48 -0
- credsweeper/deep_scanner/pdf_scanner.py +1 -1
- credsweeper/deep_scanner/pkcs_scanner.py +41 -0
- credsweeper/deep_scanner/pptx_scanner.py +1 -1
- credsweeper/deep_scanner/rpm_scanner.py +49 -0
- credsweeper/deep_scanner/sqlite3_scanner.py +79 -0
- credsweeper/deep_scanner/tar_scanner.py +2 -2
- credsweeper/deep_scanner/tmx_scanner.py +2 -2
- credsweeper/deep_scanner/xlsx_scanner.py +2 -2
- credsweeper/deep_scanner/xml_scanner.py +1 -1
- credsweeper/deep_scanner/zip_scanner.py +2 -2
- credsweeper/file_handler/__init__.py +0 -15
- credsweeper/file_handler/abstract_provider.py +3 -4
- credsweeper/file_handler/byte_content_provider.py +1 -1
- credsweeper/file_handler/content_provider.py +1 -1
- credsweeper/file_handler/data_content_provider.py +2 -3
- credsweeper/file_handler/diff_content_provider.py +133 -3
- credsweeper/file_handler/file_path_extractor.py +2 -2
- credsweeper/file_handler/files_provider.py +4 -4
- credsweeper/file_handler/patches_provider.py +10 -8
- credsweeper/file_handler/text_content_provider.py +1 -1
- credsweeper/filters/__init__.py +2 -2
- credsweeper/filters/filter.py +2 -2
- credsweeper/filters/group/__init__.py +0 -2
- credsweeper/filters/group/general_keyword.py +2 -2
- credsweeper/filters/group/general_pattern.py +2 -2
- credsweeper/filters/group/group.py +16 -5
- credsweeper/filters/group/password_keyword.py +2 -2
- credsweeper/filters/group/token_pattern.py +2 -2
- credsweeper/filters/group/url_credentials_group.py +2 -2
- credsweeper/filters/group/weird_base36_token.py +2 -2
- credsweeper/filters/group/weird_base64_token.py +2 -2
- credsweeper/filters/line_git_binary_check.py +3 -3
- credsweeper/filters/line_specific_key_check.py +4 -4
- credsweeper/filters/line_uue_part_check.py +3 -3
- credsweeper/filters/value_allowlist_check.py +4 -4
- credsweeper/filters/value_array_dictionary_check.py +3 -3
- credsweeper/filters/value_atlassian_token_check.py +4 -4
- credsweeper/filters/value_azure_token_check.py +4 -4
- credsweeper/filters/value_base32_data_check.py +4 -4
- credsweeper/filters/value_base64_data_check.py +4 -4
- credsweeper/filters/value_base64_encoded_pem_check.py +4 -4
- credsweeper/filters/value_base64_key_check.py +13 -18
- credsweeper/filters/value_base64_part_check.py +4 -4
- credsweeper/filters/value_basic_auth_check.py +36 -0
- credsweeper/filters/value_blocklist_check.py +3 -3
- credsweeper/filters/value_camel_case_check.py +4 -4
- credsweeper/filters/value_couple_keyword_check.py +3 -3
- credsweeper/filters/value_dictionary_keyword_check.py +3 -3
- credsweeper/filters/value_dictionary_value_length_check.py +3 -3
- credsweeper/filters/value_discord_bot_check.py +4 -4
- credsweeper/filters/value_entropy_base_check.py +4 -4
- credsweeper/filters/value_file_path_check.py +5 -4
- credsweeper/filters/value_github_check.py +3 -3
- credsweeper/filters/value_grafana_check.py +4 -4
- credsweeper/filters/value_grafana_service_check.py +3 -3
- credsweeper/filters/value_hex_number_check.py +3 -3
- credsweeper/filters/value_jfrog_token_check.py +4 -4
- credsweeper/filters/value_json_web_key_check.py +37 -0
- credsweeper/filters/value_json_web_token_check.py +4 -4
- credsweeper/filters/value_last_word_check.py +3 -3
- credsweeper/filters/value_method_check.py +3 -3
- credsweeper/filters/value_not_allowed_pattern_check.py +4 -4
- credsweeper/filters/value_not_part_encoded_check.py +3 -3
- credsweeper/filters/value_number_check.py +3 -3
- credsweeper/filters/value_pattern_check.py +3 -3
- credsweeper/filters/value_similarity_check.py +3 -3
- credsweeper/filters/value_split_keyword_check.py +3 -3
- credsweeper/filters/value_string_type_check.py +3 -3
- credsweeper/filters/value_token_base_check.py +3 -3
- credsweeper/filters/value_token_check.py +3 -3
- credsweeper/logger/__init__.py +0 -1
- credsweeper/logger/logger.py +1 -1
- credsweeper/ml_model/__init__.py +0 -1
- credsweeper/ml_model/features/entropy_evaluation.py +1 -1
- credsweeper/ml_model/features/feature.py +1 -1
- credsweeper/ml_model/features/file_extension.py +1 -1
- credsweeper/ml_model/features/has_html_tag.py +2 -2
- credsweeper/ml_model/features/is_secret_numeric.py +1 -1
- credsweeper/ml_model/features/length_of_attribute.py +1 -1
- credsweeper/ml_model/features/morpheme_dense.py +1 -1
- credsweeper/ml_model/features/rule_name.py +1 -1
- credsweeper/ml_model/features/search_in_attribute.py +1 -1
- credsweeper/ml_model/features/word_in.py +1 -1
- credsweeper/ml_model/features/word_in_path.py +1 -1
- credsweeper/ml_model/features/word_in_postamble.py +1 -1
- credsweeper/ml_model/features/word_in_preamble.py +1 -1
- credsweeper/ml_model/features/word_in_transition.py +1 -1
- credsweeper/ml_model/features/word_in_value.py +1 -1
- credsweeper/ml_model/features/word_in_variable.py +1 -1
- credsweeper/ml_model/ml_validator.py +3 -2
- credsweeper/rules/__init__.py +0 -1
- credsweeper/rules/config.yaml +114 -25
- credsweeper/rules/rule.py +4 -3
- credsweeper/scanner/__init__.py +0 -1
- credsweeper/scanner/scan_type/__init__.py +0 -5
- credsweeper/scanner/scan_type/multi_pattern.py +5 -6
- credsweeper/scanner/scan_type/pem_key_pattern.py +4 -4
- credsweeper/scanner/scan_type/scan_type.py +4 -4
- credsweeper/scanner/scan_type/single_pattern.py +4 -4
- credsweeper/scanner/scanner.py +8 -5
- credsweeper/secret/config.json +6 -6
- credsweeper/utils/__init__.py +0 -1
- credsweeper/utils/pem_key_detector.py +5 -5
- credsweeper/utils/util.py +143 -206
- {credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/METADATA +3 -6
- credsweeper-1.11.6.dist-info/RECORD +160 -0
- credsweeper/deep_scanner/pkcs12_scanner.py +0 -45
- credsweeper-1.11.4.dist-info/RECORD +0 -154
- {credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/WHEEL +0 -0
- {credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/entry_points.txt +0 -0
- {credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,20 +1,10 @@
|
|
|
1
|
-
import contextlib
|
|
2
|
-
import datetime
|
|
3
1
|
import logging
|
|
4
|
-
from typing import List, Optional, Any, Tuple, Union
|
|
2
|
+
from typing import List, Any, Tuple
|
|
5
3
|
|
|
6
|
-
from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN, DEFAULT_ENCODING, MIN_VALUE_LENGTH
|
|
7
|
-
from credsweeper.config import Config
|
|
8
|
-
from credsweeper.credentials import Candidate
|
|
9
|
-
from credsweeper.credentials.augment_candidates import augment_candidates
|
|
10
|
-
from credsweeper.file_handler.byte_content_provider import ByteContentProvider
|
|
11
|
-
from credsweeper.file_handler.content_provider import ContentProvider
|
|
12
|
-
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
13
|
-
from credsweeper.file_handler.diff_content_provider import DiffContentProvider
|
|
14
|
-
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
15
|
-
from credsweeper.file_handler.text_content_provider import TextContentProvider
|
|
16
|
-
from credsweeper.scanner import Scanner
|
|
17
|
-
from credsweeper.utils import Util
|
|
4
|
+
from credsweeper.common.constants import MIN_DATA_LEN
|
|
5
|
+
from credsweeper.config.config import Config
|
|
6
|
+
from credsweeper.scanner.scanner import Scanner
|
|
7
|
+
from credsweeper.utils.util import Util
|
|
18
8
|
from .byte_scanner import ByteScanner
|
|
19
9
|
from .bzip2_scanner import Bzip2Scanner
|
|
20
10
|
from .deb_scanner import DebScanner
|
|
@@ -23,21 +13,23 @@ from .eml_scanner import EmlScanner
|
|
|
23
13
|
from .encoder_scanner import EncoderScanner
|
|
24
14
|
from .gzip_scanner import GzipScanner
|
|
25
15
|
from .html_scanner import HtmlScanner
|
|
16
|
+
from .jclass_scanner import JclassScanner
|
|
26
17
|
from .jks_scanner import JksScanner
|
|
27
18
|
from .lang_scanner import LangScanner
|
|
28
19
|
from .lzma_scanner import LzmaScanner
|
|
29
20
|
from .mxfile_scanner import MxfileScanner
|
|
21
|
+
from .patch_scanner import PatchScanner
|
|
30
22
|
from .pdf_scanner import PdfScanner
|
|
31
|
-
from .pkcs12_scanner import Pkcs12Scanner
|
|
23
|
+
from .pkcs_scanner import PkcsScanner
|
|
32
24
|
from .pptx_scanner import PptxScanner
|
|
25
|
+
from .rpm_scanner import RpmScanner
|
|
26
|
+
from .sqlite3_scanner import Sqlite3Scanner
|
|
33
27
|
from .tar_scanner import TarScanner
|
|
34
28
|
from .tmx_scanner import TmxScanner
|
|
35
29
|
from .xlsx_scanner import XlsxScanner
|
|
36
30
|
from .xml_scanner import XmlScanner
|
|
37
31
|
from .zip_scanner import ZipScanner
|
|
38
|
-
from ..
|
|
39
|
-
from ..file_handler.file_path_extractor import FilePathExtractor
|
|
40
|
-
from ..file_handler.struct_content_provider import StructContentProvider
|
|
32
|
+
from ..file_handler.descriptor import Descriptor
|
|
41
33
|
|
|
42
34
|
logger = logging.getLogger(__name__)
|
|
43
35
|
|
|
@@ -49,12 +41,16 @@ class DeepScanner(
|
|
|
49
41
|
EncoderScanner, #
|
|
50
42
|
GzipScanner, #
|
|
51
43
|
HtmlScanner, #
|
|
44
|
+
JclassScanner, #
|
|
52
45
|
JksScanner, #
|
|
53
46
|
LangScanner, #
|
|
54
47
|
LzmaScanner, #
|
|
48
|
+
PatchScanner, #
|
|
55
49
|
PdfScanner, #
|
|
56
|
-
|
|
50
|
+
PkcsScanner, #
|
|
57
51
|
PptxScanner, #
|
|
52
|
+
RpmScanner, #
|
|
53
|
+
Sqlite3Scanner, #
|
|
58
54
|
TarScanner, #
|
|
59
55
|
DebScanner, #
|
|
60
56
|
XmlScanner, #
|
|
@@ -82,7 +78,7 @@ class DeepScanner(
|
|
|
82
78
|
return self.__scanner
|
|
83
79
|
|
|
84
80
|
@staticmethod
|
|
85
|
-
def get_deep_scanners(data: bytes, file_type: str, depth: int) -> Tuple[List[Any], List[Any]]:
|
|
81
|
+
def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[List[Any], List[Any]]:
|
|
86
82
|
"""Returns possibly scan methods for the data depends on content and fallback scanners"""
|
|
87
83
|
deep_scanners: List[Any] = []
|
|
88
84
|
fallback_scanners: List[Any] = []
|
|
@@ -91,20 +87,20 @@ class DeepScanner(
|
|
|
91
87
|
deep_scanners.append(ZipScanner)
|
|
92
88
|
# probably, there might be a docx, xlsx and so on.
|
|
93
89
|
# It might be scanned with text representation in third-party libraries.
|
|
94
|
-
if file_type in (".xlsx", ".ods"):
|
|
90
|
+
if descriptor.extension in (".xlsx", ".ods"):
|
|
95
91
|
deep_scanners.append(XlsxScanner)
|
|
96
92
|
else:
|
|
97
93
|
fallback_scanners.append(XlsxScanner)
|
|
98
|
-
if ".docx" == file_type:
|
|
94
|
+
if ".docx" == descriptor.extension:
|
|
99
95
|
deep_scanners.append(DocxScanner)
|
|
100
96
|
else:
|
|
101
97
|
fallback_scanners.append(DocxScanner)
|
|
102
|
-
if ".pptx" == file_type:
|
|
98
|
+
if ".pptx" == descriptor.extension:
|
|
103
99
|
deep_scanners.append(PptxScanner)
|
|
104
100
|
else:
|
|
105
101
|
fallback_scanners.append(PptxScanner)
|
|
106
102
|
elif Util.is_com(data):
|
|
107
|
-
if ".xls" == file_type:
|
|
103
|
+
if ".xls" == descriptor.extension:
|
|
108
104
|
deep_scanners.append(XlsxScanner)
|
|
109
105
|
else:
|
|
110
106
|
fallback_scanners.append(XlsxScanner)
|
|
@@ -125,10 +121,18 @@ class DeepScanner(
|
|
|
125
121
|
deep_scanners.append(GzipScanner)
|
|
126
122
|
elif Util.is_pdf(data):
|
|
127
123
|
deep_scanners.append(PdfScanner)
|
|
124
|
+
elif Util.is_rpm(data):
|
|
125
|
+
if 0 < depth:
|
|
126
|
+
deep_scanners.append(RpmScanner)
|
|
127
|
+
elif Util.is_jclass(data):
|
|
128
|
+
deep_scanners.append(JclassScanner)
|
|
128
129
|
elif Util.is_jks(data):
|
|
129
130
|
deep_scanners.append(JksScanner)
|
|
131
|
+
elif Util.is_sqlite3(data):
|
|
132
|
+
if 0 < depth:
|
|
133
|
+
deep_scanners.append(Sqlite3Scanner)
|
|
130
134
|
elif Util.is_asn1(data):
|
|
131
|
-
deep_scanners.append(Pkcs12Scanner)
|
|
135
|
+
deep_scanners.append(PkcsScanner)
|
|
132
136
|
elif Util.is_xml(data):
|
|
133
137
|
if Util.is_html(data):
|
|
134
138
|
deep_scanners.append(HtmlScanner)
|
|
@@ -146,9 +150,12 @@ class DeepScanner(
|
|
|
146
150
|
deep_scanners.append(XmlScanner)
|
|
147
151
|
fallback_scanners.append(ByteScanner)
|
|
148
152
|
elif Util.is_eml(data):
|
|
149
|
-
if ".eml" == file_type:
|
|
153
|
+
if ".eml" == descriptor.extension:
|
|
150
154
|
deep_scanners.append(EmlScanner)
|
|
151
155
|
else:
|
|
156
|
+
if 0 < depth:
|
|
157
|
+
# formal patch looks like an eml
|
|
158
|
+
deep_scanners.append(PatchScanner)
|
|
152
159
|
fallback_scanners.append(EmlScanner)
|
|
153
160
|
fallback_scanners.append(ByteScanner)
|
|
154
161
|
elif Util.is_known(data):
|
|
@@ -156,231 +163,11 @@ class DeepScanner(
|
|
|
156
163
|
pass
|
|
157
164
|
elif not Util.is_binary(data):
|
|
158
165
|
if 0 < depth:
|
|
166
|
+
deep_scanners.append(PatchScanner)
|
|
159
167
|
deep_scanners.append(EncoderScanner)
|
|
160
168
|
deep_scanners.append(LangScanner)
|
|
161
169
|
deep_scanners.append(ByteScanner)
|
|
162
170
|
else:
|
|
163
|
-
logger.warning("Cannot apply a deep scanner for type %s prefix %s",
|
|
171
|
+
logger.warning("Cannot apply a deep scanner for type %s prefix %s %d", descriptor,
|
|
172
|
+
repr(data[:MIN_DATA_LEN]), len(data))
|
|
164
173
|
return deep_scanners, fallback_scanners
|
|
165
|
-
|
|
166
|
-
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
167
|
-
|
|
168
|
-
def deep_scan_with_fallback(self, data_provider: DataContentProvider, depth: int,
|
|
169
|
-
recursive_limit_size: int) -> List[Candidate]:
|
|
170
|
-
"""Scans with deep scanners and fallback scanners if possible
|
|
171
|
-
|
|
172
|
-
Args:
|
|
173
|
-
data_provider: DataContentProvider with raw data
|
|
174
|
-
depth: maximal level of recursion
|
|
175
|
-
recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
|
|
176
|
-
|
|
177
|
-
Returns: list with candidates
|
|
178
|
-
|
|
179
|
-
"""
|
|
180
|
-
candidates: List[Candidate] = []
|
|
181
|
-
deep_scanners, fallback_scanners = self.get_deep_scanners(data_provider.data, data_provider.file_type, depth)
|
|
182
|
-
fallback = True
|
|
183
|
-
for scan_class in deep_scanners:
|
|
184
|
-
new_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
|
|
185
|
-
if new_candidates is None:
|
|
186
|
-
# scanner did not recognise the content type
|
|
187
|
-
continue
|
|
188
|
-
augment_candidates(candidates, new_candidates)
|
|
189
|
-
# this scan is successful, so fallback is not necessary
|
|
190
|
-
fallback = False
|
|
191
|
-
if fallback:
|
|
192
|
-
for scan_class in fallback_scanners:
|
|
193
|
-
fallback_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
|
|
194
|
-
if fallback_candidates is None:
|
|
195
|
-
continue
|
|
196
|
-
augment_candidates(candidates, fallback_candidates)
|
|
197
|
-
# use only first successful fallback scanner
|
|
198
|
-
break
|
|
199
|
-
return candidates
|
|
200
|
-
|
|
201
|
-
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
202
|
-
|
|
203
|
-
def scan(self,
|
|
204
|
-
content_provider: ContentProvider,
|
|
205
|
-
depth: int,
|
|
206
|
-
recursive_limit_size: Optional[int] = None) -> List[Candidate]:
|
|
207
|
-
"""Initial scan method to launch recursive scan. Skips ByteScanner to prevent extra scan
|
|
208
|
-
|
|
209
|
-
Args:
|
|
210
|
-
content_provider: ContentProvider that might contain raw data
|
|
211
|
-
depth: maximal level of recursion
|
|
212
|
-
recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
|
|
213
|
-
"""
|
|
214
|
-
recursive_limit_size = recursive_limit_size if isinstance(recursive_limit_size,
|
|
215
|
-
int) else RECURSIVE_SCAN_LIMITATION
|
|
216
|
-
candidates: List[Candidate] = []
|
|
217
|
-
data: Optional[bytes] = None
|
|
218
|
-
if isinstance(content_provider, (TextContentProvider, ByteContentProvider)):
|
|
219
|
-
# Feature to scan files which might be containers
|
|
220
|
-
data = content_provider.data
|
|
221
|
-
info = f"FILE:{content_provider.file_path}"
|
|
222
|
-
elif isinstance(content_provider, DiffContentProvider) and content_provider.diff:
|
|
223
|
-
candidates = self.scanner.scan(content_provider)
|
|
224
|
-
# Feature to scan binary diffs
|
|
225
|
-
diff = content_provider.diff[0].get("line")
|
|
226
|
-
# the check for legal fix mypy issue
|
|
227
|
-
if isinstance(diff, bytes):
|
|
228
|
-
data = diff
|
|
229
|
-
info = f"DIFF:{content_provider.file_path}"
|
|
230
|
-
else:
|
|
231
|
-
logger.warning(f"Content provider {type(content_provider)} does not support deep scan")
|
|
232
|
-
info = "NA"
|
|
233
|
-
|
|
234
|
-
if data:
|
|
235
|
-
data_provider = DataContentProvider(data=data,
|
|
236
|
-
file_path=content_provider.file_path,
|
|
237
|
-
file_type=content_provider.file_type,
|
|
238
|
-
info=content_provider.info or info)
|
|
239
|
-
new_candidates = self.deep_scan_with_fallback(data_provider, depth, recursive_limit_size - len(data))
|
|
240
|
-
augment_candidates(candidates, new_candidates)
|
|
241
|
-
return candidates
|
|
242
|
-
|
|
243
|
-
def recursive_scan(
|
|
244
|
-
self, #
|
|
245
|
-
data_provider: DataContentProvider, #
|
|
246
|
-
depth: int = 0, #
|
|
247
|
-
recursive_limit_size: int = 0) -> List[Candidate]:
|
|
248
|
-
"""Recursive function to scan files which might be containers like ZIP archives
|
|
249
|
-
|
|
250
|
-
Args:
|
|
251
|
-
data_provider: DataContentProvider object may be a container
|
|
252
|
-
depth: maximal level of recursion
|
|
253
|
-
recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
|
|
254
|
-
"""
|
|
255
|
-
candidates: List[Candidate] = []
|
|
256
|
-
if 0 > depth:
|
|
257
|
-
# break recursion if maximal depth is reached
|
|
258
|
-
logger.debug("Bottom reached %s recursive_limit_size:%d", data_provider.file_path, recursive_limit_size)
|
|
259
|
-
return candidates
|
|
260
|
-
depth -= 1
|
|
261
|
-
if MIN_DATA_LEN > len(data_provider.data):
|
|
262
|
-
# break recursion for minimal data size
|
|
263
|
-
logger.debug("Too small data: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data),
|
|
264
|
-
depth, recursive_limit_size, data_provider.file_path, data_provider.info)
|
|
265
|
-
return candidates
|
|
266
|
-
logger.debug("Start data_scan: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data), depth,
|
|
267
|
-
recursive_limit_size, data_provider.file_path, data_provider.info)
|
|
268
|
-
|
|
269
|
-
if FilePathExtractor.is_find_by_ext_file(self.config, data_provider.file_type):
|
|
270
|
-
# Skip scanning file and makes fake candidate due the extension is suspicious
|
|
271
|
-
dummy_candidate = Candidate.get_dummy_candidate(self.config, data_provider.file_path,
|
|
272
|
-
data_provider.file_type, data_provider.info,
|
|
273
|
-
FilePathExtractor.FIND_BY_EXT_RULE)
|
|
274
|
-
candidates.append(dummy_candidate)
|
|
275
|
-
else:
|
|
276
|
-
new_candidates = self.deep_scan_with_fallback(data_provider, depth, recursive_limit_size)
|
|
277
|
-
augment_candidates(candidates, new_candidates)
|
|
278
|
-
|
|
279
|
-
return candidates
|
|
280
|
-
|
|
281
|
-
def structure_scan(
|
|
282
|
-
self, #
|
|
283
|
-
struct_provider: StructContentProvider, #
|
|
284
|
-
depth: int, #
|
|
285
|
-
recursive_limit_size: int) -> List[Candidate]:
|
|
286
|
-
"""Recursive function to scan structured data
|
|
287
|
-
|
|
288
|
-
Args:
|
|
289
|
-
struct_provider: DataContentProvider object may be a container
|
|
290
|
-
depth: maximal level of recursion
|
|
291
|
-
recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
|
|
292
|
-
"""
|
|
293
|
-
candidates: List[Candidate] = []
|
|
294
|
-
logger.debug("Start struct_scan: depth=%d, limit=%d, path=%s, info=%s", depth, recursive_limit_size,
|
|
295
|
-
struct_provider.file_path, struct_provider.info)
|
|
296
|
-
|
|
297
|
-
if 0 > depth:
|
|
298
|
-
# break recursion if maximal depth is reached
|
|
299
|
-
logger.debug("bottom reached %s recursive_limit_size:%d", struct_provider.file_path, recursive_limit_size)
|
|
300
|
-
return candidates
|
|
301
|
-
|
|
302
|
-
depth -= 1
|
|
303
|
-
|
|
304
|
-
items: List[Tuple[Union[int, str], Any]] = []
|
|
305
|
-
struct_key: Optional[str] = None
|
|
306
|
-
struct_value: Optional[str] = None
|
|
307
|
-
lines_for_keyword_rules = []
|
|
308
|
-
if isinstance(struct_provider.struct, dict):
|
|
309
|
-
for key, value in struct_provider.struct.items():
|
|
310
|
-
if isinstance(value, (list, tuple)) and 1 == len(value):
|
|
311
|
-
# simplify some structures like YAML when single item in new line is a value
|
|
312
|
-
items.append((key, value[0]))
|
|
313
|
-
else:
|
|
314
|
-
items.append((key, value))
|
|
315
|
-
# for transformation {"key": "api_key", "value": "XXXXXXX"} -> {"api_key": "XXXXXXX"}
|
|
316
|
-
struct_key = struct_provider.struct.get("key")
|
|
317
|
-
struct_value = struct_provider.struct.get("value")
|
|
318
|
-
elif isinstance(struct_provider.struct, (list, tuple)):
|
|
319
|
-
items = list(enumerate(struct_provider.struct))
|
|
320
|
-
else:
|
|
321
|
-
logger.error("Not supported type:%s val:%s", str(type(struct_provider.struct)), str(struct_provider.struct))
|
|
322
|
-
|
|
323
|
-
for key, value in items:
|
|
324
|
-
if isinstance(value, dict) or isinstance(value, (list, tuple)) and 1 <= len(value):
|
|
325
|
-
val_struct_provider = StructContentProvider(struct=value,
|
|
326
|
-
file_path=struct_provider.file_path,
|
|
327
|
-
file_type=struct_provider.file_type,
|
|
328
|
-
info=f"{struct_provider.info}|STRUCT:{key}")
|
|
329
|
-
new_candidates = self.structure_scan(val_struct_provider, depth, recursive_limit_size)
|
|
330
|
-
candidates.extend(new_candidates)
|
|
331
|
-
|
|
332
|
-
elif isinstance(value, bytes):
|
|
333
|
-
if MIN_DATA_LEN <= len(value):
|
|
334
|
-
bytes_struct_provider = DataContentProvider(data=value,
|
|
335
|
-
file_path=struct_provider.file_path,
|
|
336
|
-
file_type=struct_provider.file_type,
|
|
337
|
-
info=f"{struct_provider.info}|BYTES:{key}")
|
|
338
|
-
new_limit = recursive_limit_size - len(value)
|
|
339
|
-
new_candidates = self.recursive_scan(bytes_struct_provider, depth, new_limit)
|
|
340
|
-
candidates.extend(new_candidates)
|
|
341
|
-
if MIN_VALUE_LENGTH <= len(value) and isinstance(key, str) \
|
|
342
|
-
and self.scanner.keywords_required_substrings_check(key.lower()):
|
|
343
|
-
str_val = str(value)
|
|
344
|
-
lines_for_keyword_rules.append(f"{key} = '{str_val}'" if '"' in str_val else f'{key} = "{str_val}"')
|
|
345
|
-
|
|
346
|
-
elif isinstance(value, str):
|
|
347
|
-
if MIN_DATA_LEN <= len(value):
|
|
348
|
-
# recursive scan only for data which may be decoded at least
|
|
349
|
-
with contextlib.suppress(UnicodeError):
|
|
350
|
-
data = value.encode(encoding=DEFAULT_ENCODING, errors='strict')
|
|
351
|
-
str_struct_provider = DataContentProvider(data=data,
|
|
352
|
-
file_path=struct_provider.file_path,
|
|
353
|
-
file_type=struct_provider.file_type,
|
|
354
|
-
info=f"{struct_provider.info}|STRING:{key}")
|
|
355
|
-
new_limit = recursive_limit_size - len(str_struct_provider.data)
|
|
356
|
-
new_candidates = self.recursive_scan(str_struct_provider, depth, new_limit)
|
|
357
|
-
candidates.extend(new_candidates)
|
|
358
|
-
# use key = "value" scan for common cases like in TOML
|
|
359
|
-
if MIN_VALUE_LENGTH <= len(value) and isinstance(key, str) \
|
|
360
|
-
and self.scanner.keywords_required_substrings_check(key.lower()):
|
|
361
|
-
lines_for_keyword_rules.append(f"{key} = '{value}'" if '"' in value else f'{key} = "{value}"')
|
|
362
|
-
|
|
363
|
-
elif isinstance(value, (int, float, datetime.date, datetime.datetime)):
|
|
364
|
-
# skip useless types
|
|
365
|
-
pass
|
|
366
|
-
else:
|
|
367
|
-
logger.warning("Not supported type:%s value(%s)", str(type(value)), str(value))
|
|
368
|
-
|
|
369
|
-
if lines_for_keyword_rules:
|
|
370
|
-
str_provider = StringContentProvider(lines_for_keyword_rules,
|
|
371
|
-
file_path=struct_provider.file_path,
|
|
372
|
-
file_type=".py",
|
|
373
|
-
info=f"{struct_provider.info}|KEYWORD:`{lines_for_keyword_rules}`")
|
|
374
|
-
new_candidates = self.scanner.scan(str_provider)
|
|
375
|
-
augment_candidates(candidates, new_candidates)
|
|
376
|
-
|
|
377
|
-
# last check when dictionary is {"key": "api_key", "value": "XXXXXXX"} -> {"api_key": "XXXXXXX"}
|
|
378
|
-
if isinstance(struct_key, str) and isinstance(struct_value, str):
|
|
379
|
-
key_value_provider = StringContentProvider(
|
|
380
|
-
[f"{struct_key} = '{struct_value}'" if '"' in struct_value else f'{struct_key} = "{struct_value}"'],
|
|
381
|
-
file_path=struct_provider.file_path,
|
|
382
|
-
file_type=".toml",
|
|
383
|
-
info=f"{struct_provider.info}|KEY_VALUE:`{lines_for_keyword_rules}`")
|
|
384
|
-
new_candidates = self.scanner.scan(key_value_provider)
|
|
385
|
-
augment_candidates(candidates, new_candidates)
|
|
386
|
-
return candidates
|
|
@@ -11,7 +11,7 @@ from docx.table import _Cell, Table
|
|
|
11
11
|
from docx.text.paragraph import Paragraph
|
|
12
12
|
from lxml.etree import _Element
|
|
13
13
|
|
|
14
|
-
from credsweeper.credentials import Candidate
|
|
14
|
+
from credsweeper.credentials.candidate import Candidate
|
|
15
15
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
16
16
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
17
17
|
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
@@ -3,7 +3,7 @@ import logging
|
|
|
3
3
|
from abc import ABC
|
|
4
4
|
from typing import List, Optional
|
|
5
5
|
|
|
6
|
-
from credsweeper.credentials import Candidate
|
|
6
|
+
from credsweeper.credentials.candidate import Candidate
|
|
7
7
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
8
8
|
from credsweeper.file_handler.byte_content_provider import ByteContentProvider
|
|
9
9
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
@@ -2,7 +2,7 @@ import logging
|
|
|
2
2
|
from abc import ABC
|
|
3
3
|
from typing import List, Optional
|
|
4
4
|
|
|
5
|
-
from credsweeper.credentials import Candidate
|
|
5
|
+
from credsweeper.credentials.candidate import Candidate
|
|
6
6
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
7
7
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
8
8
|
|
|
@@ -5,10 +5,10 @@ from abc import ABC
|
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from typing import List, Optional
|
|
7
7
|
|
|
8
|
-
from credsweeper.credentials import Candidate
|
|
8
|
+
from credsweeper.credentials.candidate import Candidate
|
|
9
9
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
10
10
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
11
|
-
from credsweeper.utils import Util
|
|
11
|
+
from credsweeper.utils.util import Util
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
@@ -2,7 +2,7 @@ import logging
|
|
|
2
2
|
from abc import ABC
|
|
3
3
|
from typing import List, Optional
|
|
4
4
|
|
|
5
|
-
from credsweeper.credentials import Candidate
|
|
5
|
+
from credsweeper.credentials.candidate import Candidate
|
|
6
6
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
7
7
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
8
8
|
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import logging
|
|
3
|
+
import struct
|
|
4
|
+
from abc import ABC
|
|
5
|
+
from typing import List, Optional
|
|
6
|
+
|
|
7
|
+
from credsweeper.common.constants import MIN_DATA_LEN, UTF_8
|
|
8
|
+
from credsweeper.credentials.candidate import Candidate
|
|
9
|
+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
10
|
+
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
11
|
+
from credsweeper.file_handler.struct_content_provider import StructContentProvider
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class JclassScanner(AbstractScanner, ABC):
|
|
17
|
+
"""Implements java .class scanning"""
|
|
18
|
+
|
|
19
|
+
@staticmethod
|
|
20
|
+
def u2(stream: io.BytesIO) -> int:
|
|
21
|
+
"""Extracts unsigned 16 bit big-endian"""
|
|
22
|
+
return int(struct.unpack(">H", stream.read(2))[0])
|
|
23
|
+
|
|
24
|
+
@staticmethod
|
|
25
|
+
def get_utf8_constants(stream: io.BytesIO) -> List[str]:
|
|
26
|
+
"""Extracts only Utf8 constants from java ClassFile"""
|
|
27
|
+
result = []
|
|
28
|
+
item_count = JclassScanner.u2(stream)
|
|
29
|
+
while 0 < item_count:
|
|
30
|
+
# actual number of items is one less!
|
|
31
|
+
item_count -= 1
|
|
32
|
+
# uint8
|
|
33
|
+
tag = int(stream.read(1)[0])
|
|
34
|
+
if 1 == tag:
|
|
35
|
+
length = JclassScanner.u2(stream)
|
|
36
|
+
data = stream.read(int(length))
|
|
37
|
+
if MIN_DATA_LEN <= length:
|
|
38
|
+
value = data.decode(encoding=UTF_8, errors="replace")
|
|
39
|
+
result.append(value)
|
|
40
|
+
elif tag in (3, 4, 9, 10, 11, 12, 18):
|
|
41
|
+
_ = stream.read(4)
|
|
42
|
+
elif tag in (7, 8, 16):
|
|
43
|
+
_ = stream.read(2)
|
|
44
|
+
elif tag in (5, 6):
|
|
45
|
+
_ = stream.read(8)
|
|
46
|
+
elif 15 == tag:
|
|
47
|
+
_ = stream.read(3)
|
|
48
|
+
else:
|
|
49
|
+
logger.error(f"Unknown tag {tag}")
|
|
50
|
+
break
|
|
51
|
+
return result
|
|
52
|
+
|
|
53
|
+
def data_scan(
|
|
54
|
+
self, #
|
|
55
|
+
data_provider: DataContentProvider, #
|
|
56
|
+
depth: int, #
|
|
57
|
+
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
58
|
+
"""Extracts data from binary"""
|
|
59
|
+
try:
|
|
60
|
+
stream = io.BytesIO(data_provider.data)
|
|
61
|
+
stream.read(4) # magic
|
|
62
|
+
minor = JclassScanner.u2(stream)
|
|
63
|
+
major = JclassScanner.u2(stream)
|
|
64
|
+
constants = JclassScanner.get_utf8_constants(stream)
|
|
65
|
+
struct_content_provider = StructContentProvider(struct=constants,
|
|
66
|
+
file_path=data_provider.file_path,
|
|
67
|
+
file_type=data_provider.file_type,
|
|
68
|
+
info=f"{data_provider.info}|Java.{major}.{minor}")
|
|
69
|
+
new_limit = recursive_limit_size - sum(len(x) for x in constants)
|
|
70
|
+
candidates = self.structure_scan(struct_content_provider, depth, new_limit)
|
|
71
|
+
return candidates
|
|
72
|
+
except Exception as jclass_exc:
|
|
73
|
+
logger.error(f"{data_provider.file_path}:{jclass_exc}")
|
|
74
|
+
return None
|
|
@@ -4,7 +4,7 @@ from typing import List, Optional
|
|
|
4
4
|
|
|
5
5
|
import jks
|
|
6
6
|
|
|
7
|
-
from credsweeper.credentials import Candidate
|
|
7
|
+
from credsweeper.credentials.candidate import Candidate
|
|
8
8
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
9
9
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
10
10
|
|
|
@@ -2,7 +2,7 @@ import logging
|
|
|
2
2
|
from abc import ABC
|
|
3
3
|
from typing import List, Optional
|
|
4
4
|
|
|
5
|
-
from credsweeper.credentials import Candidate
|
|
5
|
+
from credsweeper.credentials.candidate import Candidate
|
|
6
6
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
7
7
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
8
8
|
from credsweeper.file_handler.struct_content_provider import StructContentProvider
|
|
@@ -4,10 +4,10 @@ from abc import ABC
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import List, Optional
|
|
6
6
|
|
|
7
|
-
from credsweeper.credentials import Candidate
|
|
7
|
+
from credsweeper.credentials.candidate import Candidate
|
|
8
8
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
9
9
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
10
|
-
from credsweeper.utils import Util
|
|
10
|
+
from credsweeper.utils.util import Util
|
|
11
11
|
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
13
13
|
|
|
@@ -5,7 +5,7 @@ from typing import List, Optional
|
|
|
5
5
|
from bs4 import BeautifulSoup
|
|
6
6
|
from lxml import etree
|
|
7
7
|
|
|
8
|
-
from credsweeper.credentials import Candidate
|
|
8
|
+
from credsweeper.credentials.candidate import Candidate
|
|
9
9
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
10
10
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
11
11
|
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import logging
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from typing import List, Optional
|
|
5
|
+
|
|
6
|
+
from credsweeper.common.constants import DiffRowType
|
|
7
|
+
from credsweeper.credentials.candidate import Candidate
|
|
8
|
+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
9
|
+
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
10
|
+
from credsweeper.file_handler.patches_provider import PatchesProvider
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class PatchScanner(AbstractScanner, ABC):
    """Implements .patch scanning"""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Scan the added and then the deleted rows of a patch.

        The data is wrapped into a ``PatchesProvider`` twice - once per change
        type - and every scannable file it yields is passed to ``scan``.
        Afterwards the path and the info of each found line are prefixed with
        the path and the info of the patch itself.

        Args:
            data_provider: provider with the raw bytes of the patch
            depth: recursion depth passed through to ``scan``
            recursive_limit_size: remaining size budget; the nested scans get
                it reduced by the size of the patch data

        Return:
            list of candidates from both passes, or None when any step raises
            (the error is logged together with the file path)

        """
        try:
            # common limitation - the same reduced budget is used for both passes
            new_limit_size = recursive_limit_size - len(data_provider.data)
            candidates: List[Candidate] = []
            # ADDED rows are processed first, DELETED rows second
            for change_type in (DiffRowType.ADDED, DiffRowType.DELETED):
                # every pass reads from its own fresh stream over the same bytes
                patch_paths = [(data_provider.file_path, io.BytesIO(data_provider.data))]
                patches_provider = PatchesProvider(patch_paths, change_type=change_type)
                for scannable_file in patches_provider.get_scannable_files(self.config):
                    candidates.extend(self.scan(scannable_file, depth, new_limit_size))
            # update the line data for deep scan only
            for candidate in candidates:
                for line_data in candidate.line_data_list:
                    line_data.path = f"{data_provider.file_path}/{line_data.path}"
                    line_data.info = f"{data_provider.info}|PATCH:{line_data.info}"
            return candidates
        except Exception as patch_exc:
            logger.error(f"{data_provider.file_path}:{patch_exc}")
            return None
|
|
@@ -6,7 +6,7 @@ from typing import List, Optional
|
|
|
6
6
|
from pdfminer.high_level import extract_pages
|
|
7
7
|
from pdfminer.layout import LAParams, LTText, LTItem
|
|
8
8
|
|
|
9
|
-
from credsweeper.credentials import Candidate
|
|
9
|
+
from credsweeper.credentials.candidate import Candidate
|
|
10
10
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
11
11
|
from credsweeper.file_handler.data_content_provider import DataContentProvider, MIN_DATA_LEN
|
|
12
12
|
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import logging
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from typing import List, Optional
|
|
5
|
+
|
|
6
|
+
from credsweeper.credentials.candidate import Candidate
|
|
7
|
+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
8
|
+
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
9
|
+
from credsweeper.utils.util import Util
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PkcsScanner(AbstractScanner, ABC):
    """Implements pkcs12 scanning"""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Try the configured passwords one by one to load a private key.

        Every item of ``config.bruteforce_list`` is probed with
        ``Util.load_pk``; an empty item is probed as "no password". The first
        probe that yields a key ends the search.

        Args:
            data_provider: provider with the raw bytes of the container
            depth: not used by this scanner
            recursive_limit_size: not used by this scanner

        Return:
            - a list with one dummy "PKCS" candidate when a key was loaded and
              passed ``Util.check_pk``; its line is the base64 of the data and
              its value is the repr of the password bytes
            - an empty list when a key was loaded but ``Util.check_pk`` failed
            - None when no probe produced a key

        """
        for pw_probe in self.config.bruteforce_list:
            try:
                password = None if not pw_probe else pw_probe.encode()
                pkey = Util.load_pk(data_provider.data, password)
                if not pkey:
                    # nothing was loaded with this probe - try the next one
                    continue
                if Util.check_pk(pkey):
                    candidate = Candidate.get_dummy_candidate(
                        self.config,  #
                        data_provider.file_path,  #
                        data_provider.file_type,  #
                        f"{data_provider.info}|PKCS:{repr(password)} is the password",  #
                        "PKCS")
                    first_line_data = candidate.line_data_list[0]
                    # binary data is kept in the report as base64 text
                    first_line_data.line = base64.b64encode(data_provider.data).decode()
                    first_line_data.value = repr(password)
                    return [candidate]
                logger.debug("False alarm %s", data_provider.info)
                return []
            except Exception as pkcs_exc:
                # a failed probe is not fatal - the remaining items are still tried
                logger.debug(f"{data_provider.file_path}:{pw_probe}:{pkcs_exc}")
        return None
|
|
@@ -5,7 +5,7 @@ from typing import List, Optional
|
|
|
5
5
|
|
|
6
6
|
from pptx import Presentation
|
|
7
7
|
|
|
8
|
-
from credsweeper.credentials import Candidate
|
|
8
|
+
from credsweeper.credentials.candidate import Candidate
|
|
9
9
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
10
10
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
11
11
|
from credsweeper.file_handler.string_content_provider import StringContentProvider
|