credsweeper 1.11.3-py3-none-any.whl → 1.11.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of credsweeper might be problematic.
- credsweeper/__init__.py +1 -1
- credsweeper/__main__.py +1 -1
- credsweeper/app.py +21 -44
- credsweeper/common/constants.py +2 -5
- credsweeper/credentials/candidate_key.py +1 -1
- credsweeper/credentials/credential_manager.py +4 -3
- credsweeper/credentials/line_data.py +2 -5
- credsweeper/deep_scanner/abstract_scanner.py +269 -14
- credsweeper/deep_scanner/deb_scanner.py +55 -0
- credsweeper/deep_scanner/deep_scanner.py +39 -241
- credsweeper/deep_scanner/gzip_scanner.py +1 -1
- credsweeper/deep_scanner/jclass_scanner.py +74 -0
- credsweeper/deep_scanner/patch_scanner.py +48 -0
- credsweeper/deep_scanner/pkcs_scanner.py +41 -0
- credsweeper/deep_scanner/rpm_scanner.py +49 -0
- credsweeper/deep_scanner/sqlite3_scanner.py +79 -0
- credsweeper/file_handler/byte_content_provider.py +2 -2
- credsweeper/file_handler/content_provider.py +1 -1
- credsweeper/file_handler/data_content_provider.py +3 -4
- credsweeper/file_handler/diff_content_provider.py +2 -2
- credsweeper/file_handler/file_path_extractor.py +1 -1
- credsweeper/file_handler/files_provider.py +2 -4
- credsweeper/file_handler/patches_provider.py +5 -2
- credsweeper/file_handler/string_content_provider.py +2 -2
- credsweeper/file_handler/struct_content_provider.py +1 -1
- credsweeper/file_handler/text_content_provider.py +2 -2
- credsweeper/filters/__init__.py +1 -0
- credsweeper/filters/value_base64_encoded_pem_check.py +1 -1
- credsweeper/filters/value_base64_key_check.py +9 -14
- credsweeper/filters/value_entropy_base64_check.py +2 -6
- credsweeper/filters/value_json_web_key_check.py +37 -0
- credsweeper/filters/value_pattern_check.py +64 -16
- credsweeper/ml_model/features/file_extension.py +1 -1
- credsweeper/ml_model/ml_validator.py +43 -21
- credsweeper/rules/config.yaml +51 -9
- credsweeper/rules/rule.py +3 -3
- credsweeper/scanner/scan_type/multi_pattern.py +1 -2
- credsweeper/secret/config.json +6 -6
- credsweeper/utils/hop_stat.py +3 -3
- credsweeper/utils/pem_key_detector.py +6 -4
- credsweeper/utils/util.py +154 -79
- {credsweeper-1.11.3.dist-info → credsweeper-1.11.5.dist-info}/METADATA +3 -6
- {credsweeper-1.11.3.dist-info → credsweeper-1.11.5.dist-info}/RECORD +46 -40
- credsweeper/deep_scanner/pkcs12_scanner.py +0 -45
- {credsweeper-1.11.3.dist-info → credsweeper-1.11.5.dist-info}/WHEEL +0 -0
- {credsweeper-1.11.3.dist-info → credsweeper-1.11.5.dist-info}/entry_points.txt +0 -0
- {credsweeper-1.11.3.dist-info → credsweeper-1.11.5.dist-info}/licenses/LICENSE +0 -0
credsweeper/deep_scanner/deep_scanner.py

```diff
@@ -1,41 +1,35 @@
-import datetime
 import logging
-from typing import List,
+from typing import List, Any, Tuple
 
-from credsweeper.common.constants import
+from credsweeper.common.constants import MIN_DATA_LEN
 from credsweeper.config import Config
-from credsweeper.credentials import Candidate
-from credsweeper.credentials.augment_candidates import augment_candidates
-from credsweeper.file_handler.byte_content_provider import ByteContentProvider
-from credsweeper.file_handler.content_provider import ContentProvider
-from credsweeper.file_handler.data_content_provider import DataContentProvider
-from credsweeper.file_handler.diff_content_provider import DiffContentProvider
-from credsweeper.file_handler.string_content_provider import StringContentProvider
-from credsweeper.file_handler.text_content_provider import TextContentProvider
 from credsweeper.scanner import Scanner
 from credsweeper.utils import Util
 from .byte_scanner import ByteScanner
 from .bzip2_scanner import Bzip2Scanner
+from .deb_scanner import DebScanner
 from .docx_scanner import DocxScanner
 from .eml_scanner import EmlScanner
 from .encoder_scanner import EncoderScanner
 from .gzip_scanner import GzipScanner
 from .html_scanner import HtmlScanner
+from .jclass_scanner import JclassScanner
 from .jks_scanner import JksScanner
 from .lang_scanner import LangScanner
 from .lzma_scanner import LzmaScanner
 from .mxfile_scanner import MxfileScanner
+from .patch_scanner import PatchScanner
 from .pdf_scanner import PdfScanner
-from .
+from .pkcs_scanner import PkcsScanner
 from .pptx_scanner import PptxScanner
+from .rpm_scanner import RpmScanner
+from .sqlite3_scanner import Sqlite3Scanner
 from .tar_scanner import TarScanner
 from .tmx_scanner import TmxScanner
 from .xlsx_scanner import XlsxScanner
 from .xml_scanner import XmlScanner
 from .zip_scanner import ZipScanner
-from ..
-from ..file_handler.file_path_extractor import FilePathExtractor
-from ..file_handler.struct_content_provider import StructContentProvider
+from ..file_handler.descriptor import Descriptor
 
 logger = logging.getLogger(__name__)
 
@@ -47,13 +41,18 @@ class DeepScanner(
         EncoderScanner,  #
         GzipScanner,  #
         HtmlScanner,  #
+        JclassScanner,  #
         JksScanner,  #
         LangScanner,  #
         LzmaScanner,  #
+        PatchScanner,  #
         PdfScanner,  #
-
+        PkcsScanner,  #
         PptxScanner,  #
+        RpmScanner,  #
+        Sqlite3Scanner,  #
         TarScanner,  #
+        DebScanner,  #
         XmlScanner,  #
         XlsxScanner,  #
         ZipScanner
@@ -79,7 +78,7 @@ class DeepScanner(
         return self.__scanner
 
     @staticmethod
-    def get_deep_scanners(data: bytes,
+    def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[List[Any], List[Any]]:
         """Returns possibly scan methods for the data depends on content and fallback scanners"""
         deep_scanners: List[Any] = []
         fallback_scanners: List[Any] = []
@@ -88,20 +87,20 @@ class DeepScanner(
             deep_scanners.append(ZipScanner)
             # probably, there might be a docx, xlsx and so on.
             # It might be scanned with text representation in third-party libraries.
-            if
+            if descriptor.extension in (".xlsx", ".ods"):
                 deep_scanners.append(XlsxScanner)
             else:
                 fallback_scanners.append(XlsxScanner)
-            if ".docx" ==
+            if ".docx" == descriptor.extension:
                 deep_scanners.append(DocxScanner)
             else:
                 fallback_scanners.append(DocxScanner)
-            if ".pptx" ==
+            if ".pptx" == descriptor.extension:
                 deep_scanners.append(PptxScanner)
             else:
                 fallback_scanners.append(PptxScanner)
         elif Util.is_com(data):
-            if ".xls" ==
+            if ".xls" == descriptor.extension:
                 deep_scanners.append(XlsxScanner)
             else:
                 fallback_scanners.append(XlsxScanner)
@@ -114,15 +113,26 @@ class DeepScanner(
         elif Util.is_tar(data):
             if 0 < depth:
                 deep_scanners.append(TarScanner)
+        elif Util.is_deb(data):
+            if 0 < depth:
+                deep_scanners.append(DebScanner)
         elif Util.is_gzip(data):
             if 0 < depth:
                 deep_scanners.append(GzipScanner)
         elif Util.is_pdf(data):
             deep_scanners.append(PdfScanner)
+        elif Util.is_rpm(data):
+            if 0 < depth:
+                deep_scanners.append(RpmScanner)
+        elif Util.is_jclass(data):
+            deep_scanners.append(JclassScanner)
         elif Util.is_jks(data):
             deep_scanners.append(JksScanner)
+        elif Util.is_sqlite3(data):
+            if 0 < depth:
+                deep_scanners.append(Sqlite3Scanner)
         elif Util.is_asn1(data):
-            deep_scanners.append(
+            deep_scanners.append(PkcsScanner)
         elif Util.is_xml(data):
             if Util.is_html(data):
                 deep_scanners.append(HtmlScanner)
@@ -140,9 +150,12 @@ class DeepScanner(
             deep_scanners.append(XmlScanner)
             fallback_scanners.append(ByteScanner)
         elif Util.is_eml(data):
-            if ".eml" ==
+            if ".eml" == descriptor.extension:
                 deep_scanners.append(EmlScanner)
             else:
+                if 0 < depth:
+                    # formal patch looks like an eml
+                    deep_scanners.append(PatchScanner)
                 fallback_scanners.append(EmlScanner)
                 fallback_scanners.append(ByteScanner)
         elif Util.is_known(data):
@@ -150,226 +163,11 @@ class DeepScanner(
             pass
         elif not Util.is_binary(data):
             if 0 < depth:
+                deep_scanners.append(PatchScanner)
             deep_scanners.append(EncoderScanner)
             deep_scanners.append(LangScanner)
             deep_scanners.append(ByteScanner)
         else:
-            logger.warning("Cannot apply a deep scanner for type %s prefix %s",
+            logger.warning("Cannot apply a deep scanner for type %s prefix %s %d", descriptor,
+                           repr(data[:MIN_DATA_LEN]), len(data))
         return deep_scanners, fallback_scanners
-
-    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
-
-    def deep_scan_with_fallback(self, data_provider: DataContentProvider, depth: int,
-                                recursive_limit_size: int) -> List[Candidate]:
-        """Scans with deep scanners and fallback scanners if possible
-
-        Args:
-            data_provider: DataContentProvider with raw data
-            depth: maximal level of recursion
-            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
-
-        Returns: list with candidates
-
-        """
-        candidates: List[Candidate] = []
-        deep_scanners, fallback_scanners = self.get_deep_scanners(data_provider.data, data_provider.file_type, depth)
-        fallback = True
-        for scan_class in deep_scanners:
-            new_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
-            if new_candidates is None:
-                # scanner did not recognise the content type
-                continue
-            augment_candidates(candidates, new_candidates)
-            # this scan is successful, so fallback is not necessary
-            fallback = False
-        if fallback:
-            for scan_class in fallback_scanners:
-                fallback_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
-                if fallback_candidates is None:
-                    continue
-                augment_candidates(candidates, fallback_candidates)
-                # use only first successful fallback scanner
-                break
-        return candidates
-
-    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
-
-    def scan(self,
-             content_provider: ContentProvider,
-             depth: int,
-             recursive_limit_size: Optional[int] = None) -> List[Candidate]:
-        """Initial scan method to launch recursive scan. Skips ByteScanner to prevent extra scan
-
-        Args:
-            content_provider: ContentProvider that might contain raw data
-            depth: maximal level of recursion
-            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
-        """
-        recursive_limit_size = recursive_limit_size if isinstance(recursive_limit_size,
-                                                                  int) else RECURSIVE_SCAN_LIMITATION
-        candidates: List[Candidate] = []
-        data: Optional[bytes] = None
-        if isinstance(content_provider, TextContentProvider) or isinstance(content_provider, ByteContentProvider):
-            # Feature to scan files which might be containers
-            data = content_provider.data
-            info = "FILE"
-        elif isinstance(content_provider, DiffContentProvider) and content_provider.diff:
-            candidates = self.scanner.scan(content_provider)
-            # Feature to scan binary diffs
-            diff = content_provider.diff[0].get("line")
-            # the check for legal fix mypy issue
-            if isinstance(diff, bytes):
-                data = diff
-            info = "DIFF"
-        else:
-            logger.warning(f"Content provider {type(content_provider)} does not support deep scan")
-            info = "NA"
-
-        if data:
-            data_provider = DataContentProvider(data=data,
-                                                file_path=content_provider.file_path,
-                                                file_type=content_provider.file_type,
-                                                info=content_provider.info or info)
-            new_candidates = self.deep_scan_with_fallback(data_provider, depth, recursive_limit_size - len(data))
-            augment_candidates(candidates, new_candidates)
-        return candidates
-
-    def recursive_scan(
-            self,  #
-            data_provider: DataContentProvider,  #
-            depth: int = 0,  #
-            recursive_limit_size: int = 0) -> List[Candidate]:
-        """Recursive function to scan files which might be containers like ZIP archives
-
-        Args:
-            data_provider: DataContentProvider object may be a container
-            depth: maximal level of recursion
-            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
-        """
-        candidates: List[Candidate] = []
-        if 0 > depth:
-            # break recursion if maximal depth is reached
-            logger.debug("Bottom reached %s recursive_limit_size:%d", data_provider.file_path, recursive_limit_size)
-            return candidates
-        depth -= 1
-        if MIN_DATA_LEN > len(data_provider.data):
-            # break recursion for minimal data size
-            logger.debug("Too small data: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data),
-                         depth, recursive_limit_size, data_provider.file_path, data_provider.info)
-            return candidates
-        logger.debug("Start data_scan: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data), depth,
-                     recursive_limit_size, data_provider.file_path, data_provider.info)
-
-        if FilePathExtractor.is_find_by_ext_file(self.config, data_provider.file_type):
-            # Skip scanning file and makes fake candidate due the extension is suspicious
-            dummy_candidate = Candidate.get_dummy_candidate(self.config, data_provider.file_path,
-                                                            data_provider.file_type, data_provider.info,
-                                                            FilePathExtractor.FIND_BY_EXT_RULE)
-            candidates.append(dummy_candidate)
-        else:
-            new_candidates = self.deep_scan_with_fallback(data_provider, depth, recursive_limit_size)
-            augment_candidates(candidates, new_candidates)
-
-        return candidates
-
-    def structure_scan(
-            self,  #
-            struct_provider: StructContentProvider,  #
-            depth: int,  #
-            recursive_limit_size: int) -> List[Candidate]:
-        """Recursive function to scan structured data
-
-        Args:
-            struct_provider: DataContentProvider object may be a container
-            depth: maximal level of recursion
-            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
-        """
-        candidates: List[Candidate] = []
-        logger.debug("Start struct_scan: depth=%d, limit=%d, path=%s, info=%s", depth, recursive_limit_size,
-                     struct_provider.file_path, struct_provider.info)
-
-        if 0 > depth:
-            # break recursion if maximal depth is reached
-            logger.debug("bottom reached %s recursive_limit_size:%d", struct_provider.file_path, recursive_limit_size)
-            return candidates
-
-        depth -= 1
-
-        items: List[Tuple[Union[int, str], Any]] = []
-        struct_key: Optional[str] = None
-        struct_value: Optional[str] = None
-        line_for_keyword_rules = ""
-        if isinstance(struct_provider.struct, dict):
-            for key, value in struct_provider.struct.items():
-                if isinstance(value, (list, tuple)) and 1 == len(value):
-                    # simplify some structures like YAML when single item in new line is a value
-                    items.append((key, value[0]))
-                else:
-                    items.append((key, value))
-            # for transformation {"key": "api_key", "value": "XXXXXXX"} -> {"api_key": "XXXXXXX"}
-            struct_key = struct_provider.struct.get("key")
-            struct_value = struct_provider.struct.get("value")
-        elif isinstance(struct_provider.struct, list) or isinstance(struct_provider.struct, tuple):
-            items = list(enumerate(struct_provider.struct))
-        else:
-            logger.error("Not supported type:%s val:%s", str(type(struct_provider.struct)), str(struct_provider.struct))
-
-        for key, value in items:
-            if isinstance(value, dict) or isinstance(value, (list, tuple)) and 1 < len(value):
-                val_struct_provider = StructContentProvider(struct=value,
-                                                            file_path=struct_provider.file_path,
-                                                            file_type=struct_provider.file_type,
-                                                            info=f"{struct_provider.info}|STRUCT:{key}")
-                new_candidates = self.structure_scan(val_struct_provider, depth, recursive_limit_size)
-                candidates.extend(new_candidates)
-
-            elif isinstance(value, bytes):
-                bytes_struct_provider = DataContentProvider(data=value,
-                                                            file_path=struct_provider.file_path,
-                                                            file_type=struct_provider.file_type,
-                                                            info=f"{struct_provider.info}|BYTES:{key}")
-                new_limit = recursive_limit_size - len(value)
-                new_candidates = self.recursive_scan(bytes_struct_provider, depth, new_limit)
-                candidates.extend(new_candidates)
-
-            elif isinstance(value, str):
-                data = value.encode(encoding=DEFAULT_ENCODING, errors='replace')
-                str_struct_provider = DataContentProvider(data=data,
-                                                          file_path=struct_provider.file_path,
-                                                          file_type=struct_provider.file_type,
-                                                          info=f"{struct_provider.info}|STRING:{key}")
-                new_limit = recursive_limit_size - len(str_struct_provider.data)
-                new_candidates = self.recursive_scan(str_struct_provider, depth, new_limit)
-                candidates.extend(new_candidates)
-
-                # use key = "value" scan for common cases like in TOML
-                if isinstance(key, str) and self.scanner.keywords_required_substrings_check(key):
-                    line_for_keyword_rules += f"{key} = \"{value}\"; "
-
-            elif isinstance(value, (int, float, datetime.date, datetime.datetime)):
-                # use the fields only in case of matched keywords
-                if isinstance(key, str) and self.scanner.keywords_required_substrings_check(key):
-                    line_for_keyword_rules += f"{key} = \"{value}\"; "
-
-            else:
-                logger.warning("Not supported type:%s value(%s)", str(type(value)), str(value))
-
-        if line_for_keyword_rules:
-            str_provider = StringContentProvider([line_for_keyword_rules],
-                                                 file_path=struct_provider.file_path,
-                                                 file_type=".toml",
-                                                 info=f"{struct_provider.info}|KEYWORD:`{line_for_keyword_rules}`")
-            new_candidates = self.scanner.scan(str_provider)
-            augment_candidates(candidates, new_candidates)
-
-        # last check when dictionary is {"key": "api_key", "value": "XXXXXXX"} -> {"api_key": "XXXXXXX"}
-        if isinstance(struct_key, str) and isinstance(struct_value, str):
-            line_for_keyword_rules = f"{struct_key} = \"{struct_value}\""
-            key_value_provider = StringContentProvider(
-                [line_for_keyword_rules],
-                file_path=struct_provider.file_path,
-                file_type=".toml",
-                info=f"{struct_provider.info}|KEY_VALUE:`{line_for_keyword_rules}`")
-            new_candidates = self.scanner.scan(key_value_provider)
-            augment_candidates(candidates, new_candidates)
-        return candidates
```
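The rewritten `get_deep_scanners` now receives a `Descriptor` (which carries the file extension) plus the recursion `depth`, dispatches on content magic bytes first and extension second, and returns separate primary and fallback scanner lists. A minimal standalone sketch of the same dispatch pattern; the `SIGNATURES` table uses well-known file signatures rather than credsweeper's `Util` helpers, and `guess_scanners` is purely illustrative:

```python
from typing import List, Tuple

# Well-known magic-byte signatures; credsweeper's Util.is_* helpers check equivalents.
SIGNATURES = {
    b"PK\x03\x04": "zip",               # also the container of docx/xlsx/pptx
    b"\x1f\x8b": "gzip",
    b"%PDF": "pdf",
    b"\xca\xfe\xba\xbe": "jclass",      # java class file
    b"SQLite format 3\x00": "sqlite3",
    b"!<arch>\n": "deb",                # ar archive, the outer format of .deb
    b"\xed\xab\xee\xdb": "rpm",
}


def guess_scanners(data: bytes, extension: str) -> Tuple[List[str], List[str]]:
    """Toy version of the primary/fallback split in get_deep_scanners."""
    primary: List[str] = []
    fallback: List[str] = []
    for magic, kind in SIGNATURES.items():
        if data.startswith(magic):
            primary.append(kind)
            break
    else:
        # no container signature matched: treat as text, like the final branch above
        primary.append("text")
    if "zip" in primary:
        # extension decides which office unpacker runs first; the rest become fallbacks
        (primary if extension == ".xlsx" else fallback).append("xlsx")
    return primary, fallback


print(guess_scanners(b"PK\x03\x04rest-of-archive", ".xlsx"))  # (['zip', 'xlsx'], [])
```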
credsweeper/deep_scanner/gzip_scanner.py

```diff
@@ -31,7 +31,7 @@ class GzipScanner(AbstractScanner, ABC):
                 gzip_content_provider = DataContentProvider(data=f.read(),
                                                             file_path=new_path,
                                                             file_type=Util.get_extension(new_path),
-                                                            info=f"{data_provider.info}|GZIP:{
+                                                            info=f"{data_provider.info}|GZIP:{new_path}")
                 new_limit = recursive_limit_size - len(gzip_content_provider.data)
                 gzip_candidates = self.recursive_scan(gzip_content_provider, depth, new_limit)
                 return gzip_candidates
```
credsweeper/deep_scanner/jclass_scanner.py (new)

```diff
@@ -0,0 +1,74 @@
+import io
+import logging
+import struct
+from abc import ABC
+from typing import List, Optional
+
+from credsweeper.common.constants import MIN_DATA_LEN, UTF_8
+from credsweeper.credentials import Candidate
+from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
+from credsweeper.file_handler.data_content_provider import DataContentProvider
+from credsweeper.file_handler.struct_content_provider import StructContentProvider
+
+logger = logging.getLogger(__name__)
+
+
+class JclassScanner(AbstractScanner, ABC):
+    """Implements java .class scanning"""
+
+    @staticmethod
+    def u2(stream: io.BytesIO) -> int:
+        """Extracts unsigned 16 bit big-endian"""
+        return int(struct.unpack(">H", stream.read(2))[0])
+
+    @staticmethod
+    def get_utf8_constants(stream: io.BytesIO) -> List[str]:
+        """Extracts only Utf8 constants from java ClassFile"""
+        result = []
+        item_count = JclassScanner.u2(stream)
+        while 0 < item_count:
+            # actual number of items is one less!
+            item_count -= 1
+            # uint8
+            tag = int(stream.read(1)[0])
+            if 1 == tag:
+                length = JclassScanner.u2(stream)
+                data = stream.read(int(length))
+                if MIN_DATA_LEN <= length:
+                    value = data.decode(encoding=UTF_8, errors="replace")
+                    result.append(value)
+            elif tag in (3, 4, 9, 10, 11, 12, 18):
+                _ = stream.read(4)
+            elif tag in (7, 8, 16):
+                _ = stream.read(2)
+            elif tag in (5, 6):
+                _ = stream.read(8)
+            elif 15 == tag:
+                _ = stream.read(3)
+            else:
+                logger.error(f"Unknown tag {tag}")
+                break
+        return result
+
+    def data_scan(
+            self,  #
+            data_provider: DataContentProvider,  #
+            depth: int,  #
+            recursive_limit_size: int) -> Optional[List[Candidate]]:
+        """Extracts data from binary"""
+        try:
+            stream = io.BytesIO(data_provider.data)
+            stream.read(4)  # magic
+            minor = JclassScanner.u2(stream)
+            major = JclassScanner.u2(stream)
+            constants = JclassScanner.get_utf8_constants(stream)
+            struct_content_provider = StructContentProvider(struct=constants,
+                                                            file_path=data_provider.file_path,
+                                                            file_type=data_provider.file_type,
+                                                            info=f"{data_provider.info}|Java.{major}.{minor}")
+            new_limit = recursive_limit_size - sum(len(x) for x in constants)
+            candidates = self.structure_scan(struct_content_provider, depth, new_limit)
+            return candidates
+        except Exception as jclass_exc:
+            logger.error(f"{data_provider.file_path}:{jclass_exc}")
+            return None
```
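The constant-pool walk in `get_utf8_constants` follows the JVM class-file layout: a big-endian u2 count (one greater than the number of entries), then tagged entries whose fixed sizes are skipped, keeping only tag 1 (CONSTANT_Utf8) strings. A self-contained sketch of the same walk over a hand-built pool; `read_utf8_constants` is an illustrative simplification, not the package's API, and for brevity it ignores that Long and Double entries occupy two pool slots:

```python
import io
import struct

# fixed payload sizes for the non-Utf8 constant-pool tags handled above
SKIP = {3: 4, 4: 4, 9: 4, 10: 4, 11: 4, 12: 4, 18: 4, 7: 2, 8: 2, 16: 2, 5: 8, 6: 8, 15: 3}


def read_utf8_constants(stream: io.BytesIO) -> list:
    """Simplified mirror of JclassScanner.get_utf8_constants."""
    result = []
    count = struct.unpack(">H", stream.read(2))[0] - 1  # count field is entries + 1
    while count > 0:
        count -= 1
        tag = stream.read(1)[0]
        if tag == 1:  # CONSTANT_Utf8: u2 length followed by the bytes
            length = struct.unpack(">H", stream.read(2))[0]
            result.append(stream.read(length).decode("utf-8", errors="replace"))
        elif tag in SKIP:
            stream.read(SKIP[tag])
        else:
            break  # unknown tag: stop, as the scanner does
    return result


# constant_pool_count = 3 -> two entries: one Utf8 string and one Integer
pool = struct.pack(">H", 3)
pool += b"\x01" + struct.pack(">H", 12) + b"secret=token"  # tag 1: Utf8
pool += b"\x03" + struct.pack(">i", 42)                    # tag 3: Integer
print(read_utf8_constants(io.BytesIO(pool)))  # ['secret=token']
```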
credsweeper/deep_scanner/patch_scanner.py (new)

```diff
@@ -0,0 +1,48 @@
+import io
+import logging
+from abc import ABC
+from typing import List, Optional
+
+from credsweeper.common.constants import DiffRowType
+from credsweeper.credentials.candidate import Candidate
+from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
+from credsweeper.file_handler.data_content_provider import DataContentProvider
+from credsweeper.file_handler.patches_provider import PatchesProvider
+
+logger = logging.getLogger(__name__)
+
+
+class PatchScanner(AbstractScanner, ABC):
+    """Implements .patch scanning"""
+
+    def data_scan(
+            self,  #
+            data_provider: DataContentProvider,  #
+            depth: int,  #
+            recursive_limit_size: int) -> Optional[List[Candidate]]:
+        """Tries to scan EML with text representation"""
+        try:
+            candidates: List[Candidate] = []
+            # common limitation
+            new_limit_size = recursive_limit_size - len(data_provider.data)
+            # ADDED
+            path_added = [(data_provider.file_path, io.BytesIO(data_provider.data))]
+            added_content_provider = PatchesProvider(path_added, change_type=DiffRowType.ADDED)
+            for added_file in added_content_provider.get_scannable_files(self.config):
+                added_candidates = self.scan(added_file, depth, new_limit_size)
+                candidates.extend(added_candidates)
+            # DELETED
+            path_deleted = [(data_provider.file_path, io.BytesIO(data_provider.data))]
+            deleted_content_provider = PatchesProvider(path_deleted, change_type=DiffRowType.DELETED)
+            for deleted_file in deleted_content_provider.get_scannable_files(self.config):
+                added_candidates = self.scan(deleted_file, depth, new_limit_size)
+                candidates.extend(added_candidates)
+            # update the line data for deep scan only
+            for i in candidates:
+                for line_data in i.line_data_list:
+                    line_data.path = f"{data_provider.file_path}/{line_data.path}"
+                    line_data.info = f"{data_provider.info}|PATCH:{line_data.info}"
+            return candidates
+        except Exception as patch_exc:
+            logger.error(f"{data_provider.file_path}:{patch_exc}")
+            return None
```
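PatchScanner runs the same bytes through `PatchesProvider` twice, once for added and once for deleted rows, since a secret removed by a patch has still been exposed in history. A standalone illustration of that two-pass split in plain Python (the `split_patch_rows` helper is illustrative, not credsweeper's `PatchesProvider`):

```python
import io

PATCH = b"""\
diff --git a/config.py b/config.py
--- a/config.py
+++ b/config.py
@@ -1,2 +1,2 @@
-API_KEY = "old-secret-value"
+API_KEY = "new-secret-value"
"""


def split_patch_rows(data: bytes):
    """Return (added, deleted) payload lines of a unified diff."""
    added, deleted = [], []
    for raw in io.BytesIO(data):
        line = raw.rstrip(b"\n")
        if line.startswith(b"+++") or line.startswith(b"---"):
            continue  # file headers, not content
        if line.startswith(b"+"):
            added.append(line[1:])
        elif line.startswith(b"-"):
            deleted.append(line[1:])
    return added, deleted


added, deleted = split_patch_rows(PATCH)
print(added)    # [b'API_KEY = "new-secret-value"']
print(deleted)  # [b'API_KEY = "old-secret-value"']
```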
credsweeper/deep_scanner/pkcs_scanner.py (new)

```diff
@@ -0,0 +1,41 @@
+import base64
+import logging
+from abc import ABC
+from typing import List, Optional
+
+from credsweeper.credentials import Candidate
+from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
+from credsweeper.file_handler.data_content_provider import DataContentProvider
+from credsweeper.utils import Util
+
+logger = logging.getLogger(__name__)
+
+
+class PkcsScanner(AbstractScanner, ABC):
+    """Implements pkcs12 scanning"""
+
+    def data_scan(
+            self,  #
+            data_provider: DataContentProvider,  #
+            depth: int,  #
+            recursive_limit_size: int) -> Optional[List[Candidate]]:
+        """Tries to scan PKCS12 to open with standard password"""
+        for pw_probe in self.config.bruteforce_list:
+            try:
+                password = pw_probe.encode() if pw_probe else None
+                if pkey := Util.load_pk(data_provider.data, password):
+                    if not Util.check_pk(pkey):
+                        logger.debug("False alarm %s", data_provider.info)
+                        return []
+                    candidate = Candidate.get_dummy_candidate(
+                        self.config,  #
+                        data_provider.file_path,  #
+                        data_provider.file_type,  #
+                        f"{data_provider.info}|PKCS:{repr(password)} is the password",  #
+                        "PKCS")
+                    candidate.line_data_list[0].line = base64.b64encode(data_provider.data).decode()
+                    candidate.line_data_list[0].value = repr(password)
+                    return [candidate]
+            except Exception as pkcs_exc:
+                logger.debug(f"{data_provider.file_path}:{pw_probe}:{pkcs_exc}")
+        return None
```
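`Util.load_pk` and `Util.check_pk` are credsweeper's own helpers; a minimal sketch of the same password probe written directly against the `cryptography` package's PKCS#12 loader (the `COMMON_PASSWORDS` wordlist stands in for `config.bruteforce_list` and is purely illustrative):

```python
from typing import Optional

from cryptography.hazmat.primitives.serialization import pkcs12

COMMON_PASSWORDS = ["", "changeit", "password", "123456"]  # illustrative wordlist


def probe_pkcs12(data: bytes) -> Optional[bytes]:
    """Return the password that opens the PKCS#12 blob, or None if no probe matched."""
    for pw in COMMON_PASSWORDS:
        password = pw.encode() if pw else None  # None probes an unencrypted container
        try:
            key, cert, extra_certs = pkcs12.load_key_and_certificates(data, password)
            if key is not None:
                return password or b""
        except ValueError:
            continue  # wrong password or malformed container
    return None
```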
credsweeper/deep_scanner/rpm_scanner.py (new)

```diff
@@ -0,0 +1,49 @@
+import io
+import logging
+from abc import ABC
+from typing import List, Optional
+
+import rpmfile
+
+from credsweeper.credentials.candidate import Candidate
+from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
+from credsweeper.file_handler.data_content_provider import DataContentProvider
+from credsweeper.file_handler.file_path_extractor import FilePathExtractor
+from credsweeper.utils import Util
+
+logger = logging.getLogger(__name__)
+
+
+class RpmScanner(AbstractScanner, ABC):
+    """Implements rpm scanning"""
+
+    def data_scan(
+            self,  #
+            data_provider: DataContentProvider,  #
+            depth: int,  #
+            recursive_limit_size: int) -> Optional[List[Candidate]]:
+        """Extracts files one by one from the package type and launches recursive scan"""
+        try:
+            candidates = []
+            with rpmfile.open(fileobj=io.BytesIO(data_provider.data)) as rpm_file:
+                for member in rpm_file.getmembers():
+                    # skip directory
+                    if 0 != member.isdir:
+                        continue
+                    if FilePathExtractor.check_exclude_file(self.config, member.name):
+                        continue
+                    if 0 > recursive_limit_size - member.size:
+                        logger.error(f"{member.filename}: size {member.size}"
+                                     f" is over limit {recursive_limit_size} depth:{depth}")
+                        continue
+                    rpm_content_provider = DataContentProvider(data=rpm_file.extractfile(member).read(),
+                                                               file_path=data_provider.file_path,
+                                                               file_type=Util.get_extension(member.name),
+                                                               info=f"{data_provider.info}|RPM:{member.name}")
+                    new_limit = recursive_limit_size - len(rpm_content_provider.data)
+                    rpm_candidates = self.recursive_scan(rpm_content_provider, depth, new_limit)
+                    candidates.extend(rpm_candidates)
+            return candidates
+        except Exception as rpm_exc:
+            logger.error(f"{data_provider.file_path}:{rpm_exc}")
+            return None
```
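RpmScanner brings in the third-party `rpmfile` dependency; the calls it relies on (`rpmfile.open(fileobj=...)`, `getmembers()`, `extractfile()`) can be exercised standalone. A minimal sketch that lists the payload of an in-memory RPM the same way the scanner walks it:

```python
import io

import rpmfile  # third-party package used by RpmScanner


def list_rpm_payload(data: bytes) -> None:
    """Print the regular files inside an RPM package held in memory."""
    with rpmfile.open(fileobj=io.BytesIO(data)) as rpm:
        for member in rpm.getmembers():
            if member.isdir:
                continue  # skip directories, as RpmScanner does
            payload = rpm.extractfile(member).read()
            print(member.name, member.size, len(payload))


# usage sketch:
# with open("package.rpm", "rb") as f:
#     list_rpm_payload(f.read())
```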