credsweeper 1.11.3-py3-none-any.whl → 1.11.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of credsweeper has been flagged as potentially problematic.

Files changed (47):
  1. credsweeper/__init__.py +1 -1
  2. credsweeper/__main__.py +1 -1
  3. credsweeper/app.py +21 -44
  4. credsweeper/common/constants.py +2 -5
  5. credsweeper/credentials/candidate_key.py +1 -1
  6. credsweeper/credentials/credential_manager.py +4 -3
  7. credsweeper/credentials/line_data.py +2 -5
  8. credsweeper/deep_scanner/abstract_scanner.py +269 -14
  9. credsweeper/deep_scanner/deb_scanner.py +55 -0
  10. credsweeper/deep_scanner/deep_scanner.py +39 -241
  11. credsweeper/deep_scanner/gzip_scanner.py +1 -1
  12. credsweeper/deep_scanner/jclass_scanner.py +74 -0
  13. credsweeper/deep_scanner/patch_scanner.py +48 -0
  14. credsweeper/deep_scanner/pkcs_scanner.py +41 -0
  15. credsweeper/deep_scanner/rpm_scanner.py +49 -0
  16. credsweeper/deep_scanner/sqlite3_scanner.py +79 -0
  17. credsweeper/file_handler/byte_content_provider.py +2 -2
  18. credsweeper/file_handler/content_provider.py +1 -1
  19. credsweeper/file_handler/data_content_provider.py +3 -4
  20. credsweeper/file_handler/diff_content_provider.py +2 -2
  21. credsweeper/file_handler/file_path_extractor.py +1 -1
  22. credsweeper/file_handler/files_provider.py +2 -4
  23. credsweeper/file_handler/patches_provider.py +5 -2
  24. credsweeper/file_handler/string_content_provider.py +2 -2
  25. credsweeper/file_handler/struct_content_provider.py +1 -1
  26. credsweeper/file_handler/text_content_provider.py +2 -2
  27. credsweeper/filters/__init__.py +1 -0
  28. credsweeper/filters/value_base64_encoded_pem_check.py +1 -1
  29. credsweeper/filters/value_base64_key_check.py +9 -14
  30. credsweeper/filters/value_entropy_base64_check.py +2 -6
  31. credsweeper/filters/value_json_web_key_check.py +37 -0
  32. credsweeper/filters/value_pattern_check.py +64 -16
  33. credsweeper/ml_model/features/file_extension.py +1 -1
  34. credsweeper/ml_model/ml_validator.py +43 -21
  35. credsweeper/rules/config.yaml +51 -9
  36. credsweeper/rules/rule.py +3 -3
  37. credsweeper/scanner/scan_type/multi_pattern.py +1 -2
  38. credsweeper/secret/config.json +6 -6
  39. credsweeper/utils/hop_stat.py +3 -3
  40. credsweeper/utils/pem_key_detector.py +6 -4
  41. credsweeper/utils/util.py +154 -79
  42. {credsweeper-1.11.3.dist-info → credsweeper-1.11.5.dist-info}/METADATA +3 -6
  43. {credsweeper-1.11.3.dist-info → credsweeper-1.11.5.dist-info}/RECORD +46 -40
  44. credsweeper/deep_scanner/pkcs12_scanner.py +0 -45
  45. {credsweeper-1.11.3.dist-info → credsweeper-1.11.5.dist-info}/WHEEL +0 -0
  46. {credsweeper-1.11.3.dist-info → credsweeper-1.11.5.dist-info}/entry_points.txt +0 -0
  47. {credsweeper-1.11.3.dist-info → credsweeper-1.11.5.dist-info}/licenses/LICENSE +0 -0
credsweeper/deep_scanner/deep_scanner.py
@@ -1,41 +1,35 @@
-import datetime
 import logging
-from typing import List, Optional, Any, Tuple, Union
+from typing import List, Any, Tuple
 
-from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN
+from credsweeper.common.constants import MIN_DATA_LEN
 from credsweeper.config import Config
-from credsweeper.credentials import Candidate
-from credsweeper.credentials.augment_candidates import augment_candidates
-from credsweeper.file_handler.byte_content_provider import ByteContentProvider
-from credsweeper.file_handler.content_provider import ContentProvider
-from credsweeper.file_handler.data_content_provider import DataContentProvider
-from credsweeper.file_handler.diff_content_provider import DiffContentProvider
-from credsweeper.file_handler.string_content_provider import StringContentProvider
-from credsweeper.file_handler.text_content_provider import TextContentProvider
 from credsweeper.scanner import Scanner
 from credsweeper.utils import Util
 from .byte_scanner import ByteScanner
 from .bzip2_scanner import Bzip2Scanner
+from .deb_scanner import DebScanner
 from .docx_scanner import DocxScanner
 from .eml_scanner import EmlScanner
 from .encoder_scanner import EncoderScanner
 from .gzip_scanner import GzipScanner
 from .html_scanner import HtmlScanner
+from .jclass_scanner import JclassScanner
 from .jks_scanner import JksScanner
 from .lang_scanner import LangScanner
 from .lzma_scanner import LzmaScanner
 from .mxfile_scanner import MxfileScanner
+from .patch_scanner import PatchScanner
 from .pdf_scanner import PdfScanner
-from .pkcs12_scanner import Pkcs12Scanner
+from .pkcs_scanner import PkcsScanner
 from .pptx_scanner import PptxScanner
+from .rpm_scanner import RpmScanner
+from .sqlite3_scanner import Sqlite3Scanner
 from .tar_scanner import TarScanner
 from .tmx_scanner import TmxScanner
 from .xlsx_scanner import XlsxScanner
 from .xml_scanner import XmlScanner
 from .zip_scanner import ZipScanner
-from ..common.constants import DEFAULT_ENCODING
-from ..file_handler.file_path_extractor import FilePathExtractor
-from ..file_handler.struct_content_provider import StructContentProvider
+from ..file_handler.descriptor import Descriptor
 
 logger = logging.getLogger(__name__)
 
@@ -47,13 +41,18 @@ class DeepScanner(
         EncoderScanner,  #
         GzipScanner,  #
         HtmlScanner,  #
+        JclassScanner,  #
         JksScanner,  #
         LangScanner,  #
         LzmaScanner,  #
+        PatchScanner,  #
         PdfScanner,  #
-        Pkcs12Scanner,  #
+        PkcsScanner,  #
         PptxScanner,  #
+        RpmScanner,  #
+        Sqlite3Scanner,  #
         TarScanner,  #
+        DebScanner,  #
         XmlScanner,  #
         XlsxScanner,  #
         ZipScanner
@@ -79,7 +78,7 @@ class DeepScanner(
         return self.__scanner
 
     @staticmethod
-    def get_deep_scanners(data: bytes, file_type: str, depth: int) -> Tuple[List[Any], List[Any]]:
+    def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[List[Any], List[Any]]:
         """Returns possibly scan methods for the data depends on content and fallback scanners"""
         deep_scanners: List[Any] = []
         fallback_scanners: List[Any] = []
@@ -88,20 +87,20 @@ class DeepScanner(
             deep_scanners.append(ZipScanner)
             # probably, there might be a docx, xlsx and so on.
             # It might be scanned with text representation in third-party libraries.
-            if file_type in (".xlsx", ".ods"):
+            if descriptor.extension in (".xlsx", ".ods"):
                 deep_scanners.append(XlsxScanner)
             else:
                 fallback_scanners.append(XlsxScanner)
-            if ".docx" == file_type:
+            if ".docx" == descriptor.extension:
                 deep_scanners.append(DocxScanner)
             else:
                 fallback_scanners.append(DocxScanner)
-            if ".pptx" == file_type:
+            if ".pptx" == descriptor.extension:
                 deep_scanners.append(PptxScanner)
             else:
                 fallback_scanners.append(PptxScanner)
         elif Util.is_com(data):
-            if ".xls" == file_type:
+            if ".xls" == descriptor.extension:
                 deep_scanners.append(XlsxScanner)
             else:
                 fallback_scanners.append(XlsxScanner)
@@ -114,15 +113,26 @@ class DeepScanner(
         elif Util.is_tar(data):
             if 0 < depth:
                 deep_scanners.append(TarScanner)
+        elif Util.is_deb(data):
+            if 0 < depth:
+                deep_scanners.append(DebScanner)
         elif Util.is_gzip(data):
             if 0 < depth:
                 deep_scanners.append(GzipScanner)
         elif Util.is_pdf(data):
             deep_scanners.append(PdfScanner)
+        elif Util.is_rpm(data):
+            if 0 < depth:
+                deep_scanners.append(RpmScanner)
+        elif Util.is_jclass(data):
+            deep_scanners.append(JclassScanner)
         elif Util.is_jks(data):
             deep_scanners.append(JksScanner)
+        elif Util.is_sqlite3(data):
+            if 0 < depth:
+                deep_scanners.append(Sqlite3Scanner)
         elif Util.is_asn1(data):
-            deep_scanners.append(Pkcs12Scanner)
+            deep_scanners.append(PkcsScanner)
         elif Util.is_xml(data):
             if Util.is_html(data):
                 deep_scanners.append(HtmlScanner)
@@ -140,9 +150,12 @@ class DeepScanner(
                 deep_scanners.append(XmlScanner)
                 fallback_scanners.append(ByteScanner)
         elif Util.is_eml(data):
-            if ".eml" == file_type:
+            if ".eml" == descriptor.extension:
                 deep_scanners.append(EmlScanner)
             else:
+                if 0 < depth:
+                    # formal patch looks like an eml
+                    deep_scanners.append(PatchScanner)
                 fallback_scanners.append(EmlScanner)
                 fallback_scanners.append(ByteScanner)
         elif Util.is_known(data):
@@ -150,226 +163,11 @@ class DeepScanner(
             pass
         elif not Util.is_binary(data):
             if 0 < depth:
+                deep_scanners.append(PatchScanner)
                 deep_scanners.append(EncoderScanner)
                 deep_scanners.append(LangScanner)
             deep_scanners.append(ByteScanner)
         else:
-            logger.warning("Cannot apply a deep scanner for type %s prefix %s", file_type, str(data[:MIN_DATA_LEN]))
+            logger.warning("Cannot apply a deep scanner for type %s prefix %s %d", descriptor,
+                           repr(data[:MIN_DATA_LEN]), len(data))
         return deep_scanners, fallback_scanners
-
-    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
-
-    def deep_scan_with_fallback(self, data_provider: DataContentProvider, depth: int,
-                                recursive_limit_size: int) -> List[Candidate]:
-        """Scans with deep scanners and fallback scanners if possible
-
-        Args:
-            data_provider: DataContentProvider with raw data
-            depth: maximal level of recursion
-            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
-
-        Returns: list with candidates
-
-        """
-        candidates: List[Candidate] = []
-        deep_scanners, fallback_scanners = self.get_deep_scanners(data_provider.data, data_provider.file_type, depth)
-        fallback = True
-        for scan_class in deep_scanners:
-            new_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
-            if new_candidates is None:
-                # scanner did not recognise the content type
-                continue
-            augment_candidates(candidates, new_candidates)
-            # this scan is successful, so fallback is not necessary
-            fallback = False
-        if fallback:
-            for scan_class in fallback_scanners:
-                fallback_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
-                if fallback_candidates is None:
-                    continue
-                augment_candidates(candidates, fallback_candidates)
-                # use only first successful fallback scanner
-                break
-        return candidates
-
-    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
-
-    def scan(self,
-             content_provider: ContentProvider,
-             depth: int,
-             recursive_limit_size: Optional[int] = None) -> List[Candidate]:
-        """Initial scan method to launch recursive scan. Skips ByteScanner to prevent extra scan
-
-        Args:
-            content_provider: ContentProvider that might contain raw data
-            depth: maximal level of recursion
-            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
-        """
-        recursive_limit_size = recursive_limit_size if isinstance(recursive_limit_size,
-                                                                  int) else RECURSIVE_SCAN_LIMITATION
-        candidates: List[Candidate] = []
-        data: Optional[bytes] = None
-        if isinstance(content_provider, TextContentProvider) or isinstance(content_provider, ByteContentProvider):
-            # Feature to scan files which might be containers
-            data = content_provider.data
-            info = "FILE"
-        elif isinstance(content_provider, DiffContentProvider) and content_provider.diff:
-            candidates = self.scanner.scan(content_provider)
-            # Feature to scan binary diffs
-            diff = content_provider.diff[0].get("line")
-            # the check for legal fix mypy issue
-            if isinstance(diff, bytes):
-                data = diff
-                info = "DIFF"
-        else:
-            logger.warning(f"Content provider {type(content_provider)} does not support deep scan")
-            info = "NA"
-
-        if data:
-            data_provider = DataContentProvider(data=data,
-                                                file_path=content_provider.file_path,
-                                                file_type=content_provider.file_type,
-                                                info=content_provider.info or info)
-            new_candidates = self.deep_scan_with_fallback(data_provider, depth, recursive_limit_size - len(data))
-            augment_candidates(candidates, new_candidates)
-        return candidates
-
-    def recursive_scan(
-            self,  #
-            data_provider: DataContentProvider,  #
-            depth: int = 0,  #
-            recursive_limit_size: int = 0) -> List[Candidate]:
-        """Recursive function to scan files which might be containers like ZIP archives
-
-        Args:
-            data_provider: DataContentProvider object may be a container
-            depth: maximal level of recursion
-            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
-        """
-        candidates: List[Candidate] = []
-        if 0 > depth:
-            # break recursion if maximal depth is reached
-            logger.debug("Bottom reached %s recursive_limit_size:%d", data_provider.file_path, recursive_limit_size)
-            return candidates
-        depth -= 1
-        if MIN_DATA_LEN > len(data_provider.data):
-            # break recursion for minimal data size
-            logger.debug("Too small data: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data),
-                         depth, recursive_limit_size, data_provider.file_path, data_provider.info)
-            return candidates
-        logger.debug("Start data_scan: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data), depth,
-                     recursive_limit_size, data_provider.file_path, data_provider.info)
-
-        if FilePathExtractor.is_find_by_ext_file(self.config, data_provider.file_type):
-            # Skip scanning file and makes fake candidate due the extension is suspicious
-            dummy_candidate = Candidate.get_dummy_candidate(self.config, data_provider.file_path,
-                                                            data_provider.file_type, data_provider.info,
-                                                            FilePathExtractor.FIND_BY_EXT_RULE)
-            candidates.append(dummy_candidate)
-        else:
-            new_candidates = self.deep_scan_with_fallback(data_provider, depth, recursive_limit_size)
-            augment_candidates(candidates, new_candidates)
-
-        return candidates
-
-    def structure_scan(
-            self,  #
-            struct_provider: StructContentProvider,  #
-            depth: int,  #
-            recursive_limit_size: int) -> List[Candidate]:
-        """Recursive function to scan structured data
-
-        Args:
-            struct_provider: DataContentProvider object may be a container
-            depth: maximal level of recursion
-            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
-        """
-        candidates: List[Candidate] = []
-        logger.debug("Start struct_scan: depth=%d, limit=%d, path=%s, info=%s", depth, recursive_limit_size,
-                     struct_provider.file_path, struct_provider.info)
-
-        if 0 > depth:
-            # break recursion if maximal depth is reached
-            logger.debug("bottom reached %s recursive_limit_size:%d", struct_provider.file_path, recursive_limit_size)
-            return candidates
-
-        depth -= 1
-
-        items: List[Tuple[Union[int, str], Any]] = []
-        struct_key: Optional[str] = None
-        struct_value: Optional[str] = None
-        line_for_keyword_rules = ""
-        if isinstance(struct_provider.struct, dict):
-            for key, value in struct_provider.struct.items():
-                if isinstance(value, (list, tuple)) and 1 == len(value):
-                    # simplify some structures like YAML when single item in new line is a value
-                    items.append((key, value[0]))
-                else:
-                    items.append((key, value))
-            # for transformation {"key": "api_key", "value": "XXXXXXX"} -> {"api_key": "XXXXXXX"}
-            struct_key = struct_provider.struct.get("key")
-            struct_value = struct_provider.struct.get("value")
-        elif isinstance(struct_provider.struct, list) or isinstance(struct_provider.struct, tuple):
-            items = list(enumerate(struct_provider.struct))
-        else:
-            logger.error("Not supported type:%s val:%s", str(type(struct_provider.struct)), str(struct_provider.struct))
-
-        for key, value in items:
-            if isinstance(value, dict) or isinstance(value, (list, tuple)) and 1 < len(value):
-                val_struct_provider = StructContentProvider(struct=value,
-                                                            file_path=struct_provider.file_path,
-                                                            file_type=struct_provider.file_type,
-                                                            info=f"{struct_provider.info}|STRUCT:{key}")
-                new_candidates = self.structure_scan(val_struct_provider, depth, recursive_limit_size)
-                candidates.extend(new_candidates)
-
-            elif isinstance(value, bytes):
-                bytes_struct_provider = DataContentProvider(data=value,
-                                                            file_path=struct_provider.file_path,
-                                                            file_type=struct_provider.file_type,
-                                                            info=f"{struct_provider.info}|BYTES:{key}")
-                new_limit = recursive_limit_size - len(value)
-                new_candidates = self.recursive_scan(bytes_struct_provider, depth, new_limit)
-                candidates.extend(new_candidates)
-
-            elif isinstance(value, str):
-                data = value.encode(encoding=DEFAULT_ENCODING, errors='replace')
-                str_struct_provider = DataContentProvider(data=data,
-                                                          file_path=struct_provider.file_path,
-                                                          file_type=struct_provider.file_type,
-                                                          info=f"{struct_provider.info}|STRING:{key}")
-                new_limit = recursive_limit_size - len(str_struct_provider.data)
-                new_candidates = self.recursive_scan(str_struct_provider, depth, new_limit)
-                candidates.extend(new_candidates)
-
-                # use key = "value" scan for common cases like in TOML
-                if isinstance(key, str) and self.scanner.keywords_required_substrings_check(key):
-                    line_for_keyword_rules += f"{key} = \"{value}\"; "
-
-            elif isinstance(value, (int, float, datetime.date, datetime.datetime)):
-                # use the fields only in case of matched keywords
-                if isinstance(key, str) and self.scanner.keywords_required_substrings_check(key):
-                    line_for_keyword_rules += f"{key} = \"{value}\"; "
-
-            else:
-                logger.warning("Not supported type:%s value(%s)", str(type(value)), str(value))
-
-        if line_for_keyword_rules:
-            str_provider = StringContentProvider([line_for_keyword_rules],
-                                                 file_path=struct_provider.file_path,
-                                                 file_type=".toml",
-                                                 info=f"{struct_provider.info}|KEYWORD:`{line_for_keyword_rules}`")
-            new_candidates = self.scanner.scan(str_provider)
-            augment_candidates(candidates, new_candidates)
-
-        # last check when dictionary is {"key": "api_key", "value": "XXXXXXX"} -> {"api_key": "XXXXXXX"}
-        if isinstance(struct_key, str) and isinstance(struct_value, str):
-            line_for_keyword_rules = f"{struct_key} = \"{struct_value}\""
-            key_value_provider = StringContentProvider(
-                [line_for_keyword_rules],
-                file_path=struct_provider.file_path,
-                file_type=".toml",
-                info=f"{struct_provider.info}|KEY_VALUE:`{line_for_keyword_rules}`")
-            new_candidates = self.scanner.scan(key_value_provider)
-            augment_candidates(candidates, new_candidates)
-        return candidates
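
Note that the refactored get_deep_scanners dispatches on sniffed content (Util.is_deb, Util.is_rpm, Util.is_jclass, Util.is_sqlite3, ...) rather than on the file extension alone. Those helpers live in credsweeper/utils/util.py (+154 -79 in this release) and are not displayed on this page; all four newly supported formats are recognizable by well-known magic bytes, so a plausible sketch of such checks is (illustrative only, not the package's actual code):

    # Illustrative magic-byte checks similar to what the Util.is_* helpers
    # must perform; the real implementations are in credsweeper/utils/util.py.

    def is_deb(data: bytes) -> bool:
        # .deb packages are "ar" archives whose first member is "debian-binary"
        return data.startswith(b"!<arch>\n") and b"debian-binary" in data[:72]

    def is_rpm(data: bytes) -> bool:
        # RPM lead magic
        return data.startswith(b"\xed\xab\xee\xdb")

    def is_sqlite3(data: bytes) -> bool:
        # 16-byte header string of every SQLite 3 database file
        return data.startswith(b"SQLite format 3\x00")

    def is_jclass(data: bytes) -> bool:
        # Java ClassFile magic
        return data.startswith(b"\xca\xfe\xba\xbe")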

credsweeper/deep_scanner/gzip_scanner.py
@@ -31,7 +31,7 @@ class GzipScanner(AbstractScanner, ABC):
                 gzip_content_provider = DataContentProvider(data=f.read(),
                                                             file_path=new_path,
                                                             file_type=Util.get_extension(new_path),
-                                                            info=f"{data_provider.info}|GZIP:{file_path}")
+                                                            info=f"{data_provider.info}|GZIP:{new_path}")
                 new_limit = recursive_limit_size - len(gzip_content_provider.data)
                 gzip_candidates = self.recursive_scan(gzip_content_provider, depth, new_limit)
                 return gzip_candidates

credsweeper/deep_scanner/jclass_scanner.py
@@ -0,0 +1,74 @@
+import io
+import logging
+import struct
+from abc import ABC
+from typing import List, Optional
+
+from credsweeper.common.constants import MIN_DATA_LEN, UTF_8
+from credsweeper.credentials import Candidate
+from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
+from credsweeper.file_handler.data_content_provider import DataContentProvider
+from credsweeper.file_handler.struct_content_provider import StructContentProvider
+
+logger = logging.getLogger(__name__)
+
+
+class JclassScanner(AbstractScanner, ABC):
+    """Implements java .class scanning"""
+
+    @staticmethod
+    def u2(stream: io.BytesIO) -> int:
+        """Extracts unsigned 16 bit big-endian"""
+        return int(struct.unpack(">H", stream.read(2))[0])
+
+    @staticmethod
+    def get_utf8_constants(stream: io.BytesIO) -> List[str]:
+        """Extracts only Utf8 constants from java ClassFile"""
+        result = []
+        item_count = JclassScanner.u2(stream)
+        while 0 < item_count:
+            # actual number of items is one less!
+            item_count -= 1
+            # uint8
+            tag = int(stream.read(1)[0])
+            if 1 == tag:
+                length = JclassScanner.u2(stream)
+                data = stream.read(int(length))
+                if MIN_DATA_LEN <= length:
+                    value = data.decode(encoding=UTF_8, errors="replace")
+                    result.append(value)
+            elif tag in (3, 4, 9, 10, 11, 12, 18):
+                _ = stream.read(4)
+            elif tag in (7, 8, 16):
+                _ = stream.read(2)
+            elif tag in (5, 6):
+                _ = stream.read(8)
+            elif 15 == tag:
+                _ = stream.read(3)
+            else:
+                logger.error(f"Unknown tag {tag}")
+                break
+        return result
+
+    def data_scan(
+            self,  #
+            data_provider: DataContentProvider,  #
+            depth: int,  #
+            recursive_limit_size: int) -> Optional[List[Candidate]]:
+        """Extracts data from binary"""
+        try:
+            stream = io.BytesIO(data_provider.data)
+            stream.read(4)  # magic
+            minor = JclassScanner.u2(stream)
+            major = JclassScanner.u2(stream)
+            constants = JclassScanner.get_utf8_constants(stream)
+            struct_content_provider = StructContentProvider(struct=constants,
+                                                            file_path=data_provider.file_path,
+                                                            file_type=data_provider.file_type,
+                                                            info=f"{data_provider.info}|Java.{major}.{minor}")
+            new_limit = recursive_limit_size - sum(len(x) for x in constants)
+            candidates = self.structure_scan(struct_content_provider, depth, new_limit)
+            return candidates
+        except Exception as jclass_exc:
+            logger.error(f"{data_provider.file_path}:{jclass_exc}")
+            return None
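
Background for the "actual number of items is one less" comment: in the ClassFile format, constant_pool_count is one greater than the number of constant pool entries, and each entry is a one-byte tag followed by a tag-specific payload; CONSTANT_Utf8 (tag 1) carries a u2 length plus that many bytes. A standalone sketch of the same walk, written here per the JVM spec rather than copied from the package (the spec additionally gives Long and Double entries two index slots):

    import io
    import struct

    # Hypothetical per-spec parser (JVMS 4.4); payload sizes per tag.
    FIXED_SIZES = {3: 4, 4: 4, 9: 4, 10: 4, 11: 4, 12: 4, 18: 4,  # 4-byte payloads
                   7: 2, 8: 2, 16: 2,  # 2-byte index payloads
                   5: 8, 6: 8,  # Long / Double
                   15: 3}  # MethodHandle

    def utf8_constants(class_file: bytes) -> list:
        stream = io.BytesIO(class_file)
        stream.read(8)  # magic, minor_version, major_version
        count = struct.unpack(">H", stream.read(2))[0]
        result = []
        index = 1  # constant pool indices start at 1 and run to count - 1
        while index < count:
            tag = stream.read(1)[0]
            if 1 == tag:  # CONSTANT_Utf8
                length = struct.unpack(">H", stream.read(2))[0]
                result.append(stream.read(length).decode("utf-8", errors="replace"))
            else:
                stream.read(FIXED_SIZES[tag])
            index += 2 if tag in (5, 6) else 1  # Long/Double take two slots
        return result

    # a minimal pool with a single Utf8 entry "secret"
    blob = (b"\xca\xfe\xba\xbe" + b"\x00\x00" + b"\x00\x34" + struct.pack(">H", 2)
            + b"\x01" + struct.pack(">H", 6) + b"secret")
    assert utf8_constants(blob) == ["secret"]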

credsweeper/deep_scanner/patch_scanner.py
@@ -0,0 +1,48 @@
+import io
+import logging
+from abc import ABC
+from typing import List, Optional
+
+from credsweeper.common.constants import DiffRowType
+from credsweeper.credentials.candidate import Candidate
+from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
+from credsweeper.file_handler.data_content_provider import DataContentProvider
+from credsweeper.file_handler.patches_provider import PatchesProvider
+
+logger = logging.getLogger(__name__)
+
+
+class PatchScanner(AbstractScanner, ABC):
+    """Implements .patch scanning"""
+
+    def data_scan(
+            self,  #
+            data_provider: DataContentProvider,  #
+            depth: int,  #
+            recursive_limit_size: int) -> Optional[List[Candidate]]:
+        """Tries to scan EML with text representation"""
+        try:
+            candidates: List[Candidate] = []
+            # common limitation
+            new_limit_size = recursive_limit_size - len(data_provider.data)
+            # ADDED
+            path_added = [(data_provider.file_path, io.BytesIO(data_provider.data))]
+            added_content_provider = PatchesProvider(path_added, change_type=DiffRowType.ADDED)
+            for added_file in added_content_provider.get_scannable_files(self.config):
+                added_candidates = self.scan(added_file, depth, new_limit_size)
+                candidates.extend(added_candidates)
+            # DELETED
+            path_deleted = [(data_provider.file_path, io.BytesIO(data_provider.data))]
+            deleted_content_provider = PatchesProvider(path_deleted, change_type=DiffRowType.DELETED)
+            for deleted_file in deleted_content_provider.get_scannable_files(self.config):
+                added_candidates = self.scan(deleted_file, depth, new_limit_size)
+                candidates.extend(added_candidates)
+            # update the line data for deep scan only
+            for i in candidates:
+                for line_data in i.line_data_list:
+                    line_data.path = f"{data_provider.file_path}/{line_data.path}"
+                    line_data.info = f"{data_provider.info}|PATCH:{line_data.info}"
+            return candidates
+        except Exception as patch_exc:
+            logger.error(f"{data_provider.file_path}:{patch_exc}")
+            return None
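
PatchScanner runs the same bytes through PatchesProvider twice, once per DiffRowType, so credentials that a patch removes are reported as well as ones it introduces. Conceptually the provider reduces a unified diff to two virtual texts; a toy illustration of that split (hypothetical helper, not the package's implementation):

    # Hypothetical sketch of the ADDED/DELETED split a unified diff undergoes.
    def split_patch(patch_text: str):
        added, deleted = [], []
        for line in patch_text.splitlines():
            if line.startswith("+++") or line.startswith("---"):
                continue  # file headers, not content
            if line.startswith("+"):
                added.append(line[1:])
            elif line.startswith("-"):
                deleted.append(line[1:])
        return added, deleted

    added, deleted = split_patch(
        "--- a/config.py\n"
        "+++ b/config.py\n"
        "@@ -1 +1 @@\n"
        "-password = \"OldSecret123\"\n"
        "+password = \"NewSecret456\"\n")
    assert added == ['password = "NewSecret456"']
    assert deleted == ['password = "OldSecret123"']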

credsweeper/deep_scanner/pkcs_scanner.py
@@ -0,0 +1,41 @@
+import base64
+import logging
+from abc import ABC
+from typing import List, Optional
+
+from credsweeper.credentials import Candidate
+from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
+from credsweeper.file_handler.data_content_provider import DataContentProvider
+from credsweeper.utils import Util
+
+logger = logging.getLogger(__name__)
+
+
+class PkcsScanner(AbstractScanner, ABC):
+    """Implements pkcs12 scanning"""
+
+    def data_scan(
+            self,  #
+            data_provider: DataContentProvider,  #
+            depth: int,  #
+            recursive_limit_size: int) -> Optional[List[Candidate]]:
+        """Tries to scan PKCS12 to open with standard password"""
+        for pw_probe in self.config.bruteforce_list:
+            try:
+                password = pw_probe.encode() if pw_probe else None
+                if pkey := Util.load_pk(data_provider.data, password):
+                    if not Util.check_pk(pkey):
+                        logger.debug("False alarm %s", data_provider.info)
+                        return []
+                    candidate = Candidate.get_dummy_candidate(
+                        self.config,  #
+                        data_provider.file_path,  #
+                        data_provider.file_type,  #
+                        f"{data_provider.info}|PKCS:{repr(password)} is the password",  #
+                        "PKCS")
+                    candidate.line_data_list[0].line = base64.b64encode(data_provider.data).decode()
+                    candidate.line_data_list[0].value = repr(password)
+                    return [candidate]
+            except Exception as pkcs_exc:
+                logger.debug(f"{data_provider.file_path}:{pw_probe}:{pkcs_exc}")
+        return None
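
Util.load_pk and Util.check_pk are defined in credsweeper/utils/util.py and are not shown on this page. The probe loop amounts to trying each password from config.bruteforce_list until the container opens; assuming load_pk wraps a standard PKCS#12 loader, a minimal sketch with the cryptography package would be:

    # A minimal sketch, assuming Util.load_pk wraps something like the
    # cryptography package's PKCS#12 loader (the real helper is not shown here).
    from typing import Optional

    from cryptography.hazmat.primitives.serialization import pkcs12

    def load_pk(data: bytes, password: Optional[bytes]):
        """Return the private key if the container opens with the password."""
        # raises ValueError when the password does not match
        key, _cert, _chain = pkcs12.load_key_and_certificates(data, password)
        return key

On success, data_scan reports a single dummy candidate whose value is the recovered password and whose line holds the whole container base64-encoded.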

credsweeper/deep_scanner/rpm_scanner.py
@@ -0,0 +1,49 @@
+import io
+import logging
+from abc import ABC
+from typing import List, Optional
+
+import rpmfile
+
+from credsweeper.credentials.candidate import Candidate
+from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
+from credsweeper.file_handler.data_content_provider import DataContentProvider
+from credsweeper.file_handler.file_path_extractor import FilePathExtractor
+from credsweeper.utils import Util
+
+logger = logging.getLogger(__name__)
+
+
+class RpmScanner(AbstractScanner, ABC):
+    """Implements rpm scanning"""
+
+    def data_scan(
+            self,  #
+            data_provider: DataContentProvider,  #
+            depth: int,  #
+            recursive_limit_size: int) -> Optional[List[Candidate]]:
+        """Extracts files one by one from the package type and launches recursive scan"""
+        try:
+            candidates = []
+            with rpmfile.open(fileobj=io.BytesIO(data_provider.data)) as rpm_file:
+                for member in rpm_file.getmembers():
+                    # skip directory
+                    if 0 != member.isdir:
+                        continue
+                    if FilePathExtractor.check_exclude_file(self.config, member.name):
+                        continue
+                    if 0 > recursive_limit_size - member.size:
+                        logger.error(f"{member.filename}: size {member.size}"
+                                     f" is over limit {recursive_limit_size} depth:{depth}")
+                        continue
+                    rpm_content_provider = DataContentProvider(data=rpm_file.extractfile(member).read(),
+                                                               file_path=data_provider.file_path,
+                                                               file_type=Util.get_extension(member.name),
+                                                               info=f"{data_provider.info}|RPM:{member.name}")
+                    new_limit = recursive_limit_size - len(rpm_content_provider.data)
+                    rpm_candidates = self.recursive_scan(rpm_content_provider, depth, new_limit)
+                    candidates.extend(rpm_candidates)
+            return candidates
+        except Exception as rpm_exc:
+            logger.error(f"{data_provider.file_path}:{rpm_exc}")
+            return None
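
Note that the new DebScanner, RpmScanner and Sqlite3Scanner (like the existing TarScanner and GzipScanner) are only selected when depth is positive, so container extraction has to be enabled explicitly, e.g. (assuming the standard --path and --depth options of the credsweeper command line):

    python -m credsweeper --path suspicious.rpm --depth 3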