credsweeper 1.11.4__py3-none-any.whl → 1.11.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

--- a/credsweeper/deep_scanner/deep_scanner.py
+++ b/credsweeper/deep_scanner/deep_scanner.py
@@ -1,18 +1,8 @@
-import contextlib
-import datetime
 import logging
-from typing import List, Optional, Any, Tuple, Union
+from typing import List, Any, Tuple
 
-from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN, MIN_VALUE_LENGTH
+from credsweeper.common.constants import MIN_DATA_LEN
 from credsweeper.config import Config
-from credsweeper.credentials import Candidate
-from credsweeper.credentials.augment_candidates import augment_candidates
-from credsweeper.file_handler.byte_content_provider import ByteContentProvider
-from credsweeper.file_handler.content_provider import ContentProvider
-from credsweeper.file_handler.data_content_provider import DataContentProvider
-from credsweeper.file_handler.diff_content_provider import DiffContentProvider
-from credsweeper.file_handler.string_content_provider import StringContentProvider
-from credsweeper.file_handler.text_content_provider import TextContentProvider
 from credsweeper.scanner import Scanner
 from credsweeper.utils import Util
 from .byte_scanner import ByteScanner
@@ -23,21 +13,23 @@ from .eml_scanner import EmlScanner
 from .encoder_scanner import EncoderScanner
 from .gzip_scanner import GzipScanner
 from .html_scanner import HtmlScanner
+from .jclass_scanner import JclassScanner
 from .jks_scanner import JksScanner
 from .lang_scanner import LangScanner
 from .lzma_scanner import LzmaScanner
 from .mxfile_scanner import MxfileScanner
+from .patch_scanner import PatchScanner
 from .pdf_scanner import PdfScanner
-from .pkcs12_scanner import Pkcs12Scanner
+from .pkcs_scanner import PkcsScanner
 from .pptx_scanner import PptxScanner
+from .rpm_scanner import RpmScanner
+from .sqlite3_scanner import Sqlite3Scanner
 from .tar_scanner import TarScanner
 from .tmx_scanner import TmxScanner
 from .xlsx_scanner import XlsxScanner
 from .xml_scanner import XmlScanner
 from .zip_scanner import ZipScanner
-from ..common.constants import DEFAULT_ENCODING
-from ..file_handler.file_path_extractor import FilePathExtractor
-from ..file_handler.struct_content_provider import StructContentProvider
+from ..file_handler.descriptor import Descriptor
 
 logger = logging.getLogger(__name__)
 
@@ -49,12 +41,16 @@ class DeepScanner(
     EncoderScanner,  #
     GzipScanner,  #
     HtmlScanner,  #
+    JclassScanner,  #
     JksScanner,  #
     LangScanner,  #
     LzmaScanner,  #
+    PatchScanner,  #
     PdfScanner,  #
-    Pkcs12Scanner,  #
+    PkcsScanner,  #
     PptxScanner,  #
+    RpmScanner,  #
+    Sqlite3Scanner,  #
     TarScanner,  #
     DebScanner,  #
     XmlScanner,  #
@@ -82,7 +78,7 @@ class DeepScanner(
         return self.__scanner
 
     @staticmethod
-    def get_deep_scanners(data: bytes, file_type: str, depth: int) -> Tuple[List[Any], List[Any]]:
+    def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[List[Any], List[Any]]:
         """Returns possibly scan methods for the data depends on content and fallback scanners"""
         deep_scanners: List[Any] = []
         fallback_scanners: List[Any] = []
@@ -91,20 +87,20 @@ class DeepScanner(
             deep_scanners.append(ZipScanner)
             # probably, there might be a docx, xlsx and so on.
             # It might be scanned with text representation in third-party libraries.
-            if file_type in (".xlsx", ".ods"):
+            if descriptor.extension in (".xlsx", ".ods"):
                 deep_scanners.append(XlsxScanner)
             else:
                 fallback_scanners.append(XlsxScanner)
-            if ".docx" == file_type:
+            if ".docx" == descriptor.extension:
                 deep_scanners.append(DocxScanner)
             else:
                 fallback_scanners.append(DocxScanner)
-            if ".pptx" == file_type:
+            if ".pptx" == descriptor.extension:
                 deep_scanners.append(PptxScanner)
             else:
                 fallback_scanners.append(PptxScanner)
         elif Util.is_com(data):
-            if ".xls" == file_type:
+            if ".xls" == descriptor.extension:
                 deep_scanners.append(XlsxScanner)
             else:
                 fallback_scanners.append(XlsxScanner)
@@ -125,10 +121,18 @@ class DeepScanner(
             deep_scanners.append(GzipScanner)
         elif Util.is_pdf(data):
             deep_scanners.append(PdfScanner)
+        elif Util.is_rpm(data):
+            if 0 < depth:
+                deep_scanners.append(RpmScanner)
+        elif Util.is_jclass(data):
+            deep_scanners.append(JclassScanner)
         elif Util.is_jks(data):
             deep_scanners.append(JksScanner)
+        elif Util.is_sqlite3(data):
+            if 0 < depth:
+                deep_scanners.append(Sqlite3Scanner)
         elif Util.is_asn1(data):
-            deep_scanners.append(Pkcs12Scanner)
+            deep_scanners.append(PkcsScanner)
         elif Util.is_xml(data):
             if Util.is_html(data):
                 deep_scanners.append(HtmlScanner)
@@ -146,9 +150,12 @@ class DeepScanner(
                 deep_scanners.append(XmlScanner)
                 fallback_scanners.append(ByteScanner)
         elif Util.is_eml(data):
-            if ".eml" == file_type:
+            if ".eml" == descriptor.extension:
                 deep_scanners.append(EmlScanner)
             else:
+                if 0 < depth:
+                    # formal patch looks like an eml
+                    deep_scanners.append(PatchScanner)
                 fallback_scanners.append(EmlScanner)
                 fallback_scanners.append(ByteScanner)
         elif Util.is_known(data):
@@ -156,231 +163,11 @@ class DeepScanner(
             pass
         elif not Util.is_binary(data):
             if 0 < depth:
+                deep_scanners.append(PatchScanner)
                 deep_scanners.append(EncoderScanner)
                 deep_scanners.append(LangScanner)
             deep_scanners.append(ByteScanner)
         else:
-            logger.warning("Cannot apply a deep scanner for type %s prefix %s", file_type, str(data[:MIN_DATA_LEN]))
+            logger.warning("Cannot apply a deep scanner for type %s prefix %s %d", descriptor,
+                           repr(data[:MIN_DATA_LEN]), len(data))
         return deep_scanners, fallback_scanners
-
-    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
-
-    def deep_scan_with_fallback(self, data_provider: DataContentProvider, depth: int,
-                                recursive_limit_size: int) -> List[Candidate]:
-        """Scans with deep scanners and fallback scanners if possible
-
-        Args:
-            data_provider: DataContentProvider with raw data
-            depth: maximal level of recursion
-            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
-
-        Returns: list with candidates
-
-        """
-        candidates: List[Candidate] = []
-        deep_scanners, fallback_scanners = self.get_deep_scanners(data_provider.data, data_provider.file_type, depth)
-        fallback = True
-        for scan_class in deep_scanners:
-            new_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
-            if new_candidates is None:
-                # scanner did not recognise the content type
-                continue
-            augment_candidates(candidates, new_candidates)
-            # this scan is successful, so fallback is not necessary
-            fallback = False
-        if fallback:
-            for scan_class in fallback_scanners:
-                fallback_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
-                if fallback_candidates is None:
-                    continue
-                augment_candidates(candidates, fallback_candidates)
-                # use only first successful fallback scanner
-                break
-        return candidates
-
-    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
-
-    def scan(self,
-             content_provider: ContentProvider,
-             depth: int,
-             recursive_limit_size: Optional[int] = None) -> List[Candidate]:
-        """Initial scan method to launch recursive scan. Skips ByteScanner to prevent extra scan
-
-        Args:
-            content_provider: ContentProvider that might contain raw data
-            depth: maximal level of recursion
-            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
-        """
-        recursive_limit_size = recursive_limit_size if isinstance(recursive_limit_size,
-                                                                  int) else RECURSIVE_SCAN_LIMITATION
-        candidates: List[Candidate] = []
-        data: Optional[bytes] = None
-        if isinstance(content_provider, (TextContentProvider, ByteContentProvider)):
-            # Feature to scan files which might be containers
-            data = content_provider.data
-            info = f"FILE:{content_provider.file_path}"
-        elif isinstance(content_provider, DiffContentProvider) and content_provider.diff:
-            candidates = self.scanner.scan(content_provider)
-            # Feature to scan binary diffs
-            diff = content_provider.diff[0].get("line")
-            # the check for legal fix mypy issue
-            if isinstance(diff, bytes):
-                data = diff
-                info = f"DIFF:{content_provider.file_path}"
-        else:
-            logger.warning(f"Content provider {type(content_provider)} does not support deep scan")
-            info = "NA"
-
-        if data:
-            data_provider = DataContentProvider(data=data,
-                                                file_path=content_provider.file_path,
-                                                file_type=content_provider.file_type,
-                                                info=content_provider.info or info)
-            new_candidates = self.deep_scan_with_fallback(data_provider, depth, recursive_limit_size - len(data))
-            augment_candidates(candidates, new_candidates)
-        return candidates
-
-    def recursive_scan(
-            self,  #
-            data_provider: DataContentProvider,  #
-            depth: int = 0,  #
-            recursive_limit_size: int = 0) -> List[Candidate]:
-        """Recursive function to scan files which might be containers like ZIP archives
-
-        Args:
-            data_provider: DataContentProvider object may be a container
-            depth: maximal level of recursion
-            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
-        """
-        candidates: List[Candidate] = []
-        if 0 > depth:
-            # break recursion if maximal depth is reached
-            logger.debug("Bottom reached %s recursive_limit_size:%d", data_provider.file_path, recursive_limit_size)
-            return candidates
-        depth -= 1
-        if MIN_DATA_LEN > len(data_provider.data):
-            # break recursion for minimal data size
-            logger.debug("Too small data: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data),
-                         depth, recursive_limit_size, data_provider.file_path, data_provider.info)
-            return candidates
-        logger.debug("Start data_scan: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data), depth,
-                     recursive_limit_size, data_provider.file_path, data_provider.info)
-
-        if FilePathExtractor.is_find_by_ext_file(self.config, data_provider.file_type):
-            # Skip scanning file and makes fake candidate due the extension is suspicious
-            dummy_candidate = Candidate.get_dummy_candidate(self.config, data_provider.file_path,
-                                                            data_provider.file_type, data_provider.info,
-                                                            FilePathExtractor.FIND_BY_EXT_RULE)
-            candidates.append(dummy_candidate)
-        else:
-            new_candidates = self.deep_scan_with_fallback(data_provider, depth, recursive_limit_size)
-            augment_candidates(candidates, new_candidates)
-
-        return candidates
-
-    def structure_scan(
-            self,  #
-            struct_provider: StructContentProvider,  #
-            depth: int,  #
-            recursive_limit_size: int) -> List[Candidate]:
-        """Recursive function to scan structured data
-
-        Args:
-            struct_provider: DataContentProvider object may be a container
-            depth: maximal level of recursion
-            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
-        """
-        candidates: List[Candidate] = []
-        logger.debug("Start struct_scan: depth=%d, limit=%d, path=%s, info=%s", depth, recursive_limit_size,
-                     struct_provider.file_path, struct_provider.info)
-
-        if 0 > depth:
-            # break recursion if maximal depth is reached
-            logger.debug("bottom reached %s recursive_limit_size:%d", struct_provider.file_path, recursive_limit_size)
-            return candidates
-
-        depth -= 1
-
-        items: List[Tuple[Union[int, str], Any]] = []
-        struct_key: Optional[str] = None
-        struct_value: Optional[str] = None
-        lines_for_keyword_rules = []
-        if isinstance(struct_provider.struct, dict):
-            for key, value in struct_provider.struct.items():
-                if isinstance(value, (list, tuple)) and 1 == len(value):
-                    # simplify some structures like YAML when single item in new line is a value
-                    items.append((key, value[0]))
-                else:
-                    items.append((key, value))
-            # for transformation {"key": "api_key", "value": "XXXXXXX"} -> {"api_key": "XXXXXXX"}
-            struct_key = struct_provider.struct.get("key")
-            struct_value = struct_provider.struct.get("value")
-        elif isinstance(struct_provider.struct, (list, tuple)):
-            items = list(enumerate(struct_provider.struct))
-        else:
-            logger.error("Not supported type:%s val:%s", str(type(struct_provider.struct)), str(struct_provider.struct))
-
-        for key, value in items:
-            if isinstance(value, dict) or isinstance(value, (list, tuple)) and 1 <= len(value):
-                val_struct_provider = StructContentProvider(struct=value,
-                                                            file_path=struct_provider.file_path,
-                                                            file_type=struct_provider.file_type,
-                                                            info=f"{struct_provider.info}|STRUCT:{key}")
-                new_candidates = self.structure_scan(val_struct_provider, depth, recursive_limit_size)
-                candidates.extend(new_candidates)
-
-            elif isinstance(value, bytes):
-                if MIN_DATA_LEN <= len(value):
-                    bytes_struct_provider = DataContentProvider(data=value,
-                                                                file_path=struct_provider.file_path,
-                                                                file_type=struct_provider.file_type,
-                                                                info=f"{struct_provider.info}|BYTES:{key}")
-                    new_limit = recursive_limit_size - len(value)
-                    new_candidates = self.recursive_scan(bytes_struct_provider, depth, new_limit)
-                    candidates.extend(new_candidates)
-                if MIN_VALUE_LENGTH <= len(value) and isinstance(key, str) \
-                        and self.scanner.keywords_required_substrings_check(key.lower()):
-                    str_val = str(value)
-                    lines_for_keyword_rules.append(f"{key} = '{str_val}'" if '"' in str_val else f'{key} = "{str_val}"')
-
-            elif isinstance(value, str):
-                if MIN_DATA_LEN <= len(value):
-                    # recursive scan only for data which may be decoded at least
-                    with contextlib.suppress(UnicodeError):
-                        data = value.encode(encoding=DEFAULT_ENCODING, errors='strict')
-                        str_struct_provider = DataContentProvider(data=data,
-                                                                  file_path=struct_provider.file_path,
-                                                                  file_type=struct_provider.file_type,
-                                                                  info=f"{struct_provider.info}|STRING:{key}")
-                        new_limit = recursive_limit_size - len(str_struct_provider.data)
-                        new_candidates = self.recursive_scan(str_struct_provider, depth, new_limit)
-                        candidates.extend(new_candidates)
-                # use key = "value" scan for common cases like in TOML
-                if MIN_VALUE_LENGTH <= len(value) and isinstance(key, str) \
-                        and self.scanner.keywords_required_substrings_check(key.lower()):
-                    lines_for_keyword_rules.append(f"{key} = '{value}'" if '"' in value else f'{key} = "{value}"')
-
-            elif isinstance(value, (int, float, datetime.date, datetime.datetime)):
-                # skip useless types
-                pass
-            else:
-                logger.warning("Not supported type:%s value(%s)", str(type(value)), str(value))
-
-        if lines_for_keyword_rules:
-            str_provider = StringContentProvider(lines_for_keyword_rules,
-                                                 file_path=struct_provider.file_path,
-                                                 file_type=".py",
-                                                 info=f"{struct_provider.info}|KEYWORD:`{lines_for_keyword_rules}`")
-            new_candidates = self.scanner.scan(str_provider)
-            augment_candidates(candidates, new_candidates)
-
-        # last check when dictionary is {"key": "api_key", "value": "XXXXXXX"} -> {"api_key": "XXXXXXX"}
-        if isinstance(struct_key, str) and isinstance(struct_value, str):
-            key_value_provider = StringContentProvider(
-                [f"{struct_key} = '{struct_value}'" if '"' in struct_value else f'{struct_key} = "{struct_value}"'],
-                file_path=struct_provider.file_path,
-                file_type=".toml",
-                info=f"{struct_provider.info}|KEY_VALUE:`{lines_for_keyword_rules}`")
-            new_candidates = self.scanner.scan(key_value_provider)
-            augment_candidates(candidates, new_candidates)
-        return candidates
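
Note: get_deep_scanners() now takes a Descriptor instead of a bare file_type string and consults descriptor.extension for format hints, while RPM, SQLite3 and patch scanning are additionally gated on depth > 0. A rough standalone sketch of driving the dispatcher; the Descriptor constructor arguments below are an assumption, only the extension field is confirmed by this diff:

# Hypothetical usage sketch, not taken from the package sources.
from credsweeper.deep_scanner.deep_scanner import DeepScanner
from credsweeper.file_handler.descriptor import Descriptor

with open("example.docx", "rb") as f:  # placeholder input file
    data = f.read()
# Assumed field order (path, extension, info); verify against the release.
descriptor = Descriptor("example.docx", ".docx", "")
deep, fallback = DeepScanner.get_deep_scanners(data, descriptor, depth=1)
print([s.__name__ for s in deep], [s.__name__ for s in fallback])
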
--- /dev/null
+++ b/credsweeper/deep_scanner/jclass_scanner.py
@@ -0,0 +1,74 @@
+import io
+import logging
+import struct
+from abc import ABC
+from typing import List, Optional
+
+from credsweeper.common.constants import MIN_DATA_LEN, UTF_8
+from credsweeper.credentials import Candidate
+from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
+from credsweeper.file_handler.data_content_provider import DataContentProvider
+from credsweeper.file_handler.struct_content_provider import StructContentProvider
+
+logger = logging.getLogger(__name__)
+
+
+class JclassScanner(AbstractScanner, ABC):
+    """Implements java .class scanning"""
+
+    @staticmethod
+    def u2(stream: io.BytesIO) -> int:
+        """Extracts unsigned 16 bit big-endian"""
+        return int(struct.unpack(">H", stream.read(2))[0])
+
+    @staticmethod
+    def get_utf8_constants(stream: io.BytesIO) -> List[str]:
+        """Extracts only Utf8 constants from java ClassFile"""
+        result = []
+        item_count = JclassScanner.u2(stream)
+        while 0 < item_count:
+            # actual number of items is one less!
+            item_count -= 1
+            # uint8
+            tag = int(stream.read(1)[0])
+            if 1 == tag:
+                length = JclassScanner.u2(stream)
+                data = stream.read(int(length))
+                if MIN_DATA_LEN <= length:
+                    value = data.decode(encoding=UTF_8, errors="replace")
+                    result.append(value)
+            elif tag in (3, 4, 9, 10, 11, 12, 18):
+                _ = stream.read(4)
+            elif tag in (7, 8, 16):
+                _ = stream.read(2)
+            elif tag in (5, 6):
+                _ = stream.read(8)
+            elif 15 == tag:
+                _ = stream.read(3)
+            else:
+                logger.error(f"Unknown tag {tag}")
+                break
+        return result
+
+    def data_scan(
+            self,  #
+            data_provider: DataContentProvider,  #
+            depth: int,  #
+            recursive_limit_size: int) -> Optional[List[Candidate]]:
+        """Extracts data from binary"""
+        try:
+            stream = io.BytesIO(data_provider.data)
+            stream.read(4)  # magic
+            minor = JclassScanner.u2(stream)
+            major = JclassScanner.u2(stream)
+            constants = JclassScanner.get_utf8_constants(stream)
+            struct_content_provider = StructContentProvider(struct=constants,
+                                                            file_path=data_provider.file_path,
+                                                            file_type=data_provider.file_type,
+                                                            info=f"{data_provider.info}|Java.{major}.{minor}")
+            new_limit = recursive_limit_size - sum(len(x) for x in constants)
+            candidates = self.structure_scan(struct_content_provider, depth, new_limit)
+            return candidates
+        except Exception as jclass_exc:
+            logger.error(f"{data_provider.file_path}:{jclass_exc}")
+            return None
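
Note: the constant-pool walker above reads a big-endian u2 item count, then dispatches on each entry's tag byte and keeps only CONSTANT_Utf8 (tag 1) payloads of at least MIN_DATA_LEN bytes. A hand-built fragment shows the wire format the parser expects (the bytes below are constructed to match the code as written, not taken from a real .class file):

import io
import struct

from credsweeper.deep_scanner.jclass_scanner import JclassScanner

# One Utf8 constant: u2 pool count, tag 0x01, u2 length, then the bytes.
payload = b"token = EXAMPLE0123456789"  # long enough to pass MIN_DATA_LEN
pool = struct.pack(">H", 1) + b"\x01" + struct.pack(">H", len(payload)) + payload
print(JclassScanner.get_utf8_constants(io.BytesIO(pool)))  # ['token = EXAMPLE0123456789']
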
--- /dev/null
+++ b/credsweeper/deep_scanner/patch_scanner.py
@@ -0,0 +1,48 @@
+import io
+import logging
+from abc import ABC
+from typing import List, Optional
+
+from credsweeper.common.constants import DiffRowType
+from credsweeper.credentials.candidate import Candidate
+from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
+from credsweeper.file_handler.data_content_provider import DataContentProvider
+from credsweeper.file_handler.patches_provider import PatchesProvider
+
+logger = logging.getLogger(__name__)
+
+
+class PatchScanner(AbstractScanner, ABC):
+    """Implements .patch scanning"""
+
+    def data_scan(
+            self,  #
+            data_provider: DataContentProvider,  #
+            depth: int,  #
+            recursive_limit_size: int) -> Optional[List[Candidate]]:
+        """Tries to scan EML with text representation"""
+        try:
+            candidates: List[Candidate] = []
+            # common limitation
+            new_limit_size = recursive_limit_size - len(data_provider.data)
+            # ADDED
+            path_added = [(data_provider.file_path, io.BytesIO(data_provider.data))]
+            added_content_provider = PatchesProvider(path_added, change_type=DiffRowType.ADDED)
+            for added_file in added_content_provider.get_scannable_files(self.config):
+                added_candidates = self.scan(added_file, depth, new_limit_size)
+                candidates.extend(added_candidates)
+            # DELETED
+            path_deleted = [(data_provider.file_path, io.BytesIO(data_provider.data))]
+            deleted_content_provider = PatchesProvider(path_deleted, change_type=DiffRowType.DELETED)
+            for deleted_file in deleted_content_provider.get_scannable_files(self.config):
+                added_candidates = self.scan(deleted_file, depth, new_limit_size)
+                candidates.extend(added_candidates)
+            # update the line data for deep scan only
+            for i in candidates:
+                for line_data in i.line_data_list:
+                    line_data.path = f"{data_provider.file_path}/{line_data.path}"
+                    line_data.info = f"{data_provider.info}|PATCH:{line_data.info}"
+            return candidates
+        except Exception as patch_exc:
+            logger.error(f"{data_provider.file_path}:{patch_exc}")
+            return None
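
Note: PatchScanner reuses PatchesProvider twice over the same raw bytes, once for added rows and once for deleted rows, then prefixes each finding's path and info. A minimal sketch of that double pass, assuming PatchesProvider accepts (path, BytesIO) pairs as the code above does; a real Config instance is still needed for get_scannable_files():

import io

from credsweeper.common.constants import DiffRowType
from credsweeper.file_handler.patches_provider import PatchesProvider

patch_bytes = b"""--- a/cfg.ini
+++ b/cfg.ini
@@ -1 +1 @@
-password = old_example_value
+password = new_example_value
"""
for row_type in (DiffRowType.ADDED, DiffRowType.DELETED):
    provider = PatchesProvider([("cfg.ini.patch", io.BytesIO(patch_bytes))], change_type=row_type)
    # provider.get_scannable_files(config) would yield one provider per changed file
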
--- /dev/null
+++ b/credsweeper/deep_scanner/pkcs_scanner.py
@@ -0,0 +1,41 @@
+import base64
+import logging
+from abc import ABC
+from typing import List, Optional
+
+from credsweeper.credentials import Candidate
+from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
+from credsweeper.file_handler.data_content_provider import DataContentProvider
+from credsweeper.utils import Util
+
+logger = logging.getLogger(__name__)
+
+
+class PkcsScanner(AbstractScanner, ABC):
+    """Implements pkcs12 scanning"""
+
+    def data_scan(
+            self,  #
+            data_provider: DataContentProvider,  #
+            depth: int,  #
+            recursive_limit_size: int) -> Optional[List[Candidate]]:
+        """Tries to scan PKCS12 to open with standard password"""
+        for pw_probe in self.config.bruteforce_list:
+            try:
+                password = pw_probe.encode() if pw_probe else None
+                if pkey := Util.load_pk(data_provider.data, password):
+                    if not Util.check_pk(pkey):
+                        logger.debug("False alarm %s", data_provider.info)
+                        return []
+                    candidate = Candidate.get_dummy_candidate(
+                        self.config,  #
+                        data_provider.file_path,  #
+                        data_provider.file_type,  #
+                        f"{data_provider.info}|PKCS:{repr(password)} is the password",  #
+                        "PKCS")
+                    candidate.line_data_list[0].line = base64.b64encode(data_provider.data).decode()
+                    candidate.line_data_list[0].value = repr(password)
+                    return [candidate]
+            except Exception as pkcs_exc:
+                logger.debug(f"{data_provider.file_path}:{pw_probe}:{pkcs_exc}")
+        return None
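
Note: the PKCS scanner is a password-bruteforce check: each entry of config.bruteforce_list (an empty string meaning "no password") is tried against the ASN.1 blob, and a dummy candidate is reported when a private key opens and passes the sanity check. The same probing idea reduced to Util.load_pk alone; the password list and file name here are illustrative, not the packaged bruteforce_list:

from credsweeper.utils import Util

with open("keystore.p12", "rb") as f:  # placeholder PKCS#12 file
    data = f.read()
for pw_probe in ("", "changeit", "password"):  # common default passwords
    try:
        if Util.load_pk(data, pw_probe.encode() if pw_probe else None):
            print(f"keystore opens with password {pw_probe!r}")
            break
    except Exception:
        continue  # wrong password or unsupported container
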
--- /dev/null
+++ b/credsweeper/deep_scanner/rpm_scanner.py
@@ -0,0 +1,49 @@
+import io
+import logging
+from abc import ABC
+from typing import List, Optional
+
+import rpmfile
+
+from credsweeper.credentials.candidate import Candidate
+from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
+from credsweeper.file_handler.data_content_provider import DataContentProvider
+from credsweeper.file_handler.file_path_extractor import FilePathExtractor
+from credsweeper.utils import Util
+
+logger = logging.getLogger(__name__)
+
+
+class RpmScanner(AbstractScanner, ABC):
+    """Implements rpm scanning"""
+
+    def data_scan(
+            self,  #
+            data_provider: DataContentProvider,  #
+            depth: int,  #
+            recursive_limit_size: int) -> Optional[List[Candidate]]:
+        """Extracts files one by one from the package type and launches recursive scan"""
+        try:
+            candidates = []
+            with rpmfile.open(fileobj=io.BytesIO(data_provider.data)) as rpm_file:
+                for member in rpm_file.getmembers():
+                    # skip directory
+                    if 0 != member.isdir:
+                        continue
+                    if FilePathExtractor.check_exclude_file(self.config, member.name):
+                        continue
+                    if 0 > recursive_limit_size - member.size:
+                        logger.error(f"{member.filename}: size {member.size}"
+                                     f" is over limit {recursive_limit_size} depth:{depth}")
+                        continue
+                    rpm_content_provider = DataContentProvider(data=rpm_file.extractfile(member).read(),
+                                                               file_path=data_provider.file_path,
+                                                               file_type=Util.get_extension(member.name),
+                                                               info=f"{data_provider.info}|RPM:{member.name}")
+                    new_limit = recursive_limit_size - len(rpm_content_provider.data)
+                    rpm_candidates = self.recursive_scan(rpm_content_provider, depth, new_limit)
+                    candidates.extend(rpm_candidates)
+            return candidates
+        except Exception as rpm_exc:
+            logger.error(f"{data_provider.file_path}:{rpm_exc}")
+            return None
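
Note: RpmScanner leans on the third-party rpmfile package: open the archive from a BytesIO, iterate getmembers(), skip directories and excluded paths, and feed each extracted payload back into recursive_scan. The same rpmfile calls in isolation, listing regular files of a local archive ("package.rpm" is a placeholder):

import io

import rpmfile

with open("package.rpm", "rb") as f:  # placeholder archive
    blob = f.read()
with rpmfile.open(fileobj=io.BytesIO(blob)) as rpm:
    for member in rpm.getmembers():
        if not member.isdir:  # regular files only, as the scanner does
            print(member.name, member.size)
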
--- /dev/null
+++ b/credsweeper/deep_scanner/sqlite3_scanner.py
@@ -0,0 +1,79 @@
+import logging
+import os.path
+import sqlite3
+import sys
+import tempfile
+from abc import ABC
+from typing import List, Optional, Tuple, Any, Generator
+
+from credsweeper.credentials.candidate import Candidate
+from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
+from credsweeper.file_handler.data_content_provider import DataContentProvider
+from credsweeper.file_handler.struct_content_provider import StructContentProvider
+
+logger = logging.getLogger(__name__)
+
+
+class Sqlite3Scanner(AbstractScanner, ABC):
+    """Implements SQLite3 database scanning"""
+
+    @staticmethod
+    def __walk(sqlite3db) -> Generator[Tuple[str, Any], None, None]:
+        sqlite3db.row_factory = sqlite3.Row
+        cursor = sqlite3db.cursor()
+        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
+        for table in cursor.fetchall():
+            table_name = table[0]
+            try:
+                cursor.execute(f"SELECT * FROM {table_name}")
+                for row in cursor:
+                    yield table_name, dict(row)
+            except sqlite3.DatabaseError as exc:
+                print(f"Error reading table {table_name}: {exc}")
+
+    @staticmethod
+    def walk_sqlite(data: bytes) -> Generator[Tuple[str, Any], None, None]:
+        """Yields data from sqlite3 database"""
+        if 10 < sys.version_info.minor:
+            # Added in version 3.11
+            with sqlite3.connect(":memory:") as sqlite3db:
+                sqlite3db.deserialize(data)  # type: ignore
+                yield from Sqlite3Scanner.__walk(sqlite3db)
+        elif "nt" != os.name:
+            # a tmpfile has to be used. TODO: remove when 3.10 will deprecate
+            with tempfile.NamedTemporaryFile(suffix=".sqlite") as t:
+                t.write(data)
+                t.flush()
+                with sqlite3.connect(t.name) as sqlite3db:
+                    yield from Sqlite3Scanner.__walk(sqlite3db)
+        elif "nt" == os.name:
+            # windows trick. TODO: remove when 3.10 will deprecate
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".sqlite") as t:
+                t.write(data)
+                t.flush()
+            sqlite3db = sqlite3.connect(t.name)
+            yield from Sqlite3Scanner.__walk(sqlite3db)
+            sqlite3db.close()
+            if os.path.exists(t.name):
+                os.remove(t.name)
+
+    def data_scan(
+            self,  #
+            data_provider: DataContentProvider,  #
+            depth: int,  #
+            recursive_limit_size: int) -> Optional[List[Candidate]]:
+        """Extracts data file from .ar (debian) archive and launches data_scan"""
+        try:
+            candidates: List[Candidate] = []
+            new_limit = recursive_limit_size - len(data_provider.data)
+            for table, row in self.walk_sqlite(data_provider.data):
+                struct_content_provider = StructContentProvider(struct=row,
+                                                                file_path=data_provider.file_path,
+                                                                file_type=data_provider.file_type,
+                                                                info=f"{data_provider.info}|SQLite3.{table}")
+                if new_candidates := self.structure_scan(struct_content_provider, depth, new_limit):
+                    candidates.extend(new_candidates)
+            return candidates
+        except Exception as exc:
+            logger.error(exc)
+            return None
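
Note: the version split in walk_sqlite() exists because Connection.serialize()/deserialize() only arrived in Python 3.11; older interpreters fall back to a temporary database file, with a non-deleting variant for Windows file-locking semantics. The 3.11+ round trip in isolation:

import sqlite3

src = sqlite3.connect(":memory:")
src.execute("CREATE TABLE t (k TEXT, v TEXT)")
src.execute("INSERT INTO t VALUES ('api_key', 'EXAMPLE0123456789')")
image = src.serialize()  # bytes image of the whole database (Python 3.11+)

dst = sqlite3.connect(":memory:")
dst.deserialize(image)  # load the image back without touching disk
print(dst.execute("SELECT * FROM t").fetchall())
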