credsweeper-1.11.4-py3-none-any.whl → credsweeper-1.11.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

Files changed (138)
  1. credsweeper/__init__.py +21 -15
  2. credsweeper/__main__.py +141 -35
  3. credsweeper/app.py +11 -11
  4. credsweeper/config/__init__.py +0 -1
  5. credsweeper/config/config.py +1 -1
  6. credsweeper/credentials/__init__.py +0 -5
  7. credsweeper/credentials/augment_candidates.py +1 -1
  8. credsweeper/credentials/candidate.py +1 -1
  9. credsweeper/credentials/credential_manager.py +1 -1
  10. credsweeper/credentials/line_data.py +2 -2
  11. credsweeper/deep_scanner/__init__.py +0 -1
  12. credsweeper/deep_scanner/abstract_scanner.py +272 -17
  13. credsweeper/deep_scanner/byte_scanner.py +1 -1
  14. credsweeper/deep_scanner/bzip2_scanner.py +2 -2
  15. credsweeper/deep_scanner/deb_scanner.py +34 -27
  16. credsweeper/deep_scanner/deep_scanner.py +37 -250
  17. credsweeper/deep_scanner/docx_scanner.py +1 -1
  18. credsweeper/deep_scanner/eml_scanner.py +1 -1
  19. credsweeper/deep_scanner/encoder_scanner.py +1 -1
  20. credsweeper/deep_scanner/gzip_scanner.py +2 -2
  21. credsweeper/deep_scanner/html_scanner.py +1 -1
  22. credsweeper/deep_scanner/jclass_scanner.py +74 -0
  23. credsweeper/deep_scanner/jks_scanner.py +1 -1
  24. credsweeper/deep_scanner/lang_scanner.py +1 -1
  25. credsweeper/deep_scanner/lzma_scanner.py +2 -2
  26. credsweeper/deep_scanner/mxfile_scanner.py +1 -1
  27. credsweeper/deep_scanner/patch_scanner.py +48 -0
  28. credsweeper/deep_scanner/pdf_scanner.py +1 -1
  29. credsweeper/deep_scanner/pkcs_scanner.py +41 -0
  30. credsweeper/deep_scanner/pptx_scanner.py +1 -1
  31. credsweeper/deep_scanner/rpm_scanner.py +49 -0
  32. credsweeper/deep_scanner/sqlite3_scanner.py +79 -0
  33. credsweeper/deep_scanner/tar_scanner.py +2 -2
  34. credsweeper/deep_scanner/tmx_scanner.py +2 -2
  35. credsweeper/deep_scanner/xlsx_scanner.py +2 -2
  36. credsweeper/deep_scanner/xml_scanner.py +1 -1
  37. credsweeper/deep_scanner/zip_scanner.py +2 -2
  38. credsweeper/file_handler/__init__.py +0 -15
  39. credsweeper/file_handler/abstract_provider.py +3 -4
  40. credsweeper/file_handler/byte_content_provider.py +1 -1
  41. credsweeper/file_handler/content_provider.py +1 -1
  42. credsweeper/file_handler/data_content_provider.py +2 -3
  43. credsweeper/file_handler/diff_content_provider.py +133 -3
  44. credsweeper/file_handler/file_path_extractor.py +2 -2
  45. credsweeper/file_handler/files_provider.py +4 -4
  46. credsweeper/file_handler/patches_provider.py +10 -8
  47. credsweeper/file_handler/text_content_provider.py +1 -1
  48. credsweeper/filters/__init__.py +2 -2
  49. credsweeper/filters/filter.py +2 -2
  50. credsweeper/filters/group/__init__.py +0 -2
  51. credsweeper/filters/group/general_keyword.py +2 -2
  52. credsweeper/filters/group/general_pattern.py +2 -2
  53. credsweeper/filters/group/group.py +16 -5
  54. credsweeper/filters/group/password_keyword.py +2 -2
  55. credsweeper/filters/group/token_pattern.py +2 -2
  56. credsweeper/filters/group/url_credentials_group.py +2 -2
  57. credsweeper/filters/group/weird_base36_token.py +2 -2
  58. credsweeper/filters/group/weird_base64_token.py +2 -2
  59. credsweeper/filters/line_git_binary_check.py +3 -3
  60. credsweeper/filters/line_specific_key_check.py +4 -4
  61. credsweeper/filters/line_uue_part_check.py +3 -3
  62. credsweeper/filters/value_allowlist_check.py +4 -4
  63. credsweeper/filters/value_array_dictionary_check.py +3 -3
  64. credsweeper/filters/value_atlassian_token_check.py +4 -4
  65. credsweeper/filters/value_azure_token_check.py +4 -4
  66. credsweeper/filters/value_base32_data_check.py +4 -4
  67. credsweeper/filters/value_base64_data_check.py +4 -4
  68. credsweeper/filters/value_base64_encoded_pem_check.py +4 -4
  69. credsweeper/filters/value_base64_key_check.py +13 -18
  70. credsweeper/filters/value_base64_part_check.py +4 -4
  71. credsweeper/filters/value_basic_auth_check.py +36 -0
  72. credsweeper/filters/value_blocklist_check.py +3 -3
  73. credsweeper/filters/value_camel_case_check.py +4 -4
  74. credsweeper/filters/value_couple_keyword_check.py +3 -3
  75. credsweeper/filters/value_dictionary_keyword_check.py +3 -3
  76. credsweeper/filters/value_dictionary_value_length_check.py +3 -3
  77. credsweeper/filters/value_discord_bot_check.py +4 -4
  78. credsweeper/filters/value_entropy_base_check.py +4 -4
  79. credsweeper/filters/value_file_path_check.py +5 -4
  80. credsweeper/filters/value_github_check.py +3 -3
  81. credsweeper/filters/value_grafana_check.py +4 -4
  82. credsweeper/filters/value_grafana_service_check.py +3 -3
  83. credsweeper/filters/value_hex_number_check.py +3 -3
  84. credsweeper/filters/value_jfrog_token_check.py +4 -4
  85. credsweeper/filters/value_json_web_key_check.py +37 -0
  86. credsweeper/filters/value_json_web_token_check.py +4 -4
  87. credsweeper/filters/value_last_word_check.py +3 -3
  88. credsweeper/filters/value_method_check.py +3 -3
  89. credsweeper/filters/value_not_allowed_pattern_check.py +4 -4
  90. credsweeper/filters/value_not_part_encoded_check.py +3 -3
  91. credsweeper/filters/value_number_check.py +3 -3
  92. credsweeper/filters/value_pattern_check.py +3 -3
  93. credsweeper/filters/value_similarity_check.py +3 -3
  94. credsweeper/filters/value_split_keyword_check.py +3 -3
  95. credsweeper/filters/value_string_type_check.py +3 -3
  96. credsweeper/filters/value_token_base_check.py +3 -3
  97. credsweeper/filters/value_token_check.py +3 -3
  98. credsweeper/logger/__init__.py +0 -1
  99. credsweeper/logger/logger.py +1 -1
  100. credsweeper/ml_model/__init__.py +0 -1
  101. credsweeper/ml_model/features/entropy_evaluation.py +1 -1
  102. credsweeper/ml_model/features/feature.py +1 -1
  103. credsweeper/ml_model/features/file_extension.py +1 -1
  104. credsweeper/ml_model/features/has_html_tag.py +2 -2
  105. credsweeper/ml_model/features/is_secret_numeric.py +1 -1
  106. credsweeper/ml_model/features/length_of_attribute.py +1 -1
  107. credsweeper/ml_model/features/morpheme_dense.py +1 -1
  108. credsweeper/ml_model/features/rule_name.py +1 -1
  109. credsweeper/ml_model/features/search_in_attribute.py +1 -1
  110. credsweeper/ml_model/features/word_in.py +1 -1
  111. credsweeper/ml_model/features/word_in_path.py +1 -1
  112. credsweeper/ml_model/features/word_in_postamble.py +1 -1
  113. credsweeper/ml_model/features/word_in_preamble.py +1 -1
  114. credsweeper/ml_model/features/word_in_transition.py +1 -1
  115. credsweeper/ml_model/features/word_in_value.py +1 -1
  116. credsweeper/ml_model/features/word_in_variable.py +1 -1
  117. credsweeper/ml_model/ml_validator.py +3 -2
  118. credsweeper/rules/__init__.py +0 -1
  119. credsweeper/rules/config.yaml +114 -25
  120. credsweeper/rules/rule.py +4 -3
  121. credsweeper/scanner/__init__.py +0 -1
  122. credsweeper/scanner/scan_type/__init__.py +0 -5
  123. credsweeper/scanner/scan_type/multi_pattern.py +5 -6
  124. credsweeper/scanner/scan_type/pem_key_pattern.py +4 -4
  125. credsweeper/scanner/scan_type/scan_type.py +4 -4
  126. credsweeper/scanner/scan_type/single_pattern.py +4 -4
  127. credsweeper/scanner/scanner.py +8 -5
  128. credsweeper/secret/config.json +6 -6
  129. credsweeper/utils/__init__.py +0 -1
  130. credsweeper/utils/pem_key_detector.py +5 -5
  131. credsweeper/utils/util.py +143 -206
  132. {credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/METADATA +3 -6
  133. credsweeper-1.11.6.dist-info/RECORD +160 -0
  134. credsweeper/deep_scanner/pkcs12_scanner.py +0 -45
  135. credsweeper-1.11.4.dist-info/RECORD +0 -154
  136. {credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/WHEEL +0 -0
  137. {credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/entry_points.txt +0 -0
  138. {credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/licenses/LICENSE +0 -0
credsweeper/secret/config.json CHANGED
@@ -5,9 +5,13 @@
5
5
  ".aar",
6
6
  ".apk",
7
7
  ".bz2",
8
+ ".class",
8
9
  ".gz",
10
+ ".jar",
9
11
  ".lzma",
12
+ ".rpm",
10
13
  ".tar",
14
+ ".war",
11
15
  ".xz",
12
16
  ".zip"
13
17
  ],
@@ -28,7 +32,6 @@
28
32
  ".avi",
29
33
  ".bin",
30
34
  ".bmp",
31
- ".class",
32
35
  ".css",
33
36
  ".dmg",
34
37
  ".ear",
@@ -40,7 +43,6 @@
40
43
  ".ico",
41
44
  ".img",
42
45
  ".info",
43
- ".jar",
44
46
  ".jpeg",
45
47
  ".jpg",
46
48
  ".map",
@@ -62,10 +64,8 @@
62
64
  ".rar",
63
65
  ".rc",
64
66
  ".rc2",
65
- ".rar",
66
67
  ".realm",
67
68
  ".res",
68
- ".rpm",
69
69
  ".s7z",
70
70
  ".scss",
71
71
  ".so",
@@ -76,7 +76,6 @@
76
76
  ".ttf",
77
77
  ".vcxproj",
78
78
  ".vdproj",
79
- ".war",
80
79
  ".wav",
81
80
  ".webm",
82
81
  ".webp",
@@ -161,7 +160,8 @@
161
160
  "bruteforce_list": [
162
161
  "",
163
162
  "changeit",
164
- "changeme"
163
+ "changeme",
164
+ "tizen"
165
165
  ],
166
166
  "check_for_literals": true,
167
167
  "min_pattern_value_length": 12,
credsweeper/utils/__init__.py CHANGED
@@ -1 +0,0 @@
1
- from credsweeper.utils.util import DiffRowData, Util, DiffDict
credsweeper/utils/pem_key_detector.py CHANGED
@@ -4,11 +4,11 @@ import re
4
4
  import string
5
5
  from typing import List
6
6
 
7
- from credsweeper.common.constants import PEM_BEGIN_PATTERN, PEM_END_PATTERN
8
- from credsweeper.config import Config
9
- from credsweeper.credentials import LineData
7
+ from credsweeper.common.constants import PEM_BEGIN_PATTERN, PEM_END_PATTERN, Chars
8
+ from credsweeper.config.config import Config
9
+ from credsweeper.credentials.line_data import LineData
10
10
  from credsweeper.file_handler.analysis_target import AnalysisTarget
11
- from credsweeper.utils import Util
11
+ from credsweeper.utils.util import Util
12
12
 
13
13
  logger = logging.getLogger(__name__)
14
14
 
@@ -17,7 +17,7 @@ ENTROPY_LIMIT_BASE64 = 4.5
17
17
 
18
18
  class PemKeyDetector:
19
19
  """Class to detect PEM PRIVATE keys only"""
20
- base64set = set(string.ascii_uppercase) | set(string.ascii_lowercase) | set(string.digits) | {'+', '/', '='}
20
+ base64set = set(Chars.BASE64STDPAD_CHARS.value)
21
21
 
22
22
  ignore_starts = [PEM_BEGIN_PATTERN, "Proc-Type", "Version", "DEK-Info"]
23
23
  wrap_characters = "\\'\";,[]#*!"
credsweeper/utils/util.py CHANGED
@@ -1,46 +1,38 @@
1
1
  import ast
2
2
  import base64
3
+ import contextlib
3
4
  import json
4
5
  import logging
5
6
  import math
6
7
  import os
8
+ import random
7
9
  import re
8
10
  import string
9
- import struct
10
11
  import tarfile
11
- from dataclasses import dataclass
12
12
  from pathlib import Path
13
13
  from typing import Any, Dict, List, Tuple, Optional, Union
14
14
 
15
15
  import numpy as np
16
- import whatthepatch
17
16
  import yaml
17
+ from cryptography.hazmat.primitives import hashes
18
+ from cryptography.hazmat.primitives.asymmetric import padding
19
+ from cryptography.hazmat.primitives.asymmetric.dh import DHPrivateKey, DHPublicKey
20
+ from cryptography.hazmat.primitives.asymmetric.dsa import DSAPrivateKey, DSAPublicKey
21
+ from cryptography.hazmat.primitives.asymmetric.ec import EllipticCurvePrivateKey, EllipticCurvePublicKey
22
+ from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey, Ed25519PublicKey
23
+ from cryptography.hazmat.primitives.asymmetric.ed448 import Ed448PrivateKey, Ed448PublicKey
24
+ from cryptography.hazmat.primitives.asymmetric.types import PrivateKeyTypes
25
+ from cryptography.hazmat.primitives.asymmetric.x25519 import X25519PublicKey, X25519PrivateKey
26
+ from cryptography.hazmat.primitives.asymmetric.x448 import X448PublicKey, X448PrivateKey
27
+ from cryptography.hazmat.primitives.serialization import load_der_private_key
28
+ from cryptography.hazmat.primitives.serialization.pkcs12 import load_key_and_certificates
18
29
  from lxml import etree
19
- from typing_extensions import TypedDict
20
30
 
21
- from credsweeper.common.constants import DiffRowType, AVAILABLE_ENCODINGS, \
31
+ from credsweeper.common.constants import AVAILABLE_ENCODINGS, \
22
32
  DEFAULT_ENCODING, LATIN_1, CHUNK_SIZE, MAX_LINE_LENGTH, CHUNK_STEP_SIZE, ASCII
23
33
 
24
34
  logger = logging.getLogger(__name__)
25
35
 
26
- DiffDict = TypedDict(
27
- "DiffDict",
28
- {
29
- "old": Optional[int], #
30
- "new": Optional[int], #
31
- "line": Union[str, bytes], # bytes are possibly since whatthepatch v1.0.4
32
- "hunk": Any # not used
33
- })
34
-
35
-
36
- @dataclass(frozen=True)
37
- class DiffRowData:
38
- """Class for keeping data of diff row."""
39
-
40
- line_type: DiffRowType
41
- line_numb: int
42
- line: str
43
-
44
36
 
45
37
  class Util:
46
38
  """Class that contains different useful methods."""
@@ -152,11 +144,10 @@ class Util:
152
144
  @staticmethod
153
145
  def is_known(data: Union[bytes, bytearray]) -> bool:
154
146
  """Returns True if any known binary format is found to prevent extra scan a file without an extension."""
155
- if isinstance(data, (bytes, bytearray)):
156
- if 127 <= len(data) and data.startswith(b"\x7f\x45\x4c\x46"):
157
- # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
158
- # minimal ELF is 127 bytes https://github.com/tchajed/minimal-elf
159
- return True
147
+ if isinstance(data, (bytes, bytearray)) and data.startswith(b"\x7f\x45\x4c\x46") and 127 <= len(data):
148
+ # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
149
+ # minimal ELF is 127 bytes https://github.com/tchajed/minimal-elf
150
+ return True
160
151
  return False
161
152
 
162
153
  @staticmethod
@@ -165,10 +156,9 @@ class Util:
165
156
  Returns True when two zeroes sequence is found in begin of data.
166
157
  The sequence never exists in text format (UTF-8, UTF-16). UTF-32 is not supported.
167
158
  """
168
- if 0 <= data.find(b"\0\0", 0, MAX_LINE_LENGTH):
159
+ if isinstance(data, (bytes, bytearray)) and 0 <= data.find(b"\0\0", 0, MAX_LINE_LENGTH):
169
160
  return True
170
- else:
171
- return False
161
+ return False
172
162
 
173
163
  NOT_LATIN1_PRINTABLE_SET = set(range(0, 256)) \
174
164
  .difference(set(x for x in string.printable.encode(ASCII))) \
@@ -182,7 +172,7 @@ class Util:
182
172
  non_latin1_cnt = sum(1 for x in data[:MAX_LINE_LENGTH] if x in Util.NOT_LATIN1_PRINTABLE_SET)
183
173
  # experiment for 255217 binary files shown avg = 0.268264 ± 0.168767, so let choose minimal
184
174
  chunk_len = min(MAX_LINE_LENGTH, len(data))
185
- result = 0.1 > non_latin1_cnt / chunk_len
175
+ result = bool(0.1 > non_latin1_cnt / chunk_len)
186
176
  return result
187
177
 
188
178
  @staticmethod
@@ -267,138 +257,33 @@ class Util:
267
257
  return lines
268
258
 
269
259
  @staticmethod
270
- def patch2files_diff(raw_patch: List[str], change_type: DiffRowType) -> Dict[str, List[DiffDict]]:
271
- """Generate files changes from patch for added or deleted filepaths.
272
-
273
- Args:
274
- raw_patch: git patch file content
275
- change_type: change type to select, DiffRowType.ADDED or DiffRowType.DELETED
276
-
277
- Return:
278
- return dict with ``{file paths: list of file row changes}``, where
279
- elements of list of file row changes represented as::
280
-
281
- {
282
- "old": line number before diff,
283
- "new": line number after diff,
284
- "line": line text,
285
- "hunk": diff hunk number
286
- }
287
-
288
- """
289
- if not raw_patch:
290
- return {}
291
-
292
- added_files, deleted_files = {}, {}
293
- try:
294
- for patch in whatthepatch.parse_patch(raw_patch):
295
- if patch.changes is None:
296
- logger.warning(f"Patch '{str(patch.header)}' cannot be scanned")
297
- continue
298
- changes = []
299
- for change in patch.changes:
300
- change_dict = change._asdict()
301
- changes.append(change_dict)
302
-
303
- added_files[patch.header.new_path] = changes
304
- deleted_files[patch.header.old_path] = changes
305
- if change_type == DiffRowType.ADDED:
306
- return added_files
307
- elif change_type == DiffRowType.DELETED:
308
- return deleted_files
309
- else:
310
- logger.error(f"Change type should be one of: '{DiffRowType.ADDED}', '{DiffRowType.DELETED}';"
311
- f" but received {change_type}")
312
- except Exception as exc:
313
- logger.exception(exc)
314
- return {}
315
-
316
- @staticmethod
317
- def preprocess_diff_rows(
318
- added_line_number: Optional[int], #
319
- deleted_line_number: Optional[int], #
320
- line: str) -> List[DiffRowData]:
321
- """Auxiliary function to extend diff changes.
322
-
323
- Args:
324
- added_line_number: number of added line or None
325
- deleted_line_number: number of deleted line or None
326
- line: the text line
327
-
328
- Return:
329
- diff rows data with as list of row change type, line number, row content
330
-
331
- """
332
- rows_data: List[DiffRowData] = []
333
- if isinstance(added_line_number, int):
334
- # indicates line was inserted
335
- rows_data.append(DiffRowData(DiffRowType.ADDED, added_line_number, line))
336
- if isinstance(deleted_line_number, int):
337
- # indicates line was removed
338
- rows_data.append(DiffRowData(DiffRowType.DELETED, deleted_line_number, line))
339
- return rows_data
340
-
341
- @staticmethod
342
- def wrong_change(change: DiffDict) -> bool:
343
- """Returns True if the change is wrong"""
344
- for i in ["line", "new", "old"]:
345
- if i not in change:
346
- logger.error(f"Skipping wrong change {change}")
260
+ def is_zip(data: Union[bytes, bytearray]) -> bool:
261
+ """According https://en.wikipedia.org/wiki/List_of_file_signatures"""
262
+ if isinstance(data, (bytes, bytearray)) and data.startswith(b"PK") and 4 <= len(data):
263
+ if 0x03 == data[2] and 0x04 == data[3]:
264
+ # normal PK
347
265
  return True
266
+ elif 0x05 == data[2] and 0x06 == data[3]:
267
+ # empty archive - no sense to scan in other scanners, so let it be a zip
268
+ return True
269
+ elif 0x07 == data[2] and 0x08 == data[3]:
270
+ # spanned archive - NOT SUPPORTED
271
+ return False
348
272
  return False
349
273
 
350
274
  @staticmethod
351
- def preprocess_file_diff(changes: List[DiffDict]) -> List[DiffRowData]:
352
- """Generate changed file rows from diff data with changed lines (e.g. marked + or - in diff).
353
-
354
- Args:
355
- changes: git diff by file rows data
356
-
357
- Return:
358
- diff rows data with as list of row change type, line number, row content
359
-
360
- """
361
- if not changes:
362
- return []
363
-
364
- rows_data = []
365
- # process diff to restore lines and their positions
366
- for change in changes:
367
- if Util.wrong_change(change):
368
- continue
369
- line = change["line"]
370
- if isinstance(line, str):
371
- rows_data.extend(Util.preprocess_diff_rows(change.get("new"), change.get("old"), line))
372
- elif isinstance(line, (bytes, bytearray)):
373
- logger.warning("The feature is available with the deep scan option")
374
- else:
375
- logger.error(f"Unknown type of line {type(line)}")
376
-
377
- return rows_data
378
-
379
- @staticmethod
380
- def is_zip(data: Union[bytes, bytearray]) -> bool:
275
+ def is_com(data: Union[bytes, bytearray]) -> bool:
381
276
  """According https://en.wikipedia.org/wiki/List_of_file_signatures"""
382
- if isinstance(data, (bytes, bytearray)) and 3 < len(data):
383
- # PK
384
- if data.startswith(b"PK"):
385
- if 0x03 == data[2] and 0x04 == data[3]:
386
- return True
387
- # empty archive - no sense to scan
388
- elif 0x05 == data[2] and 0x06 == data[3]:
389
- return True
390
- # spanned archive - NOT SUPPORTED
391
- elif 0x07 == data[2] and 0x08 == data[3]:
392
- return False
277
+ if isinstance(data, (bytes, bytearray)) and data.startswith(b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"):
278
+ # Compound File Binary Format: doc, xls, ppt, msi, msg
279
+ return True
393
280
  return False
394
281
 
395
282
  @staticmethod
396
- def is_com(data: Union[bytes, bytearray]) -> bool:
283
+ def is_rpm(data: Union[bytes, bytearray]) -> bool:
397
284
  """According https://en.wikipedia.org/wiki/List_of_file_signatures"""
398
- if isinstance(data, (bytes, bytearray)) and 8 < len(data):
399
- if data.startswith(b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"):
400
- # Compound File Binary Format: doc, xls, ppt, msi, msg
401
- return True
285
+ if isinstance(data, (bytes, bytearray)) and data.startswith(b"\xED\xAB\xEE\xDB"):
286
+ return True
402
287
  return False
403
288
 
404
289
  @staticmethod
@@ -411,88 +296,105 @@ class Util:
411
296
  or
412
297
  0x20 == data[262] and 0x20 == data[263] and 0x00 == data[264]
413
298
  ):
414
- try:
299
+ with contextlib.suppress(Exception):
415
300
  chksum = tarfile.nti(data[148:156]) # type: ignore
416
301
  unsigned_chksum, signed_chksum = tarfile.calc_chksums(data) # type: ignore
417
- return bool(chksum == unsigned_chksum or chksum == signed_chksum)
418
- except Exception as exc:
419
- logger.exception(f"Corrupted TAR ? {exc}")
302
+ if chksum == unsigned_chksum or chksum == signed_chksum:
303
+ return True
420
304
  return False
421
305
 
422
306
  @staticmethod
423
307
  def is_deb(data: Union[bytes, bytearray]) -> bool:
424
308
  """According https://en.wikipedia.org/wiki/Deb_(file_format)"""
425
- if isinstance(data, (bytes, bytearray)) and 512 <= len(data) and data.startswith(b"!<arch>\n"):
309
+ if isinstance(data, (bytes, bytearray)) and data.startswith(b"!<arch>\n"):
426
310
  return True
427
311
  return False
428
312
 
429
313
  @staticmethod
430
314
  def is_bzip2(data: Union[bytes, bytearray]) -> bool:
431
315
  """According https://en.wikipedia.org/wiki/Bzip2"""
432
- if isinstance(data, (bytes, bytearray)) and 10 <= len(data):
433
- if data.startswith(b"\x42\x5A\x68") \
434
- and 0x31 <= data[3] <= 0x39 \
435
- and 0x31 == data[4] and 0x41 == data[5] and 0x59 == data[6] \
436
- and 0x26 == data[7] and 0x53 == data[8] and 0x59 == data[9]:
437
- return True
316
+ if isinstance(data, (bytes, bytearray)) and data.startswith(b"\x42\x5A\x68") and 10 <= len(data) \
317
+ and 0x31 <= data[3] <= 0x39 \
318
+ and 0x31 == data[4] and 0x41 == data[5] and 0x59 == data[6] \
319
+ and 0x26 == data[7] and 0x53 == data[8] and 0x59 == data[9]:
320
+ return True
438
321
  return False
439
322
 
440
323
  @staticmethod
441
324
  def is_gzip(data: Union[bytes, bytearray]) -> bool:
442
325
  """According https://www.rfc-editor.org/rfc/rfc1952"""
443
- if isinstance(data, (bytes, bytearray)) and 3 <= len(data):
444
- if data.startswith(b"\x1F\x8B\x08"):
445
- return True
326
+ if isinstance(data, (bytes, bytearray)) and data.startswith(b"\x1F\x8B\x08"):
327
+ return True
446
328
  return False
447
329
 
448
330
  @staticmethod
449
331
  def is_pdf(data: Union[bytes, bytearray]) -> bool:
450
332
  """According https://en.wikipedia.org/wiki/List_of_file_signatures - pdf"""
451
- if isinstance(data, (bytes, bytearray)) and 5 <= len(data):
452
- if data.startswith(b"\x25\x50\x44\x46\x2D"):
453
- return True
333
+ if isinstance(data, (bytes, bytearray)) and data.startswith(b"%PDF-"):
334
+ return True
335
+ return False
336
+
337
+ @staticmethod
338
+ def is_jclass(data: Union[bytes, bytearray]) -> bool:
339
+ """According https://en.wikipedia.org/wiki/List_of_file_signatures - java class"""
340
+ if isinstance(data, (bytes, bytearray)) and data.startswith(b"\xCA\xFE\xBA\xBE"):
341
+ return True
454
342
  return False
455
343
 
456
344
  @staticmethod
457
345
  def is_jks(data: Union[bytes, bytearray]) -> bool:
458
346
  """According https://en.wikipedia.org/wiki/List_of_file_signatures - jks"""
459
- if isinstance(data, (bytes, bytearray)) and 4 <= len(data):
460
- if data.startswith(b"\xFE\xED\xFE\xED"):
461
- return True
347
+ if isinstance(data, (bytes, bytearray)) and data.startswith(b"\xFE\xED\xFE\xED"):
348
+ return True
462
349
  return False
463
350
 
464
351
  @staticmethod
465
352
  def is_lzma(data: Union[bytes, bytearray]) -> bool:
466
353
  """According https://en.wikipedia.org/wiki/List_of_file_signatures - lzma also xz"""
467
- if isinstance(data, (bytes, bytearray)) and 6 <= len(data):
468
- if data.startswith((b"\xFD\x37\x7A\x58\x5A\x00", b"\x5D\x00\x00")):
469
- return True
354
+ if isinstance(data, (bytes, bytearray)) and data.startswith((b"\xFD7zXZ\x00", b"\x5D\x00\x00")):
355
+ return True
356
+ return False
357
+
358
+ @classmethod
359
+ def is_sqlite3(cls, data):
360
+ """According https://en.wikipedia.org/wiki/List_of_file_signatures - SQLite Database"""
361
+ if isinstance(data, (bytes, bytearray)) and data.startswith(b"SQLite format 3\0"):
362
+ return True
470
363
  return False
471
364
 
472
365
  @staticmethod
473
- def is_asn1(data: Union[bytes, bytearray]) -> bool:
474
- """Only sequence type 0x30 and size correctness is checked"""
475
- if isinstance(data, (bytes, bytearray)) and 4 <= len(data):
476
- # sequence
477
- if 0x30 == data[0]:
478
- # https://www.oss.com/asn1/resources/asn1-made-simple/asn1-quick-reference/basic-encoding-rules.html#Lengths
479
- length = data[1]
366
+ def is_asn1(data: Union[bytes, bytearray]) -> int:
367
+ """Only sequence type 0x30 and size correctness are checked
368
+ Returns size of ASN1 data over 128 bytes or 0 if no interested data
369
+ """
370
+ if isinstance(data, (bytes, bytearray)) and 2 <= len(data) and 0x30 == data[0]:
371
+ # https://www.oss.com/asn1/resources/asn1-made-simple/asn1-quick-reference/basic-encoding-rules.html#Lengths
372
+ length = data[1]
373
+ if 0x80 == length:
374
+ if data.endswith(b"\x00\x00"):
375
+ # assume, all data are ASN1 of various size
376
+ return len(data)
377
+ else:
378
+ # skip the case where the ASN1 size is smaller than the actual data
379
+ return 0
380
+ elif 0x80 < length:
480
381
  byte_len = 0x7F & length
481
- if 0x80 == length and data.endswith(b"\x00\x00"):
482
- return True
483
- elif 0x80 < length and 1 < byte_len < len(data): # additional check
484
- len_bytes = data[2:2 + byte_len]
485
- try:
486
- long_size = struct.unpack(">h", len_bytes)
487
- except struct.error:
488
- long_size = (-1,) # yapf: disable
489
- length = long_size[0]
490
- elif 0x80 < length and 1 == byte_len: # small size
491
- length = data[2]
382
+ len_limit = 2 + byte_len
383
+ if 4 >= byte_len and len(data) >= len_limit:
384
+ length = 0
385
+ for i in range(2, len_limit):
386
+ length <<= 8
387
+ length |= data[i]
388
+ if len(data) >= length + len_limit:
389
+ return length + len_limit
492
390
  else:
493
- byte_len = 0
494
- return len(data) == length + 2 + byte_len
495
- return False
391
+ # unsupported huge size
392
+ return 0
393
+ else:
394
+ # less than 0x80
395
+ if len(data) >= length + 2:
396
+ return length + 2
397
+ return 0
496
398
 
497
399
  @staticmethod
498
400
  def is_html(data: Union[bytes, bytearray]) -> bool:
@@ -547,12 +449,12 @@ class Util:
547
449
  @staticmethod
548
450
  def is_eml(data: Union[bytes, bytearray]) -> bool:
549
451
  """According to https://datatracker.ietf.org/doc/html/rfc822 lookup the fields: Date, From, To or Subject"""
550
- if isinstance(data, (bytes, bytearray)):
551
- if (b"\nDate:" in data or data.startswith(b"Date:")) \
552
- and (b"\nFrom:" in data or data.startswith(b"From:")) \
553
- and (b"\nTo:" in data or data.startswith(b"To:")) \
554
- and (b"\nSubject:" in data or data.startswith(b"Subject:")):
555
- return True
452
+ if isinstance(data, (bytes, bytearray)) \
453
+ and (b"\nDate:" in data or data.startswith(b"Date:")) \
454
+ and (b"\nFrom:" in data or data.startswith(b"From:")) \
455
+ and (b"\nTo:" in data or data.startswith(b"To:")) \
456
+ and (b"\nSubject:" in data or data.startswith(b"Subject:")):
457
+ return True
556
458
  return False
557
459
 
558
460
  @staticmethod
@@ -665,10 +567,13 @@ class Util:
665
567
  result = ast.unparse(src).splitlines()
666
568
  return result
667
569
 
570
+ PEM_CLEANING_PATTERN = re.compile(r"\\[tnrvf]")
571
+ WHITESPACE_TRANS_TABLE = str.maketrans('', '', string.whitespace)
572
+
668
573
  @staticmethod
669
574
  def decode_base64(text: str, padding_safe: bool = False, urlsafe_detect=False) -> bytes:
670
575
  """decode text to bytes with / without padding detect and urlsafe symbols"""
671
- value = text
576
+ value = text.translate(Util.WHITESPACE_TRANS_TABLE)
672
577
  if padding_safe:
673
578
  pad_num = 0x3 & len(value)
674
579
  if pad_num:
@@ -679,6 +584,38 @@ class Util:
679
584
  decoded = base64.b64decode(value, validate=True)
680
585
  return decoded
681
586
 
587
+ @staticmethod
588
+ def load_pk(data: bytes, password: Optional[bytes] = None) -> Optional[PrivateKeyTypes]:
589
+ """Try to load private key from PKCS1, PKCS8 and PKCS12 formats"""
590
+ with contextlib.suppress(Exception):
591
+ # PKCS1, PKCS8 probes
592
+ private_key = load_der_private_key(data, password)
593
+ return private_key
594
+ with contextlib.suppress(Exception):
595
+ # PKCS12 probe
596
+ private_key, _certificate, _additional_certificates = load_key_and_certificates(data, password)
597
+ return private_key
598
+ return None
599
+
600
+ RANDOM_DATA = random.randbytes(20)
601
+
602
+ @staticmethod
603
+ def check_pk(pkey: PrivateKeyTypes) -> bool:
604
+ """Check private key with encrypt-decrypt random data"""
605
+ if isinstance(pkey, (EllipticCurvePrivateKey, DSAPrivateKey, Ed448PrivateKey, Ed25519PrivateKey, DHPrivateKey,
606
+ X448PrivateKey, X25519PrivateKey)):
607
+ # One does not simply perform check the keys
608
+ return True
609
+ if isinstance(pkey, (EllipticCurvePublicKey, DSAPublicKey, Ed448PublicKey, Ed25519PublicKey, DHPublicKey,
610
+ X448PublicKey, X25519PublicKey)) or not pkey:
611
+ # These aren't the keys we're looking for
612
+ return False
613
+ # DSA, RSA
614
+ pd = padding.OAEP(mgf=padding.MGF1(algorithm=hashes.SHA1()), algorithm=hashes.SHA1(), label=None)
615
+ ciphertext = pkey.public_key().encrypt(Util.RANDOM_DATA, padding=pd)
616
+ refurb = pkey.decrypt(ciphertext, padding=pd)
617
+ return bool(refurb == Util.RANDOM_DATA)
618
+
682
619
  @staticmethod
683
620
  def get_chunks(line_len: int) -> List[Tuple[int, int]]:
684
621
  """Returns chunks positions for given line length"""
{credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: credsweeper
3
- Version: 1.11.4
3
+ Version: 1.11.6
4
4
  Summary: Credential Sweeper
5
5
  Project-URL: Homepage, https://github.com/Samsung/CredSweeper
6
6
  Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues
@@ -37,6 +37,7 @@ Requires-Dist: python-dateutil
37
37
  Requires-Dist: python-docx
38
38
  Requires-Dist: python-pptx
39
39
  Requires-Dist: pyyaml
40
+ Requires-Dist: rpmfile
40
41
  Requires-Dist: whatthepatch
41
42
  Requires-Dist: xlrd
42
43
  Description-Content-Type: text/markdown
@@ -140,11 +141,7 @@ cat output.json
140
141
  "value_start": 12,
141
142
  "value_end": 19,
142
143
  "variable": "password",
143
- "entropy_validation": {
144
- "iterator": "BASE64_CHARS",
145
- "entropy": 2.120589933192232,
146
- "valid": false
147
- }
144
+ "entropy": 2.12059
148
145
  }
149
146
  ]
150
147
  }