credsweeper-1.11.2-py3-none-any.whl → credsweeper-1.11.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. (The link to further details was not preserved in this text export.)

Files changed (73)
  1. credsweeper/__init__.py +1 -1
  2. credsweeper/__main__.py +7 -5
  3. credsweeper/app.py +28 -47
  4. credsweeper/common/constants.py +2 -5
  5. credsweeper/common/keyword_pattern.py +15 -9
  6. credsweeper/common/morpheme_checklist.txt +4 -2
  7. credsweeper/credentials/candidate_key.py +1 -1
  8. credsweeper/credentials/credential_manager.py +4 -3
  9. credsweeper/credentials/line_data.py +16 -15
  10. credsweeper/deep_scanner/abstract_scanner.py +10 -1
  11. credsweeper/deep_scanner/deb_scanner.py +48 -0
  12. credsweeper/deep_scanner/deep_scanner.py +65 -43
  13. credsweeper/deep_scanner/docx_scanner.py +1 -1
  14. credsweeper/deep_scanner/encoder_scanner.py +2 -2
  15. credsweeper/deep_scanner/gzip_scanner.py +1 -1
  16. credsweeper/deep_scanner/html_scanner.py +3 -3
  17. credsweeper/deep_scanner/jks_scanner.py +2 -4
  18. credsweeper/deep_scanner/lang_scanner.py +2 -2
  19. credsweeper/deep_scanner/lzma_scanner.py +40 -0
  20. credsweeper/deep_scanner/pkcs12_scanner.py +3 -5
  21. credsweeper/deep_scanner/xml_scanner.py +2 -2
  22. credsweeper/file_handler/byte_content_provider.py +2 -2
  23. credsweeper/file_handler/content_provider.py +1 -1
  24. credsweeper/file_handler/data_content_provider.py +23 -14
  25. credsweeper/file_handler/diff_content_provider.py +2 -2
  26. credsweeper/file_handler/file_path_extractor.py +1 -1
  27. credsweeper/file_handler/files_provider.py +2 -4
  28. credsweeper/file_handler/patches_provider.py +1 -1
  29. credsweeper/file_handler/string_content_provider.py +2 -2
  30. credsweeper/file_handler/struct_content_provider.py +1 -1
  31. credsweeper/file_handler/text_content_provider.py +2 -2
  32. credsweeper/filters/value_array_dictionary_check.py +3 -1
  33. credsweeper/filters/value_azure_token_check.py +1 -2
  34. credsweeper/filters/value_base64_encoded_pem_check.py +1 -1
  35. credsweeper/filters/value_base64_part_check.py +30 -21
  36. credsweeper/filters/value_discord_bot_check.py +1 -2
  37. credsweeper/filters/value_entropy_base32_check.py +11 -31
  38. credsweeper/filters/value_entropy_base36_check.py +11 -34
  39. credsweeper/filters/value_entropy_base64_check.py +15 -48
  40. credsweeper/filters/value_entropy_base_check.py +37 -0
  41. credsweeper/filters/value_file_path_check.py +1 -1
  42. credsweeper/filters/value_hex_number_check.py +3 -3
  43. credsweeper/filters/value_json_web_token_check.py +4 -5
  44. credsweeper/filters/value_pattern_check.py +64 -16
  45. credsweeper/filters/value_string_type_check.py +11 -3
  46. credsweeper/filters/value_token_base32_check.py +0 -4
  47. credsweeper/filters/value_token_base36_check.py +0 -4
  48. credsweeper/filters/value_token_base64_check.py +0 -4
  49. credsweeper/filters/value_token_check.py +1 -1
  50. credsweeper/ml_model/features/file_extension.py +2 -2
  51. credsweeper/ml_model/features/morpheme_dense.py +0 -4
  52. credsweeper/ml_model/features/rule_name.py +1 -1
  53. credsweeper/ml_model/features/word_in_path.py +0 -9
  54. credsweeper/ml_model/features/word_in_postamble.py +0 -11
  55. credsweeper/ml_model/features/word_in_preamble.py +0 -11
  56. credsweeper/ml_model/features/word_in_transition.py +0 -11
  57. credsweeper/ml_model/features/word_in_value.py +0 -11
  58. credsweeper/ml_model/features/word_in_variable.py +0 -11
  59. credsweeper/ml_model/ml_validator.py +45 -22
  60. credsweeper/rules/config.yaml +238 -208
  61. credsweeper/rules/rule.py +3 -3
  62. credsweeper/scanner/scan_type/scan_type.py +2 -3
  63. credsweeper/scanner/scanner.py +7 -1
  64. credsweeper/secret/config.json +16 -5
  65. credsweeper/utils/hop_stat.py +3 -3
  66. credsweeper/utils/pem_key_detector.py +8 -7
  67. credsweeper/utils/util.py +76 -146
  68. {credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/METADATA +1 -1
  69. {credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/RECORD +72 -70
  70. credsweeper/utils/entropy_validator.py +0 -72
  71. {credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/WHEEL +0 -0
  72. {credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/entry_points.txt +0 -0
  73. {credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/licenses/LICENSE +0 -0
@@ -14,7 +14,7 @@ class ValueArrayDictionaryCheck(Filter):
14
14
  `token = {'root'}` would be kept
15
15
  """
16
16
 
17
- PATTERN = re.compile(r"\[('|\")?.+('|\")?\]")
17
+ PATTERN = re.compile(r"\[('|\")?[^,]+('|\")?\]")
18
18
 
19
19
  def __init__(self, config: Config = None) -> None:
20
20
  pass
@@ -32,6 +32,8 @@ class ValueArrayDictionaryCheck(Filter):
32
32
  """
33
33
  if line_data.is_well_quoted_value:
34
34
  return False
35
+ if line_data.wrap and "byte" in line_data.wrap.lower():
36
+ return False
35
37
  if self.PATTERN.search(line_data.value):
36
38
  return True
37
39
  if line_data.wrap and not line_data.is_well_quoted_value and ('[' in line_data.wrap or '(' in line_data.wrap):
@@ -1,7 +1,6 @@
1
1
  import contextlib
2
2
  import json
3
3
 
4
- from credsweeper.common.constants import Chars
5
4
  from credsweeper.config import Config
6
5
  from credsweeper.credentials import LineData
7
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
@@ -45,7 +44,7 @@ class ValueAzureTokenCheck(Filter):
45
44
  # must be all parts in payload
46
45
  return True
47
46
  min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(parts[2]))
48
- entropy = Util.get_shannon_entropy(parts[2], Chars.BASE64URL_CHARS.value)
47
+ entropy = Util.get_shannon_entropy(parts[2])
49
48
  # good signature has to be like random bytes
50
49
  return entropy < min_entropy
51
50
 
@@ -30,7 +30,7 @@ class ValueBase64EncodedPem(Filter):
30
30
  with contextlib.suppress(Exception):
31
31
  text = Util.decode_base64(line_data.value, padding_safe=True, urlsafe_detect=True)
32
32
  lines = text.decode(ASCII).splitlines()
33
- lines_pos = [x for x in range(len(lines))]
33
+ lines_pos = list(range(len(lines)))
34
34
  for line_pos, line in zip(lines_pos, lines):
35
35
  if PEM_BEGIN_PATTERN in line:
36
36
  new_target = AnalysisTarget(line_pos, lines, lines_pos, target.descriptor)
@@ -1,6 +1,7 @@
1
1
  import contextlib
2
2
  import re
3
3
  import statistics
4
+ from itertools import takewhile
4
5
 
5
6
  from credsweeper.common.constants import Chars
6
7
  from credsweeper.config import Config
@@ -16,8 +17,8 @@ class ValueBase64PartCheck(Filter):
16
17
  Check that candidate is NOT a part of base64 long line
17
18
  """
18
19
 
19
- base64_pattern = re.compile(r"^(\\{1,8}[0abfnrtv]|[0-9A-Za-z+/=]){1,4000}")
20
- base64_set = set(Chars.BASE64STDPAD_CHARS.value)
20
+ base64_pattern = re.compile(r"^(\\{1,8}[0abfnrtv]|[0-9A-Za-z+/=]){1,4000}$")
21
+ base64_char_set = set(Chars.BASE64STDPAD_CHARS.value + '\\')
21
22
 
22
23
  def __init__(self, config: Config = None) -> None:
23
24
  pass
@@ -64,38 +65,46 @@ class ValueBase64PartCheck(Filter):
64
65
  elif right_end - left_start >= 2 * len_value:
65
66
  # simple analysis for data too large to yield sensible insights
66
67
  part_set = set(line[left_start:right_end])
67
- if not part_set.difference(self.base64_set):
68
+ if not part_set.difference(ValueBase64PartCheck.base64_char_set):
68
69
  # obvious case: all characters are base64 standard
69
70
  return True
70
71
 
71
- left_part = line[left_start:line_data.value_start]
72
- len_left = len(left_part)
73
- right_part = line[line_data.value_end:right_end]
74
- len_right = len(right_part)
72
+ left_part = ''.join(
73
+ takewhile(lambda x: x in ValueBase64PartCheck.base64_char_set,
74
+ reversed(line[left_start:line_data.value_start])))
75
+
76
+ right_part = ''.join(
77
+ takewhile(lambda x: x in ValueBase64PartCheck.base64_char_set, line[line_data.value_end:right_end]))
75
78
 
76
79
  min_entropy_value = ValueEntropyBase64Check.get_min_data_entropy(len_value)
77
- value_entropy = Util.get_shannon_entropy(value, Chars.BASE64STD_CHARS.value)
78
80
 
79
- if ValueEntropyBase64Check.min_length < len_left:
80
- left_entropy = Util.get_shannon_entropy(left_part, Chars.BASE64STD_CHARS.value)
81
- if len_left < len_value:
82
- left_entropy *= len_value / len_left
83
- else:
84
- left_entropy = min_entropy_value
81
+ left_entropy = Util.get_shannon_entropy(left_part)
82
+ value_entropy = Util.get_shannon_entropy(value)
83
+ right_entropy = Util.get_shannon_entropy(right_part)
84
+ common = left_part + value + right_part
85
+ common_entropy = Util.get_shannon_entropy(common)
86
+ min_entropy_common = ValueEntropyBase64Check.get_min_data_entropy(len(common))
87
+ if min_entropy_common < common_entropy:
88
+ return True
85
89
 
86
- if ValueEntropyBase64Check.min_length < len_right:
87
- right_entropy = Util.get_shannon_entropy(right_part, Chars.BASE64STD_CHARS.value)
88
- if len_right < len_value:
89
- left_entropy *= len_right / len_left
90
+ if left_entropy and right_entropy:
91
+ data = [left_entropy, value_entropy, right_entropy, min_entropy_value, common_entropy]
92
+ elif left_entropy and not right_entropy:
93
+ data = [left_entropy, value_entropy, min_entropy_value, min_entropy_value, common_entropy]
94
+ elif not left_entropy and right_entropy:
95
+ data = [value_entropy, right_entropy, min_entropy_value, min_entropy_value, common_entropy]
90
96
  else:
91
- right_entropy = min_entropy_value
97
+ return False
92
98
 
93
- data = [left_entropy, value_entropy, right_entropy, min_entropy_value]
94
99
  avg = statistics.mean(data)
95
100
  stdev = statistics.stdev(data, avg)
96
101
  avg_min = avg - 1.1 * stdev
97
- if avg_min <= left_entropy and avg_min <= right_entropy:
102
+ if (0. == left_entropy or avg_min < left_entropy or left_entropy < value_entropy < right_entropy) \
103
+ and (
104
+ 0. == right_entropy or avg_min < right_entropy or right_entropy < value_entropy < left_entropy):
98
105
  # high entropy of bound parts looks like a part of base64 long line
99
106
  return True
107
+ else:
108
+ return False
100
109
 
101
110
  return False
@@ -1,6 +1,5 @@
1
1
  import contextlib
2
2
 
3
- from credsweeper.common.constants import Chars
4
3
  from credsweeper.config import Config
5
4
  from credsweeper.credentials import LineData
6
5
  from credsweeper.file_handler.analysis_target import AnalysisTarget
@@ -32,7 +31,7 @@ class ValueDiscordBotCheck(Filter):
32
31
  id_part = line_data.value[:dot_separator_index]
33
32
  discord_id = int(Util.decode_base64(id_part, padding_safe=True, urlsafe_detect=True))
34
33
  entropy_part = line_data.value[dot_separator_index:]
35
- entropy = Util.get_shannon_entropy(entropy_part, Chars.BASE64URL_CHARS.value)
34
+ entropy = Util.get_shannon_entropy(entropy_part)
36
35
  min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(entropy_part))
37
36
  if 1000 <= discord_id and min_entropy <= entropy:
38
37
  return False
@@ -1,42 +1,22 @@
1
1
  import math
2
+ from functools import cache
2
3
 
3
- from credsweeper.common.constants import Chars
4
- from credsweeper.config import Config
5
- from credsweeper.credentials import LineData
6
- from credsweeper.file_handler.analysis_target import AnalysisTarget
7
- from credsweeper.filters import Filter
8
- from credsweeper.utils import Util
4
+ from credsweeper.filters.value_entropy_base_check import ValueEntropyBaseCheck
9
5
 
10
6
 
11
- class ValueEntropyBase32Check(Filter):
12
- """Check that candidate have Shanon Entropy (for [a-z0-9])"""
13
-
14
- def __init__(self, config: Config = None) -> None:
15
- pass
7
+ class ValueEntropyBase32Check(ValueEntropyBaseCheck):
8
+ """Base32 entropy check"""
16
9
 
17
10
  @staticmethod
11
+ @cache
18
12
  def get_min_data_entropy(x: int) -> float:
19
13
  """Returns average entropy for size of random data. Precalculated data is applied for speedup"""
20
- if 16 == x:
21
- y = 3.46
22
- elif 10 <= x:
23
- # approximation does not exceed stdev
24
- y = 0.64 * math.log2(x) + 0.9
14
+ if 8 <= x < 17:
15
+ y = 0.80569236 * math.log2(x) + 0.13439734
16
+ elif 17 <= x < 33:
17
+ y = 0.66350481 * math.log2(x) + 0.71143862
18
+ elif 33 <= x:
19
+ y = 4.04
25
20
  else:
26
21
  y = 0
27
22
  return y
28
-
29
- def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
30
- """Run filter checks on received credential candidate data 'line_data'.
31
-
32
- Args:
33
- line_data: credential candidate data
34
- target: multiline target from which line data was obtained
35
-
36
- Return:
37
- True, if need to filter candidate and False if left
38
-
39
- """
40
- entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE32_CHARS.value)
41
- min_entropy = ValueEntropyBase32Check.get_min_data_entropy(len(line_data.value))
42
- return min_entropy > entropy or 0 == min_entropy
@@ -1,46 +1,23 @@
1
1
  import math
2
+ from functools import cache
2
3
 
3
- from credsweeper.common.constants import Chars
4
- from credsweeper.config import Config
5
- from credsweeper.credentials import LineData
6
- from credsweeper.file_handler.analysis_target import AnalysisTarget
7
- from credsweeper.filters import Filter
8
- from credsweeper.utils import Util
4
+ from credsweeper.filters.value_entropy_base_check import ValueEntropyBaseCheck
9
5
 
10
6
 
11
- class ValueEntropyBase36Check(Filter):
12
- """Check that candidate have Shanon Entropy (for [a-z0-9])"""
13
-
14
- def __init__(self, config: Config = None) -> None:
15
- pass
7
+ class ValueEntropyBase36Check(ValueEntropyBaseCheck):
8
+ """Base36 entropy check"""
16
9
 
17
10
  @staticmethod
11
+ @cache
18
12
  def get_min_data_entropy(x: int) -> float:
19
13
  """Returns minimal entropy for size of random data. Precalculated data is applied for speedup"""
20
14
  if 15 == x:
21
- y = 3.43
22
- elif 24 == x:
23
- y = 3.91
24
- elif 25 == x:
25
- y = 3.95
26
- elif 10 <= x:
27
- # approximation does not exceed standard deviation
28
- y = 0.7 * math.log2(x) + 0.7
15
+ # workaround for Dropbox App secret
16
+ y = 3.374
17
+ elif 10 <= x < 26:
18
+ y = 0.731566857 * math.log2(x) + 0.474132
19
+ elif 26 <= x:
20
+ y = 3.9
29
21
  else:
30
22
  y = 0
31
23
  return y
32
-
33
- def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
34
- """Run filter checks on received credential candidate data 'line_data'.
35
-
36
- Args:
37
- line_data: credential candidate data
38
- target: multiline target from which line data was obtained
39
-
40
- Return:
41
- True, if need to filter candidate and False if left
42
-
43
- """
44
- entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE36_CHARS.value)
45
- min_entropy = ValueEntropyBase36Check.get_min_data_entropy(len(line_data.value))
46
- return min_entropy > entropy or 0 == min_entropy
@@ -1,59 +1,26 @@
1
1
  import math
2
+ from functools import cache
2
3
 
3
- from credsweeper.common.constants import Chars, ENTROPY_LIMIT_BASE64
4
- from credsweeper.config import Config
5
- from credsweeper.credentials import LineData
6
- from credsweeper.file_handler.analysis_target import AnalysisTarget
7
- from credsweeper.filters import Filter
8
- from credsweeper.utils import Util
4
+ from credsweeper.filters.value_entropy_base_check import ValueEntropyBaseCheck
9
5
 
10
6
 
11
- class ValueEntropyBase64Check(Filter):
12
- """Check that candidate have Shanon Entropy > 3 (for HEX_CHARS or BASE36_CHARS) or > 4.5 (for BASE64_CHARS)."""
13
-
14
- # If the value size is less than this value the entropy evaluation gives an imprecise result
15
- min_length = 12
16
-
17
- def __init__(self, config: Config = None) -> None:
18
- pass
7
+ class ValueEntropyBase64Check(ValueEntropyBaseCheck):
8
+ """Base64 entropy check"""
19
9
 
20
10
  @staticmethod
11
+ @cache
21
12
  def get_min_data_entropy(x: int) -> float:
22
13
  """Returns minimal average entropy for size of random data. Precalculated round data is applied for speedup"""
23
- if 18 == x:
24
- y = 3.8
25
- elif 20 == x:
26
- y = 3.9
27
- elif 24 == x:
28
- y = 4.1
29
- elif 32 == x:
30
- y = 4.4
31
- elif ValueEntropyBase64Check.min_length <= x < 35:
32
- # logarithm base 2 - slow, but precise. Approximation does not exceed stdev
33
- y = 0.77 * math.log2(x) + 0.62
34
- elif 35 <= x < 60:
35
- y = ENTROPY_LIMIT_BASE64
36
- elif 60 <= x:
37
- # the entropy grows slowly after 60
38
- y = 5.0
14
+ if 12 <= x < 18:
15
+ y = 0.915 * math.log2(x) - 0.047
16
+ elif 18 <= x < 35:
17
+ y = 0.767 * math.log2(x) + 0.5677
18
+ elif 35 <= x < 65:
19
+ y = 0.944 * math.log2(x) - 0.009 * x - 0.04
20
+ elif 65 <= x < 256:
21
+ y = 0.621 * math.log2(x) - 0.003 * x + 1.54
22
+ elif 256 <= x:
23
+ y = 6 - 64 / x
39
24
  else:
40
25
  y = 0
41
26
  return y
42
-
43
- def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
44
- """Run filter checks on received credential candidate data 'line_data'.
45
-
46
- Args:
47
- line_data: credential candidate data
48
- target: multiline target from which line data was obtained
49
-
50
- Return:
51
- True, if need to filter candidate and False if left
52
-
53
- """
54
- if '-' in line_data.value or '_' in line_data.value:
55
- entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE64URL_CHARS.value)
56
- else:
57
- entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE64STD_CHARS.value)
58
- min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(line_data.value))
59
- return min_entropy > entropy or 0 == min_entropy
@@ -0,0 +1,37 @@
1
+ from abc import abstractmethod
2
+
3
+ from credsweeper.config import Config
4
+ from credsweeper.credentials import LineData
5
+ from credsweeper.file_handler.analysis_target import AnalysisTarget
6
+ from credsweeper.filters import Filter
7
+ from credsweeper.utils import Util
8
+
9
+
10
+ class ValueEntropyBaseCheck(Filter):
11
+ """Check that candidate value has minimal Shanon Entropy for appropriated base"""
12
+
13
+ def __init__(self, config: Config = None) -> None:
14
+ pass
15
+
16
+ @staticmethod
17
+ @abstractmethod
18
+ def get_min_data_entropy(x: int) -> float:
19
+ """Returns minimal entropy for size of data"""
20
+ raise NotImplementedError()
21
+
22
+ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
23
+ """Run filter checks on received credential candidate data 'line_data'.
24
+
25
+ Args:
26
+ line_data: credential candidate data
27
+ target: multiline target from which line data was obtained
28
+
29
+ Return:
30
+ True, when need to filter candidate and False if left
31
+
32
+ """
33
+ entropy = Util.get_shannon_entropy(line_data.value)
34
+ min_entropy = self.get_min_data_entropy(len(line_data.value))
35
+ if min_entropy > entropy or 0 == min_entropy:
36
+ return True
37
+ return False
@@ -53,7 +53,7 @@ class ValueFilePathCheck(Filter):
53
53
  break
54
54
  else:
55
55
  # all symbols are from base64 alphabet
56
- entropy = Util.get_shannon_entropy(value, Chars.BASE64STDPAD_CHARS.value)
56
+ entropy = Util.get_shannon_entropy(value)
57
57
  if 0 == min_entropy or min_entropy > entropy:
58
58
  contains_unix_separator = 1 < value.count('/')
59
59
  else:
@@ -7,9 +7,9 @@ from credsweeper.filters import Filter
7
7
 
8
8
 
9
9
  class ValueHexNumberCheck(Filter):
10
- """Check value if it a value in 32 or 64 bits hex representation"""
10
+ """Check value if it is a value up to 64 bits hex representation"""
11
11
 
12
- HEX_32_64_VALUE_REGEX = re.compile(r"^0x([0-9a-f]{8}){1,2}$")
12
+ HEX_08_64_VALUE_REGEX = re.compile(r"^0x[0-9a-f]{1,16}$")
13
13
 
14
14
  def __init__(self, config: Config = None) -> None:
15
15
  pass
@@ -26,6 +26,6 @@ class ValueHexNumberCheck(Filter):
26
26
 
27
27
  """
28
28
  value = line_data.value.lower()
29
- if len(value) in [10, 18] and ValueHexNumberCheck.HEX_32_64_VALUE_REGEX.match(value):
29
+ if ValueHexNumberCheck.HEX_08_64_VALUE_REGEX.match(value):
30
30
  return True
31
31
  return False
@@ -15,14 +15,13 @@ class ValueJsonWebTokenCheck(Filter):
15
15
  https://www.iana.org/assignments/jose/jose.xhtml
16
16
  """
17
17
  header_keys = {
18
- "alg", "jku", "jwk", "kid", "x5u", "x5c", "x5t", "x5t#S256", "typ", "cty", "crit", "alg", "enc", "zip", "jku",
19
- "jwk", "kid", "x5u", "x5c", "x5t", "x5t#S256", "typ", "cty", "crit", "epk", "apu", "apv", "iv", "tag", "p2s",
20
- "p2c", "iss", "sub", "aud", "b64", "ppt", "url", "nonce", "svt"
18
+ "kid", "x5u", "x5t", "x5t#S256", "typ", "cty", "crit", "alg", "enc", "zip", "jku", "jwk", "x5c", "epk", "apu",
19
+ "apv", "iv", "tag", "p2s", "p2c", "iss", "sub", "aud", "b64", "ppt", "url", "nonce", "svt"
21
20
  }
22
21
  payload_keys = {
23
22
  "iss", "sub", "aud", "exp", "nbf", "iat", "jti", "kty", "use", "key_ops", "alg", "enc", "zip", "jku", "jwk",
24
- "kid", "x5u", "x5c", "x5t", "x5t#S256", "crv", "x", "y", "d", "n", "e", "d", "p", "q", "dp", "dq", "qi", "oth",
25
- "k", "crv", "d", "x", "ext", "crit", "keys", "id", "role", "token", "secret", "password", "nonce"
23
+ "kid", "x5u", "x5c", "x5t", "x5t#S256", "x", "y", "d", "n", "e", "p", "q", "dp", "dq", "qi", "oth", "k", "crv",
24
+ "ext", "crit", "keys", "id", "role", "token", "secret", "password", "nonce"
26
25
  }
27
26
 
28
27
  def __init__(self, config: Config = None) -> None:
@@ -33,33 +33,33 @@ class ValuePatternCheck(Filter):
33
33
  # use non whitespace symbol pattern
34
34
  self.pattern = re.compile(fr"(\S)\1{{{str(self.pattern_len - 1)},}}")
35
35
 
36
- def equal_pattern_check(self, line_data_value: str) -> bool:
36
+ def equal_pattern_check(self, value: str) -> bool:
37
37
  """Check if candidate value contain 4 and more same chars or numbers sequences.
38
38
 
39
39
  Args:
40
- line_data_value: string variable, credential candidate value
40
+ value: string variable, credential candidate value
41
41
 
42
42
  Return:
43
43
  True if contain and False if not
44
44
 
45
45
  """
46
- if self.pattern.findall(line_data_value):
46
+ if self.pattern.findall(value):
47
47
  return True
48
48
  return False
49
49
 
50
- def ascending_pattern_check(self, line_data_value: str) -> bool:
50
+ def ascending_pattern_check(self, value: str) -> bool:
51
51
  """Check if candidate value contain 4 and more ascending chars or numbers sequences.
52
52
 
53
53
  Arg:
54
- line_data_value: credential candidate value
54
+ value: credential candidate value
55
55
 
56
56
  Return:
57
57
  True if contain and False if not
58
58
 
59
59
  """
60
60
  count = 1
61
- for key in range(len(line_data_value) - 1):
62
- if ord(line_data_value[key + 1]) - ord(line_data_value[key]) == 1:
61
+ for key in range(len(value) - 1):
62
+ if ord(value[key + 1]) - ord(value[key]) == 1:
63
63
  count += 1
64
64
  else:
65
65
  count = 1
@@ -68,19 +68,19 @@ class ValuePatternCheck(Filter):
68
68
  return True
69
69
  return False
70
70
 
71
- def descending_pattern_check(self, line_data_value: str) -> bool:
71
+ def descending_pattern_check(self, value: str) -> bool:
72
72
  """Check if candidate value contain 4 and more descending chars or numbers sequences.
73
73
 
74
74
  Arg:
75
- line_data_value: string variable, credential candidate value
75
+ value: string variable, credential candidate value
76
76
 
77
77
  Return:
78
78
  boolean variable. True if contain and False if not
79
79
 
80
80
  """
81
81
  count = 1
82
- for key in range(len(line_data_value) - 1):
83
- if ord(line_data_value[key]) - ord(line_data_value[key + 1]) == 1:
82
+ for key in range(len(value) - 1):
83
+ if ord(value[key]) - ord(value[key + 1]) == 1:
84
84
  count += 1
85
85
  else:
86
86
  count = 1
@@ -89,6 +89,57 @@ class ValuePatternCheck(Filter):
89
89
  return True
90
90
  return False
91
91
 
92
+ def check_val(self, value: str) -> bool:
93
+ """Cumulative value check.
94
+
95
+ Arg:
96
+ value: string variable, credential candidate value
97
+
98
+ Return:
99
+ boolean variable. True if contain and False if not
100
+
101
+ """
102
+ if self.equal_pattern_check(value):
103
+ return True
104
+ if self.ascending_pattern_check(value):
105
+ return True
106
+ if self.descending_pattern_check(value):
107
+ return True
108
+ return False
109
+
110
+ def duple_pattern_check(self, value: str) -> bool:
111
+ """Check if candidate value is a duplet value with possible patterns.
112
+
113
+ Arg:
114
+ value: string variable, credential candidate value
115
+
116
+ Return:
117
+ boolean variable. True if contain and False if not
118
+
119
+ """
120
+ # 001122334455... case
121
+ pair_duple = True
122
+ # 0102030405... case
123
+ even_duple = True
124
+ even_prev = value[0]
125
+ even_value = value[0::2]
126
+ # 1020304050... case
127
+ odd_duple = True
128
+ odd_prev = value[1]
129
+ odd_value = value[1::2]
130
+ for even_i, odd_i in zip(even_value, odd_value):
131
+ pair_duple &= even_i == odd_i
132
+ even_duple &= even_i == even_prev
133
+ odd_duple &= odd_i == odd_prev
134
+ if not pair_duple and not even_duple and not odd_duple:
135
+ break
136
+ else:
137
+ if pair_duple or odd_duple:
138
+ return self.check_val(even_value)
139
+ if even_duple:
140
+ return self.check_val(odd_value)
141
+ return False
142
+
92
143
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
93
144
  """Run filter checks on received credential candidate data 'line_data'.
94
145
 
@@ -103,13 +154,10 @@ class ValuePatternCheck(Filter):
103
154
  if len(line_data.value) < self.pattern_len:
104
155
  return True
105
156
 
106
- if self.equal_pattern_check(line_data.value):
107
- return True
108
-
109
- if self.ascending_pattern_check(line_data.value):
157
+ if self.check_val(line_data.value):
110
158
  return True
111
159
 
112
- if self.descending_pattern_check(line_data.value):
160
+ if 2 * self.pattern_len <= len(line_data.value) and self.duple_pattern_check(line_data.value):
113
161
  return True
114
162
 
115
163
  return False
@@ -1,3 +1,5 @@
1
+ import re
2
+
1
3
  from credsweeper.config import Config
2
4
  from credsweeper.credentials import LineData
3
5
  from credsweeper.file_handler.analysis_target import AnalysisTarget
@@ -9,6 +11,7 @@ class ValueStringTypeCheck(Filter):
9
11
 
10
12
  If it is, then checks if line_data really have string literal declaration.
11
13
  Comment rows in source files (start with //, /\*, etc) ignored.
14
+ Multiple bytes scenario allowed [123,23,54,67,78,89] or {0xae, 0x54, 0x55, 0xff}
12
15
 
13
16
  True if:
14
17
 
@@ -20,6 +23,8 @@ class ValueStringTypeCheck(Filter):
20
23
  False otherwise
21
24
  """
22
25
 
26
+ MULTIBYTE_PATTERN = re.compile(r"(\s*(0x)?[0-9a-f]{1,3}\s*,){8,80}", flags=re.IGNORECASE)
27
+
23
28
  def __init__(self, config: Config) -> None:
24
29
  self.check_for_literals = config.check_for_literals
25
30
 
@@ -37,10 +42,13 @@ class ValueStringTypeCheck(Filter):
37
42
  if not self.check_for_literals or line_data.url_part:
38
43
  return False
39
44
 
40
- not_quoted = not line_data.is_well_quoted_value
41
- not_comment = not line_data.is_comment()
45
+ if ValueStringTypeCheck.MULTIBYTE_PATTERN.match(line_data.value):
46
+ return False
42
47
 
43
- if line_data.is_source_file_with_quotes() and not_comment and not_quoted and not line_data.is_quoted \
48
+ if line_data.is_source_file_with_quotes() \
49
+ and not line_data.is_comment() \
50
+ and not line_data.is_well_quoted_value \
51
+ and not line_data.is_quoted \
44
52
  and line_data.separator and '=' in line_data.separator:
45
53
  # heterogeneous code e.g. YAML in Python uses colon sign instead equals
46
54
  return True
@@ -1,6 +1,5 @@
1
1
  from typing import Tuple
2
2
 
3
- from credsweeper.config import Config
4
3
  from credsweeper.filters.value_token_base_check import ValueTokenBaseCheck
5
4
 
6
5
 
@@ -21,9 +20,6 @@ class ValueTokenBase32Check(ValueTokenBaseCheck):
21
20
  64: ((3.4805990476190476, 0.28572156450556774), (2.035756800745673, 0.18815721535870078)),
22
21
  }
23
22
 
24
- def __init__(self, config: Config = None) -> None:
25
- super().__init__(config)
26
-
27
23
  @staticmethod
28
24
  def get_stat_range(size: int) -> Tuple[Tuple[float, float], Tuple[float, float]]:
29
25
  """Returns minimal, maximal for hop and deviation. Precalculated data is applied for speedup"""
@@ -1,6 +1,5 @@
1
1
  from typing import Tuple
2
2
 
3
- from credsweeper.config import Config
4
3
  from credsweeper.filters.value_token_base_check import ValueTokenBaseCheck
5
4
 
6
5
 
@@ -21,9 +20,6 @@ class ValueTokenBase36Check(ValueTokenBaseCheck):
21
20
  64: ((3.7190009761904763, 0.30325954360127116), (2.1751172797904093, 0.1942582237461476)),
22
21
  }
23
22
 
24
- def __init__(self, config: Config = None) -> None:
25
- super().__init__(config)
26
-
27
23
  @staticmethod
28
24
  def get_stat_range(size: int) -> Tuple[Tuple[float, float], Tuple[float, float]]:
29
25
  """Returns minimal, maximal for hop and deviation. Precalculated data is applied for speedup"""
@@ -1,6 +1,5 @@
1
1
  from typing import Tuple
2
2
 
3
- from credsweeper.config import Config
4
3
  from credsweeper.filters.value_token_base_check import ValueTokenBaseCheck
5
4
 
6
5
 
@@ -21,9 +20,6 @@ class ValueTokenBase64Check(ValueTokenBaseCheck):
21
20
  64: ((3.7625271746031745, 0.31733579704946846), (2.257532519514275, 0.20571908142867643)),
22
21
  }
23
22
 
24
- def __init__(self, config: Config = None) -> None:
25
- super().__init__(config)
26
-
27
23
  @staticmethod
28
24
  def get_stat_range(size: int) -> Tuple[Tuple[float, float], Tuple[float, float]]:
29
25
  """Returns minimal, maximal for hop and deviation. Precalculated data is applied for speedup"""