credsweeper 1.11.2__py3-none-any.whl → 1.11.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- credsweeper/__init__.py +1 -1
- credsweeper/__main__.py +7 -5
- credsweeper/app.py +28 -47
- credsweeper/common/constants.py +2 -5
- credsweeper/common/keyword_pattern.py +15 -9
- credsweeper/common/morpheme_checklist.txt +4 -2
- credsweeper/credentials/candidate_key.py +1 -1
- credsweeper/credentials/credential_manager.py +4 -3
- credsweeper/credentials/line_data.py +16 -15
- credsweeper/deep_scanner/abstract_scanner.py +10 -1
- credsweeper/deep_scanner/deb_scanner.py +48 -0
- credsweeper/deep_scanner/deep_scanner.py +65 -43
- credsweeper/deep_scanner/docx_scanner.py +1 -1
- credsweeper/deep_scanner/encoder_scanner.py +2 -2
- credsweeper/deep_scanner/gzip_scanner.py +1 -1
- credsweeper/deep_scanner/html_scanner.py +3 -3
- credsweeper/deep_scanner/jks_scanner.py +2 -4
- credsweeper/deep_scanner/lang_scanner.py +2 -2
- credsweeper/deep_scanner/lzma_scanner.py +40 -0
- credsweeper/deep_scanner/pkcs12_scanner.py +3 -5
- credsweeper/deep_scanner/xml_scanner.py +2 -2
- credsweeper/file_handler/byte_content_provider.py +2 -2
- credsweeper/file_handler/content_provider.py +1 -1
- credsweeper/file_handler/data_content_provider.py +23 -14
- credsweeper/file_handler/diff_content_provider.py +2 -2
- credsweeper/file_handler/file_path_extractor.py +1 -1
- credsweeper/file_handler/files_provider.py +2 -4
- credsweeper/file_handler/patches_provider.py +1 -1
- credsweeper/file_handler/string_content_provider.py +2 -2
- credsweeper/file_handler/struct_content_provider.py +1 -1
- credsweeper/file_handler/text_content_provider.py +2 -2
- credsweeper/filters/value_array_dictionary_check.py +3 -1
- credsweeper/filters/value_azure_token_check.py +1 -2
- credsweeper/filters/value_base64_encoded_pem_check.py +1 -1
- credsweeper/filters/value_base64_part_check.py +30 -21
- credsweeper/filters/value_discord_bot_check.py +1 -2
- credsweeper/filters/value_entropy_base32_check.py +11 -31
- credsweeper/filters/value_entropy_base36_check.py +11 -34
- credsweeper/filters/value_entropy_base64_check.py +15 -48
- credsweeper/filters/value_entropy_base_check.py +37 -0
- credsweeper/filters/value_file_path_check.py +1 -1
- credsweeper/filters/value_hex_number_check.py +3 -3
- credsweeper/filters/value_json_web_token_check.py +4 -5
- credsweeper/filters/value_pattern_check.py +64 -16
- credsweeper/filters/value_string_type_check.py +11 -3
- credsweeper/filters/value_token_base32_check.py +0 -4
- credsweeper/filters/value_token_base36_check.py +0 -4
- credsweeper/filters/value_token_base64_check.py +0 -4
- credsweeper/filters/value_token_check.py +1 -1
- credsweeper/ml_model/features/file_extension.py +2 -2
- credsweeper/ml_model/features/morpheme_dense.py +0 -4
- credsweeper/ml_model/features/rule_name.py +1 -1
- credsweeper/ml_model/features/word_in_path.py +0 -9
- credsweeper/ml_model/features/word_in_postamble.py +0 -11
- credsweeper/ml_model/features/word_in_preamble.py +0 -11
- credsweeper/ml_model/features/word_in_transition.py +0 -11
- credsweeper/ml_model/features/word_in_value.py +0 -11
- credsweeper/ml_model/features/word_in_variable.py +0 -11
- credsweeper/ml_model/ml_validator.py +45 -22
- credsweeper/rules/config.yaml +238 -208
- credsweeper/rules/rule.py +3 -3
- credsweeper/scanner/scan_type/scan_type.py +2 -3
- credsweeper/scanner/scanner.py +7 -1
- credsweeper/secret/config.json +16 -5
- credsweeper/utils/hop_stat.py +3 -3
- credsweeper/utils/pem_key_detector.py +8 -7
- credsweeper/utils/util.py +76 -146
- {credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/METADATA +1 -1
- {credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/RECORD +72 -70
- credsweeper/utils/entropy_validator.py +0 -72
- {credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/WHEEL +0 -0
- {credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/entry_points.txt +0 -0
- {credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -14,7 +14,7 @@ class ValueArrayDictionaryCheck(Filter):
|
|
|
14
14
|
`token = {'root'}` would be kept
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
|
-
PATTERN = re.compile(r"\[('|\")
|
|
17
|
+
PATTERN = re.compile(r"\[('|\")?[^,]+('|\")?\]")
|
|
18
18
|
|
|
19
19
|
def __init__(self, config: Config = None) -> None:
|
|
20
20
|
pass
|
|
@@ -32,6 +32,8 @@ class ValueArrayDictionaryCheck(Filter):
|
|
|
32
32
|
"""
|
|
33
33
|
if line_data.is_well_quoted_value:
|
|
34
34
|
return False
|
|
35
|
+
if line_data.wrap and "byte" in line_data.wrap.lower():
|
|
36
|
+
return False
|
|
35
37
|
if self.PATTERN.search(line_data.value):
|
|
36
38
|
return True
|
|
37
39
|
if line_data.wrap and not line_data.is_well_quoted_value and ('[' in line_data.wrap or '(' in line_data.wrap):
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import contextlib
|
|
2
2
|
import json
|
|
3
3
|
|
|
4
|
-
from credsweeper.common.constants import Chars
|
|
5
4
|
from credsweeper.config import Config
|
|
6
5
|
from credsweeper.credentials import LineData
|
|
7
6
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
@@ -45,7 +44,7 @@ class ValueAzureTokenCheck(Filter):
|
|
|
45
44
|
# must be all parts in payload
|
|
46
45
|
return True
|
|
47
46
|
min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(parts[2]))
|
|
48
|
-
entropy = Util.get_shannon_entropy(parts[2]
|
|
47
|
+
entropy = Util.get_shannon_entropy(parts[2])
|
|
49
48
|
# good signature has to be like random bytes
|
|
50
49
|
return entropy < min_entropy
|
|
51
50
|
|
|
@@ -30,7 +30,7 @@ class ValueBase64EncodedPem(Filter):
|
|
|
30
30
|
with contextlib.suppress(Exception):
|
|
31
31
|
text = Util.decode_base64(line_data.value, padding_safe=True, urlsafe_detect=True)
|
|
32
32
|
lines = text.decode(ASCII).splitlines()
|
|
33
|
-
lines_pos =
|
|
33
|
+
lines_pos = list(range(len(lines)))
|
|
34
34
|
for line_pos, line in zip(lines_pos, lines):
|
|
35
35
|
if PEM_BEGIN_PATTERN in line:
|
|
36
36
|
new_target = AnalysisTarget(line_pos, lines, lines_pos, target.descriptor)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import contextlib
|
|
2
2
|
import re
|
|
3
3
|
import statistics
|
|
4
|
+
from itertools import takewhile
|
|
4
5
|
|
|
5
6
|
from credsweeper.common.constants import Chars
|
|
6
7
|
from credsweeper.config import Config
|
|
@@ -16,8 +17,8 @@ class ValueBase64PartCheck(Filter):
|
|
|
16
17
|
Check that candidate is NOT a part of base64 long line
|
|
17
18
|
"""
|
|
18
19
|
|
|
19
|
-
base64_pattern = re.compile(r"^(\\{1,8}[0abfnrtv]|[0-9A-Za-z+/=]){1,4000}")
|
|
20
|
-
|
|
20
|
+
base64_pattern = re.compile(r"^(\\{1,8}[0abfnrtv]|[0-9A-Za-z+/=]){1,4000}$")
|
|
21
|
+
base64_char_set = set(Chars.BASE64STDPAD_CHARS.value + '\\')
|
|
21
22
|
|
|
22
23
|
def __init__(self, config: Config = None) -> None:
|
|
23
24
|
pass
|
|
@@ -64,38 +65,46 @@ class ValueBase64PartCheck(Filter):
|
|
|
64
65
|
elif right_end - left_start >= 2 * len_value:
|
|
65
66
|
# simple analysis for data too large to yield sensible insights
|
|
66
67
|
part_set = set(line[left_start:right_end])
|
|
67
|
-
if not part_set.difference(
|
|
68
|
+
if not part_set.difference(ValueBase64PartCheck.base64_char_set):
|
|
68
69
|
# obvious case: all characters are base64 standard
|
|
69
70
|
return True
|
|
70
71
|
|
|
71
|
-
left_part =
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
72
|
+
left_part = ''.join(
|
|
73
|
+
takewhile(lambda x: x in ValueBase64PartCheck.base64_char_set,
|
|
74
|
+
reversed(line[left_start:line_data.value_start])))
|
|
75
|
+
|
|
76
|
+
right_part = ''.join(
|
|
77
|
+
takewhile(lambda x: x in ValueBase64PartCheck.base64_char_set, line[line_data.value_end:right_end]))
|
|
75
78
|
|
|
76
79
|
min_entropy_value = ValueEntropyBase64Check.get_min_data_entropy(len_value)
|
|
77
|
-
value_entropy = Util.get_shannon_entropy(value, Chars.BASE64STD_CHARS.value)
|
|
78
80
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
81
|
+
left_entropy = Util.get_shannon_entropy(left_part)
|
|
82
|
+
value_entropy = Util.get_shannon_entropy(value)
|
|
83
|
+
right_entropy = Util.get_shannon_entropy(right_part)
|
|
84
|
+
common = left_part + value + right_part
|
|
85
|
+
common_entropy = Util.get_shannon_entropy(common)
|
|
86
|
+
min_entropy_common = ValueEntropyBase64Check.get_min_data_entropy(len(common))
|
|
87
|
+
if min_entropy_common < common_entropy:
|
|
88
|
+
return True
|
|
85
89
|
|
|
86
|
-
if
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
+
if left_entropy and right_entropy:
|
|
91
|
+
data = [left_entropy, value_entropy, right_entropy, min_entropy_value, common_entropy]
|
|
92
|
+
elif left_entropy and not right_entropy:
|
|
93
|
+
data = [left_entropy, value_entropy, min_entropy_value, min_entropy_value, common_entropy]
|
|
94
|
+
elif not left_entropy and right_entropy:
|
|
95
|
+
data = [value_entropy, right_entropy, min_entropy_value, min_entropy_value, common_entropy]
|
|
90
96
|
else:
|
|
91
|
-
|
|
97
|
+
return False
|
|
92
98
|
|
|
93
|
-
data = [left_entropy, value_entropy, right_entropy, min_entropy_value]
|
|
94
99
|
avg = statistics.mean(data)
|
|
95
100
|
stdev = statistics.stdev(data, avg)
|
|
96
101
|
avg_min = avg - 1.1 * stdev
|
|
97
|
-
if
|
|
102
|
+
if (0. == left_entropy or avg_min < left_entropy or left_entropy < value_entropy < right_entropy) \
|
|
103
|
+
and (
|
|
104
|
+
0. == right_entropy or avg_min < right_entropy or right_entropy < value_entropy < left_entropy):
|
|
98
105
|
# high entropy of bound parts looks like a part of base64 long line
|
|
99
106
|
return True
|
|
107
|
+
else:
|
|
108
|
+
return False
|
|
100
109
|
|
|
101
110
|
return False
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import contextlib
|
|
2
2
|
|
|
3
|
-
from credsweeper.common.constants import Chars
|
|
4
3
|
from credsweeper.config import Config
|
|
5
4
|
from credsweeper.credentials import LineData
|
|
6
5
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
@@ -32,7 +31,7 @@ class ValueDiscordBotCheck(Filter):
|
|
|
32
31
|
id_part = line_data.value[:dot_separator_index]
|
|
33
32
|
discord_id = int(Util.decode_base64(id_part, padding_safe=True, urlsafe_detect=True))
|
|
34
33
|
entropy_part = line_data.value[dot_separator_index:]
|
|
35
|
-
entropy = Util.get_shannon_entropy(entropy_part
|
|
34
|
+
entropy = Util.get_shannon_entropy(entropy_part)
|
|
36
35
|
min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(entropy_part))
|
|
37
36
|
if 1000 <= discord_id and min_entropy <= entropy:
|
|
38
37
|
return False
|
|
@@ -1,42 +1,22 @@
|
|
|
1
1
|
import math
|
|
2
|
+
from functools import cache
|
|
2
3
|
|
|
3
|
-
from credsweeper.
|
|
4
|
-
from credsweeper.config import Config
|
|
5
|
-
from credsweeper.credentials import LineData
|
|
6
|
-
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
7
|
-
from credsweeper.filters import Filter
|
|
8
|
-
from credsweeper.utils import Util
|
|
4
|
+
from credsweeper.filters.value_entropy_base_check import ValueEntropyBaseCheck
|
|
9
5
|
|
|
10
6
|
|
|
11
|
-
class ValueEntropyBase32Check(
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
def __init__(self, config: Config = None) -> None:
|
|
15
|
-
pass
|
|
7
|
+
class ValueEntropyBase32Check(ValueEntropyBaseCheck):
|
|
8
|
+
"""Base32 entropy check"""
|
|
16
9
|
|
|
17
10
|
@staticmethod
|
|
11
|
+
@cache
|
|
18
12
|
def get_min_data_entropy(x: int) -> float:
|
|
19
13
|
"""Returns average entropy for size of random data. Precalculated data is applied for speedup"""
|
|
20
|
-
if
|
|
21
|
-
y =
|
|
22
|
-
elif
|
|
23
|
-
|
|
24
|
-
|
|
14
|
+
if 8 <= x < 17:
|
|
15
|
+
y = 0.80569236 * math.log2(x) + 0.13439734
|
|
16
|
+
elif 17 <= x < 33:
|
|
17
|
+
y = 0.66350481 * math.log2(x) + 0.71143862
|
|
18
|
+
elif 33 <= x:
|
|
19
|
+
y = 4.04
|
|
25
20
|
else:
|
|
26
21
|
y = 0
|
|
27
22
|
return y
|
|
28
|
-
|
|
29
|
-
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
30
|
-
"""Run filter checks on received credential candidate data 'line_data'.
|
|
31
|
-
|
|
32
|
-
Args:
|
|
33
|
-
line_data: credential candidate data
|
|
34
|
-
target: multiline target from which line data was obtained
|
|
35
|
-
|
|
36
|
-
Return:
|
|
37
|
-
True, if need to filter candidate and False if left
|
|
38
|
-
|
|
39
|
-
"""
|
|
40
|
-
entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE32_CHARS.value)
|
|
41
|
-
min_entropy = ValueEntropyBase32Check.get_min_data_entropy(len(line_data.value))
|
|
42
|
-
return min_entropy > entropy or 0 == min_entropy
|
|
@@ -1,46 +1,23 @@
|
|
|
1
1
|
import math
|
|
2
|
+
from functools import cache
|
|
2
3
|
|
|
3
|
-
from credsweeper.
|
|
4
|
-
from credsweeper.config import Config
|
|
5
|
-
from credsweeper.credentials import LineData
|
|
6
|
-
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
7
|
-
from credsweeper.filters import Filter
|
|
8
|
-
from credsweeper.utils import Util
|
|
4
|
+
from credsweeper.filters.value_entropy_base_check import ValueEntropyBaseCheck
|
|
9
5
|
|
|
10
6
|
|
|
11
|
-
class ValueEntropyBase36Check(
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
def __init__(self, config: Config = None) -> None:
|
|
15
|
-
pass
|
|
7
|
+
class ValueEntropyBase36Check(ValueEntropyBaseCheck):
|
|
8
|
+
"""Base36 entropy check"""
|
|
16
9
|
|
|
17
10
|
@staticmethod
|
|
11
|
+
@cache
|
|
18
12
|
def get_min_data_entropy(x: int) -> float:
|
|
19
13
|
"""Returns minimal entropy for size of random data. Precalculated data is applied for speedup"""
|
|
20
14
|
if 15 == x:
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
# approximation does not exceed standard deviation
|
|
28
|
-
y = 0.7 * math.log2(x) + 0.7
|
|
15
|
+
# workaround for Dropbox App secret
|
|
16
|
+
y = 3.374
|
|
17
|
+
elif 10 <= x < 26:
|
|
18
|
+
y = 0.731566857 * math.log2(x) + 0.474132
|
|
19
|
+
elif 26 <= x:
|
|
20
|
+
y = 3.9
|
|
29
21
|
else:
|
|
30
22
|
y = 0
|
|
31
23
|
return y
|
|
32
|
-
|
|
33
|
-
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
34
|
-
"""Run filter checks on received credential candidate data 'line_data'.
|
|
35
|
-
|
|
36
|
-
Args:
|
|
37
|
-
line_data: credential candidate data
|
|
38
|
-
target: multiline target from which line data was obtained
|
|
39
|
-
|
|
40
|
-
Return:
|
|
41
|
-
True, if need to filter candidate and False if left
|
|
42
|
-
|
|
43
|
-
"""
|
|
44
|
-
entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE36_CHARS.value)
|
|
45
|
-
min_entropy = ValueEntropyBase36Check.get_min_data_entropy(len(line_data.value))
|
|
46
|
-
return min_entropy > entropy or 0 == min_entropy
|
|
@@ -1,59 +1,26 @@
|
|
|
1
1
|
import math
|
|
2
|
+
from functools import cache
|
|
2
3
|
|
|
3
|
-
from credsweeper.
|
|
4
|
-
from credsweeper.config import Config
|
|
5
|
-
from credsweeper.credentials import LineData
|
|
6
|
-
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
7
|
-
from credsweeper.filters import Filter
|
|
8
|
-
from credsweeper.utils import Util
|
|
4
|
+
from credsweeper.filters.value_entropy_base_check import ValueEntropyBaseCheck
|
|
9
5
|
|
|
10
6
|
|
|
11
|
-
class ValueEntropyBase64Check(
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
# If the value size is less than this value the entropy evaluation gives an imprecise result
|
|
15
|
-
min_length = 12
|
|
16
|
-
|
|
17
|
-
def __init__(self, config: Config = None) -> None:
|
|
18
|
-
pass
|
|
7
|
+
class ValueEntropyBase64Check(ValueEntropyBaseCheck):
|
|
8
|
+
"""Base64 entropy check"""
|
|
19
9
|
|
|
20
10
|
@staticmethod
|
|
11
|
+
@cache
|
|
21
12
|
def get_min_data_entropy(x: int) -> float:
|
|
22
13
|
"""Returns minimal average entropy for size of random data. Precalculated round data is applied for speedup"""
|
|
23
|
-
if
|
|
24
|
-
y =
|
|
25
|
-
elif
|
|
26
|
-
y =
|
|
27
|
-
elif
|
|
28
|
-
y =
|
|
29
|
-
elif
|
|
30
|
-
y =
|
|
31
|
-
elif
|
|
32
|
-
|
|
33
|
-
y = 0.77 * math.log2(x) + 0.62
|
|
34
|
-
elif 35 <= x < 60:
|
|
35
|
-
y = ENTROPY_LIMIT_BASE64
|
|
36
|
-
elif 60 <= x:
|
|
37
|
-
# the entropy grows slowly after 60
|
|
38
|
-
y = 5.0
|
|
14
|
+
if 12 <= x < 18:
|
|
15
|
+
y = 0.915 * math.log2(x) - 0.047
|
|
16
|
+
elif 18 <= x < 35:
|
|
17
|
+
y = 0.767 * math.log2(x) + 0.5677
|
|
18
|
+
elif 35 <= x < 65:
|
|
19
|
+
y = 0.944 * math.log2(x) - 0.009 * x - 0.04
|
|
20
|
+
elif 65 <= x < 256:
|
|
21
|
+
y = 0.621 * math.log2(x) - 0.003 * x + 1.54
|
|
22
|
+
elif 256 <= x:
|
|
23
|
+
y = 6 - 64 / x
|
|
39
24
|
else:
|
|
40
25
|
y = 0
|
|
41
26
|
return y
|
|
42
|
-
|
|
43
|
-
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
44
|
-
"""Run filter checks on received credential candidate data 'line_data'.
|
|
45
|
-
|
|
46
|
-
Args:
|
|
47
|
-
line_data: credential candidate data
|
|
48
|
-
target: multiline target from which line data was obtained
|
|
49
|
-
|
|
50
|
-
Return:
|
|
51
|
-
True, if need to filter candidate and False if left
|
|
52
|
-
|
|
53
|
-
"""
|
|
54
|
-
if '-' in line_data.value or '_' in line_data.value:
|
|
55
|
-
entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE64URL_CHARS.value)
|
|
56
|
-
else:
|
|
57
|
-
entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE64STD_CHARS.value)
|
|
58
|
-
min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(line_data.value))
|
|
59
|
-
return min_entropy > entropy or 0 == min_entropy
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from abc import abstractmethod
|
|
2
|
+
|
|
3
|
+
from credsweeper.config import Config
|
|
4
|
+
from credsweeper.credentials import LineData
|
|
5
|
+
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
6
|
+
from credsweeper.filters import Filter
|
|
7
|
+
from credsweeper.utils import Util
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ValueEntropyBaseCheck(Filter):
|
|
11
|
+
"""Check that candidate value has minimal Shanon Entropy for appropriated base"""
|
|
12
|
+
|
|
13
|
+
def __init__(self, config: Config = None) -> None:
|
|
14
|
+
pass
|
|
15
|
+
|
|
16
|
+
@staticmethod
|
|
17
|
+
@abstractmethod
|
|
18
|
+
def get_min_data_entropy(x: int) -> float:
|
|
19
|
+
"""Returns minimal entropy for size of data"""
|
|
20
|
+
raise NotImplementedError()
|
|
21
|
+
|
|
22
|
+
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
23
|
+
"""Run filter checks on received credential candidate data 'line_data'.
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
line_data: credential candidate data
|
|
27
|
+
target: multiline target from which line data was obtained
|
|
28
|
+
|
|
29
|
+
Return:
|
|
30
|
+
True, when need to filter candidate and False if left
|
|
31
|
+
|
|
32
|
+
"""
|
|
33
|
+
entropy = Util.get_shannon_entropy(line_data.value)
|
|
34
|
+
min_entropy = self.get_min_data_entropy(len(line_data.value))
|
|
35
|
+
if min_entropy > entropy or 0 == min_entropy:
|
|
36
|
+
return True
|
|
37
|
+
return False
|
|
@@ -53,7 +53,7 @@ class ValueFilePathCheck(Filter):
|
|
|
53
53
|
break
|
|
54
54
|
else:
|
|
55
55
|
# all symbols are from base64 alphabet
|
|
56
|
-
entropy = Util.get_shannon_entropy(value
|
|
56
|
+
entropy = Util.get_shannon_entropy(value)
|
|
57
57
|
if 0 == min_entropy or min_entropy > entropy:
|
|
58
58
|
contains_unix_separator = 1 < value.count('/')
|
|
59
59
|
else:
|
|
@@ -7,9 +7,9 @@ from credsweeper.filters import Filter
|
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class ValueHexNumberCheck(Filter):
|
|
10
|
-
"""Check value if it a value
|
|
10
|
+
"""Check value if it is a value up to 64 bits hex representation"""
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
HEX_08_64_VALUE_REGEX = re.compile(r"^0x[0-9a-f]{1,16}$")
|
|
13
13
|
|
|
14
14
|
def __init__(self, config: Config = None) -> None:
|
|
15
15
|
pass
|
|
@@ -26,6 +26,6 @@ class ValueHexNumberCheck(Filter):
|
|
|
26
26
|
|
|
27
27
|
"""
|
|
28
28
|
value = line_data.value.lower()
|
|
29
|
-
if
|
|
29
|
+
if ValueHexNumberCheck.HEX_08_64_VALUE_REGEX.match(value):
|
|
30
30
|
return True
|
|
31
31
|
return False
|
|
@@ -15,14 +15,13 @@ class ValueJsonWebTokenCheck(Filter):
|
|
|
15
15
|
https://www.iana.org/assignments/jose/jose.xhtml
|
|
16
16
|
"""
|
|
17
17
|
header_keys = {
|
|
18
|
-
"
|
|
19
|
-
"
|
|
20
|
-
"p2c", "iss", "sub", "aud", "b64", "ppt", "url", "nonce", "svt"
|
|
18
|
+
"kid", "x5u", "x5t", "x5t#S256", "typ", "cty", "crit", "alg", "enc", "zip", "jku", "jwk", "x5c", "epk", "apu",
|
|
19
|
+
"apv", "iv", "tag", "p2s", "p2c", "iss", "sub", "aud", "b64", "ppt", "url", "nonce", "svt"
|
|
21
20
|
}
|
|
22
21
|
payload_keys = {
|
|
23
22
|
"iss", "sub", "aud", "exp", "nbf", "iat", "jti", "kty", "use", "key_ops", "alg", "enc", "zip", "jku", "jwk",
|
|
24
|
-
"kid", "x5u", "x5c", "x5t", "x5t#S256", "
|
|
25
|
-
"
|
|
23
|
+
"kid", "x5u", "x5c", "x5t", "x5t#S256", "x", "y", "d", "n", "e", "p", "q", "dp", "dq", "qi", "oth", "k", "crv",
|
|
24
|
+
"ext", "crit", "keys", "id", "role", "token", "secret", "password", "nonce"
|
|
26
25
|
}
|
|
27
26
|
|
|
28
27
|
def __init__(self, config: Config = None) -> None:
|
|
@@ -33,33 +33,33 @@ class ValuePatternCheck(Filter):
|
|
|
33
33
|
# use non whitespace symbol pattern
|
|
34
34
|
self.pattern = re.compile(fr"(\S)\1{{{str(self.pattern_len - 1)},}}")
|
|
35
35
|
|
|
36
|
-
def equal_pattern_check(self,
|
|
36
|
+
def equal_pattern_check(self, value: str) -> bool:
|
|
37
37
|
"""Check if candidate value contain 4 and more same chars or numbers sequences.
|
|
38
38
|
|
|
39
39
|
Args:
|
|
40
|
-
|
|
40
|
+
value: string variable, credential candidate value
|
|
41
41
|
|
|
42
42
|
Return:
|
|
43
43
|
True if contain and False if not
|
|
44
44
|
|
|
45
45
|
"""
|
|
46
|
-
if self.pattern.findall(
|
|
46
|
+
if self.pattern.findall(value):
|
|
47
47
|
return True
|
|
48
48
|
return False
|
|
49
49
|
|
|
50
|
-
def ascending_pattern_check(self,
|
|
50
|
+
def ascending_pattern_check(self, value: str) -> bool:
|
|
51
51
|
"""Check if candidate value contain 4 and more ascending chars or numbers sequences.
|
|
52
52
|
|
|
53
53
|
Arg:
|
|
54
|
-
|
|
54
|
+
value: credential candidate value
|
|
55
55
|
|
|
56
56
|
Return:
|
|
57
57
|
True if contain and False if not
|
|
58
58
|
|
|
59
59
|
"""
|
|
60
60
|
count = 1
|
|
61
|
-
for key in range(len(
|
|
62
|
-
if ord(
|
|
61
|
+
for key in range(len(value) - 1):
|
|
62
|
+
if ord(value[key + 1]) - ord(value[key]) == 1:
|
|
63
63
|
count += 1
|
|
64
64
|
else:
|
|
65
65
|
count = 1
|
|
@@ -68,19 +68,19 @@ class ValuePatternCheck(Filter):
|
|
|
68
68
|
return True
|
|
69
69
|
return False
|
|
70
70
|
|
|
71
|
-
def descending_pattern_check(self,
|
|
71
|
+
def descending_pattern_check(self, value: str) -> bool:
|
|
72
72
|
"""Check if candidate value contain 4 and more descending chars or numbers sequences.
|
|
73
73
|
|
|
74
74
|
Arg:
|
|
75
|
-
|
|
75
|
+
value: string variable, credential candidate value
|
|
76
76
|
|
|
77
77
|
Return:
|
|
78
78
|
boolean variable. True if contain and False if not
|
|
79
79
|
|
|
80
80
|
"""
|
|
81
81
|
count = 1
|
|
82
|
-
for key in range(len(
|
|
83
|
-
if ord(
|
|
82
|
+
for key in range(len(value) - 1):
|
|
83
|
+
if ord(value[key]) - ord(value[key + 1]) == 1:
|
|
84
84
|
count += 1
|
|
85
85
|
else:
|
|
86
86
|
count = 1
|
|
@@ -89,6 +89,57 @@ class ValuePatternCheck(Filter):
|
|
|
89
89
|
return True
|
|
90
90
|
return False
|
|
91
91
|
|
|
92
|
+
def check_val(self, value: str) -> bool:
|
|
93
|
+
"""Cumulative value check.
|
|
94
|
+
|
|
95
|
+
Arg:
|
|
96
|
+
value: string variable, credential candidate value
|
|
97
|
+
|
|
98
|
+
Return:
|
|
99
|
+
boolean variable. True if contain and False if not
|
|
100
|
+
|
|
101
|
+
"""
|
|
102
|
+
if self.equal_pattern_check(value):
|
|
103
|
+
return True
|
|
104
|
+
if self.ascending_pattern_check(value):
|
|
105
|
+
return True
|
|
106
|
+
if self.descending_pattern_check(value):
|
|
107
|
+
return True
|
|
108
|
+
return False
|
|
109
|
+
|
|
110
|
+
def duple_pattern_check(self, value: str) -> bool:
|
|
111
|
+
"""Check if candidate value is a duplet value with possible patterns.
|
|
112
|
+
|
|
113
|
+
Arg:
|
|
114
|
+
value: string variable, credential candidate value
|
|
115
|
+
|
|
116
|
+
Return:
|
|
117
|
+
boolean variable. True if contain and False if not
|
|
118
|
+
|
|
119
|
+
"""
|
|
120
|
+
# 001122334455... case
|
|
121
|
+
pair_duple = True
|
|
122
|
+
# 0102030405... case
|
|
123
|
+
even_duple = True
|
|
124
|
+
even_prev = value[0]
|
|
125
|
+
even_value = value[0::2]
|
|
126
|
+
# 1020304050... case
|
|
127
|
+
odd_duple = True
|
|
128
|
+
odd_prev = value[1]
|
|
129
|
+
odd_value = value[1::2]
|
|
130
|
+
for even_i, odd_i in zip(even_value, odd_value):
|
|
131
|
+
pair_duple &= even_i == odd_i
|
|
132
|
+
even_duple &= even_i == even_prev
|
|
133
|
+
odd_duple &= odd_i == odd_prev
|
|
134
|
+
if not pair_duple and not even_duple and not odd_duple:
|
|
135
|
+
break
|
|
136
|
+
else:
|
|
137
|
+
if pair_duple or odd_duple:
|
|
138
|
+
return self.check_val(even_value)
|
|
139
|
+
if even_duple:
|
|
140
|
+
return self.check_val(odd_value)
|
|
141
|
+
return False
|
|
142
|
+
|
|
92
143
|
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
93
144
|
"""Run filter checks on received credential candidate data 'line_data'.
|
|
94
145
|
|
|
@@ -103,13 +154,10 @@ class ValuePatternCheck(Filter):
|
|
|
103
154
|
if len(line_data.value) < self.pattern_len:
|
|
104
155
|
return True
|
|
105
156
|
|
|
106
|
-
if self.
|
|
107
|
-
return True
|
|
108
|
-
|
|
109
|
-
if self.ascending_pattern_check(line_data.value):
|
|
157
|
+
if self.check_val(line_data.value):
|
|
110
158
|
return True
|
|
111
159
|
|
|
112
|
-
if self.
|
|
160
|
+
if 2 * self.pattern_len <= len(line_data.value) and self.duple_pattern_check(line_data.value):
|
|
113
161
|
return True
|
|
114
162
|
|
|
115
163
|
return False
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
1
3
|
from credsweeper.config import Config
|
|
2
4
|
from credsweeper.credentials import LineData
|
|
3
5
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
@@ -9,6 +11,7 @@ class ValueStringTypeCheck(Filter):
|
|
|
9
11
|
|
|
10
12
|
If it is, then checks if line_data really have string literal declaration.
|
|
11
13
|
Comment rows in source files (start with //, /\*, etc) ignored.
|
|
14
|
+
Multiple bytes scenario allowed [123,23,54,67,78,89] or {0xae, 0x54, 0x55, 0xff}
|
|
12
15
|
|
|
13
16
|
True if:
|
|
14
17
|
|
|
@@ -20,6 +23,8 @@ class ValueStringTypeCheck(Filter):
|
|
|
20
23
|
False otherwise
|
|
21
24
|
"""
|
|
22
25
|
|
|
26
|
+
MULTIBYTE_PATTERN = re.compile(r"(\s*(0x)?[0-9a-f]{1,3}\s*,){8,80}", flags=re.IGNORECASE)
|
|
27
|
+
|
|
23
28
|
def __init__(self, config: Config) -> None:
|
|
24
29
|
self.check_for_literals = config.check_for_literals
|
|
25
30
|
|
|
@@ -37,10 +42,13 @@ class ValueStringTypeCheck(Filter):
|
|
|
37
42
|
if not self.check_for_literals or line_data.url_part:
|
|
38
43
|
return False
|
|
39
44
|
|
|
40
|
-
|
|
41
|
-
|
|
45
|
+
if ValueStringTypeCheck.MULTIBYTE_PATTERN.match(line_data.value):
|
|
46
|
+
return False
|
|
42
47
|
|
|
43
|
-
if line_data.is_source_file_with_quotes()
|
|
48
|
+
if line_data.is_source_file_with_quotes() \
|
|
49
|
+
and not line_data.is_comment() \
|
|
50
|
+
and not line_data.is_well_quoted_value \
|
|
51
|
+
and not line_data.is_quoted \
|
|
44
52
|
and line_data.separator and '=' in line_data.separator:
|
|
45
53
|
# heterogeneous code e.g. YAML in Python uses colon sign instead equals
|
|
46
54
|
return True
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from typing import Tuple
|
|
2
2
|
|
|
3
|
-
from credsweeper.config import Config
|
|
4
3
|
from credsweeper.filters.value_token_base_check import ValueTokenBaseCheck
|
|
5
4
|
|
|
6
5
|
|
|
@@ -21,9 +20,6 @@ class ValueTokenBase32Check(ValueTokenBaseCheck):
|
|
|
21
20
|
64: ((3.4805990476190476, 0.28572156450556774), (2.035756800745673, 0.18815721535870078)),
|
|
22
21
|
}
|
|
23
22
|
|
|
24
|
-
def __init__(self, config: Config = None) -> None:
|
|
25
|
-
super().__init__(config)
|
|
26
|
-
|
|
27
23
|
@staticmethod
|
|
28
24
|
def get_stat_range(size: int) -> Tuple[Tuple[float, float], Tuple[float, float]]:
|
|
29
25
|
"""Returns minimal, maximal for hop and deviation. Precalculated data is applied for speedup"""
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from typing import Tuple
|
|
2
2
|
|
|
3
|
-
from credsweeper.config import Config
|
|
4
3
|
from credsweeper.filters.value_token_base_check import ValueTokenBaseCheck
|
|
5
4
|
|
|
6
5
|
|
|
@@ -21,9 +20,6 @@ class ValueTokenBase36Check(ValueTokenBaseCheck):
|
|
|
21
20
|
64: ((3.7190009761904763, 0.30325954360127116), (2.1751172797904093, 0.1942582237461476)),
|
|
22
21
|
}
|
|
23
22
|
|
|
24
|
-
def __init__(self, config: Config = None) -> None:
|
|
25
|
-
super().__init__(config)
|
|
26
|
-
|
|
27
23
|
@staticmethod
|
|
28
24
|
def get_stat_range(size: int) -> Tuple[Tuple[float, float], Tuple[float, float]]:
|
|
29
25
|
"""Returns minimal, maximal for hop and deviation. Precalculated data is applied for speedup"""
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from typing import Tuple
|
|
2
2
|
|
|
3
|
-
from credsweeper.config import Config
|
|
4
3
|
from credsweeper.filters.value_token_base_check import ValueTokenBaseCheck
|
|
5
4
|
|
|
6
5
|
|
|
@@ -21,9 +20,6 @@ class ValueTokenBase64Check(ValueTokenBaseCheck):
|
|
|
21
20
|
64: ((3.7625271746031745, 0.31733579704946846), (2.257532519514275, 0.20571908142867643)),
|
|
22
21
|
}
|
|
23
22
|
|
|
24
|
-
def __init__(self, config: Config = None) -> None:
|
|
25
|
-
super().__init__(config)
|
|
26
|
-
|
|
27
23
|
@staticmethod
|
|
28
24
|
def get_stat_range(size: int) -> Tuple[Tuple[float, float], Tuple[float, float]]:
|
|
29
25
|
"""Returns minimal, maximal for hop and deviation. Precalculated data is applied for speedup"""
|