credsweeper 1.11.6__py3-none-any.whl → 1.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- credsweeper/__init__.py +1 -1
- credsweeper/common/keyword_pattern.py +13 -15
- credsweeper/common/morpheme_checklist.txt +2 -0
- credsweeper/config/config.py +2 -2
- credsweeper/credentials/line_data.py +20 -0
- credsweeper/filters/__init__.py +1 -1
- credsweeper/filters/filter.py +3 -1
- credsweeper/filters/group/group.py +22 -31
- credsweeper/filters/group/password_keyword.py +7 -6
- credsweeper/filters/group/token_pattern.py +1 -1
- credsweeper/filters/group/url_credentials_group.py +6 -6
- credsweeper/filters/group/weird_base36_token.py +1 -1
- credsweeper/filters/group/weird_base64_token.py +1 -1
- credsweeper/filters/line_git_binary_check.py +2 -1
- credsweeper/filters/line_specific_key_check.py +2 -1
- credsweeper/filters/line_uue_part_check.py +2 -1
- credsweeper/filters/value_allowlist_check.py +2 -1
- credsweeper/filters/value_array_dictionary_check.py +5 -3
- credsweeper/filters/value_atlassian_token_check.py +2 -1
- credsweeper/filters/value_azure_token_check.py +2 -1
- credsweeper/filters/value_base32_data_check.py +4 -1
- credsweeper/filters/value_base64_data_check.py +2 -1
- credsweeper/filters/value_base64_encoded_pem_check.py +2 -1
- credsweeper/filters/value_base64_key_check.py +2 -1
- credsweeper/filters/value_base64_part_check.py +2 -1
- credsweeper/filters/value_basic_auth_check.py +2 -1
- credsweeper/filters/value_blocklist_check.py +3 -1
- credsweeper/filters/value_camel_case_check.py +2 -1
- credsweeper/filters/value_couple_keyword_check.py +3 -1
- credsweeper/filters/value_dictionary_keyword_check.py +3 -1
- credsweeper/filters/value_discord_bot_check.py +2 -1
- credsweeper/filters/value_entropy_base_check.py +2 -1
- credsweeper/filters/value_file_path_check.py +3 -1
- credsweeper/filters/value_github_check.py +2 -1
- credsweeper/filters/value_grafana_check.py +2 -1
- credsweeper/filters/value_grafana_service_check.py +2 -1
- credsweeper/filters/value_hex_number_check.py +2 -1
- credsweeper/filters/value_jfrog_token_check.py +2 -1
- credsweeper/filters/value_json_web_key_check.py +2 -1
- credsweeper/filters/value_json_web_token_check.py +2 -1
- credsweeper/filters/value_last_word_check.py +3 -1
- credsweeper/filters/{value_dictionary_value_length_check.py → value_length_check.py} +9 -3
- credsweeper/filters/value_method_check.py +2 -1
- credsweeper/filters/value_not_allowed_pattern_check.py +2 -1
- credsweeper/filters/value_not_part_encoded_check.py +1 -1
- credsweeper/filters/value_number_check.py +2 -1
- credsweeper/filters/value_pattern_check.py +58 -38
- credsweeper/filters/value_similarity_check.py +3 -1
- credsweeper/filters/value_split_keyword_check.py +2 -1
- credsweeper/filters/value_string_type_check.py +6 -4
- credsweeper/filters/value_token_base_check.py +2 -1
- credsweeper/filters/value_token_check.py +3 -2
- credsweeper/ml_model/features/__init__.py +1 -0
- credsweeper/ml_model/features/morpheme_dense.py +14 -7
- credsweeper/ml_model/features/rule_severity.py +21 -0
- credsweeper/ml_model/features/word_in_path.py +4 -1
- credsweeper/ml_model/ml_config.json +130 -24
- credsweeper/ml_model/ml_model.onnx +0 -0
- credsweeper/rules/config.yaml +50 -23
- credsweeper/scanner/scanner.py +4 -3
- credsweeper/secret/config.json +2 -2
- credsweeper/utils/util.py +2 -0
- {credsweeper-1.11.6.dist-info → credsweeper-1.12.0.dist-info}/METADATA +1 -1
- {credsweeper-1.11.6.dist-info → credsweeper-1.12.0.dist-info}/RECORD +67 -66
- {credsweeper-1.11.6.dist-info → credsweeper-1.12.0.dist-info}/WHEEL +0 -0
- {credsweeper-1.11.6.dist-info → credsweeper-1.12.0.dist-info}/entry_points.txt +0 -0
- {credsweeper-1.11.6.dist-info → credsweeper-1.12.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
1
3
|
from credsweeper.common import static_keyword_checklist
|
|
2
4
|
from credsweeper.common.constants import Chars
|
|
3
5
|
from credsweeper.config.config import Config
|
|
@@ -18,7 +20,7 @@ class ValueFilePathCheck(Filter):
|
|
|
18
20
|
unusual_windows_symbols_in_path = "\t\n\r!$@`&*(){}<>+=;,~^"
|
|
19
21
|
unusual_linux_symbols_in_path = "\t\n\r!@`&*<>+=;,~^:\\"
|
|
20
22
|
|
|
21
|
-
def __init__(self, config: Config = None) -> None:
|
|
23
|
+
def __init__(self, config: Optional[Config] = None) -> None:
|
|
22
24
|
pass
|
|
23
25
|
|
|
24
26
|
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import binascii
|
|
2
2
|
import contextlib
|
|
3
|
+
from typing import Optional
|
|
3
4
|
|
|
4
5
|
import base62
|
|
5
6
|
|
|
@@ -13,7 +14,7 @@ from credsweeper.filters.filter import Filter
|
|
|
13
14
|
class ValueGitHubCheck(Filter):
|
|
14
15
|
"""GitHub Classic Token validation"""
|
|
15
16
|
|
|
16
|
-
def __init__(self, config: Config = None) -> None:
|
|
17
|
+
def __init__(self, config: Optional[Config] = None) -> None:
|
|
17
18
|
pass
|
|
18
19
|
|
|
19
20
|
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import contextlib
|
|
2
2
|
import json
|
|
3
|
+
from typing import Optional
|
|
3
4
|
|
|
4
5
|
from credsweeper.config.config import Config
|
|
5
6
|
from credsweeper.credentials.line_data import LineData
|
|
@@ -11,7 +12,7 @@ from credsweeper.utils.util import Util
|
|
|
11
12
|
class ValueGrafanaCheck(Filter):
|
|
12
13
|
"""Grafana Provisioned API Key and Access Policy Token"""
|
|
13
14
|
|
|
14
|
-
def __init__(self, config: Config = None) -> None:
|
|
15
|
+
def __init__(self, config: Optional[Config] = None) -> None:
|
|
15
16
|
pass
|
|
16
17
|
|
|
17
18
|
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import binascii
|
|
2
2
|
import contextlib
|
|
3
3
|
import struct
|
|
4
|
+
from typing import Optional
|
|
4
5
|
|
|
5
6
|
from credsweeper.common.constants import ASCII
|
|
6
7
|
from credsweeper.config.config import Config
|
|
@@ -12,7 +13,7 @@ from credsweeper.filters.filter import Filter
|
|
|
12
13
|
class ValueGrafanaServiceCheck(Filter):
|
|
13
14
|
"""Check that candidate have a known structure"""
|
|
14
15
|
|
|
15
|
-
def __init__(self, config: Config = None) -> None:
|
|
16
|
+
def __init__(self, config: Optional[Config] = None) -> None:
|
|
16
17
|
pass
|
|
17
18
|
|
|
18
19
|
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import re
|
|
2
|
+
from typing import Optional
|
|
2
3
|
|
|
3
4
|
from credsweeper.config.config import Config
|
|
4
5
|
from credsweeper.credentials.line_data import LineData
|
|
@@ -11,7 +12,7 @@ class ValueHexNumberCheck(Filter):
|
|
|
11
12
|
|
|
12
13
|
HEX_08_64_VALUE_REGEX = re.compile(r"^0x[0-9a-f]{1,16}$")
|
|
13
14
|
|
|
14
|
-
def __init__(self, config: Config = None) -> None:
|
|
15
|
+
def __init__(self, config: Optional[Config] = None) -> None:
|
|
15
16
|
pass
|
|
16
17
|
|
|
17
18
|
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import contextlib
|
|
2
2
|
import re
|
|
3
|
+
from typing import Optional
|
|
3
4
|
|
|
4
5
|
import base58
|
|
5
6
|
|
|
@@ -14,7 +15,7 @@ from credsweeper.utils.util import Util
|
|
|
14
15
|
class ValueJfrogTokenCheck(Filter):
|
|
15
16
|
"""Check that candidate have a known structure JFROG token"""
|
|
16
17
|
|
|
17
|
-
def __init__(self, config: Config = None) -> None:
|
|
18
|
+
def __init__(self, config: Optional[Config] = None) -> None:
|
|
18
19
|
# reftkn:01:0123456789:abcdefGhijklmnoPqrstuVwxyz0
|
|
19
20
|
self._pattern = re.compile(r"reftkn:\d+:\d+:[\w_/+-]+")
|
|
20
21
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import contextlib
|
|
2
|
+
from typing import Optional
|
|
2
3
|
|
|
3
4
|
from credsweeper.config.config import Config
|
|
4
5
|
from credsweeper.credentials.line_data import LineData
|
|
@@ -15,7 +16,7 @@ class ValueJsonWebKeyCheck(Filter):
|
|
|
15
16
|
https://datatracker.ietf.org/doc/html/rfc7518
|
|
16
17
|
"""
|
|
17
18
|
|
|
18
|
-
def __init__(self, config: Config = None) -> None:
|
|
19
|
+
def __init__(self, config: Optional[Config] = None) -> None:
|
|
19
20
|
pass
|
|
20
21
|
|
|
21
22
|
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import contextlib
|
|
2
2
|
import json
|
|
3
|
+
from typing import Optional
|
|
3
4
|
|
|
4
5
|
from credsweeper.config.config import Config
|
|
5
6
|
from credsweeper.credentials.line_data import LineData
|
|
@@ -24,7 +25,7 @@ class ValueJsonWebTokenCheck(Filter):
|
|
|
24
25
|
"ext", "crit", "keys", "id", "role", "token", "secret", "password", "nonce"
|
|
25
26
|
}
|
|
26
27
|
|
|
27
|
-
def __init__(self, config: Config = None) -> None:
|
|
28
|
+
def __init__(self, config: Optional[Config] = None) -> None:
|
|
28
29
|
pass
|
|
29
30
|
|
|
30
31
|
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
1
3
|
from credsweeper.config.config import Config
|
|
2
4
|
from credsweeper.credentials.line_data import LineData
|
|
3
5
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
@@ -7,7 +9,7 @@ from credsweeper.filters.filter import Filter
|
|
|
7
9
|
class ValueLastWordCheck(Filter):
|
|
8
10
|
"""Check that secret is not short value that ends with `:`."""
|
|
9
11
|
|
|
10
|
-
def __init__(self, config: Config = None) -> None:
|
|
12
|
+
def __init__(self, config: Optional[Config] = None) -> None:
|
|
11
13
|
pass
|
|
12
14
|
|
|
13
15
|
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
@@ -1,13 +1,19 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from credsweeper.common.constants import MIN_VALUE_LENGTH, MAX_LINE_LENGTH
|
|
1
4
|
from credsweeper.config.config import Config
|
|
2
5
|
from credsweeper.credentials.line_data import LineData
|
|
3
6
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
4
7
|
from credsweeper.filters.filter import Filter
|
|
5
8
|
|
|
6
9
|
|
|
7
|
-
class
|
|
8
|
-
"""Check that candidate length is between
|
|
10
|
+
class ValueLengthCheck(Filter):
|
|
11
|
+
"""Check that candidate value length is between MIN and MAX."""
|
|
9
12
|
|
|
10
|
-
def __init__(self,
|
|
13
|
+
def __init__(self,
|
|
14
|
+
config: Optional[Config] = None,
|
|
15
|
+
min_len: int = MIN_VALUE_LENGTH,
|
|
16
|
+
max_len: int = MAX_LINE_LENGTH) -> None:
|
|
11
17
|
self.min_len = min_len
|
|
12
18
|
self.max_len = max_len
|
|
13
19
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import re
|
|
2
|
+
from typing import Optional
|
|
2
3
|
|
|
3
4
|
from credsweeper.config.config import Config
|
|
4
5
|
from credsweeper.credentials.line_data import LineData
|
|
@@ -14,7 +15,7 @@ class ValueMethodCheck(Filter):
|
|
|
14
15
|
|
|
15
16
|
PATTERN = re.compile(r"^[~.\->:0-9A-Za-z_]+\(.*\)")
|
|
16
17
|
|
|
17
|
-
def __init__(self, config: Config = None) -> None:
|
|
18
|
+
def __init__(self, config: Optional[Config] = None) -> None:
|
|
18
19
|
pass
|
|
19
20
|
|
|
20
21
|
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import re
|
|
2
|
+
from typing import Optional
|
|
2
3
|
|
|
3
4
|
from credsweeper.config.config import Config
|
|
4
5
|
from credsweeper.credentials.line_data import LineData
|
|
@@ -15,7 +16,7 @@ class ValueNotAllowedPatternCheck(Filter):
|
|
|
15
16
|
f"{Util.get_regex_combine_or(NOT_ALLOWED)}$", #
|
|
16
17
|
flags=re.IGNORECASE)
|
|
17
18
|
|
|
18
|
-
def __init__(self, config: Config = None) -> None:
|
|
19
|
+
def __init__(self, config: Optional[Config] = None) -> None:
|
|
19
20
|
pass
|
|
20
21
|
|
|
21
22
|
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
@@ -16,7 +16,7 @@ class ValueNotPartEncodedCheck(Filter):
|
|
|
16
16
|
BASE64_ENCODED_DATA_PATTERN_AFTER = re.compile(
|
|
17
17
|
r"(^|[^A-Za-z0-9]+)(?P<val>(([A-Za-z0-9=_-]{4}){4,64})|(([A-Za-z0-9=+/]{4}){4,64}))([^=A-Za-z0-9]+|$)")
|
|
18
18
|
|
|
19
|
-
def __init__(self, config: Config = None) -> None:
|
|
19
|
+
def __init__(self, config: Optional[Config] = None) -> None:
|
|
20
20
|
pass
|
|
21
21
|
|
|
22
22
|
@staticmethod
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import re
|
|
2
|
+
from typing import Optional
|
|
2
3
|
|
|
3
4
|
from credsweeper.config.config import Config
|
|
4
5
|
from credsweeper.credentials.line_data import LineData
|
|
@@ -12,7 +13,7 @@ class ValueNumberCheck(Filter):
|
|
|
12
13
|
HEX_VALUE_REGEX = re.compile("^(0x)?[0-9a-f]{1,128}[ul]{0,3}$")
|
|
13
14
|
DEC_VALUE_REGEX = re.compile("^-?[0-9]{1,20}[ul]{0,3}$")
|
|
14
15
|
|
|
15
|
-
def __init__(self, config: Config = None) -> None:
|
|
16
|
+
def __init__(self, config: Optional[Config] = None) -> None:
|
|
16
17
|
pass
|
|
17
18
|
|
|
18
19
|
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import re
|
|
2
|
+
from typing import Optional
|
|
2
3
|
|
|
3
|
-
from credsweeper.common.constants import DEFAULT_PATTERN_LEN
|
|
4
|
+
from credsweeper.common.constants import DEFAULT_PATTERN_LEN, MAX_LINE_LENGTH
|
|
4
5
|
from credsweeper.config.config import Config
|
|
5
6
|
from credsweeper.credentials.line_data import LineData
|
|
6
7
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
@@ -22,36 +23,60 @@ class ValuePatternCheck(Filter):
|
|
|
22
23
|
Default pattern LEN is 4
|
|
23
24
|
"""
|
|
24
25
|
|
|
25
|
-
|
|
26
|
+
MAX_PATTERN_LENGTH = int(MAX_LINE_LENGTH).bit_length()
|
|
27
|
+
|
|
28
|
+
def __init__(self, config: Optional[Config] = None, pattern_len: Optional[int] = None) -> None:
    """Create ValuePatternCheck with a constant or a value-dependent pattern length.

    Args:
        config: accepted to keep the common filter constructor signature; not read here
        pattern_len: constant pattern length applied to a value of any size
            (an int not less than DEFAULT_PATTERN_LEN), or None to choose the
            pattern length from the bit length of the value size

    Raises:
        ValueError: pattern_len is neither None nor an int >= DEFAULT_PATTERN_LEN

    """
    # one table slot for every possible bit length: indices 0..MAX_PATTERN_LENGTH
    patterns_count = 1 + ValuePatternCheck.MAX_PATTERN_LENGTH
    if pattern_len is None:
        # -1 marks the dynamic mode: run() then skips the constant-length shortcut
        self.pattern_len = -1
        # pattern length grows with the bit length but never drops below the default
        self.pattern_lengths = [max(x, DEFAULT_PATTERN_LEN) for x in range(patterns_count)]
        # regex table is derived from the length table, so both always agree
        self.patterns = [ValuePatternCheck.get_pattern(x) for x in self.pattern_lengths]
    elif isinstance(pattern_len, int) and DEFAULT_PATTERN_LEN <= pattern_len:
        self.pattern_len = pattern_len
        # constant pattern for any value length: every slot shares the same entry
        self.pattern_lengths = [pattern_len] * patterns_count
        self.patterns = [ValuePatternCheck.get_pattern(pattern_len)] * patterns_count
    else:
        raise ValueError(f"Wrong type of pattern length {type(pattern_len)} = {repr(pattern_len)}")
|
|
49
|
+
|
|
50
|
+
@staticmethod
def get_pattern(pattern_len: int) -> re.Pattern:
    """Create a regex that finds N or more identical non-space characters in sequence.

    Args:
        pattern_len: requested minimal run length N; values below
            DEFAULT_PATTERN_LEN are raised to DEFAULT_PATTERN_LEN

    Return:
        compiled pattern: one captured character followed by at least N-1 repeats of it

    """
    # derive the floor from the constant instead of a hard-coded "{3,}"
    run_length = max(pattern_len, DEFAULT_PATTERN_LEN)
    # the captured character itself counts as the first element of the run
    return re.compile(fr"(\S)\1{{{run_length - 1},}}")
|
|
35
58
|
|
|
36
|
-
def equal_pattern_check(self, value: str, bit_length: int) -> bool:
    """Check if candidate value contains a sequence of the same repeated character.

    The minimal sequence length is defined by the regex stored in
    self.patterns[bit_length].

    Args:
        value: string variable, credential candidate value
        bit_length: index of the prepared pattern in self.patterns;
            speedup for len(value).bit_length()

    Return:
        True if contain and False if not

    """
    # a single hit is enough: search() stops at the first match,
    # whereas findall() would collect every match only to test truthiness
    return self.patterns[bit_length].search(value) is not None
|
|
49
73
|
|
|
50
|
-
def ascending_pattern_check(self, value: str) -> bool:
|
|
74
|
+
def ascending_pattern_check(self, value: str, bit_length: int) -> bool:
|
|
51
75
|
"""Check if candidate value contain 4 and more ascending chars or numbers sequences.
|
|
52
76
|
|
|
53
77
|
Arg:
|
|
54
78
|
value: credential candidate value
|
|
79
|
+
bit_length: speedup for len(value).bit_length()
|
|
55
80
|
|
|
56
81
|
Return:
|
|
57
82
|
True if contain and False if not
|
|
@@ -64,15 +89,16 @@ class ValuePatternCheck(Filter):
|
|
|
64
89
|
else:
|
|
65
90
|
count = 1
|
|
66
91
|
continue
|
|
67
|
-
if count == self.
|
|
92
|
+
if count == self.pattern_lengths[bit_length]:
|
|
68
93
|
return True
|
|
69
94
|
return False
|
|
70
95
|
|
|
71
|
-
def descending_pattern_check(self, value: str) -> bool:
|
|
96
|
+
def descending_pattern_check(self, value: str, bit_length: int) -> bool:
|
|
72
97
|
"""Check if candidate value contain 4 and more descending chars or numbers sequences.
|
|
73
98
|
|
|
74
99
|
Arg:
|
|
75
100
|
value: string variable, credential candidate value
|
|
101
|
+
bit_length: speedup for len(value).bit_length()
|
|
76
102
|
|
|
77
103
|
Return:
|
|
78
104
|
boolean variable. True if contain and False if not
|
|
@@ -85,59 +111,44 @@ class ValuePatternCheck(Filter):
|
|
|
85
111
|
else:
|
|
86
112
|
count = 1
|
|
87
113
|
continue
|
|
88
|
-
if count == self.
|
|
114
|
+
if count == self.pattern_lengths[bit_length]:
|
|
89
115
|
return True
|
|
90
116
|
return False
|
|
91
117
|
|
|
92
|
-
def check_val(self, value: str, bit_length: int) -> bool:
    """Cumulative value check.

    Runs the equal, ascending and descending pattern checks in that order and
    stops at the first one that reports a pattern.

    Arg:
        value: string variable, credential candidate value
        bit_length: forwarded unchanged to every pattern check;
            speedup for len(value).bit_length()

    Return:
        boolean variable. True if contain and False if not

    """
    # "or" short-circuits, so later checks are skipped once a pattern is found
    return (self.equal_pattern_check(value, bit_length)  #
            or self.ascending_pattern_check(value, bit_length)  #
            or self.descending_pattern_check(value, bit_length))
|
|
109
136
|
|
|
110
|
-
def duple_pattern_check(self, value: str, bit_length: int) -> bool:
    """Check if candidate value is a duplet value with possible patterns.

    The value is split into the characters at even positions and the characters
    at odd positions; the result is True only when check_val() reports a pattern
    in both parts.

    Arg:
        value: string variable, credential candidate value
        bit_length: forwarded unchanged to check_val() for both parts, i.e. it is
            the bit length related to the whole value, not to a half

    Return:
        boolean variable. True if contain and False if not

    """
    even_value = value[0::2]
    odd_value = value[1::2]
    # "and" short-circuits: the odd part is examined only if the even part matched
    return self.check_val(even_value, bit_length) and self.check_val(odd_value, bit_length)
|
|
142
153
|
|
|
143
154
|
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
@@ -151,13 +162,22 @@ class ValuePatternCheck(Filter):
|
|
|
151
162
|
boolean variable. True, if need to filter candidate and False if left
|
|
152
163
|
|
|
153
164
|
"""
|
|
154
|
-
|
|
165
|
+
value_length = len(line_data.value)
|
|
166
|
+
bit_length = max(DEFAULT_PATTERN_LEN, value_length.bit_length())
|
|
167
|
+
|
|
168
|
+
if ValuePatternCheck.MAX_PATTERN_LENGTH < bit_length:
|
|
169
|
+
# huge values may contain anything
|
|
170
|
+
return False
|
|
171
|
+
|
|
172
|
+
if 0 <= value_length < self.pattern_len or value_length < self.pattern_lengths[bit_length]:
|
|
173
|
+
# too short value
|
|
155
174
|
return True
|
|
156
175
|
|
|
157
|
-
if self.check_val(line_data.value):
|
|
176
|
+
if self.check_val(line_data.value, bit_length):
|
|
158
177
|
return True
|
|
159
178
|
|
|
160
|
-
if 2 * self.
|
|
179
|
+
if 2 * self.pattern_lengths[bit_length] <= value_length \
|
|
180
|
+
and self.duple_pattern_check(line_data.value, bit_length):
|
|
161
181
|
return True
|
|
162
182
|
|
|
163
183
|
return False
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
1
3
|
from credsweeper.config.config import Config
|
|
2
4
|
from credsweeper.credentials.line_data import LineData
|
|
3
5
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
@@ -7,7 +9,7 @@ from credsweeper.filters.filter import Filter
|
|
|
7
9
|
class ValueSimilarityCheck(Filter):
|
|
8
10
|
"""Check if candidate value is at least 70% same as candidate keyword. Like: `secret = "mysecret"`."""
|
|
9
11
|
|
|
10
|
-
def __init__(self, config: Config = None) -> None:
|
|
12
|
+
def __init__(self, config: Optional[Config] = None) -> None:
|
|
11
13
|
pass
|
|
12
14
|
|
|
13
15
|
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from typing import Optional
|
|
1
2
|
from typing import Union
|
|
2
3
|
|
|
3
4
|
from credsweeper.common import static_keyword_checklist
|
|
@@ -10,7 +11,7 @@ from credsweeper.filters.filter import Filter
|
|
|
10
11
|
class ValueSplitKeywordCheck(Filter):
|
|
11
12
|
"""Check value by splitting with standard whitespace separators and any word is not matched in checklist."""
|
|
12
13
|
|
|
13
|
-
def __init__(self, config: Config = None) -> None:
|
|
14
|
+
def __init__(self, config: Optional[Config] = None) -> None:
|
|
14
15
|
pass
|
|
15
16
|
|
|
16
17
|
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import re
|
|
2
|
+
from typing import Optional
|
|
2
3
|
|
|
3
4
|
from credsweeper.config.config import Config
|
|
4
5
|
from credsweeper.credentials.line_data import LineData
|
|
@@ -23,10 +24,11 @@ class ValueStringTypeCheck(Filter):
|
|
|
23
24
|
False otherwise
|
|
24
25
|
"""
|
|
25
26
|
|
|
26
|
-
MULTIBYTE_PATTERN = re.compile(r"(
|
|
27
|
+
MULTIBYTE_PATTERN = re.compile(r"((0x)?[0-9a-f]{1,16}[UL]*)(\s*,\s*((0x)?[0-9a-f]{1,16}[UL]*)){3}",
|
|
28
|
+
flags=re.IGNORECASE)
|
|
27
29
|
|
|
28
|
-
def __init__(self, config: Config) -> None:
|
|
29
|
-
self.check_for_literals =
|
|
30
|
+
def __init__(self, config: Optional[Config] = None, check_for_literals=True) -> None:
|
|
31
|
+
self.check_for_literals = check_for_literals
|
|
30
32
|
|
|
31
33
|
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
32
34
|
"""Run filter checks on received credential candidate data 'line_data'.
|
|
@@ -42,7 +44,7 @@ class ValueStringTypeCheck(Filter):
|
|
|
42
44
|
if not self.check_for_literals or line_data.url_part:
|
|
43
45
|
return False
|
|
44
46
|
|
|
45
|
-
if ValueStringTypeCheck.MULTIBYTE_PATTERN.
|
|
47
|
+
if ValueStringTypeCheck.MULTIBYTE_PATTERN.search(line_data.value):
|
|
46
48
|
return False
|
|
47
49
|
|
|
48
50
|
if line_data.is_source_file_with_quotes() \
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import contextlib
|
|
2
2
|
from abc import abstractmethod
|
|
3
|
+
from typing import Optional
|
|
3
4
|
from typing import Tuple
|
|
4
5
|
|
|
5
6
|
from credsweeper.config.config import Config
|
|
@@ -26,7 +27,7 @@ class ValueTokenBaseCheck(Filter):
|
|
|
26
27
|
64: 2.15981241,
|
|
27
28
|
}
|
|
28
29
|
|
|
29
|
-
def __init__(self, config: Config = None) -> None:
|
|
30
|
+
def __init__(self, config: Optional[Config] = None) -> None:
|
|
30
31
|
self.__hop_stat = HopStat()
|
|
31
32
|
|
|
32
33
|
@staticmethod
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import re
|
|
2
|
+
from typing import Optional
|
|
2
3
|
|
|
3
4
|
from credsweeper.config.config import Config
|
|
4
5
|
from credsweeper.credentials.line_data import LineData
|
|
@@ -17,9 +18,9 @@ class ValueTokenCheck(Filter):
|
|
|
17
18
|
|
|
18
19
|
"""
|
|
19
20
|
|
|
20
|
-
SPLIT_PATTERN = r"(
|
|
21
|
+
SPLIT_PATTERN = re.compile(r"(?<!\W) (?!\W)|[;(){}<>[\]`]")
|
|
21
22
|
|
|
22
|
-
def __init__(self, config: Config = None) -> None:
|
|
23
|
+
def __init__(self, config: Optional[Config] = None) -> None:
|
|
23
24
|
pass
|
|
24
25
|
|
|
25
26
|
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
@@ -5,6 +5,7 @@ from credsweeper.ml_model.features.is_secret_numeric import IsSecretNumeric
|
|
|
5
5
|
from credsweeper.ml_model.features.length_of_attribute import LengthOfAttribute
|
|
6
6
|
from credsweeper.ml_model.features.morpheme_dense import MorphemeDense
|
|
7
7
|
from credsweeper.ml_model.features.rule_name import RuleName
|
|
8
|
+
from credsweeper.ml_model.features.rule_severity import RuleSeverity
|
|
8
9
|
from credsweeper.ml_model.features.search_in_attribute import SearchInAttribute
|
|
9
10
|
from credsweeper.ml_model.features.word_in_path import WordInPath
|
|
10
11
|
from credsweeper.ml_model.features.word_in_postamble import WordInPostamble
|
|
@@ -7,13 +7,20 @@ class MorphemeDense(Feature):
|
|
|
7
7
|
"""Feature calculates morphemes density for a value"""
|
|
8
8
|
|
|
9
9
|
def extract(self, candidate: Candidate) -> float:
    """Return the share of the lowercased value that is covered by known morphemes.

    Args:
        candidate: the value of its first line_data item is analysed

    Return:
        0.0 for an empty value, otherwise the total length of all morpheme
        occurrences divided by the value length, limited to 1.0

    """
    value = candidate.line_data_list[0].value.lower()
    if not value:
        return 0.0
    # str.count() counts non-overlapping occurrences left to right, which is what a
    # find-and-skip loop does, and it cannot spin forever on an empty morpheme
    morphemes_length = sum(
        value.count(morpheme) * len(morpheme) for morpheme in static_keyword_checklist.morpheme_set)
    # different morphemes may overlap each other, so the sum may exceed the value length
    return min(1.0, morphemes_length / len(value))
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from credsweeper.common.constants import Severity
|
|
2
|
+
from credsweeper.credentials.candidate import Candidate
|
|
3
|
+
from credsweeper.ml_model.features.feature import Feature
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class RuleSeverity(Feature):
    """Numeric feature that corresponds to the severity of the candidate."""

    def extract(self, candidate: Candidate) -> float:
        """Map the candidate severity to a number between 0.0 and 1.0.

        Args:
            candidate: its severity attribute is read

        Return:
            1.0 for CRITICAL, 0.75 for HIGH, 0.5 for MEDIUM, 0.25 for LOW, 0.0 for INFO

        Raises:
            ValueError: the severity is none of the five known levels

        """
        severity = candidate.severity
        if Severity.CRITICAL == severity:
            return 1.0
        elif Severity.HIGH == severity:
            return 0.75
        elif Severity.MEDIUM == severity:
            return 0.5
        elif Severity.LOW == severity:
            return 0.25
        elif Severity.INFO == severity:
            return 0.0
        else:
            raise ValueError(f"Unknown type of severity: {candidate.severity}")
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import os.path
|
|
1
2
|
from pathlib import Path
|
|
2
3
|
from typing import List, Any
|
|
3
4
|
|
|
@@ -16,7 +17,9 @@ class WordInPath(WordIn):
|
|
|
16
17
|
path = Path(file_path)
|
|
17
18
|
# apply ./ for normalised path to detect "/src" for relative path
|
|
18
19
|
posix_lower_path = path.as_posix().lower() if path.is_absolute() else f"./{path.as_posix().lower()}"
|
|
19
|
-
|
|
20
|
+
# prevent extra confusion from the same word in extension
|
|
21
|
+
path_without_extension, _ = os.path.splitext(posix_lower_path)
|
|
22
|
+
return self.word_in_str(path_without_extension)
|
|
20
23
|
else:
|
|
21
24
|
return np.array([np.zeros(shape=[self.dimension], dtype=np.int8)])
|
|
22
25
|
|