credsweeper 1.11.5__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- credsweeper/__init__.py +21 -15
- credsweeper/__main__.py +158 -42
- credsweeper/app.py +18 -13
- credsweeper/common/keyword_pattern.py +19 -18
- credsweeper/common/morpheme_checklist.txt +28 -6
- credsweeper/config/__init__.py +0 -1
- credsweeper/config/config.py +4 -3
- credsweeper/credentials/__init__.py +0 -5
- credsweeper/credentials/augment_candidates.py +1 -1
- credsweeper/credentials/candidate.py +1 -1
- credsweeper/credentials/credential_manager.py +1 -1
- credsweeper/credentials/line_data.py +43 -8
- credsweeper/deep_scanner/__init__.py +0 -1
- credsweeper/deep_scanner/abstract_scanner.py +4 -3
- credsweeper/deep_scanner/byte_scanner.py +1 -1
- credsweeper/deep_scanner/bzip2_scanner.py +2 -2
- credsweeper/deep_scanner/csv_scanner.py +71 -0
- credsweeper/deep_scanner/deb_scanner.py +1 -1
- credsweeper/deep_scanner/deep_scanner.py +22 -12
- credsweeper/deep_scanner/docx_scanner.py +1 -1
- credsweeper/deep_scanner/eml_scanner.py +1 -1
- credsweeper/deep_scanner/encoder_scanner.py +1 -1
- credsweeper/deep_scanner/gzip_scanner.py +2 -2
- credsweeper/deep_scanner/html_scanner.py +1 -1
- credsweeper/deep_scanner/jclass_scanner.py +1 -1
- credsweeper/deep_scanner/jks_scanner.py +12 -3
- credsweeper/deep_scanner/lang_scanner.py +1 -1
- credsweeper/deep_scanner/lzma_scanner.py +2 -2
- credsweeper/deep_scanner/mxfile_scanner.py +1 -1
- credsweeper/deep_scanner/pdf_scanner.py +1 -1
- credsweeper/deep_scanner/pkcs_scanner.py +6 -2
- credsweeper/deep_scanner/pptx_scanner.py +1 -1
- credsweeper/deep_scanner/rpm_scanner.py +1 -1
- credsweeper/deep_scanner/rtf_scanner.py +41 -0
- credsweeper/deep_scanner/strings_scanner.py +52 -0
- credsweeper/deep_scanner/tar_scanner.py +2 -2
- credsweeper/deep_scanner/tmx_scanner.py +2 -2
- credsweeper/deep_scanner/xlsx_scanner.py +2 -2
- credsweeper/deep_scanner/xml_scanner.py +1 -1
- credsweeper/deep_scanner/zip_scanner.py +2 -2
- credsweeper/file_handler/__init__.py +0 -15
- credsweeper/file_handler/abstract_provider.py +3 -4
- credsweeper/file_handler/byte_content_provider.py +11 -2
- credsweeper/file_handler/content_provider.py +1 -1
- credsweeper/file_handler/data_content_provider.py +1 -1
- credsweeper/file_handler/diff_content_provider.py +133 -3
- credsweeper/file_handler/file_path_extractor.py +4 -2
- credsweeper/file_handler/files_provider.py +4 -4
- credsweeper/file_handler/patches_provider.py +7 -8
- credsweeper/file_handler/text_content_provider.py +8 -2
- credsweeper/filters/__init__.py +3 -4
- credsweeper/filters/filter.py +5 -3
- credsweeper/filters/group/__init__.py +0 -2
- credsweeper/filters/group/general_keyword.py +2 -2
- credsweeper/filters/group/general_pattern.py +2 -2
- credsweeper/filters/group/group.py +38 -36
- credsweeper/filters/group/password_keyword.py +9 -8
- credsweeper/filters/group/token_pattern.py +5 -5
- credsweeper/filters/group/url_credentials_group.py +8 -8
- credsweeper/filters/group/weird_base36_token.py +6 -6
- credsweeper/filters/group/weird_base64_token.py +5 -5
- credsweeper/filters/line_git_binary_check.py +5 -4
- credsweeper/filters/line_specific_key_check.py +6 -5
- credsweeper/filters/line_uue_part_check.py +5 -4
- credsweeper/filters/value_allowlist_check.py +6 -5
- credsweeper/filters/value_array_dictionary_check.py +8 -6
- credsweeper/filters/value_atlassian_token_check.py +6 -5
- credsweeper/filters/value_azure_token_check.py +6 -5
- credsweeper/filters/value_base32_data_check.py +8 -5
- credsweeper/filters/value_base64_data_check.py +6 -5
- credsweeper/filters/value_base64_encoded_pem_check.py +6 -5
- credsweeper/filters/value_base64_key_check.py +6 -5
- credsweeper/filters/value_base64_part_check.py +6 -5
- credsweeper/filters/value_basic_auth_check.py +37 -0
- credsweeper/filters/value_blocklist_check.py +6 -4
- credsweeper/filters/value_camel_case_check.py +8 -7
- credsweeper/filters/value_dictionary_keyword_check.py +6 -4
- credsweeper/filters/value_discord_bot_check.py +6 -5
- credsweeper/filters/value_entropy_base_check.py +6 -5
- credsweeper/filters/value_file_path_check.py +13 -8
- credsweeper/filters/value_github_check.py +8 -6
- credsweeper/filters/value_grafana_check.py +6 -5
- credsweeper/filters/value_grafana_service_check.py +5 -4
- credsweeper/filters/value_hex_number_check.py +5 -4
- credsweeper/filters/value_jfrog_token_check.py +6 -5
- credsweeper/filters/value_json_web_key_check.py +6 -5
- credsweeper/filters/value_json_web_token_check.py +6 -5
- credsweeper/filters/value_last_word_check.py +6 -4
- credsweeper/filters/{value_dictionary_value_length_check.py → value_length_check.py} +12 -6
- credsweeper/filters/value_method_check.py +5 -4
- credsweeper/filters/value_morphemes_check.py +43 -0
- credsweeper/filters/value_not_allowed_pattern_check.py +6 -5
- credsweeper/filters/value_not_part_encoded_check.py +4 -4
- credsweeper/filters/value_number_check.py +5 -4
- credsweeper/filters/value_pattern_check.py +61 -41
- credsweeper/filters/value_similarity_check.py +6 -4
- credsweeper/filters/value_split_keyword_check.py +5 -4
- credsweeper/filters/value_string_type_check.py +10 -7
- credsweeper/filters/value_token_base_check.py +5 -4
- credsweeper/filters/value_token_check.py +6 -5
- credsweeper/logger/__init__.py +0 -1
- credsweeper/logger/logger.py +1 -1
- credsweeper/ml_model/__init__.py +0 -1
- credsweeper/ml_model/features/__init__.py +1 -0
- credsweeper/ml_model/features/entropy_evaluation.py +1 -1
- credsweeper/ml_model/features/feature.py +2 -19
- credsweeper/ml_model/features/file_extension.py +2 -2
- credsweeper/ml_model/features/has_html_tag.py +12 -10
- credsweeper/ml_model/features/is_secret_numeric.py +5 -4
- credsweeper/ml_model/features/length_of_attribute.py +1 -1
- credsweeper/ml_model/features/morpheme_dense.py +15 -8
- credsweeper/ml_model/features/rule_name.py +2 -2
- credsweeper/ml_model/features/rule_severity.py +21 -0
- credsweeper/ml_model/features/search_in_attribute.py +1 -1
- credsweeper/ml_model/features/word_in.py +10 -33
- credsweeper/ml_model/features/word_in_path.py +6 -4
- credsweeper/ml_model/features/word_in_postamble.py +2 -5
- credsweeper/ml_model/features/word_in_preamble.py +2 -5
- credsweeper/ml_model/features/word_in_transition.py +2 -5
- credsweeper/ml_model/features/word_in_value.py +3 -4
- credsweeper/ml_model/features/word_in_variable.py +3 -4
- credsweeper/ml_model/ml_config.json +140 -27
- credsweeper/ml_model/ml_model.onnx +0 -0
- credsweeper/ml_model/ml_validator.py +4 -3
- credsweeper/rules/__init__.py +0 -1
- credsweeper/rules/config.yaml +329 -239
- credsweeper/rules/rule.py +4 -3
- credsweeper/scanner/__init__.py +0 -1
- credsweeper/scanner/scan_type/__init__.py +0 -5
- credsweeper/scanner/scan_type/multi_pattern.py +4 -4
- credsweeper/scanner/scan_type/pem_key_pattern.py +4 -4
- credsweeper/scanner/scan_type/scan_type.py +4 -4
- credsweeper/scanner/scan_type/single_pattern.py +4 -4
- credsweeper/scanner/scanner.py +24 -15
- credsweeper/secret/config.json +19 -6
- credsweeper/utils/__init__.py +0 -1
- credsweeper/utils/pem_key_detector.py +3 -3
- credsweeper/utils/util.py +24 -150
- {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/METADATA +7 -7
- credsweeper-1.13.3.dist-info/RECORD +164 -0
- credsweeper/filters/value_couple_keyword_check.py +0 -26
- credsweeper-1.11.5.dist-info/RECORD +0 -159
- {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/WHEEL +0 -0
- {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/entry_points.txt +0 -0
- {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/licenses/LICENSE +0 -0
credsweeper/rules/rule.py
CHANGED
|
@@ -7,9 +7,10 @@ from typing import Dict, List, Optional, Union, Set
|
|
|
7
7
|
from credsweeper import filters
|
|
8
8
|
from credsweeper.common.constants import RuleType, Severity, MAX_LINE_LENGTH, Confidence
|
|
9
9
|
from credsweeper.common.keyword_pattern import KeywordPattern
|
|
10
|
-
from credsweeper.config import Config
|
|
11
|
-
from credsweeper.filters import
|
|
12
|
-
from credsweeper.filters.
|
|
10
|
+
from credsweeper.config.config import Config
|
|
11
|
+
from credsweeper.filters import group
|
|
12
|
+
from credsweeper.filters.filter import Filter
|
|
13
|
+
from credsweeper.filters.group.group import Group
|
|
13
14
|
|
|
14
15
|
logger = logging.getLogger(__name__)
|
|
15
16
|
|
credsweeper/scanner/__init__.py
CHANGED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from credsweeper.scanner.scanner import Scanner
|
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
from credsweeper.scanner.scan_type.scan_type import ScanType # isort:skip
|
|
2
|
-
|
|
3
|
-
from credsweeper.scanner.scan_type.multi_pattern import MultiPattern
|
|
4
|
-
from credsweeper.scanner.scan_type.pem_key_pattern import PemKeyPattern
|
|
5
|
-
from credsweeper.scanner.scan_type.single_pattern import SinglePattern
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
from typing import List
|
|
2
2
|
|
|
3
3
|
from credsweeper.common.constants import RuleType
|
|
4
|
-
from credsweeper.config import Config
|
|
5
|
-
from credsweeper.credentials import Candidate
|
|
4
|
+
from credsweeper.config.config import Config
|
|
5
|
+
from credsweeper.credentials.candidate import Candidate
|
|
6
6
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
7
|
-
from credsweeper.rules import Rule
|
|
8
|
-
from credsweeper.scanner.scan_type import ScanType
|
|
7
|
+
from credsweeper.rules.rule import Rule
|
|
8
|
+
from credsweeper.scanner.scan_type.scan_type import ScanType
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class MultiPattern(ScanType):
|
|
@@ -2,11 +2,11 @@ import logging
|
|
|
2
2
|
from typing import List
|
|
3
3
|
|
|
4
4
|
from credsweeper.common.constants import RuleType
|
|
5
|
-
from credsweeper.config import Config
|
|
6
|
-
from credsweeper.credentials import Candidate
|
|
5
|
+
from credsweeper.config.config import Config
|
|
6
|
+
from credsweeper.credentials.candidate import Candidate
|
|
7
7
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
8
|
-
from credsweeper.rules import Rule
|
|
9
|
-
from credsweeper.scanner.scan_type import ScanType
|
|
8
|
+
from credsweeper.rules.rule import Rule
|
|
9
|
+
from credsweeper.scanner.scan_type.scan_type import ScanType
|
|
10
10
|
from credsweeper.utils.pem_key_detector import PemKeyDetector
|
|
11
11
|
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
@@ -4,11 +4,11 @@ from abc import ABC, abstractmethod
|
|
|
4
4
|
from typing import List
|
|
5
5
|
|
|
6
6
|
from credsweeper.common.constants import RuleType, MIN_DATA_LEN
|
|
7
|
-
from credsweeper.config import Config
|
|
8
|
-
from credsweeper.credentials import Candidate, LineData
|
|
7
|
+
from credsweeper.config.config import Config
|
|
8
|
+
from credsweeper.credentials.candidate import Candidate, LineData
|
|
9
9
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
10
|
-
from credsweeper.filters import Filter
|
|
11
|
-
from credsweeper.rules import Rule
|
|
10
|
+
from credsweeper.filters.filter import Filter
|
|
11
|
+
from credsweeper.rules.rule import Rule
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
from typing import List
|
|
2
2
|
|
|
3
|
-
from credsweeper.config import Config
|
|
4
|
-
from credsweeper.credentials import Candidate
|
|
3
|
+
from credsweeper.config.config import Config
|
|
4
|
+
from credsweeper.credentials.candidate import Candidate
|
|
5
5
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
6
|
-
from credsweeper.rules import Rule
|
|
7
|
-
from credsweeper.scanner.scan_type import ScanType
|
|
6
|
+
from credsweeper.rules.rule import Rule
|
|
7
|
+
from credsweeper.scanner.scan_type.scan_type import ScanType
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class SinglePattern(ScanType):
|
credsweeper/scanner/scanner.py
CHANGED
|
@@ -6,16 +6,21 @@ from typing import List, Type, Tuple, Union, Dict, Generator, Set
|
|
|
6
6
|
from credsweeper.app import APP_PATH
|
|
7
7
|
from credsweeper.common.constants import RuleType, MIN_VARIABLE_LENGTH, MIN_SEPARATOR_LENGTH, MIN_VALUE_LENGTH, \
|
|
8
8
|
MAX_LINE_LENGTH, PEM_BEGIN_PATTERN
|
|
9
|
-
from credsweeper.config import Config
|
|
10
|
-
from credsweeper.credentials import Candidate
|
|
9
|
+
from credsweeper.config.config import Config
|
|
10
|
+
from credsweeper.credentials.candidate import Candidate
|
|
11
11
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
12
12
|
from credsweeper.file_handler.content_provider import ContentProvider
|
|
13
|
-
from credsweeper.rules import Rule
|
|
14
|
-
from credsweeper.scanner.scan_type import MultiPattern, PemKeyPattern, ScanType, SinglePattern
|
|
15
|
-
from credsweeper.utils import Util
|
|
13
|
+
from credsweeper.rules.rule import Rule
|
|
14
|
+
from credsweeper.scanner.scan_type.multi_pattern import MultiPattern
|
|
15
|
+
from credsweeper.scanner.scan_type.pem_key_pattern import PemKeyPattern
|
|
16
|
+
from credsweeper.scanner.scan_type.scan_type import ScanType
|
|
17
|
+
from credsweeper.scanner.scan_type.single_pattern import SinglePattern
|
|
18
|
+
from credsweeper.utils.util import Util
|
|
16
19
|
|
|
17
20
|
logger = logging.getLogger(__name__)
|
|
18
21
|
|
|
22
|
+
RULES_PATH = APP_PATH / "rules" / "config.yaml"
|
|
23
|
+
|
|
19
24
|
|
|
20
25
|
class Scanner:
|
|
21
26
|
"""Advanced Credential Scanner base class.
|
|
@@ -63,11 +68,11 @@ class Scanner:
|
|
|
63
68
|
return True
|
|
64
69
|
return False
|
|
65
70
|
|
|
66
|
-
def _set_rules_scanners(self,
|
|
71
|
+
def _set_rules_scanners(self, rules_path: Union[None, str, Path]) -> None:
|
|
67
72
|
"""Auxiliary method to fill rules, determine min_pattern_len and set scanners"""
|
|
68
|
-
if
|
|
69
|
-
|
|
70
|
-
rule_templates = Util.yaml_load(
|
|
73
|
+
if rules_path is None:
|
|
74
|
+
rules_path = RULES_PATH
|
|
75
|
+
rule_templates = Util.yaml_load(rules_path)
|
|
71
76
|
if rule_templates and isinstance(rule_templates, list):
|
|
72
77
|
rule_names = set()
|
|
73
78
|
for rule_template in rule_templates:
|
|
@@ -95,7 +100,7 @@ class Scanner:
|
|
|
95
100
|
logger.warning(f"Unknown rule type:{rule.rule_type}")
|
|
96
101
|
self.rules_scanners.append((rule, self.get_scanner(rule)))
|
|
97
102
|
else:
|
|
98
|
-
raise RuntimeError(f"Wrong rules '{rule_templates}' were read from '{
|
|
103
|
+
raise RuntimeError(f"Wrong rules '{rule_templates}' were read from '{rules_path}'")
|
|
99
104
|
|
|
100
105
|
def _is_available(self, rule: Rule) -> bool:
|
|
101
106
|
"""separate the method to reduce complexity"""
|
|
@@ -142,16 +147,22 @@ class Scanner:
|
|
|
142
147
|
# Trim string from outer spaces to make future `x in str` checks faster
|
|
143
148
|
target_line_stripped = target.line_strip
|
|
144
149
|
target_line_stripped_len = target.line_strip_len
|
|
150
|
+
# use lower case for required substring
|
|
151
|
+
target_line_stripped_lower = target.line_lower_strip
|
|
145
152
|
|
|
146
153
|
# "cache" - YAPF and pycharm formatters ...
|
|
147
154
|
matched_keyword = \
|
|
148
155
|
target_line_stripped_len >= self.min_keyword_len and ( #
|
|
149
156
|
'=' in target_line_stripped
|
|
150
157
|
or ':' in target_line_stripped
|
|
151
|
-
or "
|
|
152
|
-
|
|
153
|
-
|
|
158
|
+
or ("define" in target_line_stripped
|
|
159
|
+
and ('(' in target_line_stripped and ',' in target_line_stripped
|
|
160
|
+
or "#define" in target_line_stripped
|
|
161
|
+
or "%define" in target_line_stripped)
|
|
162
|
+
)
|
|
154
163
|
or "%global" in target_line_stripped
|
|
164
|
+
or "set" in target_line_stripped_lower
|
|
165
|
+
or "%3d" in target_line_stripped_lower
|
|
155
166
|
) #
|
|
156
167
|
matched_pem_key = \
|
|
157
168
|
target_line_stripped_len >= self.min_pem_key_len \
|
|
@@ -165,8 +176,6 @@ class Scanner:
|
|
|
165
176
|
target.line_num)
|
|
166
177
|
continue
|
|
167
178
|
|
|
168
|
-
# use lower case for required substring
|
|
169
|
-
target_line_stripped_lower = target.line_lower_strip
|
|
170
179
|
# cached value to skip the same regex verifying
|
|
171
180
|
matched_regex: Dict[re.Pattern, bool] = {}
|
|
172
181
|
|
credsweeper/secret/config.json
CHANGED
|
@@ -12,18 +12,21 @@
|
|
|
12
12
|
".rpm",
|
|
13
13
|
".tar",
|
|
14
14
|
".war",
|
|
15
|
+
".whl",
|
|
15
16
|
".xz",
|
|
16
17
|
".zip"
|
|
17
18
|
],
|
|
18
19
|
"documents": [
|
|
19
|
-
".
|
|
20
|
+
".doc",
|
|
20
21
|
".docx",
|
|
21
|
-
".pptx",
|
|
22
|
-
".xls",
|
|
23
22
|
".odp",
|
|
24
23
|
".ods",
|
|
25
24
|
".odt",
|
|
26
|
-
".pdf"
|
|
25
|
+
".pdf",
|
|
26
|
+
".ppt",
|
|
27
|
+
".pptx",
|
|
28
|
+
".xls",
|
|
29
|
+
".xlsx"
|
|
27
30
|
],
|
|
28
31
|
"extension": [
|
|
29
32
|
".7z",
|
|
@@ -45,16 +48,23 @@
|
|
|
45
48
|
".info",
|
|
46
49
|
".jpeg",
|
|
47
50
|
".jpg",
|
|
51
|
+
".lib",
|
|
48
52
|
".map",
|
|
49
53
|
".m4a",
|
|
50
54
|
".mat",
|
|
51
55
|
".mo",
|
|
56
|
+
".mov",
|
|
52
57
|
".mp3",
|
|
53
58
|
".mp4",
|
|
59
|
+
".mpg",
|
|
60
|
+
".mkv",
|
|
54
61
|
".npy",
|
|
55
62
|
".npz",
|
|
56
63
|
".obj",
|
|
64
|
+
".oga",
|
|
57
65
|
".ogg",
|
|
66
|
+
".ogv",
|
|
67
|
+
".ops",
|
|
58
68
|
".pak",
|
|
59
69
|
".png",
|
|
60
70
|
".psd",
|
|
@@ -71,8 +81,10 @@
|
|
|
71
81
|
".so",
|
|
72
82
|
".sum",
|
|
73
83
|
".svg",
|
|
84
|
+
".swf",
|
|
74
85
|
".tif",
|
|
75
86
|
".tiff",
|
|
87
|
+
".tlb",
|
|
76
88
|
".ttf",
|
|
77
89
|
".vcxproj",
|
|
78
90
|
".vdproj",
|
|
@@ -81,6 +93,7 @@
|
|
|
81
93
|
".webp",
|
|
82
94
|
".wma",
|
|
83
95
|
".woff",
|
|
96
|
+
".woff2",
|
|
84
97
|
".yuv"
|
|
85
98
|
],
|
|
86
99
|
"path": [
|
|
@@ -164,8 +177,8 @@
|
|
|
164
177
|
"tizen"
|
|
165
178
|
],
|
|
166
179
|
"check_for_literals": true,
|
|
167
|
-
"
|
|
168
|
-
"
|
|
180
|
+
"max_password_value_length": 64,
|
|
181
|
+
"max_url_cred_value_length": 80,
|
|
169
182
|
"line_data_output": [
|
|
170
183
|
"line",
|
|
171
184
|
"line_num",
|
credsweeper/utils/__init__.py
CHANGED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from credsweeper.utils.util import DiffRowData, Util, DiffDict
|
|
@@ -5,10 +5,10 @@ import string
|
|
|
5
5
|
from typing import List
|
|
6
6
|
|
|
7
7
|
from credsweeper.common.constants import PEM_BEGIN_PATTERN, PEM_END_PATTERN, Chars
|
|
8
|
-
from credsweeper.config import Config
|
|
9
|
-
from credsweeper.credentials import LineData
|
|
8
|
+
from credsweeper.config.config import Config
|
|
9
|
+
from credsweeper.credentials.line_data import LineData
|
|
10
10
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
11
|
-
from credsweeper.utils import Util
|
|
11
|
+
from credsweeper.utils.util import Util
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
credsweeper/utils/util.py
CHANGED
|
@@ -9,12 +9,10 @@ import random
|
|
|
9
9
|
import re
|
|
10
10
|
import string
|
|
11
11
|
import tarfile
|
|
12
|
-
from dataclasses import dataclass
|
|
13
12
|
from pathlib import Path
|
|
14
13
|
from typing import Any, Dict, List, Tuple, Optional, Union
|
|
15
14
|
|
|
16
15
|
import numpy as np
|
|
17
|
-
import whatthepatch
|
|
18
16
|
import yaml
|
|
19
17
|
from cryptography.hazmat.primitives import hashes
|
|
20
18
|
from cryptography.hazmat.primitives.asymmetric import padding
|
|
@@ -29,31 +27,12 @@ from cryptography.hazmat.primitives.asymmetric.x448 import X448PublicKey, X448Pr
|
|
|
29
27
|
from cryptography.hazmat.primitives.serialization import load_der_private_key
|
|
30
28
|
from cryptography.hazmat.primitives.serialization.pkcs12 import load_key_and_certificates
|
|
31
29
|
from lxml import etree
|
|
32
|
-
from typing_extensions import TypedDict
|
|
33
30
|
|
|
34
|
-
from credsweeper.common.constants import DiffRowType, AVAILABLE_ENCODINGS, \
|
|
31
|
+
from credsweeper.common.constants import AVAILABLE_ENCODINGS, \
|
|
35
32
|
DEFAULT_ENCODING, LATIN_1, CHUNK_SIZE, MAX_LINE_LENGTH, CHUNK_STEP_SIZE, ASCII
|
|
36
33
|
|
|
37
34
|
logger = logging.getLogger(__name__)
|
|
38
35
|
|
|
39
|
-
DiffDict = TypedDict(
|
|
40
|
-
"DiffDict",
|
|
41
|
-
{
|
|
42
|
-
"old": Optional[int], #
|
|
43
|
-
"new": Optional[int], #
|
|
44
|
-
"line": Union[str, bytes], # bytes are possibly since whatthepatch v1.0.4
|
|
45
|
-
"hunk": Any # not used
|
|
46
|
-
})
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
@dataclass(frozen=True)
|
|
50
|
-
class DiffRowData:
|
|
51
|
-
"""Class for keeping data of diff row."""
|
|
52
|
-
|
|
53
|
-
line_type: DiffRowType
|
|
54
|
-
line_numb: int
|
|
55
|
-
line: str
|
|
56
|
-
|
|
57
36
|
|
|
58
37
|
class Util:
|
|
59
38
|
"""Class that contains different useful methods."""
|
|
@@ -82,11 +61,11 @@ class Util:
|
|
|
82
61
|
def get_shannon_entropy(data: Union[str, bytes]) -> float:
|
|
83
62
|
"""Borrowed from http://blog.dkbza.org/2007/05/scanning-data-for-entropy-anomalies.html."""
|
|
84
63
|
if not data:
|
|
85
|
-
return 0.
|
|
64
|
+
return 0.0
|
|
86
65
|
size = len(data)
|
|
87
66
|
_uniq, counts = np.unique(list(data), return_counts=True)
|
|
88
67
|
probabilities = counts / size
|
|
89
|
-
entropy = float(
|
|
68
|
+
entropy = -float(np.sum(probabilities * np.log2(probabilities)))
|
|
90
69
|
return entropy
|
|
91
70
|
|
|
92
71
|
# Precalculated data for speedup
|
|
@@ -162,15 +141,6 @@ class Util:
|
|
|
162
141
|
min_entropy = Util.get_min_data_entropy(data_len)
|
|
163
142
|
return entropy < min_entropy
|
|
164
143
|
|
|
165
|
-
@staticmethod
|
|
166
|
-
def is_known(data: Union[bytes, bytearray]) -> bool:
|
|
167
|
-
"""Returns True if any known binary format is found to prevent extra scan a file without an extension."""
|
|
168
|
-
if isinstance(data, (bytes, bytearray)) and data.startswith(b"\x7f\x45\x4c\x46") and 127 <= len(data):
|
|
169
|
-
# https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
|
|
170
|
-
# minimal ELF is 127 bytes https://github.com/tchajed/minimal-elf
|
|
171
|
-
return True
|
|
172
|
-
return False
|
|
173
|
-
|
|
174
144
|
@staticmethod
|
|
175
145
|
def is_binary(data: Union[bytes, bytearray]) -> bool:
|
|
176
146
|
"""
|
|
@@ -239,13 +209,12 @@ class Util:
|
|
|
239
209
|
try:
|
|
240
210
|
if binary_suggest and LATIN_1 == encoding and (Util.is_binary(content) or not Util.is_latin1(content)):
|
|
241
211
|
# LATIN_1 may convert data (bytes in range 0x80:0xFF are transformed)
|
|
242
|
-
# so skip this encoding when checking binaries
|
|
243
|
-
logger.warning("Binary file detected %s", repr(content[:8]))
|
|
244
212
|
break
|
|
245
|
-
|
|
246
|
-
if content !=
|
|
213
|
+
_text = content.decode(encoding=encoding, errors="strict")
|
|
214
|
+
if content != _text.encode(encoding=encoding, errors="strict"):
|
|
247
215
|
# the check helps to detect a real encoding
|
|
248
216
|
raise UnicodeError
|
|
217
|
+
text = _text
|
|
249
218
|
break
|
|
250
219
|
except UnicodeError:
|
|
251
220
|
binary_suggest = True
|
|
@@ -254,6 +223,11 @@ class Util:
|
|
|
254
223
|
logger.error(f"Unexpected Error: Can't read content as {encoding}. Error message: {exc}")
|
|
255
224
|
return text
|
|
256
225
|
|
|
226
|
+
@staticmethod
|
|
227
|
+
def split_text(text: str) -> List[str]:
|
|
228
|
+
"""Splits a text into lines, handling all common line endings (e.g., LF, CRLF, CR)."""
|
|
229
|
+
return text.replace("\r\n", '\n').replace('\r', '\n').split('\n')
|
|
230
|
+
|
|
257
231
|
@staticmethod
|
|
258
232
|
def decode_bytes(content: bytes, encodings: Optional[List[str]] = None) -> List[str]:
|
|
259
233
|
"""Decode content using different encodings.
|
|
@@ -272,121 +246,11 @@ class Util:
|
|
|
272
246
|
|
|
273
247
|
"""
|
|
274
248
|
if text := Util.decode_text(content, encodings):
|
|
275
|
-
lines =
|
|
249
|
+
lines = Util.split_text(text)
|
|
276
250
|
else:
|
|
277
251
|
lines = []
|
|
278
252
|
return lines
|
|
279
253
|
|
|
280
|
-
@staticmethod
|
|
281
|
-
def patch2files_diff(raw_patch: List[str], change_type: DiffRowType) -> Dict[str, List[DiffDict]]:
|
|
282
|
-
"""Generate files changes from patch for added or deleted filepaths.
|
|
283
|
-
|
|
284
|
-
Args:
|
|
285
|
-
raw_patch: git patch file content
|
|
286
|
-
change_type: change type to select, DiffRowType.ADDED or DiffRowType.DELETED
|
|
287
|
-
|
|
288
|
-
Return:
|
|
289
|
-
return dict with ``{file paths: list of file row changes}``, where
|
|
290
|
-
elements of list of file row changes represented as::
|
|
291
|
-
|
|
292
|
-
{
|
|
293
|
-
"old": line number before diff,
|
|
294
|
-
"new": line number after diff,
|
|
295
|
-
"line": line text,
|
|
296
|
-
"hunk": diff hunk number
|
|
297
|
-
}
|
|
298
|
-
|
|
299
|
-
"""
|
|
300
|
-
if not raw_patch:
|
|
301
|
-
return {}
|
|
302
|
-
|
|
303
|
-
added_files, deleted_files = {}, {}
|
|
304
|
-
try:
|
|
305
|
-
for patch in whatthepatch.parse_patch(raw_patch):
|
|
306
|
-
if patch.changes is None:
|
|
307
|
-
logger.warning(f"Patch '{str(patch.header)}' cannot be scanned")
|
|
308
|
-
continue
|
|
309
|
-
changes = []
|
|
310
|
-
for change in patch.changes:
|
|
311
|
-
change_dict = change._asdict()
|
|
312
|
-
changes.append(change_dict)
|
|
313
|
-
|
|
314
|
-
added_files[patch.header.new_path] = changes
|
|
315
|
-
deleted_files[patch.header.old_path] = changes
|
|
316
|
-
if change_type == DiffRowType.ADDED:
|
|
317
|
-
return added_files
|
|
318
|
-
elif change_type == DiffRowType.DELETED:
|
|
319
|
-
return deleted_files
|
|
320
|
-
else:
|
|
321
|
-
logger.error(f"Change type should be one of: '{DiffRowType.ADDED}', '{DiffRowType.DELETED}';"
|
|
322
|
-
f" but received {change_type}")
|
|
323
|
-
except Exception as exc:
|
|
324
|
-
logger.exception(exc)
|
|
325
|
-
return {}
|
|
326
|
-
|
|
327
|
-
@staticmethod
|
|
328
|
-
def preprocess_diff_rows(
|
|
329
|
-
added_line_number: Optional[int], #
|
|
330
|
-
deleted_line_number: Optional[int], #
|
|
331
|
-
line: str) -> List[DiffRowData]:
|
|
332
|
-
"""Auxiliary function to extend diff changes.
|
|
333
|
-
|
|
334
|
-
Args:
|
|
335
|
-
added_line_number: number of added line or None
|
|
336
|
-
deleted_line_number: number of deleted line or None
|
|
337
|
-
line: the text line
|
|
338
|
-
|
|
339
|
-
Return:
|
|
340
|
-
diff rows data with as list of row change type, line number, row content
|
|
341
|
-
|
|
342
|
-
"""
|
|
343
|
-
rows_data: List[DiffRowData] = []
|
|
344
|
-
if isinstance(added_line_number, int):
|
|
345
|
-
# indicates line was inserted
|
|
346
|
-
rows_data.append(DiffRowData(DiffRowType.ADDED, added_line_number, line))
|
|
347
|
-
if isinstance(deleted_line_number, int):
|
|
348
|
-
# indicates line was removed
|
|
349
|
-
rows_data.append(DiffRowData(DiffRowType.DELETED, deleted_line_number, line))
|
|
350
|
-
return rows_data
|
|
351
|
-
|
|
352
|
-
@staticmethod
|
|
353
|
-
def wrong_change(change: DiffDict) -> bool:
|
|
354
|
-
"""Returns True if the change is wrong"""
|
|
355
|
-
for i in ["line", "new", "old"]:
|
|
356
|
-
if i not in change:
|
|
357
|
-
logger.error(f"Skipping wrong change {change}")
|
|
358
|
-
return True
|
|
359
|
-
return False
|
|
360
|
-
|
|
361
|
-
@staticmethod
|
|
362
|
-
def preprocess_file_diff(changes: List[DiffDict]) -> List[DiffRowData]:
|
|
363
|
-
"""Generate changed file rows from diff data with changed lines (e.g. marked + or - in diff).
|
|
364
|
-
|
|
365
|
-
Args:
|
|
366
|
-
changes: git diff by file rows data
|
|
367
|
-
|
|
368
|
-
Return:
|
|
369
|
-
diff rows data with as list of row change type, line number, row content
|
|
370
|
-
|
|
371
|
-
"""
|
|
372
|
-
if not changes:
|
|
373
|
-
return []
|
|
374
|
-
|
|
375
|
-
rows_data = []
|
|
376
|
-
# process diff to restore lines and their positions
|
|
377
|
-
for change in changes:
|
|
378
|
-
if Util.wrong_change(change):
|
|
379
|
-
continue
|
|
380
|
-
line = change["line"]
|
|
381
|
-
if isinstance(line, str):
|
|
382
|
-
rows_data.extend(Util.preprocess_diff_rows(change.get("new"), change.get("old"), line))
|
|
383
|
-
elif isinstance(line, (bytes, bytearray)):
|
|
384
|
-
logger.warning("The feature is available with the deep scan option")
|
|
385
|
-
else:
|
|
386
|
-
logger.error(f"Unknown type of line {type(line)}")
|
|
387
|
-
|
|
388
|
-
return rows_data
|
|
389
|
-
|
|
390
254
|
@staticmethod
|
|
391
255
|
def is_zip(data: Union[bytes, bytearray]) -> bool:
|
|
392
256
|
"""According https://en.wikipedia.org/wiki/List_of_file_signatures"""
|
|
@@ -486,13 +350,20 @@ class Util:
|
|
|
486
350
|
return True
|
|
487
351
|
return False
|
|
488
352
|
|
|
489
|
-
@
|
|
490
|
-
def is_sqlite3(
|
|
353
|
+
@staticmethod
|
|
354
|
+
def is_sqlite3(data: Union[bytes, bytearray]):
|
|
491
355
|
"""According https://en.wikipedia.org/wiki/List_of_file_signatures - SQLite Database"""
|
|
492
356
|
if isinstance(data, (bytes, bytearray)) and data.startswith(b"SQLite format 3\0"):
|
|
493
357
|
return True
|
|
494
358
|
return False
|
|
495
359
|
|
|
360
|
+
@staticmethod
|
|
361
|
+
def is_rtf(data: Union[bytes, bytearray]):
|
|
362
|
+
"""According https://en.wikipedia.org/wiki/List_of_file_signatures - Rich Text Format"""
|
|
363
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"{\\rtf1") and data.endswith(b"}"):
|
|
364
|
+
return True
|
|
365
|
+
return False
|
|
366
|
+
|
|
496
367
|
@staticmethod
|
|
497
368
|
def is_asn1(data: Union[bytes, bytearray]) -> int:
|
|
498
369
|
"""Only sequence type 0x30 and size correctness are checked
|
|
@@ -706,6 +577,7 @@ class Util:
|
|
|
706
577
|
"""decode text to bytes with / without padding detect and urlsafe symbols"""
|
|
707
578
|
value = text.translate(Util.WHITESPACE_TRANS_TABLE)
|
|
708
579
|
if padding_safe:
|
|
580
|
+
value = value.rstrip('=') # python 3.10 workaround
|
|
709
581
|
pad_num = 0x3 & len(value)
|
|
710
582
|
if pad_num:
|
|
711
583
|
value += '=' * (4 - pad_num)
|
|
@@ -769,6 +641,8 @@ class Util:
|
|
|
769
641
|
@staticmethod
|
|
770
642
|
def subtext(text: str, pos: int, hunk_size: int) -> str:
|
|
771
643
|
"""cut text symmetrically for given position or use remained quota to be fitted in 2x hunk_size"""
|
|
644
|
+
# cut trailed whitespaces to obtain more informative data
|
|
645
|
+
text = text.rstrip()
|
|
772
646
|
if hunk_size <= pos:
|
|
773
647
|
left_quota = 0
|
|
774
648
|
left_pos = pos - hunk_size
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: credsweeper
|
|
3
|
-
Version: 1.11.5
|
|
3
|
+
Version: 1.13.3
|
|
4
4
|
Summary: Credential Sweeper
|
|
5
5
|
Project-URL: Homepage, https://github.com/Samsung/CredSweeper
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues
|
|
@@ -10,13 +10,12 @@ Classifier: License :: OSI Approved :: MIT License
|
|
|
10
10
|
Classifier: Operating System :: OS Independent
|
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
|
12
12
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
14
13
|
Classifier: Programming Language :: Python :: 3.10
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.11
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.12
|
|
17
16
|
Classifier: Topic :: Security
|
|
18
17
|
Classifier: Topic :: Software Development :: Quality Assurance
|
|
19
|
-
Requires-Python: >=3.9
|
|
18
|
+
Requires-Python: >=3.10
|
|
20
19
|
Requires-Dist: base58
|
|
21
20
|
Requires-Dist: beautifulsoup4>=4.11.0
|
|
22
21
|
Requires-Dist: colorama
|
|
@@ -24,10 +23,10 @@ Requires-Dist: cryptography
|
|
|
24
23
|
Requires-Dist: gitpython
|
|
25
24
|
Requires-Dist: humanfriendly
|
|
26
25
|
Requires-Dist: lxml
|
|
27
|
-
Requires-Dist: numpy
|
|
26
|
+
Requires-Dist: numpy
|
|
28
27
|
Requires-Dist: odfpy
|
|
29
|
-
Requires-Dist: onnxruntime; platform_system != 'Windows'
|
|
30
|
-
Requires-Dist: onnxruntime==1.19.2; platform_system == 'Windows'
|
|
28
|
+
Requires-Dist: onnxruntime; platform_system != 'Windows' or python_version != '3.12'
|
|
29
|
+
Requires-Dist: onnxruntime==1.19.2; platform_system == 'Windows' and python_version == '3.12'
|
|
31
30
|
Requires-Dist: openpyxl
|
|
32
31
|
Requires-Dist: pandas
|
|
33
32
|
Requires-Dist: pdfminer-six
|
|
@@ -38,6 +37,7 @@ Requires-Dist: python-docx
|
|
|
38
37
|
Requires-Dist: python-pptx
|
|
39
38
|
Requires-Dist: pyyaml
|
|
40
39
|
Requires-Dist: rpmfile
|
|
40
|
+
Requires-Dist: striprtf
|
|
41
41
|
Requires-Dist: whatthepatch
|
|
42
42
|
Requires-Dist: xlrd
|
|
43
43
|
Description-Content-Type: text/markdown
|
|
@@ -90,7 +90,7 @@ Full documentation can be found here: <https://credsweeper.readthedocs.io/>
|
|
|
90
90
|
|
|
91
91
|
### Main Requirements
|
|
92
92
|
|
|
93
|
-
- Python 3.9, 3.10, 3.11, 3.12
|
|
93
|
+
- Python 3.10, 3.11, 3.12
|
|
94
94
|
|
|
95
95
|
### Installation
|
|
96
96
|
|