credsweeper 1.11.3__tar.gz → 1.11.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- {credsweeper-1.11.3 → credsweeper-1.11.4}/PKG-INFO +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/__init__.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/__main__.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/app.py +21 -44
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/common/constants.py +2 -5
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/credentials/candidate_key.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/credentials/credential_manager.py +4 -3
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/credentials/line_data.py +2 -5
- credsweeper-1.11.4/credsweeper/deep_scanner/deb_scanner.py +48 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/deep_scanner.py +47 -36
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/gzip_scanner.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/byte_content_provider.py +2 -2
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/content_provider.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/data_content_provider.py +2 -2
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/diff_content_provider.py +2 -2
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/file_path_extractor.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/files_provider.py +2 -4
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/patches_provider.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/string_content_provider.py +2 -2
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/struct_content_provider.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/text_content_provider.py +2 -2
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_base64_encoded_pem_check.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_entropy_base64_check.py +2 -6
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_pattern_check.py +64 -16
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/file_extension.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/ml_validator.py +43 -21
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/rules/config.yaml +3 -3
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/rules/rule.py +3 -3
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/utils/hop_stat.py +3 -3
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/utils/pem_key_detector.py +5 -3
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/utils/util.py +13 -6
- {credsweeper-1.11.3 → credsweeper-1.11.4}/.gitignore +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/LICENSE +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/README.md +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/common/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/common/keyword_checklist.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/common/keyword_checklist.txt +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/common/keyword_pattern.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/common/morpheme_checklist.txt +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/config/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/config/config.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/credentials/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/credentials/augment_candidates.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/credentials/candidate.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/credentials/candidate_group_generator.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/abstract_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/byte_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/bzip2_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/docx_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/eml_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/encoder_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/html_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/jks_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/lang_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/lzma_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/mxfile_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/pdf_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/pkcs12_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/pptx_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/tar_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/tmx_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/xlsx_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/xml_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/zip_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/abstract_provider.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/analysis_target.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/descriptor.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/filter.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/general_keyword.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/general_pattern.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/group.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/password_keyword.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/token_pattern.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/url_credentials_group.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/weird_base36_token.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/weird_base64_token.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/line_git_binary_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/line_specific_key_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/line_uue_part_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_allowlist_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_array_dictionary_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_atlassian_token_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_azure_token_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_base32_data_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_base64_data_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_base64_key_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_base64_part_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_blocklist_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_camel_case_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_couple_keyword_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_dictionary_keyword_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_dictionary_value_length_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_discord_bot_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_entropy_base32_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_entropy_base36_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_entropy_base_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_file_path_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_github_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_grafana_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_grafana_service_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_hex_number_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_jfrog_token_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_json_web_token_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_last_word_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_method_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_not_allowed_pattern_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_not_part_encoded_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_number_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_similarity_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_split_keyword_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_string_type_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_token_base32_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_token_base36_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_token_base64_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_token_base_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_token_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/logger/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/logger/logger.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/entropy_evaluation.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/feature.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/has_html_tag.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/is_secret_numeric.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/length_of_attribute.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/morpheme_dense.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/rule_name.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/search_in_attribute.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_path.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_postamble.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_preamble.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_transition.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_value.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_variable.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/ml_config.json +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/ml_model.onnx +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/py.typed +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/rules/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/scanner/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/multi_pattern.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/pem_key_pattern.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/scan_type.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/single_pattern.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/scanner/scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/secret/config.json +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/secret/log.yaml +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/utils/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.4}/pyproject.toml +0 -0
|
@@ -63,7 +63,7 @@ def logger_levels(log_level: str) -> str:
|
|
|
63
63
|
Returns True if log_level UPPERCASE is one of keys
|
|
64
64
|
"""
|
|
65
65
|
val = log_level.upper()
|
|
66
|
-
if
|
|
66
|
+
if val in Logger.LEVELS:
|
|
67
67
|
return val
|
|
68
68
|
raise ArgumentTypeError(f"Log level provided: {log_level} -- must be one of: {' | '.join(Logger.LEVELS.keys())}")
|
|
69
69
|
|
|
@@ -15,11 +15,13 @@ from credsweeper.common.constants import Severity, ThresholdPreset, DiffRowType,
|
|
|
15
15
|
from credsweeper.config import Config
|
|
16
16
|
from credsweeper.credentials import Candidate, CredentialManager, CandidateKey
|
|
17
17
|
from credsweeper.deep_scanner.deep_scanner import DeepScanner
|
|
18
|
+
from credsweeper.file_handler.content_provider import ContentProvider
|
|
18
19
|
from credsweeper.file_handler.diff_content_provider import DiffContentProvider
|
|
19
20
|
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
|
|
20
21
|
from credsweeper.file_handler.abstract_provider import AbstractProvider
|
|
21
22
|
from credsweeper.file_handler.text_content_provider import TextContentProvider
|
|
22
23
|
from credsweeper.scanner import Scanner
|
|
24
|
+
from credsweeper.ml_model.ml_validator import MlValidator
|
|
23
25
|
from credsweeper.utils import Util
|
|
24
26
|
|
|
25
27
|
logger = logging.getLogger(__name__)
|
|
@@ -94,7 +96,7 @@ class CredSweeper:
|
|
|
94
96
|
log_level: str - level for pool initializer according logging levels (UPPERCASE)
|
|
95
97
|
|
|
96
98
|
"""
|
|
97
|
-
self.pool_count: int =
|
|
99
|
+
self.pool_count: int = max(1, int(pool_count))
|
|
98
100
|
if not (_severity := Severity.get(severity)):
|
|
99
101
|
raise RuntimeError(f"Severity level provided: {severity}"
|
|
100
102
|
f" -- must be one of: {' | '.join([i.value for i in Severity])}")
|
|
@@ -123,9 +125,9 @@ class CredSweeper:
|
|
|
123
125
|
self.ml_config = ml_config
|
|
124
126
|
self.ml_model = ml_model
|
|
125
127
|
self.ml_providers = ml_providers
|
|
126
|
-
self.ml_validator = None
|
|
127
128
|
self.__thrifty = thrifty
|
|
128
129
|
self.__log_level = log_level
|
|
130
|
+
self.__ml_validator: Optional[MlValidator] = None
|
|
129
131
|
|
|
130
132
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
131
133
|
|
|
@@ -182,35 +184,22 @@ class CredSweeper:
|
|
|
182
184
|
|
|
183
185
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
184
186
|
|
|
185
|
-
# the import cannot be done on top due
|
|
186
|
-
# TypeError: cannot pickle 'onnxruntime.capi.onnxruntime_pybind11_state.InferenceSession' object
|
|
187
|
-
from credsweeper.ml_model import MlValidator
|
|
188
|
-
|
|
189
|
-
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
190
|
-
|
|
191
187
|
@property
|
|
192
188
|
def ml_validator(self) -> MlValidator:
|
|
193
189
|
"""ml_validator getter"""
|
|
194
|
-
from credsweeper.ml_model import MlValidator
|
|
195
190
|
if not self.__ml_validator:
|
|
196
|
-
self.__ml_validator
|
|
191
|
+
self.__ml_validator = MlValidator(
|
|
197
192
|
threshold=self.ml_threshold, #
|
|
198
193
|
ml_config=self.ml_config, #
|
|
199
194
|
ml_model=self.ml_model, #
|
|
200
195
|
ml_providers=self.ml_providers, #
|
|
201
196
|
)
|
|
202
|
-
|
|
197
|
+
if not self.__ml_validator:
|
|
198
|
+
raise RuntimeError("MlValidator was not initialized!")
|
|
203
199
|
return self.__ml_validator
|
|
204
200
|
|
|
205
201
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
206
202
|
|
|
207
|
-
@ml_validator.setter
|
|
208
|
-
def ml_validator(self, _ml_validator: Optional[MlValidator]) -> None:
|
|
209
|
-
"""ml_validator setter"""
|
|
210
|
-
self.__ml_validator = _ml_validator
|
|
211
|
-
|
|
212
|
-
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
213
|
-
|
|
214
203
|
@staticmethod
|
|
215
204
|
def pool_initializer(log_kwargs) -> None:
|
|
216
205
|
"""Ignore SIGINT in child processes."""
|
|
@@ -219,20 +208,6 @@ class CredSweeper:
|
|
|
219
208
|
|
|
220
209
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
221
210
|
|
|
222
|
-
@property
|
|
223
|
-
def config(self) -> Config:
|
|
224
|
-
"""config getter"""
|
|
225
|
-
return self.__config
|
|
226
|
-
|
|
227
|
-
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
228
|
-
|
|
229
|
-
@config.setter
|
|
230
|
-
def config(self, config: Config) -> None:
|
|
231
|
-
"""config setter"""
|
|
232
|
-
self.__config = config
|
|
233
|
-
|
|
234
|
-
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
235
|
-
|
|
236
211
|
def run(self, content_provider: AbstractProvider) -> int:
|
|
237
212
|
"""Run an analysis of 'content_provider' object.
|
|
238
213
|
|
|
@@ -241,9 +216,10 @@ class CredSweeper:
|
|
|
241
216
|
|
|
242
217
|
"""
|
|
243
218
|
_empty_list: Sequence[Union[DiffContentProvider, TextContentProvider]] = []
|
|
244
|
-
file_extractors
|
|
245
|
-
|
|
246
|
-
|
|
219
|
+
file_extractors = content_provider.get_scannable_files(self.config) if content_provider else _empty_list
|
|
220
|
+
if not file_extractors:
|
|
221
|
+
logger.info(f"No scannable targets for {len(content_provider.paths)} paths")
|
|
222
|
+
return 0
|
|
247
223
|
self.scan(file_extractors)
|
|
248
224
|
self.post_processing()
|
|
249
225
|
# PatchesProvider has the attribute. Circular import error appears with using the isinstance
|
|
@@ -260,7 +236,7 @@ class CredSweeper:
|
|
|
260
236
|
content_providers: file objects to scan
|
|
261
237
|
|
|
262
238
|
"""
|
|
263
|
-
if 1 < self.pool_count:
|
|
239
|
+
if 1 < self.pool_count and 1 < len(content_providers):
|
|
264
240
|
self.__multi_jobs_scan(content_providers)
|
|
265
241
|
else:
|
|
266
242
|
self.__single_job_scan(content_providers)
|
|
@@ -269,6 +245,7 @@ class CredSweeper:
|
|
|
269
245
|
|
|
270
246
|
def __single_job_scan(self, content_providers: Sequence[Union[DiffContentProvider, TextContentProvider]]) -> None:
|
|
271
247
|
"""Performs scan in main thread"""
|
|
248
|
+
logger.info(f"Scan for {len(content_providers)} providers")
|
|
272
249
|
all_cred = self.files_scan(content_providers)
|
|
273
250
|
self.credential_manager.set_credentials(all_cred)
|
|
274
251
|
|
|
@@ -284,12 +261,14 @@ class CredSweeper:
|
|
|
284
261
|
if "SILENCE" == self.__log_level:
|
|
285
262
|
logging.addLevelName(60, "SILENCE")
|
|
286
263
|
log_kwargs["level"] = self.__log_level
|
|
287
|
-
|
|
288
|
-
|
|
264
|
+
pool_count = min(self.pool_count, len(content_providers))
|
|
265
|
+
logger.info(f"Scan in {pool_count} processes for {len(content_providers)} providers")
|
|
266
|
+
with multiprocessing.get_context("spawn").Pool(processes=pool_count,
|
|
267
|
+
initializer=CredSweeper.pool_initializer,
|
|
289
268
|
initargs=(log_kwargs, )) as pool:
|
|
290
269
|
try:
|
|
291
|
-
for scan_results in pool.imap_unordered(self.files_scan,
|
|
292
|
-
|
|
270
|
+
for scan_results in pool.imap_unordered(self.files_scan,
|
|
271
|
+
(content_providers[x::pool_count] for x in range(pool_count))):
|
|
293
272
|
for cred in scan_results:
|
|
294
273
|
self.credential_manager.add_credential(cred)
|
|
295
274
|
except KeyboardInterrupt:
|
|
@@ -301,9 +280,7 @@ class CredSweeper:
|
|
|
301
280
|
|
|
302
281
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
303
282
|
|
|
304
|
-
def files_scan(
|
|
305
|
-
self, #
|
|
306
|
-
content_providers: Sequence[Union[DiffContentProvider, TextContentProvider]]) -> List[Candidate]:
|
|
283
|
+
def files_scan(self, content_providers: Sequence[ContentProvider]) -> List[Candidate]:
|
|
307
284
|
"""Auxiliary method for scan one sequence"""
|
|
308
285
|
all_cred: List[Candidate] = []
|
|
309
286
|
for provider in content_providers:
|
|
@@ -316,7 +293,7 @@ class CredSweeper:
|
|
|
316
293
|
|
|
317
294
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
318
295
|
|
|
319
|
-
def file_scan(self, content_provider:
|
|
296
|
+
def file_scan(self, content_provider: ContentProvider) -> List[Candidate]:
|
|
320
297
|
"""Run scanning of file from 'file_provider'.
|
|
321
298
|
|
|
322
299
|
Args:
|
|
@@ -96,10 +96,6 @@ class Chars(Enum):
|
|
|
96
96
|
ASCII_PRINTABLE = string.printable
|
|
97
97
|
|
|
98
98
|
|
|
99
|
-
ENTROPY_LIMIT_BASE64 = 4.5
|
|
100
|
-
ENTROPY_LIMIT_BASE3x = 3
|
|
101
|
-
|
|
102
|
-
|
|
103
99
|
class GroupType(Enum):
|
|
104
100
|
"""Group type - used in Group constructor for load predefined set of filters"""
|
|
105
101
|
KEYWORD = "keyword"
|
|
@@ -148,7 +144,8 @@ OVERLAP_SIZE = 1000
|
|
|
148
144
|
CHUNK_STEP_SIZE = CHUNK_SIZE - OVERLAP_SIZE
|
|
149
145
|
# ML hunk size to limit of variable or value size and get substring near value
|
|
150
146
|
ML_HUNK = 80
|
|
151
|
-
|
|
147
|
+
|
|
148
|
+
# values according https://docs.python.org/3/library/codecs.html
|
|
152
149
|
UTF_8 = "utf_8"
|
|
153
150
|
UTF_16 = "utf_16"
|
|
154
151
|
LATIN_1 = "latin_1"
|
|
@@ -110,7 +110,8 @@ class CredentialManager:
|
|
|
110
110
|
# Match by file path+line num+value. Value required so two different credentials still be
|
|
111
111
|
# processed independently
|
|
112
112
|
candidate_key = CandidateKey(line_data)
|
|
113
|
-
if candidate_key
|
|
114
|
-
groups[candidate_key]
|
|
115
|
-
|
|
113
|
+
if candidate_key in groups:
|
|
114
|
+
groups[candidate_key].append(credential_candidate)
|
|
115
|
+
else:
|
|
116
|
+
groups[candidate_key] = [credential_candidate]
|
|
116
117
|
return groups
|
|
@@ -327,11 +327,8 @@ class LineData:
|
|
|
327
327
|
True if file require quotation, False otherwise
|
|
328
328
|
|
|
329
329
|
"""
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
if Util.get_extension(self.path) in self.config.source_quote_ext:
|
|
333
|
-
return True
|
|
334
|
-
return False
|
|
330
|
+
file_type = self.file_type or Util.get_extension(self.path)
|
|
331
|
+
return bool(file_type) and file_type in self.config.source_quote_ext
|
|
335
332
|
|
|
336
333
|
@staticmethod
|
|
337
334
|
def get_hash_or_subtext(
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from abc import ABC
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from credsweeper.common.constants import ASCII, MIN_DATA_LEN
|
|
6
|
+
from credsweeper.credentials import Candidate
|
|
7
|
+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
8
|
+
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class DebScanner(AbstractScanner, ABC):
    """Implements deb (ar) scanning"""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Extracts member files from an .ar (debian) archive and launches a recursive scan for each of them

        Args:
            data_provider: provider whose ``data`` holds the raw archive bytes
            depth: recursion depth passed through to ``recursive_scan``
            recursive_limit_size: size budget; the member size is subtracted from it before the nested scan

        Return:
            candidates collected from the members, or None when no member scan produced a result

        """
        candidates: Optional[List[Candidate]] = None
        archive = data_provider.data
        offset = 8  # b"!<arch>\n"
        while offset < len(archive):
            try:
                # the decimal size field occupies bytes 48..57 of the 60-byte member header
                file_size = int(archive[offset + 48:offset + 58].decode(ASCII))
            except ValueError as exc:
                # a truncated or corrupted header gives no way to find the next member:
                # stop here instead of re-reading the same offset forever
                logger.error(exc)
                break
            if 0 > file_size:
                # a negative size would move the offset backwards
                logger.error(f"Wrong member size {file_size} at 0x{offset:x}")
                break
            offset += 60
            if MIN_DATA_LEN <= file_size:
                try:
                    data = archive[offset:offset + file_size]
                    deb_content_provider = DataContentProvider(data=data,
                                                               file_path=data_provider.file_path,
                                                               file_type=data_provider.file_type,
                                                               info=f"{data_provider.info}|DEB:0x{offset:x}")
                    new_limit = recursive_limit_size - file_size
                    deb_candidates = self.recursive_scan(deb_content_provider, depth, new_limit)
                    if deb_candidates is not None:
                        if candidates:
                            candidates.extend(deb_candidates)
                        else:
                            candidates = deb_candidates
                except Exception as exc:
                    # best effort: a failure in one member must not prevent scanning of the following ones
                    logger.error(exc)
            # data padding = 2: applies to every member, including the ones too small to be scanned
            offset += 1 + file_size if 1 & file_size else file_size
        return candidates
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
import contextlib
|
|
1
2
|
import datetime
|
|
2
3
|
import logging
|
|
3
4
|
from typing import List, Optional, Any, Tuple, Union
|
|
4
5
|
|
|
5
|
-
from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN
|
|
6
|
+
from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN, MIN_VALUE_LENGTH
|
|
6
7
|
from credsweeper.config import Config
|
|
7
8
|
from credsweeper.credentials import Candidate
|
|
8
9
|
from credsweeper.credentials.augment_candidates import augment_candidates
|
|
@@ -16,6 +17,7 @@ from credsweeper.scanner import Scanner
|
|
|
16
17
|
from credsweeper.utils import Util
|
|
17
18
|
from .byte_scanner import ByteScanner
|
|
18
19
|
from .bzip2_scanner import Bzip2Scanner
|
|
20
|
+
from .deb_scanner import DebScanner
|
|
19
21
|
from .docx_scanner import DocxScanner
|
|
20
22
|
from .eml_scanner import EmlScanner
|
|
21
23
|
from .encoder_scanner import EncoderScanner
|
|
@@ -54,6 +56,7 @@ class DeepScanner(
|
|
|
54
56
|
Pkcs12Scanner, #
|
|
55
57
|
PptxScanner, #
|
|
56
58
|
TarScanner, #
|
|
59
|
+
DebScanner, #
|
|
57
60
|
XmlScanner, #
|
|
58
61
|
XlsxScanner, #
|
|
59
62
|
ZipScanner
|
|
@@ -114,6 +117,9 @@ class DeepScanner(
|
|
|
114
117
|
elif Util.is_tar(data):
|
|
115
118
|
if 0 < depth:
|
|
116
119
|
deep_scanners.append(TarScanner)
|
|
120
|
+
elif Util.is_deb(data):
|
|
121
|
+
if 0 < depth:
|
|
122
|
+
deep_scanners.append(DebScanner)
|
|
117
123
|
elif Util.is_gzip(data):
|
|
118
124
|
if 0 < depth:
|
|
119
125
|
deep_scanners.append(GzipScanner)
|
|
@@ -209,10 +215,10 @@ class DeepScanner(
|
|
|
209
215
|
int) else RECURSIVE_SCAN_LIMITATION
|
|
210
216
|
candidates: List[Candidate] = []
|
|
211
217
|
data: Optional[bytes] = None
|
|
212
|
-
if isinstance(content_provider, TextContentProvider
|
|
218
|
+
if isinstance(content_provider, (TextContentProvider, ByteContentProvider)):
|
|
213
219
|
# Feature to scan files which might be containers
|
|
214
220
|
data = content_provider.data
|
|
215
|
-
info = "FILE"
|
|
221
|
+
info = f"FILE:{content_provider.file_path}"
|
|
216
222
|
elif isinstance(content_provider, DiffContentProvider) and content_provider.diff:
|
|
217
223
|
candidates = self.scanner.scan(content_provider)
|
|
218
224
|
# Feature to scan binary diffs
|
|
@@ -220,7 +226,7 @@ class DeepScanner(
|
|
|
220
226
|
# the check for legal fix mypy issue
|
|
221
227
|
if isinstance(diff, bytes):
|
|
222
228
|
data = diff
|
|
223
|
-
info = "DIFF"
|
|
229
|
+
info = f"DIFF:{content_provider.file_path}"
|
|
224
230
|
else:
|
|
225
231
|
logger.warning(f"Content provider {type(content_provider)} does not support deep scan")
|
|
226
232
|
info = "NA"
|
|
@@ -298,7 +304,7 @@ class DeepScanner(
|
|
|
298
304
|
items: List[Tuple[Union[int, str], Any]] = []
|
|
299
305
|
struct_key: Optional[str] = None
|
|
300
306
|
struct_value: Optional[str] = None
|
|
301
|
-
|
|
307
|
+
lines_for_keyword_rules = []
|
|
302
308
|
if isinstance(struct_provider.struct, dict):
|
|
303
309
|
for key, value in struct_provider.struct.items():
|
|
304
310
|
if isinstance(value, (list, tuple)) and 1 == len(value):
|
|
@@ -309,13 +315,13 @@ class DeepScanner(
|
|
|
309
315
|
# for transformation {"key": "api_key", "value": "XXXXXXX"} -> {"api_key": "XXXXXXX"}
|
|
310
316
|
struct_key = struct_provider.struct.get("key")
|
|
311
317
|
struct_value = struct_provider.struct.get("value")
|
|
312
|
-
elif isinstance(struct_provider.struct, list
|
|
318
|
+
elif isinstance(struct_provider.struct, (list, tuple)):
|
|
313
319
|
items = list(enumerate(struct_provider.struct))
|
|
314
320
|
else:
|
|
315
321
|
logger.error("Not supported type:%s val:%s", str(type(struct_provider.struct)), str(struct_provider.struct))
|
|
316
322
|
|
|
317
323
|
for key, value in items:
|
|
318
|
-
if isinstance(value, dict) or isinstance(value, (list, tuple)) and 1
|
|
324
|
+
if isinstance(value, dict) or isinstance(value, (list, tuple)) and 1 <= len(value):
|
|
319
325
|
val_struct_provider = StructContentProvider(struct=value,
|
|
320
326
|
file_path=struct_provider.file_path,
|
|
321
327
|
file_type=struct_provider.file_type,
|
|
@@ -324,52 +330,57 @@ class DeepScanner(
|
|
|
324
330
|
candidates.extend(new_candidates)
|
|
325
331
|
|
|
326
332
|
elif isinstance(value, bytes):
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
333
|
+
if MIN_DATA_LEN <= len(value):
|
|
334
|
+
bytes_struct_provider = DataContentProvider(data=value,
|
|
335
|
+
file_path=struct_provider.file_path,
|
|
336
|
+
file_type=struct_provider.file_type,
|
|
337
|
+
info=f"{struct_provider.info}|BYTES:{key}")
|
|
338
|
+
new_limit = recursive_limit_size - len(value)
|
|
339
|
+
new_candidates = self.recursive_scan(bytes_struct_provider, depth, new_limit)
|
|
340
|
+
candidates.extend(new_candidates)
|
|
341
|
+
if MIN_VALUE_LENGTH <= len(value) and isinstance(key, str) \
|
|
342
|
+
and self.scanner.keywords_required_substrings_check(key.lower()):
|
|
343
|
+
str_val = str(value)
|
|
344
|
+
lines_for_keyword_rules.append(f"{key} = '{str_val}'" if '"' in str_val else f'{key} = "{str_val}"')
|
|
334
345
|
|
|
335
346
|
elif isinstance(value, str):
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
347
|
+
if MIN_DATA_LEN <= len(value):
|
|
348
|
+
# recursive scan only for data which may be decoded at least
|
|
349
|
+
with contextlib.suppress(UnicodeError):
|
|
350
|
+
data = value.encode(encoding=DEFAULT_ENCODING, errors='strict')
|
|
351
|
+
str_struct_provider = DataContentProvider(data=data,
|
|
352
|
+
file_path=struct_provider.file_path,
|
|
353
|
+
file_type=struct_provider.file_type,
|
|
354
|
+
info=f"{struct_provider.info}|STRING:{key}")
|
|
355
|
+
new_limit = recursive_limit_size - len(str_struct_provider.data)
|
|
356
|
+
new_candidates = self.recursive_scan(str_struct_provider, depth, new_limit)
|
|
357
|
+
candidates.extend(new_candidates)
|
|
345
358
|
# use key = "value" scan for common cases like in TOML
|
|
346
|
-
if isinstance(key, str)
|
|
347
|
-
|
|
359
|
+
if MIN_VALUE_LENGTH <= len(value) and isinstance(key, str) \
|
|
360
|
+
and self.scanner.keywords_required_substrings_check(key.lower()):
|
|
361
|
+
lines_for_keyword_rules.append(f"{key} = '{value}'" if '"' in value else f'{key} = "{value}"')
|
|
348
362
|
|
|
349
363
|
elif isinstance(value, (int, float, datetime.date, datetime.datetime)):
|
|
350
|
-
#
|
|
351
|
-
|
|
352
|
-
line_for_keyword_rules += f"{key} = \"{value}\"; "
|
|
353
|
-
|
|
364
|
+
# skip useless types
|
|
365
|
+
pass
|
|
354
366
|
else:
|
|
355
367
|
logger.warning("Not supported type:%s value(%s)", str(type(value)), str(value))
|
|
356
368
|
|
|
357
|
-
if
|
|
358
|
-
str_provider = StringContentProvider(
|
|
369
|
+
if lines_for_keyword_rules:
|
|
370
|
+
str_provider = StringContentProvider(lines_for_keyword_rules,
|
|
359
371
|
file_path=struct_provider.file_path,
|
|
360
|
-
file_type=".
|
|
361
|
-
info=f"{struct_provider.info}|KEYWORD:`{
|
|
372
|
+
file_type=".py",
|
|
373
|
+
info=f"{struct_provider.info}|KEYWORD:`{lines_for_keyword_rules}`")
|
|
362
374
|
new_candidates = self.scanner.scan(str_provider)
|
|
363
375
|
augment_candidates(candidates, new_candidates)
|
|
364
376
|
|
|
365
377
|
# last check when dictionary is {"key": "api_key", "value": "XXXXXXX"} -> {"api_key": "XXXXXXX"}
|
|
366
378
|
if isinstance(struct_key, str) and isinstance(struct_value, str):
|
|
367
|
-
line_for_keyword_rules = f"{struct_key} = \"{struct_value}\""
|
|
368
379
|
key_value_provider = StringContentProvider(
|
|
369
|
-
[
|
|
380
|
+
[f"{struct_key} = '{struct_value}'" if '"' in struct_value else f'{struct_key} = "{struct_value}"'],
|
|
370
381
|
file_path=struct_provider.file_path,
|
|
371
382
|
file_type=".toml",
|
|
372
|
-
info=f"{struct_provider.info}|KEY_VALUE:`{
|
|
383
|
+
info=f"{struct_provider.info}|KEY_VALUE:`{lines_for_keyword_rules}`")
|
|
373
384
|
new_candidates = self.scanner.scan(key_value_provider)
|
|
374
385
|
augment_candidates(candidates, new_candidates)
|
|
375
386
|
return candidates
|
|
@@ -31,7 +31,7 @@ class GzipScanner(AbstractScanner, ABC):
|
|
|
31
31
|
gzip_content_provider = DataContentProvider(data=f.read(),
|
|
32
32
|
file_path=new_path,
|
|
33
33
|
file_type=Util.get_extension(new_path),
|
|
34
|
-
info=f"{data_provider.info}|GZIP:{
|
|
34
|
+
info=f"{data_provider.info}|GZIP:{new_path}")
|
|
35
35
|
new_limit = recursive_limit_size - len(gzip_content_provider.data)
|
|
36
36
|
gzip_candidates = self.recursive_scan(gzip_content_provider, depth, new_limit)
|
|
37
37
|
return gzip_candidates
|
|
@@ -32,10 +32,10 @@ class ByteContentProvider(ContentProvider):
|
|
|
32
32
|
def free(self) -> None:
|
|
33
33
|
"""free data after scan to reduce memory usage"""
|
|
34
34
|
self.__data = None
|
|
35
|
-
if
|
|
35
|
+
if "data" in self.__dict__:
|
|
36
36
|
delattr(self, "data")
|
|
37
37
|
self.__lines = None
|
|
38
|
-
if
|
|
38
|
+
if "lines" in self.__dict__:
|
|
39
39
|
delattr(self, "lines")
|
|
40
40
|
|
|
41
41
|
@cached_property
|
|
@@ -93,7 +93,7 @@ class ContentProvider(ABC):
|
|
|
93
93
|
if min_len > len(line.strip()):
|
|
94
94
|
# Ignore target if stripped part is too short for all types
|
|
95
95
|
continue
|
|
96
|
-
|
|
96
|
+
if MAX_LINE_LENGTH < len(line):
|
|
97
97
|
for chunk_start, chunk_end in Util.get_chunks(len(line)):
|
|
98
98
|
target = AnalysisTarget(
|
|
99
99
|
line_pos=line_pos, #
|
|
@@ -54,10 +54,10 @@ class DataContentProvider(ContentProvider):
|
|
|
54
54
|
def free(self) -> None:
|
|
55
55
|
"""free data after scan to reduce memory usage"""
|
|
56
56
|
self.__data = None
|
|
57
|
-
if
|
|
57
|
+
if "data" in self.__dict__:
|
|
58
58
|
delattr(self, "data")
|
|
59
59
|
self.__text = None
|
|
60
|
-
if
|
|
60
|
+
if "text" in self.__dict__:
|
|
61
61
|
delattr(self, "text")
|
|
62
62
|
self.structure = None
|
|
63
63
|
self.decoded = None
|
|
@@ -48,8 +48,8 @@ class DiffContentProvider(ContentProvider):
|
|
|
48
48
|
|
|
49
49
|
def free(self) -> None:
|
|
50
50
|
"""free data after scan to reduce memory usage"""
|
|
51
|
-
self.__diff =
|
|
52
|
-
if
|
|
51
|
+
self.__diff = []
|
|
52
|
+
if "diff" in self.__dict__:
|
|
53
53
|
delattr(self, "diff")
|
|
54
54
|
|
|
55
55
|
@staticmethod
|
|
@@ -162,7 +162,7 @@ class FilePathExtractor:
|
|
|
162
162
|
True when the file is oversize or less than MIN_DATA_LEN, or unsupported
|
|
163
163
|
"""
|
|
164
164
|
path = reference[1] if isinstance(reference, tuple) else reference
|
|
165
|
-
if isinstance(path, str
|
|
165
|
+
if isinstance(path, (str, Path)):
|
|
166
166
|
file_size = os.path.getsize(path)
|
|
167
167
|
elif isinstance(path, io.BytesIO):
|
|
168
168
|
current_pos = path.tell()
|
|
@@ -42,7 +42,7 @@ class FilesProvider(AbstractProvider):
|
|
|
42
42
|
"""
|
|
43
43
|
text_content_provider_list: List[Union[DiffContentProvider, TextContentProvider]] = []
|
|
44
44
|
for path in self.paths:
|
|
45
|
-
if isinstance(path, str
|
|
45
|
+
if isinstance(path, (str, Path)):
|
|
46
46
|
new_files = FilePathExtractor.get_file_paths(config, path)
|
|
47
47
|
if self.skip_ignored:
|
|
48
48
|
new_files = FilePathExtractor.apply_gitignore(new_files)
|
|
@@ -50,9 +50,7 @@ class FilesProvider(AbstractProvider):
|
|
|
50
50
|
text_content_provider_list.append(TextContentProvider(_file))
|
|
51
51
|
elif isinstance(path, io.BytesIO):
|
|
52
52
|
text_content_provider_list.append(TextContentProvider((":memory:", path)))
|
|
53
|
-
elif isinstance(path, tuple)
|
|
54
|
-
and (isinstance(path[0], str) or isinstance(path[0], Path)) \
|
|
55
|
-
and isinstance(path[1], io.BytesIO):
|
|
53
|
+
elif isinstance(path, tuple) and (isinstance(path[0], (str, Path))) and isinstance(path[1], io.BytesIO):
|
|
56
54
|
# suppose, all the files must be scanned
|
|
57
55
|
text_content_provider_list.append(TextContentProvider(path))
|
|
58
56
|
else:
|
|
@@ -37,7 +37,7 @@ class PatchesProvider(AbstractProvider):
|
|
|
37
37
|
for file_path in self.paths:
|
|
38
38
|
if FilePathExtractor.check_file_size(config, file_path):
|
|
39
39
|
continue
|
|
40
|
-
if isinstance(file_path, str
|
|
40
|
+
if isinstance(file_path, (str, Path)):
|
|
41
41
|
raw_patches.append(Util.read_file(file_path))
|
|
42
42
|
elif isinstance(file_path, io.BytesIO):
|
|
43
43
|
the_patch = Util.decode_bytes(file_path.read())
|
{credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/string_content_provider.py
RENAMED
|
@@ -38,10 +38,10 @@ class StringContentProvider(ContentProvider):
|
|
|
38
38
|
def free(self) -> None:
|
|
39
39
|
"""free data after scan to reduce memory usage"""
|
|
40
40
|
self.__lines = []
|
|
41
|
-
if
|
|
41
|
+
if "lines" in self.__dict__:
|
|
42
42
|
delattr(self, "lines")
|
|
43
43
|
self.__line_numbers = []
|
|
44
|
-
if
|
|
44
|
+
if "line_numbers" in self.__dict__:
|
|
45
45
|
delattr(self, "line_numbers")
|
|
46
46
|
|
|
47
47
|
@cached_property
|
{credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/struct_content_provider.py
RENAMED
|
@@ -38,7 +38,7 @@ class StructContentProvider(ContentProvider):
|
|
|
38
38
|
def free(self) -> None:
|
|
39
39
|
"""free data after scan to reduce memory usage"""
|
|
40
40
|
self.__struct = None
|
|
41
|
-
if
|
|
41
|
+
if "struct" in self.__dict__:
|
|
42
42
|
delattr(self, "struct")
|
|
43
43
|
|
|
44
44
|
def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTarget, None, None]:
|
|
@@ -42,10 +42,10 @@ class TextContentProvider(ContentProvider):
|
|
|
42
42
|
def free(self) -> None:
|
|
43
43
|
"""free data after scan to reduce memory usage"""
|
|
44
44
|
self.__data = None
|
|
45
|
-
if
|
|
45
|
+
if "data" in self.__dict__:
|
|
46
46
|
delattr(self, "data")
|
|
47
47
|
self.__lines = None
|
|
48
|
-
if
|
|
48
|
+
if "lines" in self.__dict__:
|
|
49
49
|
delattr(self, "lines")
|
|
50
50
|
if isinstance(self.__io, io.BytesIO) and self.__io and not self.__io.closed:
|
|
51
51
|
self.__io.close()
|
{credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_base64_encoded_pem_check.py
RENAMED
|
@@ -30,7 +30,7 @@ class ValueBase64EncodedPem(Filter):
|
|
|
30
30
|
with contextlib.suppress(Exception):
|
|
31
31
|
text = Util.decode_base64(line_data.value, padding_safe=True, urlsafe_detect=True)
|
|
32
32
|
lines = text.decode(ASCII).splitlines()
|
|
33
|
-
lines_pos =
|
|
33
|
+
lines_pos = list(range(len(lines)))
|
|
34
34
|
for line_pos, line in zip(lines_pos, lines):
|
|
35
35
|
if PEM_BEGIN_PATTERN in line:
|
|
36
36
|
new_target = AnalysisTarget(line_pos, lines, lines_pos, target.descriptor)
|
|
@@ -19,12 +19,8 @@ class ValueEntropyBase64Check(ValueEntropyBaseCheck):
|
|
|
19
19
|
y = 0.944 * math.log2(x) - 0.009 * x - 0.04
|
|
20
20
|
elif 65 <= x < 256:
|
|
21
21
|
y = 0.621 * math.log2(x) - 0.003 * x + 1.54
|
|
22
|
-
elif 256 <= x
|
|
23
|
-
y =
|
|
24
|
-
elif 512 <= x < 1024:
|
|
25
|
-
y = 5.89
|
|
26
|
-
elif 1024 <= x:
|
|
27
|
-
y = 5.94
|
|
22
|
+
elif 256 <= x:
|
|
23
|
+
y = 6 - 64 / x
|
|
28
24
|
else:
|
|
29
25
|
y = 0
|
|
30
26
|
return y
|