credsweeper 1.11.3__tar.gz → 1.11.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- {credsweeper-1.11.3 → credsweeper-1.11.5}/PKG-INFO +3 -6
- {credsweeper-1.11.3 → credsweeper-1.11.5}/README.md +1 -5
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/__init__.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/__main__.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/app.py +21 -44
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/common/constants.py +2 -5
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/credentials/candidate_key.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/credentials/credential_manager.py +4 -3
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/credentials/line_data.py +2 -5
- credsweeper-1.11.5/credsweeper/deep_scanner/abstract_scanner.py +306 -0
- credsweeper-1.11.5/credsweeper/deep_scanner/deb_scanner.py +55 -0
- credsweeper-1.11.5/credsweeper/deep_scanner/deep_scanner.py +173 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/gzip_scanner.py +1 -1
- credsweeper-1.11.5/credsweeper/deep_scanner/jclass_scanner.py +74 -0
- credsweeper-1.11.5/credsweeper/deep_scanner/patch_scanner.py +48 -0
- credsweeper-1.11.5/credsweeper/deep_scanner/pkcs_scanner.py +41 -0
- credsweeper-1.11.5/credsweeper/deep_scanner/rpm_scanner.py +49 -0
- credsweeper-1.11.5/credsweeper/deep_scanner/sqlite3_scanner.py +79 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/byte_content_provider.py +2 -2
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/content_provider.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/data_content_provider.py +3 -4
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/diff_content_provider.py +2 -2
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/file_path_extractor.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/files_provider.py +2 -4
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/patches_provider.py +5 -2
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/string_content_provider.py +2 -2
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/struct_content_provider.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/text_content_provider.py +2 -2
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/__init__.py +1 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_base64_encoded_pem_check.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_base64_key_check.py +9 -14
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_entropy_base64_check.py +2 -6
- credsweeper-1.11.5/credsweeper/filters/value_json_web_key_check.py +37 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_pattern_check.py +64 -16
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/file_extension.py +1 -1
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/ml_validator.py +43 -21
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/rules/config.yaml +51 -9
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/rules/rule.py +3 -3
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/scanner/scan_type/multi_pattern.py +1 -2
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/secret/config.json +6 -6
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/utils/hop_stat.py +3 -3
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/utils/pem_key_detector.py +6 -4
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/utils/util.py +154 -79
- {credsweeper-1.11.3 → credsweeper-1.11.5}/pyproject.toml +1 -0
- credsweeper-1.11.3/credsweeper/deep_scanner/abstract_scanner.py +0 -51
- credsweeper-1.11.3/credsweeper/deep_scanner/deep_scanner.py +0 -375
- credsweeper-1.11.3/credsweeper/deep_scanner/pkcs12_scanner.py +0 -45
- {credsweeper-1.11.3 → credsweeper-1.11.5}/.gitignore +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/LICENSE +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/common/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/common/keyword_checklist.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/common/keyword_checklist.txt +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/common/keyword_pattern.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/common/morpheme_checklist.txt +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/config/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/config/config.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/credentials/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/credentials/augment_candidates.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/credentials/candidate.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/credentials/candidate_group_generator.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/byte_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/bzip2_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/docx_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/eml_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/encoder_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/html_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/jks_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/lang_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/lzma_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/mxfile_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/pdf_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/pptx_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/tar_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/tmx_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/xlsx_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/xml_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/zip_scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/abstract_provider.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/analysis_target.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/descriptor.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/filter.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/general_keyword.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/general_pattern.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/group.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/password_keyword.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/token_pattern.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/url_credentials_group.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/weird_base36_token.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/weird_base64_token.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/line_git_binary_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/line_specific_key_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/line_uue_part_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_allowlist_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_array_dictionary_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_atlassian_token_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_azure_token_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_base32_data_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_base64_data_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_base64_part_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_blocklist_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_camel_case_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_couple_keyword_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_dictionary_keyword_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_dictionary_value_length_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_discord_bot_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_entropy_base32_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_entropy_base36_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_entropy_base_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_file_path_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_github_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_grafana_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_grafana_service_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_hex_number_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_jfrog_token_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_json_web_token_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_last_word_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_method_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_not_allowed_pattern_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_not_part_encoded_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_number_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_similarity_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_split_keyword_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_string_type_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_token_base32_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_token_base36_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_token_base64_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_token_base_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_token_check.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/logger/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/logger/logger.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/entropy_evaluation.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/feature.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/has_html_tag.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/is_secret_numeric.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/length_of_attribute.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/morpheme_dense.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/rule_name.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/search_in_attribute.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_path.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_postamble.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_preamble.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_transition.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_value.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_variable.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/ml_config.json +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/ml_model.onnx +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/py.typed +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/rules/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/scanner/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/scanner/scan_type/__init__.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/scanner/scan_type/pem_key_pattern.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/scanner/scan_type/scan_type.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/scanner/scan_type/single_pattern.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/scanner/scanner.py +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/secret/log.yaml +0 -0
- {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/utils/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: credsweeper
|
|
3
|
-
Version: 1.11.3
|
|
3
|
+
Version: 1.11.5
|
|
4
4
|
Summary: Credential Sweeper
|
|
5
5
|
Project-URL: Homepage, https://github.com/Samsung/CredSweeper
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues
|
|
@@ -37,6 +37,7 @@ Requires-Dist: python-dateutil
|
|
|
37
37
|
Requires-Dist: python-docx
|
|
38
38
|
Requires-Dist: python-pptx
|
|
39
39
|
Requires-Dist: pyyaml
|
|
40
|
+
Requires-Dist: rpmfile
|
|
40
41
|
Requires-Dist: whatthepatch
|
|
41
42
|
Requires-Dist: xlrd
|
|
42
43
|
Description-Content-Type: text/markdown
|
|
@@ -140,11 +141,7 @@ cat output.json
|
|
|
140
141
|
"value_start": 12,
|
|
141
142
|
"value_end": 19,
|
|
142
143
|
"variable": "password",
|
|
143
|
-
"
|
|
144
|
-
"iterator": "BASE64_CHARS",
|
|
145
|
-
"entropy": 2.120589933192232,
|
|
146
|
-
"valid": false
|
|
147
|
-
}
|
|
144
|
+
"entropy": 2.12059
|
|
148
145
|
}
|
|
149
146
|
]
|
|
150
147
|
}
|
|
@@ -63,7 +63,7 @@ def logger_levels(log_level: str) -> str:
|
|
|
63
63
|
Returns True if log_level UPPERCASE is one of keys
|
|
64
64
|
"""
|
|
65
65
|
val = log_level.upper()
|
|
66
|
-
if
|
|
66
|
+
if val in Logger.LEVELS:
|
|
67
67
|
return val
|
|
68
68
|
raise ArgumentTypeError(f"Log level provided: {log_level} -- must be one of: {' | '.join(Logger.LEVELS.keys())}")
|
|
69
69
|
|
|
@@ -15,11 +15,13 @@ from credsweeper.common.constants import Severity, ThresholdPreset, DiffRowType,
|
|
|
15
15
|
from credsweeper.config import Config
|
|
16
16
|
from credsweeper.credentials import Candidate, CredentialManager, CandidateKey
|
|
17
17
|
from credsweeper.deep_scanner.deep_scanner import DeepScanner
|
|
18
|
+
from credsweeper.file_handler.content_provider import ContentProvider
|
|
18
19
|
from credsweeper.file_handler.diff_content_provider import DiffContentProvider
|
|
19
20
|
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
|
|
20
21
|
from credsweeper.file_handler.abstract_provider import AbstractProvider
|
|
21
22
|
from credsweeper.file_handler.text_content_provider import TextContentProvider
|
|
22
23
|
from credsweeper.scanner import Scanner
|
|
24
|
+
from credsweeper.ml_model.ml_validator import MlValidator
|
|
23
25
|
from credsweeper.utils import Util
|
|
24
26
|
|
|
25
27
|
logger = logging.getLogger(__name__)
|
|
@@ -94,7 +96,7 @@ class CredSweeper:
|
|
|
94
96
|
log_level: str - level for pool initializer according logging levels (UPPERCASE)
|
|
95
97
|
|
|
96
98
|
"""
|
|
97
|
-
self.pool_count: int =
|
|
99
|
+
self.pool_count: int = max(1, int(pool_count))
|
|
98
100
|
if not (_severity := Severity.get(severity)):
|
|
99
101
|
raise RuntimeError(f"Severity level provided: {severity}"
|
|
100
102
|
f" -- must be one of: {' | '.join([i.value for i in Severity])}")
|
|
@@ -123,9 +125,9 @@ class CredSweeper:
|
|
|
123
125
|
self.ml_config = ml_config
|
|
124
126
|
self.ml_model = ml_model
|
|
125
127
|
self.ml_providers = ml_providers
|
|
126
|
-
self.ml_validator = None
|
|
127
128
|
self.__thrifty = thrifty
|
|
128
129
|
self.__log_level = log_level
|
|
130
|
+
self.__ml_validator: Optional[MlValidator] = None
|
|
129
131
|
|
|
130
132
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
131
133
|
|
|
@@ -182,35 +184,22 @@ class CredSweeper:
|
|
|
182
184
|
|
|
183
185
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
184
186
|
|
|
185
|
-
# the import cannot be done on top due
|
|
186
|
-
# TypeError: cannot pickle 'onnxruntime.capi.onnxruntime_pybind11_state.InferenceSession' object
|
|
187
|
-
from credsweeper.ml_model import MlValidator
|
|
188
|
-
|
|
189
|
-
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
190
|
-
|
|
191
187
|
@property
|
|
192
188
|
def ml_validator(self) -> MlValidator:
|
|
193
189
|
"""ml_validator getter"""
|
|
194
|
-
from credsweeper.ml_model import MlValidator
|
|
195
190
|
if not self.__ml_validator:
|
|
196
|
-
self.__ml_validator
|
|
191
|
+
self.__ml_validator = MlValidator(
|
|
197
192
|
threshold=self.ml_threshold, #
|
|
198
193
|
ml_config=self.ml_config, #
|
|
199
194
|
ml_model=self.ml_model, #
|
|
200
195
|
ml_providers=self.ml_providers, #
|
|
201
196
|
)
|
|
202
|
-
|
|
197
|
+
if not self.__ml_validator:
|
|
198
|
+
raise RuntimeError("MlValidator was not initialized!")
|
|
203
199
|
return self.__ml_validator
|
|
204
200
|
|
|
205
201
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
206
202
|
|
|
207
|
-
@ml_validator.setter
|
|
208
|
-
def ml_validator(self, _ml_validator: Optional[MlValidator]) -> None:
|
|
209
|
-
"""ml_validator setter"""
|
|
210
|
-
self.__ml_validator = _ml_validator
|
|
211
|
-
|
|
212
|
-
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
213
|
-
|
|
214
203
|
@staticmethod
|
|
215
204
|
def pool_initializer(log_kwargs) -> None:
|
|
216
205
|
"""Ignore SIGINT in child processes."""
|
|
@@ -219,20 +208,6 @@ class CredSweeper:
|
|
|
219
208
|
|
|
220
209
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
221
210
|
|
|
222
|
-
@property
|
|
223
|
-
def config(self) -> Config:
|
|
224
|
-
"""config getter"""
|
|
225
|
-
return self.__config
|
|
226
|
-
|
|
227
|
-
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
228
|
-
|
|
229
|
-
@config.setter
|
|
230
|
-
def config(self, config: Config) -> None:
|
|
231
|
-
"""config setter"""
|
|
232
|
-
self.__config = config
|
|
233
|
-
|
|
234
|
-
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
235
|
-
|
|
236
211
|
def run(self, content_provider: AbstractProvider) -> int:
|
|
237
212
|
"""Run an analysis of 'content_provider' object.
|
|
238
213
|
|
|
@@ -241,9 +216,10 @@ class CredSweeper:
|
|
|
241
216
|
|
|
242
217
|
"""
|
|
243
218
|
_empty_list: Sequence[Union[DiffContentProvider, TextContentProvider]] = []
|
|
244
|
-
file_extractors
|
|
245
|
-
|
|
246
|
-
|
|
219
|
+
file_extractors = content_provider.get_scannable_files(self.config) if content_provider else _empty_list
|
|
220
|
+
if not file_extractors:
|
|
221
|
+
logger.info(f"No scannable targets for {len(content_provider.paths)} paths")
|
|
222
|
+
return 0
|
|
247
223
|
self.scan(file_extractors)
|
|
248
224
|
self.post_processing()
|
|
249
225
|
# PatchesProvider has the attribute. Circular import error appears with using the isinstance
|
|
@@ -260,7 +236,7 @@ class CredSweeper:
|
|
|
260
236
|
content_providers: file objects to scan
|
|
261
237
|
|
|
262
238
|
"""
|
|
263
|
-
if 1 < self.pool_count:
|
|
239
|
+
if 1 < self.pool_count and 1 < len(content_providers):
|
|
264
240
|
self.__multi_jobs_scan(content_providers)
|
|
265
241
|
else:
|
|
266
242
|
self.__single_job_scan(content_providers)
|
|
@@ -269,6 +245,7 @@ class CredSweeper:
|
|
|
269
245
|
|
|
270
246
|
def __single_job_scan(self, content_providers: Sequence[Union[DiffContentProvider, TextContentProvider]]) -> None:
|
|
271
247
|
"""Performs scan in main thread"""
|
|
248
|
+
logger.info(f"Scan for {len(content_providers)} providers")
|
|
272
249
|
all_cred = self.files_scan(content_providers)
|
|
273
250
|
self.credential_manager.set_credentials(all_cred)
|
|
274
251
|
|
|
@@ -284,12 +261,14 @@ class CredSweeper:
|
|
|
284
261
|
if "SILENCE" == self.__log_level:
|
|
285
262
|
logging.addLevelName(60, "SILENCE")
|
|
286
263
|
log_kwargs["level"] = self.__log_level
|
|
287
|
-
|
|
288
|
-
|
|
264
|
+
pool_count = min(self.pool_count, len(content_providers))
|
|
265
|
+
logger.info(f"Scan in {pool_count} processes for {len(content_providers)} providers")
|
|
266
|
+
with multiprocessing.get_context("spawn").Pool(processes=pool_count,
|
|
267
|
+
initializer=CredSweeper.pool_initializer,
|
|
289
268
|
initargs=(log_kwargs, )) as pool:
|
|
290
269
|
try:
|
|
291
|
-
for scan_results in pool.imap_unordered(self.files_scan,
|
|
292
|
-
|
|
270
|
+
for scan_results in pool.imap_unordered(self.files_scan,
|
|
271
|
+
(content_providers[x::pool_count] for x in range(pool_count))):
|
|
293
272
|
for cred in scan_results:
|
|
294
273
|
self.credential_manager.add_credential(cred)
|
|
295
274
|
except KeyboardInterrupt:
|
|
@@ -301,9 +280,7 @@ class CredSweeper:
|
|
|
301
280
|
|
|
302
281
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
303
282
|
|
|
304
|
-
def files_scan(
|
|
305
|
-
self, #
|
|
306
|
-
content_providers: Sequence[Union[DiffContentProvider, TextContentProvider]]) -> List[Candidate]:
|
|
283
|
+
def files_scan(self, content_providers: Sequence[ContentProvider]) -> List[Candidate]:
|
|
307
284
|
"""Auxiliary method for scan one sequence"""
|
|
308
285
|
all_cred: List[Candidate] = []
|
|
309
286
|
for provider in content_providers:
|
|
@@ -316,7 +293,7 @@ class CredSweeper:
|
|
|
316
293
|
|
|
317
294
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
318
295
|
|
|
319
|
-
def file_scan(self, content_provider:
|
|
296
|
+
def file_scan(self, content_provider: ContentProvider) -> List[Candidate]:
|
|
320
297
|
"""Run scanning of file from 'file_provider'.
|
|
321
298
|
|
|
322
299
|
Args:
|
|
@@ -96,10 +96,6 @@ class Chars(Enum):
|
|
|
96
96
|
ASCII_PRINTABLE = string.printable
|
|
97
97
|
|
|
98
98
|
|
|
99
|
-
ENTROPY_LIMIT_BASE64 = 4.5
|
|
100
|
-
ENTROPY_LIMIT_BASE3x = 3
|
|
101
|
-
|
|
102
|
-
|
|
103
99
|
class GroupType(Enum):
|
|
104
100
|
"""Group type - used in Group constructor for load predefined set of filters"""
|
|
105
101
|
KEYWORD = "keyword"
|
|
@@ -148,7 +144,8 @@ OVERLAP_SIZE = 1000
|
|
|
148
144
|
CHUNK_STEP_SIZE = CHUNK_SIZE - OVERLAP_SIZE
|
|
149
145
|
# ML hunk size to limit of variable or value size and get substring near value
|
|
150
146
|
ML_HUNK = 80
|
|
151
|
-
|
|
147
|
+
|
|
148
|
+
# values according https://docs.python.org/3/library/codecs.html
|
|
152
149
|
UTF_8 = "utf_8"
|
|
153
150
|
UTF_16 = "utf_16"
|
|
154
151
|
LATIN_1 = "latin_1"
|
|
@@ -110,7 +110,8 @@ class CredentialManager:
|
|
|
110
110
|
# Match by file path+line num+value. Value required so two different credentials still be
|
|
111
111
|
# processed independently
|
|
112
112
|
candidate_key = CandidateKey(line_data)
|
|
113
|
-
if candidate_key
|
|
114
|
-
groups[candidate_key]
|
|
115
|
-
|
|
113
|
+
if candidate_key in groups:
|
|
114
|
+
groups[candidate_key].append(credential_candidate)
|
|
115
|
+
else:
|
|
116
|
+
groups[candidate_key] = [credential_candidate]
|
|
116
117
|
return groups
|
|
@@ -327,11 +327,8 @@ class LineData:
|
|
|
327
327
|
True if file require quotation, False otherwise
|
|
328
328
|
|
|
329
329
|
"""
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
if Util.get_extension(self.path) in self.config.source_quote_ext:
|
|
333
|
-
return True
|
|
334
|
-
return False
|
|
330
|
+
file_type = self.file_type or Util.get_extension(self.path)
|
|
331
|
+
return bool(file_type) and file_type in self.config.source_quote_ext
|
|
335
332
|
|
|
336
333
|
@staticmethod
|
|
337
334
|
def get_hash_or_subtext(
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
import contextlib
|
|
2
|
+
import datetime
|
|
3
|
+
import logging
|
|
4
|
+
from abc import abstractmethod, ABC
|
|
5
|
+
from typing import List, Optional, Tuple, Any, Generator
|
|
6
|
+
|
|
7
|
+
from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN, DEFAULT_ENCODING, UTF_8, \
|
|
8
|
+
MIN_VALUE_LENGTH
|
|
9
|
+
from credsweeper.config import Config
|
|
10
|
+
from credsweeper.credentials import Candidate
|
|
11
|
+
from credsweeper.credentials.augment_candidates import augment_candidates
|
|
12
|
+
from credsweeper.file_handler.byte_content_provider import ByteContentProvider
|
|
13
|
+
from credsweeper.file_handler.content_provider import ContentProvider
|
|
14
|
+
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
15
|
+
from credsweeper.file_handler.descriptor import Descriptor
|
|
16
|
+
from credsweeper.file_handler.diff_content_provider import DiffContentProvider
|
|
17
|
+
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
|
|
18
|
+
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
19
|
+
from credsweeper.file_handler.struct_content_provider import StructContentProvider
|
|
20
|
+
from credsweeper.file_handler.text_content_provider import TextContentProvider
|
|
21
|
+
from credsweeper.scanner import Scanner
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class AbstractScanner(ABC):
|
|
27
|
+
"""Base abstract class for all recursive scanners"""
|
|
28
|
+
|
|
29
|
+
@property
|
|
30
|
+
@abstractmethod
|
|
31
|
+
def config(self) -> Config:
|
|
32
|
+
"""Abstract property to be defined in DeepScanner"""
|
|
33
|
+
raise NotImplementedError(__name__)
|
|
34
|
+
|
|
35
|
+
@property
|
|
36
|
+
@abstractmethod
|
|
37
|
+
def scanner(self) -> Scanner:
|
|
38
|
+
"""Abstract property to be defined in DeepScanner"""
|
|
39
|
+
raise NotImplementedError(__name__)
|
|
40
|
+
|
|
41
|
+
@abstractmethod
|
|
42
|
+
def data_scan(
|
|
43
|
+
self, #
|
|
44
|
+
data_provider: DataContentProvider, #
|
|
45
|
+
depth: int, #
|
|
46
|
+
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
47
|
+
"""Abstract method to be defined in DeepScanner"""
|
|
48
|
+
raise NotImplementedError(__name__)
|
|
49
|
+
|
|
50
|
+
@staticmethod
|
|
51
|
+
@abstractmethod
|
|
52
|
+
def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[List[Any], List[Any]]:
|
|
53
|
+
"""Returns possibly scan methods for the data depends on content and fallback scanners"""
|
|
54
|
+
|
|
55
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
56
|
+
|
|
57
|
+
def recursive_scan(
|
|
58
|
+
self, #
|
|
59
|
+
data_provider: DataContentProvider, #
|
|
60
|
+
depth: int = 0, #
|
|
61
|
+
recursive_limit_size: int = 0) -> List[Candidate]:
|
|
62
|
+
"""Recursive function to scan files which might be containers like ZIP archives
|
|
63
|
+
|
|
64
|
+
Args:
|
|
65
|
+
data_provider: DataContentProvider object may be a container
|
|
66
|
+
depth: maximal level of recursion
|
|
67
|
+
recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
|
|
68
|
+
"""
|
|
69
|
+
candidates: List[Candidate] = []
|
|
70
|
+
if 0 > depth:
|
|
71
|
+
# break recursion if maximal depth is reached
|
|
72
|
+
logger.debug("Bottom reached %s recursive_limit_size:%d", data_provider.file_path, recursive_limit_size)
|
|
73
|
+
return candidates
|
|
74
|
+
depth -= 1
|
|
75
|
+
if MIN_DATA_LEN > len(data_provider.data):
|
|
76
|
+
# break recursion for minimal data size
|
|
77
|
+
logger.debug("Too small data: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data),
|
|
78
|
+
depth, recursive_limit_size, data_provider.file_path, data_provider.info)
|
|
79
|
+
return candidates
|
|
80
|
+
logger.debug("Start data_scan: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data), depth,
|
|
81
|
+
recursive_limit_size, data_provider.file_path, data_provider.info)
|
|
82
|
+
|
|
83
|
+
if FilePathExtractor.is_find_by_ext_file(self.config, data_provider.file_type):
|
|
84
|
+
# Skip scanning file and makes fake candidate due the extension is suspicious
|
|
85
|
+
dummy_candidate = Candidate.get_dummy_candidate(self.config, data_provider.file_path,
|
|
86
|
+
data_provider.file_type, data_provider.info,
|
|
87
|
+
FilePathExtractor.FIND_BY_EXT_RULE)
|
|
88
|
+
candidates.append(dummy_candidate)
|
|
89
|
+
else:
|
|
90
|
+
new_candidates = self.deep_scan_with_fallback(data_provider, depth, recursive_limit_size)
|
|
91
|
+
augment_candidates(candidates, new_candidates)
|
|
92
|
+
|
|
93
|
+
return candidates
|
|
94
|
+
|
|
95
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
96
|
+
|
|
97
|
+
@staticmethod
|
|
98
|
+
def key_value_combination(structure: dict) -> Generator[Tuple[Any, Any], None, None]:
|
|
99
|
+
"""Combine items by `key` and `value` from a dictionary for augmentation
|
|
100
|
+
{..., "key": "api_key", "value": "XXXXXXX", ...} -> ("api_key", "XXXXXXX")
|
|
101
|
+
|
|
102
|
+
"""
|
|
103
|
+
for key_id in ("key", "KEY", "Key"):
|
|
104
|
+
if key_id in structure:
|
|
105
|
+
struct_key = structure.get(key_id)
|
|
106
|
+
break
|
|
107
|
+
else:
|
|
108
|
+
struct_key = None
|
|
109
|
+
if isinstance(struct_key, bytes):
|
|
110
|
+
# sqlite table may produce bytes for `key`
|
|
111
|
+
with contextlib.suppress(UnicodeError):
|
|
112
|
+
struct_key = struct_key.decode(UTF_8)
|
|
113
|
+
# only str type is common used for the augmentation
|
|
114
|
+
if struct_key and isinstance(struct_key, str):
|
|
115
|
+
for value_id in ("value", "VALUE", "Value"):
|
|
116
|
+
if value_id in structure:
|
|
117
|
+
struct_value = structure.get(value_id)
|
|
118
|
+
if struct_value and isinstance(struct_value, (str, bytes)):
|
|
119
|
+
yield struct_key, struct_value
|
|
120
|
+
# break in successful case
|
|
121
|
+
break
|
|
122
|
+
|
|
123
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
124
|
+
|
|
125
|
+
@staticmethod
|
|
126
|
+
def structure_processing(structure: Any) -> Generator[Tuple[Any, Any], None, None]:
|
|
127
|
+
"""Yields pair `key, value` from given structure if applicable"""
|
|
128
|
+
if isinstance(structure, dict):
|
|
129
|
+
# transform dictionary to list
|
|
130
|
+
for key, value in structure.items():
|
|
131
|
+
if not value:
|
|
132
|
+
# skip empty values
|
|
133
|
+
continue
|
|
134
|
+
if isinstance(value, (list, tuple)):
|
|
135
|
+
if 1 == len(value):
|
|
136
|
+
# simplify some structures like YAML when single item in new line is a value
|
|
137
|
+
yield key, value[0]
|
|
138
|
+
continue
|
|
139
|
+
# all other data will be precessed in next code
|
|
140
|
+
yield key, value
|
|
141
|
+
yield from AbstractScanner.key_value_combination(structure)
|
|
142
|
+
elif isinstance(structure, (list, tuple)):
|
|
143
|
+
# enumerate the items to fit for return structure
|
|
144
|
+
for key, value in enumerate(structure):
|
|
145
|
+
yield key, value
|
|
146
|
+
else:
|
|
147
|
+
logger.error("Not supported type:%s val:%s", str(type(structure)), repr(structure))
|
|
148
|
+
|
|
149
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
150
|
+
|
|
151
|
+
def structure_scan(
        self,
        struct_provider: StructContentProvider,
        depth: int,
        recursive_limit_size: int) -> List[Candidate]:
    """Recursively scans structured data held by the provider

    Nested containers are scanned with this method again, bytes and text values are passed to
    `recursive_scan`, and `key = value` lines are composed for keys accepted by the keyword check
    to be scanned together at the end.

    Args:
        struct_provider: provider which keeps the structure to scan in `struct`
        depth: maximal level of recursion
        recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack

    Returns: list with candidates

    """
    found: List[Candidate] = []
    logger.debug("Start struct_scan: depth=%d, limit=%d, path=%s, info=%s", depth, recursive_limit_size,
                 struct_provider.file_path, struct_provider.info)

    if depth < 0:
        # maximal depth is reached - stop the recursion
        logger.debug("bottom reached %s recursive_limit_size:%d", struct_provider.file_path, recursive_limit_size)
        return found

    next_depth = depth - 1
    keyword_lines = []

    for key, value in AbstractScanner.structure_processing(struct_provider.struct):
        # a keyword rule may be applicable for `key` (str only) and `value` (str, bytes)
        is_keyword = bool(isinstance(key, str) and self.scanner.keywords_required_substrings_check(key.lower()))

        if isinstance(value, (dict, list, tuple)) and value:
            # a not empty container is scanned as a structure one level deeper
            nested_provider = StructContentProvider(struct=value,
                                                    file_path=struct_provider.file_path,
                                                    file_type=struct_provider.file_type,
                                                    info=f"{struct_provider.info}|STRUCT:{key}")
            found.extend(self.structure_scan(nested_provider, next_depth, recursive_limit_size))
        elif isinstance(value, bytes):
            size = len(value)
            if MIN_DATA_LEN <= size:
                # raw bytes are scanned as data
                raw_provider = DataContentProvider(data=value,
                                                   file_path=struct_provider.file_path,
                                                   file_type=struct_provider.file_type,
                                                   info=f"{struct_provider.info}|BYTES:{key}")
                found.extend(self.recursive_scan(raw_provider, next_depth, recursive_limit_size - size))
            if is_keyword and MIN_VALUE_LENGTH <= size:
                keyword_lines.append(f"{key} = {repr(value)}")
        elif isinstance(value, str):
            text = value.strip()
            if MIN_DATA_LEN <= len(text):
                # text is scanned as data only when it may be encoded;
                # a UnicodeError raised anywhere in this section is suppressed
                with contextlib.suppress(UnicodeError):
                    encoded = text.encode(encoding=DEFAULT_ENCODING, errors='strict')
                    text_provider = DataContentProvider(data=encoded,
                                                        file_path=struct_provider.file_path,
                                                        file_type=struct_provider.file_type,
                                                        info=f"{struct_provider.info}|STRING:{key}")
                    remaining = recursive_limit_size - len(text_provider.data)
                    found.extend(self.recursive_scan(text_provider, next_depth, remaining))
            if is_keyword and MIN_VALUE_LENGTH <= len(text):
                keyword_lines.append(f"{key} = {repr(text)}")
        elif value is None or isinstance(value, (int, float, datetime.date, datetime.datetime)):
            # nothing to scan in these types
            continue
        else:
            logger.warning("Not supported type:%s value(%s)", str(type(value)), str(value))

    if keyword_lines:
        # all composed lines are scanned at once
        str_provider = StringContentProvider(keyword_lines,
                                             file_path=struct_provider.file_path,
                                             file_type=struct_provider.file_type,
                                             info=f"{struct_provider.info}|KEYWORD")
        augment_candidates(found, self.scanner.scan(str_provider))

    return found
|
|
230
|
+
|
|
231
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
232
|
+
|
|
233
|
+
def deep_scan_with_fallback(self, data_provider: DataContentProvider, depth: int,
                            recursive_limit_size: int) -> List[Candidate]:
    """Scans with deep scanners and, when none of them succeeds, with fallback scanners

    A scanner which returns None did not recognise the content. All deep scanners are applied;
    the fallback scanners are tried only if every deep scanner returned None, and only the first
    successful fallback scanner contributes.

    Args:
        data_provider: DataContentProvider with raw data
        depth: maximal level of recursion
        recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack

    Returns: list with candidates

    """
    result: List[Candidate] = []
    primary, reserve = self.get_deep_scanners(data_provider.data, data_provider.descriptor, depth)

    recognised = False
    for scanner_cls in primary:
        found = scanner_cls.data_scan(self, data_provider, depth, recursive_limit_size)
        if found is not None:
            augment_candidates(result, found)
            recognised = True
    if recognised:
        # at least one deep scan is successful, so fallback is not necessary
        return result

    for scanner_cls in reserve:
        found = scanner_cls.data_scan(self, data_provider, depth, recursive_limit_size)
        if found is not None:
            augment_candidates(result, found)
            # only the first successful fallback scanner is used
            break
    return result
|
|
265
|
+
|
|
266
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
267
|
+
|
|
268
|
+
def scan(self,
         content_provider: ContentProvider,
         depth: int,
         recursive_limit_size: Optional[int] = None) -> List[Candidate]:
    """Initial scan method to launch recursive scan. Skips ByteScanner to prevent extra scan

    Text and byte providers are deep scanned by their data. A diff provider with a diff is scanned
    by the regular scanner first and then deep scanned when the first diff line holds bytes.
    Other providers are not supported and produce no candidates.

    Args:
        content_provider: ContentProvider that might contain raw data
        depth: maximal level of recursion
        recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack,
            RECURSIVE_SCAN_LIMITATION is used when the value is not an int

    Returns: list with candidates

    """
    if not isinstance(recursive_limit_size, int):
        recursive_limit_size = RECURSIVE_SCAN_LIMITATION
    candidates: List[Candidate] = []
    data: Optional[bytes] = None

    if isinstance(content_provider, (TextContentProvider, ByteContentProvider)):
        # Feature to scan files which might be containers
        data = content_provider.data
        info = f"FILE:{content_provider.file_path}"
    elif isinstance(content_provider, DiffContentProvider) and content_provider.diff:
        candidates = self.scanner.scan(content_provider)
        # Feature to scan binary diffs
        first_line = content_provider.diff[0].get("line")
        # the type check also keeps mypy satisfied
        if isinstance(first_line, bytes):
            data = first_line
        info = f"DIFF:{content_provider.file_path}"
    else:
        logger.warning(f"Content provider {type(content_provider)} does not support deep scan")
        info = "NA"

    if not data:
        # nothing to deep scan
        return candidates

    data_provider = DataContentProvider(data=data,
                                        file_path=content_provider.file_path,
                                        file_type=content_provider.file_type,
                                        info=content_provider.info or info)
    deep_candidates = self.deep_scan_with_fallback(data_provider, depth, recursive_limit_size - len(data))
    augment_candidates(candidates, deep_candidates)
    return candidates
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import struct
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from typing import List, Optional, Generator, Tuple
|
|
5
|
+
|
|
6
|
+
from credsweeper.common.constants import MIN_DATA_LEN, UTF_8
|
|
7
|
+
from credsweeper.credentials import Candidate
|
|
8
|
+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
9
|
+
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
10
|
+
from credsweeper.utils.util import Util
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DebScanner(AbstractScanner, ABC):
    """Implements deb (ar) scanning"""

    # size of a member header: 16 (name) + 32 (skipped fields) + 10 (size) + 2 (trailer)
    __header_size = 60

    @staticmethod
    def walk_deb(data: bytes) -> Generator[Tuple[int, str, bytes], None, None]:
        """Processes sequence of DEB archive and yields offset, name and data

        Members whose size does not exceed MIN_DATA_LEN or does not fit into the remaining
        data are skipped.

        Args:
            data: raw bytes of the archive, the first 8 bytes are skipped as the global header

        Raises:
            ValueError: the size field of a member is not a decimal number or is negative
            UnicodeError: the name of a yielded member cannot be decoded

        """
        offset = 8  # b"!<arch>\n"
        data_limit = len(data) - DebScanner.__header_size
        while offset <= data_limit:
            header_offset = offset
            _data = data[offset:offset + DebScanner.__header_size]
            offset += DebScanner.__header_size
            # basic header structure
            _name, _, _size, __ = struct.unpack('16s32s10s2s', _data)
            file_size = int(_size)
            if 0 > file_size:
                # a negative size from untrusted data would move the offset backwards (or keep it
                # in place for -60) and the loop would never finish
                raise ValueError(f"Negative member size {file_size} in header at 0x{header_offset:x}")
            if MIN_DATA_LEN < file_size <= len(data) - offset:
                _data = data[offset:offset + file_size]
                yield offset, _name.decode(encoding=UTF_8).strip().rstrip('/'), _data
            # the next header starts at an even offset: one padding byte follows an odd size
            offset += file_size + (1 & file_size)

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Extracts data file from .ar (debian) archive and launches data_scan

        Args:
            data_provider: DataContentProvider with raw data of the archive
            depth: maximal level of recursion
            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack

        Returns: list with candidates of all scanned members or None if any error has occurred

        """
        try:
            candidates: List[Candidate] = []
            for offset, name, data in DebScanner.walk_deb(data_provider.data):
                deb_content_provider = DataContentProvider(data=data,
                                                           file_path=f"{data_provider.file_path}/{name}",
                                                           file_type=Util.get_extension(name),
                                                           info=f"{data_provider.info}|DEB:0x{offset:x}")
                new_limit = recursive_limit_size - len(data)
                deb_candidates = self.recursive_scan(deb_content_provider, depth, new_limit)
                candidates.extend(deb_candidates)
            return candidates
        except Exception as exc:
            # any failure (including a malformed header) is reported as "not recognised"
            logger.error(exc)
        return None
|