credsweeper 1.11.4__tar.gz → 1.11.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- {credsweeper-1.11.4 → credsweeper-1.11.5}/PKG-INFO +3 -6
- {credsweeper-1.11.4 → credsweeper-1.11.5}/README.md +1 -5
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/__init__.py +1 -1
- credsweeper-1.11.4/credsweeper/deep_scanner/deep_scanner.py → credsweeper-1.11.5/credsweeper/deep_scanner/abstract_scanner.py +178 -258
- credsweeper-1.11.5/credsweeper/deep_scanner/deb_scanner.py +55 -0
- credsweeper-1.11.5/credsweeper/deep_scanner/deep_scanner.py +173 -0
- credsweeper-1.11.5/credsweeper/deep_scanner/jclass_scanner.py +74 -0
- credsweeper-1.11.5/credsweeper/deep_scanner/patch_scanner.py +48 -0
- credsweeper-1.11.5/credsweeper/deep_scanner/pkcs_scanner.py +41 -0
- credsweeper-1.11.5/credsweeper/deep_scanner/rpm_scanner.py +49 -0
- credsweeper-1.11.5/credsweeper/deep_scanner/sqlite3_scanner.py +79 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/file_handler/data_content_provider.py +1 -2
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/file_handler/patches_provider.py +4 -1
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/__init__.py +1 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_base64_key_check.py +9 -14
- credsweeper-1.11.5/credsweeper/filters/value_json_web_key_check.py +37 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/rules/config.yaml +48 -6
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/scanner/scan_type/multi_pattern.py +1 -2
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/secret/config.json +6 -6
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/utils/pem_key_detector.py +2 -2
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/utils/util.py +143 -75
- {credsweeper-1.11.4 → credsweeper-1.11.5}/pyproject.toml +1 -0
- credsweeper-1.11.4/credsweeper/deep_scanner/abstract_scanner.py +0 -51
- credsweeper-1.11.4/credsweeper/deep_scanner/deb_scanner.py +0 -48
- credsweeper-1.11.4/credsweeper/deep_scanner/pkcs12_scanner.py +0 -45
- {credsweeper-1.11.4 → credsweeper-1.11.5}/.gitignore +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/LICENSE +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/__main__.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/app.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/common/__init__.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/common/constants.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/common/keyword_checklist.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/common/keyword_checklist.txt +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/common/keyword_pattern.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/common/morpheme_checklist.txt +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/config/__init__.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/config/config.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/credentials/__init__.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/credentials/augment_candidates.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/credentials/candidate.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/credentials/candidate_group_generator.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/credentials/candidate_key.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/credentials/credential_manager.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/credentials/line_data.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/__init__.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/byte_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/bzip2_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/docx_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/eml_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/encoder_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/gzip_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/html_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/jks_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/lang_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/lzma_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/mxfile_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/pdf_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/pptx_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/tar_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/tmx_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/xlsx_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/xml_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/deep_scanner/zip_scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/file_handler/__init__.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/file_handler/abstract_provider.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/file_handler/analysis_target.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/file_handler/byte_content_provider.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/file_handler/content_provider.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/file_handler/descriptor.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/file_handler/diff_content_provider.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/file_handler/file_path_extractor.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/file_handler/files_provider.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/file_handler/string_content_provider.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/file_handler/struct_content_provider.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/file_handler/text_content_provider.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/filter.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/group/__init__.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/group/general_keyword.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/group/general_pattern.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/group/group.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/group/password_keyword.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/group/token_pattern.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/group/url_credentials_group.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/group/weird_base36_token.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/group/weird_base64_token.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/line_git_binary_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/line_specific_key_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/line_uue_part_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_allowlist_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_array_dictionary_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_atlassian_token_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_azure_token_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_base32_data_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_base64_data_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_base64_encoded_pem_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_base64_part_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_blocklist_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_camel_case_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_couple_keyword_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_dictionary_keyword_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_dictionary_value_length_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_discord_bot_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_entropy_base32_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_entropy_base36_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_entropy_base64_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_entropy_base_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_file_path_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_github_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_grafana_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_grafana_service_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_hex_number_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_jfrog_token_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_json_web_token_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_last_word_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_method_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_not_allowed_pattern_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_not_part_encoded_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_number_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_pattern_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_similarity_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_split_keyword_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_string_type_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_token_base32_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_token_base36_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_token_base64_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_token_base_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/filters/value_token_check.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/logger/__init__.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/logger/logger.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/__init__.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/features/__init__.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/features/entropy_evaluation.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/features/feature.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/features/file_extension.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/features/has_html_tag.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/features/is_secret_numeric.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/features/length_of_attribute.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/features/morpheme_dense.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/features/rule_name.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/features/search_in_attribute.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_path.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_postamble.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_preamble.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_transition.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_value.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_variable.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/ml_config.json +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/ml_model.onnx +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/ml_model/ml_validator.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/py.typed +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/rules/__init__.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/rules/rule.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/scanner/__init__.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/scanner/scan_type/__init__.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/scanner/scan_type/pem_key_pattern.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/scanner/scan_type/scan_type.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/scanner/scan_type/single_pattern.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/scanner/scanner.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/secret/log.yaml +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/utils/__init__.py +0 -0
- {credsweeper-1.11.4 → credsweeper-1.11.5}/credsweeper/utils/hop_stat.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: credsweeper
|
|
3
|
-
Version: 1.11.4
|
|
3
|
+
Version: 1.11.5
|
|
4
4
|
Summary: Credential Sweeper
|
|
5
5
|
Project-URL: Homepage, https://github.com/Samsung/CredSweeper
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues
|
|
@@ -37,6 +37,7 @@ Requires-Dist: python-dateutil
|
|
|
37
37
|
Requires-Dist: python-docx
|
|
38
38
|
Requires-Dist: python-pptx
|
|
39
39
|
Requires-Dist: pyyaml
|
|
40
|
+
Requires-Dist: rpmfile
|
|
40
41
|
Requires-Dist: whatthepatch
|
|
41
42
|
Requires-Dist: xlrd
|
|
42
43
|
Description-Content-Type: text/markdown
|
|
@@ -140,11 +141,7 @@ cat output.json
|
|
|
140
141
|
"value_start": 12,
|
|
141
142
|
"value_end": 19,
|
|
142
143
|
"variable": "password",
|
|
143
|
-
"
|
|
144
|
-
"iterator": "BASE64_CHARS",
|
|
145
|
-
"entropy": 2.120589933192232,
|
|
146
|
-
"valid": false
|
|
147
|
-
}
|
|
144
|
+
"entropy": 2.12059
|
|
148
145
|
}
|
|
149
146
|
]
|
|
150
147
|
}
|
|
@@ -1,245 +1,59 @@
|
|
|
1
1
|
import contextlib
|
|
2
2
|
import datetime
|
|
3
3
|
import logging
|
|
4
|
-
from
|
|
4
|
+
from abc import abstractmethod, ABC
|
|
5
|
+
from typing import List, Optional, Tuple, Any, Generator
|
|
5
6
|
|
|
6
|
-
from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN,
|
|
7
|
+
from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN, DEFAULT_ENCODING, UTF_8, \
|
|
8
|
+
MIN_VALUE_LENGTH
|
|
7
9
|
from credsweeper.config import Config
|
|
8
10
|
from credsweeper.credentials import Candidate
|
|
9
11
|
from credsweeper.credentials.augment_candidates import augment_candidates
|
|
10
12
|
from credsweeper.file_handler.byte_content_provider import ByteContentProvider
|
|
11
13
|
from credsweeper.file_handler.content_provider import ContentProvider
|
|
12
14
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
15
|
+
from credsweeper.file_handler.descriptor import Descriptor
|
|
13
16
|
from credsweeper.file_handler.diff_content_provider import DiffContentProvider
|
|
17
|
+
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
|
|
14
18
|
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
19
|
+
from credsweeper.file_handler.struct_content_provider import StructContentProvider
|
|
15
20
|
from credsweeper.file_handler.text_content_provider import TextContentProvider
|
|
16
21
|
from credsweeper.scanner import Scanner
|
|
17
|
-
from credsweeper.utils import Util
|
|
18
|
-
from .byte_scanner import ByteScanner
|
|
19
|
-
from .bzip2_scanner import Bzip2Scanner
|
|
20
|
-
from .deb_scanner import DebScanner
|
|
21
|
-
from .docx_scanner import DocxScanner
|
|
22
|
-
from .eml_scanner import EmlScanner
|
|
23
|
-
from .encoder_scanner import EncoderScanner
|
|
24
|
-
from .gzip_scanner import GzipScanner
|
|
25
|
-
from .html_scanner import HtmlScanner
|
|
26
|
-
from .jks_scanner import JksScanner
|
|
27
|
-
from .lang_scanner import LangScanner
|
|
28
|
-
from .lzma_scanner import LzmaScanner
|
|
29
|
-
from .mxfile_scanner import MxfileScanner
|
|
30
|
-
from .pdf_scanner import PdfScanner
|
|
31
|
-
from .pkcs12_scanner import Pkcs12Scanner
|
|
32
|
-
from .pptx_scanner import PptxScanner
|
|
33
|
-
from .tar_scanner import TarScanner
|
|
34
|
-
from .tmx_scanner import TmxScanner
|
|
35
|
-
from .xlsx_scanner import XlsxScanner
|
|
36
|
-
from .xml_scanner import XmlScanner
|
|
37
|
-
from .zip_scanner import ZipScanner
|
|
38
|
-
from ..common.constants import DEFAULT_ENCODING
|
|
39
|
-
from ..file_handler.file_path_extractor import FilePathExtractor
|
|
40
|
-
from ..file_handler.struct_content_provider import StructContentProvider
|
|
41
22
|
|
|
42
23
|
logger = logging.getLogger(__name__)
|
|
43
24
|
|
|
44
25
|
|
|
45
|
-
class DeepScanner(
|
|
46
|
-
|
|
47
|
-
Bzip2Scanner, #
|
|
48
|
-
DocxScanner, #
|
|
49
|
-
EncoderScanner, #
|
|
50
|
-
GzipScanner, #
|
|
51
|
-
HtmlScanner, #
|
|
52
|
-
JksScanner, #
|
|
53
|
-
LangScanner, #
|
|
54
|
-
LzmaScanner, #
|
|
55
|
-
PdfScanner, #
|
|
56
|
-
Pkcs12Scanner, #
|
|
57
|
-
PptxScanner, #
|
|
58
|
-
TarScanner, #
|
|
59
|
-
DebScanner, #
|
|
60
|
-
XmlScanner, #
|
|
61
|
-
XlsxScanner, #
|
|
62
|
-
ZipScanner
|
|
63
|
-
): # yapf: disable
|
|
64
|
-
"""Advanced scanner with recursive exploring of data"""
|
|
65
|
-
|
|
66
|
-
def __init__(self, config: Config, scanner: Scanner) -> None:
|
|
67
|
-
"""Initialize Advanced credential scanner.
|
|
68
|
-
|
|
69
|
-
Args:
|
|
70
|
-
scanner: CredSweeper scanner object
|
|
71
|
-
config: dictionary variable, stores analyzer features
|
|
72
|
-
"""
|
|
73
|
-
self.__config = config
|
|
74
|
-
self.__scanner = scanner
|
|
26
|
+
class AbstractScanner(ABC):
|
|
27
|
+
"""Base abstract class for all recursive scanners"""
|
|
75
28
|
|
|
76
29
|
@property
|
|
30
|
+
@abstractmethod
|
|
77
31
|
def config(self) -> Config:
|
|
78
|
-
|
|
32
|
+
"""Abstract property to be defined in DeepScanner"""
|
|
33
|
+
raise NotImplementedError(__name__)
|
|
79
34
|
|
|
80
35
|
@property
|
|
36
|
+
@abstractmethod
|
|
81
37
|
def scanner(self) -> Scanner:
|
|
82
|
-
|
|
38
|
+
"""Abstract property to be defined in DeepScanner"""
|
|
39
|
+
raise NotImplementedError(__name__)
|
|
40
|
+
|
|
41
|
+
@abstractmethod
|
|
42
|
+
def data_scan(
|
|
43
|
+
self, #
|
|
44
|
+
data_provider: DataContentProvider, #
|
|
45
|
+
depth: int, #
|
|
46
|
+
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
47
|
+
"""Abstract method to be defined in DeepScanner"""
|
|
48
|
+
raise NotImplementedError(__name__)
|
|
83
49
|
|
|
84
50
|
@staticmethod
|
|
85
|
-
|
|
51
|
+
@abstractmethod
|
|
52
|
+
def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[List[Any], List[Any]]:
|
|
86
53
|
"""Returns possibly scan methods for the data depends on content and fallback scanners"""
|
|
87
|
-
deep_scanners: List[Any] = []
|
|
88
|
-
fallback_scanners: List[Any] = []
|
|
89
|
-
if Util.is_zip(data):
|
|
90
|
-
if 0 < depth:
|
|
91
|
-
deep_scanners.append(ZipScanner)
|
|
92
|
-
# probably, there might be a docx, xlsx and so on.
|
|
93
|
-
# It might be scanned with text representation in third-party libraries.
|
|
94
|
-
if file_type in (".xlsx", ".ods"):
|
|
95
|
-
deep_scanners.append(XlsxScanner)
|
|
96
|
-
else:
|
|
97
|
-
fallback_scanners.append(XlsxScanner)
|
|
98
|
-
if ".docx" == file_type:
|
|
99
|
-
deep_scanners.append(DocxScanner)
|
|
100
|
-
else:
|
|
101
|
-
fallback_scanners.append(DocxScanner)
|
|
102
|
-
if ".pptx" == file_type:
|
|
103
|
-
deep_scanners.append(PptxScanner)
|
|
104
|
-
else:
|
|
105
|
-
fallback_scanners.append(PptxScanner)
|
|
106
|
-
elif Util.is_com(data):
|
|
107
|
-
if ".xls" == file_type:
|
|
108
|
-
deep_scanners.append(XlsxScanner)
|
|
109
|
-
else:
|
|
110
|
-
fallback_scanners.append(XlsxScanner)
|
|
111
|
-
elif Util.is_bzip2(data):
|
|
112
|
-
if 0 < depth:
|
|
113
|
-
deep_scanners.append(Bzip2Scanner)
|
|
114
|
-
elif Util.is_lzma(data):
|
|
115
|
-
if 0 < depth:
|
|
116
|
-
deep_scanners.append(LzmaScanner)
|
|
117
|
-
elif Util.is_tar(data):
|
|
118
|
-
if 0 < depth:
|
|
119
|
-
deep_scanners.append(TarScanner)
|
|
120
|
-
elif Util.is_deb(data):
|
|
121
|
-
if 0 < depth:
|
|
122
|
-
deep_scanners.append(DebScanner)
|
|
123
|
-
elif Util.is_gzip(data):
|
|
124
|
-
if 0 < depth:
|
|
125
|
-
deep_scanners.append(GzipScanner)
|
|
126
|
-
elif Util.is_pdf(data):
|
|
127
|
-
deep_scanners.append(PdfScanner)
|
|
128
|
-
elif Util.is_jks(data):
|
|
129
|
-
deep_scanners.append(JksScanner)
|
|
130
|
-
elif Util.is_asn1(data):
|
|
131
|
-
deep_scanners.append(Pkcs12Scanner)
|
|
132
|
-
elif Util.is_xml(data):
|
|
133
|
-
if Util.is_html(data):
|
|
134
|
-
deep_scanners.append(HtmlScanner)
|
|
135
|
-
deep_scanners.append(XmlScanner)
|
|
136
|
-
fallback_scanners.append(ByteScanner)
|
|
137
|
-
elif Util.is_mxfile(data):
|
|
138
|
-
deep_scanners.append(MxfileScanner)
|
|
139
|
-
deep_scanners.append(XmlScanner)
|
|
140
|
-
fallback_scanners.append(ByteScanner)
|
|
141
|
-
elif Util.is_tmx(data):
|
|
142
|
-
deep_scanners.append(TmxScanner)
|
|
143
|
-
fallback_scanners.append(XmlScanner)
|
|
144
|
-
fallback_scanners.append(ByteScanner)
|
|
145
|
-
else:
|
|
146
|
-
deep_scanners.append(XmlScanner)
|
|
147
|
-
fallback_scanners.append(ByteScanner)
|
|
148
|
-
elif Util.is_eml(data):
|
|
149
|
-
if ".eml" == file_type:
|
|
150
|
-
deep_scanners.append(EmlScanner)
|
|
151
|
-
else:
|
|
152
|
-
fallback_scanners.append(EmlScanner)
|
|
153
|
-
fallback_scanners.append(ByteScanner)
|
|
154
|
-
elif Util.is_known(data):
|
|
155
|
-
# the format is known but cannot be scanned
|
|
156
|
-
pass
|
|
157
|
-
elif not Util.is_binary(data):
|
|
158
|
-
if 0 < depth:
|
|
159
|
-
deep_scanners.append(EncoderScanner)
|
|
160
|
-
deep_scanners.append(LangScanner)
|
|
161
|
-
deep_scanners.append(ByteScanner)
|
|
162
|
-
else:
|
|
163
|
-
logger.warning("Cannot apply a deep scanner for type %s prefix %s", file_type, str(data[:MIN_DATA_LEN]))
|
|
164
|
-
return deep_scanners, fallback_scanners
|
|
165
|
-
|
|
166
|
-
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
167
|
-
|
|
168
|
-
def deep_scan_with_fallback(self, data_provider: DataContentProvider, depth: int,
|
|
169
|
-
recursive_limit_size: int) -> List[Candidate]:
|
|
170
|
-
"""Scans with deep scanners and fallback scanners if possible
|
|
171
|
-
|
|
172
|
-
Args:
|
|
173
|
-
data_provider: DataContentProvider with raw data
|
|
174
|
-
depth: maximal level of recursion
|
|
175
|
-
recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
|
|
176
|
-
|
|
177
|
-
Returns: list with candidates
|
|
178
|
-
|
|
179
|
-
"""
|
|
180
|
-
candidates: List[Candidate] = []
|
|
181
|
-
deep_scanners, fallback_scanners = self.get_deep_scanners(data_provider.data, data_provider.file_type, depth)
|
|
182
|
-
fallback = True
|
|
183
|
-
for scan_class in deep_scanners:
|
|
184
|
-
new_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
|
|
185
|
-
if new_candidates is None:
|
|
186
|
-
# scanner did not recognise the content type
|
|
187
|
-
continue
|
|
188
|
-
augment_candidates(candidates, new_candidates)
|
|
189
|
-
# this scan is successful, so fallback is not necessary
|
|
190
|
-
fallback = False
|
|
191
|
-
if fallback:
|
|
192
|
-
for scan_class in fallback_scanners:
|
|
193
|
-
fallback_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
|
|
194
|
-
if fallback_candidates is None:
|
|
195
|
-
continue
|
|
196
|
-
augment_candidates(candidates, fallback_candidates)
|
|
197
|
-
# use only first successful fallback scanner
|
|
198
|
-
break
|
|
199
|
-
return candidates
|
|
200
54
|
|
|
201
55
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
202
56
|
|
|
203
|
-
def scan(self,
|
|
204
|
-
content_provider: ContentProvider,
|
|
205
|
-
depth: int,
|
|
206
|
-
recursive_limit_size: Optional[int] = None) -> List[Candidate]:
|
|
207
|
-
"""Initial scan method to launch recursive scan. Skips ByteScanner to prevent extra scan
|
|
208
|
-
|
|
209
|
-
Args:
|
|
210
|
-
content_provider: ContentProvider that might contain raw data
|
|
211
|
-
depth: maximal level of recursion
|
|
212
|
-
recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
|
|
213
|
-
"""
|
|
214
|
-
recursive_limit_size = recursive_limit_size if isinstance(recursive_limit_size,
|
|
215
|
-
int) else RECURSIVE_SCAN_LIMITATION
|
|
216
|
-
candidates: List[Candidate] = []
|
|
217
|
-
data: Optional[bytes] = None
|
|
218
|
-
if isinstance(content_provider, (TextContentProvider, ByteContentProvider)):
|
|
219
|
-
# Feature to scan files which might be containers
|
|
220
|
-
data = content_provider.data
|
|
221
|
-
info = f"FILE:{content_provider.file_path}"
|
|
222
|
-
elif isinstance(content_provider, DiffContentProvider) and content_provider.diff:
|
|
223
|
-
candidates = self.scanner.scan(content_provider)
|
|
224
|
-
# Feature to scan binary diffs
|
|
225
|
-
diff = content_provider.diff[0].get("line")
|
|
226
|
-
# the check for legal fix mypy issue
|
|
227
|
-
if isinstance(diff, bytes):
|
|
228
|
-
data = diff
|
|
229
|
-
info = f"DIFF:{content_provider.file_path}"
|
|
230
|
-
else:
|
|
231
|
-
logger.warning(f"Content provider {type(content_provider)} does not support deep scan")
|
|
232
|
-
info = "NA"
|
|
233
|
-
|
|
234
|
-
if data:
|
|
235
|
-
data_provider = DataContentProvider(data=data,
|
|
236
|
-
file_path=content_provider.file_path,
|
|
237
|
-
file_type=content_provider.file_type,
|
|
238
|
-
info=content_provider.info or info)
|
|
239
|
-
new_candidates = self.deep_scan_with_fallback(data_provider, depth, recursive_limit_size - len(data))
|
|
240
|
-
augment_candidates(candidates, new_candidates)
|
|
241
|
-
return candidates
|
|
242
|
-
|
|
243
57
|
def recursive_scan(
|
|
244
58
|
self, #
|
|
245
59
|
data_provider: DataContentProvider, #
|
|
@@ -278,6 +92,62 @@ class DeepScanner(
|
|
|
278
92
|
|
|
279
93
|
return candidates
|
|
280
94
|
|
|
95
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
96
|
+
|
|
97
|
+
@staticmethod
|
|
98
|
+
def key_value_combination(structure: dict) -> Generator[Tuple[Any, Any], None, None]:
|
|
99
|
+
"""Combine items by `key` and `value` from a dictionary for augmentation
|
|
100
|
+
{..., "key": "api_key", "value": "XXXXXXX", ...} -> ("api_key", "XXXXXXX")
|
|
101
|
+
|
|
102
|
+
"""
|
|
103
|
+
for key_id in ("key", "KEY", "Key"):
|
|
104
|
+
if key_id in structure:
|
|
105
|
+
struct_key = structure.get(key_id)
|
|
106
|
+
break
|
|
107
|
+
else:
|
|
108
|
+
struct_key = None
|
|
109
|
+
if isinstance(struct_key, bytes):
|
|
110
|
+
# sqlite table may produce bytes for `key`
|
|
111
|
+
with contextlib.suppress(UnicodeError):
|
|
112
|
+
struct_key = struct_key.decode(UTF_8)
|
|
113
|
+
# only str type is common used for the augmentation
|
|
114
|
+
if struct_key and isinstance(struct_key, str):
|
|
115
|
+
for value_id in ("value", "VALUE", "Value"):
|
|
116
|
+
if value_id in structure:
|
|
117
|
+
struct_value = structure.get(value_id)
|
|
118
|
+
if struct_value and isinstance(struct_value, (str, bytes)):
|
|
119
|
+
yield struct_key, struct_value
|
|
120
|
+
# break in successful case
|
|
121
|
+
break
|
|
122
|
+
|
|
123
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
124
|
+
|
|
125
|
+
@staticmethod
|
|
126
|
+
def structure_processing(structure: Any) -> Generator[Tuple[Any, Any], None, None]:
|
|
127
|
+
"""Yields pair `key, value` from given structure if applicable"""
|
|
128
|
+
if isinstance(structure, dict):
|
|
129
|
+
# transform dictionary to list
|
|
130
|
+
for key, value in structure.items():
|
|
131
|
+
if not value:
|
|
132
|
+
# skip empty values
|
|
133
|
+
continue
|
|
134
|
+
if isinstance(value, (list, tuple)):
|
|
135
|
+
if 1 == len(value):
|
|
136
|
+
# simplify some structures like YAML when single item in new line is a value
|
|
137
|
+
yield key, value[0]
|
|
138
|
+
continue
|
|
139
|
+
# all other data will be precessed in next code
|
|
140
|
+
yield key, value
|
|
141
|
+
yield from AbstractScanner.key_value_combination(structure)
|
|
142
|
+
elif isinstance(structure, (list, tuple)):
|
|
143
|
+
# enumerate the items to fit for return structure
|
|
144
|
+
for key, value in enumerate(structure):
|
|
145
|
+
yield key, value
|
|
146
|
+
else:
|
|
147
|
+
logger.error("Not supported type:%s val:%s", str(type(structure)), repr(structure))
|
|
148
|
+
|
|
149
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
150
|
+
|
|
281
151
|
def structure_scan(
|
|
282
152
|
self, #
|
|
283
153
|
struct_provider: StructContentProvider, #
|
|
@@ -301,35 +171,21 @@ class DeepScanner(
|
|
|
301
171
|
|
|
302
172
|
depth -= 1
|
|
303
173
|
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
if isinstance(struct_provider.struct, dict):
|
|
309
|
-
for key, value in struct_provider.struct.items():
|
|
310
|
-
if isinstance(value, (list, tuple)) and 1 == len(value):
|
|
311
|
-
# simplify some structures like YAML when single item in new line is a value
|
|
312
|
-
items.append((key, value[0]))
|
|
313
|
-
else:
|
|
314
|
-
items.append((key, value))
|
|
315
|
-
# for transformation {"key": "api_key", "value": "XXXXXXX"} -> {"api_key": "XXXXXXX"}
|
|
316
|
-
struct_key = struct_provider.struct.get("key")
|
|
317
|
-
struct_value = struct_provider.struct.get("value")
|
|
318
|
-
elif isinstance(struct_provider.struct, (list, tuple)):
|
|
319
|
-
items = list(enumerate(struct_provider.struct))
|
|
320
|
-
else:
|
|
321
|
-
logger.error("Not supported type:%s val:%s", str(type(struct_provider.struct)), str(struct_provider.struct))
|
|
174
|
+
augmented_lines_for_keyword_rules = []
|
|
175
|
+
for key, value in AbstractScanner.structure_processing(struct_provider.struct):
|
|
176
|
+
# a keyword rule may be applicable for `key` (str only) and `value` (str, bytes)
|
|
177
|
+
keyword_match = bool(isinstance(key, str) and self.scanner.keywords_required_substrings_check(key.lower()))
|
|
322
178
|
|
|
323
|
-
|
|
324
|
-
|
|
179
|
+
if isinstance(value, (dict, list, tuple)) and value:
|
|
180
|
+
# recursive scan for not empty structured `value`
|
|
325
181
|
val_struct_provider = StructContentProvider(struct=value,
|
|
326
182
|
file_path=struct_provider.file_path,
|
|
327
183
|
file_type=struct_provider.file_type,
|
|
328
184
|
info=f"{struct_provider.info}|STRUCT:{key}")
|
|
329
185
|
new_candidates = self.structure_scan(val_struct_provider, depth, recursive_limit_size)
|
|
330
186
|
candidates.extend(new_candidates)
|
|
331
|
-
|
|
332
187
|
elif isinstance(value, bytes):
|
|
188
|
+
# recursive data scan
|
|
333
189
|
if MIN_DATA_LEN <= len(value):
|
|
334
190
|
bytes_struct_provider = DataContentProvider(data=value,
|
|
335
191
|
file_path=struct_provider.file_path,
|
|
@@ -338,16 +194,15 @@ class DeepScanner(
|
|
|
338
194
|
new_limit = recursive_limit_size - len(value)
|
|
339
195
|
new_candidates = self.recursive_scan(bytes_struct_provider, depth, new_limit)
|
|
340
196
|
candidates.extend(new_candidates)
|
|
341
|
-
if MIN_VALUE_LENGTH <= len(value)
|
|
342
|
-
|
|
343
|
-
str_val = str(value)
|
|
344
|
-
lines_for_keyword_rules.append(f"{key} = '{str_val}'" if '"' in str_val else f'{key} = "{str_val}"')
|
|
345
|
-
|
|
197
|
+
if keyword_match and MIN_VALUE_LENGTH <= len(value):
|
|
198
|
+
augmented_lines_for_keyword_rules.append(f"{key} = {repr(value)}")
|
|
346
199
|
elif isinstance(value, str):
|
|
347
|
-
|
|
200
|
+
# recursive text scan with transformation into bytes
|
|
201
|
+
stripped_value = value.strip()
|
|
202
|
+
if MIN_DATA_LEN <= len(stripped_value):
|
|
348
203
|
# recursive scan only for data which may be decoded at least
|
|
349
204
|
with contextlib.suppress(UnicodeError):
|
|
350
|
-
data =
|
|
205
|
+
data = stripped_value.encode(encoding=DEFAULT_ENCODING, errors='strict')
|
|
351
206
|
str_struct_provider = DataContentProvider(data=data,
|
|
352
207
|
file_path=struct_provider.file_path,
|
|
353
208
|
file_type=struct_provider.file_type,
|
|
@@ -355,32 +210,97 @@ class DeepScanner(
|
|
|
355
210
|
new_limit = recursive_limit_size - len(str_struct_provider.data)
|
|
356
211
|
new_candidates = self.recursive_scan(str_struct_provider, depth, new_limit)
|
|
357
212
|
candidates.extend(new_candidates)
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
lines_for_keyword_rules.append(f"{key} = '{value}'" if '"' in value else f'{key} = "{value}"')
|
|
362
|
-
|
|
363
|
-
elif isinstance(value, (int, float, datetime.date, datetime.datetime)):
|
|
213
|
+
if keyword_match and MIN_VALUE_LENGTH <= len(stripped_value):
|
|
214
|
+
augmented_lines_for_keyword_rules.append(f"{key} = {repr(stripped_value)}")
|
|
215
|
+
elif value is None or isinstance(value, (int, float, datetime.date, datetime.datetime)):
|
|
364
216
|
# skip useless types
|
|
365
217
|
pass
|
|
366
218
|
else:
|
|
367
219
|
logger.warning("Not supported type:%s value(%s)", str(type(value)), str(value))
|
|
368
220
|
|
|
369
|
-
if
|
|
370
|
-
str_provider = StringContentProvider(
|
|
221
|
+
if augmented_lines_for_keyword_rules:
|
|
222
|
+
str_provider = StringContentProvider(augmented_lines_for_keyword_rules,
|
|
371
223
|
file_path=struct_provider.file_path,
|
|
372
|
-
file_type=
|
|
373
|
-
info=f"{struct_provider.info}|KEYWORD
|
|
224
|
+
file_type=struct_provider.file_type,
|
|
225
|
+
info=f"{struct_provider.info}|KEYWORD")
|
|
374
226
|
new_candidates = self.scanner.scan(str_provider)
|
|
375
227
|
augment_candidates(candidates, new_candidates)
|
|
376
228
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
229
|
+
return candidates
|
|
230
|
+
|
|
231
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
232
|
+
|
|
233
|
+
def deep_scan_with_fallback(self, data_provider: DataContentProvider, depth: int,
                            recursive_limit_size: int) -> List[Candidate]:
    """Runs the deep scanners selected for the data and falls back only if none of them worked

    Every scanner from the first group returned by `get_deep_scanners` is applied.
    A scanner which returns None did not recognise the content and is ignored.
    When no scanner of the first group produced a result (even an empty list),
    the fallback group is tried in order and only its first successful scanner is used.

    Args:
        data_provider: DataContentProvider with raw data
        depth: maximal level of recursion
        recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack

    Returns: list with candidates

    """
    primary_scanners, reserve_scanners = self.get_deep_scanners(data_provider.data, data_provider.descriptor,
                                                                depth)
    found: List[Candidate] = []
    recognised = False
    for scanner_cls in primary_scanners:
        result = scanner_cls.data_scan(self, data_provider, depth, recursive_limit_size)
        if result is not None:
            augment_candidates(found, result)
            # at least one deep scanner understood the data, so fallback is not necessary
            recognised = True
    if recognised:
        return found
    for scanner_cls in reserve_scanners:
        result = scanner_cls.data_scan(self, data_provider, depth, recursive_limit_size)
        if result is not None:
            augment_candidates(found, result)
            # the first successful fallback scanner is enough
            break
    return found
|
|
265
|
+
|
|
266
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
267
|
+
|
|
268
|
+
def scan(self,
         content_provider: ContentProvider,
         depth: int,
         recursive_limit_size: Optional[int] = None) -> List[Candidate]:
    """Initial scan method to launch recursive scan. Skips ByteScanner to prevent extra scan

    Text and byte providers are scanned with their whole data.
    A diff provider with a non-empty diff is scanned by the plain scanner first,
    then the `line` of its first diff item is deep scanned when it is bytes.
    Any other provider only produces a warning in the log.

    Args:
        content_provider: ContentProvider that might contain raw data
        depth: maximal level of recursion
        recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack;
            RECURSIVE_SCAN_LIMITATION is used when the value is not an integer

    Returns: list with candidates

    """
    if not isinstance(recursive_limit_size, int):
        recursive_limit_size = RECURSIVE_SCAN_LIMITATION
    found: List[Candidate] = []
    payload: Optional[bytes] = None
    if isinstance(content_provider, (TextContentProvider, ByteContentProvider)):
        # Feature to scan files which might be containers
        payload = content_provider.data
        origin = f"FILE:{content_provider.file_path}"
    elif isinstance(content_provider, DiffContentProvider) and content_provider.diff:
        found = self.scanner.scan(content_provider)
        # Feature to scan binary diffs
        first_line = content_provider.diff[0].get("line")
        # the type check also narrows the type for mypy
        if isinstance(first_line, bytes):
            payload = first_line
            origin = f"DIFF:{content_provider.file_path}"
    else:
        logger.warning(f"Content provider {type(content_provider)} does not support deep scan")
        origin = "NA"

    if not payload:
        # nothing is available for the deep scan
        return found
    nested_provider = DataContentProvider(data=payload,
                                          file_path=content_provider.file_path,
                                          file_type=content_provider.file_type,
                                          info=content_provider.info or origin)
    # the size of the data is charged against the limit before going deeper
    deep_candidates = self.deep_scan_with_fallback(nested_provider, depth, recursive_limit_size - len(payload))
    augment_candidates(found, deep_candidates)
    return found
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import struct
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from typing import List, Optional, Generator, Tuple
|
|
5
|
+
|
|
6
|
+
from credsweeper.common.constants import MIN_DATA_LEN, UTF_8
|
|
7
|
+
from credsweeper.credentials import Candidate
|
|
8
|
+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
9
|
+
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
10
|
+
from credsweeper.utils.util import Util
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DebScanner(AbstractScanner, ABC):
    """Implements deb (ar) scanning"""

    # size of the header before each archive member: 16 + 32 + 10 + 2 bytes (see the unpack format below)
    __header_size = 60

    @staticmethod
    def walk_deb(data: bytes) -> Generator[Tuple[int, str, bytes], None, None]:
        """Processes sequence of DEB archive and yields offset, name and data

        Args:
            data: raw bytes of the archive, the first 8 bytes are skipped as the global signature

        Yields: offset of the member data, member name without padding and trailing '/', member data.
            Members which are not bigger than MIN_DATA_LEN or exceed the rest of the data are skipped.

        Raises:
            ValueError: the size field of a header is not a number or is negative

        """
        offset = 8  # b"!<arch>\n"
        data_limit = len(data) - DebScanner.__header_size
        # a header is read only when it fits completely into the rest of the data
        while offset <= data_limit:
            _data = data[offset:offset + DebScanner.__header_size]
            offset += DebScanner.__header_size
            # basic header structure: name, ignored fields, decimal size, ignored trailer
            _name, _, _size, __ = struct.unpack('16s32s10s2s', _data)
            file_size = int(_size)
            if 0 > file_size:
                # int() accepts a sign: a negative size moves the offset backward,
                # so a crafted header (e.g. -60) would keep the loop on the same header forever
                raise ValueError(f"Negative size {file_size} of archive member before offset 0x{offset:x}")
            if MIN_DATA_LEN < file_size <= len(data) - offset:
                _data = data[offset:offset + file_size]
                yield offset, _name.decode(encoding=UTF_8).strip().rstrip('/'), _data
            # the next header starts after the data with one extra byte when the size is odd
            offset += file_size if 0 == 1 & file_size else file_size + 1

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Extracts data file from .ar (debian) archive and launches data_scan

        Args:
            data_provider: DataContentProvider with raw data of the archive
            depth: maximal level of recursion
            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack

        Returns: list with candidates of all members or None when any error happened during the processing

        """
        try:
            candidates: List[Candidate] = []
            for offset, name, data in DebScanner.walk_deb(data_provider.data):
                deb_content_provider = DataContentProvider(data=data,
                                                           file_path=f"{data_provider.file_path}/{name}",
                                                           file_type=Util.get_extension(name),
                                                           info=f"{data_provider.info}|DEB:0x{offset:x}")
                # the size of the member is charged against the limit before going deeper
                new_limit = recursive_limit_size - len(data)
                deb_candidates = self.recursive_scan(deb_content_provider, depth, new_limit)
                candidates.extend(deb_candidates)
            return candidates
        except Exception as exc:
            # a malformed archive is reported and treated as not recognised content
            logger.error(exc)
        return None
|