credsweeper 1.12.1__tar.gz → 1.13.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- {credsweeper-1.12.1 → credsweeper-1.13.0}/.gitignore +2 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/PKG-INFO +7 -7
- {credsweeper-1.12.1 → credsweeper-1.13.0}/README.md +1 -1
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/__init__.py +1 -1
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/__main__.py +23 -13
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/app.py +7 -2
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/common/keyword_pattern.py +6 -3
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/common/morpheme_checklist.txt +13 -1
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/config/config.py +1 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/credentials/line_data.py +16 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/deep_scanner.py +13 -7
- credsweeper-1.13.0/credsweeper/deep_scanner/rtf_scanner.py +41 -0
- credsweeper-1.13.0/credsweeper/deep_scanner/strings_scanner.py +52 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/file_handler/byte_content_provider.py +10 -1
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/file_handler/file_path_extractor.py +2 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/file_handler/text_content_provider.py +7 -1
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/__init__.py +1 -1
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/group/token_pattern.py +2 -2
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/group/weird_base36_token.py +2 -2
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/group/weird_base64_token.py +2 -2
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_camel_case_check.py +2 -2
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_file_path_check.py +5 -3
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_github_check.py +3 -2
- credsweeper-1.13.0/credsweeper/filters/value_morphemes_check.py +43 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_string_type_check.py +1 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/features/feature.py +1 -18
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/features/file_extension.py +1 -1
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/features/has_html_tag.py +10 -8
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/features/is_secret_numeric.py +4 -3
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/features/rule_name.py +1 -1
- credsweeper-1.13.0/credsweeper/ml_model/features/word_in.py +36 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/features/word_in_path.py +2 -3
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/features/word_in_postamble.py +1 -4
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/features/word_in_preamble.py +1 -4
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/features/word_in_transition.py +1 -4
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/features/word_in_value.py +2 -3
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/features/word_in_variable.py +2 -3
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/ml_config.json +15 -8
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/ml_model.onnx +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/ml_validator.py +1 -1
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/rules/config.yaml +174 -142
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/scanner/scanner.py +12 -7
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/secret/config.json +18 -5
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/utils/util.py +21 -18
- {credsweeper-1.12.1 → credsweeper-1.13.0}/pyproject.toml +5 -5
- credsweeper-1.12.1/credsweeper/filters/value_couple_keyword_check.py +0 -28
- credsweeper-1.12.1/credsweeper/ml_model/features/word_in.py +0 -59
- {credsweeper-1.12.1 → credsweeper-1.13.0}/LICENSE +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/common/__init__.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/common/constants.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/common/keyword_checklist.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/common/keyword_checklist.txt +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/config/__init__.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/credentials/__init__.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/credentials/augment_candidates.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/credentials/candidate.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/credentials/candidate_group_generator.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/credentials/candidate_key.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/credentials/credential_manager.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/__init__.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/abstract_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/byte_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/bzip2_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/deb_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/docx_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/eml_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/encoder_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/gzip_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/html_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/jclass_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/jks_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/lang_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/lzma_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/mxfile_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/patch_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/pdf_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/pkcs_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/pptx_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/rpm_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/sqlite3_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/tar_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/tmx_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/xlsx_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/xml_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/deep_scanner/zip_scanner.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/file_handler/__init__.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/file_handler/abstract_provider.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/file_handler/analysis_target.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/file_handler/content_provider.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/file_handler/data_content_provider.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/file_handler/descriptor.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/file_handler/diff_content_provider.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/file_handler/files_provider.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/file_handler/patches_provider.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/file_handler/string_content_provider.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/file_handler/struct_content_provider.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/filter.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/group/__init__.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/group/general_keyword.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/group/general_pattern.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/group/group.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/group/password_keyword.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/group/url_credentials_group.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/line_git_binary_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/line_specific_key_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/line_uue_part_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_allowlist_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_array_dictionary_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_atlassian_token_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_azure_token_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_base32_data_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_base64_data_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_base64_encoded_pem_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_base64_key_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_base64_part_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_basic_auth_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_blocklist_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_dictionary_keyword_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_discord_bot_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_entropy_base32_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_entropy_base36_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_entropy_base64_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_entropy_base_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_grafana_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_grafana_service_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_hex_number_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_jfrog_token_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_json_web_key_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_json_web_token_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_last_word_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_length_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_method_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_not_allowed_pattern_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_not_part_encoded_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_number_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_pattern_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_similarity_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_split_keyword_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_token_base32_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_token_base36_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_token_base64_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_token_base_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/filters/value_token_check.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/logger/__init__.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/logger/logger.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/__init__.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/features/__init__.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/features/entropy_evaluation.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/features/length_of_attribute.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/features/morpheme_dense.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/features/rule_severity.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/ml_model/features/search_in_attribute.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/py.typed +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/rules/__init__.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/rules/rule.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/scanner/__init__.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/scanner/scan_type/__init__.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/scanner/scan_type/multi_pattern.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/scanner/scan_type/pem_key_pattern.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/scanner/scan_type/scan_type.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/scanner/scan_type/single_pattern.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/secret/log.yaml +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/utils/__init__.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/utils/hop_stat.py +0 -0
- {credsweeper-1.12.1 → credsweeper-1.13.0}/credsweeper/utils/pem_key_detector.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: credsweeper
|
|
3
|
-
Version: 1.12.1
|
|
3
|
+
Version: 1.13.0
|
|
4
4
|
Summary: Credential Sweeper
|
|
5
5
|
Project-URL: Homepage, https://github.com/Samsung/CredSweeper
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues
|
|
@@ -10,13 +10,12 @@ Classifier: License :: OSI Approved :: MIT License
|
|
|
10
10
|
Classifier: Operating System :: OS Independent
|
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
|
12
12
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
14
13
|
Classifier: Programming Language :: Python :: 3.10
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.11
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.12
|
|
17
16
|
Classifier: Topic :: Security
|
|
18
17
|
Classifier: Topic :: Software Development :: Quality Assurance
|
|
19
|
-
Requires-Python: >=3.9
|
|
18
|
+
Requires-Python: >=3.10
|
|
20
19
|
Requires-Dist: base58
|
|
21
20
|
Requires-Dist: beautifulsoup4>=4.11.0
|
|
22
21
|
Requires-Dist: colorama
|
|
@@ -24,10 +23,10 @@ Requires-Dist: cryptography
|
|
|
24
23
|
Requires-Dist: gitpython
|
|
25
24
|
Requires-Dist: humanfriendly
|
|
26
25
|
Requires-Dist: lxml
|
|
27
|
-
Requires-Dist: numpy
|
|
26
|
+
Requires-Dist: numpy
|
|
28
27
|
Requires-Dist: odfpy
|
|
29
|
-
Requires-Dist: onnxruntime; platform_system != 'Windows'
|
|
30
|
-
Requires-Dist: onnxruntime==1.19.2; platform_system == 'Windows'
|
|
28
|
+
Requires-Dist: onnxruntime; platform_system != 'Windows' or python_version != '3.12'
|
|
29
|
+
Requires-Dist: onnxruntime==1.19.2; platform_system == 'Windows' and python_version == '3.12'
|
|
31
30
|
Requires-Dist: openpyxl
|
|
32
31
|
Requires-Dist: pandas
|
|
33
32
|
Requires-Dist: pdfminer-six
|
|
@@ -38,6 +37,7 @@ Requires-Dist: python-docx
|
|
|
38
37
|
Requires-Dist: python-pptx
|
|
39
38
|
Requires-Dist: pyyaml
|
|
40
39
|
Requires-Dist: rpmfile
|
|
40
|
+
Requires-Dist: striprtf
|
|
41
41
|
Requires-Dist: whatthepatch
|
|
42
42
|
Requires-Dist: xlrd
|
|
43
43
|
Description-Content-Type: text/markdown
|
|
@@ -90,7 +90,7 @@ Full documentation can be found here: <https://credsweeper.readthedocs.io/>
|
|
|
90
90
|
|
|
91
91
|
### Main Requirements
|
|
92
92
|
|
|
93
|
-
- Python 3.9, 3.10, 3.11, 3.12
|
|
93
|
+
- Python 3.10, 3.11, 3.12
|
|
94
94
|
|
|
95
95
|
### Installation
|
|
96
96
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import binascii
|
|
2
|
+
import contextlib
|
|
2
3
|
import logging
|
|
3
4
|
import os
|
|
4
5
|
import sys
|
|
@@ -34,24 +35,24 @@ def positive_int(value: Any) -> int:
|
|
|
34
35
|
return int_value
|
|
35
36
|
|
|
36
37
|
|
|
37
|
-
def
|
|
38
|
+
def threshold_or_float_or_zero(arg: str) -> Union[int, float, ThresholdPreset]:
|
|
38
39
|
"""Return ThresholdPreset or a float from the input string
|
|
39
40
|
|
|
40
41
|
Args:
|
|
41
42
|
arg: string that either a float or one of allowed values in ThresholdPreset
|
|
42
43
|
|
|
43
44
|
Returns:
|
|
44
|
-
float if arg convertible to float, ThresholdPreset if one of the allowed values
|
|
45
|
+
int = 0 to disable ML validator, float if arg convertible to float, ThresholdPreset if one of the allowed values
|
|
45
46
|
|
|
46
47
|
Raises:
|
|
47
48
|
ArgumentTypeError: if arg cannot be interpreted as float or ThresholdPreset
|
|
48
49
|
|
|
49
50
|
"""
|
|
50
51
|
allowed_presents = [e.value for e in ThresholdPreset]
|
|
51
|
-
|
|
52
|
+
if '0' == arg:
|
|
53
|
+
return 0
|
|
54
|
+
with contextlib.suppress(ValueError):
|
|
52
55
|
return float(arg) # try convert to float
|
|
53
|
-
except ValueError:
|
|
54
|
-
pass
|
|
55
56
|
if arg in allowed_presents:
|
|
56
57
|
return ThresholdPreset[arg]
|
|
57
58
|
raise ArgumentTypeError(f"value must be a float or one of {allowed_presents}")
|
|
@@ -158,6 +159,10 @@ def get_arguments() -> Namespace:
|
|
|
158
159
|
help="find files by predefined extension",
|
|
159
160
|
dest="find_by_ext",
|
|
160
161
|
action="store_true")
|
|
162
|
+
parser.add_argument("--pedantic",
|
|
163
|
+
help="process files without extension",
|
|
164
|
+
action=BooleanOptionalAction,
|
|
165
|
+
default=False)
|
|
161
166
|
parser.add_argument("--depth",
|
|
162
167
|
help="additional recursive search in data (experimental)",
|
|
163
168
|
type=positive_int,
|
|
@@ -172,11 +177,11 @@ def get_arguments() -> Namespace:
|
|
|
172
177
|
"The lower the threshold - the more credentials will be reported. "
|
|
173
178
|
f"Allowed values: float between 0 and 1, or any of {[e.value for e in ThresholdPreset]} "
|
|
174
179
|
"(default: medium)",
|
|
175
|
-
type=
|
|
180
|
+
type=threshold_or_float_or_zero,
|
|
176
181
|
default=ThresholdPreset.medium,
|
|
177
182
|
dest="ml_threshold",
|
|
178
183
|
required=False,
|
|
179
|
-
metavar="
|
|
184
|
+
metavar="THRESHOLD_OR_FLOAT_OR_ZERO")
|
|
180
185
|
parser.add_argument("--ml_batch_size",
|
|
181
186
|
"-b",
|
|
182
187
|
help="batch size for model inference (default: 16)",
|
|
@@ -299,6 +304,7 @@ def get_credsweeper(args: Namespace) -> CredSweeper:
|
|
|
299
304
|
ml_model=args.ml_model,
|
|
300
305
|
ml_providers=args.ml_providers,
|
|
301
306
|
find_by_ext=args.find_by_ext,
|
|
307
|
+
pedantic=args.pedantic,
|
|
302
308
|
depth=args.depth,
|
|
303
309
|
doc=args.doc,
|
|
304
310
|
severity=args.severity,
|
|
@@ -335,7 +341,8 @@ def scan(args: Namespace, content_provider: AbstractProvider) -> int:
|
|
|
335
341
|
def get_commit_providers(commit: Commit, repo: Repo) -> Sequence[ByteContentProvider]:
|
|
336
342
|
"""Process a commit and for providers"""
|
|
337
343
|
result = {}
|
|
338
|
-
|
|
344
|
+
# use the hardcoded sha1 until sha256 objects are not supported by GitPython
|
|
345
|
+
ancestors = commit.parents or [repo.tree("4b825dc642cb6eb9a060e54bf8d69288fbee4904")]
|
|
339
346
|
for parent in ancestors:
|
|
340
347
|
for diff in parent.diff(commit):
|
|
341
348
|
# only result files
|
|
@@ -372,9 +379,11 @@ def drill(args: Namespace) -> Tuple[int, int]:
|
|
|
372
379
|
# then - credsweeper
|
|
373
380
|
credsweeper = get_credsweeper(args)
|
|
374
381
|
# use flat iterations to avoid recursive limits
|
|
375
|
-
to_scan =
|
|
382
|
+
to_scan = set(commits_sha1)
|
|
376
383
|
# local speedup for already scanned commits - avoid file system interactive
|
|
377
384
|
scanned = set()
|
|
385
|
+
# to avoid double-check
|
|
386
|
+
skipped = set()
|
|
378
387
|
while to_scan:
|
|
379
388
|
commit_sha1 = to_scan.pop()
|
|
380
389
|
if commit_sha1 in scanned:
|
|
@@ -382,8 +391,8 @@ def drill(args: Namespace) -> Tuple[int, int]:
|
|
|
382
391
|
continue
|
|
383
392
|
commit = repo.commit(commit_sha1)
|
|
384
393
|
if commit.parents:
|
|
385
|
-
# add parents
|
|
386
|
-
to_scan.
|
|
394
|
+
# add parents only when they were not skipped or scanned previously
|
|
395
|
+
to_scan.update(x.hexsha for x in commit.parents if x.hexsha not in skipped and x.hexsha not in scanned)
|
|
387
396
|
# check whether the commit has been checked and the report is present
|
|
388
397
|
skip_already_scanned = False
|
|
389
398
|
if args.json_filename:
|
|
@@ -401,9 +410,10 @@ def drill(args: Namespace) -> Tuple[int, int]:
|
|
|
401
410
|
else:
|
|
402
411
|
credsweeper.xlsx_filename = xlsx_path
|
|
403
412
|
if skip_already_scanned:
|
|
404
|
-
|
|
413
|
+
skipped.add(commit_sha1)
|
|
414
|
+
logger.info("Skip already scanned commit: %s %s", commit_sha1, commit.committed_datetime.isoformat())
|
|
405
415
|
continue
|
|
406
|
-
logger.info("Scan commit: %s", commit_sha1)
|
|
416
|
+
logger.info("Scan commit: %s %s", commit_sha1, commit.committed_datetime.isoformat())
|
|
407
417
|
# prepare all files to scan in the commit with bytes->IO transformation to avoid a multiprocess issue
|
|
408
418
|
if providers := get_commit_providers(commit, repo):
|
|
409
419
|
credsweeper.credential_manager.candidates.clear()
|
|
@@ -52,11 +52,12 @@ class CredSweeper:
|
|
|
52
52
|
use_filters: bool = True,
|
|
53
53
|
pool_count: int = 1,
|
|
54
54
|
ml_batch_size: Optional[int] = None,
|
|
55
|
-
ml_threshold: Union[float, ThresholdPreset] = ThresholdPreset.medium,
|
|
55
|
+
ml_threshold: Union[int, float, ThresholdPreset] = ThresholdPreset.medium,
|
|
56
56
|
ml_config: Union[None, str, Path] = None,
|
|
57
57
|
ml_model: Union[None, str, Path] = None,
|
|
58
58
|
ml_providers: Optional[str] = None,
|
|
59
59
|
find_by_ext: bool = False,
|
|
60
|
+
pedantic: bool = False,
|
|
60
61
|
depth: int = 0,
|
|
61
62
|
doc: bool = False,
|
|
62
63
|
severity: Union[Severity, str] = Severity.INFO,
|
|
@@ -86,6 +87,7 @@ class CredSweeper:
|
|
|
86
87
|
ml_model: str or Path to set custom ml model
|
|
87
88
|
ml_providers: str - comma separated list with providers
|
|
88
89
|
find_by_ext: boolean - files will be reported by extension
|
|
90
|
+
pedantic: boolean - scan all files
|
|
89
91
|
depth: int - how deep container files will be scanned
|
|
90
92
|
doc: boolean - document-specific scanning
|
|
91
93
|
severity: Severity - minimum severity level of rule
|
|
@@ -103,6 +105,7 @@ class CredSweeper:
|
|
|
103
105
|
config_dict = self._get_config_dict(config_path=config_path,
|
|
104
106
|
use_filters=use_filters,
|
|
105
107
|
find_by_ext=find_by_ext,
|
|
108
|
+
pedantic=pedantic,
|
|
106
109
|
depth=depth,
|
|
107
110
|
doc=doc,
|
|
108
111
|
severity=_severity,
|
|
@@ -145,6 +148,7 @@ class CredSweeper:
|
|
|
145
148
|
config_path: Optional[str], #
|
|
146
149
|
use_filters: bool, #
|
|
147
150
|
find_by_ext: bool, #
|
|
151
|
+
pedantic: bool, #
|
|
148
152
|
depth: int, #
|
|
149
153
|
doc: bool, #
|
|
150
154
|
severity: Severity, #
|
|
@@ -155,6 +159,7 @@ class CredSweeper:
|
|
|
155
159
|
config_dict["use_filters"] = use_filters
|
|
156
160
|
config_dict["find_by_ext"] = find_by_ext
|
|
157
161
|
config_dict["size_limit"] = size_limit
|
|
162
|
+
config_dict["pedantic"] = pedantic
|
|
158
163
|
config_dict["depth"] = depth
|
|
159
164
|
config_dict["doc"] = doc
|
|
160
165
|
config_dict["severity"] = severity.value
|
|
@@ -169,7 +174,7 @@ class CredSweeper:
|
|
|
169
174
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
170
175
|
|
|
171
176
|
def _use_ml_validation(self) -> bool:
|
|
172
|
-
if isinstance(self.ml_threshold,
|
|
177
|
+
if isinstance(self.ml_threshold, int) and 0 == self.ml_threshold:
|
|
173
178
|
logger.info("ML validation is disabled")
|
|
174
179
|
return False
|
|
175
180
|
if not self.credential_manager.candidates:
|
|
@@ -3,7 +3,10 @@ import re
|
|
|
3
3
|
|
|
4
4
|
class KeywordPattern:
|
|
5
5
|
"""Pattern set of keyword types"""
|
|
6
|
-
directive = r"(?P<directive>(?:
|
|
6
|
+
directive = r"(?P<directive>(?:" \
|
|
7
|
+
r"(?:[#%]define|define(?=(\s|\\{1,8}[tnr])*\()|%global)" \
|
|
8
|
+
r"(?:\s?\(|\s|\\{1,8}[tnr]){1,8}|\bset(?=\b|\w*(\s|\\{1,8}[tnr])*\()" \
|
|
9
|
+
r"))?"
|
|
7
10
|
key_left = r"(?:\\[nrt]|(\\\\*u00|%)[0-9a-f]{2}|\s)*" \
|
|
8
11
|
r"(?P<variable>(([\"'`]{1,8}[^:=\"'`}<>\\/&?]*|[^:=\"'`}<>\s()\\/&?;,%]*)"
|
|
9
12
|
# keyword will be inserted here
|
|
@@ -13,7 +16,7 @@ class KeywordPattern:
|
|
|
13
16
|
r")" # <variable>
|
|
14
17
|
separator = r"(?(directive)|(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*)" \
|
|
15
18
|
r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:(?!:)|=(>|>|(\\\\*u00|%)26gt;)|!==|!=|===|==|=~|=" \
|
|
16
|
-
r"|(?(directive)(
|
|
19
|
+
r"|(?(directive)(,|\\t|\s|\((?!\))){1,80}|%3d))" \
|
|
17
20
|
r"(\s|\\{1,8}[tnr])*"
|
|
18
21
|
# might be curly, square or parenthesis with words before
|
|
19
22
|
wrap = r"(?P<wrap>(" \
|
|
@@ -23,7 +26,7 @@ class KeywordPattern:
|
|
|
23
26
|
r"\s*" \
|
|
24
27
|
r"(\[(?!\])|\((?!\))|\{(?!\}))" \
|
|
25
28
|
r"(\s|\\{1,8}[tnr])*" \
|
|
26
|
-
r"(?(get)('[^']
|
|
29
|
+
r"(?(get)('[^']{1,31}'|\"[^\"]{1,31}\")\s*,\s*|)" \
|
|
27
30
|
r"([0-9a-z_]{1,32}\s*[:=]\s*)?" \
|
|
28
31
|
r"){1,8})?"
|
|
29
32
|
string_prefix = r"(((b|r|br|rb|u|f|rf|fr|l|@)(?=(\\*[\"'`])))?"
|
|
@@ -14,11 +14,15 @@
|
|
|
14
14
|
/var
|
|
15
15
|
000
|
|
16
16
|
111
|
|
17
|
+
14159265
|
|
18
|
+
18284590
|
|
17
19
|
222
|
|
18
20
|
333
|
|
19
21
|
444
|
|
20
22
|
555
|
|
23
|
+
65358979
|
|
21
24
|
666
|
|
25
|
+
71828182
|
|
22
26
|
777
|
|
23
27
|
80211
|
|
24
28
|
888
|
|
@@ -195,7 +199,7 @@ aux
|
|
|
195
199
|
avail
|
|
196
200
|
avatar
|
|
197
201
|
aver
|
|
198
|
-
|
|
202
|
+
awesom
|
|
199
203
|
axis
|
|
200
204
|
azure
|
|
201
205
|
back
|
|
@@ -227,6 +231,7 @@ bind
|
|
|
227
231
|
bio
|
|
228
232
|
bipol
|
|
229
233
|
bit
|
|
234
|
+
bixby
|
|
230
235
|
black
|
|
231
236
|
blan
|
|
232
237
|
bless
|
|
@@ -497,6 +502,7 @@ dust
|
|
|
497
502
|
dvb
|
|
498
503
|
dynamic
|
|
499
504
|
dynamo
|
|
505
|
+
eadbee
|
|
500
506
|
easin
|
|
501
507
|
easy
|
|
502
508
|
ecdhe
|
|
@@ -789,6 +795,7 @@ jpg_
|
|
|
789
795
|
json
|
|
790
796
|
jump
|
|
791
797
|
justif
|
|
798
|
+
kafka
|
|
792
799
|
kerberos
|
|
793
800
|
kernel
|
|
794
801
|
key
|
|
@@ -798,6 +805,7 @@ kind
|
|
|
798
805
|
kinesis
|
|
799
806
|
kirk
|
|
800
807
|
know
|
|
808
|
+
knox
|
|
801
809
|
kris
|
|
802
810
|
lab
|
|
803
811
|
lag
|
|
@@ -1317,6 +1325,7 @@ sock
|
|
|
1317
1325
|
soft
|
|
1318
1326
|
solid
|
|
1319
1327
|
solve
|
|
1328
|
+
some
|
|
1320
1329
|
sony
|
|
1321
1330
|
sort
|
|
1322
1331
|
sound
|
|
@@ -1429,6 +1438,7 @@ tio
|
|
|
1429
1438
|
tish
|
|
1430
1439
|
title
|
|
1431
1440
|
titud
|
|
1441
|
+
tizen
|
|
1432
1442
|
tmp/
|
|
1433
1443
|
to_
|
|
1434
1444
|
tod
|
|
@@ -1440,6 +1450,7 @@ topic
|
|
|
1440
1450
|
tory
|
|
1441
1451
|
total
|
|
1442
1452
|
touch
|
|
1453
|
+
tour
|
|
1443
1454
|
trace
|
|
1444
1455
|
tract
|
|
1445
1456
|
traffic
|
|
@@ -1574,5 +1585,6 @@ you
|
|
|
1574
1585
|
zeppelin
|
|
1575
1586
|
zero
|
|
1576
1587
|
zing
|
|
1588
|
+
zigbee
|
|
1577
1589
|
zona
|
|
1578
1590
|
zorro
|
|
@@ -35,6 +35,7 @@ class Config:
|
|
|
35
35
|
self.candidate_output: List[str] = config["candidate_output"]
|
|
36
36
|
self.find_by_ext: bool = config["find_by_ext"]
|
|
37
37
|
self.size_limit: Optional[int] = parse_size(config["size_limit"]) if config["size_limit"] is not None else None
|
|
38
|
+
self.pedantic: bool = bool(config["pedantic"])
|
|
38
39
|
self.depth: int = int(config["depth"])
|
|
39
40
|
self.doc: bool = config["doc"]
|
|
40
41
|
self.severity: Severity = Severity.get(config.get("severity"))
|
|
@@ -163,6 +163,7 @@ class LineData:
|
|
|
163
163
|
self.clean_url_parameters()
|
|
164
164
|
self.clean_bash_parameters()
|
|
165
165
|
self.clean_toml_parameters()
|
|
166
|
+
self.clean_tag_parameters()
|
|
166
167
|
if 0 <= self.value_start and 0 <= self.value_end and len(self.value) < len(_value):
|
|
167
168
|
start = _value.find(self.value)
|
|
168
169
|
self.value_start += start
|
|
@@ -232,6 +233,21 @@ class LineData:
|
|
|
232
233
|
self.value = self.value[:-1]
|
|
233
234
|
cleaning_required = True
|
|
234
235
|
|
|
236
|
+
def clean_tag_parameters(self) -> None:
|
|
237
|
+
"""Remove closing tag from value if the opened is somewhere before in line"""
|
|
238
|
+
cleaning_required = self.value and self.value.endswith('>')
|
|
239
|
+
while cleaning_required:
|
|
240
|
+
closing_tag_pos = self.value.rfind("</")
|
|
241
|
+
if 0 <= closing_tag_pos:
|
|
242
|
+
# use `<a` to avoid tag parameters
|
|
243
|
+
opening_tag_prefix = f"<{self.value[closing_tag_pos + 2:-1]}"
|
|
244
|
+
if cleaning_required := (opening_tag_prefix not in self.value
|
|
245
|
+
and 0 <= self.line.find(opening_tag_prefix, 0, self.value_start)):
|
|
246
|
+
self.value = self.value[:closing_tag_pos]
|
|
247
|
+
cleaning_required = self.value and self.value.endswith('>')
|
|
248
|
+
else:
|
|
249
|
+
break
|
|
250
|
+
|
|
235
251
|
def sanitize_variable(self) -> None:
|
|
236
252
|
"""Remove trailing spaces, dashes and quotations around the variable. Correct position."""
|
|
237
253
|
sanitized_var_len = 0
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from typing import List, Any, Tuple
|
|
3
3
|
|
|
4
|
-
from credsweeper.common.constants import MIN_DATA_LEN
|
|
5
4
|
from credsweeper.config.config import Config
|
|
6
5
|
from credsweeper.scanner.scanner import Scanner
|
|
7
6
|
from credsweeper.utils.util import Util
|
|
@@ -23,7 +22,9 @@ from .pdf_scanner import PdfScanner
|
|
|
23
22
|
from .pkcs_scanner import PkcsScanner
|
|
24
23
|
from .pptx_scanner import PptxScanner
|
|
25
24
|
from .rpm_scanner import RpmScanner
|
|
25
|
+
from .rtf_scanner import RtfScanner
|
|
26
26
|
from .sqlite3_scanner import Sqlite3Scanner
|
|
27
|
+
from .strings_scanner import StringsScanner
|
|
27
28
|
from .tar_scanner import TarScanner
|
|
28
29
|
from .tmx_scanner import TmxScanner
|
|
29
30
|
from .xlsx_scanner import XlsxScanner
|
|
@@ -49,8 +50,10 @@ class DeepScanner(
|
|
|
49
50
|
PdfScanner, #
|
|
50
51
|
PkcsScanner, #
|
|
51
52
|
PptxScanner, #
|
|
53
|
+
RtfScanner, #
|
|
52
54
|
RpmScanner, #
|
|
53
55
|
Sqlite3Scanner, #
|
|
56
|
+
StringsScanner, #
|
|
54
57
|
TarScanner, #
|
|
55
58
|
DebScanner, #
|
|
56
59
|
XmlScanner, #
|
|
@@ -133,6 +136,9 @@ class DeepScanner(
|
|
|
133
136
|
deep_scanners.append(Sqlite3Scanner)
|
|
134
137
|
elif Util.is_asn1(data):
|
|
135
138
|
deep_scanners.append(PkcsScanner)
|
|
139
|
+
elif Util.is_rtf(data):
|
|
140
|
+
deep_scanners.append(RtfScanner)
|
|
141
|
+
fallback_scanners.append(ByteScanner)
|
|
136
142
|
elif Util.is_xml(data):
|
|
137
143
|
if Util.is_html(data):
|
|
138
144
|
deep_scanners.append(HtmlScanner)
|
|
@@ -150,7 +156,7 @@ class DeepScanner(
|
|
|
150
156
|
deep_scanners.append(XmlScanner)
|
|
151
157
|
fallback_scanners.append(ByteScanner)
|
|
152
158
|
elif Util.is_eml(data):
|
|
153
|
-
if ".eml"
|
|
159
|
+
if descriptor.extension in (".eml", ".mht"):
|
|
154
160
|
deep_scanners.append(EmlScanner)
|
|
155
161
|
else:
|
|
156
162
|
if 0 < depth:
|
|
@@ -158,9 +164,6 @@ class DeepScanner(
|
|
|
158
164
|
deep_scanners.append(PatchScanner)
|
|
159
165
|
fallback_scanners.append(EmlScanner)
|
|
160
166
|
fallback_scanners.append(ByteScanner)
|
|
161
|
-
elif Util.is_known(data):
|
|
162
|
-
# the format is known but cannot be scanned
|
|
163
|
-
pass
|
|
164
167
|
elif not Util.is_binary(data):
|
|
165
168
|
if 0 < depth:
|
|
166
169
|
deep_scanners.append(PatchScanner)
|
|
@@ -168,6 +171,9 @@ class DeepScanner(
|
|
|
168
171
|
deep_scanners.append(LangScanner)
|
|
169
172
|
deep_scanners.append(ByteScanner)
|
|
170
173
|
else:
|
|
171
|
-
|
|
172
|
-
|
|
174
|
+
if 0 < depth:
|
|
175
|
+
deep_scanners.append(StringsScanner)
|
|
176
|
+
else:
|
|
177
|
+
logger.warning("Cannot apply a deep scanner for type %s prefix %s %d", descriptor, repr(data[:32]),
|
|
178
|
+
len(data))
|
|
173
179
|
return deep_scanners, fallback_scanners
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from abc import ABC
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from striprtf import striprtf
|
|
6
|
+
|
|
7
|
+
from credsweeper.credentials.candidate import Candidate
|
|
8
|
+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
9
|
+
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
10
|
+
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
11
|
+
from credsweeper.utils.util import Util
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class RtfScanner(AbstractScanner, ABC):
    """Implements RTF (Rich Text Format) document scanning"""

    @staticmethod
    def get_lines(text: str) -> List[str]:
        """Extracts text lines from RTF format

        Args:
            text: RTF markup as a string

        Return:
            lines of the plain text produced by striprtf, split with Util.split_text
        """
        rtf_text = striprtf.rtf_to_text(text)
        lines = Util.split_text(rtf_text)
        return lines

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Scans data as RTF

        Args:
            data_provider: provider whose text is parsed as RTF markup
            depth: not used by this scanner
            recursive_limit_size: not used by this scanner

        Return:
            candidates found in the extracted text, or None when extraction or scan failed
        """
        try:
            string_data_provider = StringContentProvider(lines=RtfScanner.get_lines(data_provider.text),
                                                         file_path=data_provider.file_path,
                                                         file_type=data_provider.file_type,
                                                         info=f"{data_provider.info}|RTF")
            rtf_candidates = self.scanner.scan(string_data_provider)
            return rtf_candidates
        except Exception as rtf_exc:
            # best effort: a broken document is reported and the caller gets None
            logger.error(f"{data_provider.file_path}:{rtf_exc}")
        return None
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from abc import ABC
|
|
3
|
+
from typing import List, Optional, Tuple
|
|
4
|
+
|
|
5
|
+
from credsweeper.common.constants import MIN_DATA_LEN
|
|
6
|
+
from credsweeper.credentials.candidate import Candidate
|
|
7
|
+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
8
|
+
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
9
|
+
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class StringsScanner(AbstractScanner, ABC):
    """Implements binary data scanning with printable ASCII strings representations"""

    @staticmethod
    def get_strings(data: bytes) -> List[Tuple[str, int]]:
        """Finds contiguous runs of printable ASCII characters in binary data.

        Args:
            data: raw bytes to inspect

        Return:
            list of (string, offset) pairs for every run of TAB, SPACE and visible ASCII
            symbols which is at least MIN_DATA_LEN long. The offset is the position of
            the first byte of the run and is used instead of a line number.
        """
        strings: List[Tuple[str, int]] = []
        # None means "not inside a run" - 0 cannot be the marker because it is a valid offset
        start: Optional[int] = None
        for n, x in enumerate(data):
            if 0x09 == x or 0x20 <= x <= 0x7E:
                # TAB, SPACE and visible ASCII symbols
                if start is None:
                    start = n
            elif start is not None:
                # any other byte ends the run; a short run is dropped here,
                # so it cannot be merged with the next one
                if MIN_DATA_LEN <= n - start:
                    strings.append((data[start:n].decode("ascii"), start))
                start = None
        if start is not None and MIN_DATA_LEN <= len(data) - start:
            # the data ends inside a run
            strings.append((data[start:].decode("ascii"), start))
        return strings

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Extracts printable ASCII strings from binary data and scans them as text lines

        Args:
            data_provider: provider of the binary data
            depth: not used by this scanner
            recursive_limit_size: not used by this scanner

        Return:
            result of the scan of the extracted strings, or empty list when no strings were found
        """
        strings = StringsScanner.get_strings(data_provider.data)
        if not strings:
            return []
        string_data_provider = StringContentProvider(lines=[x[0] for x in strings],
                                                     line_numbers=[x[1] for x in strings],
                                                     file_path=data_provider.file_path,
                                                     file_type=data_provider.file_type,
                                                     info=f"{data_provider.info}|STRINGS")
        return self.scanner.scan(string_data_provider)
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from functools import cached_property
|
|
2
3
|
from typing import List, Optional, Generator
|
|
3
4
|
|
|
@@ -5,6 +6,8 @@ from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
|
5
6
|
from credsweeper.file_handler.content_provider import ContentProvider
|
|
6
7
|
from credsweeper.utils.util import Util
|
|
7
8
|
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
8
11
|
|
|
9
12
|
class ByteContentProvider(ContentProvider):
|
|
10
13
|
"""Allow to scan byte sequence instead of extra reading a file"""
|
|
@@ -42,7 +45,13 @@ class ByteContentProvider(ContentProvider):
|
|
|
42
45
|
def lines(self) -> List[str]:
|
|
43
46
|
"""lines RO getter for ByteContentProvider"""
|
|
44
47
|
if self.__lines is None:
|
|
45
|
-
|
|
48
|
+
text = Util.decode_text(self.__data)
|
|
49
|
+
if text is None:
|
|
50
|
+
logger.warning("Binary data detected %s %s %s", self.file_path, self.info,
|
|
51
|
+
repr(self.__data[:32]) if isinstance(self.__data, bytes) else "NONE")
|
|
52
|
+
self.__lines = []
|
|
53
|
+
else:
|
|
54
|
+
self.__lines = Util.split_text(text)
|
|
46
55
|
return self.__lines if self.__lines is not None else []
|
|
47
56
|
|
|
48
57
|
def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTarget, None, None]:
|
|
@@ -127,6 +127,8 @@ class FilePathExtractor:
|
|
|
127
127
|
Return:
|
|
128
128
|
True when the file full path should be excluded according config
|
|
129
129
|
"""
|
|
130
|
+
if config.pedantic:
|
|
131
|
+
return False
|
|
130
132
|
path = path.replace('\\', '/')
|
|
131
133
|
lower_path = path.lower()
|
|
132
134
|
if config.not_allowed_path_pattern.match(lower_path):
|
|
@@ -54,7 +54,13 @@ class TextContentProvider(ContentProvider):
|
|
|
54
54
|
def lines(self) -> Optional[List[str]]:
|
|
55
55
|
"""lines getter for TextContentProvider"""
|
|
56
56
|
if self.__lines is None:
|
|
57
|
-
|
|
57
|
+
text = Util.decode_text(self.data)
|
|
58
|
+
if text is None:
|
|
59
|
+
logger.warning("Binary file detected %s %s %s", self.file_path, self.info,
|
|
60
|
+
repr(self.__data[:32]) if isinstance(self.__data, bytes) else "NONE")
|
|
61
|
+
self.__lines = []
|
|
62
|
+
else:
|
|
63
|
+
self.__lines = Util.split_text(text)
|
|
58
64
|
return self.__lines if self.__lines is not None else []
|
|
59
65
|
|
|
60
66
|
def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTarget, None, None]:
|
|
@@ -13,7 +13,6 @@ from credsweeper.filters.value_base64_part_check import ValueBase64PartCheck
|
|
|
13
13
|
from credsweeper.filters.value_basic_auth_check import ValueBasicAuthCheck
|
|
14
14
|
from credsweeper.filters.value_blocklist_check import ValueBlocklistCheck
|
|
15
15
|
from credsweeper.filters.value_camel_case_check import ValueCamelCaseCheck
|
|
16
|
-
from credsweeper.filters.value_couple_keyword_check import ValueCoupleKeywordCheck
|
|
17
16
|
from credsweeper.filters.value_dictionary_keyword_check import ValueDictionaryKeywordCheck
|
|
18
17
|
from credsweeper.filters.value_discord_bot_check import ValueDiscordBotCheck
|
|
19
18
|
from credsweeper.filters.value_entropy_base32_check import ValueEntropyBase32Check
|
|
@@ -30,6 +29,7 @@ from credsweeper.filters.value_json_web_token_check import ValueJsonWebTokenChec
|
|
|
30
29
|
from credsweeper.filters.value_last_word_check import ValueLastWordCheck
|
|
31
30
|
from credsweeper.filters.value_length_check import ValueLengthCheck
|
|
32
31
|
from credsweeper.filters.value_method_check import ValueMethodCheck
|
|
32
|
+
from credsweeper.filters.value_morphemes_check import ValueMorphemesCheck
|
|
33
33
|
from credsweeper.filters.value_not_allowed_pattern_check import ValueNotAllowedPatternCheck
|
|
34
34
|
from credsweeper.filters.value_not_part_encoded_check import ValueNotPartEncodedCheck
|
|
35
35
|
from credsweeper.filters.value_number_check import ValueNumberCheck
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from credsweeper.common.constants import GroupType
|
|
2
2
|
from credsweeper.config.config import Config
|
|
3
|
-
from credsweeper.filters import
|
|
3
|
+
from credsweeper.filters import ValueMorphemesCheck, ValueCamelCaseCheck, ValueNumberCheck, ValuePatternCheck
|
|
4
4
|
from credsweeper.filters.group.group import Group
|
|
5
5
|
|
|
6
6
|
|
|
@@ -10,7 +10,7 @@ class TokenPattern(Group):
|
|
|
10
10
|
def __init__(self, config: Config) -> None:
|
|
11
11
|
super().__init__(config, GroupType.DEFAULT)
|
|
12
12
|
self.filters = [
|
|
13
|
-
|
|
13
|
+
ValueMorphemesCheck(),
|
|
14
14
|
ValueNumberCheck(),
|
|
15
15
|
ValueCamelCaseCheck(),
|
|
16
16
|
ValuePatternCheck(),
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from credsweeper.common.constants import GroupType
|
|
2
2
|
from credsweeper.config.config import Config
|
|
3
|
-
from credsweeper.filters import
|
|
3
|
+
from credsweeper.filters import ValueMorphemesCheck, ValuePatternCheck, ValueNumberCheck, ValueEntropyBase36Check, \
|
|
4
4
|
ValueTokenBase36Check
|
|
5
5
|
from credsweeper.filters.group.group import Group
|
|
6
6
|
|
|
@@ -11,7 +11,7 @@ class WeirdBase36Token(Group):
|
|
|
11
11
|
def __init__(self, config: Config) -> None:
|
|
12
12
|
super().__init__(config, GroupType.DEFAULT)
|
|
13
13
|
self.filters = [
|
|
14
|
-
|
|
14
|
+
ValueMorphemesCheck(threshold=1),
|
|
15
15
|
ValuePatternCheck(),
|
|
16
16
|
ValueNumberCheck(),
|
|
17
17
|
ValueTokenBase36Check(),
|