credsweeper 1.10.7__tar.gz → 1.10.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- {credsweeper-1.10.7 → credsweeper-1.10.8}/PKG-INFO +4 -2
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/__init__.py +1 -1
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/common/keyword_pattern.py +1 -1
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/bzip2_scanner.py +1 -1
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/deep_scanner.py +77 -37
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/gzip_scanner.py +1 -1
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/word_in_path.py +4 -2
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/rules/config.yaml +32 -1
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/scanner/scanner.py +6 -1
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/secret/config.json +4 -1
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/utils/util.py +24 -8
- {credsweeper-1.10.7 → credsweeper-1.10.8}/pyproject.toml +3 -1
- {credsweeper-1.10.7 → credsweeper-1.10.8}/.gitignore +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/LICENSE +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/README.md +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/__main__.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/app.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/common/__init__.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/common/constants.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/common/keyword_checklist.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/common/keyword_checklist.txt +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/common/morpheme_checklist.txt +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/config/__init__.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/config/config.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/credentials/__init__.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/credentials/augment_candidates.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/credentials/candidate.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/credentials/candidate_group_generator.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/credentials/candidate_key.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/credentials/credential_manager.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/credentials/line_data.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/__init__.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/abstract_scanner.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/byte_scanner.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/docx_scanner.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/eml_scanner.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/encoder_scanner.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/html_scanner.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/jks_scanner.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/lang_scanner.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/mxfile_scanner.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/pdf_scanner.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/pkcs12_scanner.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/pptx_scanner.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/tar_scanner.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/xlsx_scanner.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/xml_scanner.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/deep_scanner/zip_scanner.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/file_handler/__init__.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/file_handler/abstract_provider.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/file_handler/analysis_target.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/file_handler/byte_content_provider.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/file_handler/content_provider.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/file_handler/data_content_provider.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/file_handler/descriptor.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/file_handler/diff_content_provider.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/file_handler/file_path_extractor.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/file_handler/files_provider.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/file_handler/patches_provider.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/file_handler/string_content_provider.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/file_handler/struct_content_provider.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/file_handler/text_content_provider.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/__init__.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/filter.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/group/__init__.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/group/general_keyword.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/group/general_pattern.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/group/group.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/group/password_keyword.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/group/token_pattern.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/group/url_credentials_group.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/group/weird_base36_token.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/group/weird_base64_token.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/line_git_binary_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/line_specific_key_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/line_uue_part_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_allowlist_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_array_dictionary_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_atlassian_token_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_azure_token_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_base32_data_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_base64_data_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_base64_encoded_pem_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_base64_key_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_base64_part_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_blocklist_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_camel_case_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_couple_keyword_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_dictionary_keyword_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_dictionary_value_length_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_discord_bot_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_entropy_base32_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_entropy_base36_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_entropy_base64_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_file_path_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_first_word_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_github_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_grafana_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_grafana_service_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_hex_number_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_jfrog_token_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_json_web_token_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_last_word_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_method_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_not_allowed_pattern_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_not_part_encoded_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_number_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_pattern_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_similarity_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_split_keyword_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_string_type_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_token_base32_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_token_base36_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_token_base64_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_token_base_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_token_check.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/logger/__init__.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/logger/logger.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/__init__.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/__init__.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/entropy_evaluation.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/feature.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/file_extension.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/has_html_tag.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/is_secret_numeric.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/length_of_attribute.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/morpheme_dense.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/rule_name.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/search_in_attribute.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/word_in.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/word_in_line.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/word_in_value.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/word_in_variable.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/ml_config.json +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/ml_model.onnx +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/ml_validator.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/py.typed +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/rules/__init__.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/rules/rule.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/scanner/__init__.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/scanner/scan_type/__init__.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/scanner/scan_type/multi_pattern.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/scanner/scan_type/pem_key_pattern.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/scanner/scan_type/scan_type.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/scanner/scan_type/single_pattern.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/secret/log.yaml +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/utils/__init__.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/utils/entropy_validator.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/utils/hop_stat.py +0 -0
- {credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/utils/pem_key_detector.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: credsweeper
|
|
3
|
-
Version: 1.10.7
|
|
3
|
+
Version: 1.10.8
|
|
4
4
|
Summary: Credential Sweeper
|
|
5
5
|
Project-URL: Homepage, https://github.com/Samsung/CredSweeper
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues
|
|
@@ -26,7 +26,8 @@ Requires-Dist: humanfriendly
|
|
|
26
26
|
Requires-Dist: lxml
|
|
27
27
|
Requires-Dist: numpy<2.0.0
|
|
28
28
|
Requires-Dist: odfpy
|
|
29
|
-
Requires-Dist: onnxruntime
|
|
29
|
+
Requires-Dist: onnxruntime; platform_system != 'Windows'
|
|
30
|
+
Requires-Dist: onnxruntime==1.19.2; platform_system == 'Windows'
|
|
30
31
|
Requires-Dist: openpyxl
|
|
31
32
|
Requires-Dist: pandas
|
|
32
33
|
Requires-Dist: pdfminer-six
|
|
@@ -37,6 +38,7 @@ Requires-Dist: python-docx
|
|
|
37
38
|
Requires-Dist: python-pptx
|
|
38
39
|
Requires-Dist: pyyaml
|
|
39
40
|
Requires-Dist: whatthepatch
|
|
41
|
+
Requires-Dist: xlrd
|
|
40
42
|
Description-Content-Type: text/markdown
|
|
41
43
|
|
|
42
44
|
# CredSweeper
|
|
@@ -26,7 +26,7 @@ class KeywordPattern:
|
|
|
26
26
|
string_prefix = r"(((b|r|br|rb|u|f|rf|fr|l|@)(?=(\\*[`'\"])))?"
|
|
27
27
|
left_quote = r"(?P<value_leftquote>((?P<esq>\\{1,8})?([`'\"]|&(quot|apos);)){1,4}))?"
|
|
28
28
|
# Authentication scheme ( oauth | basic | bearer | apikey ) precedes to credential
|
|
29
|
-
auth_keywords = r"(\s?(oauth|bot|basic|bearer|apikey|accesskey)\s)?"
|
|
29
|
+
auth_keywords = r"(\s?(oauth|bot|basic|bearer|apikey|accesskey|ssws|ntlm)\s)?"
|
|
30
30
|
value = r"(?P<value>" \
|
|
31
31
|
r"(?(value_leftquote)" \
|
|
32
32
|
r"(" \
|
|
@@ -29,7 +29,7 @@ class Bzip2Scanner(AbstractScanner, ABC):
|
|
|
29
29
|
bzip2_content_provider = DataContentProvider(data=bz2.decompress(data_provider.data),
|
|
30
30
|
file_path=new_path,
|
|
31
31
|
file_type=Util.get_extension(new_path),
|
|
32
|
-
info=f"{data_provider.info}|BZIP2:{
|
|
32
|
+
info=f"{data_provider.info}|BZIP2:{file_path}")
|
|
33
33
|
new_limit = recursive_limit_size - len(bzip2_content_provider.data)
|
|
34
34
|
bzip2_candidates = self.recursive_scan(bzip2_content_provider, depth, new_limit)
|
|
35
35
|
return bzip2_candidates
|
|
@@ -76,17 +76,32 @@ class DeepScanner(
|
|
|
76
76
|
return self.__scanner
|
|
77
77
|
|
|
78
78
|
@staticmethod
|
|
79
|
-
def get_deep_scanners(data: bytes, file_type: str, depth: int) -> List[Any]:
|
|
80
|
-
"""Returns possibly scan methods for the data depends on content"""
|
|
79
|
+
def get_deep_scanners(data: bytes, file_type: str, depth: int) -> Tuple[List[Any], List[Any]]:
|
|
80
|
+
"""Returns possibly scan methods for the data depends on content and fallback scanners"""
|
|
81
81
|
deep_scanners: List[Any] = []
|
|
82
|
+
fallback_scanners: List[Any] = []
|
|
82
83
|
if Util.is_zip(data):
|
|
83
84
|
if 0 < depth:
|
|
84
85
|
deep_scanners.append(ZipScanner)
|
|
85
|
-
# probably, there might be a docx,
|
|
86
|
+
# probably, there might be a docx, xlsx and so on.
|
|
86
87
|
# It might be scanned with text representation in third-party libraries.
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
88
|
+
if file_type in (".xlsx", ".ods"):
|
|
89
|
+
deep_scanners.append(XlsxScanner)
|
|
90
|
+
else:
|
|
91
|
+
fallback_scanners.append(XlsxScanner)
|
|
92
|
+
if ".docx" == file_type:
|
|
93
|
+
deep_scanners.append(DocxScanner)
|
|
94
|
+
else:
|
|
95
|
+
fallback_scanners.append(DocxScanner)
|
|
96
|
+
if ".pptx" == file_type:
|
|
97
|
+
deep_scanners.append(PptxScanner)
|
|
98
|
+
else:
|
|
99
|
+
fallback_scanners.append(PptxScanner)
|
|
100
|
+
elif Util.is_com(data):
|
|
101
|
+
if ".xls" == file_type:
|
|
102
|
+
deep_scanners.append(XlsxScanner)
|
|
103
|
+
else:
|
|
104
|
+
fallback_scanners.append(XlsxScanner)
|
|
90
105
|
elif Util.is_bzip2(data):
|
|
91
106
|
if 0 < depth:
|
|
92
107
|
deep_scanners.append(Bzip2Scanner)
|
|
@@ -102,25 +117,67 @@ class DeepScanner(
|
|
|
102
117
|
deep_scanners.append(JksScanner)
|
|
103
118
|
elif Util.is_asn1(data):
|
|
104
119
|
deep_scanners.append(Pkcs12Scanner)
|
|
105
|
-
elif file_type in [".eml", ".mht"]:
|
|
106
|
-
if Util.is_eml(data):
|
|
107
|
-
deep_scanners.append(EmlScanner)
|
|
108
|
-
elif Util.is_xml(data) and Util.is_html(data):
|
|
109
|
-
deep_scanners.append(HtmlScanner)
|
|
110
|
-
else:
|
|
111
|
-
deep_scanners.append(ByteScanner)
|
|
112
120
|
elif Util.is_xml(data):
|
|
113
121
|
if Util.is_html(data):
|
|
114
122
|
deep_scanners.append(HtmlScanner)
|
|
123
|
+
deep_scanners.append(XmlScanner)
|
|
124
|
+
fallback_scanners.append(ByteScanner)
|
|
115
125
|
elif Util.is_mxfile(data):
|
|
116
126
|
deep_scanners.append(MxfileScanner)
|
|
117
|
-
|
|
118
|
-
|
|
127
|
+
deep_scanners.append(XmlScanner)
|
|
128
|
+
fallback_scanners.append(ByteScanner)
|
|
129
|
+
else:
|
|
130
|
+
deep_scanners.append(XmlScanner)
|
|
131
|
+
fallback_scanners.append(ByteScanner)
|
|
132
|
+
elif Util.is_eml(data):
|
|
133
|
+
if ".eml" == file_type:
|
|
134
|
+
deep_scanners.append(EmlScanner)
|
|
135
|
+
else:
|
|
136
|
+
fallback_scanners.append(EmlScanner)
|
|
137
|
+
fallback_scanners.append(ByteScanner)
|
|
138
|
+
elif not Util.is_binary(data):
|
|
119
139
|
if 0 < depth:
|
|
120
140
|
deep_scanners.append(EncoderScanner)
|
|
121
141
|
deep_scanners.append(LangScanner)
|
|
122
142
|
deep_scanners.append(ByteScanner)
|
|
123
|
-
|
|
143
|
+
else:
|
|
144
|
+
logger.warning("Cannot apply a deep scanner for type %s", file_type)
|
|
145
|
+
return deep_scanners, fallback_scanners
|
|
146
|
+
|
|
147
|
+
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
148
|
+
|
|
149
|
+
def deep_scan_with_fallback(self, data_provider: DataContentProvider, depth: int,
                            recursive_limit_size: int) -> List[Candidate]:
    """Scans with deep scanners and fallback scanners if possible

    The primary (deep) scanners are all applied first. A scanner returns None
    when it does not recognise the content; any other result (including an
    empty list) counts as a successful scan. Only when no primary scanner
    succeeded are the fallback scanners tried, in order, and the first one
    that recognises the content is used.

    Args:
        data_provider: DataContentProvider with raw data
        depth: maximal level of recursion
        recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack

    Returns: list with candidates

    """
    candidates: List[Candidate] = []
    deep_scanners, fallback_scanners = self.get_deep_scanners(data_provider.data, data_provider.file_type, depth)
    fallback = True
    for scan_class in deep_scanners:
        new_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
        if new_candidates is None:
            # scanner did not recognise the content type
            continue
        augment_candidates(candidates, new_candidates)
        # this scan is successful, so fallback is not necessary
        fallback = False
    if fallback:
        # iterate the FALLBACK scanners here: the primary scanners were already
        # tried above and none of them recognised the content
        for scan_class in fallback_scanners:
            fallback_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
            if fallback_candidates is None:
                continue
            augment_candidates(candidates, fallback_candidates)
            # use only first successful fallback scanner
            break
    return candidates
|
|
124
181
|
|
|
125
182
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
126
183
|
|
|
@@ -160,17 +217,8 @@ class DeepScanner(
|
|
|
160
217
|
file_path=content_provider.file_path,
|
|
161
218
|
file_type=content_provider.file_type,
|
|
162
219
|
info=content_provider.info or info)
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
fallback = True
|
|
166
|
-
for scan_class in scanner_classes:
|
|
167
|
-
if new_candidates := scan_class.data_scan(self, data_provider, depth, recursive_limit_size - len(data)):
|
|
168
|
-
augment_candidates(candidates, new_candidates)
|
|
169
|
-
fallback = False
|
|
170
|
-
if fallback and ByteScanner not in scanner_classes and not Util.is_binary(data):
|
|
171
|
-
# wrong assumption case
|
|
172
|
-
fallback_candidates = ByteScanner.data_scan(self, data_provider, depth, recursive_limit_size)
|
|
173
|
-
augment_candidates(candidates, fallback_candidates)
|
|
220
|
+
new_candidates = self.deep_scan_with_fallback(data_provider, depth, recursive_limit_size - len(data))
|
|
221
|
+
augment_candidates(candidates, new_candidates)
|
|
174
222
|
return candidates
|
|
175
223
|
|
|
176
224
|
def recursive_scan(
|
|
@@ -203,16 +251,8 @@ class DeepScanner(
|
|
|
203
251
|
FilePathExtractor.FIND_BY_EXT_RULE)
|
|
204
252
|
candidates.append(dummy_candidate)
|
|
205
253
|
else:
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
scanner_classes = self.get_deep_scanners(data_provider.data, data_provider.file_type, depth)
|
|
209
|
-
for scanner_class in scanner_classes:
|
|
210
|
-
if new_candidates := scanner_class.data_scan(self, data_provider, depth, recursive_limit_size):
|
|
211
|
-
augment_candidates(candidates, new_candidates)
|
|
212
|
-
fallback = False
|
|
213
|
-
if fallback and ByteScanner not in scanner_classes and not Util.is_binary(data_provider.data):
|
|
214
|
-
bypass_candidates = ByteScanner.data_scan(self, data_provider, depth, recursive_limit_size)
|
|
215
|
-
augment_candidates(candidates, bypass_candidates)
|
|
254
|
+
new_candidates = self.deep_scan_with_fallback(data_provider, depth, recursive_limit_size)
|
|
255
|
+
augment_candidates(candidates, new_candidates)
|
|
216
256
|
|
|
217
257
|
return candidates
|
|
218
258
|
|
|
@@ -31,7 +31,7 @@ class GzipScanner(AbstractScanner, ABC):
|
|
|
31
31
|
gzip_content_provider = DataContentProvider(data=f.read(),
|
|
32
32
|
file_path=new_path,
|
|
33
33
|
file_type=Util.get_extension(new_path),
|
|
34
|
-
info=f"{data_provider.info}|GZIP:{
|
|
34
|
+
info=f"{data_provider.info}|GZIP:{file_path}")
|
|
35
35
|
new_limit = recursive_limit_size - len(gzip_content_provider.data)
|
|
36
36
|
gzip_candidates = self.recursive_scan(gzip_content_provider, depth, new_limit)
|
|
37
37
|
return gzip_candidates
|
|
@@ -21,8 +21,10 @@ class WordInPath(WordIn):
|
|
|
21
21
|
|
|
22
22
|
def __call__(self, candidates: List[Candidate]) -> np.ndarray:
|
|
23
23
|
# actually there must be one path because the candidates are grouped before
|
|
24
|
-
if
|
|
25
|
-
|
|
24
|
+
if file_path := candidates[0].line_data_list[0].path:
|
|
25
|
+
path = Path(file_path)
|
|
26
|
+
# apply ./ for normalised path to detect "/src" for relative path
|
|
27
|
+
posix_lower_path = path.as_posix().lower() if path.is_absolute() else f"./{path.as_posix().lower()}"
|
|
26
28
|
return self.word_in_str(posix_lower_path)
|
|
27
29
|
else:
|
|
28
30
|
return np.array([np.zeros(shape=[self.dimension], dtype=np.int8)])
|
|
@@ -576,7 +576,7 @@
|
|
|
576
576
|
confidence: strong
|
|
577
577
|
type: pattern
|
|
578
578
|
values:
|
|
579
|
-
- (?:(?<![0-9A-Za-z_-])|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P<value>xox[
|
|
579
|
+
- (?:(?<![0-9A-Za-z_-])|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P<value>xox[a-z]\-[0-9A-Za-z-]{10,250})(?![0-9A-Za-z_-])
|
|
580
580
|
filter_type: GeneralPattern
|
|
581
581
|
required_substrings:
|
|
582
582
|
- xox
|
|
@@ -1407,6 +1407,37 @@
|
|
|
1407
1407
|
- code
|
|
1408
1408
|
- doc
|
|
1409
1409
|
|
|
1410
|
+
- name: Sentry Organization Auth Token
|
|
1411
|
+
severity: high
|
|
1412
|
+
confidence: strong
|
|
1413
|
+
type: pattern
|
|
1414
|
+
values:
|
|
1415
|
+
- (?:(?<![0-9A-Za-z_-])|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P<value>sntrys_eyJ[0-9A-Za-z_-]{80,8000}=*([0-9A-Za-z_-]{32,256})?)(?![0-9A-Za-z_-])
|
|
1416
|
+
min_line_len: 37
|
|
1417
|
+
filter_type:
|
|
1418
|
+
- ValuePatternCheck(5)
|
|
1419
|
+
- ValueEntropyBase64Check
|
|
1420
|
+
required_substrings:
|
|
1421
|
+
- sntrys_eyJ
|
|
1422
|
+
target:
|
|
1423
|
+
- code
|
|
1424
|
+
- doc
|
|
1425
|
+
|
|
1426
|
+
- name: Sentry User Auth Token
|
|
1427
|
+
severity: high
|
|
1428
|
+
confidence: strong
|
|
1429
|
+
type: pattern
|
|
1430
|
+
values:
|
|
1431
|
+
- (?:(?<![0-9A-Za-z_-])|\\[0abfnrtv]|(%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu]([0-9A-Fa-f]{4}){1,2}|\x1B\[[0-9;]{0,80}m)(?P<value>sntryu_[0-9a-f]{64})(?![0-9A-Za-z_-])
|
|
1432
|
+
min_line_len: 37
|
|
1433
|
+
filter_type:
|
|
1434
|
+
- ValuePatternCheck(5)
|
|
1435
|
+
required_substrings:
|
|
1436
|
+
- sntryu_
|
|
1437
|
+
target:
|
|
1438
|
+
- code
|
|
1439
|
+
- doc
|
|
1440
|
+
|
|
1410
1441
|
- name: Discord Bot Token
|
|
1411
1442
|
severity: high
|
|
1412
1443
|
confidence: strong
|
|
@@ -69,6 +69,7 @@ class Scanner:
|
|
|
69
69
|
rule_path = APP_PATH / "rules" / "config.yaml"
|
|
70
70
|
rule_templates = Util.yaml_load(rule_path)
|
|
71
71
|
if rule_templates and isinstance(rule_templates, list):
|
|
72
|
+
rule_names = set()
|
|
72
73
|
for rule_template in rule_templates:
|
|
73
74
|
try:
|
|
74
75
|
rule = Rule(self.config, rule_template)
|
|
@@ -77,6 +78,10 @@ class Scanner:
|
|
|
77
78
|
raise exc
|
|
78
79
|
if not self._is_available(rule):
|
|
79
80
|
continue
|
|
81
|
+
if rule.rule_name in rule_names:
|
|
82
|
+
raise RuntimeError(f"Duplicated rule name {rule.rule_name}")
|
|
83
|
+
else:
|
|
84
|
+
rule_names.add(rule.rule_name)
|
|
80
85
|
if 0 < rule.min_line_len:
|
|
81
86
|
if rule.rule_type == RuleType.KEYWORD:
|
|
82
87
|
self.min_keyword_len = min(self.min_keyword_len, rule.min_line_len)
|
|
@@ -141,7 +146,7 @@ class Scanner:
|
|
|
141
146
|
# "cache" - YAPF and pycharm formatters ...
|
|
142
147
|
matched_keyword = \
|
|
143
148
|
target_line_stripped_len >= self.min_keyword_len and ( #
|
|
144
|
-
|
|
149
|
+
'=' in target_line_stripped or ':' in target_line_stripped) #
|
|
145
150
|
matched_pem_key = \
|
|
146
151
|
target_line_stripped_len >= self.min_pem_key_len \
|
|
147
152
|
and PEM_BEGIN_PATTERN in target_line_stripped and "PRIVATE" in target_line_stripped
|
|
@@ -153,19 +153,26 @@ class Util:
|
|
|
153
153
|
return entropy < min_entropy
|
|
154
154
|
|
|
155
155
|
@staticmethod
|
|
156
|
-
def
|
|
156
|
+
def is_known(data: bytes) -> bool:
|
|
157
157
|
"""
|
|
158
158
|
Returns true if any recognized binary format found
|
|
159
|
-
or two zeroes sequence is found which never exists in text format (UTF-8, UTF-16)
|
|
160
|
-
UTF-32 is not supported
|
|
161
159
|
"""
|
|
162
160
|
if Util.is_zip(data) \
|
|
163
161
|
or Util.is_gzip(data) \
|
|
164
162
|
or Util.is_tar(data) \
|
|
165
163
|
or Util.is_bzip2(data) \
|
|
164
|
+
or Util.is_com(data) \
|
|
166
165
|
or Util.is_pdf(data) \
|
|
167
166
|
or Util.is_elf(data):
|
|
168
167
|
return True
|
|
168
|
+
return False
|
|
169
|
+
|
|
170
|
+
@staticmethod
|
|
171
|
+
def is_binary(data: bytes) -> bool:
|
|
172
|
+
"""
|
|
173
|
+
Returns True when two zeroes sequence is found which never exists in text format (UTF-8, UTF-16)
|
|
174
|
+
UTF-32 is not supported
|
|
175
|
+
"""
|
|
169
176
|
if 0 <= data.find(b"\0\0", 0, MAX_LINE_LENGTH):
|
|
170
177
|
return True
|
|
171
178
|
non_ascii_cnt = 0
|
|
@@ -224,7 +231,7 @@ class Util:
|
|
|
224
231
|
encodings = AVAILABLE_ENCODINGS
|
|
225
232
|
for encoding in encodings:
|
|
226
233
|
try:
|
|
227
|
-
if binary_suggest and LATIN_1 == encoding and Util.is_binary(content):
|
|
234
|
+
if binary_suggest and LATIN_1 == encoding and (Util.is_known(content) or Util.is_binary(content)):
|
|
228
235
|
# LATIN_1 may convert data (bytes in range 0x80:0xFF are transformed)
|
|
229
236
|
# so skip this encoding when checking binaries
|
|
230
237
|
logger.warning("Binary file detected")
|
|
@@ -390,6 +397,15 @@ class Util:
|
|
|
390
397
|
return False
|
|
391
398
|
return False
|
|
392
399
|
|
|
400
|
+
@staticmethod
|
|
401
|
+
def is_com(data: bytes) -> bool:
|
|
402
|
+
"""According https://en.wikipedia.org/wiki/List_of_file_signatures"""
|
|
403
|
+
if isinstance(data, bytes) and 8 < len(data):
|
|
404
|
+
if data.startswith(b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"):
|
|
405
|
+
# Compound File Binary Format: doc, xls, ppt, msi, msg
|
|
406
|
+
return True
|
|
407
|
+
return False
|
|
408
|
+
|
|
393
409
|
@staticmethod
|
|
394
410
|
def is_tar(data: bytes) -> bool:
|
|
395
411
|
"""According https://en.wikipedia.org/wiki/List_of_file_signatures"""
|
|
@@ -520,10 +536,10 @@ class Util:
|
|
|
520
536
|
def is_eml(data: Union[bytes, bytearray]) -> bool:
|
|
521
537
|
"""According to https://datatracker.ietf.org/doc/html/rfc822 lookup the fields: Date, From, To or Subject"""
|
|
522
538
|
if isinstance(data, (bytes, bytearray)):
|
|
523
|
-
if (
|
|
524
|
-
and (b"\nFrom:" in data or data.startswith(b"From:"))
|
|
525
|
-
and (b"\nTo:" in data or data.startswith(b"To:")
|
|
526
|
-
|
|
539
|
+
if (b"\nDate:" in data or data.startswith(b"Date:")) \
|
|
540
|
+
and (b"\nFrom:" in data or data.startswith(b"From:")) \
|
|
541
|
+
and (b"\nTo:" in data or data.startswith(b"To:")) \
|
|
542
|
+
and (b"\nSubject:" in data or data.startswith(b"Subject:")):
|
|
527
543
|
return True
|
|
528
544
|
return False
|
|
529
545
|
|
|
@@ -14,7 +14,8 @@ dependencies = [
|
|
|
14
14
|
"lxml",
|
|
15
15
|
"numpy<2.0.0",
|
|
16
16
|
"odfpy",
|
|
17
|
-
"onnxruntime",
|
|
17
|
+
"onnxruntime==1.19.2; platform_system == 'Windows'", # Python 3.9 limitation
|
|
18
|
+
"onnxruntime; platform_system != 'Windows'",
|
|
18
19
|
"openpyxl",
|
|
19
20
|
"pandas",
|
|
20
21
|
"pdfminer.six",
|
|
@@ -25,6 +26,7 @@ dependencies = [
|
|
|
25
26
|
"python-pptx",
|
|
26
27
|
"PyYAML",
|
|
27
28
|
"whatthepatch",
|
|
29
|
+
"xlrd",
|
|
28
30
|
]
|
|
29
31
|
requires-python = ">=3.9"
|
|
30
32
|
readme = "README.md"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/credentials/candidate_group_generator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/file_handler/string_content_provider.py
RENAMED
|
File without changes
|
{credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/file_handler/struct_content_provider.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/group/url_credentials_group.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_array_dictionary_check.py
RENAMED
|
File without changes
|
{credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_atlassian_token_check.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_base64_encoded_pem_check.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_dictionary_keyword_check.py
RENAMED
|
File without changes
|
{credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_dictionary_value_length_check.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_grafana_service_check.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_not_allowed_pattern_check.py
RENAMED
|
File without changes
|
{credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/filters/value_not_part_encoded_check.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/entropy_evaluation.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/is_secret_numeric.py
RENAMED
|
File without changes
|
{credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/length_of_attribute.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{credsweeper-1.10.7 → credsweeper-1.10.8}/credsweeper/ml_model/features/search_in_attribute.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|