credsweeper 1.11.2__tar.gz → 1.11.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- {credsweeper-1.11.2 → credsweeper-1.11.3}/PKG-INFO +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/__init__.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/__main__.py +6 -4
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/app.py +7 -3
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/common/keyword_pattern.py +15 -9
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/common/morpheme_checklist.txt +4 -2
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/credentials/line_data.py +14 -10
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/abstract_scanner.py +10 -1
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/deep_scanner.py +19 -8
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/docx_scanner.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/encoder_scanner.py +2 -2
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/html_scanner.py +3 -3
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/jks_scanner.py +2 -4
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/lang_scanner.py +2 -2
- credsweeper-1.11.3/credsweeper/deep_scanner/lzma_scanner.py +40 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/pkcs12_scanner.py +3 -5
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/xml_scanner.py +2 -2
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/data_content_provider.py +21 -12
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_array_dictionary_check.py +3 -1
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_azure_token_check.py +1 -2
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_base64_part_check.py +30 -21
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_discord_bot_check.py +1 -2
- credsweeper-1.11.3/credsweeper/filters/value_entropy_base32_check.py +22 -0
- credsweeper-1.11.3/credsweeper/filters/value_entropy_base36_check.py +23 -0
- credsweeper-1.11.3/credsweeper/filters/value_entropy_base64_check.py +30 -0
- credsweeper-1.11.3/credsweeper/filters/value_entropy_base_check.py +37 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_file_path_check.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_hex_number_check.py +3 -3
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_json_web_token_check.py +4 -5
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_string_type_check.py +11 -3
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_token_base32_check.py +0 -4
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_token_base36_check.py +0 -4
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_token_base64_check.py +0 -4
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_token_check.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/file_extension.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/morpheme_dense.py +0 -4
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/rule_name.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/word_in_path.py +0 -9
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/word_in_postamble.py +0 -11
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/word_in_preamble.py +0 -11
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/word_in_transition.py +0 -11
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/word_in_value.py +0 -11
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/word_in_variable.py +0 -11
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/ml_validator.py +4 -3
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/rules/config.yaml +238 -208
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/scanner/scan_type/scan_type.py +2 -3
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/scanner/scanner.py +7 -1
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/secret/config.json +16 -5
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/utils/pem_key_detector.py +4 -5
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/utils/util.py +67 -144
- credsweeper-1.11.2/credsweeper/filters/value_entropy_base32_check.py +0 -42
- credsweeper-1.11.2/credsweeper/filters/value_entropy_base36_check.py +0 -46
- credsweeper-1.11.2/credsweeper/filters/value_entropy_base64_check.py +0 -59
- credsweeper-1.11.2/credsweeper/utils/entropy_validator.py +0 -72
- {credsweeper-1.11.2 → credsweeper-1.11.3}/.gitignore +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/LICENSE +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/README.md +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/common/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/common/constants.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/common/keyword_checklist.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/common/keyword_checklist.txt +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/config/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/config/config.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/credentials/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/credentials/augment_candidates.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/credentials/candidate.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/credentials/candidate_group_generator.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/credentials/candidate_key.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/credentials/credential_manager.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/byte_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/bzip2_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/eml_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/gzip_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/mxfile_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/pdf_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/pptx_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/tar_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/tmx_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/xlsx_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/zip_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/abstract_provider.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/analysis_target.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/byte_content_provider.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/content_provider.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/descriptor.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/diff_content_provider.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/file_path_extractor.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/files_provider.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/patches_provider.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/string_content_provider.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/struct_content_provider.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/text_content_provider.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/filter.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/general_keyword.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/general_pattern.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/group.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/password_keyword.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/token_pattern.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/url_credentials_group.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/weird_base36_token.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/weird_base64_token.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/line_git_binary_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/line_specific_key_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/line_uue_part_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_allowlist_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_atlassian_token_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_base32_data_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_base64_data_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_base64_encoded_pem_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_base64_key_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_blocklist_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_camel_case_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_couple_keyword_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_dictionary_keyword_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_dictionary_value_length_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_github_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_grafana_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_grafana_service_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_jfrog_token_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_last_word_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_method_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_not_allowed_pattern_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_not_part_encoded_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_number_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_pattern_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_similarity_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_split_keyword_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_token_base_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/logger/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/logger/logger.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/entropy_evaluation.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/feature.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/has_html_tag.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/is_secret_numeric.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/length_of_attribute.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/search_in_attribute.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/word_in.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/ml_config.json +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/ml_model.onnx +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/py.typed +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/rules/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/rules/rule.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/scanner/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/scanner/scan_type/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/scanner/scan_type/multi_pattern.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/scanner/scan_type/pem_key_pattern.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/scanner/scan_type/single_pattern.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/secret/log.yaml +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/utils/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/utils/hop_stat.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.3}/pyproject.toml +0 -0
|
@@ -4,6 +4,7 @@ import os
|
|
|
4
4
|
import sys
|
|
5
5
|
import time
|
|
6
6
|
from argparse import ArgumentParser, ArgumentTypeError, Namespace, BooleanOptionalAction
|
|
7
|
+
from pathlib import Path
|
|
7
8
|
from typing import Any, Union, Dict
|
|
8
9
|
|
|
9
10
|
from credsweeper import __version__
|
|
@@ -88,10 +89,11 @@ def check_integrity() -> int:
|
|
|
88
89
|
Returns CRC32 of files in integer
|
|
89
90
|
"""
|
|
90
91
|
crc32 = 0
|
|
91
|
-
for root,
|
|
92
|
-
for
|
|
93
|
-
if Util.get_extension(
|
|
94
|
-
|
|
92
|
+
for root, _dirs, files in os.walk(APP_PATH):
|
|
93
|
+
for file_name in files:
|
|
94
|
+
if Util.get_extension(file_name) in [".py", ".json", ".txt", ".yaml", ".onnx"]:
|
|
95
|
+
file_path = Path(root) / file_name
|
|
96
|
+
data = Util.read_data(file_path)
|
|
95
97
|
if data:
|
|
96
98
|
crc32 ^= binascii.crc32(data)
|
|
97
99
|
return crc32
|
|
@@ -11,7 +11,7 @@ from colorama import Style
|
|
|
11
11
|
# Directory of credsweeper sources MUST be placed before imports to avoid circular import error
|
|
12
12
|
APP_PATH = Path(__file__).resolve().parent
|
|
13
13
|
|
|
14
|
-
from credsweeper.common.constants import Severity, ThresholdPreset, DiffRowType
|
|
14
|
+
from credsweeper.common.constants import Severity, ThresholdPreset, DiffRowType, DEFAULT_ENCODING
|
|
15
15
|
from credsweeper.config import Config
|
|
16
16
|
from credsweeper.credentials import Candidate, CredentialManager, CandidateKey
|
|
17
17
|
from credsweeper.deep_scanner.deep_scanner import DeepScanner
|
|
@@ -415,7 +415,7 @@ class CredSweeper:
|
|
|
415
415
|
if isinstance(change_type, DiffRowType):
|
|
416
416
|
# add suffix for appropriated reports to create two files for the patch scan
|
|
417
417
|
json_path = json_path.with_suffix(f".{change_type.value}{json_path.suffix}")
|
|
418
|
-
with open(json_path, 'w') as f:
|
|
418
|
+
with open(json_path, 'w', encoding=DEFAULT_ENCODING) as f:
|
|
419
419
|
# use the approach to reduce total memory usage in case of huge data
|
|
420
420
|
first_item = True
|
|
421
421
|
f.write('[\n')
|
|
@@ -446,8 +446,12 @@ class CredSweeper:
|
|
|
446
446
|
for credential in credentials:
|
|
447
447
|
for line_data in credential.line_data_list:
|
|
448
448
|
# bright rule name and path or info
|
|
449
|
+
if isinstance(credential.ml_probability, float):
|
|
450
|
+
ml_probability_info = f" {credential.ml_probability:.6f}"
|
|
451
|
+
else:
|
|
452
|
+
ml_probability_info = ""
|
|
449
453
|
print(Style.BRIGHT + credential.rule_name +
|
|
450
|
-
f" {line_data.info or line_data.path}:{line_data.line_num}
|
|
454
|
+
f" {line_data.info or line_data.path}:{line_data.line_num}{ml_probability_info}" +
|
|
451
455
|
Style.RESET_ALL)
|
|
452
456
|
print(line_data.get_colored_line(hashed=self.hashed, subtext=self.subtext))
|
|
453
457
|
|
|
@@ -3,25 +3,30 @@ import re
|
|
|
3
3
|
|
|
4
4
|
class KeywordPattern:
|
|
5
5
|
"""Pattern set of keyword types"""
|
|
6
|
-
|
|
6
|
+
directive = r"(?P<directive>(?:(?:[#%]define|%global)(?:\s|\\t)|\bset))?"
|
|
7
|
+
key_left = r"(?:\\[nrt]|%[0-9a-f]{2}|\s)*" \
|
|
7
8
|
r"(?P<variable>(([`'\"]{1,8}[^:='\"`}<>\\/&?]*|[^:='\"`}<>\s()\\/&?;,%]*)" \
|
|
8
9
|
r"(?P<keyword>"
|
|
9
10
|
# there will be inserted a keyword
|
|
10
11
|
key_right = r")" \
|
|
11
|
-
r"[^%:='\"`<>{?!&;\n]*" \
|
|
12
|
+
r"[^%:='\"`<>({?!&;\n]*" \
|
|
12
13
|
r")" \
|
|
13
14
|
r"(&(quot|apos);|%[0-9a-f]{2}|[`'\"])*" \
|
|
14
15
|
r")" # <variable>
|
|
15
|
-
separator = r"(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*" \
|
|
16
|
-
r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)
|
|
16
|
+
separator = r"(?(directive)|(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*)" \
|
|
17
|
+
r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=~|=" \
|
|
18
|
+
r"|(?(directive)(\\t|\s|\((?!\))){1,80}|%3d))" \
|
|
17
19
|
r"(\s|\\{1,8}[tnr])*"
|
|
18
20
|
# might be curly, square or parenthesis with words before
|
|
19
21
|
wrap = r"(?P<wrap>(" \
|
|
20
22
|
r"(new(\s|\\{1,8}[tnr]|byte|char|string|\[\]){1,8})?" \
|
|
23
|
+
r"(?P<get>([_a-z][0-9a-z_.\[\]]*\.)get|(os\.)?getenv)?" \
|
|
21
24
|
r"([0-9a-z_.]|::|-(>|&gt;))*" \
|
|
22
|
-
r"
|
|
25
|
+
r"\s*" \
|
|
26
|
+
r"(\[(?!\])|\((?!\))|\{(?!\}))" \
|
|
23
27
|
r"(\s|\\{1,8}[tnr])*" \
|
|
24
|
-
r"([
|
|
28
|
+
r"(?(get)('[^']+'|\"[^\"]+\")\s*,\s*|)" \
|
|
29
|
+
r"([0-9a-z_]{1,32}\s*[:=]\s*)?" \
|
|
25
30
|
r"){1,8})?"
|
|
26
31
|
string_prefix = r"(((b|r|br|rb|u|f|rf|fr|l|@)(?=(\\*[`'\"])))?"
|
|
27
32
|
left_quote = r"(?P<value_leftquote>((?P<esq>\\{1,8})?([`'\"]|&(quot|apos);)){1,4}))?"
|
|
@@ -39,7 +44,7 @@ class KeywordPattern:
|
|
|
39
44
|
r"(?P<url_esc>%[0-9a-f]{2})" \
|
|
40
45
|
r"|" \
|
|
41
46
|
r"(?(url_esc)[^\s`'\",;\\&]|[^\s`'\",;\\])" \
|
|
42
|
-
r")"\
|
|
47
|
+
r")" \
|
|
43
48
|
r"){4,8000}" \
|
|
44
49
|
r"|" \
|
|
45
50
|
r"(<[^>]{4,8000}>)" \
|
|
@@ -48,18 +53,19 @@ class KeywordPattern:
|
|
|
48
53
|
r"|" \
|
|
49
54
|
r"(\$?\{{1,3}[^}]{4,8000}\}{1,3})" \
|
|
50
55
|
r"|" \
|
|
51
|
-
r"(?(wrap)(?(value_leftquote)(?!\\(?P=value_leftquote))|[^\]\)\}]){16,8000})"\
|
|
56
|
+
r"(?(wrap)(?(value_leftquote)(?!\\(?P=value_leftquote))|[^\]\)\}]){16,8000})" \
|
|
52
57
|
r")" # <value>
|
|
53
58
|
right_quote = r"(?(value_leftquote)" \
|
|
54
59
|
r"(?P<value_rightquote>(?<!\\)(?P=value_leftquote)|\\$|(?<=[0-9a-z+_/-])$)" \
|
|
55
60
|
r"|" \
|
|
56
|
-
r"(?(wrap)(\]|\)|\}
|
|
61
|
+
r"(?(wrap)(\]|\)|\}|;|\\|$))" \
|
|
57
62
|
r")"
|
|
58
63
|
|
|
59
64
|
@classmethod
|
|
60
65
|
def get_keyword_pattern(cls, keyword: str) -> re.Pattern:
|
|
61
66
|
"""Returns compiled regex pattern"""
|
|
62
67
|
expression = ''.join([ #
|
|
68
|
+
cls.directive, #
|
|
63
69
|
cls.key_left, #
|
|
64
70
|
keyword, #
|
|
65
71
|
cls.key_right, #
|
|
@@ -885,7 +885,7 @@ mbler
|
|
|
885
885
|
mean
|
|
886
886
|
measur
|
|
887
887
|
medi
|
|
888
|
-
|
|
888
|
+
medus
|
|
889
889
|
meet
|
|
890
890
|
mem_
|
|
891
891
|
memb
|
|
@@ -925,7 +925,7 @@ month
|
|
|
925
925
|
morp
|
|
926
926
|
mory
|
|
927
927
|
mote
|
|
928
|
-
|
|
928
|
+
motor
|
|
929
929
|
mount
|
|
930
930
|
move
|
|
931
931
|
mpeg
|
|
@@ -1005,6 +1005,7 @@ origin
|
|
|
1005
1005
|
orithm
|
|
1006
1006
|
ormat
|
|
1007
1007
|
orph
|
|
1008
|
+
otorola
|
|
1008
1009
|
ottle
|
|
1009
1010
|
ously
|
|
1010
1011
|
out
|
|
@@ -1485,6 +1486,7 @@ up_
|
|
|
1485
1486
|
updat
|
|
1486
1487
|
upgrade
|
|
1487
1488
|
url
|
|
1489
|
+
usa
|
|
1488
1490
|
usb
|
|
1489
1491
|
use
|
|
1490
1492
|
usin
|
|
@@ -10,7 +10,6 @@ from colorama import Fore, Style
|
|
|
10
10
|
from credsweeper.common.constants import MAX_LINE_LENGTH, UTF_8, StartEnd, ML_HUNK
|
|
11
11
|
from credsweeper.config import Config
|
|
12
12
|
from credsweeper.utils import Util
|
|
13
|
-
from credsweeper.utils.entropy_validator import EntropyValidator
|
|
14
13
|
|
|
15
14
|
|
|
16
15
|
class LineData:
|
|
@@ -32,7 +31,7 @@ class LineData:
|
|
|
32
31
|
"""
|
|
33
32
|
|
|
34
33
|
quotation_marks = ('"', "'", '`')
|
|
35
|
-
comment_starts = ("//", "* ", "#", "/*", "<!––", "%{", "%", "...", "(*", "--", "--[[", "#=")
|
|
34
|
+
comment_starts = ("//", "* ", "# ", "/*", "<!––", "%{", "%", "...", "(*", "--", "--[[", "#=")
|
|
36
35
|
bash_param_split = re.compile("\\s+(\\-|\\||\\>|\\w+?\\>|\\&)")
|
|
37
36
|
line_endings = re.compile(r"\\{1,8}[nr]")
|
|
38
37
|
# https://en.wikipedia.org/wiki/Percent-encoding
|
|
@@ -87,8 +86,9 @@ class LineData:
|
|
|
87
86
|
self.url_part = False
|
|
88
87
|
self.wrap = None
|
|
89
88
|
self._3d_escaped_separator = False
|
|
90
|
-
|
|
91
89
|
self.initialize(match_obj)
|
|
90
|
+
# the line is very useful for debug breakpoint
|
|
91
|
+
pass # pylint: disable=W0107
|
|
92
92
|
|
|
93
93
|
def compare(self, other: 'LineData') -> bool:
|
|
94
94
|
"""Comparison method - skip whole line and checks only when variable and value are the same"""
|
|
@@ -373,10 +373,10 @@ class LineData:
|
|
|
373
373
|
def to_str(self, subtext: bool = False, hashed: bool = False) -> str:
|
|
374
374
|
"""Represent line_data with subtext or|and hashed values"""
|
|
375
375
|
cut_pos = StartEnd(self.variable_start, self.value_end) if subtext else None
|
|
376
|
-
return f"
|
|
377
|
-
f" | line_num: {self.line_num}
|
|
376
|
+
return f"path: {self.path}" \
|
|
377
|
+
f" | line_num: {self.line_num}" \
|
|
378
378
|
f" | value: '{self.get_hash_or_subtext(self.value, hashed)}'" \
|
|
379
|
-
f" |
|
|
379
|
+
f" | line: '{self.get_hash_or_subtext(self.line, hashed, cut_pos)}'"
|
|
380
380
|
|
|
381
381
|
def __str__(self):
|
|
382
382
|
return self.to_str()
|
|
@@ -393,6 +393,10 @@ class LineData:
|
|
|
393
393
|
"""
|
|
394
394
|
cut_pos = StartEnd(self.variable_start if 0 <= self.variable_start else self.value_start,
|
|
395
395
|
self.value_end) if subtext else None
|
|
396
|
+
if isinstance(self.value, str):
|
|
397
|
+
entropy = round(Util.get_shannon_entropy(self.value), 5)
|
|
398
|
+
else:
|
|
399
|
+
entropy = None
|
|
396
400
|
full_output = {
|
|
397
401
|
"key": self.key,
|
|
398
402
|
"line": self.get_hash_or_subtext(self.line, hashed, cut_pos),
|
|
@@ -401,18 +405,18 @@ class LineData:
|
|
|
401
405
|
# info may contain variable name - so let it be hashed if requested
|
|
402
406
|
"info": self.get_hash_or_subtext(self.info, hashed),
|
|
403
407
|
"pattern": self.pattern.pattern,
|
|
408
|
+
"variable": self.get_hash_or_subtext(self.variable, hashed),
|
|
409
|
+
"variable_start": self.variable_start,
|
|
410
|
+
"variable_end": self.variable_end,
|
|
404
411
|
"separator": self.separator,
|
|
405
412
|
"separator_start": self.separator_start,
|
|
406
413
|
"separator_end": self.separator_end,
|
|
407
414
|
"value": self.get_hash_or_subtext(self.value, hashed),
|
|
408
415
|
"value_start": self.value_start,
|
|
409
416
|
"value_end": self.value_end,
|
|
410
|
-
"
|
|
411
|
-
"variable_start": self.variable_start,
|
|
412
|
-
"variable_end": self.variable_end,
|
|
417
|
+
"entropy": entropy,
|
|
413
418
|
"value_leftquote": self.value_leftquote,
|
|
414
419
|
"value_rightquote": self.value_rightquote,
|
|
415
|
-
"entropy_validation": EntropyValidator(self.value).to_dict()
|
|
416
420
|
}
|
|
417
421
|
reported_output = {k: v for k, v in full_output.items() if k in self.config.line_data_output}
|
|
418
422
|
return reported_output
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from abc import abstractmethod, ABC
|
|
2
|
-
from typing import List
|
|
2
|
+
from typing import List, Optional
|
|
3
3
|
|
|
4
4
|
from credsweeper.config import Config
|
|
5
5
|
from credsweeper.credentials import Candidate
|
|
@@ -40,3 +40,12 @@ class AbstractScanner(ABC):
|
|
|
40
40
|
recursive_limit_size: int) -> List[Candidate]:
|
|
41
41
|
"""Abstract method to be defined in DeepScanner"""
|
|
42
42
|
raise NotImplementedError(__name__)
|
|
43
|
+
|
|
44
|
+
@abstractmethod
|
|
45
|
+
def data_scan(
|
|
46
|
+
self, #
|
|
47
|
+
data_provider: DataContentProvider, #
|
|
48
|
+
depth: int, #
|
|
49
|
+
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
50
|
+
"""Abstract method to be defined in DeepScanner"""
|
|
51
|
+
raise NotImplementedError(__name__)
|
|
@@ -2,7 +2,7 @@ import datetime
|
|
|
2
2
|
import logging
|
|
3
3
|
from typing import List, Optional, Any, Tuple, Union
|
|
4
4
|
|
|
5
|
-
from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION
|
|
5
|
+
from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN
|
|
6
6
|
from credsweeper.config import Config
|
|
7
7
|
from credsweeper.credentials import Candidate
|
|
8
8
|
from credsweeper.credentials.augment_candidates import augment_candidates
|
|
@@ -23,6 +23,7 @@ from .gzip_scanner import GzipScanner
|
|
|
23
23
|
from .html_scanner import HtmlScanner
|
|
24
24
|
from .jks_scanner import JksScanner
|
|
25
25
|
from .lang_scanner import LangScanner
|
|
26
|
+
from .lzma_scanner import LzmaScanner
|
|
26
27
|
from .mxfile_scanner import MxfileScanner
|
|
27
28
|
from .pdf_scanner import PdfScanner
|
|
28
29
|
from .pkcs12_scanner import Pkcs12Scanner
|
|
@@ -48,6 +49,7 @@ class DeepScanner(
|
|
|
48
49
|
HtmlScanner, #
|
|
49
50
|
JksScanner, #
|
|
50
51
|
LangScanner, #
|
|
52
|
+
LzmaScanner, #
|
|
51
53
|
PdfScanner, #
|
|
52
54
|
Pkcs12Scanner, #
|
|
53
55
|
PptxScanner, #
|
|
@@ -106,6 +108,9 @@ class DeepScanner(
|
|
|
106
108
|
elif Util.is_bzip2(data):
|
|
107
109
|
if 0 < depth:
|
|
108
110
|
deep_scanners.append(Bzip2Scanner)
|
|
111
|
+
elif Util.is_lzma(data):
|
|
112
|
+
if 0 < depth:
|
|
113
|
+
deep_scanners.append(LzmaScanner)
|
|
109
114
|
elif Util.is_tar(data):
|
|
110
115
|
if 0 < depth:
|
|
111
116
|
deep_scanners.append(TarScanner)
|
|
@@ -140,13 +145,16 @@ class DeepScanner(
|
|
|
140
145
|
else:
|
|
141
146
|
fallback_scanners.append(EmlScanner)
|
|
142
147
|
fallback_scanners.append(ByteScanner)
|
|
148
|
+
elif Util.is_known(data):
|
|
149
|
+
# the format is known but cannot be scanned
|
|
150
|
+
pass
|
|
143
151
|
elif not Util.is_binary(data):
|
|
144
152
|
if 0 < depth:
|
|
145
153
|
deep_scanners.append(EncoderScanner)
|
|
146
154
|
deep_scanners.append(LangScanner)
|
|
147
155
|
deep_scanners.append(ByteScanner)
|
|
148
156
|
else:
|
|
149
|
-
logger.warning("Cannot apply a deep scanner for type %s", file_type)
|
|
157
|
+
logger.warning("Cannot apply a deep scanner for type %s prefix %s", file_type, str(data[:MIN_DATA_LEN]))
|
|
150
158
|
return deep_scanners, fallback_scanners
|
|
151
159
|
|
|
152
160
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
@@ -175,7 +183,7 @@ class DeepScanner(
|
|
|
175
183
|
# this scan is successful, so fallback is not necessary
|
|
176
184
|
fallback = False
|
|
177
185
|
if fallback:
|
|
178
|
-
for scan_class in
|
|
186
|
+
for scan_class in fallback_scanners:
|
|
179
187
|
fallback_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
|
|
180
188
|
if fallback_candidates is None:
|
|
181
189
|
continue
|
|
@@ -239,15 +247,18 @@ class DeepScanner(
|
|
|
239
247
|
recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
|
|
240
248
|
"""
|
|
241
249
|
candidates: List[Candidate] = []
|
|
242
|
-
logger.debug("Start data_scan: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data), depth,
|
|
243
|
-
recursive_limit_size, data_provider.file_path, data_provider.info)
|
|
244
|
-
|
|
245
250
|
if 0 > depth:
|
|
246
251
|
# break recursion if maximal depth is reached
|
|
247
|
-
logger.debug("
|
|
252
|
+
logger.debug("Bottom reached %s recursive_limit_size:%d", data_provider.file_path, recursive_limit_size)
|
|
248
253
|
return candidates
|
|
249
|
-
|
|
250
254
|
depth -= 1
|
|
255
|
+
if MIN_DATA_LEN > len(data_provider.data):
|
|
256
|
+
# break recursion for minimal data size
|
|
257
|
+
logger.debug("Too small data: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data),
|
|
258
|
+
depth, recursive_limit_size, data_provider.file_path, data_provider.info)
|
|
259
|
+
return candidates
|
|
260
|
+
logger.debug("Start data_scan: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data), depth,
|
|
261
|
+
recursive_limit_size, data_provider.file_path, data_provider.info)
|
|
251
262
|
|
|
252
263
|
if FilePathExtractor.is_find_by_ext_file(self.config, data_provider.file_type):
|
|
253
264
|
# Skip scanning file and makes fake candidate due the extension is suspicious
|
|
@@ -42,7 +42,7 @@ class DocxScanner(AbstractScanner, ABC):
|
|
|
42
42
|
yield from DocxScanner._iter_block_items(block.footer)
|
|
43
43
|
return
|
|
44
44
|
elif isinstance(block, _Cell):
|
|
45
|
-
parent_elm = block._tc
|
|
45
|
+
parent_elm = block._tc # pylint: disable=W0212
|
|
46
46
|
else:
|
|
47
47
|
raise ValueError(f"unrecognised:{type(block)}")
|
|
48
48
|
|
|
@@ -18,11 +18,11 @@ class EncoderScanner(AbstractScanner, ABC):
|
|
|
18
18
|
depth: int, #
|
|
19
19
|
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
20
20
|
"""Tries to decode data from base64 encode to bytes and scan as bytes again"""
|
|
21
|
-
if data_provider.represent_as_encoded():
|
|
21
|
+
if result := data_provider.represent_as_encoded():
|
|
22
22
|
decoded_data_provider = DataContentProvider(data=data_provider.decoded,
|
|
23
23
|
file_path=data_provider.file_path,
|
|
24
24
|
file_type=data_provider.file_type,
|
|
25
25
|
info=f"{data_provider.info}|BASE64")
|
|
26
26
|
new_limit = recursive_limit_size - len(decoded_data_provider.data)
|
|
27
27
|
return self.recursive_scan(decoded_data_provider, depth, new_limit)
|
|
28
|
-
return None
|
|
28
|
+
return None if result is None else []
|
|
@@ -19,12 +19,12 @@ class HtmlScanner(AbstractScanner, ABC):
|
|
|
19
19
|
depth: int, #
|
|
20
20
|
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
21
21
|
"""Tries to represent data as html text and scan as text lines"""
|
|
22
|
-
if data_provider.represent_as_html(depth, recursive_limit_size,
|
|
23
|
-
|
|
22
|
+
if result := data_provider.represent_as_html(depth, recursive_limit_size,
|
|
23
|
+
self.scanner.keywords_required_substrings_check):
|
|
24
24
|
string_data_provider = StringContentProvider(lines=data_provider.lines,
|
|
25
25
|
line_numbers=data_provider.line_numbers,
|
|
26
26
|
file_path=data_provider.file_path,
|
|
27
27
|
file_type=data_provider.file_type,
|
|
28
28
|
info=f"{data_provider.info}|HTML")
|
|
29
29
|
return self.scanner.scan(string_data_provider)
|
|
30
|
-
return None
|
|
30
|
+
return None if result is None else []
|
|
@@ -20,7 +20,6 @@ class JksScanner(AbstractScanner, ABC):
|
|
|
20
20
|
depth: int, #
|
|
21
21
|
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
22
22
|
"""Tries to scan JKS to open with standard password"""
|
|
23
|
-
candidates = []
|
|
24
23
|
for pw_probe in self.config.bruteforce_list:
|
|
25
24
|
try:
|
|
26
25
|
keystore = jks.KeyStore.loads(data_provider.data, pw_probe, try_decrypt_keys=True)
|
|
@@ -38,8 +37,7 @@ class JksScanner(AbstractScanner, ABC):
|
|
|
38
37
|
candidate.line_data_list[0].value = pw_probe or "<EMPTY PASSWORD>"
|
|
39
38
|
candidate.line_data_list[0].value_start = 1
|
|
40
39
|
candidate.line_data_list[0].value_end = 1 + len(candidate.line_data_list[0].value)
|
|
41
|
-
|
|
42
|
-
break
|
|
40
|
+
return [candidate]
|
|
43
41
|
except Exception as jks_exc:
|
|
44
42
|
logger.debug(f"{data_provider.file_path}:{pw_probe}:{jks_exc}")
|
|
45
|
-
return
|
|
43
|
+
return None
|
|
@@ -19,10 +19,10 @@ class LangScanner(AbstractScanner, ABC):
|
|
|
19
19
|
depth: int, #
|
|
20
20
|
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
21
21
|
"""Tries to represent data as markup language and scan as structure"""
|
|
22
|
-
if data_provider.represent_as_structure():
|
|
22
|
+
if result := data_provider.represent_as_structure():
|
|
23
23
|
struct_data_provider = StructContentProvider(struct=data_provider.structure,
|
|
24
24
|
file_path=data_provider.file_path,
|
|
25
25
|
file_type=data_provider.file_type,
|
|
26
26
|
info=f"{data_provider.info}|STRUCT")
|
|
27
27
|
return self.structure_scan(struct_data_provider, depth, recursive_limit_size)
|
|
28
|
-
return None
|
|
28
|
+
return None if result is None else []
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import lzma
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import List, Optional
|
|
6
|
+
|
|
7
|
+
from credsweeper.credentials import Candidate
|
|
8
|
+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
9
|
+
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
10
|
+
from credsweeper.utils import Util
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class LzmaScanner(AbstractScanner, ABC):
|
|
16
|
+
"""Implements lzma scanning"""
|
|
17
|
+
|
|
18
|
+
def data_scan(
|
|
19
|
+
self, #
|
|
20
|
+
data_provider: DataContentProvider, #
|
|
21
|
+
depth: int, #
|
|
22
|
+
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
23
|
+
"""Extracts data from lzma archive and launches data_scan"""
|
|
24
|
+
try:
|
|
25
|
+
file_path = Path(data_provider.file_path)
|
|
26
|
+
new_path = file_path.as_posix()
|
|
27
|
+
if ".xz" == file_path.suffix:
|
|
28
|
+
new_path = new_path[:-3]
|
|
29
|
+
elif ".lzma" == file_path.suffix:
|
|
30
|
+
new_path = new_path[:-5]
|
|
31
|
+
lzma_content_provider = DataContentProvider(data=lzma.decompress(data_provider.data),
|
|
32
|
+
file_path=new_path,
|
|
33
|
+
file_type=Util.get_extension(new_path),
|
|
34
|
+
info=f"{data_provider.info}|LZMA:{file_path}")
|
|
35
|
+
new_limit = recursive_limit_size - len(lzma_content_provider.data)
|
|
36
|
+
lzma_candidates = self.recursive_scan(lzma_content_provider, depth, new_limit)
|
|
37
|
+
return lzma_candidates
|
|
38
|
+
except Exception as lzma_exc:
|
|
39
|
+
logger.error(f"{data_provider.file_path}:{lzma_exc}")
|
|
40
|
+
return None
|
|
@@ -20,10 +20,9 @@ class Pkcs12Scanner(AbstractScanner, ABC):
|
|
|
20
20
|
depth: int, #
|
|
21
21
|
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
22
22
|
"""Tries to scan PKCS12 to open with standard password"""
|
|
23
|
-
candidates = []
|
|
24
23
|
for pw_probe in self.config.bruteforce_list:
|
|
25
24
|
try:
|
|
26
|
-
(private_key,
|
|
25
|
+
(private_key, _certificate, _additional_certificates) \
|
|
27
26
|
= cryptography.hazmat.primitives.serialization.pkcs12.load_key_and_certificates(data_provider.data,
|
|
28
27
|
pw_probe.encode())
|
|
29
28
|
# the password probe has passed, it will be the value
|
|
@@ -40,8 +39,7 @@ class Pkcs12Scanner(AbstractScanner, ABC):
|
|
|
40
39
|
candidate.line_data_list[0].value = value
|
|
41
40
|
candidate.line_data_list[0].value_start = 1
|
|
42
41
|
candidate.line_data_list[0].value_end = 1 + len(candidate.line_data_list[0].value)
|
|
43
|
-
|
|
44
|
-
break
|
|
42
|
+
return [candidate]
|
|
45
43
|
except Exception as pkcs_exc:
|
|
46
44
|
logger.debug(f"{data_provider.file_path}:{pw_probe}:{pkcs_exc}")
|
|
47
|
-
return
|
|
45
|
+
return None
|
|
@@ -19,11 +19,11 @@ class XmlScanner(AbstractScanner, ABC):
|
|
|
19
19
|
depth: int, #
|
|
20
20
|
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
21
21
|
"""Tries to represent data as xml text and scan as text lines"""
|
|
22
|
-
if data_provider.represent_as_xml():
|
|
22
|
+
if result := data_provider.represent_as_xml():
|
|
23
23
|
string_data_provider = StringContentProvider(lines=data_provider.lines,
|
|
24
24
|
line_numbers=data_provider.line_numbers,
|
|
25
25
|
file_path=data_provider.file_path,
|
|
26
26
|
file_type=data_provider.file_type,
|
|
27
27
|
info=f"{data_provider.info}|XML")
|
|
28
28
|
return self.scanner.scan(string_data_provider)
|
|
29
|
-
return None
|
|
29
|
+
return None if result is None else []
|
|
@@ -76,9 +76,14 @@ class DataContentProvider(ContentProvider):
|
|
|
76
76
|
return self.structure is not None and (isinstance(self.structure, dict) and 0 < len(self.structure.keys())
|
|
77
77
|
or isinstance(self.structure, list) and 0 < len(self.structure))
|
|
78
78
|
|
|
79
|
-
def represent_as_structure(self) -> bool:
|
|
79
|
+
def represent_as_structure(self) -> Optional[bool]:
|
|
80
80
|
"""Tries to convert data with many parsers. Stores result to internal structure
|
|
81
|
-
|
|
81
|
+
|
|
82
|
+
Return:
|
|
83
|
+
True if some structure found
|
|
84
|
+
False if no data found
|
|
85
|
+
None if the format is not acceptable
|
|
86
|
+
|
|
82
87
|
"""
|
|
83
88
|
if MIN_DATA_LEN > len(self.text):
|
|
84
89
|
return False
|
|
@@ -134,13 +139,15 @@ class DataContentProvider(ContentProvider):
|
|
|
134
139
|
if self.__is_structure():
|
|
135
140
|
return True
|
|
136
141
|
# # # None of above
|
|
137
|
-
return
|
|
142
|
+
return None
|
|
138
143
|
|
|
139
|
-
def represent_as_xml(self) -> bool:
|
|
144
|
+
def represent_as_xml(self) -> Optional[bool]:
|
|
140
145
|
"""Tries to read data as xml
|
|
141
146
|
|
|
142
147
|
Return:
|
|
143
148
|
True if reading was successful
|
|
149
|
+
False if no data found
|
|
150
|
+
None if the format is not acceptable
|
|
144
151
|
|
|
145
152
|
"""
|
|
146
153
|
if MIN_XML_LEN > len(self.text):
|
|
@@ -150,14 +157,12 @@ class DataContentProvider(ContentProvider):
|
|
|
150
157
|
xml_text = self.text.splitlines()
|
|
151
158
|
self.lines, self.line_numbers = Util.get_xml_from_lines(xml_text)
|
|
152
159
|
logger.debug("CONVERTED from xml")
|
|
160
|
+
return bool(self.lines and self.line_numbers)
|
|
153
161
|
else:
|
|
154
162
|
logger.debug("Weak data to parse as XML")
|
|
155
|
-
return False
|
|
156
163
|
except Exception as exc:
|
|
157
164
|
logger.debug("Cannot parse as XML:%s %s", exc, self.data)
|
|
158
|
-
|
|
159
|
-
return bool(self.lines and self.line_numbers)
|
|
160
|
-
return False
|
|
165
|
+
return None
|
|
161
166
|
|
|
162
167
|
def _check_multiline_cell(self, cell: Tag) -> Optional[Tuple[int, str]]:
|
|
163
168
|
"""multiline cell will be analysed as text or return single line from cell
|
|
@@ -336,11 +341,13 @@ class DataContentProvider(ContentProvider):
|
|
|
336
341
|
self, #
|
|
337
342
|
depth: int, #
|
|
338
343
|
recursive_limit_size: int, #
|
|
339
|
-
keywords_required_substrings_check: Callable[[str], bool]) -> bool:
|
|
344
|
+
keywords_required_substrings_check: Callable[[str], bool]) -> Optional[bool]:
|
|
340
345
|
"""Tries to read data as html
|
|
341
346
|
|
|
342
347
|
Return:
|
|
343
348
|
True if reading was successful
|
|
349
|
+
False if no data found
|
|
350
|
+
None if the format is not acceptable
|
|
344
351
|
|
|
345
352
|
"""
|
|
346
353
|
try:
|
|
@@ -361,13 +368,15 @@ class DataContentProvider(ContentProvider):
|
|
|
361
368
|
logger.debug("Cannot parse as HTML:%s %s", exc, self.data)
|
|
362
369
|
else:
|
|
363
370
|
return bool(self.lines and self.line_numbers)
|
|
364
|
-
return
|
|
371
|
+
return None
|
|
365
372
|
|
|
366
|
-
def represent_as_encoded(self) -> bool:
|
|
373
|
+
def represent_as_encoded(self) -> Optional[bool]:
|
|
367
374
|
"""Decodes data from base64. Stores result in decoded
|
|
368
375
|
|
|
369
376
|
Return:
|
|
370
377
|
True if the data correctly parsed and verified
|
|
378
|
+
False if no data found
|
|
379
|
+
None if the format is not acceptable
|
|
371
380
|
|
|
372
381
|
"""
|
|
373
382
|
if len(self.data) < MIN_ENCODED_DATA_LEN \
|
|
@@ -383,7 +392,7 @@ class DataContentProvider(ContentProvider):
|
|
|
383
392
|
logger.debug("Cannot decoded as base64:%s %s", exc, self.data)
|
|
384
393
|
else:
|
|
385
394
|
return self.decoded is not None and 0 < len(self.decoded)
|
|
386
|
-
return
|
|
395
|
+
return None
|
|
387
396
|
|
|
388
397
|
def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTarget, None, None]:
|
|
389
398
|
"""Return nothing. The class provides only data storage.
|
{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_array_dictionary_check.py
RENAMED
|
@@ -14,7 +14,7 @@ class ValueArrayDictionaryCheck(Filter):
|
|
|
14
14
|
`token = {'root'}` would be kept
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
|
-
PATTERN = re.compile(r"\[('|\")
|
|
17
|
+
PATTERN = re.compile(r"\[('|\")?[^,]+('|\")?\]")
|
|
18
18
|
|
|
19
19
|
def __init__(self, config: Config = None) -> None:
|
|
20
20
|
pass
|
|
@@ -32,6 +32,8 @@ class ValueArrayDictionaryCheck(Filter):
|
|
|
32
32
|
"""
|
|
33
33
|
if line_data.is_well_quoted_value:
|
|
34
34
|
return False
|
|
35
|
+
if line_data.wrap and "byte" in line_data.wrap.lower():
|
|
36
|
+
return False
|
|
35
37
|
if self.PATTERN.search(line_data.value):
|
|
36
38
|
return True
|
|
37
39
|
if line_data.wrap and not line_data.is_well_quoted_value and ('[' in line_data.wrap or '(' in line_data.wrap):
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import contextlib
|
|
2
2
|
import json
|
|
3
3
|
|
|
4
|
-
from credsweeper.common.constants import Chars
|
|
5
4
|
from credsweeper.config import Config
|
|
6
5
|
from credsweeper.credentials import LineData
|
|
7
6
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
@@ -45,7 +44,7 @@ class ValueAzureTokenCheck(Filter):
|
|
|
45
44
|
# must be all parts in payload
|
|
46
45
|
return True
|
|
47
46
|
min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(parts[2]))
|
|
48
|
-
entropy = Util.get_shannon_entropy(parts[2]
|
|
47
|
+
entropy = Util.get_shannon_entropy(parts[2])
|
|
49
48
|
# good signature has to be like random bytes
|
|
50
49
|
return entropy < min_entropy
|
|
51
50
|
|