credsweeper 1.11.2__tar.gz → 1.11.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- {credsweeper-1.11.2 → credsweeper-1.11.4}/PKG-INFO +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/__init__.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/__main__.py +7 -5
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/app.py +28 -47
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/common/constants.py +2 -5
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/common/keyword_pattern.py +15 -9
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/common/morpheme_checklist.txt +4 -2
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/credentials/candidate_key.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/credentials/credential_manager.py +4 -3
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/credentials/line_data.py +16 -15
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/abstract_scanner.py +10 -1
- credsweeper-1.11.4/credsweeper/deep_scanner/deb_scanner.py +48 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/deep_scanner.py +65 -43
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/docx_scanner.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/encoder_scanner.py +2 -2
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/gzip_scanner.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/html_scanner.py +3 -3
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/jks_scanner.py +2 -4
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/lang_scanner.py +2 -2
- credsweeper-1.11.4/credsweeper/deep_scanner/lzma_scanner.py +40 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/pkcs12_scanner.py +3 -5
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/xml_scanner.py +2 -2
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/byte_content_provider.py +2 -2
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/content_provider.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/data_content_provider.py +23 -14
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/diff_content_provider.py +2 -2
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/file_path_extractor.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/files_provider.py +2 -4
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/patches_provider.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/string_content_provider.py +2 -2
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/struct_content_provider.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/text_content_provider.py +2 -2
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_array_dictionary_check.py +3 -1
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_azure_token_check.py +1 -2
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_base64_encoded_pem_check.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_base64_part_check.py +30 -21
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_discord_bot_check.py +1 -2
- credsweeper-1.11.4/credsweeper/filters/value_entropy_base32_check.py +22 -0
- credsweeper-1.11.4/credsweeper/filters/value_entropy_base36_check.py +23 -0
- credsweeper-1.11.4/credsweeper/filters/value_entropy_base64_check.py +26 -0
- credsweeper-1.11.4/credsweeper/filters/value_entropy_base_check.py +37 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_file_path_check.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_hex_number_check.py +3 -3
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_json_web_token_check.py +4 -5
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_pattern_check.py +64 -16
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_string_type_check.py +11 -3
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_token_base32_check.py +0 -4
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_token_base36_check.py +0 -4
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_token_base64_check.py +0 -4
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_token_check.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/file_extension.py +2 -2
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/morpheme_dense.py +0 -4
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/rule_name.py +1 -1
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_path.py +0 -9
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_postamble.py +0 -11
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_preamble.py +0 -11
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_transition.py +0 -11
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_value.py +0 -11
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_variable.py +0 -11
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/ml_validator.py +45 -22
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/rules/config.yaml +238 -208
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/rules/rule.py +3 -3
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/scan_type.py +2 -3
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/scanner/scanner.py +7 -1
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/secret/config.json +16 -5
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/utils/hop_stat.py +3 -3
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/utils/pem_key_detector.py +8 -7
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/utils/util.py +76 -146
- credsweeper-1.11.2/credsweeper/filters/value_entropy_base32_check.py +0 -42
- credsweeper-1.11.2/credsweeper/filters/value_entropy_base36_check.py +0 -46
- credsweeper-1.11.2/credsweeper/filters/value_entropy_base64_check.py +0 -59
- credsweeper-1.11.2/credsweeper/utils/entropy_validator.py +0 -72
- {credsweeper-1.11.2 → credsweeper-1.11.4}/.gitignore +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/LICENSE +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/README.md +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/common/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/common/keyword_checklist.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/common/keyword_checklist.txt +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/config/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/config/config.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/credentials/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/credentials/augment_candidates.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/credentials/candidate.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/credentials/candidate_group_generator.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/byte_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/bzip2_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/eml_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/mxfile_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/pdf_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/pptx_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/tar_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/tmx_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/xlsx_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/zip_scanner.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/abstract_provider.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/analysis_target.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/descriptor.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/filter.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/general_keyword.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/general_pattern.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/group.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/password_keyword.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/token_pattern.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/url_credentials_group.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/weird_base36_token.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/weird_base64_token.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/line_git_binary_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/line_specific_key_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/line_uue_part_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_allowlist_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_atlassian_token_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_base32_data_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_base64_data_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_base64_key_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_blocklist_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_camel_case_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_couple_keyword_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_dictionary_keyword_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_dictionary_value_length_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_github_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_grafana_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_grafana_service_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_jfrog_token_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_last_word_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_method_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_not_allowed_pattern_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_not_part_encoded_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_number_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_similarity_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_split_keyword_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_token_base_check.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/logger/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/logger/logger.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/entropy_evaluation.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/feature.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/has_html_tag.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/is_secret_numeric.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/length_of_attribute.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/search_in_attribute.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/ml_config.json +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/ml_model.onnx +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/py.typed +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/rules/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/scanner/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/multi_pattern.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/pem_key_pattern.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/single_pattern.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/secret/log.yaml +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/utils/__init__.py +0 -0
- {credsweeper-1.11.2 → credsweeper-1.11.4}/pyproject.toml +0 -0
|
@@ -4,6 +4,7 @@ import os
|
|
|
4
4
|
import sys
|
|
5
5
|
import time
|
|
6
6
|
from argparse import ArgumentParser, ArgumentTypeError, Namespace, BooleanOptionalAction
|
|
7
|
+
from pathlib import Path
|
|
7
8
|
from typing import Any, Union, Dict
|
|
8
9
|
|
|
9
10
|
from credsweeper import __version__
|
|
@@ -62,7 +63,7 @@ def logger_levels(log_level: str) -> str:
|
|
|
62
63
|
Returns True if log_level UPPERCASE is one of keys
|
|
63
64
|
"""
|
|
64
65
|
val = log_level.upper()
|
|
65
|
-
if
|
|
66
|
+
if val in Logger.LEVELS:
|
|
66
67
|
return val
|
|
67
68
|
raise ArgumentTypeError(f"Log level provided: {log_level} -- must be one of: {' | '.join(Logger.LEVELS.keys())}")
|
|
68
69
|
|
|
@@ -88,10 +89,11 @@ def check_integrity() -> int:
|
|
|
88
89
|
Returns CRC32 of files in integer
|
|
89
90
|
"""
|
|
90
91
|
crc32 = 0
|
|
91
|
-
for root,
|
|
92
|
-
for
|
|
93
|
-
if Util.get_extension(
|
|
94
|
-
|
|
92
|
+
for root, _dirs, files in os.walk(APP_PATH):
|
|
93
|
+
for file_name in files:
|
|
94
|
+
if Util.get_extension(file_name) in [".py", ".json", ".txt", ".yaml", ".onnx"]:
|
|
95
|
+
file_path = Path(root) / file_name
|
|
96
|
+
data = Util.read_data(file_path)
|
|
95
97
|
if data:
|
|
96
98
|
crc32 ^= binascii.crc32(data)
|
|
97
99
|
return crc32
|
|
@@ -11,15 +11,17 @@ from colorama import Style
|
|
|
11
11
|
# Directory of credsweeper sources MUST be placed before imports to avoid circular import error
|
|
12
12
|
APP_PATH = Path(__file__).resolve().parent
|
|
13
13
|
|
|
14
|
-
from credsweeper.common.constants import Severity, ThresholdPreset, DiffRowType
|
|
14
|
+
from credsweeper.common.constants import Severity, ThresholdPreset, DiffRowType, DEFAULT_ENCODING
|
|
15
15
|
from credsweeper.config import Config
|
|
16
16
|
from credsweeper.credentials import Candidate, CredentialManager, CandidateKey
|
|
17
17
|
from credsweeper.deep_scanner.deep_scanner import DeepScanner
|
|
18
|
+
from credsweeper.file_handler.content_provider import ContentProvider
|
|
18
19
|
from credsweeper.file_handler.diff_content_provider import DiffContentProvider
|
|
19
20
|
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
|
|
20
21
|
from credsweeper.file_handler.abstract_provider import AbstractProvider
|
|
21
22
|
from credsweeper.file_handler.text_content_provider import TextContentProvider
|
|
22
23
|
from credsweeper.scanner import Scanner
|
|
24
|
+
from credsweeper.ml_model.ml_validator import MlValidator
|
|
23
25
|
from credsweeper.utils import Util
|
|
24
26
|
|
|
25
27
|
logger = logging.getLogger(__name__)
|
|
@@ -94,7 +96,7 @@ class CredSweeper:
|
|
|
94
96
|
log_level: str - level for pool initializer according logging levels (UPPERCASE)
|
|
95
97
|
|
|
96
98
|
"""
|
|
97
|
-
self.pool_count: int =
|
|
99
|
+
self.pool_count: int = max(1, int(pool_count))
|
|
98
100
|
if not (_severity := Severity.get(severity)):
|
|
99
101
|
raise RuntimeError(f"Severity level provided: {severity}"
|
|
100
102
|
f" -- must be one of: {' | '.join([i.value for i in Severity])}")
|
|
@@ -123,9 +125,9 @@ class CredSweeper:
|
|
|
123
125
|
self.ml_config = ml_config
|
|
124
126
|
self.ml_model = ml_model
|
|
125
127
|
self.ml_providers = ml_providers
|
|
126
|
-
self.ml_validator = None
|
|
127
128
|
self.__thrifty = thrifty
|
|
128
129
|
self.__log_level = log_level
|
|
130
|
+
self.__ml_validator: Optional[MlValidator] = None
|
|
129
131
|
|
|
130
132
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
131
133
|
|
|
@@ -182,35 +184,22 @@ class CredSweeper:
|
|
|
182
184
|
|
|
183
185
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
184
186
|
|
|
185
|
-
# the import cannot be done on top due
|
|
186
|
-
# TypeError: cannot pickle 'onnxruntime.capi.onnxruntime_pybind11_state.InferenceSession' object
|
|
187
|
-
from credsweeper.ml_model import MlValidator
|
|
188
|
-
|
|
189
|
-
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
190
|
-
|
|
191
187
|
@property
|
|
192
188
|
def ml_validator(self) -> MlValidator:
|
|
193
189
|
"""ml_validator getter"""
|
|
194
|
-
from credsweeper.ml_model import MlValidator
|
|
195
190
|
if not self.__ml_validator:
|
|
196
|
-
self.__ml_validator
|
|
191
|
+
self.__ml_validator = MlValidator(
|
|
197
192
|
threshold=self.ml_threshold, #
|
|
198
193
|
ml_config=self.ml_config, #
|
|
199
194
|
ml_model=self.ml_model, #
|
|
200
195
|
ml_providers=self.ml_providers, #
|
|
201
196
|
)
|
|
202
|
-
|
|
197
|
+
if not self.__ml_validator:
|
|
198
|
+
raise RuntimeError("MlValidator was not initialized!")
|
|
203
199
|
return self.__ml_validator
|
|
204
200
|
|
|
205
201
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
206
202
|
|
|
207
|
-
@ml_validator.setter
|
|
208
|
-
def ml_validator(self, _ml_validator: Optional[MlValidator]) -> None:
|
|
209
|
-
"""ml_validator setter"""
|
|
210
|
-
self.__ml_validator = _ml_validator
|
|
211
|
-
|
|
212
|
-
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
213
|
-
|
|
214
203
|
@staticmethod
|
|
215
204
|
def pool_initializer(log_kwargs) -> None:
|
|
216
205
|
"""Ignore SIGINT in child processes."""
|
|
@@ -219,20 +208,6 @@ class CredSweeper:
|
|
|
219
208
|
|
|
220
209
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
221
210
|
|
|
222
|
-
@property
|
|
223
|
-
def config(self) -> Config:
|
|
224
|
-
"""config getter"""
|
|
225
|
-
return self.__config
|
|
226
|
-
|
|
227
|
-
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
228
|
-
|
|
229
|
-
@config.setter
|
|
230
|
-
def config(self, config: Config) -> None:
|
|
231
|
-
"""config setter"""
|
|
232
|
-
self.__config = config
|
|
233
|
-
|
|
234
|
-
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
235
|
-
|
|
236
211
|
def run(self, content_provider: AbstractProvider) -> int:
|
|
237
212
|
"""Run an analysis of 'content_provider' object.
|
|
238
213
|
|
|
@@ -241,9 +216,10 @@ class CredSweeper:
|
|
|
241
216
|
|
|
242
217
|
"""
|
|
243
218
|
_empty_list: Sequence[Union[DiffContentProvider, TextContentProvider]] = []
|
|
244
|
-
file_extractors
|
|
245
|
-
|
|
246
|
-
|
|
219
|
+
file_extractors = content_provider.get_scannable_files(self.config) if content_provider else _empty_list
|
|
220
|
+
if not file_extractors:
|
|
221
|
+
logger.info(f"No scannable targets for {len(content_provider.paths)} paths")
|
|
222
|
+
return 0
|
|
247
223
|
self.scan(file_extractors)
|
|
248
224
|
self.post_processing()
|
|
249
225
|
# PatchesProvider has the attribute. Circular import error appears with using the isinstance
|
|
@@ -260,7 +236,7 @@ class CredSweeper:
|
|
|
260
236
|
content_providers: file objects to scan
|
|
261
237
|
|
|
262
238
|
"""
|
|
263
|
-
if 1 < self.pool_count:
|
|
239
|
+
if 1 < self.pool_count and 1 < len(content_providers):
|
|
264
240
|
self.__multi_jobs_scan(content_providers)
|
|
265
241
|
else:
|
|
266
242
|
self.__single_job_scan(content_providers)
|
|
@@ -269,6 +245,7 @@ class CredSweeper:
|
|
|
269
245
|
|
|
270
246
|
def __single_job_scan(self, content_providers: Sequence[Union[DiffContentProvider, TextContentProvider]]) -> None:
|
|
271
247
|
"""Performs scan in main thread"""
|
|
248
|
+
logger.info(f"Scan for {len(content_providers)} providers")
|
|
272
249
|
all_cred = self.files_scan(content_providers)
|
|
273
250
|
self.credential_manager.set_credentials(all_cred)
|
|
274
251
|
|
|
@@ -284,12 +261,14 @@ class CredSweeper:
|
|
|
284
261
|
if "SILENCE" == self.__log_level:
|
|
285
262
|
logging.addLevelName(60, "SILENCE")
|
|
286
263
|
log_kwargs["level"] = self.__log_level
|
|
287
|
-
|
|
288
|
-
|
|
264
|
+
pool_count = min(self.pool_count, len(content_providers))
|
|
265
|
+
logger.info(f"Scan in {pool_count} processes for {len(content_providers)} providers")
|
|
266
|
+
with multiprocessing.get_context("spawn").Pool(processes=pool_count,
|
|
267
|
+
initializer=CredSweeper.pool_initializer,
|
|
289
268
|
initargs=(log_kwargs, )) as pool:
|
|
290
269
|
try:
|
|
291
|
-
for scan_results in pool.imap_unordered(self.files_scan,
|
|
292
|
-
|
|
270
|
+
for scan_results in pool.imap_unordered(self.files_scan,
|
|
271
|
+
(content_providers[x::pool_count] for x in range(pool_count))):
|
|
293
272
|
for cred in scan_results:
|
|
294
273
|
self.credential_manager.add_credential(cred)
|
|
295
274
|
except KeyboardInterrupt:
|
|
@@ -301,9 +280,7 @@ class CredSweeper:
|
|
|
301
280
|
|
|
302
281
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
303
282
|
|
|
304
|
-
def files_scan(
|
|
305
|
-
self, #
|
|
306
|
-
content_providers: Sequence[Union[DiffContentProvider, TextContentProvider]]) -> List[Candidate]:
|
|
283
|
+
def files_scan(self, content_providers: Sequence[ContentProvider]) -> List[Candidate]:
|
|
307
284
|
"""Auxiliary method for scan one sequence"""
|
|
308
285
|
all_cred: List[Candidate] = []
|
|
309
286
|
for provider in content_providers:
|
|
@@ -316,7 +293,7 @@ class CredSweeper:
|
|
|
316
293
|
|
|
317
294
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
318
295
|
|
|
319
|
-
def file_scan(self, content_provider:
|
|
296
|
+
def file_scan(self, content_provider: ContentProvider) -> List[Candidate]:
|
|
320
297
|
"""Run scanning of file from 'file_provider'.
|
|
321
298
|
|
|
322
299
|
Args:
|
|
@@ -415,7 +392,7 @@ class CredSweeper:
|
|
|
415
392
|
if isinstance(change_type, DiffRowType):
|
|
416
393
|
# add suffix for appropriated reports to create two files for the patch scan
|
|
417
394
|
json_path = json_path.with_suffix(f".{change_type.value}{json_path.suffix}")
|
|
418
|
-
with open(json_path, 'w') as f:
|
|
395
|
+
with open(json_path, 'w', encoding=DEFAULT_ENCODING) as f:
|
|
419
396
|
# use the approach to reduce total memory usage in case of huge data
|
|
420
397
|
first_item = True
|
|
421
398
|
f.write('[\n')
|
|
@@ -446,8 +423,12 @@ class CredSweeper:
|
|
|
446
423
|
for credential in credentials:
|
|
447
424
|
for line_data in credential.line_data_list:
|
|
448
425
|
# bright rule name and path or info
|
|
426
|
+
if isinstance(credential.ml_probability, float):
|
|
427
|
+
ml_probability_info = f" {credential.ml_probability:.6f}"
|
|
428
|
+
else:
|
|
429
|
+
ml_probability_info = ""
|
|
449
430
|
print(Style.BRIGHT + credential.rule_name +
|
|
450
|
-
f" {line_data.info or line_data.path}:{line_data.line_num}
|
|
431
|
+
f" {line_data.info or line_data.path}:{line_data.line_num}{ml_probability_info}" +
|
|
451
432
|
Style.RESET_ALL)
|
|
452
433
|
print(line_data.get_colored_line(hashed=self.hashed, subtext=self.subtext))
|
|
453
434
|
|
|
@@ -96,10 +96,6 @@ class Chars(Enum):
|
|
|
96
96
|
ASCII_PRINTABLE = string.printable
|
|
97
97
|
|
|
98
98
|
|
|
99
|
-
ENTROPY_LIMIT_BASE64 = 4.5
|
|
100
|
-
ENTROPY_LIMIT_BASE3x = 3
|
|
101
|
-
|
|
102
|
-
|
|
103
99
|
class GroupType(Enum):
|
|
104
100
|
"""Group type - used in Group constructor for load predefined set of filters"""
|
|
105
101
|
KEYWORD = "keyword"
|
|
@@ -148,7 +144,8 @@ OVERLAP_SIZE = 1000
|
|
|
148
144
|
CHUNK_STEP_SIZE = CHUNK_SIZE - OVERLAP_SIZE
|
|
149
145
|
# ML hunk size to limit of variable or value size and get substring near value
|
|
150
146
|
ML_HUNK = 80
|
|
151
|
-
|
|
147
|
+
|
|
148
|
+
# values according https://docs.python.org/3/library/codecs.html
|
|
152
149
|
UTF_8 = "utf_8"
|
|
153
150
|
UTF_16 = "utf_16"
|
|
154
151
|
LATIN_1 = "latin_1"
|
|
@@ -3,25 +3,30 @@ import re
|
|
|
3
3
|
|
|
4
4
|
class KeywordPattern:
|
|
5
5
|
"""Pattern set of keyword types"""
|
|
6
|
-
|
|
6
|
+
directive = r"(?P<directive>(?:(?:[#%]define|%global)(?:\s|\\t)|\bset))?"
|
|
7
|
+
key_left = r"(?:\\[nrt]|%[0-9a-f]{2}|\s)*" \
|
|
7
8
|
r"(?P<variable>(([`'\"]{1,8}[^:='\"`}<>\\/&?]*|[^:='\"`}<>\s()\\/&?;,%]*)" \
|
|
8
9
|
r"(?P<keyword>"
|
|
9
10
|
# there will be inserted a keyword
|
|
10
11
|
key_right = r")" \
|
|
11
|
-
r"[^%:='\"`<>{?!&;\n]*" \
|
|
12
|
+
r"[^%:='\"`<>({?!&;\n]*" \
|
|
12
13
|
r")" \
|
|
13
14
|
r"(&(quot|apos);|%[0-9a-f]{2}|[`'\"])*" \
|
|
14
15
|
r")" # <variable>
|
|
15
|
-
separator = r"(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*" \
|
|
16
|
-
r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)
|
|
16
|
+
separator = r"(?(directive)|(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*)" \
|
|
17
|
+
r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=~|=" \
|
|
18
|
+
r"|(?(directive)(\\t|\s|\((?!\))){1,80}|%3d))" \
|
|
17
19
|
r"(\s|\\{1,8}[tnr])*"
|
|
18
20
|
# might be curly, square or parenthesis with words before
|
|
19
21
|
wrap = r"(?P<wrap>(" \
|
|
20
22
|
r"(new(\s|\\{1,8}[tnr]|byte|char|string|\[\]){1,8})?" \
|
|
23
|
+
r"(?P<get>([_a-z][0-9a-z_.\[\]]*\.)get|(os\.)?getenv)?" \
|
|
21
24
|
r"([0-9a-z_.]|::|-(>|&gt;))*" \
|
|
22
|
-
r"
|
|
25
|
+
r"\s*" \
|
|
26
|
+
r"(\[(?!\])|\((?!\))|\{(?!\}))" \
|
|
23
27
|
r"(\s|\\{1,8}[tnr])*" \
|
|
24
|
-
r"([
|
|
28
|
+
r"(?(get)('[^']+'|\"[^\"]+\")\s*,\s*|)" \
|
|
29
|
+
r"([0-9a-z_]{1,32}\s*[:=]\s*)?" \
|
|
25
30
|
r"){1,8})?"
|
|
26
31
|
string_prefix = r"(((b|r|br|rb|u|f|rf|fr|l|@)(?=(\\*[`'\"])))?"
|
|
27
32
|
left_quote = r"(?P<value_leftquote>((?P<esq>\\{1,8})?([`'\"]|&(quot|apos);)){1,4}))?"
|
|
@@ -39,7 +44,7 @@ class KeywordPattern:
|
|
|
39
44
|
r"(?P<url_esc>%[0-9a-f]{2})" \
|
|
40
45
|
r"|" \
|
|
41
46
|
r"(?(url_esc)[^\s`'\",;\\&]|[^\s`'\",;\\])" \
|
|
42
|
-
r")"\
|
|
47
|
+
r")" \
|
|
43
48
|
r"){4,8000}" \
|
|
44
49
|
r"|" \
|
|
45
50
|
r"(<[^>]{4,8000}>)" \
|
|
@@ -48,18 +53,19 @@ class KeywordPattern:
|
|
|
48
53
|
r"|" \
|
|
49
54
|
r"(\$?\{{1,3}[^}]{4,8000}\}{1,3})" \
|
|
50
55
|
r"|" \
|
|
51
|
-
r"(?(wrap)(?(value_leftquote)(?!\\(?P=value_leftquote))|[^\]\)\}]){16,8000})"\
|
|
56
|
+
r"(?(wrap)(?(value_leftquote)(?!\\(?P=value_leftquote))|[^\]\)\}]){16,8000})" \
|
|
52
57
|
r")" # <value>
|
|
53
58
|
right_quote = r"(?(value_leftquote)" \
|
|
54
59
|
r"(?P<value_rightquote>(?<!\\)(?P=value_leftquote)|\\$|(?<=[0-9a-z+_/-])$)" \
|
|
55
60
|
r"|" \
|
|
56
|
-
r"(?(wrap)(\]|\)|\}
|
|
61
|
+
r"(?(wrap)(\]|\)|\}|;|\\|$))" \
|
|
57
62
|
r")"
|
|
58
63
|
|
|
59
64
|
@classmethod
|
|
60
65
|
def get_keyword_pattern(cls, keyword: str) -> re.Pattern:
|
|
61
66
|
"""Returns compiled regex pattern"""
|
|
62
67
|
expression = ''.join([ #
|
|
68
|
+
cls.directive, #
|
|
63
69
|
cls.key_left, #
|
|
64
70
|
keyword, #
|
|
65
71
|
cls.key_right, #
|
|
@@ -885,7 +885,7 @@ mbler
|
|
|
885
885
|
mean
|
|
886
886
|
measur
|
|
887
887
|
medi
|
|
888
|
-
|
|
888
|
+
medus
|
|
889
889
|
meet
|
|
890
890
|
mem_
|
|
891
891
|
memb
|
|
@@ -925,7 +925,7 @@ month
|
|
|
925
925
|
morp
|
|
926
926
|
mory
|
|
927
927
|
mote
|
|
928
|
-
|
|
928
|
+
motor
|
|
929
929
|
mount
|
|
930
930
|
move
|
|
931
931
|
mpeg
|
|
@@ -1005,6 +1005,7 @@ origin
|
|
|
1005
1005
|
orithm
|
|
1006
1006
|
ormat
|
|
1007
1007
|
orph
|
|
1008
|
+
otorola
|
|
1008
1009
|
ottle
|
|
1009
1010
|
ously
|
|
1010
1011
|
out
|
|
@@ -1485,6 +1486,7 @@ up_
|
|
|
1485
1486
|
updat
|
|
1486
1487
|
upgrade
|
|
1487
1488
|
url
|
|
1489
|
+
usa
|
|
1488
1490
|
usb
|
|
1489
1491
|
use
|
|
1490
1492
|
usin
|
|
@@ -110,7 +110,8 @@ class CredentialManager:
|
|
|
110
110
|
# Match by file path+line num+value. Value required so two different credentials still be
|
|
111
111
|
# processed independently
|
|
112
112
|
candidate_key = CandidateKey(line_data)
|
|
113
|
-
if candidate_key
|
|
114
|
-
groups[candidate_key]
|
|
115
|
-
|
|
113
|
+
if candidate_key in groups:
|
|
114
|
+
groups[candidate_key].append(credential_candidate)
|
|
115
|
+
else:
|
|
116
|
+
groups[candidate_key] = [credential_candidate]
|
|
116
117
|
return groups
|
|
@@ -10,7 +10,6 @@ from colorama import Fore, Style
|
|
|
10
10
|
from credsweeper.common.constants import MAX_LINE_LENGTH, UTF_8, StartEnd, ML_HUNK
|
|
11
11
|
from credsweeper.config import Config
|
|
12
12
|
from credsweeper.utils import Util
|
|
13
|
-
from credsweeper.utils.entropy_validator import EntropyValidator
|
|
14
13
|
|
|
15
14
|
|
|
16
15
|
class LineData:
|
|
@@ -32,7 +31,7 @@ class LineData:
|
|
|
32
31
|
"""
|
|
33
32
|
|
|
34
33
|
quotation_marks = ('"', "'", '`')
|
|
35
|
-
comment_starts = ("//", "* ", "#", "/*", "<!––", "%{", "%", "...", "(*", "--", "--[[", "#=")
|
|
34
|
+
comment_starts = ("//", "* ", "# ", "/*", "<!––", "%{", "%", "...", "(*", "--", "--[[", "#=")
|
|
36
35
|
bash_param_split = re.compile("\\s+(\\-|\\||\\>|\\w+?\\>|\\&)")
|
|
37
36
|
line_endings = re.compile(r"\\{1,8}[nr]")
|
|
38
37
|
# https://en.wikipedia.org/wiki/Percent-encoding
|
|
@@ -87,8 +86,9 @@ class LineData:
|
|
|
87
86
|
self.url_part = False
|
|
88
87
|
self.wrap = None
|
|
89
88
|
self._3d_escaped_separator = False
|
|
90
|
-
|
|
91
89
|
self.initialize(match_obj)
|
|
90
|
+
# the line is very useful for debug breakpoint
|
|
91
|
+
pass # pylint: disable=W0107
|
|
92
92
|
|
|
93
93
|
def compare(self, other: 'LineData') -> bool:
|
|
94
94
|
"""Comparison method - skip whole line and checks only when variable and value are the same"""
|
|
@@ -327,11 +327,8 @@ class LineData:
|
|
|
327
327
|
True if file require quotation, False otherwise
|
|
328
328
|
|
|
329
329
|
"""
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
if Util.get_extension(self.path) in self.config.source_quote_ext:
|
|
333
|
-
return True
|
|
334
|
-
return False
|
|
330
|
+
file_type = self.file_type or Util.get_extension(self.path)
|
|
331
|
+
return bool(file_type) and file_type in self.config.source_quote_ext
|
|
335
332
|
|
|
336
333
|
@staticmethod
|
|
337
334
|
def get_hash_or_subtext(
|
|
@@ -373,10 +370,10 @@ class LineData:
|
|
|
373
370
|
def to_str(self, subtext: bool = False, hashed: bool = False) -> str:
|
|
374
371
|
"""Represent line_data with subtext or|and hashed values"""
|
|
375
372
|
cut_pos = StartEnd(self.variable_start, self.value_end) if subtext else None
|
|
376
|
-
return f"
|
|
377
|
-
f" | line_num: {self.line_num}
|
|
373
|
+
return f"path: {self.path}" \
|
|
374
|
+
f" | line_num: {self.line_num}" \
|
|
378
375
|
f" | value: '{self.get_hash_or_subtext(self.value, hashed)}'" \
|
|
379
|
-
f" |
|
|
376
|
+
f" | line: '{self.get_hash_or_subtext(self.line, hashed, cut_pos)}'"
|
|
380
377
|
|
|
381
378
|
def __str__(self):
|
|
382
379
|
return self.to_str()
|
|
@@ -393,6 +390,10 @@ class LineData:
|
|
|
393
390
|
"""
|
|
394
391
|
cut_pos = StartEnd(self.variable_start if 0 <= self.variable_start else self.value_start,
|
|
395
392
|
self.value_end) if subtext else None
|
|
393
|
+
if isinstance(self.value, str):
|
|
394
|
+
entropy = round(Util.get_shannon_entropy(self.value), 5)
|
|
395
|
+
else:
|
|
396
|
+
entropy = None
|
|
396
397
|
full_output = {
|
|
397
398
|
"key": self.key,
|
|
398
399
|
"line": self.get_hash_or_subtext(self.line, hashed, cut_pos),
|
|
@@ -401,18 +402,18 @@ class LineData:
|
|
|
401
402
|
# info may contain variable name - so let it be hashed if requested
|
|
402
403
|
"info": self.get_hash_or_subtext(self.info, hashed),
|
|
403
404
|
"pattern": self.pattern.pattern,
|
|
405
|
+
"variable": self.get_hash_or_subtext(self.variable, hashed),
|
|
406
|
+
"variable_start": self.variable_start,
|
|
407
|
+
"variable_end": self.variable_end,
|
|
404
408
|
"separator": self.separator,
|
|
405
409
|
"separator_start": self.separator_start,
|
|
406
410
|
"separator_end": self.separator_end,
|
|
407
411
|
"value": self.get_hash_or_subtext(self.value, hashed),
|
|
408
412
|
"value_start": self.value_start,
|
|
409
413
|
"value_end": self.value_end,
|
|
410
|
-
"
|
|
411
|
-
"variable_start": self.variable_start,
|
|
412
|
-
"variable_end": self.variable_end,
|
|
414
|
+
"entropy": entropy,
|
|
413
415
|
"value_leftquote": self.value_leftquote,
|
|
414
416
|
"value_rightquote": self.value_rightquote,
|
|
415
|
-
"entropy_validation": EntropyValidator(self.value).to_dict()
|
|
416
417
|
}
|
|
417
418
|
reported_output = {k: v for k, v in full_output.items() if k in self.config.line_data_output}
|
|
418
419
|
return reported_output
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from abc import abstractmethod, ABC
|
|
2
|
-
from typing import List
|
|
2
|
+
from typing import List, Optional
|
|
3
3
|
|
|
4
4
|
from credsweeper.config import Config
|
|
5
5
|
from credsweeper.credentials import Candidate
|
|
@@ -40,3 +40,12 @@ class AbstractScanner(ABC):
|
|
|
40
40
|
recursive_limit_size: int) -> List[Candidate]:
|
|
41
41
|
"""Abstract method to be defined in DeepScanner"""
|
|
42
42
|
raise NotImplementedError(__name__)
|
|
43
|
+
|
|
44
|
+
@abstractmethod
|
|
45
|
+
def data_scan(
|
|
46
|
+
self, #
|
|
47
|
+
data_provider: DataContentProvider, #
|
|
48
|
+
depth: int, #
|
|
49
|
+
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
50
|
+
"""Abstract method to be defined in DeepScanner"""
|
|
51
|
+
raise NotImplementedError(__name__)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from abc import ABC
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from credsweeper.common.constants import ASCII, MIN_DATA_LEN
|
|
6
|
+
from credsweeper.credentials import Candidate
|
|
7
|
+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
8
|
+
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class DebScanner(AbstractScanner, ABC):
|
|
14
|
+
"""Implements deb (ar) scanning"""
|
|
15
|
+
|
|
16
|
+
def data_scan(
|
|
17
|
+
self, #
|
|
18
|
+
data_provider: DataContentProvider, #
|
|
19
|
+
depth: int, #
|
|
20
|
+
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
21
|
+
"""Extracts data file from .ar (debian) archive and launches data_scan"""
|
|
22
|
+
candidates: Optional[List[Candidate]] = None
|
|
23
|
+
offset = 8 # b"!<arch>\n"
|
|
24
|
+
while offset < len(data_provider.data):
|
|
25
|
+
try:
|
|
26
|
+
file_size_data = data_provider.data[offset + 48:offset + 58]
|
|
27
|
+
file_size = int(file_size_data.decode(ASCII))
|
|
28
|
+
offset += 60
|
|
29
|
+
if file_size < MIN_DATA_LEN:
|
|
30
|
+
offset += file_size
|
|
31
|
+
continue
|
|
32
|
+
data = data_provider.data[offset:offset + file_size]
|
|
33
|
+
deb_content_provider = DataContentProvider(data=data,
|
|
34
|
+
file_path=data_provider.file_path,
|
|
35
|
+
file_type=data_provider.file_type,
|
|
36
|
+
info=f"{data_provider.info}|DEB:0x{offset:x}")
|
|
37
|
+
new_limit = recursive_limit_size - file_size
|
|
38
|
+
deb_candidates = self.recursive_scan(deb_content_provider, depth, new_limit)
|
|
39
|
+
if deb_candidates is not None:
|
|
40
|
+
if candidates:
|
|
41
|
+
candidates.extend(deb_candidates)
|
|
42
|
+
else:
|
|
43
|
+
candidates = deb_candidates
|
|
44
|
+
# data padding = 2
|
|
45
|
+
offset += 1 + file_size if 1 & file_size else file_size
|
|
46
|
+
except Exception as exc:
|
|
47
|
+
logger.error(exc)
|
|
48
|
+
return candidates
|