credsweeper 1.10.8__tar.gz → 1.11.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- {credsweeper-1.10.8 → credsweeper-1.11.1}/PKG-INFO +1 -1
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/__init__.py +1 -1
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/common/morpheme_checklist.txt +2 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/deep_scanner.py +5 -0
- credsweeper-1.11.1/credsweeper/deep_scanner/tmx_scanner.py +45 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/__init__.py +3 -1
- credsweeper-1.11.1/credsweeper/ml_model/features/word_in_postamble.py +32 -0
- credsweeper-1.11.1/credsweeper/ml_model/features/word_in_preamble.py +37 -0
- credsweeper-1.10.8/credsweeper/ml_model/features/word_in_line.py → credsweeper-1.11.1/credsweeper/ml_model/features/word_in_transition.py +10 -7
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/ml_config.json +214 -80
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/ml_model.onnx +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/utils/util.py +16 -4
- {credsweeper-1.10.8 → credsweeper-1.11.1}/.gitignore +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/LICENSE +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/README.md +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/__main__.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/app.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/common/__init__.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/common/constants.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/common/keyword_checklist.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/common/keyword_checklist.txt +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/common/keyword_pattern.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/config/__init__.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/config/config.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/credentials/__init__.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/credentials/augment_candidates.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/credentials/candidate.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/credentials/candidate_group_generator.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/credentials/candidate_key.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/credentials/credential_manager.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/credentials/line_data.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/__init__.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/abstract_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/byte_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/bzip2_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/docx_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/eml_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/encoder_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/gzip_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/html_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/jks_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/lang_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/mxfile_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/pdf_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/pkcs12_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/pptx_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/tar_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/xlsx_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/xml_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/zip_scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/__init__.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/abstract_provider.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/analysis_target.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/byte_content_provider.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/content_provider.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/data_content_provider.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/descriptor.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/diff_content_provider.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/file_path_extractor.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/files_provider.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/patches_provider.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/string_content_provider.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/struct_content_provider.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/text_content_provider.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/__init__.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/filter.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/__init__.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/general_keyword.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/general_pattern.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/group.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/password_keyword.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/token_pattern.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/url_credentials_group.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/weird_base36_token.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/weird_base64_token.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/line_git_binary_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/line_specific_key_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/line_uue_part_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_allowlist_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_array_dictionary_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_atlassian_token_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_azure_token_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_base32_data_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_base64_data_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_base64_encoded_pem_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_base64_key_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_base64_part_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_blocklist_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_camel_case_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_couple_keyword_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_dictionary_keyword_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_dictionary_value_length_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_discord_bot_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_entropy_base32_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_entropy_base36_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_entropy_base64_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_file_path_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_first_word_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_github_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_grafana_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_grafana_service_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_hex_number_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_jfrog_token_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_json_web_token_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_last_word_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_method_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_not_allowed_pattern_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_not_part_encoded_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_number_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_pattern_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_similarity_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_split_keyword_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_string_type_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_token_base32_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_token_base36_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_token_base64_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_token_base_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_token_check.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/logger/__init__.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/logger/logger.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/__init__.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/entropy_evaluation.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/feature.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/file_extension.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/has_html_tag.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/is_secret_numeric.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/length_of_attribute.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/morpheme_dense.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/rule_name.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/search_in_attribute.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/word_in.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/word_in_path.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/word_in_value.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/word_in_variable.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/ml_validator.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/py.typed +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/rules/__init__.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/rules/config.yaml +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/rules/rule.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/scanner/__init__.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/scanner/scan_type/__init__.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/scanner/scan_type/multi_pattern.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/scanner/scan_type/pem_key_pattern.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/scanner/scan_type/scan_type.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/scanner/scan_type/single_pattern.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/scanner/scanner.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/secret/config.json +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/secret/log.yaml +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/utils/__init__.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/utils/entropy_validator.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/utils/hop_stat.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/utils/pem_key_detector.py +0 -0
- {credsweeper-1.10.8 → credsweeper-1.11.1}/pyproject.toml +0 -0
|
@@ -28,6 +28,7 @@ from .pdf_scanner import PdfScanner
|
|
|
28
28
|
from .pkcs12_scanner import Pkcs12Scanner
|
|
29
29
|
from .pptx_scanner import PptxScanner
|
|
30
30
|
from .tar_scanner import TarScanner
|
|
31
|
+
from .tmx_scanner import TmxScanner
|
|
31
32
|
from .xlsx_scanner import XlsxScanner
|
|
32
33
|
from .xml_scanner import XmlScanner
|
|
33
34
|
from .zip_scanner import ZipScanner
|
|
@@ -126,6 +127,10 @@ class DeepScanner(
|
|
|
126
127
|
deep_scanners.append(MxfileScanner)
|
|
127
128
|
deep_scanners.append(XmlScanner)
|
|
128
129
|
fallback_scanners.append(ByteScanner)
|
|
130
|
+
elif Util.is_tmx(data):
|
|
131
|
+
deep_scanners.append(TmxScanner)
|
|
132
|
+
fallback_scanners.append(XmlScanner)
|
|
133
|
+
fallback_scanners.append(ByteScanner)
|
|
129
134
|
else:
|
|
130
135
|
deep_scanners.append(XmlScanner)
|
|
131
136
|
fallback_scanners.append(ByteScanner)
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from abc import ABC
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from lxml import etree
|
|
6
|
+
|
|
7
|
+
from credsweeper.common.constants import MIN_DATA_LEN
|
|
8
|
+
from credsweeper.credentials import Candidate
|
|
9
|
+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
10
|
+
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
11
|
+
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
12
|
+
from credsweeper.utils import Util
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TmxScanner(AbstractScanner, ABC):
    """Scans tmX files for text values only; elements whose tag contains "Image" are skipped."""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Parse the data as XML, collect the element texts and scan them as text lines.

        Args:
            data_provider: provider whose ``data`` is parsed as XML
            depth: not used in this method
            recursive_limit_size: not used in this method

        Return:
            whatever ``self.scanner.scan`` returns for the collected texts,
            or None when any exception was raised while processing

        """
        try:
            # tmX content comes as single-line XML, so element line numbers are meaningless here
            # NOTE(review): parsed with the default lxml parser settings - confirm that is fine for untrusted input
            root = etree.fromstring(data_provider.data)
            texts = []
            for node in root.iter():
                if "Image" in Util.extract_element_data(node, "tag"):
                    # image elements are not interesting for the scan
                    continue
                value = Util.extract_element_data(node, "text")
                if len(value) >= MIN_DATA_LEN:
                    texts.append(value)
            provider = StringContentProvider(lines=texts,
                                             file_path=data_provider.file_path,
                                             file_type=data_provider.file_type,
                                             info=f"{data_provider.info}|TMX")
            return self.scanner.scan(provider)
        except Exception as exc:
            # best effort: the failure is reported and the caller receives None
            logger.warning("Cannot processed tmX file %s %s", str(data_provider.file_path), str(exc))
        return None
|
|
@@ -6,7 +6,9 @@ from credsweeper.ml_model.features.length_of_attribute import LengthOfAttribute
|
|
|
6
6
|
from credsweeper.ml_model.features.morpheme_dense import MorphemeDense
|
|
7
7
|
from credsweeper.ml_model.features.rule_name import RuleName
|
|
8
8
|
from credsweeper.ml_model.features.search_in_attribute import SearchInAttribute
|
|
9
|
-
from credsweeper.ml_model.features.word_in_line import WordInLine
|
|
10
9
|
from credsweeper.ml_model.features.word_in_path import WordInPath
|
|
10
|
+
from credsweeper.ml_model.features.word_in_postamble import WordInPostamble
|
|
11
|
+
from credsweeper.ml_model.features.word_in_preamble import WordInPreamble
|
|
12
|
+
from credsweeper.ml_model.features.word_in_transition import WordInTransition
|
|
11
13
|
from credsweeper.ml_model.features.word_in_value import WordInValue
|
|
12
14
|
from credsweeper.ml_model.features.word_in_variable import WordInVariable
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from credsweeper.common.constants import ML_HUNK
|
|
6
|
+
from credsweeper.credentials import Candidate
|
|
7
|
+
from credsweeper.ml_model.features.word_in import WordIn
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class WordInPostamble(WordIn):
    """Feature looks for predefined words in the part of the line that follows the value."""

    def __init__(self, words: List[str]) -> None:
        """Pass the predefined words to the base feature.

        Args:
            words: list of predefined words - MUST BE IN LOWER CASE

        """
        super().__init__(words)

    def extract(self, candidate: Candidate) -> np.ndarray:
        """Check the text located right after the value for the predefined words.

        Only the first item of candidate.line_data_list is used. The inspected window starts at value_end
        and spans at most ML_HUNK characters, clipped to the end of the line.

        Args:
            candidate: candidate whose first line data supplies line and value_end

        Return:
            result of word_in_str for the stripped, lower-cased window,
            or a zero int8 array of shape (1, dimension) when the window is empty

        """
        line_data = candidate.line_data_list[0]
        tail_start = line_data.value_end
        # never read beyond the end of the line
        tail_stop = min(len(line_data.line), tail_start + ML_HUNK)
        tail = line_data.line[tail_start:tail_stop].strip()
        if not tail:
            return np.array([np.zeros(shape=[self.dimension], dtype=np.int8)])
        return self.word_in_str(tail.lower())
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from credsweeper.common.constants import ML_HUNK
|
|
6
|
+
from credsweeper.credentials import Candidate
|
|
7
|
+
from credsweeper.ml_model.features.word_in import WordIn
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class WordInPreamble(WordIn):
    """Feature looks for predefined words in the part of the line that precedes the variable or the value."""

    def __init__(self, words: List[str]) -> None:
        """Pass the predefined words to the base feature.

        Args:
            words: list of predefined words - MUST BE IN LOWER CASE

        """
        super().__init__(words)

    def extract(self, candidate: Candidate) -> np.ndarray:
        """Check the text located right before the variable (or the value) for the predefined words.

        Only the first item of candidate.line_data_list is used. The inspected window ends at variable_start
        when it is not negative, otherwise at value_start, and spans at most ML_HUNK characters before that point.

        Args:
            candidate: candidate whose first line data supplies line, variable_start and value_start

        Return:
            result of word_in_str for the stripped, lower-cased window,
            or a zero int8 array of shape (1, dimension) when the window is empty

        """
        line_data = candidate.line_data_list[0]
        # a negative variable_start switches the anchor to the value position
        anchor = line_data.variable_start if 0 <= line_data.variable_start else line_data.value_start
        # the window start is clamped at the beginning of the line
        head_start = max(0, anchor - ML_HUNK)
        head = line_data.line[head_start:anchor].strip()
        if not head:
            return np.array([np.zeros(shape=[self.dimension], dtype=np.int8)])
        return self.word_in_str(head.lower())
|
|
@@ -2,13 +2,11 @@ from typing import List
|
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
|
|
5
|
-
from credsweeper.common.constants import CHUNK_SIZE
|
|
6
5
|
from credsweeper.credentials import Candidate
|
|
7
6
|
from credsweeper.ml_model.features.word_in import WordIn
|
|
8
|
-
from credsweeper.utils import Util
|
|
9
7
|
|
|
10
8
|
|
|
11
|
-
class WordInLine(WordIn):
|
|
9
|
+
class WordInTransition(WordIn):
|
|
12
10
|
"""Feature is true if line contains at least one word from predefined list."""
|
|
13
11
|
|
|
14
12
|
def __init__(self, words: List[str]) -> None:
|
|
@@ -21,9 +19,14 @@ class WordInLine(WordIn):
|
|
|
21
19
|
super().__init__(words)
|
|
22
20
|
|
|
23
21
|
def extract(self, candidate: Candidate) -> np.ndarray:
    """Check the text located between the variable and the value for the predefined words.

    Only the first item of candidate.line_data_list is used. The text is taken from variable_end up to
    value_start, and only when 0 <= variable_end < value_start holds.

    Args:
        candidate: candidate whose first line data supplies line, variable_end and value_start

    Return:
        result of word_in_str for the stripped, lower-cased text,
        or a zero int8 array of shape (1, dimension) when there is nothing to inspect

    """
    line_data = candidate.line_data_list[0]
    gap_start, gap_stop = line_data.variable_end, line_data.value_start
    if 0 <= gap_start < gap_stop:
        gap = line_data.line[gap_start:gap_stop].strip()
        if gap:
            return self.word_in_str(gap.lower())
    # no usable span between variable and value, or it holds whitespace only
    return np.array([np.zeros(shape=[self.dimension], dtype=np.int8)])
|
|
@@ -70,6 +70,38 @@
|
|
|
70
70
|
"attribute": "value"
|
|
71
71
|
}
|
|
72
72
|
},
|
|
73
|
+
{
|
|
74
|
+
"type": "SearchInAttribute",
|
|
75
|
+
"comment": "camelStyle naming detection",
|
|
76
|
+
"kwargs": {
|
|
77
|
+
"pattern": "^[a-z][a-z]{1,16}[0-9]*([A-Z]([a-z]{1,16}[0-9]*|[0-9]{1,16})){1,8}$",
|
|
78
|
+
"attribute": "value"
|
|
79
|
+
}
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
"type": "SearchInAttribute",
|
|
83
|
+
"comment": "PascalStyle naming detection",
|
|
84
|
+
"kwargs": {
|
|
85
|
+
"pattern": "^([A-Z]([a-z]{1,16}[0-9]*|[0-9]{1,16})){1,8}$",
|
|
86
|
+
"attribute": "value"
|
|
87
|
+
}
|
|
88
|
+
},
|
|
89
|
+
{
|
|
90
|
+
"type": "SearchInAttribute",
|
|
91
|
+
"comment": "UPPERCASE naming detection",
|
|
92
|
+
"kwargs": {
|
|
93
|
+
"pattern": "^(_+[0-9]{1,16}|_*[A-Z]{1,16}[0-9]*)(_+([0-9]{1,16}|[A-Z]{1,16}[0-9]*)){1,8}_*$",
|
|
94
|
+
"attribute": "value"
|
|
95
|
+
}
|
|
96
|
+
},
|
|
97
|
+
{
|
|
98
|
+
"type": "SearchInAttribute",
|
|
99
|
+
"comment": "lowercase naming detection",
|
|
100
|
+
"kwargs": {
|
|
101
|
+
"pattern": "^(_+[0-9]{1,16}|_*[a-z]{1,16}[0-9]*)(_+([0-9]{1,16}|[a-z]{1,16}[0-9]*)){1,8}_*$",
|
|
102
|
+
"attribute": "value"
|
|
103
|
+
}
|
|
104
|
+
},
|
|
73
105
|
{
|
|
74
106
|
"type": "SearchInAttribute",
|
|
75
107
|
"comment": "VariableNotAllowedPatternCheck",
|
|
@@ -82,7 +114,7 @@
|
|
|
82
114
|
"type": "SearchInAttribute",
|
|
83
115
|
"comment": "VariableNotAllowedNameCheck",
|
|
84
116
|
"kwargs": {
|
|
85
|
-
"pattern": "(?i:pub(lic)?_?key)",
|
|
117
|
+
"pattern": "(?i:(filters?|pub(lic)?)_?key)",
|
|
86
118
|
"attribute": "variable"
|
|
87
119
|
}
|
|
88
120
|
},
|
|
@@ -90,7 +122,15 @@
|
|
|
90
122
|
"type": "SearchInAttribute",
|
|
91
123
|
"comment": "VariableNotAllowedNameCheck",
|
|
92
124
|
"kwargs": {
|
|
93
|
-
"pattern": "(?i:
|
|
125
|
+
"pattern": "(?i:(id|size|name|type|manager)$)",
|
|
126
|
+
"attribute": "variable"
|
|
127
|
+
}
|
|
128
|
+
},
|
|
129
|
+
{
|
|
130
|
+
"type": "SearchInAttribute",
|
|
131
|
+
"comment": "PWD invocation",
|
|
132
|
+
"kwargs": {
|
|
133
|
+
"pattern": "(?i:(^\\$pwd$)|(^\\$\\{#?pwd[^}]*\\}$)|(^\\$\\(pwd\\)$)|(^`pwd`$))",
|
|
94
134
|
"attribute": "variable"
|
|
95
135
|
}
|
|
96
136
|
},
|
|
@@ -98,44 +138,55 @@
|
|
|
98
138
|
"type": "WordInVariable",
|
|
99
139
|
"kwargs": {
|
|
100
140
|
"words": [
|
|
101
|
-
"/",
|
|
102
141
|
" ",
|
|
142
|
+
"/",
|
|
103
143
|
"_at",
|
|
104
144
|
"_id",
|
|
105
|
-
"
|
|
145
|
+
"_len",
|
|
146
|
+
"access",
|
|
147
|
+
"cache",
|
|
148
|
+
"client",
|
|
149
|
+
"control",
|
|
150
|
+
"encrypted",
|
|
151
|
+
"example",
|
|
152
|
+
"expire",
|
|
153
|
+
"fake",
|
|
106
154
|
"file",
|
|
107
|
-
"
|
|
155
|
+
"filter",
|
|
156
|
+
"fingerprint",
|
|
108
157
|
"hash",
|
|
158
|
+
"key",
|
|
159
|
+
"label",
|
|
160
|
+
"length",
|
|
161
|
+
"manager",
|
|
162
|
+
"mock",
|
|
109
163
|
"name",
|
|
164
|
+
"native",
|
|
165
|
+
"obj",
|
|
166
|
+
"option",
|
|
167
|
+
"p/w",
|
|
168
|
+
"parameter",
|
|
169
|
+
"pass",
|
|
170
|
+
"path",
|
|
171
|
+
"project",
|
|
172
|
+
"public",
|
|
173
|
+
"pw",
|
|
174
|
+
"secret",
|
|
175
|
+
"size",
|
|
176
|
+
"space",
|
|
177
|
+
"status",
|
|
178
|
+
"sword",
|
|
179
|
+
"temp",
|
|
110
180
|
"test",
|
|
181
|
+
"thumbprint",
|
|
111
182
|
"time",
|
|
112
|
-
"
|
|
183
|
+
"timestamp",
|
|
184
|
+
"title",
|
|
185
|
+
"token",
|
|
113
186
|
"type",
|
|
114
|
-
"mock",
|
|
115
|
-
"size",
|
|
116
187
|
"uniq",
|
|
117
|
-
"fake",
|
|
118
|
-
"view",
|
|
119
|
-
"cache",
|
|
120
188
|
"valid",
|
|
121
|
-
"
|
|
122
|
-
"title",
|
|
123
|
-
"access",
|
|
124
|
-
"space",
|
|
125
|
-
"filter",
|
|
126
|
-
"native",
|
|
127
|
-
"status",
|
|
128
|
-
"expire",
|
|
129
|
-
"client",
|
|
130
|
-
"option",
|
|
131
|
-
"public",
|
|
132
|
-
"project",
|
|
133
|
-
"control",
|
|
134
|
-
"parameter",
|
|
135
|
-
"encrypted",
|
|
136
|
-
"timestamp",
|
|
137
|
-
"thumbprint",
|
|
138
|
-
"fingerprint"
|
|
189
|
+
"view"
|
|
139
190
|
]
|
|
140
191
|
}
|
|
141
192
|
},
|
|
@@ -144,76 +195,154 @@
|
|
|
144
195
|
"kwargs": {
|
|
145
196
|
"words": [
|
|
146
197
|
" ",
|
|
198
|
+
"$(",
|
|
199
|
+
"${",
|
|
147
200
|
"(",
|
|
148
|
-
"[",
|
|
149
|
-
".",
|
|
150
201
|
"->",
|
|
151
|
-
"
|
|
152
|
-
"$(",
|
|
202
|
+
".",
|
|
153
203
|
"...",
|
|
154
|
-
"foo",
|
|
155
|
-
"bar",
|
|
156
204
|
"123",
|
|
205
|
+
"<",
|
|
206
|
+
">",
|
|
207
|
+
"[",
|
|
208
|
+
"_id",
|
|
157
209
|
"abc",
|
|
158
|
-
"
|
|
159
|
-
"
|
|
160
|
-
"
|
|
161
|
-
"
|
|
162
|
-
"
|
|
163
|
-
"
|
|
210
|
+
"allow",
|
|
211
|
+
"bar",
|
|
212
|
+
"disable",
|
|
213
|
+
"changeme",
|
|
214
|
+
"example",
|
|
215
|
+
"fake",
|
|
216
|
+
"file",
|
|
217
|
+
"foo",
|
|
218
|
+
"min",
|
|
219
|
+
"mock",
|
|
220
|
+
"my",
|
|
221
|
+
"nil",
|
|
164
222
|
"pass",
|
|
223
|
+
"passwd",
|
|
165
224
|
"password",
|
|
166
|
-
"
|
|
167
|
-
"null",
|
|
168
|
-
"nil",
|
|
169
|
-
"undefined",
|
|
170
|
-
"none",
|
|
171
|
-
"true",
|
|
172
|
-
"false",
|
|
173
|
-
"example",
|
|
225
|
+
"pswd",
|
|
174
226
|
"public",
|
|
175
|
-
"
|
|
176
|
-
"fake",
|
|
227
|
+
"pwd",
|
|
177
228
|
"test",
|
|
178
|
-
"
|
|
179
|
-
"
|
|
180
|
-
"file",
|
|
181
|
-
"id"
|
|
229
|
+
"xxx",
|
|
230
|
+
"xyz"
|
|
182
231
|
]
|
|
183
232
|
}
|
|
184
233
|
},
|
|
185
234
|
{
|
|
186
|
-
"type": "
|
|
235
|
+
"type": "WordInPreamble",
|
|
187
236
|
"kwargs": {
|
|
188
237
|
"words": [
|
|
238
|
+
"$",
|
|
239
|
+
"%2",
|
|
240
|
+
"%3",
|
|
241
|
+
"&",
|
|
242
|
+
"&amp;",
|
|
189
243
|
"(",
|
|
190
|
-
"
|
|
244
|
+
"->",
|
|
191
245
|
".",
|
|
192
|
-
"$",
|
|
193
246
|
"://",
|
|
247
|
+
"?",
|
|
194
248
|
"@",
|
|
195
|
-
"
|
|
196
|
-
"
|
|
197
|
-
"
|
|
198
|
-
"
|
|
249
|
+
"[",
|
|
250
|
+
"approval",
|
|
251
|
+
"assert",
|
|
252
|
+
"case",
|
|
253
|
+
"circle",
|
|
254
|
+
"equal",
|
|
255
|
+
"example",
|
|
256
|
+
"expect",
|
|
257
|
+
"false",
|
|
258
|
+
"height",
|
|
259
|
+
"image",
|
|
260
|
+
"line",
|
|
261
|
+
"media",
|
|
262
|
+
"nil",
|
|
263
|
+
"none",
|
|
264
|
+
"null",
|
|
199
265
|
"pass",
|
|
200
266
|
"password",
|
|
201
|
-
"
|
|
202
|
-
"
|
|
203
|
-
"
|
|
204
|
-
"
|
|
267
|
+
"path",
|
|
268
|
+
"pwd",
|
|
269
|
+
"sqa",
|
|
270
|
+
"test",
|
|
271
|
+
"true",
|
|
205
272
|
"undefined",
|
|
206
|
-
"none",
|
|
207
273
|
"unit",
|
|
274
|
+
"width"
|
|
275
|
+
]
|
|
276
|
+
}
|
|
277
|
+
},
|
|
278
|
+
{
|
|
279
|
+
"type": "WordInTransition",
|
|
280
|
+
"kwargs": {
|
|
281
|
+
"words": [
|
|
282
|
+
"%2",
|
|
283
|
+
"%3",
|
|
284
|
+
"&",
|
|
285
|
+
"(",
|
|
286
|
+
"->",
|
|
287
|
+
".",
|
|
288
|
+
"?",
|
|
289
|
+
"@",
|
|
290
|
+
"[",
|
|
291
|
+
"bearer",
|
|
292
|
+
"equal",
|
|
293
|
+
"example",
|
|
294
|
+
"expect",
|
|
295
|
+
"line",
|
|
296
|
+
"media",
|
|
297
|
+
"pass",
|
|
298
|
+
"password",
|
|
299
|
+
"path",
|
|
208
300
|
"test",
|
|
209
|
-
"
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
301
|
+
"unit"
|
|
302
|
+
]
|
|
303
|
+
}
|
|
304
|
+
},
|
|
305
|
+
{
|
|
306
|
+
"type": "WordInPostamble",
|
|
307
|
+
"kwargs": {
|
|
308
|
+
"words": [
|
|
309
|
+
"$",
|
|
310
|
+
"%2",
|
|
311
|
+
"%3",
|
|
312
|
+
"&",
|
|
313
|
+
"&amp;",
|
|
314
|
+
"(",
|
|
315
|
+
"->",
|
|
316
|
+
".",
|
|
317
|
+
"://",
|
|
318
|
+
"?",
|
|
319
|
+
"@",
|
|
320
|
+
"[",
|
|
213
321
|
"assert",
|
|
322
|
+
"case",
|
|
323
|
+
"circle",
|
|
214
324
|
"equal",
|
|
215
325
|
"example",
|
|
216
|
-
"expect"
|
|
326
|
+
"expect",
|
|
327
|
+
"false",
|
|
328
|
+
"height",
|
|
329
|
+
"image",
|
|
330
|
+
"line",
|
|
331
|
+
"media",
|
|
332
|
+
"nil",
|
|
333
|
+
"none",
|
|
334
|
+
"null",
|
|
335
|
+
"pass",
|
|
336
|
+
"passwd",
|
|
337
|
+
"password",
|
|
338
|
+
"path",
|
|
339
|
+
"pwd",
|
|
340
|
+
"sqa",
|
|
341
|
+
"test",
|
|
342
|
+
"true",
|
|
343
|
+
"undefined",
|
|
344
|
+
"unit",
|
|
345
|
+
"width"
|
|
217
346
|
]
|
|
218
347
|
}
|
|
219
348
|
},
|
|
@@ -221,20 +350,22 @@
|
|
|
221
350
|
"type": "WordInPath",
|
|
222
351
|
"kwargs": {
|
|
223
352
|
"words": [
|
|
224
|
-
"/
|
|
225
|
-
"/config",
|
|
226
|
-
"/src/",
|
|
227
|
-
"/record",
|
|
228
|
-
"/usr/local/lib/python",
|
|
353
|
+
"/conf",
|
|
229
354
|
"/dist-packages/",
|
|
355
|
+
"/example",
|
|
356
|
+
"/record",
|
|
357
|
+
"/script",
|
|
230
358
|
"/site-packages/",
|
|
231
|
-
"/
|
|
359
|
+
"/src/",
|
|
360
|
+
"/test",
|
|
361
|
+
"/tool",
|
|
362
|
+
"/usr/local/lib/python",
|
|
363
|
+
"/assets/"
|
|
232
364
|
]
|
|
233
365
|
}
|
|
234
366
|
},
|
|
235
367
|
{
|
|
236
|
-
"type": "MorphemeDense"
|
|
237
|
-
"kwargs": {}
|
|
368
|
+
"type": "MorphemeDense"
|
|
238
369
|
},
|
|
239
370
|
{
|
|
240
371
|
"type": "HasHtmlTag"
|
|
@@ -255,6 +386,7 @@
|
|
|
255
386
|
".bat",
|
|
256
387
|
".bats",
|
|
257
388
|
".bazel",
|
|
389
|
+
".bin",
|
|
258
390
|
".build",
|
|
259
391
|
".bundle",
|
|
260
392
|
".bzl",
|
|
@@ -337,6 +469,7 @@
|
|
|
337
469
|
".nix",
|
|
338
470
|
".nolint",
|
|
339
471
|
".odd",
|
|
472
|
+
".onnx",
|
|
340
473
|
".oracle",
|
|
341
474
|
".pan",
|
|
342
475
|
".patch",
|
|
@@ -396,6 +529,7 @@
|
|
|
396
529
|
".ts",
|
|
397
530
|
".tsx",
|
|
398
531
|
".txt",
|
|
532
|
+
".var",
|
|
399
533
|
".vue",
|
|
400
534
|
".xaml",
|
|
401
535
|
".xib",
|
|
Binary file
|
|
@@ -517,6 +517,18 @@ class Util:
|
|
|
517
517
|
return True
|
|
518
518
|
return False
|
|
519
519
|
|
|
520
|
+
@staticmethod
def is_tmx(data: Union[bytes, bytearray]) -> bool:
    """Used to detect tm7, tm6, etc. (threat modeling) format.

    The data is accepted when an opening <ThreatModel or <KnowledgeBase tag starts within the first
    MAX_LINE_LENGTH bytes and the corresponding closing tag is found somewhere after it.

    Args:
        data: raw content to check

    Return:
        True when one of the tag pairs is found in that order,
        False otherwise, including when data is neither bytes nor bytearray

    """
    if not isinstance(data, (bytes, bytearray)):
        return False
    tag_pairs = (
        (b"<ThreatModel", b"</ThreatModel>"),
        (b"<KnowledgeBase", b"</KnowledgeBase>"),
    )
    for head, tail in tag_pairs:
        # the opening tag has to start near the beginning of the data
        head_at = data.find(head, 0, MAX_LINE_LENGTH)
        if head_at < 0:
            continue
        if head_at < data.find(tail, head_at):
            # opening and closing tags were found in order - suppose it is a tmX document
            return True
    return False
|
|
531
|
+
|
|
520
532
|
# A well-formed XML must start from < or a whitespace character
|
|
521
533
|
XML_FIRST_BRACKET_PATTERN = re.compile(rb"^\s*<")
|
|
522
534
|
XML_OPENING_TAG_PATTERN = re.compile(rb"<([0-9A-Za-z_]{1,256})")
|
|
@@ -583,14 +595,14 @@ class Util:
|
|
|
583
595
|
line_nums = []
|
|
584
596
|
tree = etree.fromstringlist(xml_lines)
|
|
585
597
|
for element in tree.iter():
|
|
586
|
-
tag = Util.
|
|
587
|
-
text = Util.
|
|
598
|
+
tag = Util.extract_element_data(element, "tag")
|
|
599
|
+
text = Util.extract_element_data(element, "text")
|
|
588
600
|
lines.append(f"{tag} : {text}")
|
|
589
601
|
line_nums.append(element.sourceline)
|
|
590
602
|
return lines, line_nums
|
|
591
603
|
|
|
592
604
|
@staticmethod
|
|
593
|
-
def
|
|
605
|
+
def extract_element_data(element: Any, attr: str) -> str:
|
|
594
606
|
"""Extract xml element data to string.
|
|
595
607
|
|
|
596
608
|
Try to extract the xml data and strip() the string.
|
|
@@ -605,7 +617,7 @@ class Util:
|
|
|
605
617
|
"""
|
|
606
618
|
element_attr: Any = getattr(element, attr)
|
|
607
619
|
if element_attr is None or not isinstance(element_attr, str):
|
|
608
|
-
return
|
|
620
|
+
return ''
|
|
609
621
|
return str(element_attr).strip()
|
|
610
622
|
|
|
611
623
|
@staticmethod
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|