PyPI - credsweeper - Versions diffs - 1.11.2__py3-none-any.whl → 1.11.4__py3-none-any.whl - Mend

credsweeper 1.11.2__py3-none-any.whl → 1.11.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of credsweeper might be problematic. Click here for more details.

Files changed (73)

credsweeper/__init__.py +1 -1
credsweeper/__main__.py +7 -5
credsweeper/app.py +28 -47
credsweeper/common/constants.py +2 -5
credsweeper/common/keyword_pattern.py +15 -9
credsweeper/common/morpheme_checklist.txt +4 -2
credsweeper/credentials/candidate_key.py +1 -1
credsweeper/credentials/credential_manager.py +4 -3
credsweeper/credentials/line_data.py +16 -15
credsweeper/deep_scanner/abstract_scanner.py +10 -1
credsweeper/deep_scanner/deb_scanner.py +48 -0
credsweeper/deep_scanner/deep_scanner.py +65 -43
credsweeper/deep_scanner/docx_scanner.py +1 -1
credsweeper/deep_scanner/encoder_scanner.py +2 -2
credsweeper/deep_scanner/gzip_scanner.py +1 -1
credsweeper/deep_scanner/html_scanner.py +3 -3
credsweeper/deep_scanner/jks_scanner.py +2 -4
credsweeper/deep_scanner/lang_scanner.py +2 -2
credsweeper/deep_scanner/lzma_scanner.py +40 -0
credsweeper/deep_scanner/pkcs12_scanner.py +3 -5
credsweeper/deep_scanner/xml_scanner.py +2 -2
credsweeper/file_handler/byte_content_provider.py +2 -2
credsweeper/file_handler/content_provider.py +1 -1
credsweeper/file_handler/data_content_provider.py +23 -14
credsweeper/file_handler/diff_content_provider.py +2 -2
credsweeper/file_handler/file_path_extractor.py +1 -1
credsweeper/file_handler/files_provider.py +2 -4
credsweeper/file_handler/patches_provider.py +1 -1
credsweeper/file_handler/string_content_provider.py +2 -2
credsweeper/file_handler/struct_content_provider.py +1 -1
credsweeper/file_handler/text_content_provider.py +2 -2
credsweeper/filters/value_array_dictionary_check.py +3 -1
credsweeper/filters/value_azure_token_check.py +1 -2
credsweeper/filters/value_base64_encoded_pem_check.py +1 -1
credsweeper/filters/value_base64_part_check.py +30 -21
credsweeper/filters/value_discord_bot_check.py +1 -2
credsweeper/filters/value_entropy_base32_check.py +11 -31
credsweeper/filters/value_entropy_base36_check.py +11 -34
credsweeper/filters/value_entropy_base64_check.py +15 -48
credsweeper/filters/value_entropy_base_check.py +37 -0
credsweeper/filters/value_file_path_check.py +1 -1
credsweeper/filters/value_hex_number_check.py +3 -3
credsweeper/filters/value_json_web_token_check.py +4 -5
credsweeper/filters/value_pattern_check.py +64 -16
credsweeper/filters/value_string_type_check.py +11 -3
credsweeper/filters/value_token_base32_check.py +0 -4
credsweeper/filters/value_token_base36_check.py +0 -4
credsweeper/filters/value_token_base64_check.py +0 -4
credsweeper/filters/value_token_check.py +1 -1
credsweeper/ml_model/features/file_extension.py +2 -2
credsweeper/ml_model/features/morpheme_dense.py +0 -4
credsweeper/ml_model/features/rule_name.py +1 -1
credsweeper/ml_model/features/word_in_path.py +0 -9
credsweeper/ml_model/features/word_in_postamble.py +0 -11
credsweeper/ml_model/features/word_in_preamble.py +0 -11
credsweeper/ml_model/features/word_in_transition.py +0 -11
credsweeper/ml_model/features/word_in_value.py +0 -11
credsweeper/ml_model/features/word_in_variable.py +0 -11
credsweeper/ml_model/ml_validator.py +45 -22
credsweeper/rules/config.yaml +238 -208
credsweeper/rules/rule.py +3 -3
credsweeper/scanner/scan_type/scan_type.py +2 -3
credsweeper/scanner/scanner.py +7 -1
credsweeper/secret/config.json +16 -5
credsweeper/utils/hop_stat.py +3 -3
credsweeper/utils/pem_key_detector.py +8 -7
credsweeper/utils/util.py +76 -146
{credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/METADATA +1 -1
{credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/RECORD +72 -70
credsweeper/utils/entropy_validator.py +0 -72
{credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/WHEEL +0 -0
{credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/entry_points.txt +0 -0
{credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/licenses/LICENSE +0 -0

credsweeper/rules/rule.py CHANGED Viewed

@@ -179,7 +179,6 @@ class Rule:
             for value in _values:
                 _pattern = KeywordPattern.get_keyword_pattern(value)
                 _patterns.append(_pattern)
-            return _patterns
         elif RuleType.MULTI == self.rule_type and 2 == len(_values) \
                 or self.rule_type in (RuleType.PATTERN, RuleType.PEM_KEY) and 0 < len(_values):
             for value in _values:
@@ -188,8 +187,9 @@ class Rule:
                 logger.warning(f"Rule {self.rule_name} has extra patterns. Only single pattern supported.")
             elif RuleType.MULTI == self.rule_type and 2 < len(_values):
                 logger.warning(f"Rule {self.rule_name} has extra patterns. Only two patterns supported.")
-            return _patterns
-        raise ValueError(f"Malformed rule config file. Rule '{self.rule_name}' type '{self.rule_type}' is invalid.")
+        else:
+            raise ValueError(f"Malformed rule config file. Rule '{self.rule_name}' type '{self.rule_type}' is invalid.")
+        return _patterns
     @cached_property
     def patterns(self) -> List[re.Pattern]:

credsweeper/scanner/scan_type/scan_type.py CHANGED Viewed

@@ -38,13 +38,12 @@ class ScanType(ABC):
         raise NotImplementedError()
     @classmethod
-    def filtering(cls, config: Config, target: AnalysisTarget, line_data: LineData, filters: List[Filter]) -> bool:
+    def filtering(cls, target: AnalysisTarget, line_data: LineData, filters: List[Filter]) -> bool:
         """Check if line data should be removed based on filters.
         If `use_filters` option is false, always return False
         Args:
-            config: dict of credsweeper configuration
             target: AnalysisTarget from which `line_data` was obtained
             line_data: Line data to check with `filters`
             filters: Filters to use
@@ -112,7 +111,7 @@ class ScanType(ABC):
                     bypass_start = line_data.value_end
                     bypass_end = offset_end
-                if config.use_filters and cls.filtering(config, target, line_data, filters):
+                if config.use_filters and cls.filtering(target, line_data, filters):
                     if line_data.variable and 0 <= line_data.variable_start < line_data.variable_end:
                         # may be next matched item will be not filtered - let search it after variable
                         bypass_start = line_data.variable_end

credsweeper/scanner/scanner.py CHANGED Viewed

@@ -146,7 +146,13 @@ class Scanner:
             # "cache" - YAPF and pycharm formatters ...
             matched_keyword = \
                 target_line_stripped_len >= self.min_keyword_len and (  #
-                        '=' in target_line_stripped or ':' in target_line_stripped)  #
+                        '=' in target_line_stripped
+                        or ':' in target_line_stripped
+                        or "set" in target_line_stripped
+                        or "#define" in target_line_stripped
+                        or "%define" in target_line_stripped
+                        or "%global" in target_line_stripped
+                )  #
             matched_pem_key = \
                 target_line_stripped_len >= self.min_pem_key_len \
                 and PEM_BEGIN_PATTERN in target_line_stripped and "PRIVATE" in target_line_stripped

credsweeper/secret/config.json CHANGED Viewed

@@ -2,10 +2,13 @@
     "exclude": {
         "pattern": [],
         "containers": [
+            ".aar",
             ".apk",
             ".bz2",
             ".gz",
+            ".lzma",
             ".tar",
+            ".xz",
             ".zip"
         ],
         "documents": [
@@ -20,17 +23,20 @@
         ],
         "extension": [
             ".7z",
+            ".a",
             ".aac",
-            ".aar",
             ".avi",
+            ".bin",
             ".bmp",
             ".class",
             ".css",
             ".dmg",
             ".ear",
             ".eot",
+            ".elf",
             ".exe",
             ".gif",
+            ".gmo",
             ".ico",
             ".img",
             ".info",
@@ -45,6 +51,7 @@
             ".mp4",
             ".npy",
             ".npz",
+            ".obj",
             ".ogg",
             ".pak",
             ".png",
@@ -52,10 +59,13 @@
             ".pyc",
             ".pyd",
             ".pyo",
+            ".rar",
             ".rc",
             ".rc2",
             ".rar",
             ".realm",
+            ".res",
+            ".rpm",
             ".s7z",
             ".scss",
             ".so",
@@ -70,6 +80,7 @@
             ".wav",
             ".webm",
             ".webp",
+            ".wma",
             ".woff",
             ".yuv"
         ],
@@ -160,13 +171,13 @@
         "line_num",
         "path",
         "info",
-        "value",
-        "value_start",
-        "value_end",
         "variable",
         "variable_start",
         "variable_end",
-        "entropy_validation"
+        "value",
+        "value_start",
+        "value_end",
+        "entropy"
     ],
     "candidate_output": [
         "rule",

credsweeper/utils/hop_stat.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import statistics
-from typing import Tuple
+from typing import Tuple, Dict
 class HopStat:
@@ -62,7 +62,7 @@ class HopStat:
     })
     def __init__(self):
-        self.__hop_dict = dict()
+        self.__hop_dict: Dict[Tuple[str, str], int] = {}
         base = ''.join(x for x in HopStat.KEYBOARD)
         for a in (x for x in base if '\0' != x):
             for b in (x for x in base if '\0' != x):
@@ -81,7 +81,7 @@ class HopStat:
     def __get_xyz(c: str) -> Tuple[int, int, int]:
         """Returns axial coordinates of a char on keyboad qwerty"""
         x = y = z = 0
-        for i in range(len(HopStat.KEYBOARD)):
+        for i, _ in enumerate(HopStat.KEYBOARD):
             x = HopStat.KEYBOARD[i].find(c)
             if 0 <= x:
                 z = i

credsweeper/utils/pem_key_detector.py CHANGED Viewed

@@ -4,15 +4,16 @@ import re
 import string
 from typing import List
-from credsweeper.common.constants import PEM_BEGIN_PATTERN, PEM_END_PATTERN, Chars
+from credsweeper.common.constants import PEM_BEGIN_PATTERN, PEM_END_PATTERN
 from credsweeper.config import Config
 from credsweeper.credentials import LineData
 from credsweeper.file_handler.analysis_target import AnalysisTarget
 from credsweeper.utils import Util
-from credsweeper.utils.entropy_validator import EntropyValidator
 logger = logging.getLogger(__name__)
+ENTROPY_LIMIT_BASE64 = 4.5
 class PemKeyDetector:
     """Class to detect PEM PRIVATE keys only"""
@@ -65,13 +66,13 @@ class PemKeyDetector:
                     if PEM_BEGIN_PATTERN in subline:
                         begin_pattern_not_passed = False
                     continue
-                elif PEM_END_PATTERN in subline:
+                if PEM_END_PATTERN in subline:
                     if "PGP" in target.line_strip:
                         # Check if entropy is high enough for base64 set with padding sign
-                        entropy_validator = EntropyValidator(key_data, Chars.BASE64STDPAD_CHARS)
-                        if entropy_validator.valid:
+                        entropy = Util.get_shannon_entropy(key_data)
+                        if ENTROPY_LIMIT_BASE64 <= entropy:
                             return line_data
-                        logger.debug("Filtered with entropy %f '%s'", entropy_validator.entropy, key_data)
+                        logger.debug("Filtered with entropy %f '%s'", entropy, key_data)
                     if "OPENSSH" in target.line_strip:
                         # Check whether the key is encrypted
                         with contextlib.suppress(Exception):
@@ -125,7 +126,7 @@ class PemKeyDetector:
         line = line.strip(string.whitespace)
         if line.startswith("//"):
             # simplify first condition for speed-up of doxygen style processing
-            if line.startswith("// ") or line.startswith("/// "):
+            if line.startswith(("// ", "/// ")):
                 # Assume that the commented line is to be separated from base64 code, it may be a part of PEM, otherwise
                 line = line[3:]
         if line.startswith("/*"):

credsweeper/utils/util.py CHANGED Viewed

@@ -12,13 +12,14 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Dict, List, Tuple, Optional, Union
+import numpy as np
 import whatthepatch
 import yaml
 from lxml import etree
 from typing_extensions import TypedDict
 from credsweeper.common.constants import DiffRowType, AVAILABLE_ENCODINGS, \
-    DEFAULT_ENCODING, LATIN_1, CHUNK_SIZE, MAX_LINE_LENGTH, CHUNK_STEP_SIZE
+    DEFAULT_ENCODING, LATIN_1, CHUNK_SIZE, MAX_LINE_LENGTH, CHUNK_STEP_SIZE, ASCII
 logger = logging.getLogger(__name__)
@@ -65,21 +66,17 @@ class Util:
         return result
     @staticmethod
-    def get_shannon_entropy(data: str, iterator: str) -> float:
+    def get_shannon_entropy(data: Union[str, bytes]) -> float:
         """Borrowed from http://blog.dkbza.org/2007/05/scanning-data-for-entropy-anomalies.html."""
         if not data:
-            return 0
-        entropy = 0.
-        data_len = float(len(data))
-        for x in iterator:
-            p_x = data.count(x) / data_len
-            if p_x > 0:
-                entropy += -p_x * math.log(p_x, 2)
+            return 0.
+        size = len(data)
+        _uniq, counts = np.unique(list(data), return_counts=True)
+        probabilities = counts / size
+        entropy = float(-np.sum(probabilities * np.log2(probabilities)))
         return entropy
-    """Precalculated data for speedup"""
+    # Precalculated data for speedup
     MIN_DATA_ENTROPY: Dict[int, float] = {
         16: 1.66973671780348,
         20: 2.07723544540831,
@@ -153,41 +150,39 @@ class Util:
         return entropy < min_entropy
     @staticmethod
-    def is_known(data: bytes) -> bool:
-        """
-        Returns true if any recognized binary format found
-        """
-        if Util.is_zip(data) \
-                or Util.is_gzip(data) \
-                or Util.is_tar(data) \
-                or Util.is_bzip2(data) \
-                or Util.is_com(data) \
-                or Util.is_pdf(data) \
-                or Util.is_elf(data):
-            return True
+    def is_known(data: Union[bytes, bytearray]) -> bool:
+        """Returns True if any known binary format is found to prevent extra scan a file without an extension."""
+        if isinstance(data, (bytes, bytearray)):
+            if 127 <= len(data) and data.startswith(b"\x7f\x45\x4c\x46"):
+                # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
+                # minimal ELF is 127 bytes https://github.com/tchajed/minimal-elf
+                return True
         return False
     @staticmethod
-    def is_binary(data: bytes) -> bool:
+    def is_binary(data: Union[bytes, bytearray]) -> bool:
         """
-        Returns True when two zeroes sequence is found which never exists in text format (UTF-8, UTF-16)
-        UTF-32 is not supported
+        Returns True when two zeroes sequence is found in begin of data.
+        The sequence never exists in text format (UTF-8, UTF-16). UTF-32 is not supported.
         """
         if 0 <= data.find(b"\0\0", 0, MAX_LINE_LENGTH):
             return True
-        non_ascii_cnt = 0
-        for n in range(min([len(data), MAX_LINE_LENGTH])):
-            i = data[n]
-            if 0x20 > i and i not in (0x09, 0x0A, 0x0D) or 0x7E < i < 0xA0:
-                # less than space and not tab, line feed, line end
-                non_ascii_cnt += 1
+        else:
+            return False
+    NOT_LATIN1_PRINTABLE_SET = set(range(0, 256)) \
+        .difference(set(x for x in string.printable.encode(ASCII))) \
+        .difference(set(x for x in range(0xA0, 0x100)))
+    @staticmethod
+    def is_latin1(data: Union[bytes, bytearray]) -> bool:
+        """Returns True when data looks like LATIN-1 for first MAX_LINE_LENGTH bytes."""
+        result = False
         if data:
+            non_latin1_cnt = sum(1 for x in data[:MAX_LINE_LENGTH] if x in Util.NOT_LATIN1_PRINTABLE_SET)
             # experiment for 255217 binary files shown avg = 0.268264 ± 0.168767, so let choose minimal
-            chunk_len = float(MAX_LINE_LENGTH if MAX_LINE_LENGTH < len(data) else len(data))
-            result = 0.1 < non_ascii_cnt / chunk_len
-        else:
-            # empty data case
-            result = False
+            chunk_len = min(MAX_LINE_LENGTH, len(data))
+            result = 0.1 > non_latin1_cnt / chunk_len
         return result
     @staticmethod
@@ -231,10 +226,10 @@ class Util:
             encodings = AVAILABLE_ENCODINGS
         for encoding in encodings:
             try:
-                if binary_suggest and LATIN_1 == encoding and (Util.is_known(content) or Util.is_binary(content)):
+                if binary_suggest and LATIN_1 == encoding and (Util.is_binary(content) or not Util.is_latin1(content)):
                     # LATIN_1 may convert data (bytes in range 0x80:0xFF are transformed)
                     # so skip this encoding when checking binaries
-                    logger.warning("Binary file detected")
+                    logger.warning("Binary file detected %s", repr(content[:8]))
                     break
                 text = content.decode(encoding, errors="strict")
                 if content != text.encode(encoding, errors="strict"):
@@ -374,7 +369,7 @@ class Util:
             line = change["line"]
             if isinstance(line, str):
                 rows_data.extend(Util.preprocess_diff_rows(change.get("new"), change.get("old"), line))
-            elif isinstance(line, bytes):
+            elif isinstance(line, (bytes, bytearray)):
                 logger.warning("The feature is available with the deep scan option")
             else:
                 logger.error(f"Unknown type of line {type(line)}")
@@ -382,9 +377,9 @@ class Util:
         return rows_data
     @staticmethod
-    def is_zip(data: bytes) -> bool:
+    def is_zip(data: Union[bytes, bytearray]) -> bool:
         """According https://en.wikipedia.org/wiki/List_of_file_signatures"""
-        if isinstance(data, bytes) and 3 < len(data):
+        if isinstance(data, (bytes, bytearray)) and 3 < len(data):
             # PK
             if data.startswith(b"PK"):
                 if 0x03 == data[2] and 0x04 == data[3]:
@@ -398,18 +393,18 @@ class Util:
         return False
     @staticmethod
-    def is_com(data: bytes) -> bool:
+    def is_com(data: Union[bytes, bytearray]) -> bool:
         """According https://en.wikipedia.org/wiki/List_of_file_signatures"""
-        if isinstance(data, bytes) and 8 < len(data):
+        if isinstance(data, (bytes, bytearray)) and 8 < len(data):
             if data.startswith(b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"):
                 # Compound File Binary Format: doc, xls, ppt, msi, msg
                 return True
         return False
     @staticmethod
-    def is_tar(data: bytes) -> bool:
+    def is_tar(data: Union[bytes, bytearray]) -> bool:
         """According https://en.wikipedia.org/wiki/List_of_file_signatures"""
-        if isinstance(data, bytes) and 512 <= len(data):
+        if isinstance(data, (bytes, bytearray)) and 512 <= len(data):
             if 0x75 == data[257] and 0x73 == data[258] and 0x74 == data[259] \
                     and 0x61 == data[260] and 0x72 == data[261] and (
                     0x00 == data[262] and 0x30 == data[263] and 0x30 == data[264]
@@ -425,9 +420,16 @@ class Util:
         return False
     @staticmethod
-    def is_bzip2(data: bytes) -> bool:
+    def is_deb(data: Union[bytes, bytearray]) -> bool:
+        """According https://en.wikipedia.org/wiki/Deb_(file_format)"""
+        if isinstance(data, (bytes, bytearray)) and 512 <= len(data) and data.startswith(b"!<arch>\n"):
+            return True
+        return False
+    @staticmethod
+    def is_bzip2(data: Union[bytes, bytearray]) -> bool:
         """According https://en.wikipedia.org/wiki/Bzip2"""
-        if isinstance(data, bytes) and 10 <= len(data):
+        if isinstance(data, (bytes, bytearray)) and 10 <= len(data):
             if data.startswith(b"\x42\x5A\x68") \
                     and 0x31 <= data[3] <= 0x39 \
                     and 0x31 == data[4] and 0x41 == data[5] and 0x59 == data[6] \
@@ -436,42 +438,49 @@ class Util:
         return False
     @staticmethod
-    def is_gzip(data: bytes) -> bool:
+    def is_gzip(data: Union[bytes, bytearray]) -> bool:
         """According https://www.rfc-editor.org/rfc/rfc1952"""
-        if isinstance(data, bytes) and 3 <= len(data):
+        if isinstance(data, (bytes, bytearray)) and 3 <= len(data):
             if data.startswith(b"\x1F\x8B\x08"):
                 return True
         return False
     @staticmethod
-    def is_pdf(data: bytes) -> bool:
+    def is_pdf(data: Union[bytes, bytearray]) -> bool:
         """According https://en.wikipedia.org/wiki/List_of_file_signatures - pdf"""
-        if isinstance(data, bytes) and 5 <= len(data):
+        if isinstance(data, (bytes, bytearray)) and 5 <= len(data):
             if data.startswith(b"\x25\x50\x44\x46\x2D"):
                 return True
         return False
     @staticmethod
-    def is_jks(data: bytes) -> bool:
+    def is_jks(data: Union[bytes, bytearray]) -> bool:
         """According https://en.wikipedia.org/wiki/List_of_file_signatures - jks"""
-        if isinstance(data, bytes) and 4 <= len(data):
+        if isinstance(data, (bytes, bytearray)) and 4 <= len(data):
             if data.startswith(b"\xFE\xED\xFE\xED"):
                 return True
         return False
     @staticmethod
-    def is_asn1(data: bytes) -> bool:
+    def is_lzma(data: Union[bytes, bytearray]) -> bool:
+        """According https://en.wikipedia.org/wiki/List_of_file_signatures - lzma also xz"""
+        if isinstance(data, (bytes, bytearray)) and 6 <= len(data):
+            if data.startswith((b"\xFD\x37\x7A\x58\x5A\x00", b"\x5D\x00\x00")):
+                return True
+        return False
+    @staticmethod
+    def is_asn1(data: Union[bytes, bytearray]) -> bool:
         """Only sequence type 0x30 and size correctness is checked"""
-        data_length = len(data)
-        if isinstance(data, bytes) and 4 <= data_length:
+        if isinstance(data, (bytes, bytearray)) and 4 <= len(data):
             # sequence
             if 0x30 == data[0]:
                 # https://www.oss.com/asn1/resources/asn1-made-simple/asn1-quick-reference/basic-encoding-rules.html#Lengths
                 length = data[1]
-                byte_len = (0x7F & length)
+                byte_len = 0x7F & length
                 if 0x80 == length and data.endswith(b"\x00\x00"):
                     return True
-                elif 0x80 < length and 1 < byte_len < data_length:  # additional check
+                elif 0x80 < length and 1 < byte_len < len(data):  # additional check
                     len_bytes = data[2:2 + byte_len]
                     try:
                         long_size = struct.unpack(">h", len_bytes)
@@ -482,26 +491,17 @@ class Util:
                     length = data[2]
                 else:
                     byte_len = 0
-                return data_length == length + 2 + byte_len
-        return False
-    @staticmethod
-    def is_elf(data: Union[bytes, bytearray]) -> bool:
-        """According to https://en.wikipedia.org/wiki/Executable_and_Linkable_Format use only 5 bytes"""
-        if isinstance(data, (bytes, bytearray)) and 127 <= len(data):
-            # minimal is 127 bytes https://github.com/tchajed/minimal-elf
-            if data.startswith(b"\x7f\x45\x4c\x46") and (0x01 == data[5] or 0x02 == data[5]):
-                return True
+                return len(data) == length + 2 + byte_len
         return False
     @staticmethod
     def is_html(data: Union[bytes, bytearray]) -> bool:
         """Used to detect html format. Suppose, invocation of is_xml() was True before."""
         if isinstance(data, (bytes, bytearray)):
-            for opening_tag, closing_tag in [(b"<html>", b"</html>"), (b"<table", b"</table>"), (b"<p>", b"</p>"),
-                                             (b"<span>", b"</span>"), (b"<div>", b"</div>"), (b"<li>", b"</li>"),
-                                             (b"<ol>", b"</ol>"), (b"<ul>", b"</ul>"), (b"<th>", b"</th>"),
-                                             (b"<tr>", b"</tr>"), (b"<td>", b"</td>")]:
+            for opening_tag, closing_tag in [(b"<html", b"</html>"), (b"<body", b"</body>"), (b"<table", b"</table>"),
+                                             (b"<p>", b"</p>"), (b"<span>", b"</span>"), (b"<div>", b"</div>"),
+                                             (b"<li>", b"</li>"), (b"<ol>", b"</ol>"), (b"<ul>", b"</ul>"),
+                                             (b"<th>", b"</th>"), (b"<tr>", b"</tr>"), (b"<td>", b"</td>")]:
                 opening_pos = data.find(opening_tag, 0, MAX_LINE_LENGTH)
                 if 0 <= opening_pos < data.find(closing_tag, opening_pos):
                     # opening and closing tags were found - suppose it is an HTML
@@ -658,81 +658,11 @@ class Util:
         except Exception as exc:
             logging.error(f"Failed to write: {file_path} {exc}")
-    @staticmethod
-    def __extract_value(node: Any, value: Any) -> List[Any]:
-        result = []
-        for i in getattr(node, "targets"):
-            if hasattr(i, "id"):
-                result.append({getattr(i, "id"): value})
-            else:
-                logger.error(f"{str(i)} has no 'id'")
-        return result
-    @staticmethod
-    def __extract_assign(node: Any) -> List[Any]:
-        result = []
-        if hasattr(node, "value") and hasattr(node, "targets"):
-            value = getattr(node, "value")
-            if hasattr(value, "value"):
-                # python 3.8 - 3.10
-                result.extend(Util.__extract_value(node, getattr(value, "value")))
-            else:
-                logger.error(f"value.{value} has no 'value' {dir(value)}")
-        else:
-            logger.error(f"{str(node)} has no 'value' {dir(node)}")
-        return result
-    @staticmethod
-    def ast_to_dict(node: Any) -> List[Any]:
-        """Recursive parsing AST tree of python source to list with strings"""
-        result: List[Any] = []
-        if hasattr(node, "value") and isinstance(node.value, str):
-            result.append(node.value)
-        if isinstance(node, ast.Module) \
-                or isinstance(node, ast.FunctionDef):
-            if hasattr(node, "body"):
-                for i in node.body:
-                    x = Util.ast_to_dict(i)
-                    if x:
-                        result.extend(x)
-        elif isinstance(node, ast.Import):
-            logger.debug("Import:%s", str(node))
-        elif isinstance(node, ast.Assign):
-            result.extend(Util.__extract_assign(node))
-        elif isinstance(node, ast.Expr) \
-                or isinstance(node, ast.AnnAssign) \
-                or isinstance(node, ast.AugAssign) \
-                or isinstance(node, ast.Call) \
-                or isinstance(node, ast.JoinedStr) \
-                or isinstance(node, ast.Return) \
-                or isinstance(node, ast.ImportFrom) \
-                or isinstance(node, ast.Assert) \
-                or isinstance(node, ast.Pass) \
-                or isinstance(node, ast.Raise) \
-                or isinstance(node, ast.Str) \
-                or isinstance(node, ast.Name) \
-                or isinstance(node, ast.FormattedValue) \
-                or isinstance(node, ast.Global):
-            if hasattr(node, "value"):
-                result.extend(Util.ast_to_dict(getattr(node, "value")))
-            if hasattr(node, "args"):
-                for i in getattr(node, "args"):
-                    result.extend(Util.ast_to_dict(i))
-            if hasattr(node, "values"):
-                for i in getattr(node, "values"):
-                    result.extend(Util.ast_to_dict(i))
-            else:
-                logger.debug(f"skip:{str(node)}")
-        else:
-            logger.debug(f"unknown:{str(node)}")
-        return result
     @staticmethod
     def parse_python(source: str) -> List[Any]:
-        """Parse python source to list of strings and assignments"""
+        """Parse python source and back to remove strings merge and line wrap"""
         src = ast.parse(source)
-        result = Util.ast_to_dict(src)
+        result = ast.unparse(src).splitlines()
         return result
     @staticmethod

{credsweeper-1.11.2.dist-info → credsweeper-1.11.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: credsweeper
-Version: 1.11.2
+Version: 1.11.4
 Summary: Credential Sweeper
 Project-URL: Homepage, https://github.com/Samsung/CredSweeper
 Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues

credsweeper 1.11.2__py3-none-any.whl → 1.11.4__py3-none-any.whl

Potentially problematic release.

credsweeper 1.11.2__py3-none-any.whl → 1.11.4__py3-none-any.whl