PyPI - credsweeper - Versions diffs - 1.11.2__tar.gz → 1.11.3__tar.gz - Mend

credsweeper 1.11.2__tar.gz → 1.11.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of credsweeper might be problematic. Click here for more details.

Files changed (157) hide show

{credsweeper-1.11.2 → credsweeper-1.11.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: credsweeper
-Version: 1.11.2
+Version: 1.11.3
 Summary: Credential Sweeper
 Project-URL: Homepage, https://github.com/Samsung/CredSweeper
 Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/__init__.py RENAMED Viewed

@@ -18,4 +18,4 @@ __all__ = [
     '__version__'
 ]
-__version__ = "1.11.2"
+__version__ = "1.11.3"

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/__main__.py RENAMED Viewed

@@ -4,6 +4,7 @@ import os
 import sys
 import time
 from argparse import ArgumentParser, ArgumentTypeError, Namespace, BooleanOptionalAction
+from pathlib import Path
 from typing import Any, Union, Dict
 from credsweeper import __version__
@@ -88,10 +89,11 @@ def check_integrity() -> int:
     Returns CRC32 of files in integer
     """
     crc32 = 0
-    for root, dirs, files in os.walk(APP_PATH):
-        for file_path in files:
-            if Util.get_extension(file_path) in [".py", ".json", ".txt", ".yaml", ".onnx"]:
-                data = Util.read_data(os.path.join(root, file_path))
+    for root, _dirs, files in os.walk(APP_PATH):
+        for file_name in files:
+            if Util.get_extension(file_name) in [".py", ".json", ".txt", ".yaml", ".onnx"]:
+                file_path = Path(root) / file_name
+                data = Util.read_data(file_path)
                 if data:
                     crc32 ^= binascii.crc32(data)
     return crc32

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/app.py RENAMED Viewed

@@ -11,7 +11,7 @@ from colorama import Style
 # Directory of credsweeper sources MUST be placed before imports to avoid circular import error
 APP_PATH = Path(__file__).resolve().parent
-from credsweeper.common.constants import Severity, ThresholdPreset, DiffRowType
+from credsweeper.common.constants import Severity, ThresholdPreset, DiffRowType, DEFAULT_ENCODING
 from credsweeper.config import Config
 from credsweeper.credentials import Candidate, CredentialManager, CandidateKey
 from credsweeper.deep_scanner.deep_scanner import DeepScanner
@@ -415,7 +415,7 @@ class CredSweeper:
             if isinstance(change_type, DiffRowType):
                 # add suffix for appropriated reports to create two files for the patch scan
                 json_path = json_path.with_suffix(f".{change_type.value}{json_path.suffix}")
-            with open(json_path, 'w') as f:
+            with open(json_path, 'w', encoding=DEFAULT_ENCODING) as f:
                 # use the approach to reduce total memory usage in case of huge data
                 first_item = True
                 f.write('[\n')
@@ -446,8 +446,12 @@ class CredSweeper:
             for credential in credentials:
                 for line_data in credential.line_data_list:
                     # bright rule name and path or info
+                    if isinstance(credential.ml_probability, float):
+                        ml_probability_info = f" {credential.ml_probability:.6f}"
+                    else:
+                        ml_probability_info = ""
                     print(Style.BRIGHT + credential.rule_name +
-                          f" {line_data.info or line_data.path}:{line_data.line_num} {credential.ml_probability}" +
+                          f" {line_data.info or line_data.path}:{line_data.line_num}{ml_probability_info}" +
                           Style.RESET_ALL)
                     print(line_data.get_colored_line(hashed=self.hashed, subtext=self.subtext))

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/common/keyword_pattern.py RENAMED Viewed

@@ -3,25 +3,30 @@ import re
 class KeywordPattern:
     """Pattern set of keyword types"""
-    key_left = r"(\\[nrt]|%[0-9a-f]{2})?" \
+    directive = r"(?P<directive>(?:(?:[#%]define|%global)(?:\s|\\t)|\bset))?"
+    key_left = r"(?:\\[nrt]|%[0-9a-f]{2}|\s)*" \
                r"(?P<variable>(([`'\"]{1,8}[^:='\"`}<>\\/&?]*|[^:='\"`}<>\s()\\/&?;,%]*)" \
                r"(?P<keyword>"
     # there will be inserted a keyword
     key_right = r")" \
-                r"[^%:='\"`<>{?!&;\n]*" \
+                r"[^%:='\"`<>({?!&;\n]*" \
                 r")" \
                 r"(&(quot|apos);|%[0-9a-f]{2}|[`'\"])*" \
                 r")"  # <variable>
-    separator = r"(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*" \
-                r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=|%3d)" \
+    separator = r"(?(directive)|(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*)" \
+                r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=~|=" \
+                r"|(?(directive)(\\t|\s|\((?!\))){1,80}|%3d))" \
                 r"(\s|\\{1,8}[tnr])*"
     # might be curly, square or parenthesis with words before
     wrap = r"(?P<wrap>(" \
            r"(new(\s|\\{1,8}[tnr]|byte|char|string|\[\]){1,8})?" \
+           r"(?P<get>([_a-z][0-9a-z_.\[\]]*\.)get|(os\.)?getenv)?" \
            r"([0-9a-z_.]|::|-(>|&gt;))*" \
-           r"[\[\(\{]" \
+           r"\s*" \
+           r"(\[(?!\])|\((?!\))|\{(?!\}))" \
            r"(\s|\\{1,8}[tnr])*" \
-           r"([0-9a-z_]{1,32}[:=]\s*)?" \
+           r"(?(get)('[^']+'|\"[^\"]+\")\s*,\s*|)" \
+           r"([0-9a-z_]{1,32}\s*[:=]\s*)?" \
            r"){1,8})?"
     string_prefix = r"(((b|r|br|rb|u|f|rf|fr|l|@)(?=(\\*[`'\"])))?"
     left_quote = r"(?P<value_leftquote>((?P<esq>\\{1,8})?([`'\"]|&(quot|apos);)){1,4}))?"
@@ -39,7 +44,7 @@ class KeywordPattern:
             r"(?P<url_esc>%[0-9a-f]{2})" \
             r"|" \
             r"(?(url_esc)[^\s`'\",;\\&]|[^\s`'\",;\\])" \
-            r")"\
+            r")" \
             r"){4,8000}" \
             r"|" \
             r"(<[^>]{4,8000}>)" \
@@ -48,18 +53,19 @@ class KeywordPattern:
             r"|" \
             r"(\$?\{{1,3}[^}]{4,8000}\}{1,3})" \
             r"|" \
-            r"(?(wrap)(?(value_leftquote)(?!\\(?P=value_leftquote))|[^\]\)\}]){16,8000})"\
+            r"(?(wrap)(?(value_leftquote)(?!\\(?P=value_leftquote))|[^\]\)\}]){16,8000})" \
             r")"  # <value>
     right_quote = r"(?(value_leftquote)" \
                   r"(?P<value_rightquote>(?<!\\)(?P=value_leftquote)|\\$|(?<=[0-9a-z+_/-])$)" \
                   r"|" \
-                  r"(?(wrap)(\]|\)|\}|,|;|\\|$))" \
+                  r"(?(wrap)(\]|\)|\}|;|\\|$))" \
                   r")"
     @classmethod
     def get_keyword_pattern(cls, keyword: str) -> re.Pattern:
         """Returns compiled regex pattern"""
         expression = ''.join([  #
+            cls.directive,  #
             cls.key_left,  #
             keyword,  #
             cls.key_right,  #

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/common/morpheme_checklist.txt RENAMED Viewed

@@ -885,7 +885,7 @@ mbler
 mean
 measur
 medi
-medusa
+medus
 meet
 mem_
 memb
@@ -925,7 +925,7 @@ month
 morp
 mory
 mote
-motorola
+motor
 mount
 move
 mpeg
@@ -1005,6 +1005,7 @@ origin
 orithm
 ormat
 orph
+otorola
 ottle
 ously
 out
@@ -1485,6 +1486,7 @@ up_
 updat
 upgrade
 url
+usa
 usb
 use
 usin

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/credentials/line_data.py RENAMED Viewed

@@ -10,7 +10,6 @@ from colorama import Fore, Style
 from credsweeper.common.constants import MAX_LINE_LENGTH, UTF_8, StartEnd, ML_HUNK
 from credsweeper.config import Config
 from credsweeper.utils import Util
-from credsweeper.utils.entropy_validator import EntropyValidator
 class LineData:
@@ -32,7 +31,7 @@ class LineData:
     """
     quotation_marks = ('"', "'", '`')
-    comment_starts = ("//", "* ", "#", "/*", "<!––", "%{", "%", "...", "(*", "--", "--[[", "#=")
+    comment_starts = ("//", "* ", "# ", "/*", "<!––", "%{", "%", "...", "(*", "--", "--[[", "#=")
     bash_param_split = re.compile("\\s+(\\-|\\||\\>|\\w+?\\>|\\&)")
     line_endings = re.compile(r"\\{1,8}[nr]")
     # https://en.wikipedia.org/wiki/Percent-encoding
@@ -87,8 +86,9 @@ class LineData:
         self.url_part = False
         self.wrap = None
         self._3d_escaped_separator = False
         self.initialize(match_obj)
+        # the line is very useful for debug breakpoint
+        pass  # pylint: disable=W0107
     def compare(self, other: 'LineData') -> bool:
         """Comparison method - skip whole line and checks only when variable and value are the same"""
@@ -373,10 +373,10 @@ class LineData:
     def to_str(self, subtext: bool = False, hashed: bool = False) -> str:
         """Represent line_data with subtext or|and hashed values"""
         cut_pos = StartEnd(self.variable_start, self.value_end) if subtext else None
-        return f"line: '{self.get_hash_or_subtext(self.line, hashed, cut_pos)}'" \
-               f" | line_num: {self.line_num} | path: {self.path}" \
+        return f"path: {self.path}" \
+               f" | line_num: {self.line_num}" \
                f" | value: '{self.get_hash_or_subtext(self.value, hashed)}'" \
-               f" | entropy_validation: {EntropyValidator(self.value)}"
+               f" | line: '{self.get_hash_or_subtext(self.line, hashed, cut_pos)}'"
     def __str__(self):
         return self.to_str()
@@ -393,6 +393,10 @@ class LineData:
         """
         cut_pos = StartEnd(self.variable_start if 0 <= self.variable_start else self.value_start,
                            self.value_end) if subtext else None
+        if isinstance(self.value, str):
+            entropy = round(Util.get_shannon_entropy(self.value), 5)
+        else:
+            entropy = None
         full_output = {
             "key": self.key,
             "line": self.get_hash_or_subtext(self.line, hashed, cut_pos),
@@ -401,18 +405,18 @@ class LineData:
             # info may contain variable name - so let it be hashed if requested
             "info": self.get_hash_or_subtext(self.info, hashed),
             "pattern": self.pattern.pattern,
+            "variable": self.get_hash_or_subtext(self.variable, hashed),
+            "variable_start": self.variable_start,
+            "variable_end": self.variable_end,
             "separator": self.separator,
             "separator_start": self.separator_start,
             "separator_end": self.separator_end,
             "value": self.get_hash_or_subtext(self.value, hashed),
             "value_start": self.value_start,
             "value_end": self.value_end,
-            "variable": self.get_hash_or_subtext(self.variable, hashed),
-            "variable_start": self.variable_start,
-            "variable_end": self.variable_end,
+            "entropy": entropy,
             "value_leftquote": self.value_leftquote,
             "value_rightquote": self.value_rightquote,
-            "entropy_validation": EntropyValidator(self.value).to_dict()
         }
         reported_output = {k: v for k, v in full_output.items() if k in self.config.line_data_output}
         return reported_output

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/abstract_scanner.py RENAMED Viewed

@@ -1,5 +1,5 @@
 from abc import abstractmethod, ABC
-from typing import List
+from typing import List, Optional
 from credsweeper.config import Config
 from credsweeper.credentials import Candidate
@@ -40,3 +40,12 @@ class AbstractScanner(ABC):
             recursive_limit_size: int) -> List[Candidate]:
         """Abstract method to be defined in DeepScanner"""
         raise NotImplementedError(__name__)
+    @abstractmethod
+    def data_scan(
+            self,  #
+            data_provider: DataContentProvider,  #
+            depth: int,  #
+            recursive_limit_size: int) -> Optional[List[Candidate]]:
+        """Abstract method to be defined in DeepScanner"""
+        raise NotImplementedError(__name__)

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/deep_scanner.py RENAMED Viewed

@@ -2,7 +2,7 @@ import datetime
 import logging
 from typing import List, Optional, Any, Tuple, Union
-from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION
+from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN
 from credsweeper.config import Config
 from credsweeper.credentials import Candidate
 from credsweeper.credentials.augment_candidates import augment_candidates
@@ -23,6 +23,7 @@ from .gzip_scanner import GzipScanner
 from .html_scanner import HtmlScanner
 from .jks_scanner import JksScanner
 from .lang_scanner import LangScanner
+from .lzma_scanner import LzmaScanner
 from .mxfile_scanner import MxfileScanner
 from .pdf_scanner import PdfScanner
 from .pkcs12_scanner import Pkcs12Scanner
@@ -48,6 +49,7 @@ class DeepScanner(
     HtmlScanner,  #
     JksScanner,  #
     LangScanner,  #
+    LzmaScanner,  #
     PdfScanner,  #
     Pkcs12Scanner,  #
     PptxScanner,  #
@@ -106,6 +108,9 @@ class DeepScanner(
         elif Util.is_bzip2(data):
             if 0 < depth:
                 deep_scanners.append(Bzip2Scanner)
+        elif Util.is_lzma(data):
+            if 0 < depth:
+                deep_scanners.append(LzmaScanner)
         elif Util.is_tar(data):
             if 0 < depth:
                 deep_scanners.append(TarScanner)
@@ -140,13 +145,16 @@ class DeepScanner(
             else:
                 fallback_scanners.append(EmlScanner)
             fallback_scanners.append(ByteScanner)
+        elif Util.is_known(data):
+            # the format is known but cannot be scanned
+            pass
         elif not Util.is_binary(data):
             if 0 < depth:
                 deep_scanners.append(EncoderScanner)
                 deep_scanners.append(LangScanner)
             deep_scanners.append(ByteScanner)
         else:
-            logger.warning("Cannot apply a deep scanner for type %s", file_type)
+            logger.warning("Cannot apply a deep scanner for type %s prefix %s", file_type, str(data[:MIN_DATA_LEN]))
         return deep_scanners, fallback_scanners
     # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@@ -175,7 +183,7 @@ class DeepScanner(
             # this scan is successful, so fallback is not necessary
             fallback = False
         if fallback:
-            for scan_class in deep_scanners:
+            for scan_class in fallback_scanners:
                 fallback_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
                 if fallback_candidates is None:
                     continue
@@ -239,15 +247,18 @@ class DeepScanner(
                 recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
         """
         candidates: List[Candidate] = []
-        logger.debug("Start data_scan: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data), depth,
-                     recursive_limit_size, data_provider.file_path, data_provider.info)
         if 0 > depth:
             # break recursion if maximal depth is reached
-            logger.debug("bottom reached %s recursive_limit_size:%d", data_provider.file_path, recursive_limit_size)
+            logger.debug("Bottom reached %s recursive_limit_size:%d", data_provider.file_path, recursive_limit_size)
             return candidates
         depth -= 1
+        if MIN_DATA_LEN > len(data_provider.data):
+            # break recursion for minimal data size
+            logger.debug("Too small data: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data),
+                         depth, recursive_limit_size, data_provider.file_path, data_provider.info)
+            return candidates
+        logger.debug("Start data_scan: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data), depth,
+                     recursive_limit_size, data_provider.file_path, data_provider.info)
         if FilePathExtractor.is_find_by_ext_file(self.config, data_provider.file_type):
             # Skip scanning file and makes fake candidate due the extension is suspicious

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/docx_scanner.py RENAMED Viewed

@@ -42,7 +42,7 @@ class DocxScanner(AbstractScanner, ABC):
             yield from DocxScanner._iter_block_items(block.footer)
             return
         elif isinstance(block, _Cell):
-            parent_elm = block._tc
+            parent_elm = block._tc  # pylint: disable=W0212
         else:
             raise ValueError(f"unrecognised:{type(block)}")

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/encoder_scanner.py RENAMED Viewed

@@ -18,11 +18,11 @@ class EncoderScanner(AbstractScanner, ABC):
             depth: int,  #
             recursive_limit_size: int) -> Optional[List[Candidate]]:
         """Tries to decode data from base64 encode to bytes and scan as bytes again"""
-        if data_provider.represent_as_encoded():
+        if result := data_provider.represent_as_encoded():
             decoded_data_provider = DataContentProvider(data=data_provider.decoded,
                                                         file_path=data_provider.file_path,
                                                         file_type=data_provider.file_type,
                                                         info=f"{data_provider.info}|BASE64")
             new_limit = recursive_limit_size - len(decoded_data_provider.data)
             return self.recursive_scan(decoded_data_provider, depth, new_limit)
-        return None
+        return None if result is None else []

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/html_scanner.py RENAMED Viewed

@@ -19,12 +19,12 @@ class HtmlScanner(AbstractScanner, ABC):
             depth: int,  #
             recursive_limit_size: int) -> Optional[List[Candidate]]:
         """Tries to represent data as html text and scan as text lines"""
-        if data_provider.represent_as_html(depth, recursive_limit_size,
-                                           self.scanner.keywords_required_substrings_check):
+        if result := data_provider.represent_as_html(depth, recursive_limit_size,
+                                                     self.scanner.keywords_required_substrings_check):
             string_data_provider = StringContentProvider(lines=data_provider.lines,
                                                          line_numbers=data_provider.line_numbers,
                                                          file_path=data_provider.file_path,
                                                          file_type=data_provider.file_type,
                                                          info=f"{data_provider.info}|HTML")
             return self.scanner.scan(string_data_provider)
-        return None
+        return None if result is None else []

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/jks_scanner.py RENAMED Viewed

@@ -20,7 +20,6 @@ class JksScanner(AbstractScanner, ABC):
             depth: int,  #
             recursive_limit_size: int) -> Optional[List[Candidate]]:
         """Tries to scan JKS to open with standard password"""
-        candidates = []
         for pw_probe in self.config.bruteforce_list:
             try:
                 keystore = jks.KeyStore.loads(data_provider.data, pw_probe, try_decrypt_keys=True)
@@ -38,8 +37,7 @@ class JksScanner(AbstractScanner, ABC):
                 candidate.line_data_list[0].value = pw_probe or "<EMPTY PASSWORD>"
                 candidate.line_data_list[0].value_start = 1
                 candidate.line_data_list[0].value_end = 1 + len(candidate.line_data_list[0].value)
-                candidates.append(candidate)
-                break
+                return [candidate]
             except Exception as jks_exc:
                 logger.debug(f"{data_provider.file_path}:{pw_probe}:{jks_exc}")
-        return candidates
+        return None

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/lang_scanner.py RENAMED Viewed

@@ -19,10 +19,10 @@ class LangScanner(AbstractScanner, ABC):
             depth: int,  #
             recursive_limit_size: int) -> Optional[List[Candidate]]:
         """Tries to represent data as markup language and scan as structure"""
-        if data_provider.represent_as_structure():
+        if result := data_provider.represent_as_structure():
             struct_data_provider = StructContentProvider(struct=data_provider.structure,
                                                          file_path=data_provider.file_path,
                                                          file_type=data_provider.file_type,
                                                          info=f"{data_provider.info}|STRUCT")
             return self.structure_scan(struct_data_provider, depth, recursive_limit_size)
-        return None
+        return None if result is None else []

credsweeper-1.11.3/credsweeper/deep_scanner/lzma_scanner.py ADDED Viewed

@@ -0,0 +1,40 @@
+import logging
+import lzma
+from abc import ABC
+from pathlib import Path
+from typing import List, Optional
+from credsweeper.credentials import Candidate
+from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
+from credsweeper.file_handler.data_content_provider import DataContentProvider
+from credsweeper.utils import Util
+logger = logging.getLogger(__name__)
+class LzmaScanner(AbstractScanner, ABC):
+    """Implements lzma scanning"""
+    def data_scan(
+            self,  #
+            data_provider: DataContentProvider,  #
+            depth: int,  #
+            recursive_limit_size: int) -> Optional[List[Candidate]]:
+        """Extracts data from lzma archive and launches data_scan"""
+        try:
+            file_path = Path(data_provider.file_path)
+            new_path = file_path.as_posix()
+            if ".xz" == file_path.suffix:
+                new_path = new_path[:-3]
+            elif ".lzma" == file_path.suffix:
+                new_path = new_path[:-5]
+            lzma_content_provider = DataContentProvider(data=lzma.decompress(data_provider.data),
+                                                        file_path=new_path,
+                                                        file_type=Util.get_extension(new_path),
+                                                        info=f"{data_provider.info}|LZMA:{file_path}")
+            new_limit = recursive_limit_size - len(lzma_content_provider.data)
+            lzma_candidates = self.recursive_scan(lzma_content_provider, depth, new_limit)
+            return lzma_candidates
+        except Exception as lzma_exc:
+            logger.error(f"{data_provider.file_path}:{lzma_exc}")
+        return None

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/pkcs12_scanner.py RENAMED Viewed

@@ -20,10 +20,9 @@ class Pkcs12Scanner(AbstractScanner, ABC):
             depth: int,  #
             recursive_limit_size: int) -> Optional[List[Candidate]]:
         """Tries to scan PKCS12 to open with standard password"""
-        candidates = []
         for pw_probe in self.config.bruteforce_list:
             try:
-                (private_key, certificate, additional_certificates) \
+                (private_key, _certificate, _additional_certificates) \
                     = cryptography.hazmat.primitives.serialization.pkcs12.load_key_and_certificates(data_provider.data,
                                                                                                     pw_probe.encode())
                 # the password probe has passed, it will be the value
@@ -40,8 +39,7 @@ class Pkcs12Scanner(AbstractScanner, ABC):
                 candidate.line_data_list[0].value = value
                 candidate.line_data_list[0].value_start = 1
                 candidate.line_data_list[0].value_end = 1 + len(candidate.line_data_list[0].value)
-                candidates.append(candidate)
-                break
+                return [candidate]
             except Exception as pkcs_exc:
                 logger.debug(f"{data_provider.file_path}:{pw_probe}:{pkcs_exc}")
-        return candidates
+        return None

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/xml_scanner.py RENAMED Viewed

@@ -19,11 +19,11 @@ class XmlScanner(AbstractScanner, ABC):
             depth: int,  #
             recursive_limit_size: int) -> Optional[List[Candidate]]:
         """Tries to represent data as xml text and scan as text lines"""
-        if data_provider.represent_as_xml():
+        if result := data_provider.represent_as_xml():
             string_data_provider = StringContentProvider(lines=data_provider.lines,
                                                          line_numbers=data_provider.line_numbers,
                                                          file_path=data_provider.file_path,
                                                          file_type=data_provider.file_type,
                                                          info=f"{data_provider.info}|XML")
             return self.scanner.scan(string_data_provider)
-        return None
+        return None if result is None else []

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/data_content_provider.py RENAMED Viewed

@@ -76,9 +76,14 @@ class DataContentProvider(ContentProvider):
         return self.structure is not None and (isinstance(self.structure, dict) and 0 < len(self.structure.keys())
                                                or isinstance(self.structure, list) and 0 < len(self.structure))
-    def represent_as_structure(self) -> bool:
+    def represent_as_structure(self) -> Optional[bool]:
         """Tries to convert data with many parsers. Stores result to internal structure
-        Return True if some structure found
+        Return:
+             True if some structure found
+             False if no data found
+             None if the format is not acceptable
         """
         if MIN_DATA_LEN > len(self.text):
             return False
@@ -134,13 +139,15 @@ class DataContentProvider(ContentProvider):
             if self.__is_structure():
                 return True
         # # # None of above
-        return False
+        return None
-    def represent_as_xml(self) -> bool:
+    def represent_as_xml(self) -> Optional[bool]:
         """Tries to read data as xml
         Return:
              True if reading was successful
+             False if no data found
+             None if the format is not acceptable
         """
         if MIN_XML_LEN > len(self.text):
@@ -150,14 +157,12 @@ class DataContentProvider(ContentProvider):
                 xml_text = self.text.splitlines()
                 self.lines, self.line_numbers = Util.get_xml_from_lines(xml_text)
                 logger.debug("CONVERTED from xml")
+                return bool(self.lines and self.line_numbers)
             else:
                 logger.debug("Weak data to parse as XML")
-                return False
         except Exception as exc:
             logger.debug("Cannot parse as XML:%s %s", exc, self.data)
-        else:
-            return bool(self.lines and self.line_numbers)
-        return False
+        return None
     def _check_multiline_cell(self, cell: Tag) -> Optional[Tuple[int, str]]:
         """multiline cell will be analysed as text or return single line from cell
@@ -336,11 +341,13 @@ class DataContentProvider(ContentProvider):
             self,  #
             depth: int,  #
             recursive_limit_size: int,  #
-            keywords_required_substrings_check: Callable[[str], bool]) -> bool:
+            keywords_required_substrings_check: Callable[[str], bool]) -> Optional[bool]:
         """Tries to read data as html
         Return:
              True if reading was successful
+             False if no data found
+             None if the format is not acceptable
         """
         try:
@@ -361,13 +368,15 @@ class DataContentProvider(ContentProvider):
             logger.debug("Cannot parse as HTML:%s %s", exc, self.data)
         else:
             return bool(self.lines and self.line_numbers)
-        return False
+        return None
-    def represent_as_encoded(self) -> bool:
+    def represent_as_encoded(self) -> Optional[bool]:
         """Decodes data from base64. Stores result in decoded
         Return:
              True if the data correctly parsed and verified
+             False if no data found
+             None if the format is not acceptable
         """
         if len(self.data) < MIN_ENCODED_DATA_LEN \
@@ -383,7 +392,7 @@ class DataContentProvider(ContentProvider):
             logger.debug("Cannot decoded as base64:%s %s", exc, self.data)
         else:
             return self.decoded is not None and 0 < len(self.decoded)
-        return False
+        return None
     def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTarget, None, None]:
         """Return nothing. The class provides only data storage.

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_array_dictionary_check.py RENAMED Viewed

@@ -14,7 +14,7 @@ class ValueArrayDictionaryCheck(Filter):
         `token = {'root'}` would be kept
     """
-    PATTERN = re.compile(r"\[('|\")?.+('|\")?\]")
+    PATTERN = re.compile(r"\[('|\")?[^,]+('|\")?\]")
     def __init__(self, config: Config = None) -> None:
         pass
@@ -32,6 +32,8 @@ class ValueArrayDictionaryCheck(Filter):
         """
         if line_data.is_well_quoted_value:
             return False
+        if line_data.wrap and "byte" in line_data.wrap.lower():
+            return False
         if self.PATTERN.search(line_data.value):
             return True
         if line_data.wrap and not line_data.is_well_quoted_value and ('[' in line_data.wrap or '(' in line_data.wrap):

{credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_azure_token_check.py RENAMED Viewed

@@ -1,7 +1,6 @@
 import contextlib
 import json
-from credsweeper.common.constants import Chars
 from credsweeper.config import Config
 from credsweeper.credentials import LineData
 from credsweeper.file_handler.analysis_target import AnalysisTarget
@@ -45,7 +44,7 @@ class ValueAzureTokenCheck(Filter):
                 # must be all parts in payload
                 return True
             min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(parts[2]))
-            entropy = Util.get_shannon_entropy(parts[2], Chars.BASE64URL_CHARS.value)
+            entropy = Util.get_shannon_entropy(parts[2])
             # good signature has to be like random bytes
             return entropy < min_entropy

credsweeper 1.11.2__tar.gz → 1.11.3__tar.gz

Potentially problematic release.

credsweeper 1.11.2__tar.gz → 1.11.3__tar.gz