PyPI - credsweeper - Versions diffs - 1.11.2__py3-none-any.whl → 1.11.3__py3-none-any.whl - Mend

credsweeper 1.11.2__py3-none-any.whl → 1.11.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of credsweeper might be problematic. Click here for more details.

Files changed (55) hide show

credsweeper/__init__.py +1 -1
credsweeper/__main__.py +6 -4
credsweeper/app.py +7 -3
credsweeper/common/keyword_pattern.py +15 -9
credsweeper/common/morpheme_checklist.txt +4 -2
credsweeper/credentials/line_data.py +14 -10
credsweeper/deep_scanner/abstract_scanner.py +10 -1
credsweeper/deep_scanner/deep_scanner.py +19 -8
credsweeper/deep_scanner/docx_scanner.py +1 -1
credsweeper/deep_scanner/encoder_scanner.py +2 -2
credsweeper/deep_scanner/html_scanner.py +3 -3
credsweeper/deep_scanner/jks_scanner.py +2 -4
credsweeper/deep_scanner/lang_scanner.py +2 -2
credsweeper/deep_scanner/lzma_scanner.py +40 -0
credsweeper/deep_scanner/pkcs12_scanner.py +3 -5
credsweeper/deep_scanner/xml_scanner.py +2 -2
credsweeper/file_handler/data_content_provider.py +21 -12
credsweeper/filters/value_array_dictionary_check.py +3 -1
credsweeper/filters/value_azure_token_check.py +1 -2
credsweeper/filters/value_base64_part_check.py +30 -21
credsweeper/filters/value_discord_bot_check.py +1 -2
credsweeper/filters/value_entropy_base32_check.py +11 -31
credsweeper/filters/value_entropy_base36_check.py +11 -34
credsweeper/filters/value_entropy_base64_check.py +19 -48
credsweeper/filters/value_entropy_base_check.py +37 -0
credsweeper/filters/value_file_path_check.py +1 -1
credsweeper/filters/value_hex_number_check.py +3 -3
credsweeper/filters/value_json_web_token_check.py +4 -5
credsweeper/filters/value_string_type_check.py +11 -3
credsweeper/filters/value_token_base32_check.py +0 -4
credsweeper/filters/value_token_base36_check.py +0 -4
credsweeper/filters/value_token_base64_check.py +0 -4
credsweeper/filters/value_token_check.py +1 -1
credsweeper/ml_model/features/file_extension.py +1 -1
credsweeper/ml_model/features/morpheme_dense.py +0 -4
credsweeper/ml_model/features/rule_name.py +1 -1
credsweeper/ml_model/features/word_in_path.py +0 -9
credsweeper/ml_model/features/word_in_postamble.py +0 -11
credsweeper/ml_model/features/word_in_preamble.py +0 -11
credsweeper/ml_model/features/word_in_transition.py +0 -11
credsweeper/ml_model/features/word_in_value.py +0 -11
credsweeper/ml_model/features/word_in_variable.py +0 -11
credsweeper/ml_model/ml_validator.py +4 -3
credsweeper/rules/config.yaml +238 -208
credsweeper/scanner/scan_type/scan_type.py +2 -3
credsweeper/scanner/scanner.py +7 -1
credsweeper/secret/config.json +16 -5
credsweeper/utils/pem_key_detector.py +4 -5
credsweeper/utils/util.py +67 -144
{credsweeper-1.11.2.dist-info → credsweeper-1.11.3.dist-info}/METADATA +1 -1
{credsweeper-1.11.2.dist-info → credsweeper-1.11.3.dist-info}/RECORD +54 -53
credsweeper/utils/entropy_validator.py +0 -72
{credsweeper-1.11.2.dist-info → credsweeper-1.11.3.dist-info}/WHEEL +0 -0
{credsweeper-1.11.2.dist-info → credsweeper-1.11.3.dist-info}/entry_points.txt +0 -0
{credsweeper-1.11.2.dist-info → credsweeper-1.11.3.dist-info}/licenses/LICENSE +0 -0

credsweeper/filters/value_base64_part_check.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import contextlib
 import re
 import statistics
+from itertools import takewhile
 from credsweeper.common.constants import Chars
 from credsweeper.config import Config
@@ -16,8 +17,8 @@ class ValueBase64PartCheck(Filter):
     Check that candidate is NOT a part of base64 long line
     """
-    base64_pattern = re.compile(r"^(\\{1,8}[0abfnrtv]|[0-9A-Za-z+/=]){1,4000}")
-    base64_set = set(Chars.BASE64STDPAD_CHARS.value)
+    base64_pattern = re.compile(r"^(\\{1,8}[0abfnrtv]|[0-9A-Za-z+/=]){1,4000}$")
+    base64_char_set = set(Chars.BASE64STDPAD_CHARS.value + '\\')
     def __init__(self, config: Config = None) -> None:
         pass
@@ -64,38 +65,46 @@ class ValueBase64PartCheck(Filter):
                 elif right_end - left_start >= 2 * len_value:
                     # simple analysis for data too large to yield sensible insights
                     part_set = set(line[left_start:right_end])
-                    if not part_set.difference(self.base64_set):
+                    if not part_set.difference(ValueBase64PartCheck.base64_char_set):
                         # obvious case: all characters are base64 standard
                         return True
-                left_part = line[left_start:line_data.value_start]
-                len_left = len(left_part)
-                right_part = line[line_data.value_end:right_end]
-                len_right = len(right_part)
+                left_part = ''.join(
+                    takewhile(lambda x: x in ValueBase64PartCheck.base64_char_set,
+                              reversed(line[left_start:line_data.value_start])))
+                right_part = ''.join(
+                    takewhile(lambda x: x in ValueBase64PartCheck.base64_char_set, line[line_data.value_end:right_end]))
                 min_entropy_value = ValueEntropyBase64Check.get_min_data_entropy(len_value)
-                value_entropy = Util.get_shannon_entropy(value, Chars.BASE64STD_CHARS.value)
-                if ValueEntropyBase64Check.min_length < len_left:
-                    left_entropy = Util.get_shannon_entropy(left_part, Chars.BASE64STD_CHARS.value)
-                    if len_left < len_value:
-                        left_entropy *= len_value / len_left
-                else:
-                    left_entropy = min_entropy_value
+                left_entropy = Util.get_shannon_entropy(left_part)
+                value_entropy = Util.get_shannon_entropy(value)
+                right_entropy = Util.get_shannon_entropy(right_part)
+                common = left_part + value + right_part
+                common_entropy = Util.get_shannon_entropy(common)
+                min_entropy_common = ValueEntropyBase64Check.get_min_data_entropy(len(common))
+                if min_entropy_common < common_entropy:
+                    return True
-                if ValueEntropyBase64Check.min_length < len_right:
-                    right_entropy = Util.get_shannon_entropy(right_part, Chars.BASE64STD_CHARS.value)
-                    if len_right < len_value:
-                        left_entropy *= len_right / len_left
+                if left_entropy and right_entropy:
+                    data = [left_entropy, value_entropy, right_entropy, min_entropy_value, common_entropy]
+                elif left_entropy and not right_entropy:
+                    data = [left_entropy, value_entropy, min_entropy_value, min_entropy_value, common_entropy]
+                elif not left_entropy and right_entropy:
+                    data = [value_entropy, right_entropy, min_entropy_value, min_entropy_value, common_entropy]
                 else:
-                    right_entropy = min_entropy_value
+                    return False
-                data = [left_entropy, value_entropy, right_entropy, min_entropy_value]
                 avg = statistics.mean(data)
                 stdev = statistics.stdev(data, avg)
                 avg_min = avg - 1.1 * stdev
-                if avg_min <= left_entropy and avg_min <= right_entropy:
+                if (0. == left_entropy or avg_min < left_entropy or left_entropy < value_entropy < right_entropy) \
+                        and (
+                        0. == right_entropy or avg_min < right_entropy or right_entropy < value_entropy < left_entropy):
                     # high entropy of bound parts looks like a part of base64 long line
                     return True
+                else:
+                    return False
         return False

credsweeper/filters/value_discord_bot_check.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import contextlib
-from credsweeper.common.constants import Chars
 from credsweeper.config import Config
 from credsweeper.credentials import LineData
 from credsweeper.file_handler.analysis_target import AnalysisTarget
@@ -32,7 +31,7 @@ class ValueDiscordBotCheck(Filter):
             id_part = line_data.value[:dot_separator_index]
             discord_id = int(Util.decode_base64(id_part, padding_safe=True, urlsafe_detect=True))
             entropy_part = line_data.value[dot_separator_index:]
-            entropy = Util.get_shannon_entropy(entropy_part, Chars.BASE64URL_CHARS.value)
+            entropy = Util.get_shannon_entropy(entropy_part)
             min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(entropy_part))
             if 1000 <= discord_id and min_entropy <= entropy:
                 return False

credsweeper/filters/value_entropy_base32_check.py CHANGED Viewed

@@ -1,42 +1,22 @@
 import math
+from functools import cache
-from credsweeper.common.constants import Chars
-from credsweeper.config import Config
-from credsweeper.credentials import LineData
-from credsweeper.file_handler.analysis_target import AnalysisTarget
-from credsweeper.filters import Filter
-from credsweeper.utils import Util
+from credsweeper.filters.value_entropy_base_check import ValueEntropyBaseCheck
-class ValueEntropyBase32Check(Filter):
-    """Check that candidate have Shanon Entropy (for [a-z0-9])"""
-    def __init__(self, config: Config = None) -> None:
-        pass
+class ValueEntropyBase32Check(ValueEntropyBaseCheck):
+    """Base32 entropy check"""
     @staticmethod
+    @cache
     def get_min_data_entropy(x: int) -> float:
         """Returns average entropy for size of random data. Precalculated data is applied for speedup"""
-        if 16 == x:
-            y = 3.46
-        elif 10 <= x:
-            # approximation does not exceed stdev
-            y = 0.64 * math.log2(x) + 0.9
+        if 8 <= x < 17:
+            y = 0.80569236 * math.log2(x) + 0.13439734
+        elif 17 <= x < 33:
+            y = 0.66350481 * math.log2(x) + 0.71143862
+        elif 33 <= x:
+            y = 4.04
         else:
             y = 0
         return y
-    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
-        """Run filter checks on received credential candidate data 'line_data'.
-        Args:
-            line_data: credential candidate data
-            target: multiline target from which line data was obtained
-        Return:
-            True, if need to filter candidate and False if left
-        """
-        entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE32_CHARS.value)
-        min_entropy = ValueEntropyBase32Check.get_min_data_entropy(len(line_data.value))
-        return min_entropy > entropy or 0 == min_entropy

credsweeper/filters/value_entropy_base36_check.py CHANGED Viewed

@@ -1,46 +1,23 @@
 import math
+from functools import cache
-from credsweeper.common.constants import Chars
-from credsweeper.config import Config
-from credsweeper.credentials import LineData
-from credsweeper.file_handler.analysis_target import AnalysisTarget
-from credsweeper.filters import Filter
-from credsweeper.utils import Util
+from credsweeper.filters.value_entropy_base_check import ValueEntropyBaseCheck
-class ValueEntropyBase36Check(Filter):
-    """Check that candidate have Shanon Entropy (for [a-z0-9])"""
-    def __init__(self, config: Config = None) -> None:
-        pass
+class ValueEntropyBase36Check(ValueEntropyBaseCheck):
+    """Base36 entropy check"""
     @staticmethod
+    @cache
     def get_min_data_entropy(x: int) -> float:
         """Returns minimal entropy for size of random data. Precalculated data is applied for speedup"""
         if 15 == x:
-            y = 3.43
-        elif 24 == x:
-            y = 3.91
-        elif 25 == x:
-            y = 3.95
-        elif 10 <= x:
-            # approximation does not exceed standard deviation
-            y = 0.7 * math.log2(x) + 0.7
+            # workaround for Dropbox App secret
+            y = 3.374
+        elif 10 <= x < 26:
+            y = 0.731566857 * math.log2(x) + 0.474132
+        elif 26 <= x:
+            y = 3.9
         else:
             y = 0
         return y
-    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
-        """Run filter checks on received credential candidate data 'line_data'.
-        Args:
-            line_data: credential candidate data
-            target: multiline target from which line data was obtained
-        Return:
-            True, if need to filter candidate and False if left
-        """
-        entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE36_CHARS.value)
-        min_entropy = ValueEntropyBase36Check.get_min_data_entropy(len(line_data.value))
-        return min_entropy > entropy or 0 == min_entropy

credsweeper/filters/value_entropy_base64_check.py CHANGED Viewed

@@ -1,59 +1,30 @@
 import math
+from functools import cache
-from credsweeper.common.constants import Chars, ENTROPY_LIMIT_BASE64
-from credsweeper.config import Config
-from credsweeper.credentials import LineData
-from credsweeper.file_handler.analysis_target import AnalysisTarget
-from credsweeper.filters import Filter
-from credsweeper.utils import Util
+from credsweeper.filters.value_entropy_base_check import ValueEntropyBaseCheck
-class ValueEntropyBase64Check(Filter):
-    """Check that candidate have Shanon Entropy > 3 (for HEX_CHARS or BASE36_CHARS) or > 4.5 (for BASE64_CHARS)."""
-    # If the value size is less than this value the entropy evaluation gives an imprecise result
-    min_length = 12
-    def __init__(self, config: Config = None) -> None:
-        pass
+class ValueEntropyBase64Check(ValueEntropyBaseCheck):
+    """Base64 entropy check"""
     @staticmethod
+    @cache
     def get_min_data_entropy(x: int) -> float:
         """Returns minimal average entropy for size of random data. Precalculated round data is applied for speedup"""
-        if 18 == x:
-            y = 3.8
-        elif 20 == x:
-            y = 3.9
-        elif 24 == x:
-            y = 4.1
-        elif 32 == x:
-            y = 4.4
-        elif ValueEntropyBase64Check.min_length <= x < 35:
-            # logarithm base 2 - slow, but precise. Approximation does not exceed stdev
-            y = 0.77 * math.log2(x) + 0.62
-        elif 35 <= x < 60:
-            y = ENTROPY_LIMIT_BASE64
-        elif 60 <= x:
-            # the entropy grows slowly after 60
-            y = 5.0
+        if 12 <= x < 18:
+            y = 0.915 * math.log2(x) - 0.047
+        elif 18 <= x < 35:
+            y = 0.767 * math.log2(x) + 0.5677
+        elif 35 <= x < 65:
+            y = 0.944 * math.log2(x) - 0.009 * x - 0.04
+        elif 65 <= x < 256:
+            y = 0.621 * math.log2(x) - 0.003 * x + 1.54
+        elif 256 <= x < 512:
+            y = 5.77
+        elif 512 <= x < 1024:
+            y = 5.89
+        elif 1024 <= x:
+            y = 5.94
         else:
             y = 0
         return y
-    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
-        """Run filter checks on received credential candidate data 'line_data'.
-        Args:
-            line_data: credential candidate data
-            target: multiline target from which line data was obtained
-        Return:
-            True, if need to filter candidate and False if left
-        """
-        if '-' in line_data.value or '_' in line_data.value:
-            entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE64URL_CHARS.value)
-        else:
-            entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE64STD_CHARS.value)
-        min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(line_data.value))
-        return min_entropy > entropy or 0 == min_entropy

credsweeper/filters/value_entropy_base_check.py ADDED Viewed

@@ -0,0 +1,37 @@
+from abc import abstractmethod
+from credsweeper.config import Config
+from credsweeper.credentials import LineData
+from credsweeper.file_handler.analysis_target import AnalysisTarget
+from credsweeper.filters import Filter
+from credsweeper.utils import Util
+class ValueEntropyBaseCheck(Filter):
+    """Check that candidate value has minimal Shanon Entropy for appropriated base"""
+    def __init__(self, config: Config = None) -> None:
+        pass
+    @staticmethod
+    @abstractmethod
+    def get_min_data_entropy(x: int) -> float:
+        """Returns minimal entropy for size of data"""
+        raise NotImplementedError()
+    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
+        """Run filter checks on received credential candidate data 'line_data'.
+        Args:
+            line_data: credential candidate data
+            target: multiline target from which line data was obtained
+        Return:
+            True, when need to filter candidate and False if left
+        """
+        entropy = Util.get_shannon_entropy(line_data.value)
+        min_entropy = self.get_min_data_entropy(len(line_data.value))
+        if min_entropy > entropy or 0 == min_entropy:
+            return True
+        return False

credsweeper/filters/value_file_path_check.py CHANGED Viewed

@@ -53,7 +53,7 @@ class ValueFilePathCheck(Filter):
                     break
             else:
                 # all symbols are from base64 alphabet
-                entropy = Util.get_shannon_entropy(value, Chars.BASE64STDPAD_CHARS.value)
+                entropy = Util.get_shannon_entropy(value)
                 if 0 == min_entropy or min_entropy > entropy:
                     contains_unix_separator = 1 < value.count('/')
                 else:

credsweeper/filters/value_hex_number_check.py CHANGED Viewed

@@ -7,9 +7,9 @@ from credsweeper.filters import Filter
 class ValueHexNumberCheck(Filter):
-    """Check value if it a value in 32 or 64 bits hex representation"""
+    """Check value if it is a value up to 64 bits hex representation"""
-    HEX_32_64_VALUE_REGEX = re.compile(r"^0x([0-9a-f]{8}){1,2}$")
+    HEX_08_64_VALUE_REGEX = re.compile(r"^0x[0-9a-f]{1,16}$")
     def __init__(self, config: Config = None) -> None:
         pass
@@ -26,6 +26,6 @@ class ValueHexNumberCheck(Filter):
         """
         value = line_data.value.lower()
-        if len(value) in [10, 18] and ValueHexNumberCheck.HEX_32_64_VALUE_REGEX.match(value):
+        if ValueHexNumberCheck.HEX_08_64_VALUE_REGEX.match(value):
             return True
         return False

credsweeper/filters/value_json_web_token_check.py CHANGED Viewed

@@ -15,14 +15,13 @@ class ValueJsonWebTokenCheck(Filter):
     https://www.iana.org/assignments/jose/jose.xhtml
     """
     header_keys = {
-        "alg", "jku", "jwk", "kid", "x5u", "x5c", "x5t", "x5t#S256", "typ", "cty", "crit", "alg", "enc", "zip", "jku",
-        "jwk", "kid", "x5u", "x5c", "x5t", "x5t#S256", "typ", "cty", "crit", "epk", "apu", "apv", "iv", "tag", "p2s",
-        "p2c", "iss", "sub", "aud", "b64", "ppt", "url", "nonce", "svt"
+        "kid", "x5u", "x5t", "x5t#S256", "typ", "cty", "crit", "alg", "enc", "zip", "jku", "jwk", "x5c", "epk", "apu",
+        "apv", "iv", "tag", "p2s", "p2c", "iss", "sub", "aud", "b64", "ppt", "url", "nonce", "svt"
     }
     payload_keys = {
         "iss", "sub", "aud", "exp", "nbf", "iat", "jti", "kty", "use", "key_ops", "alg", "enc", "zip", "jku", "jwk",
-        "kid", "x5u", "x5c", "x5t", "x5t#S256", "crv", "x", "y", "d", "n", "e", "d", "p", "q", "dp", "dq", "qi", "oth",
-        "k", "crv", "d", "x", "ext", "crit", "keys", "id", "role", "token", "secret", "password", "nonce"
+        "kid", "x5u", "x5c", "x5t", "x5t#S256", "x", "y", "d", "n", "e", "p", "q", "dp", "dq", "qi", "oth", "k", "crv",
+        "ext", "crit", "keys", "id", "role", "token", "secret", "password", "nonce"
     }
     def __init__(self, config: Config = None) -> None:

credsweeper/filters/value_string_type_check.py CHANGED Viewed

@@ -1,3 +1,5 @@
+import re
 from credsweeper.config import Config
 from credsweeper.credentials import LineData
 from credsweeper.file_handler.analysis_target import AnalysisTarget
@@ -9,6 +11,7 @@ class ValueStringTypeCheck(Filter):
     If it is, then checks if line_data really have string literal declaration.
     Comment rows in source files (start with //, /\*, etc) ignored.
+    Multiple bytes scenario allowed [123,23,54,67,78,89] or {0xae, 0x54, 0x55, 0xff}
     True if:
@@ -20,6 +23,8 @@ class ValueStringTypeCheck(Filter):
     False otherwise
     """
+    MULTIBYTE_PATTERN = re.compile(r"(\s*(0x)?[0-9a-f]{1,3}\s*,){8,80}", flags=re.IGNORECASE)
     def __init__(self, config: Config) -> None:
         self.check_for_literals = config.check_for_literals
@@ -37,10 +42,13 @@ class ValueStringTypeCheck(Filter):
         if not self.check_for_literals or line_data.url_part:
             return False
-        not_quoted = not line_data.is_well_quoted_value
-        not_comment = not line_data.is_comment()
+        if ValueStringTypeCheck.MULTIBYTE_PATTERN.match(line_data.value):
+            return False
-        if line_data.is_source_file_with_quotes() and not_comment and not_quoted and not line_data.is_quoted \
+        if line_data.is_source_file_with_quotes() \
+                and not line_data.is_comment() \
+                and not line_data.is_well_quoted_value \
+                and not line_data.is_quoted \
                 and line_data.separator and '=' in line_data.separator:
             # heterogeneous code e.g. YAML in Python uses colon sign instead equals
             return True

credsweeper/filters/value_token_base32_check.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from typing import Tuple
-from credsweeper.config import Config
 from credsweeper.filters.value_token_base_check import ValueTokenBaseCheck
@@ -21,9 +20,6 @@ class ValueTokenBase32Check(ValueTokenBaseCheck):
         64: ((3.4805990476190476, 0.28572156450556774), (2.035756800745673, 0.18815721535870078)),
     }
-    def __init__(self, config: Config = None) -> None:
-        super().__init__(config)
     @staticmethod
     def get_stat_range(size: int) -> Tuple[Tuple[float, float], Tuple[float, float]]:
         """Returns minimal, maximal for hop and deviation. Precalculated data is applied for speedup"""

credsweeper/filters/value_token_base36_check.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from typing import Tuple
-from credsweeper.config import Config
 from credsweeper.filters.value_token_base_check import ValueTokenBaseCheck
@@ -21,9 +20,6 @@ class ValueTokenBase36Check(ValueTokenBaseCheck):
         64: ((3.7190009761904763, 0.30325954360127116), (2.1751172797904093, 0.1942582237461476)),
     }
-    def __init__(self, config: Config = None) -> None:
-        super().__init__(config)
     @staticmethod
     def get_stat_range(size: int) -> Tuple[Tuple[float, float], Tuple[float, float]]:
         """Returns minimal, maximal for hop and deviation. Precalculated data is applied for speedup"""

credsweeper/filters/value_token_base64_check.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from typing import Tuple
-from credsweeper.config import Config
 from credsweeper.filters.value_token_base_check import ValueTokenBaseCheck
@@ -21,9 +20,6 @@ class ValueTokenBase64Check(ValueTokenBaseCheck):
         64: ((3.7625271746031745, 0.31733579704946846), (2.257532519514275, 0.20571908142867643)),
     }
-    def __init__(self, config: Config = None) -> None:
-        super().__init__(config)
     @staticmethod
     def get_stat_range(size: int) -> Tuple[Tuple[float, float], Tuple[float, float]]:
         """Returns minimal, maximal for hop and deviation. Precalculated data is applied for speedup"""

credsweeper/filters/value_token_check.py CHANGED Viewed

@@ -17,7 +17,7 @@ class ValueTokenCheck(Filter):
     """
-    SPLIT_PATTERN = r" |;|\)|\(|{|}|<|>|\[|\]|`"
+    SPLIT_PATTERN = r"(?<!,) (?!,)|;|\)|\(|{|}|<|>|\[|\]|`"
     def __init__(self, config: Config = None) -> None:
         pass

credsweeper/ml_model/features/file_extension.py CHANGED Viewed

@@ -15,7 +15,7 @@ class FileExtension(WordIn):
     """
     def __init__(self, extensions: List[str]) -> None:
-        super().__init__(extensions)
+        super().__init__(words=extensions)
     def __call__(self, candidates: List[Candidate]) -> np.ndarray:
         extension_set = set([candidate.line_data_list[0].file_type.lower() for candidate in candidates])

credsweeper/ml_model/features/morpheme_dense.py CHANGED Viewed

@@ -6,10 +6,6 @@ from credsweeper.ml_model.features.feature import Feature
 class MorphemeDense(Feature):
     """Feature calculates morphemes density for a value"""
-    def __init__(self) -> None:
-        """Class initializer"""
-        super().__init__()
     def extract(self, candidate: Candidate) -> float:
         if value := candidate.line_data_list[0].value.lower():
             morphemes_counter = 0

credsweeper/ml_model/features/rule_name.py CHANGED Viewed

@@ -15,7 +15,7 @@ class RuleName(WordIn):
     """
     def __init__(self, rule_names: List[str]) -> None:
-        super().__init__(rule_names)
+        super().__init__(words=rule_names)
     def __call__(self, candidates: List[Candidate]) -> np.ndarray:
         candidate_rule_set = set(x.rule_name for x in candidates)

credsweeper/ml_model/features/word_in_path.py CHANGED Viewed

@@ -10,15 +10,6 @@ from credsweeper.ml_model.features.word_in import WordIn
 class WordInPath(WordIn):
     """Categorical feature that corresponds to words in path (POSIX, lowercase)"""
-    def __init__(self, words: List[str]) -> None:
-        """WordInPath constructor
-        Args:
-            words: list of predefined words - MUST BE IN LOWER CASE & POSIX
-        """
-        super().__init__(words)
     def __call__(self, candidates: List[Candidate]) -> np.ndarray:
         # actually there must be one path because the candidates are grouped before
         if file_path := candidates[0].line_data_list[0].path:

credsweeper/ml_model/features/word_in_postamble.py CHANGED Viewed

@@ -1,5 +1,3 @@
-from typing import List
 import numpy as np
 from credsweeper.common.constants import ML_HUNK
@@ -10,15 +8,6 @@ from credsweeper.ml_model.features.word_in import WordIn
 class WordInPostamble(WordIn):
     """Feature is true if line contains at least one word from predefined list."""
-    def __init__(self, words: List[str]) -> None:
-        """Feature returns array of matching words
-        Args:
-            words: list of predefined words - MUST BE IN LOWER CASE
-        """
-        super().__init__(words)
     def extract(self, candidate: Candidate) -> np.ndarray:
         """Returns true if any words in a part of line after value"""
         postamble_end = len(candidate.line_data_list[0].line) \

credsweeper/ml_model/features/word_in_preamble.py CHANGED Viewed

@@ -1,5 +1,3 @@
-from typing import List
 import numpy as np
 from credsweeper.common.constants import ML_HUNK
@@ -10,15 +8,6 @@ from credsweeper.ml_model.features.word_in import WordIn
 class WordInPreamble(WordIn):
     """Feature is true if line contains at least one word from predefined list."""
-    def __init__(self, words: List[str]) -> None:
-        """Feature returns array of matching words
-        Args:
-            words: list of predefined words - MUST BE IN LOWER CASE
-        """
-        super().__init__(words)
     def extract(self, candidate: Candidate) -> np.ndarray:
         """Returns true if any words in line before variable or value"""
         if 0 <= candidate.line_data_list[0].variable_start:

credsweeper/ml_model/features/word_in_transition.py CHANGED Viewed

@@ -1,5 +1,3 @@
-from typing import List
 import numpy as np
 from credsweeper.credentials import Candidate
@@ -9,15 +7,6 @@ from credsweeper.ml_model.features.word_in import WordIn
 class WordInTransition(WordIn):
     """Feature is true if line contains at least one word from predefined list."""
-    def __init__(self, words: List[str]) -> None:
-        """Feature returns array of matching words
-        Args:
-            words: list of predefined words - MUST BE IN LOWER CASE
-        """
-        super().__init__(words)
     def extract(self, candidate: Candidate) -> np.ndarray:
         """Returns true if any words between variable and value"""
         if 0 <= candidate.line_data_list[0].variable_end < candidate.line_data_list[0].value_start:

credsweeper/ml_model/features/word_in_value.py CHANGED Viewed

@@ -1,5 +1,3 @@
-from typing import List
 import numpy as np
 from credsweeper.credentials import Candidate
@@ -9,15 +7,6 @@ from credsweeper.ml_model.features.word_in import WordIn
 class WordInValue(WordIn):
     """Feature returns true if candidate value contains at least one word from predefined list."""
-    def __init__(self, words: List[str]) -> None:
-        """Feature is true if candidate value contains at least one predefined word.
-        Args:
-            words: list of predefined words - MUST BE IN LOWER CASE and SORTED (preferred)
-        """
-        super().__init__(words)
     def extract(self, candidate: Candidate) -> np.ndarray:
         """Returns array of matching words for first line"""
         if value := candidate.line_data_list[0].value:

credsweeper/ml_model/features/word_in_variable.py CHANGED Viewed

@@ -1,5 +1,3 @@
-from typing import List
 import numpy as np
 from credsweeper.credentials import Candidate
@@ -9,15 +7,6 @@ from credsweeper.ml_model.features.word_in import WordIn
 class WordInVariable(WordIn):
     """Feature returns array of words matching in variable"""
-    def __init__(self, words: List[str]) -> None:
-        """Feature is true if candidate value contains at least one predefined word.
-        Args:
-            words: list of predefined words - MUST BE IN LOWER CASE
-        """
-        super().__init__(words)
     def extract(self, candidate: Candidate) -> np.ndarray:
         """Returns array of matching words for first line"""
         if variable := candidate.line_data_list[0].variable:

credsweeper 1.11.2__py3-none-any.whl → 1.11.3__py3-none-any.whl

Potentially problematic release.

credsweeper 1.11.2__py3-none-any.whl → 1.11.3__py3-none-any.whl