credsweeper 1.11.5__py3-none-any.whl → 1.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

Files changed (141)
  1. credsweeper/__init__.py +21 -15
  2. credsweeper/__main__.py +141 -35
  3. credsweeper/app.py +11 -11
  4. credsweeper/common/keyword_pattern.py +13 -15
  5. credsweeper/common/morpheme_checklist.txt +2 -0
  6. credsweeper/config/__init__.py +0 -1
  7. credsweeper/config/config.py +3 -3
  8. credsweeper/credentials/__init__.py +0 -5
  9. credsweeper/credentials/augment_candidates.py +1 -1
  10. credsweeper/credentials/candidate.py +1 -1
  11. credsweeper/credentials/credential_manager.py +1 -1
  12. credsweeper/credentials/line_data.py +22 -2
  13. credsweeper/deep_scanner/__init__.py +0 -1
  14. credsweeper/deep_scanner/abstract_scanner.py +3 -3
  15. credsweeper/deep_scanner/byte_scanner.py +1 -1
  16. credsweeper/deep_scanner/bzip2_scanner.py +2 -2
  17. credsweeper/deep_scanner/deb_scanner.py +1 -1
  18. credsweeper/deep_scanner/deep_scanner.py +3 -3
  19. credsweeper/deep_scanner/docx_scanner.py +1 -1
  20. credsweeper/deep_scanner/eml_scanner.py +1 -1
  21. credsweeper/deep_scanner/encoder_scanner.py +1 -1
  22. credsweeper/deep_scanner/gzip_scanner.py +2 -2
  23. credsweeper/deep_scanner/html_scanner.py +1 -1
  24. credsweeper/deep_scanner/jclass_scanner.py +1 -1
  25. credsweeper/deep_scanner/jks_scanner.py +1 -1
  26. credsweeper/deep_scanner/lang_scanner.py +1 -1
  27. credsweeper/deep_scanner/lzma_scanner.py +2 -2
  28. credsweeper/deep_scanner/mxfile_scanner.py +1 -1
  29. credsweeper/deep_scanner/pdf_scanner.py +1 -1
  30. credsweeper/deep_scanner/pkcs_scanner.py +2 -2
  31. credsweeper/deep_scanner/pptx_scanner.py +1 -1
  32. credsweeper/deep_scanner/rpm_scanner.py +1 -1
  33. credsweeper/deep_scanner/tar_scanner.py +2 -2
  34. credsweeper/deep_scanner/tmx_scanner.py +2 -2
  35. credsweeper/deep_scanner/xlsx_scanner.py +2 -2
  36. credsweeper/deep_scanner/xml_scanner.py +1 -1
  37. credsweeper/deep_scanner/zip_scanner.py +2 -2
  38. credsweeper/file_handler/__init__.py +0 -15
  39. credsweeper/file_handler/abstract_provider.py +3 -4
  40. credsweeper/file_handler/byte_content_provider.py +1 -1
  41. credsweeper/file_handler/content_provider.py +1 -1
  42. credsweeper/file_handler/data_content_provider.py +1 -1
  43. credsweeper/file_handler/diff_content_provider.py +133 -3
  44. credsweeper/file_handler/file_path_extractor.py +2 -2
  45. credsweeper/file_handler/files_provider.py +4 -4
  46. credsweeper/file_handler/patches_provider.py +7 -8
  47. credsweeper/file_handler/text_content_provider.py +1 -1
  48. credsweeper/filters/__init__.py +2 -3
  49. credsweeper/filters/filter.py +5 -3
  50. credsweeper/filters/group/__init__.py +0 -2
  51. credsweeper/filters/group/general_keyword.py +2 -2
  52. credsweeper/filters/group/general_pattern.py +2 -2
  53. credsweeper/filters/group/group.py +38 -36
  54. credsweeper/filters/group/password_keyword.py +9 -8
  55. credsweeper/filters/group/token_pattern.py +3 -3
  56. credsweeper/filters/group/url_credentials_group.py +8 -8
  57. credsweeper/filters/group/weird_base36_token.py +3 -3
  58. credsweeper/filters/group/weird_base64_token.py +3 -3
  59. credsweeper/filters/line_git_binary_check.py +5 -4
  60. credsweeper/filters/line_specific_key_check.py +6 -5
  61. credsweeper/filters/line_uue_part_check.py +5 -4
  62. credsweeper/filters/value_allowlist_check.py +6 -5
  63. credsweeper/filters/value_array_dictionary_check.py +8 -6
  64. credsweeper/filters/value_atlassian_token_check.py +6 -5
  65. credsweeper/filters/value_azure_token_check.py +6 -5
  66. credsweeper/filters/value_base32_data_check.py +8 -5
  67. credsweeper/filters/value_base64_data_check.py +6 -5
  68. credsweeper/filters/value_base64_encoded_pem_check.py +6 -5
  69. credsweeper/filters/value_base64_key_check.py +6 -5
  70. credsweeper/filters/value_base64_part_check.py +6 -5
  71. credsweeper/filters/value_basic_auth_check.py +37 -0
  72. credsweeper/filters/value_blocklist_check.py +6 -4
  73. credsweeper/filters/value_camel_case_check.py +6 -5
  74. credsweeper/filters/value_couple_keyword_check.py +6 -4
  75. credsweeper/filters/value_dictionary_keyword_check.py +6 -4
  76. credsweeper/filters/value_discord_bot_check.py +6 -5
  77. credsweeper/filters/value_entropy_base_check.py +6 -5
  78. credsweeper/filters/value_file_path_check.py +8 -5
  79. credsweeper/filters/value_github_check.py +5 -4
  80. credsweeper/filters/value_grafana_check.py +6 -5
  81. credsweeper/filters/value_grafana_service_check.py +5 -4
  82. credsweeper/filters/value_hex_number_check.py +5 -4
  83. credsweeper/filters/value_jfrog_token_check.py +6 -5
  84. credsweeper/filters/value_json_web_key_check.py +6 -5
  85. credsweeper/filters/value_json_web_token_check.py +6 -5
  86. credsweeper/filters/value_last_word_check.py +6 -4
  87. credsweeper/filters/{value_dictionary_value_length_check.py → value_length_check.py} +12 -6
  88. credsweeper/filters/value_method_check.py +5 -4
  89. credsweeper/filters/value_not_allowed_pattern_check.py +6 -5
  90. credsweeper/filters/value_not_part_encoded_check.py +4 -4
  91. credsweeper/filters/value_number_check.py +5 -4
  92. credsweeper/filters/value_pattern_check.py +61 -41
  93. credsweeper/filters/value_similarity_check.py +6 -4
  94. credsweeper/filters/value_split_keyword_check.py +5 -4
  95. credsweeper/filters/value_string_type_check.py +9 -7
  96. credsweeper/filters/value_token_base_check.py +5 -4
  97. credsweeper/filters/value_token_check.py +6 -5
  98. credsweeper/logger/__init__.py +0 -1
  99. credsweeper/logger/logger.py +1 -1
  100. credsweeper/ml_model/__init__.py +0 -1
  101. credsweeper/ml_model/features/__init__.py +1 -0
  102. credsweeper/ml_model/features/entropy_evaluation.py +1 -1
  103. credsweeper/ml_model/features/feature.py +1 -1
  104. credsweeper/ml_model/features/file_extension.py +1 -1
  105. credsweeper/ml_model/features/has_html_tag.py +2 -2
  106. credsweeper/ml_model/features/is_secret_numeric.py +1 -1
  107. credsweeper/ml_model/features/length_of_attribute.py +1 -1
  108. credsweeper/ml_model/features/morpheme_dense.py +15 -8
  109. credsweeper/ml_model/features/rule_name.py +1 -1
  110. credsweeper/ml_model/features/rule_severity.py +21 -0
  111. credsweeper/ml_model/features/search_in_attribute.py +1 -1
  112. credsweeper/ml_model/features/word_in.py +1 -1
  113. credsweeper/ml_model/features/word_in_path.py +5 -2
  114. credsweeper/ml_model/features/word_in_postamble.py +1 -1
  115. credsweeper/ml_model/features/word_in_preamble.py +1 -1
  116. credsweeper/ml_model/features/word_in_transition.py +1 -1
  117. credsweeper/ml_model/features/word_in_value.py +1 -1
  118. credsweeper/ml_model/features/word_in_variable.py +1 -1
  119. credsweeper/ml_model/ml_config.json +130 -24
  120. credsweeper/ml_model/ml_model.onnx +0 -0
  121. credsweeper/ml_model/ml_validator.py +3 -2
  122. credsweeper/rules/__init__.py +0 -1
  123. credsweeper/rules/config.yaml +116 -42
  124. credsweeper/rules/rule.py +4 -3
  125. credsweeper/scanner/__init__.py +0 -1
  126. credsweeper/scanner/scan_type/__init__.py +0 -5
  127. credsweeper/scanner/scan_type/multi_pattern.py +4 -4
  128. credsweeper/scanner/scan_type/pem_key_pattern.py +4 -4
  129. credsweeper/scanner/scan_type/scan_type.py +4 -4
  130. credsweeper/scanner/scan_type/single_pattern.py +4 -4
  131. credsweeper/scanner/scanner.py +12 -8
  132. credsweeper/secret/config.json +2 -2
  133. credsweeper/utils/__init__.py +0 -1
  134. credsweeper/utils/pem_key_detector.py +3 -3
  135. credsweeper/utils/util.py +3 -132
  136. {credsweeper-1.11.5.dist-info → credsweeper-1.12.0.dist-info}/METADATA +1 -1
  137. credsweeper-1.12.0.dist-info/RECORD +161 -0
  138. credsweeper-1.11.5.dist-info/RECORD +0 -159
  139. {credsweeper-1.11.5.dist-info → credsweeper-1.12.0.dist-info}/WHEEL +0 -0
  140. {credsweeper-1.11.5.dist-info → credsweeper-1.12.0.dist-info}/entry_points.txt +0 -0
  141. {credsweeper-1.11.5.dist-info → credsweeper-1.12.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,14 +1,16 @@
1
+ from typing import Optional
2
+
1
3
  from credsweeper.common import static_keyword_checklist
2
- from credsweeper.config import Config
3
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
4
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
5
- from credsweeper.filters import Filter
7
+ from credsweeper.filters.filter import Filter
6
8
 
7
9
 
8
10
  class ValueDictionaryKeywordCheck(Filter):
9
11
  """Check that no word from dictionary present in the candidate value."""
10
12
 
11
- def __init__(self, config: Config = None) -> None:
13
+ def __init__(self, config: Optional[Config] = None) -> None:
12
14
  pass
13
15
 
14
16
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,17 +1,18 @@
1
1
  import contextlib
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
+ from credsweeper.filters.filter import Filter
7
8
  from credsweeper.filters.value_entropy_base64_check import ValueEntropyBase64Check
8
- from credsweeper.utils import Util
9
+ from credsweeper.utils.util import Util
9
10
 
10
11
 
11
12
  class ValueDiscordBotCheck(Filter):
12
13
  """Discord bot Token"""
13
14
 
14
- def __init__(self, config: Config = None) -> None:
15
+ def __init__(self, config: Optional[Config] = None) -> None:
15
16
  pass
16
17
 
17
18
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,16 +1,17 @@
1
1
  from abc import abstractmethod
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
- from credsweeper.utils import Util
7
+ from credsweeper.filters.filter import Filter
8
+ from credsweeper.utils.util import Util
8
9
 
9
10
 
10
11
  class ValueEntropyBaseCheck(Filter):
11
12
  """Check that candidate value has minimal Shanon Entropy for appropriated base"""
12
13
 
13
- def __init__(self, config: Config = None) -> None:
14
+ def __init__(self, config: Optional[Config] = None) -> None:
14
15
  pass
15
16
 
16
17
  @staticmethod
@@ -1,10 +1,13 @@
1
+ from typing import Optional
2
+
1
3
  from credsweeper.common import static_keyword_checklist
2
4
  from credsweeper.common.constants import Chars
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
5
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter, ValueEntropyBase64Check
7
- from credsweeper.utils import Util
8
+ from credsweeper.filters.filter import Filter
9
+ from credsweeper.filters.value_entropy_base64_check import ValueEntropyBase64Check
10
+ from credsweeper.utils.util import Util
8
11
 
9
12
 
10
13
  class ValueFilePathCheck(Filter):
@@ -17,7 +20,7 @@ class ValueFilePathCheck(Filter):
17
20
  unusual_windows_symbols_in_path = "\t\n\r!$@`&*(){}<>+=;,~^"
18
21
  unusual_linux_symbols_in_path = "\t\n\r!@`&*<>+=;,~^:\\"
19
22
 
20
- def __init__(self, config: Config = None) -> None:
23
+ def __init__(self, config: Optional[Config] = None) -> None:
21
24
  pass
22
25
 
23
26
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,19 +1,20 @@
1
1
  import binascii
2
2
  import contextlib
3
+ from typing import Optional
3
4
 
4
5
  import base62
5
6
 
6
7
  from credsweeper.common.constants import ASCII
7
- from credsweeper.config import Config
8
- from credsweeper.credentials import LineData
8
+ from credsweeper.config.config import Config
9
+ from credsweeper.credentials.line_data import LineData
9
10
  from credsweeper.file_handler.analysis_target import AnalysisTarget
10
- from credsweeper.filters import Filter
11
+ from credsweeper.filters.filter import Filter
11
12
 
12
13
 
13
14
  class ValueGitHubCheck(Filter):
14
15
  """GitHub Classic Token validation"""
15
16
 
16
- def __init__(self, config: Config = None) -> None:
17
+ def __init__(self, config: Optional[Config] = None) -> None:
17
18
  pass
18
19
 
19
20
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,17 +1,18 @@
1
1
  import contextlib
2
2
  import json
3
+ from typing import Optional
3
4
 
4
- from credsweeper.config import Config
5
- from credsweeper.credentials import LineData
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
6
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
7
- from credsweeper.filters import Filter
8
- from credsweeper.utils import Util
8
+ from credsweeper.filters.filter import Filter
9
+ from credsweeper.utils.util import Util
9
10
 
10
11
 
11
12
  class ValueGrafanaCheck(Filter):
12
13
  """Grafana Provisioned API Key and Access Policy Token"""
13
14
 
14
- def __init__(self, config: Config = None) -> None:
15
+ def __init__(self, config: Optional[Config] = None) -> None:
15
16
  pass
16
17
 
17
18
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,18 +1,19 @@
1
1
  import binascii
2
2
  import contextlib
3
3
  import struct
4
+ from typing import Optional
4
5
 
5
6
  from credsweeper.common.constants import ASCII
6
- from credsweeper.config import Config
7
- from credsweeper.credentials import LineData
7
+ from credsweeper.config.config import Config
8
+ from credsweeper.credentials.line_data import LineData
8
9
  from credsweeper.file_handler.analysis_target import AnalysisTarget
9
- from credsweeper.filters import Filter
10
+ from credsweeper.filters.filter import Filter
10
11
 
11
12
 
12
13
  class ValueGrafanaServiceCheck(Filter):
13
14
  """Check that candidate have a known structure"""
14
15
 
15
- def __init__(self, config: Config = None) -> None:
16
+ def __init__(self, config: Optional[Config] = None) -> None:
16
17
  pass
17
18
 
18
19
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,9 +1,10 @@
1
1
  import re
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
+ from credsweeper.filters.filter import Filter
7
8
 
8
9
 
9
10
  class ValueHexNumberCheck(Filter):
@@ -11,7 +12,7 @@ class ValueHexNumberCheck(Filter):
11
12
 
12
13
  HEX_08_64_VALUE_REGEX = re.compile(r"^0x[0-9a-f]{1,16}$")
13
14
 
14
- def __init__(self, config: Config = None) -> None:
15
+ def __init__(self, config: Optional[Config] = None) -> None:
15
16
  pass
16
17
 
17
18
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,20 +1,21 @@
1
1
  import contextlib
2
2
  import re
3
+ from typing import Optional
3
4
 
4
5
  import base58
5
6
 
6
7
  from credsweeper.common.constants import ASCII
7
- from credsweeper.config import Config
8
- from credsweeper.credentials import LineData
8
+ from credsweeper.config.config import Config
9
+ from credsweeper.credentials.line_data import LineData
9
10
  from credsweeper.file_handler.analysis_target import AnalysisTarget
10
- from credsweeper.filters import Filter
11
- from credsweeper.utils import Util
11
+ from credsweeper.filters.filter import Filter
12
+ from credsweeper.utils.util import Util
12
13
 
13
14
 
14
15
  class ValueJfrogTokenCheck(Filter):
15
16
  """Check that candidate have a known structure JFROG token"""
16
17
 
17
- def __init__(self, config: Config = None) -> None:
18
+ def __init__(self, config: Optional[Config] = None) -> None:
18
19
  # reftkn:01:0123456789:abcdefGhijklmnoPqrstuVwxyz0
19
20
  self._pattern = re.compile(r"reftkn:\d+:\d+:[\w_/+-]+")
20
21
 
@@ -1,10 +1,11 @@
1
1
  import contextlib
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
- from credsweeper.utils import Util
7
+ from credsweeper.filters.filter import Filter
8
+ from credsweeper.utils.util import Util
8
9
 
9
10
 
10
11
  class ValueJsonWebKeyCheck(Filter):
@@ -15,7 +16,7 @@ class ValueJsonWebKeyCheck(Filter):
15
16
  https://datatracker.ietf.org/doc/html/rfc7518
16
17
  """
17
18
 
18
- def __init__(self, config: Config = None) -> None:
19
+ def __init__(self, config: Optional[Config] = None) -> None:
19
20
  pass
20
21
 
21
22
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,11 +1,12 @@
1
1
  import contextlib
2
2
  import json
3
+ from typing import Optional
3
4
 
4
- from credsweeper.config import Config
5
- from credsweeper.credentials import LineData
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
6
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
7
- from credsweeper.filters import Filter
8
- from credsweeper.utils import Util
8
+ from credsweeper.filters.filter import Filter
9
+ from credsweeper.utils.util import Util
9
10
 
10
11
 
11
12
  class ValueJsonWebTokenCheck(Filter):
@@ -24,7 +25,7 @@ class ValueJsonWebTokenCheck(Filter):
24
25
  "ext", "crit", "keys", "id", "role", "token", "secret", "password", "nonce"
25
26
  }
26
27
 
27
- def __init__(self, config: Config = None) -> None:
28
+ def __init__(self, config: Optional[Config] = None) -> None:
28
29
  pass
29
30
 
30
31
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,13 +1,15 @@
1
- from credsweeper.config import Config
2
- from credsweeper.credentials import LineData
1
+ from typing import Optional
2
+
3
+ from credsweeper.config.config import Config
4
+ from credsweeper.credentials.line_data import LineData
3
5
  from credsweeper.file_handler.analysis_target import AnalysisTarget
4
- from credsweeper.filters import Filter
6
+ from credsweeper.filters.filter import Filter
5
7
 
6
8
 
7
9
  class ValueLastWordCheck(Filter):
8
10
  """Check that secret is not short value that ends with `:`."""
9
11
 
10
- def __init__(self, config: Config = None) -> None:
12
+ def __init__(self, config: Optional[Config] = None) -> None:
11
13
  pass
12
14
 
13
15
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,13 +1,19 @@
1
- from credsweeper.config import Config
2
- from credsweeper.credentials import LineData
1
+ from typing import Optional
2
+
3
+ from credsweeper.common.constants import MIN_VALUE_LENGTH, MAX_LINE_LENGTH
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
3
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
4
- from credsweeper.filters import Filter
7
+ from credsweeper.filters.filter import Filter
5
8
 
6
9
 
7
- class ValueDictionaryValueLengthCheck(Filter):
8
- """Check that candidate length is between 5 and 30."""
10
+ class ValueLengthCheck(Filter):
11
+ """Check that candidate value length is between MIN and MAX."""
9
12
 
10
- def __init__(self, config: Config = None, min_len: int = 4, max_len: int = 31) -> None:
13
+ def __init__(self,
14
+ config: Optional[Config] = None,
15
+ min_len: int = MIN_VALUE_LENGTH,
16
+ max_len: int = MAX_LINE_LENGTH) -> None:
11
17
  self.min_len = min_len
12
18
  self.max_len = max_len
13
19
 
@@ -1,9 +1,10 @@
1
1
  import re
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
+ from credsweeper.filters.filter import Filter
7
8
 
8
9
 
9
10
  class ValueMethodCheck(Filter):
@@ -14,7 +15,7 @@ class ValueMethodCheck(Filter):
14
15
 
15
16
  PATTERN = re.compile(r"^[~.\->:0-9A-Za-z_]+\(.*\)")
16
17
 
17
- def __init__(self, config: Config = None) -> None:
18
+ def __init__(self, config: Optional[Config] = None) -> None:
18
19
  pass
19
20
 
20
21
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,10 +1,11 @@
1
1
  import re
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
- from credsweeper.utils import Util
7
+ from credsweeper.filters.filter import Filter
8
+ from credsweeper.utils.util import Util
8
9
 
9
10
 
10
11
  class ValueNotAllowedPatternCheck(Filter):
@@ -15,7 +16,7 @@ class ValueNotAllowedPatternCheck(Filter):
15
16
  f"{Util.get_regex_combine_or(NOT_ALLOWED)}$", #
16
17
  flags=re.IGNORECASE)
17
18
 
18
- def __init__(self, config: Config = None) -> None:
19
+ def __init__(self, config: Optional[Config] = None) -> None:
19
20
  pass
20
21
 
21
22
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -2,10 +2,10 @@ import re
2
2
  from typing import Optional
3
3
 
4
4
  from credsweeper.common import static_keyword_checklist
5
- from credsweeper.config import Config
6
- from credsweeper.credentials import LineData
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
7
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
8
- from credsweeper.filters import Filter
8
+ from credsweeper.filters.filter import Filter
9
9
 
10
10
 
11
11
  class ValueNotPartEncodedCheck(Filter):
@@ -16,7 +16,7 @@ class ValueNotPartEncodedCheck(Filter):
16
16
  BASE64_ENCODED_DATA_PATTERN_AFTER = re.compile(
17
17
  r"(^|[^A-Za-z0-9]+)(?P<val>(([A-Za-z0-9=_-]{4}){4,64})|(([A-Za-z0-9=+/]{4}){4,64}))([^=A-Za-z0-9]+|$)")
18
18
 
19
- def __init__(self, config: Config = None) -> None:
19
+ def __init__(self, config: Optional[Config] = None) -> None:
20
20
  pass
21
21
 
22
22
  @staticmethod
@@ -1,9 +1,10 @@
1
1
  import re
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
+ from credsweeper.filters.filter import Filter
7
8
 
8
9
 
9
10
  class ValueNumberCheck(Filter):
@@ -12,7 +13,7 @@ class ValueNumberCheck(Filter):
12
13
  HEX_VALUE_REGEX = re.compile("^(0x)?[0-9a-f]{1,128}[ul]{0,3}$")
13
14
  DEC_VALUE_REGEX = re.compile("^-?[0-9]{1,20}[ul]{0,3}$")
14
15
 
15
- def __init__(self, config: Config = None) -> None:
16
+ def __init__(self, config: Optional[Config] = None) -> None:
16
17
  pass
17
18
 
18
19
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,10 +1,11 @@
1
1
  import re
2
+ from typing import Optional
2
3
 
3
- from credsweeper.common.constants import DEFAULT_PATTERN_LEN
4
- from credsweeper.config import Config
5
- from credsweeper.credentials import LineData
4
+ from credsweeper.common.constants import DEFAULT_PATTERN_LEN, MAX_LINE_LENGTH
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
6
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
7
- from credsweeper.filters import Filter
8
+ from credsweeper.filters.filter import Filter
8
9
 
9
10
 
10
11
  class ValuePatternCheck(Filter):
@@ -22,36 +23,60 @@ class ValuePatternCheck(Filter):
22
23
  Default pattern LEN is 4
23
24
  """
24
25
 
25
- def __init__(self, config: Config = None, pattern_len: int = DEFAULT_PATTERN_LEN):
26
+ MAX_PATTERN_LENGTH = int(MAX_LINE_LENGTH).bit_length()
27
+
28
+ def __init__(self, config: Optional[Config] = None, pattern_len: Optional[int] = None):
26
29
  """Create ValuePatternCheck with a specific pattern_len to check.
27
30
 
28
31
  Args:
29
32
  config: pattern len to use during check. DEFAULT_PATTERN_LEN by default
33
+ pattern_len: size of constant pattern length for any value size or None for dynamic pattern size
30
34
 
31
35
  """
32
- self.pattern_len = pattern_len
33
- # use non whitespace symbol pattern
34
- self.pattern = re.compile(fr"(\S)\1{{{str(self.pattern_len - 1)},}}")
36
+ patterns_count = 1 + ValuePatternCheck.MAX_PATTERN_LENGTH
37
+ if pattern_len is None:
38
+ self.pattern_len = -1
39
+ # pattern length depends on value length
40
+ self.pattern_lengths = [max(x, DEFAULT_PATTERN_LEN) for x in range(patterns_count)]
41
+ self.patterns = [ValuePatternCheck.get_pattern(x) for x in range(patterns_count)]
42
+ elif isinstance(pattern_len, int) and DEFAULT_PATTERN_LEN <= pattern_len:
43
+ self.pattern_len = pattern_len
44
+ # constant pattern for any value length
45
+ self.pattern_lengths = [pattern_len] * patterns_count
46
+ self.patterns = [ValuePatternCheck.get_pattern(pattern_len)] * patterns_count
47
+ else:
48
+ raise ValueError(f"Wrong type of pattern length {type(pattern_len)} = {repr(pattern_len)}")
49
+
50
+ @staticmethod
51
+ def get_pattern(pattern_len: int) -> re.Pattern:
52
+ """Creates regex pattern to find N or more identical characters in sequence"""
53
+ if DEFAULT_PATTERN_LEN < pattern_len:
54
+ pattern = fr"(\S)\1{{{str(pattern_len - 1)},}}"
55
+ else:
56
+ pattern = r"(\S)\1{3,}"
57
+ return re.compile(pattern)
35
58
 
36
- def equal_pattern_check(self, value: str) -> bool:
59
+ def equal_pattern_check(self, value: str, bit_length: int) -> bool:
37
60
  """Check if candidate value contain 4 and more same chars or numbers sequences.
38
61
 
39
62
  Args:
40
63
  value: string variable, credential candidate value
64
+ bit_length: speedup for len(value).bit_length()
41
65
 
42
66
  Return:
43
67
  True if contain and False if not
44
68
 
45
69
  """
46
- if self.pattern.findall(value):
70
+ if self.patterns[bit_length].findall(value):
47
71
  return True
48
72
  return False
49
73
 
50
- def ascending_pattern_check(self, value: str) -> bool:
74
+ def ascending_pattern_check(self, value: str, bit_length: int) -> bool:
51
75
  """Check if candidate value contain 4 and more ascending chars or numbers sequences.
52
76
 
53
77
  Arg:
54
78
  value: credential candidate value
79
+ bit_length: speedup for len(value).bit_length()
55
80
 
56
81
  Return:
57
82
  True if contain and False if not
@@ -64,15 +89,16 @@ class ValuePatternCheck(Filter):
64
89
  else:
65
90
  count = 1
66
91
  continue
67
- if count == self.pattern_len:
92
+ if count == self.pattern_lengths[bit_length]:
68
93
  return True
69
94
  return False
70
95
 
71
- def descending_pattern_check(self, value: str) -> bool:
96
+ def descending_pattern_check(self, value: str, bit_length: int) -> bool:
72
97
  """Check if candidate value contain 4 and more descending chars or numbers sequences.
73
98
 
74
99
  Arg:
75
100
  value: string variable, credential candidate value
101
+ bit_length: speedup for len(value).bit_length()
76
102
 
77
103
  Return:
78
104
  boolean variable. True if contain and False if not
@@ -85,59 +111,44 @@ class ValuePatternCheck(Filter):
85
111
  else:
86
112
  count = 1
87
113
  continue
88
- if count == self.pattern_len:
114
+ if count == self.pattern_lengths[bit_length]:
89
115
  return True
90
116
  return False
91
117
 
92
- def check_val(self, value: str) -> bool:
118
+ def check_val(self, value: str, bit_length: int) -> bool:
93
119
  """Cumulative value check.
94
120
 
95
121
  Arg:
96
122
  value: string variable, credential candidate value
123
+ bit_length: speedup for len(value).bit_length()
97
124
 
98
125
  Return:
99
126
  boolean variable. True if contain and False if not
100
127
 
101
128
  """
102
- if self.equal_pattern_check(value):
129
+ if self.equal_pattern_check(value, bit_length):
103
130
  return True
104
- if self.ascending_pattern_check(value):
131
+ if self.ascending_pattern_check(value, bit_length):
105
132
  return True
106
- if self.descending_pattern_check(value):
133
+ if self.descending_pattern_check(value, bit_length):
107
134
  return True
108
135
  return False
109
136
 
110
- def duple_pattern_check(self, value: str) -> bool:
137
+ def duple_pattern_check(self, value: str, bit_length: int) -> bool:
111
138
  """Check if candidate value is a duplet value with possible patterns.
112
139
 
113
140
  Arg:
114
141
  value: string variable, credential candidate value
142
+ bit_length: speedup for len(value).bit_length()
115
143
 
116
144
  Return:
117
145
  boolean variable. True if contain and False if not
118
146
 
119
147
  """
120
- # 001122334455... case
121
- pair_duple = True
122
- # 0102030405... case
123
- even_duple = True
124
- even_prev = value[0]
125
148
  even_value = value[0::2]
126
- # 1020304050... case
127
- odd_duple = True
128
- odd_prev = value[1]
129
149
  odd_value = value[1::2]
130
- for even_i, odd_i in zip(even_value, odd_value):
131
- pair_duple &= even_i == odd_i
132
- even_duple &= even_i == even_prev
133
- odd_duple &= odd_i == odd_prev
134
- if not pair_duple and not even_duple and not odd_duple:
135
- break
136
- else:
137
- if pair_duple or odd_duple:
138
- return self.check_val(even_value)
139
- if even_duple:
140
- return self.check_val(odd_value)
150
+ if self.check_val(even_value, bit_length) and self.check_val(odd_value, bit_length):
151
+ return True
141
152
  return False
142
153
 
143
154
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -151,13 +162,22 @@ class ValuePatternCheck(Filter):
151
162
  boolean variable. True, if need to filter candidate and False if left
152
163
 
153
164
  """
154
- if len(line_data.value) < self.pattern_len:
165
+ value_length = len(line_data.value)
166
+ bit_length = max(DEFAULT_PATTERN_LEN, value_length.bit_length())
167
+
168
+ if ValuePatternCheck.MAX_PATTERN_LENGTH < bit_length:
169
+ # huge values may contain anything
170
+ return False
171
+
172
+ if 0 <= value_length < self.pattern_len or value_length < self.pattern_lengths[bit_length]:
173
+ # too short value
155
174
  return True
156
175
 
157
- if self.check_val(line_data.value):
176
+ if self.check_val(line_data.value, bit_length):
158
177
  return True
159
178
 
160
- if 2 * self.pattern_len <= len(line_data.value) and self.duple_pattern_check(line_data.value):
179
+ if 2 * self.pattern_lengths[bit_length] <= value_length \
180
+ and self.duple_pattern_check(line_data.value, bit_length):
161
181
  return True
162
182
 
163
183
  return False
@@ -1,13 +1,15 @@
1
- from credsweeper.config import Config
2
- from credsweeper.credentials import LineData
1
+ from typing import Optional
2
+
3
+ from credsweeper.config.config import Config
4
+ from credsweeper.credentials.line_data import LineData
3
5
  from credsweeper.file_handler.analysis_target import AnalysisTarget
4
- from credsweeper.filters import Filter
6
+ from credsweeper.filters.filter import Filter
5
7
 
6
8
 
7
9
  class ValueSimilarityCheck(Filter):
8
10
  """Check if candidate value is at least 70% same as candidate keyword. Like: `secret = "mysecret"`."""
9
11
 
10
- def __init__(self, config: Config = None) -> None:
12
+ def __init__(self, config: Optional[Config] = None) -> None:
11
13
  pass
12
14
 
13
15
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,16 +1,17 @@
1
+ from typing import Optional
1
2
  from typing import Union
2
3
 
3
4
  from credsweeper.common import static_keyword_checklist
4
- from credsweeper.config import Config
5
- from credsweeper.credentials import LineData
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
6
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
7
- from credsweeper.filters import Filter
8
+ from credsweeper.filters.filter import Filter
8
9
 
9
10
 
10
11
  class ValueSplitKeywordCheck(Filter):
11
12
  """Check value by splitting with standard whitespace separators and any word is not matched in checklist."""
12
13
 
13
- def __init__(self, config: Config = None) -> None:
14
+ def __init__(self, config: Optional[Config] = None) -> None:
14
15
  pass
15
16
 
16
17
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool: