credsweeper 1.11.5__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

Files changed (145)
  1. credsweeper/__init__.py +21 -15
  2. credsweeper/__main__.py +158 -42
  3. credsweeper/app.py +18 -13
  4. credsweeper/common/keyword_pattern.py +19 -18
  5. credsweeper/common/morpheme_checklist.txt +28 -6
  6. credsweeper/config/__init__.py +0 -1
  7. credsweeper/config/config.py +4 -3
  8. credsweeper/credentials/__init__.py +0 -5
  9. credsweeper/credentials/augment_candidates.py +1 -1
  10. credsweeper/credentials/candidate.py +1 -1
  11. credsweeper/credentials/credential_manager.py +1 -1
  12. credsweeper/credentials/line_data.py +43 -8
  13. credsweeper/deep_scanner/__init__.py +0 -1
  14. credsweeper/deep_scanner/abstract_scanner.py +4 -3
  15. credsweeper/deep_scanner/byte_scanner.py +1 -1
  16. credsweeper/deep_scanner/bzip2_scanner.py +2 -2
  17. credsweeper/deep_scanner/csv_scanner.py +71 -0
  18. credsweeper/deep_scanner/deb_scanner.py +1 -1
  19. credsweeper/deep_scanner/deep_scanner.py +22 -12
  20. credsweeper/deep_scanner/docx_scanner.py +1 -1
  21. credsweeper/deep_scanner/eml_scanner.py +1 -1
  22. credsweeper/deep_scanner/encoder_scanner.py +1 -1
  23. credsweeper/deep_scanner/gzip_scanner.py +2 -2
  24. credsweeper/deep_scanner/html_scanner.py +1 -1
  25. credsweeper/deep_scanner/jclass_scanner.py +1 -1
  26. credsweeper/deep_scanner/jks_scanner.py +12 -3
  27. credsweeper/deep_scanner/lang_scanner.py +1 -1
  28. credsweeper/deep_scanner/lzma_scanner.py +2 -2
  29. credsweeper/deep_scanner/mxfile_scanner.py +1 -1
  30. credsweeper/deep_scanner/pdf_scanner.py +1 -1
  31. credsweeper/deep_scanner/pkcs_scanner.py +6 -2
  32. credsweeper/deep_scanner/pptx_scanner.py +1 -1
  33. credsweeper/deep_scanner/rpm_scanner.py +1 -1
  34. credsweeper/deep_scanner/rtf_scanner.py +41 -0
  35. credsweeper/deep_scanner/strings_scanner.py +52 -0
  36. credsweeper/deep_scanner/tar_scanner.py +2 -2
  37. credsweeper/deep_scanner/tmx_scanner.py +2 -2
  38. credsweeper/deep_scanner/xlsx_scanner.py +2 -2
  39. credsweeper/deep_scanner/xml_scanner.py +1 -1
  40. credsweeper/deep_scanner/zip_scanner.py +2 -2
  41. credsweeper/file_handler/__init__.py +0 -15
  42. credsweeper/file_handler/abstract_provider.py +3 -4
  43. credsweeper/file_handler/byte_content_provider.py +11 -2
  44. credsweeper/file_handler/content_provider.py +1 -1
  45. credsweeper/file_handler/data_content_provider.py +1 -1
  46. credsweeper/file_handler/diff_content_provider.py +133 -3
  47. credsweeper/file_handler/file_path_extractor.py +4 -2
  48. credsweeper/file_handler/files_provider.py +4 -4
  49. credsweeper/file_handler/patches_provider.py +7 -8
  50. credsweeper/file_handler/text_content_provider.py +8 -2
  51. credsweeper/filters/__init__.py +3 -4
  52. credsweeper/filters/filter.py +5 -3
  53. credsweeper/filters/group/__init__.py +0 -2
  54. credsweeper/filters/group/general_keyword.py +2 -2
  55. credsweeper/filters/group/general_pattern.py +2 -2
  56. credsweeper/filters/group/group.py +38 -36
  57. credsweeper/filters/group/password_keyword.py +9 -8
  58. credsweeper/filters/group/token_pattern.py +5 -5
  59. credsweeper/filters/group/url_credentials_group.py +8 -8
  60. credsweeper/filters/group/weird_base36_token.py +6 -6
  61. credsweeper/filters/group/weird_base64_token.py +5 -5
  62. credsweeper/filters/line_git_binary_check.py +5 -4
  63. credsweeper/filters/line_specific_key_check.py +6 -5
  64. credsweeper/filters/line_uue_part_check.py +5 -4
  65. credsweeper/filters/value_allowlist_check.py +6 -5
  66. credsweeper/filters/value_array_dictionary_check.py +8 -6
  67. credsweeper/filters/value_atlassian_token_check.py +6 -5
  68. credsweeper/filters/value_azure_token_check.py +6 -5
  69. credsweeper/filters/value_base32_data_check.py +8 -5
  70. credsweeper/filters/value_base64_data_check.py +6 -5
  71. credsweeper/filters/value_base64_encoded_pem_check.py +6 -5
  72. credsweeper/filters/value_base64_key_check.py +6 -5
  73. credsweeper/filters/value_base64_part_check.py +6 -5
  74. credsweeper/filters/value_basic_auth_check.py +37 -0
  75. credsweeper/filters/value_blocklist_check.py +6 -4
  76. credsweeper/filters/value_camel_case_check.py +8 -7
  77. credsweeper/filters/value_dictionary_keyword_check.py +6 -4
  78. credsweeper/filters/value_discord_bot_check.py +6 -5
  79. credsweeper/filters/value_entropy_base_check.py +6 -5
  80. credsweeper/filters/value_file_path_check.py +13 -8
  81. credsweeper/filters/value_github_check.py +8 -6
  82. credsweeper/filters/value_grafana_check.py +6 -5
  83. credsweeper/filters/value_grafana_service_check.py +5 -4
  84. credsweeper/filters/value_hex_number_check.py +5 -4
  85. credsweeper/filters/value_jfrog_token_check.py +6 -5
  86. credsweeper/filters/value_json_web_key_check.py +6 -5
  87. credsweeper/filters/value_json_web_token_check.py +6 -5
  88. credsweeper/filters/value_last_word_check.py +6 -4
  89. credsweeper/filters/{value_dictionary_value_length_check.py → value_length_check.py} +12 -6
  90. credsweeper/filters/value_method_check.py +5 -4
  91. credsweeper/filters/value_morphemes_check.py +43 -0
  92. credsweeper/filters/value_not_allowed_pattern_check.py +6 -5
  93. credsweeper/filters/value_not_part_encoded_check.py +4 -4
  94. credsweeper/filters/value_number_check.py +5 -4
  95. credsweeper/filters/value_pattern_check.py +61 -41
  96. credsweeper/filters/value_similarity_check.py +6 -4
  97. credsweeper/filters/value_split_keyword_check.py +5 -4
  98. credsweeper/filters/value_string_type_check.py +10 -7
  99. credsweeper/filters/value_token_base_check.py +5 -4
  100. credsweeper/filters/value_token_check.py +6 -5
  101. credsweeper/logger/__init__.py +0 -1
  102. credsweeper/logger/logger.py +1 -1
  103. credsweeper/ml_model/__init__.py +0 -1
  104. credsweeper/ml_model/features/__init__.py +1 -0
  105. credsweeper/ml_model/features/entropy_evaluation.py +1 -1
  106. credsweeper/ml_model/features/feature.py +2 -19
  107. credsweeper/ml_model/features/file_extension.py +2 -2
  108. credsweeper/ml_model/features/has_html_tag.py +12 -10
  109. credsweeper/ml_model/features/is_secret_numeric.py +5 -4
  110. credsweeper/ml_model/features/length_of_attribute.py +1 -1
  111. credsweeper/ml_model/features/morpheme_dense.py +15 -8
  112. credsweeper/ml_model/features/rule_name.py +2 -2
  113. credsweeper/ml_model/features/rule_severity.py +21 -0
  114. credsweeper/ml_model/features/search_in_attribute.py +1 -1
  115. credsweeper/ml_model/features/word_in.py +10 -33
  116. credsweeper/ml_model/features/word_in_path.py +6 -4
  117. credsweeper/ml_model/features/word_in_postamble.py +2 -5
  118. credsweeper/ml_model/features/word_in_preamble.py +2 -5
  119. credsweeper/ml_model/features/word_in_transition.py +2 -5
  120. credsweeper/ml_model/features/word_in_value.py +3 -4
  121. credsweeper/ml_model/features/word_in_variable.py +3 -4
  122. credsweeper/ml_model/ml_config.json +140 -27
  123. credsweeper/ml_model/ml_model.onnx +0 -0
  124. credsweeper/ml_model/ml_validator.py +4 -3
  125. credsweeper/rules/__init__.py +0 -1
  126. credsweeper/rules/config.yaml +329 -239
  127. credsweeper/rules/rule.py +4 -3
  128. credsweeper/scanner/__init__.py +0 -1
  129. credsweeper/scanner/scan_type/__init__.py +0 -5
  130. credsweeper/scanner/scan_type/multi_pattern.py +4 -4
  131. credsweeper/scanner/scan_type/pem_key_pattern.py +4 -4
  132. credsweeper/scanner/scan_type/scan_type.py +4 -4
  133. credsweeper/scanner/scan_type/single_pattern.py +4 -4
  134. credsweeper/scanner/scanner.py +24 -15
  135. credsweeper/secret/config.json +19 -6
  136. credsweeper/utils/__init__.py +0 -1
  137. credsweeper/utils/pem_key_detector.py +3 -3
  138. credsweeper/utils/util.py +24 -150
  139. {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/METADATA +7 -7
  140. credsweeper-1.13.3.dist-info/RECORD +164 -0
  141. credsweeper/filters/value_couple_keyword_check.py +0 -26
  142. credsweeper-1.11.5.dist-info/RECORD +0 -159
  143. {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/WHEEL +0 -0
  144. {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/entry_points.txt +0 -0
  145. {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,7 +1,9 @@
1
- from credsweeper.config import Config
2
- from credsweeper.credentials import LineData
1
+ from typing import Optional
2
+
3
+ from credsweeper.config.config import Config
4
+ from credsweeper.credentials.line_data import LineData
3
5
  from credsweeper.file_handler.analysis_target import AnalysisTarget
4
- from credsweeper.filters import Filter
6
+ from credsweeper.filters.filter import Filter
5
7
 
6
8
 
7
9
  class ValueBlocklistCheck(Filter):
@@ -18,7 +20,7 @@ class ValueBlocklistCheck(Filter):
18
20
  "undefined",
19
21
  ]
20
22
 
21
- def __init__(self, config: Config = None) -> None:
23
+ def __init__(self, config: Optional[Config] = None) -> None:
22
24
  pass
23
25
 
24
26
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,20 +1,21 @@
1
1
  import re
2
+ from typing import Optional
2
3
 
3
4
  from credsweeper.common import static_keyword_checklist
4
- from credsweeper.config import Config
5
- from credsweeper.credentials import LineData
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
6
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
7
- from credsweeper.filters import Filter
8
- from credsweeper.utils import Util
8
+ from credsweeper.filters.filter import Filter
9
+ from credsweeper.utils.util import Util
9
10
 
10
11
 
11
12
  class ValueCamelCaseCheck(Filter):
12
13
  """Check that candidate is not written in camel case."""
13
14
 
14
- CAMEL_CASE = ["^([a-z]+([A-Z][a-z]+)+)$", "^([A-Z][a-z]+([A-Z][a-z]+)+)$"]
15
+ CAMEL_CASE = ["[a-z]+([A-Z][a-z]+)+", "[A-Z][a-z]+([A-Z][a-z]+)+"]
15
16
  CAMEL_CASE_PATTERN = re.compile(Util.get_regex_combine_or(CAMEL_CASE))
16
17
 
17
- def __init__(self, config: Config = None) -> None:
18
+ def __init__(self, config: Optional[Config] = None) -> None:
18
19
  pass
19
20
 
20
21
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -30,7 +31,7 @@ class ValueCamelCaseCheck(Filter):
30
31
  """
31
32
  if line_data.is_well_quoted_value:
32
33
  return False
33
- if self.CAMEL_CASE_PATTERN.match(line_data.value):
34
+ if self.CAMEL_CASE_PATTERN.fullmatch(line_data.value):
34
35
  return static_keyword_checklist.check_morphemes(line_data.value.lower(), 1)
35
36
 
36
37
  return False
@@ -1,14 +1,16 @@
1
+ from typing import Optional
2
+
1
3
  from credsweeper.common import static_keyword_checklist
2
- from credsweeper.config import Config
3
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
4
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
5
- from credsweeper.filters import Filter
7
+ from credsweeper.filters.filter import Filter
6
8
 
7
9
 
8
10
  class ValueDictionaryKeywordCheck(Filter):
9
11
  """Check that no word from dictionary present in the candidate value."""
10
12
 
11
- def __init__(self, config: Config = None) -> None:
13
+ def __init__(self, config: Optional[Config] = None) -> None:
12
14
  pass
13
15
 
14
16
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,17 +1,18 @@
1
1
  import contextlib
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
+ from credsweeper.filters.filter import Filter
7
8
  from credsweeper.filters.value_entropy_base64_check import ValueEntropyBase64Check
8
- from credsweeper.utils import Util
9
+ from credsweeper.utils.util import Util
9
10
 
10
11
 
11
12
  class ValueDiscordBotCheck(Filter):
12
13
  """Discord bot Token"""
13
14
 
14
- def __init__(self, config: Config = None) -> None:
15
+ def __init__(self, config: Optional[Config] = None) -> None:
15
16
  pass
16
17
 
17
18
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,16 +1,17 @@
1
1
  from abc import abstractmethod
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
- from credsweeper.utils import Util
7
+ from credsweeper.filters.filter import Filter
8
+ from credsweeper.utils.util import Util
8
9
 
9
10
 
10
11
  class ValueEntropyBaseCheck(Filter):
11
12
  """Check that candidate value has minimal Shanon Entropy for appropriated base"""
12
13
 
13
- def __init__(self, config: Config = None) -> None:
14
+ def __init__(self, config: Optional[Config] = None) -> None:
14
15
  pass
15
16
 
16
17
  @staticmethod
@@ -1,10 +1,13 @@
1
+ from typing import Optional
2
+
1
3
  from credsweeper.common import static_keyword_checklist
2
4
  from credsweeper.common.constants import Chars
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
5
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter, ValueEntropyBase64Check
7
- from credsweeper.utils import Util
8
+ from credsweeper.filters.filter import Filter
9
+ from credsweeper.filters.value_entropy_base64_check import ValueEntropyBase64Check
10
+ from credsweeper.utils.util import Util
8
11
 
9
12
 
10
13
  class ValueFilePathCheck(Filter):
@@ -17,7 +20,7 @@ class ValueFilePathCheck(Filter):
17
20
  unusual_windows_symbols_in_path = "\t\n\r!$@`&*(){}<>+=;,~^"
18
21
  unusual_linux_symbols_in_path = "\t\n\r!@`&*<>+=;,~^:\\"
19
22
 
20
- def __init__(self, config: Config = None) -> None:
23
+ def __init__(self, config: Optional[Config] = None) -> None:
21
24
  pass
22
25
 
23
26
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -32,6 +35,8 @@ class ValueFilePathCheck(Filter):
32
35
 
33
36
  """
34
37
  value = line_data.value
38
+ bit_length = len(value).bit_length()
39
+ morpheme_threshold = 1 if 6 > bit_length else bit_length - 4
35
40
  contains_unix_separator = '/' in value
36
41
  if contains_unix_separator:
37
42
  if ("://" in value #
@@ -42,14 +47,14 @@ class ValueFilePathCheck(Filter):
42
47
  or value.startswith("//") and ':' == line_data.separator):
43
48
  # common case for url definition or aliases
44
49
  # or _keyword_://example.com where : is the separator
45
- return static_keyword_checklist.check_morphemes(value.lower(), 1)
50
+ return static_keyword_checklist.check_morphemes(value.lower(), morpheme_threshold)
46
51
  # base64 encoded data might look like linux path
47
52
  min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(value))
48
53
  # get minimal entropy to compare with shannon entropy of found value
49
54
  # min_entropy == 0 means that the value cannot be checked with the entropy due high variance
50
55
  for i in value:
51
56
  if i not in self.base64stdpad_possible_set:
52
- # value contains wrong BASE64STDPAD_CHARS symbols like -_
57
+ # value contains wrong BASE64STDPAD_CHARS symbols like -_.
53
58
  break
54
59
  else:
55
60
  # all symbols are from base64 alphabet
@@ -71,5 +76,5 @@ class ValueFilePathCheck(Filter):
71
76
  break
72
77
  else:
73
78
  if contains_unix_separator ^ contains_windows_separator:
74
- return static_keyword_checklist.check_morphemes(value.lower(), 1)
79
+ return static_keyword_checklist.check_morphemes(value.lower(), morpheme_threshold)
75
80
  return False
@@ -1,19 +1,20 @@
1
1
  import binascii
2
2
  import contextlib
3
+ from typing import Optional
3
4
 
4
5
  import base62
5
6
 
6
7
  from credsweeper.common.constants import ASCII
7
- from credsweeper.config import Config
8
- from credsweeper.credentials import LineData
8
+ from credsweeper.config.config import Config
9
+ from credsweeper.credentials.line_data import LineData
9
10
  from credsweeper.file_handler.analysis_target import AnalysisTarget
10
- from credsweeper.filters import Filter
11
+ from credsweeper.filters.filter import Filter
11
12
 
12
13
 
13
14
  class ValueGitHubCheck(Filter):
14
- """GitHub Classic Token validation"""
15
+ """NPM or GitHub Classic Token validation"""
15
16
 
16
- def __init__(self, config: Config = None) -> None:
17
+ def __init__(self, config: Optional[Config] = None) -> None:
17
18
  pass
18
19
 
19
20
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -28,8 +29,9 @@ class ValueGitHubCheck(Filter):
28
29
 
29
30
  """
30
31
  # https://github.blog/2021-04-05-behind-githubs-new-authentication-token-formats/
32
+ # https://github.blog/security/announcing-npms-new-access-token-format/
31
33
  with contextlib.suppress(Exception):
32
- if line_data.value.startswith("gh") and '_' == line_data.value[3]:
34
+ if (line_data.value.startswith("gh") and '_' == line_data.value[3]) or line_data.value.startswith("npm_"):
33
35
  token = line_data.value[4:-6]
34
36
  data = token.encode(ASCII, errors="strict")
35
37
  crc32sum = binascii.crc32(data)
@@ -1,17 +1,18 @@
1
1
  import contextlib
2
2
  import json
3
+ from typing import Optional
3
4
 
4
- from credsweeper.config import Config
5
- from credsweeper.credentials import LineData
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
6
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
7
- from credsweeper.filters import Filter
8
- from credsweeper.utils import Util
8
+ from credsweeper.filters.filter import Filter
9
+ from credsweeper.utils.util import Util
9
10
 
10
11
 
11
12
  class ValueGrafanaCheck(Filter):
12
13
  """Grafana Provisioned API Key and Access Policy Token"""
13
14
 
14
- def __init__(self, config: Config = None) -> None:
15
+ def __init__(self, config: Optional[Config] = None) -> None:
15
16
  pass
16
17
 
17
18
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,18 +1,19 @@
1
1
  import binascii
2
2
  import contextlib
3
3
  import struct
4
+ from typing import Optional
4
5
 
5
6
  from credsweeper.common.constants import ASCII
6
- from credsweeper.config import Config
7
- from credsweeper.credentials import LineData
7
+ from credsweeper.config.config import Config
8
+ from credsweeper.credentials.line_data import LineData
8
9
  from credsweeper.file_handler.analysis_target import AnalysisTarget
9
- from credsweeper.filters import Filter
10
+ from credsweeper.filters.filter import Filter
10
11
 
11
12
 
12
13
  class ValueGrafanaServiceCheck(Filter):
13
14
  """Check that candidate have a known structure"""
14
15
 
15
- def __init__(self, config: Config = None) -> None:
16
+ def __init__(self, config: Optional[Config] = None) -> None:
16
17
  pass
17
18
 
18
19
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,9 +1,10 @@
1
1
  import re
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
+ from credsweeper.filters.filter import Filter
7
8
 
8
9
 
9
10
  class ValueHexNumberCheck(Filter):
@@ -11,7 +12,7 @@ class ValueHexNumberCheck(Filter):
11
12
 
12
13
  HEX_08_64_VALUE_REGEX = re.compile(r"^0x[0-9a-f]{1,16}$")
13
14
 
14
- def __init__(self, config: Config = None) -> None:
15
+ def __init__(self, config: Optional[Config] = None) -> None:
15
16
  pass
16
17
 
17
18
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,20 +1,21 @@
1
1
  import contextlib
2
2
  import re
3
+ from typing import Optional
3
4
 
4
5
  import base58
5
6
 
6
7
  from credsweeper.common.constants import ASCII
7
- from credsweeper.config import Config
8
- from credsweeper.credentials import LineData
8
+ from credsweeper.config.config import Config
9
+ from credsweeper.credentials.line_data import LineData
9
10
  from credsweeper.file_handler.analysis_target import AnalysisTarget
10
- from credsweeper.filters import Filter
11
- from credsweeper.utils import Util
11
+ from credsweeper.filters.filter import Filter
12
+ from credsweeper.utils.util import Util
12
13
 
13
14
 
14
15
  class ValueJfrogTokenCheck(Filter):
15
16
  """Check that candidate have a known structure JFROG token"""
16
17
 
17
- def __init__(self, config: Config = None) -> None:
18
+ def __init__(self, config: Optional[Config] = None) -> None:
18
19
  # reftkn:01:0123456789:abcdefGhijklmnoPqrstuVwxyz0
19
20
  self._pattern = re.compile(r"reftkn:\d+:\d+:[\w_/+-]+")
20
21
 
@@ -1,10 +1,11 @@
1
1
  import contextlib
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
- from credsweeper.utils import Util
7
+ from credsweeper.filters.filter import Filter
8
+ from credsweeper.utils.util import Util
8
9
 
9
10
 
10
11
  class ValueJsonWebKeyCheck(Filter):
@@ -15,7 +16,7 @@ class ValueJsonWebKeyCheck(Filter):
15
16
  https://datatracker.ietf.org/doc/html/rfc7518
16
17
  """
17
18
 
18
- def __init__(self, config: Config = None) -> None:
19
+ def __init__(self, config: Optional[Config] = None) -> None:
19
20
  pass
20
21
 
21
22
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,11 +1,12 @@
1
1
  import contextlib
2
2
  import json
3
+ from typing import Optional
3
4
 
4
- from credsweeper.config import Config
5
- from credsweeper.credentials import LineData
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
6
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
7
- from credsweeper.filters import Filter
8
- from credsweeper.utils import Util
8
+ from credsweeper.filters.filter import Filter
9
+ from credsweeper.utils.util import Util
9
10
 
10
11
 
11
12
  class ValueJsonWebTokenCheck(Filter):
@@ -24,7 +25,7 @@ class ValueJsonWebTokenCheck(Filter):
24
25
  "ext", "crit", "keys", "id", "role", "token", "secret", "password", "nonce"
25
26
  }
26
27
 
27
- def __init__(self, config: Config = None) -> None:
28
+ def __init__(self, config: Optional[Config] = None) -> None:
28
29
  pass
29
30
 
30
31
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,13 +1,15 @@
1
- from credsweeper.config import Config
2
- from credsweeper.credentials import LineData
1
+ from typing import Optional
2
+
3
+ from credsweeper.config.config import Config
4
+ from credsweeper.credentials.line_data import LineData
3
5
  from credsweeper.file_handler.analysis_target import AnalysisTarget
4
- from credsweeper.filters import Filter
6
+ from credsweeper.filters.filter import Filter
5
7
 
6
8
 
7
9
  class ValueLastWordCheck(Filter):
8
10
  """Check that secret is not short value that ends with `:`."""
9
11
 
10
- def __init__(self, config: Config = None) -> None:
12
+ def __init__(self, config: Optional[Config] = None) -> None:
11
13
  pass
12
14
 
13
15
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,13 +1,19 @@
1
- from credsweeper.config import Config
2
- from credsweeper.credentials import LineData
1
+ from typing import Optional
2
+
3
+ from credsweeper.common.constants import MIN_VALUE_LENGTH, MAX_LINE_LENGTH
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
3
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
4
- from credsweeper.filters import Filter
7
+ from credsweeper.filters.filter import Filter
5
8
 
6
9
 
7
- class ValueDictionaryValueLengthCheck(Filter):
8
- """Check that candidate length is between 5 and 30."""
10
+ class ValueLengthCheck(Filter):
11
+ """Check that candidate value length is between MIN and MAX."""
9
12
 
10
- def __init__(self, config: Config = None, min_len: int = 4, max_len: int = 31) -> None:
13
+ def __init__(self,
14
+ config: Optional[Config] = None,
15
+ min_len: int = MIN_VALUE_LENGTH,
16
+ max_len: int = MAX_LINE_LENGTH) -> None:
11
17
  self.min_len = min_len
12
18
  self.max_len = max_len
13
19
 
@@ -1,9 +1,10 @@
1
1
  import re
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
+ from credsweeper.filters.filter import Filter
7
8
 
8
9
 
9
10
  class ValueMethodCheck(Filter):
@@ -14,7 +15,7 @@ class ValueMethodCheck(Filter):
14
15
 
15
16
  PATTERN = re.compile(r"^[~.\->:0-9A-Za-z_]+\(.*\)")
16
17
 
17
- def __init__(self, config: Config = None) -> None:
18
+ def __init__(self, config: Optional[Config] = None) -> None:
18
19
  pass
19
20
 
20
21
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -0,0 +1,43 @@
1
+ from typing import Optional
2
+
3
+ from credsweeper.common import static_keyword_checklist
4
+ from credsweeper.common.constants import MAX_LINE_LENGTH
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
7
+ from credsweeper.file_handler.analysis_target import AnalysisTarget
8
+ from credsweeper.filters.filter import Filter
9
+
10
+
11
+ class ValueMorphemesCheck(Filter):
12
+ """Check value for a threshold of morphemes count"""
13
+
14
+ THRESHOLDS_X3 = int(MAX_LINE_LENGTH).bit_length()
15
+ # one morpheme is very likely to be random generated even for 3 symbols
16
+ MAX_MORPHEMES_LIMIT = max(1, THRESHOLDS_X3 - 4)
17
+
18
+ def __init__(self, config: Optional[Config] = None, threshold: Optional[int] = None) -> None:
19
+ # threshold - minimum morphemes number in a value
20
+ if threshold is None:
21
+ # use dynamic thresholds
22
+ self.thresholds = [max(1, x - 4) for x in range(ValueMorphemesCheck.THRESHOLDS_X3)]
23
+ elif isinstance(threshold, int) and 0 <= threshold:
24
+ # constant thresholds for any pattern
25
+ self.thresholds = [threshold] * ValueMorphemesCheck.THRESHOLDS_X3
26
+ else:
27
+ raise ValueError(f"Wrong type of pattern length {type(threshold)} = {repr(threshold)}")
28
+
29
+ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
30
+ """Run filter checks on received credential candidate data 'line_data'.
31
+
32
+ Args:
33
+ line_data: credential candidate data
34
+ target: multiline target from which line data was obtained
35
+
36
+ Return:
37
+ True, if need to filter candidate and False if left
38
+
39
+ """
40
+ threshold_id = len(line_data.value).bit_length()
41
+ # use the last (max) threshold in very huge value
42
+ threshold = self.thresholds[threshold_id] if len(self.thresholds) > threshold_id else self.thresholds[-1]
43
+ return static_keyword_checklist.check_morphemes(line_data.value.lower(), threshold)
@@ -1,10 +1,11 @@
1
1
  import re
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
- from credsweeper.utils import Util
7
+ from credsweeper.filters.filter import Filter
8
+ from credsweeper.utils.util import Util
8
9
 
9
10
 
10
11
  class ValueNotAllowedPatternCheck(Filter):
@@ -15,7 +16,7 @@ class ValueNotAllowedPatternCheck(Filter):
15
16
  f"{Util.get_regex_combine_or(NOT_ALLOWED)}$", #
16
17
  flags=re.IGNORECASE)
17
18
 
18
- def __init__(self, config: Config = None) -> None:
19
+ def __init__(self, config: Optional[Config] = None) -> None:
19
20
  pass
20
21
 
21
22
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -2,10 +2,10 @@ import re
2
2
  from typing import Optional
3
3
 
4
4
  from credsweeper.common import static_keyword_checklist
5
- from credsweeper.config import Config
6
- from credsweeper.credentials import LineData
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
7
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
8
- from credsweeper.filters import Filter
8
+ from credsweeper.filters.filter import Filter
9
9
 
10
10
 
11
11
  class ValueNotPartEncodedCheck(Filter):
@@ -16,7 +16,7 @@ class ValueNotPartEncodedCheck(Filter):
16
16
  BASE64_ENCODED_DATA_PATTERN_AFTER = re.compile(
17
17
  r"(^|[^A-Za-z0-9]+)(?P<val>(([A-Za-z0-9=_-]{4}){4,64})|(([A-Za-z0-9=+/]{4}){4,64}))([^=A-Za-z0-9]+|$)")
18
18
 
19
- def __init__(self, config: Config = None) -> None:
19
+ def __init__(self, config: Optional[Config] = None) -> None:
20
20
  pass
21
21
 
22
22
  @staticmethod
@@ -1,9 +1,10 @@
1
1
  import re
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
+ from credsweeper.filters.filter import Filter
7
8
 
8
9
 
9
10
  class ValueNumberCheck(Filter):
@@ -12,7 +13,7 @@ class ValueNumberCheck(Filter):
12
13
  HEX_VALUE_REGEX = re.compile("^(0x)?[0-9a-f]{1,128}[ul]{0,3}$")
13
14
  DEC_VALUE_REGEX = re.compile("^-?[0-9]{1,20}[ul]{0,3}$")
14
15
 
15
- def __init__(self, config: Config = None) -> None:
16
+ def __init__(self, config: Optional[Config] = None) -> None:
16
17
  pass
17
18
 
18
19
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool: