credsweeper-1.11.5-py3-none-any.whl → credsweeper-1.12.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

Files changed (141)
  1. credsweeper/__init__.py +21 -15
  2. credsweeper/__main__.py +141 -35
  3. credsweeper/app.py +11 -11
  4. credsweeper/common/keyword_pattern.py +13 -15
  5. credsweeper/common/morpheme_checklist.txt +2 -0
  6. credsweeper/config/__init__.py +0 -1
  7. credsweeper/config/config.py +3 -3
  8. credsweeper/credentials/__init__.py +0 -5
  9. credsweeper/credentials/augment_candidates.py +1 -1
  10. credsweeper/credentials/candidate.py +1 -1
  11. credsweeper/credentials/credential_manager.py +1 -1
  12. credsweeper/credentials/line_data.py +22 -2
  13. credsweeper/deep_scanner/__init__.py +0 -1
  14. credsweeper/deep_scanner/abstract_scanner.py +3 -3
  15. credsweeper/deep_scanner/byte_scanner.py +1 -1
  16. credsweeper/deep_scanner/bzip2_scanner.py +2 -2
  17. credsweeper/deep_scanner/deb_scanner.py +1 -1
  18. credsweeper/deep_scanner/deep_scanner.py +3 -3
  19. credsweeper/deep_scanner/docx_scanner.py +1 -1
  20. credsweeper/deep_scanner/eml_scanner.py +1 -1
  21. credsweeper/deep_scanner/encoder_scanner.py +1 -1
  22. credsweeper/deep_scanner/gzip_scanner.py +2 -2
  23. credsweeper/deep_scanner/html_scanner.py +1 -1
  24. credsweeper/deep_scanner/jclass_scanner.py +1 -1
  25. credsweeper/deep_scanner/jks_scanner.py +1 -1
  26. credsweeper/deep_scanner/lang_scanner.py +1 -1
  27. credsweeper/deep_scanner/lzma_scanner.py +2 -2
  28. credsweeper/deep_scanner/mxfile_scanner.py +1 -1
  29. credsweeper/deep_scanner/pdf_scanner.py +1 -1
  30. credsweeper/deep_scanner/pkcs_scanner.py +2 -2
  31. credsweeper/deep_scanner/pptx_scanner.py +1 -1
  32. credsweeper/deep_scanner/rpm_scanner.py +1 -1
  33. credsweeper/deep_scanner/tar_scanner.py +2 -2
  34. credsweeper/deep_scanner/tmx_scanner.py +2 -2
  35. credsweeper/deep_scanner/xlsx_scanner.py +2 -2
  36. credsweeper/deep_scanner/xml_scanner.py +1 -1
  37. credsweeper/deep_scanner/zip_scanner.py +2 -2
  38. credsweeper/file_handler/__init__.py +0 -15
  39. credsweeper/file_handler/abstract_provider.py +3 -4
  40. credsweeper/file_handler/byte_content_provider.py +1 -1
  41. credsweeper/file_handler/content_provider.py +1 -1
  42. credsweeper/file_handler/data_content_provider.py +1 -1
  43. credsweeper/file_handler/diff_content_provider.py +133 -3
  44. credsweeper/file_handler/file_path_extractor.py +2 -2
  45. credsweeper/file_handler/files_provider.py +4 -4
  46. credsweeper/file_handler/patches_provider.py +7 -8
  47. credsweeper/file_handler/text_content_provider.py +1 -1
  48. credsweeper/filters/__init__.py +2 -3
  49. credsweeper/filters/filter.py +5 -3
  50. credsweeper/filters/group/__init__.py +0 -2
  51. credsweeper/filters/group/general_keyword.py +2 -2
  52. credsweeper/filters/group/general_pattern.py +2 -2
  53. credsweeper/filters/group/group.py +38 -36
  54. credsweeper/filters/group/password_keyword.py +9 -8
  55. credsweeper/filters/group/token_pattern.py +3 -3
  56. credsweeper/filters/group/url_credentials_group.py +8 -8
  57. credsweeper/filters/group/weird_base36_token.py +3 -3
  58. credsweeper/filters/group/weird_base64_token.py +3 -3
  59. credsweeper/filters/line_git_binary_check.py +5 -4
  60. credsweeper/filters/line_specific_key_check.py +6 -5
  61. credsweeper/filters/line_uue_part_check.py +5 -4
  62. credsweeper/filters/value_allowlist_check.py +6 -5
  63. credsweeper/filters/value_array_dictionary_check.py +8 -6
  64. credsweeper/filters/value_atlassian_token_check.py +6 -5
  65. credsweeper/filters/value_azure_token_check.py +6 -5
  66. credsweeper/filters/value_base32_data_check.py +8 -5
  67. credsweeper/filters/value_base64_data_check.py +6 -5
  68. credsweeper/filters/value_base64_encoded_pem_check.py +6 -5
  69. credsweeper/filters/value_base64_key_check.py +6 -5
  70. credsweeper/filters/value_base64_part_check.py +6 -5
  71. credsweeper/filters/value_basic_auth_check.py +37 -0
  72. credsweeper/filters/value_blocklist_check.py +6 -4
  73. credsweeper/filters/value_camel_case_check.py +6 -5
  74. credsweeper/filters/value_couple_keyword_check.py +6 -4
  75. credsweeper/filters/value_dictionary_keyword_check.py +6 -4
  76. credsweeper/filters/value_discord_bot_check.py +6 -5
  77. credsweeper/filters/value_entropy_base_check.py +6 -5
  78. credsweeper/filters/value_file_path_check.py +8 -5
  79. credsweeper/filters/value_github_check.py +5 -4
  80. credsweeper/filters/value_grafana_check.py +6 -5
  81. credsweeper/filters/value_grafana_service_check.py +5 -4
  82. credsweeper/filters/value_hex_number_check.py +5 -4
  83. credsweeper/filters/value_jfrog_token_check.py +6 -5
  84. credsweeper/filters/value_json_web_key_check.py +6 -5
  85. credsweeper/filters/value_json_web_token_check.py +6 -5
  86. credsweeper/filters/value_last_word_check.py +6 -4
  87. credsweeper/filters/{value_dictionary_value_length_check.py → value_length_check.py} +12 -6
  88. credsweeper/filters/value_method_check.py +5 -4
  89. credsweeper/filters/value_not_allowed_pattern_check.py +6 -5
  90. credsweeper/filters/value_not_part_encoded_check.py +4 -4
  91. credsweeper/filters/value_number_check.py +5 -4
  92. credsweeper/filters/value_pattern_check.py +61 -41
  93. credsweeper/filters/value_similarity_check.py +6 -4
  94. credsweeper/filters/value_split_keyword_check.py +5 -4
  95. credsweeper/filters/value_string_type_check.py +9 -7
  96. credsweeper/filters/value_token_base_check.py +5 -4
  97. credsweeper/filters/value_token_check.py +6 -5
  98. credsweeper/logger/__init__.py +0 -1
  99. credsweeper/logger/logger.py +1 -1
  100. credsweeper/ml_model/__init__.py +0 -1
  101. credsweeper/ml_model/features/__init__.py +1 -0
  102. credsweeper/ml_model/features/entropy_evaluation.py +1 -1
  103. credsweeper/ml_model/features/feature.py +1 -1
  104. credsweeper/ml_model/features/file_extension.py +1 -1
  105. credsweeper/ml_model/features/has_html_tag.py +2 -2
  106. credsweeper/ml_model/features/is_secret_numeric.py +1 -1
  107. credsweeper/ml_model/features/length_of_attribute.py +1 -1
  108. credsweeper/ml_model/features/morpheme_dense.py +15 -8
  109. credsweeper/ml_model/features/rule_name.py +1 -1
  110. credsweeper/ml_model/features/rule_severity.py +21 -0
  111. credsweeper/ml_model/features/search_in_attribute.py +1 -1
  112. credsweeper/ml_model/features/word_in.py +1 -1
  113. credsweeper/ml_model/features/word_in_path.py +5 -2
  114. credsweeper/ml_model/features/word_in_postamble.py +1 -1
  115. credsweeper/ml_model/features/word_in_preamble.py +1 -1
  116. credsweeper/ml_model/features/word_in_transition.py +1 -1
  117. credsweeper/ml_model/features/word_in_value.py +1 -1
  118. credsweeper/ml_model/features/word_in_variable.py +1 -1
  119. credsweeper/ml_model/ml_config.json +130 -24
  120. credsweeper/ml_model/ml_model.onnx +0 -0
  121. credsweeper/ml_model/ml_validator.py +3 -2
  122. credsweeper/rules/__init__.py +0 -1
  123. credsweeper/rules/config.yaml +116 -42
  124. credsweeper/rules/rule.py +4 -3
  125. credsweeper/scanner/__init__.py +0 -1
  126. credsweeper/scanner/scan_type/__init__.py +0 -5
  127. credsweeper/scanner/scan_type/multi_pattern.py +4 -4
  128. credsweeper/scanner/scan_type/pem_key_pattern.py +4 -4
  129. credsweeper/scanner/scan_type/scan_type.py +4 -4
  130. credsweeper/scanner/scan_type/single_pattern.py +4 -4
  131. credsweeper/scanner/scanner.py +12 -8
  132. credsweeper/secret/config.json +2 -2
  133. credsweeper/utils/__init__.py +0 -1
  134. credsweeper/utils/pem_key_detector.py +3 -3
  135. credsweeper/utils/util.py +3 -132
  136. {credsweeper-1.11.5.dist-info → credsweeper-1.12.0.dist-info}/METADATA +1 -1
  137. credsweeper-1.12.0.dist-info/RECORD +161 -0
  138. credsweeper-1.11.5.dist-info/RECORD +0 -159
  139. {credsweeper-1.11.5.dist-info → credsweeper-1.12.0.dist-info}/WHEEL +0 -0
  140. {credsweeper-1.11.5.dist-info → credsweeper-1.12.0.dist-info}/entry_points.txt +0 -0
  141. {credsweeper-1.11.5.dist-info → credsweeper-1.12.0.dist-info}/licenses/LICENSE +0 -0
@@ -2,23 +2,53 @@ from abc import ABC
2
2
  from typing import List
3
3
 
4
4
  from credsweeper.common.constants import GroupType
5
- from credsweeper.config import Config
6
- from credsweeper.filters import (Filter, LineSpecificKeyCheck, ValueAllowlistCheck, ValueArrayDictionaryCheck,
7
- ValueBlocklistCheck, ValueCamelCaseCheck, ValueFilePathCheck, ValueLastWordCheck,
8
- ValueMethodCheck, ValueNotAllowedPatternCheck, ValuePatternCheck, ValueSimilarityCheck,
9
- ValueStringTypeCheck, ValueTokenCheck, ValueHexNumberCheck)
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.filters.filter import Filter
7
+ from credsweeper.filters.line_specific_key_check import LineSpecificKeyCheck
8
+ from credsweeper.filters.value_allowlist_check import ValueAllowlistCheck
9
+ from credsweeper.filters.value_array_dictionary_check import ValueArrayDictionaryCheck
10
+ from credsweeper.filters.value_blocklist_check import ValueBlocklistCheck
11
+ from credsweeper.filters.value_camel_case_check import ValueCamelCaseCheck
12
+ from credsweeper.filters.value_file_path_check import ValueFilePathCheck
13
+ from credsweeper.filters.value_hex_number_check import ValueHexNumberCheck
14
+ from credsweeper.filters.value_last_word_check import ValueLastWordCheck
15
+ from credsweeper.filters.value_method_check import ValueMethodCheck
16
+ from credsweeper.filters.value_not_allowed_pattern_check import ValueNotAllowedPatternCheck
17
+ from credsweeper.filters.value_pattern_check import ValuePatternCheck
18
+ from credsweeper.filters.value_similarity_check import ValueSimilarityCheck
19
+ from credsweeper.filters.value_string_type_check import ValueStringTypeCheck
20
+ from credsweeper.filters.value_token_check import ValueTokenCheck
10
21
 
11
22
 
12
23
  class Group(ABC):
13
24
  """Abstract Group class"""
14
25
 
15
26
  def __init__(self, config: Config, rule_type: GroupType = GroupType.DEFAULT) -> None:
27
+ """Config is required for filter group"""
16
28
  if rule_type == GroupType.KEYWORD:
17
- self.filters: List[Filter] = self.get_keyword_base_filters(config)
29
+ self.__filters = [ #
30
+ ValueAllowlistCheck(), #
31
+ ValueArrayDictionaryCheck(), #
32
+ ValueBlocklistCheck(), #
33
+ ValueCamelCaseCheck(), #
34
+ ValueFilePathCheck(), #
35
+ ValueHexNumberCheck(), #
36
+ ValueLastWordCheck(), #
37
+ ValueMethodCheck(), #
38
+ ValueSimilarityCheck(), #
39
+ ValueStringTypeCheck(check_for_literals=config.check_for_literals), #
40
+ ValueTokenCheck(), #
41
+ ]
42
+ if not config.doc:
43
+ self.__filters.extend([ValuePatternCheck(), ValueNotAllowedPatternCheck()])
18
44
  elif rule_type == GroupType.PATTERN:
19
- self.filters: List[Filter] = self.get_pattern_base_filters(config)
45
+ self.__filters = [ #
46
+ LineSpecificKeyCheck(), #
47
+ ValuePatternCheck(), #
48
+ ]
20
49
  else:
21
- self.filters: List[Filter] = []
50
+ # GroupType.DEFAULT
51
+ self.__filters = []
22
52
 
23
53
  @property
24
54
  def filters(self) -> List[Filter]:
@@ -29,31 +59,3 @@ class Group(ABC):
29
59
  def filters(self, filters: List[Filter]) -> None:
30
60
  """property setter"""
31
61
  self.__filters = filters
32
-
33
- @staticmethod
34
- def get_keyword_base_filters(config: Config) -> List[Filter]:
35
- """returns base filters"""
36
- filters = [ #
37
- ValueAllowlistCheck(),
38
- ValueArrayDictionaryCheck(),
39
- ValueBlocklistCheck(),
40
- ValueCamelCaseCheck(),
41
- ValueFilePathCheck(),
42
- ValueHexNumberCheck(),
43
- ValueLastWordCheck(),
44
- ValueMethodCheck(),
45
- ValueSimilarityCheck(),
46
- ValueStringTypeCheck(config),
47
- ValueTokenCheck(),
48
- ]
49
- if not config.doc:
50
- filters.extend([ValuePatternCheck(pattern_len=config.pattern_len), ValueNotAllowedPatternCheck()])
51
- return filters
52
-
53
- @staticmethod
54
- def get_pattern_base_filters(config: Config) -> List[Filter]:
55
- """return base filters for pattern"""
56
- return [ #
57
- LineSpecificKeyCheck(), #
58
- ValuePatternCheck(pattern_len=config.pattern_len), #
59
- ]
@@ -1,8 +1,8 @@
1
1
  from credsweeper.common.constants import GroupType
2
- from credsweeper.config import Config
3
- from credsweeper.filters import ValueDictionaryValueLengthCheck, LineGitBinaryCheck
2
+ from credsweeper.config.config import Config
3
+ from credsweeper.filters import ValueLengthCheck, LineGitBinaryCheck
4
4
  from credsweeper.filters import ValueSplitKeywordCheck
5
- from credsweeper.filters.group import Group
5
+ from credsweeper.filters.group.group import Group
6
6
  from credsweeper.filters.line_uue_part_check import LineUUEPartCheck
7
7
 
8
8
 
@@ -11,8 +11,9 @@ class PasswordKeyword(Group):
11
11
 
12
12
  def __init__(self, config: Config) -> None:
13
13
  super().__init__(config, GroupType.KEYWORD)
14
- self.filters.extend(
15
- [ValueDictionaryValueLengthCheck(),
16
- ValueSplitKeywordCheck(),
17
- LineGitBinaryCheck(),
18
- LineUUEPartCheck()])
14
+ self.filters.extend([
15
+ ValueLengthCheck(max_len=config.max_password_value_length),
16
+ ValueSplitKeywordCheck(),
17
+ LineGitBinaryCheck(),
18
+ LineUUEPartCheck()
19
+ ])
@@ -1,7 +1,7 @@
1
1
  from credsweeper.common.constants import GroupType
2
- from credsweeper.config import Config
2
+ from credsweeper.config.config import Config
3
3
  from credsweeper.filters import ValueCoupleKeywordCheck, ValueCamelCaseCheck, ValueNumberCheck, ValuePatternCheck
4
- from credsweeper.filters.group import Group
4
+ from credsweeper.filters.group.group import Group
5
5
 
6
6
 
7
7
  class TokenPattern(Group):
@@ -13,5 +13,5 @@ class TokenPattern(Group):
13
13
  ValueCoupleKeywordCheck(),
14
14
  ValueNumberCheck(),
15
15
  ValueCamelCaseCheck(),
16
- ValuePatternCheck(pattern_len=config.pattern_len)
16
+ ValuePatternCheck(),
17
17
  ]
@@ -1,10 +1,10 @@
1
1
  from credsweeper.common.constants import GroupType
2
- from credsweeper.config import Config
2
+ from credsweeper.config.config import Config
3
3
  from credsweeper.filters import (ValueAllowlistCheck, ValueArrayDictionaryCheck, ValueBlocklistCheck,
4
- ValueCamelCaseCheck, ValueDictionaryValueLengthCheck, ValueFilePathCheck,
5
- ValueLastWordCheck, ValueMethodCheck, ValueNotAllowedPatternCheck, ValuePatternCheck,
6
- ValueStringTypeCheck, ValueTokenCheck)
7
- from credsweeper.filters.group import Group
4
+ ValueCamelCaseCheck, ValueLengthCheck, ValueFilePathCheck, ValueLastWordCheck,
5
+ ValueMethodCheck, ValueNotAllowedPatternCheck, ValuePatternCheck, ValueStringTypeCheck,
6
+ ValueTokenCheck)
7
+ from credsweeper.filters.group.group import Group
8
8
 
9
9
 
10
10
  class UrlCredentialsGroup(Group):
@@ -25,9 +25,9 @@ class UrlCredentialsGroup(Group):
25
25
  ValueFilePathCheck(),
26
26
  ValueLastWordCheck(),
27
27
  ValueMethodCheck(),
28
- ValueStringTypeCheck(config),
28
+ ValueStringTypeCheck(check_for_literals=config.check_for_literals),
29
29
  ValueNotAllowedPatternCheck(),
30
30
  ValueTokenCheck(),
31
- ValueDictionaryValueLengthCheck(min_len=4, max_len=80),
32
- ValuePatternCheck(pattern_len=config.pattern_len)
31
+ ValueLengthCheck(max_len=config.max_url_cred_value_length),
32
+ ValuePatternCheck()
33
33
  ]
@@ -1,8 +1,8 @@
1
1
  from credsweeper.common.constants import GroupType
2
- from credsweeper.config import Config
2
+ from credsweeper.config.config import Config
3
3
  from credsweeper.filters import ValueCoupleKeywordCheck, ValuePatternCheck, ValueNumberCheck, ValueEntropyBase36Check, \
4
4
  ValueTokenBase36Check
5
- from credsweeper.filters.group import Group
5
+ from credsweeper.filters.group.group import Group
6
6
 
7
7
 
8
8
  class WeirdBase36Token(Group):
@@ -12,7 +12,7 @@ class WeirdBase36Token(Group):
12
12
  super().__init__(config, GroupType.DEFAULT)
13
13
  self.filters = [
14
14
  ValueCoupleKeywordCheck(),
15
- ValuePatternCheck(config),
15
+ ValuePatternCheck(),
16
16
  ValueNumberCheck(),
17
17
  ValueTokenBase36Check(),
18
18
  ValueEntropyBase36Check()
@@ -1,9 +1,9 @@
1
1
  from credsweeper.common.constants import GroupType
2
- from credsweeper.config import Config
2
+ from credsweeper.config.config import Config
3
3
  from credsweeper.filters import ValueCoupleKeywordCheck, ValueNotPartEncodedCheck, \
4
4
  ValueBase64DataCheck, ValueEntropyBase64Check, ValuePatternCheck, ValueNumberCheck, ValueTokenBase64Check, \
5
5
  ValueBase64PartCheck
6
- from credsweeper.filters.group import Group
6
+ from credsweeper.filters.group.group import Group
7
7
 
8
8
 
9
9
  class WeirdBase64Token(Group):
@@ -17,7 +17,7 @@ class WeirdBase64Token(Group):
17
17
  ValueBase64DataCheck(),
18
18
  ValueTokenBase64Check(),
19
19
  ValueEntropyBase64Check(),
20
- ValuePatternCheck(config),
20
+ ValuePatternCheck(),
21
21
  ValueNotPartEncodedCheck(),
22
22
  ValueBase64PartCheck(),
23
23
  ]
@@ -1,18 +1,19 @@
1
1
  import base64
2
2
  import contextlib
3
3
  import re
4
+ from typing import Optional
4
5
 
5
- from credsweeper.config import Config
6
- from credsweeper.credentials import LineData
6
+ from credsweeper.config.config import Config
7
+ from credsweeper.credentials.line_data import LineData
7
8
  from credsweeper.file_handler.analysis_target import AnalysisTarget
8
- from credsweeper.filters import Filter
9
+ from credsweeper.filters.filter import Filter
9
10
 
10
11
 
11
12
  class LineGitBinaryCheck(Filter):
12
13
  """Checks that line is not a part of git binary patch"""
13
14
  base85string = re.compile(r"^[A-Za-z][0-9A-Za-z!#$%&()*+;<=>?@^_`{|}~-]{6,65}$")
14
15
 
15
- def __init__(self, config: Config = None) -> None:
16
+ def __init__(self, config: Optional[Config] = None) -> None:
16
17
  pass
17
18
 
18
19
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,11 +1,12 @@
1
1
  import re
2
+ from typing import Optional
2
3
 
3
4
  from credsweeper.common.constants import ML_HUNK
4
- from credsweeper.config import Config
5
- from credsweeper.credentials import LineData
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
6
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
7
- from credsweeper.filters import Filter
8
- from credsweeper.utils import Util
8
+ from credsweeper.filters.filter import Filter
9
+ from credsweeper.utils.util import Util
9
10
 
10
11
 
11
12
  class LineSpecificKeyCheck(Filter):
@@ -14,7 +15,7 @@ class LineSpecificKeyCheck(Filter):
14
15
  NOT_ALLOWED = [r"example", r"\benc[\(\[]", r"\btrue\b", r"\bfalse\b"]
15
16
  NOT_ALLOWED_PATTERN = re.compile(Util.get_regex_combine_or(NOT_ALLOWED), re.IGNORECASE)
16
17
 
17
- def __init__(self, config: Config = None) -> None:
18
+ def __init__(self, config: Optional[Config] = None) -> None:
18
19
  pass
19
20
 
20
21
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,16 +1,17 @@
1
1
  import re
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
+ from credsweeper.filters.filter import Filter
7
8
 
8
9
 
9
10
  class LineUUEPartCheck(Filter):
10
11
  """Checks that line is not a part of UU encoding only for maximal line"""
11
12
  uue_string = re.compile(r"^M[!-`]{60}$")
12
13
 
13
- def __init__(self, config: Config = None) -> None:
14
+ def __init__(self, config: Optional[Config] = None) -> None:
14
15
  pass
15
16
 
16
17
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,10 +1,11 @@
1
1
  import re
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
- from credsweeper.utils import Util
7
+ from credsweeper.filters.filter import Filter
8
+ from credsweeper.utils.util import Util
8
9
 
9
10
 
10
11
  class ValueAllowlistCheck(Filter):
@@ -40,7 +41,7 @@ class ValueAllowlistCheck(Filter):
40
41
 
41
42
  ALLOWED_UNQUOTED_PATTERN = re.compile(Util.get_regex_combine_or(ALLOWED_UNQUOTED), flags=re.IGNORECASE)
42
43
 
43
- def __init__(self, config: Config = None) -> None:
44
+ def __init__(self, config: Optional[Config] = None) -> None:
44
45
  pass
45
46
 
46
47
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,9 +1,10 @@
1
1
  import re
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
+ from credsweeper.filters.filter import Filter
7
8
 
8
9
 
9
10
  class ValueArrayDictionaryCheck(Filter):
@@ -14,9 +15,9 @@ class ValueArrayDictionaryCheck(Filter):
14
15
  `token = {'root'}` would be kept
15
16
  """
16
17
 
17
- PATTERN = re.compile(r"\[('|\")?[^,]+('|\")?\]")
18
+ PATTERN = re.compile(r"\[['\"]?[^,]+['\"]?]")
18
19
 
19
- def __init__(self, config: Config = None) -> None:
20
+ def __init__(self, config: Optional[Config] = None) -> None:
20
21
  pass
21
22
 
22
23
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -32,11 +33,12 @@ class ValueArrayDictionaryCheck(Filter):
32
33
  """
33
34
  if line_data.is_well_quoted_value:
34
35
  return False
36
+ # not well quoted value
35
37
  if line_data.wrap and "byte" in line_data.wrap.lower():
36
38
  return False
37
39
  if self.PATTERN.search(line_data.value):
38
40
  return True
39
- if line_data.wrap and not line_data.is_well_quoted_value and ('[' in line_data.wrap or '(' in line_data.wrap):
41
+ if line_data.wrap and (line_data.wrap.endswith('[') or line_data.wrap.endswith('(')):
40
42
  return True
41
43
 
42
44
  return False
@@ -1,18 +1,19 @@
1
1
  import binascii
2
2
  import contextlib
3
+ from typing import Optional
3
4
 
4
5
  from credsweeper.common.constants import LATIN_1, ASCII
5
- from credsweeper.config import Config
6
- from credsweeper.credentials import LineData
6
+ from credsweeper.config.config import Config
7
+ from credsweeper.credentials.line_data import LineData
7
8
  from credsweeper.file_handler.analysis_target import AnalysisTarget
8
- from credsweeper.filters import Filter
9
- from credsweeper.utils import Util
9
+ from credsweeper.filters.filter import Filter
10
+ from credsweeper.utils.util import Util
10
11
 
11
12
 
12
13
  class ValueAtlassianTokenCheck(Filter):
13
14
  """Check that candidate have a known structure"""
14
15
 
15
- def __init__(self, config: Config = None) -> None:
16
+ def __init__(self, config: Optional[Config] = None) -> None:
16
17
  pass
17
18
 
18
19
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,12 +1,13 @@
1
1
  import contextlib
2
2
  import json
3
+ from typing import Optional
3
4
 
4
- from credsweeper.config import Config
5
- from credsweeper.credentials import LineData
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
6
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
7
- from credsweeper.filters import Filter
8
+ from credsweeper.filters.filter import Filter
8
9
  from credsweeper.filters.value_entropy_base64_check import ValueEntropyBase64Check
9
- from credsweeper.utils import Util
10
+ from credsweeper.utils.util import Util
10
11
 
11
12
 
12
13
  class ValueAzureTokenCheck(Filter):
@@ -15,7 +16,7 @@ class ValueAzureTokenCheck(Filter):
15
16
  https://learn.microsoft.com/en-us/azure/active-directory-b2c/access-tokens
16
17
  """
17
18
 
18
- def __init__(self, config: Config = None) -> None:
19
+ def __init__(self, config: Optional[Config] = None) -> None:
19
20
  pass
20
21
 
21
22
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,12 +1,13 @@
1
1
  import base64
2
2
  import contextlib
3
3
  import string
4
+ from typing import Optional
4
5
 
5
- from credsweeper.config import Config
6
- from credsweeper.credentials import LineData
6
+ from credsweeper.config.config import Config
7
+ from credsweeper.credentials.line_data import LineData
7
8
  from credsweeper.file_handler.analysis_target import AnalysisTarget
8
- from credsweeper.filters import Filter
9
- from credsweeper.utils import Util
9
+ from credsweeper.filters.filter import Filter
10
+ from credsweeper.utils.util import Util
10
11
 
11
12
 
12
13
  class ValueBase32DataCheck(Filter):
@@ -14,7 +15,7 @@ class ValueBase32DataCheck(Filter):
14
15
  Check that candidate is NOT an ascii encoded string with entropy check
15
16
  """
16
17
 
17
- def __init__(self, config: Config = None) -> None:
18
+ def __init__(self, config: Optional[Config] = None) -> None:
18
19
  pass
19
20
 
20
21
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -38,6 +39,8 @@ class ValueBase32DataCheck(Filter):
38
39
  return True
39
40
  # check whether decoded bytes have enough entropy
40
41
  with contextlib.suppress(Exception):
42
+ if pad_remain := len(value) % 8:
43
+ value += '=' * (8 - pad_remain)
41
44
  decoded = base64.b32decode(value)
42
45
  return Util.is_ascii_entropy_validate(decoded)
43
46
  return True
@@ -1,11 +1,12 @@
1
1
  import contextlib
2
2
  import string
3
+ from typing import Optional
3
4
 
4
- from credsweeper.config import Config
5
- from credsweeper.credentials import LineData
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
6
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
7
- from credsweeper.filters import Filter
8
- from credsweeper.utils import Util
8
+ from credsweeper.filters.filter import Filter
9
+ from credsweeper.utils.util import Util
9
10
 
10
11
 
11
12
  class ValueBase64DataCheck(Filter):
@@ -13,7 +14,7 @@ class ValueBase64DataCheck(Filter):
13
14
  Check that candidate is NOT an ascii encoded string with entropy check
14
15
  """
15
16
 
16
- def __init__(self, config: Config = None) -> None:
17
+ def __init__(self, config: Optional[Config] = None) -> None:
17
18
  pass
18
19
 
19
20
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,18 +1,19 @@
1
1
  import contextlib
2
+ from typing import Optional
2
3
 
3
4
  from credsweeper.common.constants import ASCII, PEM_BEGIN_PATTERN
4
- from credsweeper.config import Config
5
- from credsweeper.credentials import LineData
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
6
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
7
- from credsweeper.filters import Filter
8
- from credsweeper.utils import Util
8
+ from credsweeper.filters.filter import Filter
9
9
  from credsweeper.utils.pem_key_detector import PemKeyDetector
10
+ from credsweeper.utils.util import Util
10
11
 
11
12
 
12
13
  class ValueBase64EncodedPem(Filter):
13
14
  """Check that candidate contains base64 encoded pem private key"""
14
15
 
15
- def __init__(self, config: Config = None) -> None:
16
+ def __init__(self, config: Optional[Config] = None) -> None:
16
17
  self.config = config
17
18
 
18
19
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,10 +1,11 @@
1
1
  import contextlib
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
- from credsweeper.utils import Util
7
+ from credsweeper.filters.filter import Filter
8
+ from credsweeper.utils.util import Util
8
9
 
9
10
 
10
11
  class ValueBase64KeyCheck(Filter):
@@ -12,7 +13,7 @@ class ValueBase64KeyCheck(Filter):
12
13
 
13
14
  EXTRA_TRANS_TABLE = str.maketrans('', '', "\",'\\")
14
15
 
15
- def __init__(self, config: Config = None) -> None:
16
+ def __init__(self, config: Optional[Config] = None) -> None:
16
17
  self.config = config
17
18
 
18
19
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -2,14 +2,15 @@ import contextlib
2
2
  import re
3
3
  import statistics
4
4
  from itertools import takewhile
5
+ from typing import Optional
5
6
 
6
7
  from credsweeper.common.constants import Chars
7
- from credsweeper.config import Config
8
- from credsweeper.credentials import LineData
8
+ from credsweeper.config.config import Config
9
+ from credsweeper.credentials.line_data import LineData
9
10
  from credsweeper.file_handler.analysis_target import AnalysisTarget
10
- from credsweeper.filters import Filter
11
+ from credsweeper.filters.filter import Filter
11
12
  from credsweeper.filters.value_entropy_base64_check import ValueEntropyBase64Check
12
- from credsweeper.utils import Util
13
+ from credsweeper.utils.util import Util
13
14
 
14
15
 
15
16
  class ValueBase64PartCheck(Filter):
@@ -20,7 +21,7 @@ class ValueBase64PartCheck(Filter):
20
21
  base64_pattern = re.compile(r"^(\\{1,8}[0abfnrtv]|[0-9A-Za-z+/=]){1,4000}$")
21
22
  base64_char_set = set(Chars.BASE64STDPAD_CHARS.value + '\\')
22
23
 
23
- def __init__(self, config: Config = None) -> None:
24
+ def __init__(self, config: Optional[Config] = None) -> None:
24
25
  pass
25
26
 
26
27
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -0,0 +1,37 @@
1
+ import contextlib
2
+ from typing import Optional
3
+
4
+ from credsweeper.common.constants import DEFAULT_PATTERN_LEN, UTF_8
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
7
+ from credsweeper.file_handler.analysis_target import AnalysisTarget
8
+ from credsweeper.filters.filter import Filter
9
+ from credsweeper.utils.util import Util
10
+
11
+
12
+ class ValueBasicAuthCheck(Filter):
13
+ """Check that candidate have a known structure"""
14
+
15
+ def __init__(self, config: Optional[Config] = None) -> None:
16
+ pass
17
+
18
+ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
19
+ """Run filter checks on received token which might be structured.
20
+
21
+ Args:
22
+ line_data: credential candidate data
23
+ target: multiline target from which line data was obtained
24
+
25
+ Return:
26
+ True, if need to filter candidate and False if left
27
+
28
+ """
29
+ value = line_data.value
30
+ with contextlib.suppress(Exception):
31
+ # Basic encoding -> login:password
32
+ decoded = Util.decode_base64(value, padding_safe=True, urlsafe_detect=True)
33
+ delimiter_pos = decoded.find(b':')
34
+ # check whether the delimiter exists and all chars are decoded
35
+ if 0 < delimiter_pos < len(decoded) - DEFAULT_PATTERN_LEN and decoded.decode(UTF_8):
36
+ return False
37
+ return True
@@ -1,7 +1,9 @@
1
- from credsweeper.config import Config
2
- from credsweeper.credentials import LineData
1
+ from typing import Optional
2
+
3
+ from credsweeper.config.config import Config
4
+ from credsweeper.credentials.line_data import LineData
3
5
  from credsweeper.file_handler.analysis_target import AnalysisTarget
4
- from credsweeper.filters import Filter
6
+ from credsweeper.filters.filter import Filter
5
7
 
6
8
 
7
9
  class ValueBlocklistCheck(Filter):
@@ -18,7 +20,7 @@ class ValueBlocklistCheck(Filter):
18
20
  "undefined",
19
21
  ]
20
22
 
21
- def __init__(self, config: Config = None) -> None:
23
+ def __init__(self, config: Optional[Config] = None) -> None:
22
24
  pass
23
25
 
24
26
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,11 +1,12 @@
1
1
  import re
2
+ from typing import Optional
2
3
 
3
4
  from credsweeper.common import static_keyword_checklist
4
- from credsweeper.config import Config
5
- from credsweeper.credentials import LineData
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.credentials.line_data import LineData
6
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
7
- from credsweeper.filters import Filter
8
- from credsweeper.utils import Util
8
+ from credsweeper.filters.filter import Filter
9
+ from credsweeper.utils.util import Util
9
10
 
10
11
 
11
12
  class ValueCamelCaseCheck(Filter):
@@ -14,7 +15,7 @@ class ValueCamelCaseCheck(Filter):
14
15
  CAMEL_CASE = ["^([a-z]+([A-Z][a-z]+)+)$", "^([A-Z][a-z]+([A-Z][a-z]+)+)$"]
15
16
  CAMEL_CASE_PATTERN = re.compile(Util.get_regex_combine_or(CAMEL_CASE))
16
17
 
17
- def __init__(self, config: Config = None) -> None:
18
+ def __init__(self, config: Optional[Config] = None) -> None:
18
19
  pass
19
20
 
20
21
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1,14 +1,16 @@
1
+ from typing import Optional
2
+
1
3
  from credsweeper.common import static_keyword_checklist
2
- from credsweeper.config import Config
3
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
4
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
5
- from credsweeper.filters import Filter
7
+ from credsweeper.filters.filter import Filter
6
8
 
7
9
 
8
10
  class ValueCoupleKeywordCheck(Filter):
9
11
  """Check value if TWO words from morphemes checklist exists in value"""
10
12
 
11
- def __init__(self, config: Config = None, threshold=1) -> None:
13
+ def __init__(self, config: Optional[Config] = None, threshold=1) -> None:
12
14
  # threshold - minimum morphemes number in a value
13
15
  self.threshold = threshold
14
16