credsweeper-1.11.5-py3-none-any.whl → credsweeper-1.12.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

Files changed (141):
  1. credsweeper/__init__.py +21 -15
  2. credsweeper/__main__.py +141 -35
  3. credsweeper/app.py +11 -11
  4. credsweeper/common/keyword_pattern.py +13 -15
  5. credsweeper/common/morpheme_checklist.txt +2 -0
  6. credsweeper/config/__init__.py +0 -1
  7. credsweeper/config/config.py +3 -3
  8. credsweeper/credentials/__init__.py +0 -5
  9. credsweeper/credentials/augment_candidates.py +1 -1
  10. credsweeper/credentials/candidate.py +1 -1
  11. credsweeper/credentials/credential_manager.py +1 -1
  12. credsweeper/credentials/line_data.py +22 -2
  13. credsweeper/deep_scanner/__init__.py +0 -1
  14. credsweeper/deep_scanner/abstract_scanner.py +3 -3
  15. credsweeper/deep_scanner/byte_scanner.py +1 -1
  16. credsweeper/deep_scanner/bzip2_scanner.py +2 -2
  17. credsweeper/deep_scanner/deb_scanner.py +1 -1
  18. credsweeper/deep_scanner/deep_scanner.py +3 -3
  19. credsweeper/deep_scanner/docx_scanner.py +1 -1
  20. credsweeper/deep_scanner/eml_scanner.py +1 -1
  21. credsweeper/deep_scanner/encoder_scanner.py +1 -1
  22. credsweeper/deep_scanner/gzip_scanner.py +2 -2
  23. credsweeper/deep_scanner/html_scanner.py +1 -1
  24. credsweeper/deep_scanner/jclass_scanner.py +1 -1
  25. credsweeper/deep_scanner/jks_scanner.py +1 -1
  26. credsweeper/deep_scanner/lang_scanner.py +1 -1
  27. credsweeper/deep_scanner/lzma_scanner.py +2 -2
  28. credsweeper/deep_scanner/mxfile_scanner.py +1 -1
  29. credsweeper/deep_scanner/pdf_scanner.py +1 -1
  30. credsweeper/deep_scanner/pkcs_scanner.py +2 -2
  31. credsweeper/deep_scanner/pptx_scanner.py +1 -1
  32. credsweeper/deep_scanner/rpm_scanner.py +1 -1
  33. credsweeper/deep_scanner/tar_scanner.py +2 -2
  34. credsweeper/deep_scanner/tmx_scanner.py +2 -2
  35. credsweeper/deep_scanner/xlsx_scanner.py +2 -2
  36. credsweeper/deep_scanner/xml_scanner.py +1 -1
  37. credsweeper/deep_scanner/zip_scanner.py +2 -2
  38. credsweeper/file_handler/__init__.py +0 -15
  39. credsweeper/file_handler/abstract_provider.py +3 -4
  40. credsweeper/file_handler/byte_content_provider.py +1 -1
  41. credsweeper/file_handler/content_provider.py +1 -1
  42. credsweeper/file_handler/data_content_provider.py +1 -1
  43. credsweeper/file_handler/diff_content_provider.py +133 -3
  44. credsweeper/file_handler/file_path_extractor.py +2 -2
  45. credsweeper/file_handler/files_provider.py +4 -4
  46. credsweeper/file_handler/patches_provider.py +7 -8
  47. credsweeper/file_handler/text_content_provider.py +1 -1
  48. credsweeper/filters/__init__.py +2 -3
  49. credsweeper/filters/filter.py +5 -3
  50. credsweeper/filters/group/__init__.py +0 -2
  51. credsweeper/filters/group/general_keyword.py +2 -2
  52. credsweeper/filters/group/general_pattern.py +2 -2
  53. credsweeper/filters/group/group.py +38 -36
  54. credsweeper/filters/group/password_keyword.py +9 -8
  55. credsweeper/filters/group/token_pattern.py +3 -3
  56. credsweeper/filters/group/url_credentials_group.py +8 -8
  57. credsweeper/filters/group/weird_base36_token.py +3 -3
  58. credsweeper/filters/group/weird_base64_token.py +3 -3
  59. credsweeper/filters/line_git_binary_check.py +5 -4
  60. credsweeper/filters/line_specific_key_check.py +6 -5
  61. credsweeper/filters/line_uue_part_check.py +5 -4
  62. credsweeper/filters/value_allowlist_check.py +6 -5
  63. credsweeper/filters/value_array_dictionary_check.py +8 -6
  64. credsweeper/filters/value_atlassian_token_check.py +6 -5
  65. credsweeper/filters/value_azure_token_check.py +6 -5
  66. credsweeper/filters/value_base32_data_check.py +8 -5
  67. credsweeper/filters/value_base64_data_check.py +6 -5
  68. credsweeper/filters/value_base64_encoded_pem_check.py +6 -5
  69. credsweeper/filters/value_base64_key_check.py +6 -5
  70. credsweeper/filters/value_base64_part_check.py +6 -5
  71. credsweeper/filters/value_basic_auth_check.py +37 -0
  72. credsweeper/filters/value_blocklist_check.py +6 -4
  73. credsweeper/filters/value_camel_case_check.py +6 -5
  74. credsweeper/filters/value_couple_keyword_check.py +6 -4
  75. credsweeper/filters/value_dictionary_keyword_check.py +6 -4
  76. credsweeper/filters/value_discord_bot_check.py +6 -5
  77. credsweeper/filters/value_entropy_base_check.py +6 -5
  78. credsweeper/filters/value_file_path_check.py +8 -5
  79. credsweeper/filters/value_github_check.py +5 -4
  80. credsweeper/filters/value_grafana_check.py +6 -5
  81. credsweeper/filters/value_grafana_service_check.py +5 -4
  82. credsweeper/filters/value_hex_number_check.py +5 -4
  83. credsweeper/filters/value_jfrog_token_check.py +6 -5
  84. credsweeper/filters/value_json_web_key_check.py +6 -5
  85. credsweeper/filters/value_json_web_token_check.py +6 -5
  86. credsweeper/filters/value_last_word_check.py +6 -4
  87. credsweeper/filters/{value_dictionary_value_length_check.py → value_length_check.py} +12 -6
  88. credsweeper/filters/value_method_check.py +5 -4
  89. credsweeper/filters/value_not_allowed_pattern_check.py +6 -5
  90. credsweeper/filters/value_not_part_encoded_check.py +4 -4
  91. credsweeper/filters/value_number_check.py +5 -4
  92. credsweeper/filters/value_pattern_check.py +61 -41
  93. credsweeper/filters/value_similarity_check.py +6 -4
  94. credsweeper/filters/value_split_keyword_check.py +5 -4
  95. credsweeper/filters/value_string_type_check.py +9 -7
  96. credsweeper/filters/value_token_base_check.py +5 -4
  97. credsweeper/filters/value_token_check.py +6 -5
  98. credsweeper/logger/__init__.py +0 -1
  99. credsweeper/logger/logger.py +1 -1
  100. credsweeper/ml_model/__init__.py +0 -1
  101. credsweeper/ml_model/features/__init__.py +1 -0
  102. credsweeper/ml_model/features/entropy_evaluation.py +1 -1
  103. credsweeper/ml_model/features/feature.py +1 -1
  104. credsweeper/ml_model/features/file_extension.py +1 -1
  105. credsweeper/ml_model/features/has_html_tag.py +2 -2
  106. credsweeper/ml_model/features/is_secret_numeric.py +1 -1
  107. credsweeper/ml_model/features/length_of_attribute.py +1 -1
  108. credsweeper/ml_model/features/morpheme_dense.py +15 -8
  109. credsweeper/ml_model/features/rule_name.py +1 -1
  110. credsweeper/ml_model/features/rule_severity.py +21 -0
  111. credsweeper/ml_model/features/search_in_attribute.py +1 -1
  112. credsweeper/ml_model/features/word_in.py +1 -1
  113. credsweeper/ml_model/features/word_in_path.py +5 -2
  114. credsweeper/ml_model/features/word_in_postamble.py +1 -1
  115. credsweeper/ml_model/features/word_in_preamble.py +1 -1
  116. credsweeper/ml_model/features/word_in_transition.py +1 -1
  117. credsweeper/ml_model/features/word_in_value.py +1 -1
  118. credsweeper/ml_model/features/word_in_variable.py +1 -1
  119. credsweeper/ml_model/ml_config.json +130 -24
  120. credsweeper/ml_model/ml_model.onnx +0 -0
  121. credsweeper/ml_model/ml_validator.py +3 -2
  122. credsweeper/rules/__init__.py +0 -1
  123. credsweeper/rules/config.yaml +116 -42
  124. credsweeper/rules/rule.py +4 -3
  125. credsweeper/scanner/__init__.py +0 -1
  126. credsweeper/scanner/scan_type/__init__.py +0 -5
  127. credsweeper/scanner/scan_type/multi_pattern.py +4 -4
  128. credsweeper/scanner/scan_type/pem_key_pattern.py +4 -4
  129. credsweeper/scanner/scan_type/scan_type.py +4 -4
  130. credsweeper/scanner/scan_type/single_pattern.py +4 -4
  131. credsweeper/scanner/scanner.py +12 -8
  132. credsweeper/secret/config.json +2 -2
  133. credsweeper/utils/__init__.py +0 -1
  134. credsweeper/utils/pem_key_detector.py +3 -3
  135. credsweeper/utils/util.py +3 -132
  136. {credsweeper-1.11.5.dist-info → credsweeper-1.12.0.dist-info}/METADATA +1 -1
  137. credsweeper-1.12.0.dist-info/RECORD +161 -0
  138. credsweeper-1.11.5.dist-info/RECORD +0 -159
  139. {credsweeper-1.11.5.dist-info → credsweeper-1.12.0.dist-info}/WHEEL +0 -0
  140. {credsweeper-1.11.5.dist-info → credsweeper-1.12.0.dist-info}/entry_points.txt +0 -0
  141. {credsweeper-1.11.5.dist-info → credsweeper-1.12.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,9 +1,10 @@
1
1
  import re
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
+ from credsweeper.filters.filter import Filter
7
8
 
8
9
 
9
10
  class ValueStringTypeCheck(Filter):
@@ -23,10 +24,11 @@ class ValueStringTypeCheck(Filter):
23
24
  False otherwise
24
25
  """
25
26
 
26
- MULTIBYTE_PATTERN = re.compile(r"(\s*(0x)?[0-9a-f]{1,3}\s*,){8,80}", flags=re.IGNORECASE)
27
+ MULTIBYTE_PATTERN = re.compile(r"((0x)?[0-9a-f]{1,16}[UL]*)(\s*,\s*((0x)?[0-9a-f]{1,16}[UL]*)){3}",
28
+ flags=re.IGNORECASE)
27
29
 
28
- def __init__(self, config: Config) -> None:
29
- self.check_for_literals = config.check_for_literals
30
+ def __init__(self, config: Optional[Config] = None, check_for_literals=True) -> None:
31
+ self.check_for_literals = check_for_literals
30
32
 
31
33
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
32
34
  """Run filter checks on received credential candidate data 'line_data'.
@@ -42,7 +44,7 @@ class ValueStringTypeCheck(Filter):
42
44
  if not self.check_for_literals or line_data.url_part:
43
45
  return False
44
46
 
45
- if ValueStringTypeCheck.MULTIBYTE_PATTERN.match(line_data.value):
47
+ if ValueStringTypeCheck.MULTIBYTE_PATTERN.search(line_data.value):
46
48
  return False
47
49
 
48
50
  if line_data.is_source_file_with_quotes() \
@@ -1,11 +1,12 @@
1
1
  import contextlib
2
2
  from abc import abstractmethod
3
+ from typing import Optional
3
4
  from typing import Tuple
4
5
 
5
- from credsweeper.config import Config
6
- from credsweeper.credentials import LineData
6
+ from credsweeper.config.config import Config
7
+ from credsweeper.credentials.line_data import LineData
7
8
  from credsweeper.file_handler.analysis_target import AnalysisTarget
8
- from credsweeper.filters import Filter
9
+ from credsweeper.filters.filter import Filter
9
10
  from credsweeper.utils.hop_stat import HopStat
10
11
 
11
12
 
@@ -26,7 +27,7 @@ class ValueTokenBaseCheck(Filter):
26
27
  64: 2.15981241,
27
28
  }
28
29
 
29
- def __init__(self, config: Config = None) -> None:
30
+ def __init__(self, config: Optional[Config] = None) -> None:
30
31
  self.__hop_stat = HopStat()
31
32
 
32
33
  @staticmethod
@@ -1,9 +1,10 @@
1
1
  import re
2
+ from typing import Optional
2
3
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.credentials.line_data import LineData
5
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
- from credsweeper.filters import Filter
7
+ from credsweeper.filters.filter import Filter
7
8
 
8
9
 
9
10
  class ValueTokenCheck(Filter):
@@ -17,9 +18,9 @@ class ValueTokenCheck(Filter):
17
18
 
18
19
  """
19
20
 
20
- SPLIT_PATTERN = r"(?<!,) (?!,)|;|\)|\(|{|}|<|>|\[|\]|`"
21
+ SPLIT_PATTERN = re.compile(r"(?<!\W) (?!\W)|[;(){}<>[\]`]")
21
22
 
22
- def __init__(self, config: Config = None) -> None:
23
+ def __init__(self, config: Optional[Config] = None) -> None:
23
24
  pass
24
25
 
25
26
  def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
@@ -1 +0,0 @@
1
- from credsweeper.logger.logger import Logger
@@ -4,7 +4,7 @@ from pathlib import Path
4
4
  from typing import Optional
5
5
 
6
6
  from credsweeper.app import APP_PATH
7
- from credsweeper.utils import Util
7
+ from credsweeper.utils.util import Util
8
8
 
9
9
 
10
10
  class Logger:
@@ -1 +0,0 @@
1
- from credsweeper.ml_model.ml_validator import MlValidator
@@ -5,6 +5,7 @@ from credsweeper.ml_model.features.is_secret_numeric import IsSecretNumeric
5
5
  from credsweeper.ml_model.features.length_of_attribute import LengthOfAttribute
6
6
  from credsweeper.ml_model.features.morpheme_dense import MorphemeDense
7
7
  from credsweeper.ml_model.features.rule_name import RuleName
8
+ from credsweeper.ml_model.features.rule_severity import RuleSeverity
8
9
  from credsweeper.ml_model.features.search_in_attribute import SearchInAttribute
9
10
  from credsweeper.ml_model.features.word_in_path import WordInPath
10
11
  from credsweeper.ml_model.features.word_in_postamble import WordInPostamble
@@ -4,7 +4,7 @@ from typing import Dict, List, Set
4
4
  import numpy as np
5
5
 
6
6
  from credsweeper.common.constants import Chars, ML_HUNK
7
- from credsweeper.credentials import Candidate
7
+ from credsweeper.credentials.candidate import Candidate
8
8
  from credsweeper.file_handler.data_content_provider import MIN_DATA_LEN
9
9
  from credsweeper.ml_model.features.feature import Feature
10
10
 
@@ -3,7 +3,7 @@ from typing import List, Any
3
3
 
4
4
  import numpy as np
5
5
 
6
- from credsweeper.credentials import Candidate
6
+ from credsweeper.credentials.candidate import Candidate
7
7
 
8
8
 
9
9
  class Feature(ABC):
@@ -2,7 +2,7 @@ from typing import List, Any
2
2
 
3
3
  import numpy as np
4
4
 
5
- from credsweeper.credentials import Candidate
5
+ from credsweeper.credentials.candidate import Candidate
6
6
  from credsweeper.ml_model.features.word_in import WordIn
7
7
 
8
8
 
@@ -1,7 +1,7 @@
1
1
  from credsweeper.common.constants import CHUNK_SIZE
2
- from credsweeper.credentials import Candidate
2
+ from credsweeper.credentials.candidate import Candidate
3
3
  from credsweeper.ml_model.features.feature import Feature
4
- from credsweeper.utils import Util
4
+ from credsweeper.utils.util import Util
5
5
 
6
6
 
7
7
  class HasHtmlTag(Feature):
@@ -1,4 +1,4 @@
1
- from credsweeper.credentials import Candidate
1
+ from credsweeper.credentials.candidate import Candidate
2
2
  from credsweeper.ml_model.features.feature import Feature
3
3
 
4
4
 
@@ -1,7 +1,7 @@
1
1
  import numpy as np
2
2
 
3
3
  from credsweeper.common.constants import ML_HUNK
4
- from credsweeper.credentials import Candidate
4
+ from credsweeper.credentials.candidate import Candidate
5
5
  from credsweeper.ml_model.features.feature import Feature
6
6
 
7
7
 
@@ -1,5 +1,5 @@
1
1
  from credsweeper.common import static_keyword_checklist
2
- from credsweeper.credentials import Candidate
2
+ from credsweeper.credentials.candidate import Candidate
3
3
  from credsweeper.ml_model.features.feature import Feature
4
4
 
5
5
 
@@ -7,13 +7,20 @@ class MorphemeDense(Feature):
7
7
  """Feature calculates morphemes density for a value"""
8
8
 
9
9
  def extract(self, candidate: Candidate) -> float:
10
+ density = 0.0
10
11
  if value := candidate.line_data_list[0].value.lower():
11
- morphemes_counter = 0
12
+ morphemes_length = 0
12
13
  for morpheme in static_keyword_checklist.morpheme_set:
13
- if morpheme in value:
14
- morphemes_counter += 1
14
+ morpheme_pos = value.find(morpheme)
15
+ if 0 <= morpheme_pos:
16
+ morpheme_len = len(morpheme)
17
+ while 0 <= morpheme_pos:
18
+ morphemes_length += morpheme_len
19
+ morpheme_pos += morpheme_len
20
+ morpheme_pos = value.find(morpheme, morpheme_pos)
15
21
  # normalization: minimal morpheme length is 3
16
- return 3.0 * morphemes_counter / len(value)
17
- else:
18
- # empty value case
19
- return 0.0
22
+ density = morphemes_length / len(value)
23
+ if 1.0 < density:
24
+ # overlap morpheme case
25
+ density = 1.0
26
+ return density
@@ -2,7 +2,7 @@ from typing import List, Any
2
2
 
3
3
  import numpy as np
4
4
 
5
- from credsweeper.credentials import Candidate
5
+ from credsweeper.credentials.candidate import Candidate
6
6
  from credsweeper.ml_model.features.word_in import WordIn
7
7
 
8
8
 
@@ -0,0 +1,21 @@
1
+ from credsweeper.common.constants import Severity
2
+ from credsweeper.credentials.candidate import Candidate
3
+ from credsweeper.ml_model.features.feature import Feature
4
+
5
+
6
+ class RuleSeverity(Feature):
7
+ """Categorical feature that corresponds to rule name."""
8
+
9
+ def extract(self, candidate: Candidate) -> float:
10
+ if Severity.CRITICAL == candidate.severity:
11
+ return 1.0
12
+ elif Severity.HIGH == candidate.severity:
13
+ return 0.75
14
+ elif Severity.MEDIUM == candidate.severity:
15
+ return 0.5
16
+ elif Severity.LOW == candidate.severity:
17
+ return 0.25
18
+ elif Severity.INFO == candidate.severity:
19
+ return 0.0
20
+ else:
21
+ raise ValueError(f"Unknown type of severity: {candidate.severity}")
@@ -1,6 +1,6 @@
1
1
  import re
2
2
 
3
- from credsweeper.credentials import Candidate
3
+ from credsweeper.credentials.candidate import Candidate
4
4
  from credsweeper.ml_model.features.feature import Feature
5
5
 
6
6
 
@@ -3,7 +3,7 @@ from typing import List, Any, Tuple, Set
3
3
 
4
4
  import numpy as np
5
5
 
6
- from credsweeper.credentials import Candidate
6
+ from credsweeper.credentials.candidate import Candidate
7
7
  from credsweeper.ml_model.features.feature import Feature
8
8
 
9
9
 
@@ -1,9 +1,10 @@
1
+ import os.path
1
2
  from pathlib import Path
2
3
  from typing import List, Any
3
4
 
4
5
  import numpy as np
5
6
 
6
- from credsweeper.credentials import Candidate
7
+ from credsweeper.credentials.candidate import Candidate
7
8
  from credsweeper.ml_model.features.word_in import WordIn
8
9
 
9
10
 
@@ -16,7 +17,9 @@ class WordInPath(WordIn):
16
17
  path = Path(file_path)
17
18
  # apply ./ for normalised path to detect "/src" for relative path
18
19
  posix_lower_path = path.as_posix().lower() if path.is_absolute() else f"./{path.as_posix().lower()}"
19
- return self.word_in_str(posix_lower_path)
20
+ # prevent extra confusion from the same word in extension
21
+ path_without_extension, _ = os.path.splitext(posix_lower_path)
22
+ return self.word_in_str(path_without_extension)
20
23
  else:
21
24
  return np.array([np.zeros(shape=[self.dimension], dtype=np.int8)])
22
25
 
@@ -1,7 +1,7 @@
1
1
  import numpy as np
2
2
 
3
3
  from credsweeper.common.constants import ML_HUNK
4
- from credsweeper.credentials import Candidate
4
+ from credsweeper.credentials.candidate import Candidate
5
5
  from credsweeper.ml_model.features.word_in import WordIn
6
6
 
7
7
 
@@ -1,7 +1,7 @@
1
1
  import numpy as np
2
2
 
3
3
  from credsweeper.common.constants import ML_HUNK
4
- from credsweeper.credentials import Candidate
4
+ from credsweeper.credentials.candidate import Candidate
5
5
  from credsweeper.ml_model.features.word_in import WordIn
6
6
 
7
7
 
@@ -1,6 +1,6 @@
1
1
  import numpy as np
2
2
 
3
- from credsweeper.credentials import Candidate
3
+ from credsweeper.credentials.candidate import Candidate
4
4
  from credsweeper.ml_model.features.word_in import WordIn
5
5
 
6
6
 
@@ -1,6 +1,6 @@
1
1
  import numpy as np
2
2
 
3
- from credsweeper.credentials import Candidate
3
+ from credsweeper.credentials.candidate import Candidate
4
4
  from credsweeper.ml_model.features.word_in import WordIn
5
5
 
6
6
 
@@ -1,6 +1,6 @@
1
1
  import numpy as np
2
2
 
3
- from credsweeper.credentials import Candidate
3
+ from credsweeper.credentials.candidate import Candidate
4
4
  from credsweeper.ml_model.features.word_in import WordIn
5
5
 
6
6