credsweeper 1.11.1__py3-none-any.whl → 1.11.3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

Files changed (59)
  1. credsweeper/__init__.py +1 -1
  2. credsweeper/__main__.py +6 -4
  3. credsweeper/app.py +7 -3
  4. credsweeper/common/keyword_pattern.py +26 -12
  5. credsweeper/common/morpheme_checklist.txt +4 -2
  6. credsweeper/credentials/line_data.py +14 -10
  7. credsweeper/deep_scanner/abstract_scanner.py +10 -1
  8. credsweeper/deep_scanner/deep_scanner.py +19 -8
  9. credsweeper/deep_scanner/docx_scanner.py +1 -1
  10. credsweeper/deep_scanner/encoder_scanner.py +2 -2
  11. credsweeper/deep_scanner/html_scanner.py +3 -3
  12. credsweeper/deep_scanner/jks_scanner.py +2 -4
  13. credsweeper/deep_scanner/lang_scanner.py +2 -2
  14. credsweeper/deep_scanner/lzma_scanner.py +40 -0
  15. credsweeper/deep_scanner/pkcs12_scanner.py +3 -5
  16. credsweeper/deep_scanner/xml_scanner.py +2 -2
  17. credsweeper/file_handler/data_content_provider.py +21 -12
  18. credsweeper/filters/__init__.py +0 -1
  19. credsweeper/filters/group/group.py +3 -4
  20. credsweeper/filters/group/url_credentials_group.py +2 -3
  21. credsweeper/filters/value_allowlist_check.py +6 -7
  22. credsweeper/filters/value_array_dictionary_check.py +3 -1
  23. credsweeper/filters/value_azure_token_check.py +1 -2
  24. credsweeper/filters/value_base64_part_check.py +30 -21
  25. credsweeper/filters/value_discord_bot_check.py +1 -2
  26. credsweeper/filters/value_entropy_base32_check.py +11 -31
  27. credsweeper/filters/value_entropy_base36_check.py +11 -34
  28. credsweeper/filters/value_entropy_base64_check.py +19 -48
  29. credsweeper/filters/{value_first_word_check.py → value_entropy_base_check.py} +13 -14
  30. credsweeper/filters/value_file_path_check.py +1 -1
  31. credsweeper/filters/value_hex_number_check.py +3 -3
  32. credsweeper/filters/value_json_web_token_check.py +4 -5
  33. credsweeper/filters/value_string_type_check.py +11 -3
  34. credsweeper/filters/value_token_base32_check.py +0 -4
  35. credsweeper/filters/value_token_base36_check.py +0 -4
  36. credsweeper/filters/value_token_base64_check.py +0 -4
  37. credsweeper/filters/value_token_check.py +1 -1
  38. credsweeper/ml_model/features/file_extension.py +1 -1
  39. credsweeper/ml_model/features/morpheme_dense.py +0 -4
  40. credsweeper/ml_model/features/rule_name.py +1 -1
  41. credsweeper/ml_model/features/word_in_path.py +0 -9
  42. credsweeper/ml_model/features/word_in_postamble.py +0 -11
  43. credsweeper/ml_model/features/word_in_preamble.py +0 -11
  44. credsweeper/ml_model/features/word_in_transition.py +0 -11
  45. credsweeper/ml_model/features/word_in_value.py +0 -11
  46. credsweeper/ml_model/features/word_in_variable.py +0 -11
  47. credsweeper/ml_model/ml_validator.py +4 -3
  48. credsweeper/rules/config.yaml +238 -208
  49. credsweeper/scanner/scan_type/scan_type.py +2 -3
  50. credsweeper/scanner/scanner.py +7 -1
  51. credsweeper/secret/config.json +16 -5
  52. credsweeper/utils/pem_key_detector.py +4 -5
  53. credsweeper/utils/util.py +67 -144
  54. {credsweeper-1.11.1.dist-info → credsweeper-1.11.3.dist-info}/METADATA +1 -1
  55. {credsweeper-1.11.1.dist-info → credsweeper-1.11.3.dist-info}/RECORD +58 -58
  56. credsweeper/utils/entropy_validator.py +0 -72
  57. {credsweeper-1.11.1.dist-info → credsweeper-1.11.3.dist-info}/WHEEL +0 -0
  58. {credsweeper-1.11.1.dist-info → credsweeper-1.11.3.dist-info}/entry_points.txt +0 -0
  59. {credsweeper-1.11.1.dist-info → credsweeper-1.11.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,5 +1,3 @@
1
- from typing import List
2
-
3
1
  import numpy as np
4
2
 
5
3
  from credsweeper.common.constants import ML_HUNK
@@ -10,15 +8,6 @@ from credsweeper.ml_model.features.word_in import WordIn
10
8
  class WordInPreamble(WordIn):
11
9
  """Feature is true if line contains at least one word from predefined list."""
12
10
 
13
- def __init__(self, words: List[str]) -> None:
14
- """Feature returns array of matching words
15
-
16
- Args:
17
- words: list of predefined words - MUST BE IN LOWER CASE
18
-
19
- """
20
- super().__init__(words)
21
-
22
11
  def extract(self, candidate: Candidate) -> np.ndarray:
23
12
  """Returns true if any words in line before variable or value"""
24
13
  if 0 <= candidate.line_data_list[0].variable_start:
@@ -1,5 +1,3 @@
1
- from typing import List
2
-
3
1
  import numpy as np
4
2
 
5
3
  from credsweeper.credentials import Candidate
@@ -9,15 +7,6 @@ from credsweeper.ml_model.features.word_in import WordIn
9
7
  class WordInTransition(WordIn):
10
8
  """Feature is true if line contains at least one word from predefined list."""
11
9
 
12
- def __init__(self, words: List[str]) -> None:
13
- """Feature returns array of matching words
14
-
15
- Args:
16
- words: list of predefined words - MUST BE IN LOWER CASE
17
-
18
- """
19
- super().__init__(words)
20
-
21
10
  def extract(self, candidate: Candidate) -> np.ndarray:
22
11
  """Returns true if any words between variable and value"""
23
12
  if 0 <= candidate.line_data_list[0].variable_end < candidate.line_data_list[0].value_start:
@@ -1,5 +1,3 @@
1
- from typing import List
2
-
3
1
  import numpy as np
4
2
 
5
3
  from credsweeper.credentials import Candidate
@@ -9,15 +7,6 @@ from credsweeper.ml_model.features.word_in import WordIn
9
7
  class WordInValue(WordIn):
10
8
  """Feature returns true if candidate value contains at least one word from predefined list."""
11
9
 
12
- def __init__(self, words: List[str]) -> None:
13
- """Feature is true if candidate value contains at least one predefined word.
14
-
15
- Args:
16
- words: list of predefined words - MUST BE IN LOWER CASE and SORTED (preferred)
17
-
18
- """
19
- super().__init__(words)
20
-
21
10
  def extract(self, candidate: Candidate) -> np.ndarray:
22
11
  """Returns array of matching words for first line"""
23
12
  if value := candidate.line_data_list[0].value:
@@ -1,5 +1,3 @@
1
- from typing import List
2
-
3
1
  import numpy as np
4
2
 
5
3
  from credsweeper.credentials import Candidate
@@ -9,15 +7,6 @@ from credsweeper.ml_model.features.word_in import WordIn
9
7
  class WordInVariable(WordIn):
10
8
  """Feature returns array of words matching in variable"""
11
9
 
12
- def __init__(self, words: List[str]) -> None:
13
- """Feature is true if candidate value contains at least one predefined word.
14
-
15
- Args:
16
- words: list of predefined words - MUST BE IN LOWER CASE
17
-
18
- """
19
- super().__init__(words)
20
-
21
10
  def extract(self, candidate: Candidate) -> np.ndarray:
22
11
  """Returns array of matching words for first line"""
23
12
  if variable := candidate.line_data_list[0].variable:
@@ -92,8 +92,9 @@ class MlValidator:
92
92
  try:
93
93
  feature = feature_constructor(**kwargs)
94
94
  except TypeError:
95
- raise TypeError(f'Error while parsing model details. Cannot create feature "{feature_class}"'
96
- f' with kwargs "{kwargs}"')
95
+ logger.error(f'Error while parsing model details. Cannot create feature "{feature_class}"'
96
+ f' with kwargs "{kwargs}"')
97
+ raise
97
98
  if feature_definition["type"] in ["RuleName"]:
98
99
  self.unique_feature_list.append(feature)
99
100
  else:
@@ -229,7 +230,7 @@ class MlValidator:
229
230
  features_list = []
230
231
  probability: np.ndarray = np.zeros(len(group_list), dtype=np.float32)
231
232
  head = tail = 0
232
- for group_key, candidates in group_list:
233
+ for _group_key, candidates in group_list:
233
234
  line_input, variable_input, value_input, feature_array = self.get_group_features(candidates)
234
235
  line_input_list.append(line_input)
235
236
  variable_input_list.append(variable_input)