credsweeper 1.11.2__tar.gz → 1.11.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

Files changed (158)
  1. {credsweeper-1.11.2 → credsweeper-1.11.4}/PKG-INFO +1 -1
  2. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/__init__.py +1 -1
  3. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/__main__.py +7 -5
  4. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/app.py +28 -47
  5. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/common/constants.py +2 -5
  6. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/common/keyword_pattern.py +15 -9
  7. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/common/morpheme_checklist.txt +4 -2
  8. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/credentials/candidate_key.py +1 -1
  9. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/credentials/credential_manager.py +4 -3
  10. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/credentials/line_data.py +16 -15
  11. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/abstract_scanner.py +10 -1
  12. credsweeper-1.11.4/credsweeper/deep_scanner/deb_scanner.py +48 -0
  13. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/deep_scanner.py +65 -43
  14. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/docx_scanner.py +1 -1
  15. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/encoder_scanner.py +2 -2
  16. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/gzip_scanner.py +1 -1
  17. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/html_scanner.py +3 -3
  18. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/jks_scanner.py +2 -4
  19. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/lang_scanner.py +2 -2
  20. credsweeper-1.11.4/credsweeper/deep_scanner/lzma_scanner.py +40 -0
  21. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/pkcs12_scanner.py +3 -5
  22. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/xml_scanner.py +2 -2
  23. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/byte_content_provider.py +2 -2
  24. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/content_provider.py +1 -1
  25. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/data_content_provider.py +23 -14
  26. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/diff_content_provider.py +2 -2
  27. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/file_path_extractor.py +1 -1
  28. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/files_provider.py +2 -4
  29. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/patches_provider.py +1 -1
  30. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/string_content_provider.py +2 -2
  31. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/struct_content_provider.py +1 -1
  32. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/text_content_provider.py +2 -2
  33. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_array_dictionary_check.py +3 -1
  34. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_azure_token_check.py +1 -2
  35. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_base64_encoded_pem_check.py +1 -1
  36. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_base64_part_check.py +30 -21
  37. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_discord_bot_check.py +1 -2
  38. credsweeper-1.11.4/credsweeper/filters/value_entropy_base32_check.py +22 -0
  39. credsweeper-1.11.4/credsweeper/filters/value_entropy_base36_check.py +23 -0
  40. credsweeper-1.11.4/credsweeper/filters/value_entropy_base64_check.py +26 -0
  41. credsweeper-1.11.4/credsweeper/filters/value_entropy_base_check.py +37 -0
  42. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_file_path_check.py +1 -1
  43. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_hex_number_check.py +3 -3
  44. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_json_web_token_check.py +4 -5
  45. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_pattern_check.py +64 -16
  46. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_string_type_check.py +11 -3
  47. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_token_base32_check.py +0 -4
  48. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_token_base36_check.py +0 -4
  49. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_token_base64_check.py +0 -4
  50. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_token_check.py +1 -1
  51. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/file_extension.py +2 -2
  52. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/morpheme_dense.py +0 -4
  53. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/rule_name.py +1 -1
  54. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_path.py +0 -9
  55. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_postamble.py +0 -11
  56. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_preamble.py +0 -11
  57. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_transition.py +0 -11
  58. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_value.py +0 -11
  59. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_variable.py +0 -11
  60. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/ml_validator.py +45 -22
  61. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/rules/config.yaml +238 -208
  62. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/rules/rule.py +3 -3
  63. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/scan_type.py +2 -3
  64. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/scanner/scanner.py +7 -1
  65. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/secret/config.json +16 -5
  66. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/utils/hop_stat.py +3 -3
  67. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/utils/pem_key_detector.py +8 -7
  68. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/utils/util.py +76 -146
  69. credsweeper-1.11.2/credsweeper/filters/value_entropy_base32_check.py +0 -42
  70. credsweeper-1.11.2/credsweeper/filters/value_entropy_base36_check.py +0 -46
  71. credsweeper-1.11.2/credsweeper/filters/value_entropy_base64_check.py +0 -59
  72. credsweeper-1.11.2/credsweeper/utils/entropy_validator.py +0 -72
  73. {credsweeper-1.11.2 → credsweeper-1.11.4}/.gitignore +0 -0
  74. {credsweeper-1.11.2 → credsweeper-1.11.4}/LICENSE +0 -0
  75. {credsweeper-1.11.2 → credsweeper-1.11.4}/README.md +0 -0
  76. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/common/__init__.py +0 -0
  77. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/common/keyword_checklist.py +0 -0
  78. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/common/keyword_checklist.txt +0 -0
  79. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/config/__init__.py +0 -0
  80. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/config/config.py +0 -0
  81. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/credentials/__init__.py +0 -0
  82. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/credentials/augment_candidates.py +0 -0
  83. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/credentials/candidate.py +0 -0
  84. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/credentials/candidate_group_generator.py +0 -0
  85. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/__init__.py +0 -0
  86. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/byte_scanner.py +0 -0
  87. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/bzip2_scanner.py +0 -0
  88. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/eml_scanner.py +0 -0
  89. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/mxfile_scanner.py +0 -0
  90. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/pdf_scanner.py +0 -0
  91. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/pptx_scanner.py +0 -0
  92. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/tar_scanner.py +0 -0
  93. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/tmx_scanner.py +0 -0
  94. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/xlsx_scanner.py +0 -0
  95. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/deep_scanner/zip_scanner.py +0 -0
  96. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/__init__.py +0 -0
  97. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/abstract_provider.py +0 -0
  98. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/analysis_target.py +0 -0
  99. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/file_handler/descriptor.py +0 -0
  100. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/__init__.py +0 -0
  101. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/filter.py +0 -0
  102. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/__init__.py +0 -0
  103. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/general_keyword.py +0 -0
  104. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/general_pattern.py +0 -0
  105. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/group.py +0 -0
  106. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/password_keyword.py +0 -0
  107. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/token_pattern.py +0 -0
  108. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/url_credentials_group.py +0 -0
  109. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/weird_base36_token.py +0 -0
  110. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/group/weird_base64_token.py +0 -0
  111. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/line_git_binary_check.py +0 -0
  112. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/line_specific_key_check.py +0 -0
  113. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/line_uue_part_check.py +0 -0
  114. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_allowlist_check.py +0 -0
  115. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_atlassian_token_check.py +0 -0
  116. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_base32_data_check.py +0 -0
  117. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_base64_data_check.py +0 -0
  118. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_base64_key_check.py +0 -0
  119. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_blocklist_check.py +0 -0
  120. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_camel_case_check.py +0 -0
  121. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_couple_keyword_check.py +0 -0
  122. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_dictionary_keyword_check.py +0 -0
  123. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_dictionary_value_length_check.py +0 -0
  124. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_github_check.py +0 -0
  125. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_grafana_check.py +0 -0
  126. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_grafana_service_check.py +0 -0
  127. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_jfrog_token_check.py +0 -0
  128. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_last_word_check.py +0 -0
  129. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_method_check.py +0 -0
  130. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_not_allowed_pattern_check.py +0 -0
  131. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_not_part_encoded_check.py +0 -0
  132. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_number_check.py +0 -0
  133. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_similarity_check.py +0 -0
  134. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_split_keyword_check.py +0 -0
  135. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/filters/value_token_base_check.py +0 -0
  136. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/logger/__init__.py +0 -0
  137. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/logger/logger.py +0 -0
  138. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/__init__.py +0 -0
  139. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/__init__.py +0 -0
  140. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/entropy_evaluation.py +0 -0
  141. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/feature.py +0 -0
  142. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/has_html_tag.py +0 -0
  143. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/is_secret_numeric.py +0 -0
  144. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/length_of_attribute.py +0 -0
  145. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/search_in_attribute.py +0 -0
  146. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in.py +0 -0
  147. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/ml_config.json +0 -0
  148. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/ml_model/ml_model.onnx +0 -0
  149. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/py.typed +0 -0
  150. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/rules/__init__.py +0 -0
  151. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/scanner/__init__.py +0 -0
  152. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/__init__.py +0 -0
  153. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/multi_pattern.py +0 -0
  154. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/pem_key_pattern.py +0 -0
  155. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/single_pattern.py +0 -0
  156. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/secret/log.yaml +0 -0
  157. {credsweeper-1.11.2 → credsweeper-1.11.4}/credsweeper/utils/__init__.py +0 -0
  158. {credsweeper-1.11.2 → credsweeper-1.11.4}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: credsweeper
3
- Version: 1.11.2
3
+ Version: 1.11.4
4
4
  Summary: Credential Sweeper
5
5
  Project-URL: Homepage, https://github.com/Samsung/CredSweeper
6
6
  Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues
@@ -18,4 +18,4 @@ __all__ = [
18
18
  '__version__'
19
19
  ]
20
20
 
21
- __version__ = "1.11.2"
21
+ __version__ = "1.11.4"
@@ -4,6 +4,7 @@ import os
4
4
  import sys
5
5
  import time
6
6
  from argparse import ArgumentParser, ArgumentTypeError, Namespace, BooleanOptionalAction
7
+ from pathlib import Path
7
8
  from typing import Any, Union, Dict
8
9
 
9
10
  from credsweeper import __version__
@@ -62,7 +63,7 @@ def logger_levels(log_level: str) -> str:
62
63
  Returns True if log_level UPPERCASE is one of keys
63
64
  """
64
65
  val = log_level.upper()
65
- if any(val == i for i in Logger.LEVELS.keys()):
66
+ if val in Logger.LEVELS:
66
67
  return val
67
68
  raise ArgumentTypeError(f"Log level provided: {log_level} -- must be one of: {' | '.join(Logger.LEVELS.keys())}")
68
69
 
@@ -88,10 +89,11 @@ def check_integrity() -> int:
88
89
  Returns CRC32 of files in integer
89
90
  """
90
91
  crc32 = 0
91
- for root, dirs, files in os.walk(APP_PATH):
92
- for file_path in files:
93
- if Util.get_extension(file_path) in [".py", ".json", ".txt", ".yaml", ".onnx"]:
94
- data = Util.read_data(os.path.join(root, file_path))
92
+ for root, _dirs, files in os.walk(APP_PATH):
93
+ for file_name in files:
94
+ if Util.get_extension(file_name) in [".py", ".json", ".txt", ".yaml", ".onnx"]:
95
+ file_path = Path(root) / file_name
96
+ data = Util.read_data(file_path)
95
97
  if data:
96
98
  crc32 ^= binascii.crc32(data)
97
99
  return crc32
@@ -11,15 +11,17 @@ from colorama import Style
11
11
  # Directory of credsweeper sources MUST be placed before imports to avoid circular import error
12
12
  APP_PATH = Path(__file__).resolve().parent
13
13
 
14
- from credsweeper.common.constants import Severity, ThresholdPreset, DiffRowType
14
+ from credsweeper.common.constants import Severity, ThresholdPreset, DiffRowType, DEFAULT_ENCODING
15
15
  from credsweeper.config import Config
16
16
  from credsweeper.credentials import Candidate, CredentialManager, CandidateKey
17
17
  from credsweeper.deep_scanner.deep_scanner import DeepScanner
18
+ from credsweeper.file_handler.content_provider import ContentProvider
18
19
  from credsweeper.file_handler.diff_content_provider import DiffContentProvider
19
20
  from credsweeper.file_handler.file_path_extractor import FilePathExtractor
20
21
  from credsweeper.file_handler.abstract_provider import AbstractProvider
21
22
  from credsweeper.file_handler.text_content_provider import TextContentProvider
22
23
  from credsweeper.scanner import Scanner
24
+ from credsweeper.ml_model.ml_validator import MlValidator
23
25
  from credsweeper.utils import Util
24
26
 
25
27
  logger = logging.getLogger(__name__)
@@ -94,7 +96,7 @@ class CredSweeper:
94
96
  log_level: str - level for pool initializer according logging levels (UPPERCASE)
95
97
 
96
98
  """
97
- self.pool_count: int = int(pool_count) if int(pool_count) > 1 else 1
99
+ self.pool_count: int = max(1, int(pool_count))
98
100
  if not (_severity := Severity.get(severity)):
99
101
  raise RuntimeError(f"Severity level provided: {severity}"
100
102
  f" -- must be one of: {' | '.join([i.value for i in Severity])}")
@@ -123,9 +125,9 @@ class CredSweeper:
123
125
  self.ml_config = ml_config
124
126
  self.ml_model = ml_model
125
127
  self.ml_providers = ml_providers
126
- self.ml_validator = None
127
128
  self.__thrifty = thrifty
128
129
  self.__log_level = log_level
130
+ self.__ml_validator: Optional[MlValidator] = None
129
131
 
130
132
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
131
133
 
@@ -182,35 +184,22 @@ class CredSweeper:
182
184
 
183
185
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
184
186
 
185
- # the import cannot be done on top due
186
- # TypeError: cannot pickle 'onnxruntime.capi.onnxruntime_pybind11_state.InferenceSession' object
187
- from credsweeper.ml_model import MlValidator
188
-
189
- # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
190
-
191
187
  @property
192
188
  def ml_validator(self) -> MlValidator:
193
189
  """ml_validator getter"""
194
- from credsweeper.ml_model import MlValidator
195
190
  if not self.__ml_validator:
196
- self.__ml_validator: MlValidator = MlValidator(
191
+ self.__ml_validator = MlValidator(
197
192
  threshold=self.ml_threshold, #
198
193
  ml_config=self.ml_config, #
199
194
  ml_model=self.ml_model, #
200
195
  ml_providers=self.ml_providers, #
201
196
  )
202
- assert self.__ml_validator, "self.__ml_validator was not initialized"
197
+ if not self.__ml_validator:
198
+ raise RuntimeError("MlValidator was not initialized!")
203
199
  return self.__ml_validator
204
200
 
205
201
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
206
202
 
207
- @ml_validator.setter
208
- def ml_validator(self, _ml_validator: Optional[MlValidator]) -> None:
209
- """ml_validator setter"""
210
- self.__ml_validator = _ml_validator
211
-
212
- # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
213
-
214
203
  @staticmethod
215
204
  def pool_initializer(log_kwargs) -> None:
216
205
  """Ignore SIGINT in child processes."""
@@ -219,20 +208,6 @@ class CredSweeper:
219
208
 
220
209
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
221
210
 
222
- @property
223
- def config(self) -> Config:
224
- """config getter"""
225
- return self.__config
226
-
227
- # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
228
-
229
- @config.setter
230
- def config(self, config: Config) -> None:
231
- """config setter"""
232
- self.__config = config
233
-
234
- # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
235
-
236
211
  def run(self, content_provider: AbstractProvider) -> int:
237
212
  """Run an analysis of 'content_provider' object.
238
213
 
@@ -241,9 +216,10 @@ class CredSweeper:
241
216
 
242
217
  """
243
218
  _empty_list: Sequence[Union[DiffContentProvider, TextContentProvider]] = []
244
- file_extractors: Sequence[Union[DiffContentProvider, TextContentProvider]] = \
245
- content_provider.get_scannable_files(self.config) if content_provider else _empty_list
246
- logger.info(f"Start Scanner for {len(file_extractors)} providers")
219
+ file_extractors = content_provider.get_scannable_files(self.config) if content_provider else _empty_list
220
+ if not file_extractors:
221
+ logger.info(f"No scannable targets for {len(content_provider.paths)} paths")
222
+ return 0
247
223
  self.scan(file_extractors)
248
224
  self.post_processing()
249
225
  # PatchesProvider has the attribute. Circular import error appears with using the isinstance
@@ -260,7 +236,7 @@ class CredSweeper:
260
236
  content_providers: file objects to scan
261
237
 
262
238
  """
263
- if 1 < self.pool_count:
239
+ if 1 < self.pool_count and 1 < len(content_providers):
264
240
  self.__multi_jobs_scan(content_providers)
265
241
  else:
266
242
  self.__single_job_scan(content_providers)
@@ -269,6 +245,7 @@ class CredSweeper:
269
245
 
270
246
  def __single_job_scan(self, content_providers: Sequence[Union[DiffContentProvider, TextContentProvider]]) -> None:
271
247
  """Performs scan in main thread"""
248
+ logger.info(f"Scan for {len(content_providers)} providers")
272
249
  all_cred = self.files_scan(content_providers)
273
250
  self.credential_manager.set_credentials(all_cred)
274
251
 
@@ -284,12 +261,14 @@ class CredSweeper:
284
261
  if "SILENCE" == self.__log_level:
285
262
  logging.addLevelName(60, "SILENCE")
286
263
  log_kwargs["level"] = self.__log_level
287
- with multiprocessing.get_context("spawn").Pool(processes=self.pool_count,
288
- initializer=self.pool_initializer,
264
+ pool_count = min(self.pool_count, len(content_providers))
265
+ logger.info(f"Scan in {pool_count} processes for {len(content_providers)} providers")
266
+ with multiprocessing.get_context("spawn").Pool(processes=pool_count,
267
+ initializer=CredSweeper.pool_initializer,
289
268
  initargs=(log_kwargs, )) as pool:
290
269
  try:
291
- for scan_results in pool.imap_unordered(self.files_scan, (content_providers[x::self.pool_count]
292
- for x in range(self.pool_count))):
270
+ for scan_results in pool.imap_unordered(self.files_scan,
271
+ (content_providers[x::pool_count] for x in range(pool_count))):
293
272
  for cred in scan_results:
294
273
  self.credential_manager.add_credential(cred)
295
274
  except KeyboardInterrupt:
@@ -301,9 +280,7 @@ class CredSweeper:
301
280
 
302
281
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
303
282
 
304
- def files_scan(
305
- self, #
306
- content_providers: Sequence[Union[DiffContentProvider, TextContentProvider]]) -> List[Candidate]:
283
+ def files_scan(self, content_providers: Sequence[ContentProvider]) -> List[Candidate]:
307
284
  """Auxiliary method for scan one sequence"""
308
285
  all_cred: List[Candidate] = []
309
286
  for provider in content_providers:
@@ -316,7 +293,7 @@ class CredSweeper:
316
293
 
317
294
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
318
295
 
319
- def file_scan(self, content_provider: Union[DiffContentProvider, TextContentProvider]) -> List[Candidate]:
296
+ def file_scan(self, content_provider: ContentProvider) -> List[Candidate]:
320
297
  """Run scanning of file from 'file_provider'.
321
298
 
322
299
  Args:
@@ -415,7 +392,7 @@ class CredSweeper:
415
392
  if isinstance(change_type, DiffRowType):
416
393
  # add suffix for appropriated reports to create two files for the patch scan
417
394
  json_path = json_path.with_suffix(f".{change_type.value}{json_path.suffix}")
418
- with open(json_path, 'w') as f:
395
+ with open(json_path, 'w', encoding=DEFAULT_ENCODING) as f:
419
396
  # use the approach to reduce total memory usage in case of huge data
420
397
  first_item = True
421
398
  f.write('[\n')
@@ -446,8 +423,12 @@ class CredSweeper:
446
423
  for credential in credentials:
447
424
  for line_data in credential.line_data_list:
448
425
  # bright rule name and path or info
426
+ if isinstance(credential.ml_probability, float):
427
+ ml_probability_info = f" {credential.ml_probability:.6f}"
428
+ else:
429
+ ml_probability_info = ""
449
430
  print(Style.BRIGHT + credential.rule_name +
450
- f" {line_data.info or line_data.path}:{line_data.line_num} {credential.ml_probability}" +
431
+ f" {line_data.info or line_data.path}:{line_data.line_num}{ml_probability_info}" +
451
432
  Style.RESET_ALL)
452
433
  print(line_data.get_colored_line(hashed=self.hashed, subtext=self.subtext))
453
434
 
@@ -96,10 +96,6 @@ class Chars(Enum):
96
96
  ASCII_PRINTABLE = string.printable
97
97
 
98
98
 
99
- ENTROPY_LIMIT_BASE64 = 4.5
100
- ENTROPY_LIMIT_BASE3x = 3
101
-
102
-
103
99
  class GroupType(Enum):
104
100
  """Group type - used in Group constructor for load predefined set of filters"""
105
101
  KEYWORD = "keyword"
@@ -148,7 +144,8 @@ OVERLAP_SIZE = 1000
148
144
  CHUNK_STEP_SIZE = CHUNK_SIZE - OVERLAP_SIZE
149
145
  # ML hunk size to limit of variable or value size and get substring near value
150
146
  ML_HUNK = 80
151
- """ values according https://docs.python.org/3/library/codecs.html """
147
+
148
+ # values according https://docs.python.org/3/library/codecs.html
152
149
  UTF_8 = "utf_8"
153
150
  UTF_16 = "utf_16"
154
151
  LATIN_1 = "latin_1"
@@ -3,25 +3,30 @@ import re
3
3
 
4
4
  class KeywordPattern:
5
5
  """Pattern set of keyword types"""
6
- key_left = r"(\\[nrt]|%[0-9a-f]{2})?" \
6
+ directive = r"(?P<directive>(?:(?:[#%]define|%global)(?:\s|\\t)|\bset))?"
7
+ key_left = r"(?:\\[nrt]|%[0-9a-f]{2}|\s)*" \
7
8
  r"(?P<variable>(([`'\"]{1,8}[^:='\"`}<>\\/&?]*|[^:='\"`}<>\s()\\/&?;,%]*)" \
8
9
  r"(?P<keyword>"
9
10
  # there will be inserted a keyword
10
11
  key_right = r")" \
11
- r"[^%:='\"`<>{?!&;\n]*" \
12
+ r"[^%:='\"`<>({?!&;\n]*" \
12
13
  r")" \
13
14
  r"(&(quot|apos);|%[0-9a-f]{2}|[`'\"])*" \
14
15
  r")" # <variable>
15
- separator = r"(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*" \
16
- r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=|%3d)" \
16
+ separator = r"(?(directive)|(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*)" \
17
+ r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=~|=" \
18
+ r"|(?(directive)(\\t|\s|\((?!\))){1,80}|%3d))" \
17
19
  r"(\s|\\{1,8}[tnr])*"
18
20
  # might be curly, square or parenthesis with words before
19
21
  wrap = r"(?P<wrap>(" \
20
22
  r"(new(\s|\\{1,8}[tnr]|byte|char|string|\[\]){1,8})?" \
23
+ r"(?P<get>([_a-z][0-9a-z_.\[\]]*\.)get|(os\.)?getenv)?" \
21
24
  r"([0-9a-z_.]|::|-(>|&gt;))*" \
22
- r"[\[\(\{]" \
25
+ r"\s*" \
26
+ r"(\[(?!\])|\((?!\))|\{(?!\}))" \
23
27
  r"(\s|\\{1,8}[tnr])*" \
24
- r"([0-9a-z_]{1,32}[:=]\s*)?" \
28
+ r"(?(get)('[^']+'|\"[^\"]+\")\s*,\s*|)" \
29
+ r"([0-9a-z_]{1,32}\s*[:=]\s*)?" \
25
30
  r"){1,8})?"
26
31
  string_prefix = r"(((b|r|br|rb|u|f|rf|fr|l|@)(?=(\\*[`'\"])))?"
27
32
  left_quote = r"(?P<value_leftquote>((?P<esq>\\{1,8})?([`'\"]|&(quot|apos);)){1,4}))?"
@@ -39,7 +44,7 @@ class KeywordPattern:
39
44
  r"(?P<url_esc>%[0-9a-f]{2})" \
40
45
  r"|" \
41
46
  r"(?(url_esc)[^\s`'\",;\\&]|[^\s`'\",;\\])" \
42
- r")"\
47
+ r")" \
43
48
  r"){4,8000}" \
44
49
  r"|" \
45
50
  r"(<[^>]{4,8000}>)" \
@@ -48,18 +53,19 @@ class KeywordPattern:
48
53
  r"|" \
49
54
  r"(\$?\{{1,3}[^}]{4,8000}\}{1,3})" \
50
55
  r"|" \
51
- r"(?(wrap)(?(value_leftquote)(?!\\(?P=value_leftquote))|[^\]\)\}]){16,8000})"\
56
+ r"(?(wrap)(?(value_leftquote)(?!\\(?P=value_leftquote))|[^\]\)\}]){16,8000})" \
52
57
  r")" # <value>
53
58
  right_quote = r"(?(value_leftquote)" \
54
59
  r"(?P<value_rightquote>(?<!\\)(?P=value_leftquote)|\\$|(?<=[0-9a-z+_/-])$)" \
55
60
  r"|" \
56
- r"(?(wrap)(\]|\)|\}|,|;|\\|$))" \
61
+ r"(?(wrap)(\]|\)|\}|;|\\|$))" \
57
62
  r")"
58
63
 
59
64
  @classmethod
60
65
  def get_keyword_pattern(cls, keyword: str) -> re.Pattern:
61
66
  """Returns compiled regex pattern"""
62
67
  expression = ''.join([ #
68
+ cls.directive, #
63
69
  cls.key_left, #
64
70
  keyword, #
65
71
  cls.key_right, #
@@ -885,7 +885,7 @@ mbler
885
885
  mean
886
886
  measur
887
887
  medi
888
- medusa
888
+ medus
889
889
  meet
890
890
  mem_
891
891
  memb
@@ -925,7 +925,7 @@ month
925
925
  morp
926
926
  mory
927
927
  mote
928
- motorola
928
+ motor
929
929
  mount
930
930
  move
931
931
  mpeg
@@ -1005,6 +1005,7 @@ origin
1005
1005
  orithm
1006
1006
  ormat
1007
1007
  orph
1008
+ otorola
1008
1009
  ottle
1009
1010
  ously
1010
1011
  out
@@ -1485,6 +1486,7 @@ up_
1485
1486
  updat
1486
1487
  upgrade
1487
1488
  url
1489
+ usa
1488
1490
  usb
1489
1491
  use
1490
1492
  usin
@@ -24,7 +24,7 @@ class CandidateKey:
24
24
  return self.key == other.key
25
25
 
26
26
  def __ne__(self, other):
27
- return not (self == other)
27
+ return not bool(self == other)
28
28
 
29
29
  def __repr__(self) -> str:
30
30
  return f"{self.key}:{self.__line}"
@@ -110,7 +110,8 @@ class CredentialManager:
110
110
  # Match by file path+line num+value. Value required so two different credentials still be
111
111
  # processed independently
112
112
  candidate_key = CandidateKey(line_data)
113
- if candidate_key not in groups:
114
- groups[candidate_key] = list()
115
- groups[candidate_key].append(credential_candidate)
113
+ if candidate_key in groups:
114
+ groups[candidate_key].append(credential_candidate)
115
+ else:
116
+ groups[candidate_key] = [credential_candidate]
116
117
  return groups
@@ -10,7 +10,6 @@ from colorama import Fore, Style
10
10
  from credsweeper.common.constants import MAX_LINE_LENGTH, UTF_8, StartEnd, ML_HUNK
11
11
  from credsweeper.config import Config
12
12
  from credsweeper.utils import Util
13
- from credsweeper.utils.entropy_validator import EntropyValidator
14
13
 
15
14
 
16
15
  class LineData:
@@ -32,7 +31,7 @@ class LineData:
32
31
  """
33
32
 
34
33
  quotation_marks = ('"', "'", '`')
35
- comment_starts = ("//", "* ", "#", "/*", "<!––", "%{", "%", "...", "(*", "--", "--[[", "#=")
34
+ comment_starts = ("//", "* ", "# ", "/*", "<!––", "%{", "%", "...", "(*", "--", "--[[", "#=")
36
35
  bash_param_split = re.compile("\\s+(\\-|\\||\\>|\\w+?\\>|\\&)")
37
36
  line_endings = re.compile(r"\\{1,8}[nr]")
38
37
  # https://en.wikipedia.org/wiki/Percent-encoding
@@ -87,8 +86,9 @@ class LineData:
87
86
  self.url_part = False
88
87
  self.wrap = None
89
88
  self._3d_escaped_separator = False
90
-
91
89
  self.initialize(match_obj)
90
+ # the line is very useful for debug breakpoint
91
+ pass # pylint: disable=W0107
92
92
 
93
93
  def compare(self, other: 'LineData') -> bool:
94
94
  """Comparison method - skip whole line and checks only when variable and value are the same"""
@@ -327,11 +327,8 @@ class LineData:
327
327
  True if file require quotation, False otherwise
328
328
 
329
329
  """
330
- if not self.path:
331
- return False
332
- if Util.get_extension(self.path) in self.config.source_quote_ext:
333
- return True
334
- return False
330
+ file_type = self.file_type or Util.get_extension(self.path)
331
+ return bool(file_type) and file_type in self.config.source_quote_ext
335
332
 
336
333
  @staticmethod
337
334
  def get_hash_or_subtext(
@@ -373,10 +370,10 @@ class LineData:
373
370
  def to_str(self, subtext: bool = False, hashed: bool = False) -> str:
374
371
  """Represent line_data with subtext or|and hashed values"""
375
372
  cut_pos = StartEnd(self.variable_start, self.value_end) if subtext else None
376
- return f"line: '{self.get_hash_or_subtext(self.line, hashed, cut_pos)}'" \
377
- f" | line_num: {self.line_num} | path: {self.path}" \
373
+ return f"path: {self.path}" \
374
+ f" | line_num: {self.line_num}" \
378
375
  f" | value: '{self.get_hash_or_subtext(self.value, hashed)}'" \
379
- f" | entropy_validation: {EntropyValidator(self.value)}"
376
+ f" | line: '{self.get_hash_or_subtext(self.line, hashed, cut_pos)}'"
380
377
 
381
378
  def __str__(self):
382
379
  return self.to_str()
@@ -393,6 +390,10 @@ class LineData:
393
390
  """
394
391
  cut_pos = StartEnd(self.variable_start if 0 <= self.variable_start else self.value_start,
395
392
  self.value_end) if subtext else None
393
+ if isinstance(self.value, str):
394
+ entropy = round(Util.get_shannon_entropy(self.value), 5)
395
+ else:
396
+ entropy = None
396
397
  full_output = {
397
398
  "key": self.key,
398
399
  "line": self.get_hash_or_subtext(self.line, hashed, cut_pos),
@@ -401,18 +402,18 @@ class LineData:
401
402
  # info may contain variable name - so let it be hashed if requested
402
403
  "info": self.get_hash_or_subtext(self.info, hashed),
403
404
  "pattern": self.pattern.pattern,
405
+ "variable": self.get_hash_or_subtext(self.variable, hashed),
406
+ "variable_start": self.variable_start,
407
+ "variable_end": self.variable_end,
404
408
  "separator": self.separator,
405
409
  "separator_start": self.separator_start,
406
410
  "separator_end": self.separator_end,
407
411
  "value": self.get_hash_or_subtext(self.value, hashed),
408
412
  "value_start": self.value_start,
409
413
  "value_end": self.value_end,
410
- "variable": self.get_hash_or_subtext(self.variable, hashed),
411
- "variable_start": self.variable_start,
412
- "variable_end": self.variable_end,
414
+ "entropy": entropy,
413
415
  "value_leftquote": self.value_leftquote,
414
416
  "value_rightquote": self.value_rightquote,
415
- "entropy_validation": EntropyValidator(self.value).to_dict()
416
417
  }
417
418
  reported_output = {k: v for k, v in full_output.items() if k in self.config.line_data_output}
418
419
  return reported_output
@@ -1,5 +1,5 @@
1
1
  from abc import abstractmethod, ABC
2
- from typing import List
2
+ from typing import List, Optional
3
3
 
4
4
  from credsweeper.config import Config
5
5
  from credsweeper.credentials import Candidate
@@ -40,3 +40,12 @@ class AbstractScanner(ABC):
40
40
  recursive_limit_size: int) -> List[Candidate]:
41
41
  """Abstract method to be defined in DeepScanner"""
42
42
  raise NotImplementedError(__name__)
43
+
44
@abstractmethod
def data_scan(self, data_provider: DataContentProvider, depth: int,
              recursive_limit_size: int) -> Optional[List[Candidate]]:
    """Scan the data held by ``data_provider`` - concrete scanners supply the body.

    Args:
        data_provider: provider of the content to be scanned
        depth: recursion depth value handed to the implementation
        recursive_limit_size: size limit value handed to the implementation

    Return:
        list of candidates or None, as produced by the overriding implementation

    Raises:
        NotImplementedError: always, when the abstract body itself is invoked

    """
    raise NotImplementedError(__name__)
@@ -0,0 +1,48 @@
1
+ import logging
2
+ from abc import ABC
3
+ from typing import List, Optional
4
+
5
+ from credsweeper.common.constants import ASCII, MIN_DATA_LEN
6
+ from credsweeper.credentials import Candidate
7
+ from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
8
+ from credsweeper.file_handler.data_content_provider import DataContentProvider
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class DebScanner(AbstractScanner, ABC):
    """Implements deb (ar) scanning"""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Extracts data file from .ar (debian) archive and launches data_scan

        The archive is walked member by member: the decimal size field of each
        60-byte member header is parsed, and every member that is at least
        MIN_DATA_LEN bytes long is wrapped into a new DataContentProvider and
        passed to ``recursive_scan``. Shorter members are skipped.

        Args:
            data_provider: provider whose ``data`` holds the raw archive bytes
            depth: passed unchanged to ``recursive_scan``
            recursive_limit_size: size budget; the member size is subtracted from it
                before the member is scanned

        Return:
            candidates collected from all scanned members or None when no member
            produced a result

        """
        candidates: Optional[List[Candidate]] = None
        archive = data_provider.data
        archive_len = len(archive)
        offset = 8  # b"!<arch>\n"
        while offset < archive_len:
            try:
                # bytes 48..57 of the member header keep the size as ASCII decimal
                file_size = int(archive[offset + 48:offset + 58].decode(ASCII))
            except ValueError as exc:
                # UnicodeDecodeError is a ValueError too. Without a valid size the next
                # header cannot be located, so the walk stops instead of retrying the
                # same offset again and again.
                logger.error(exc)
                break
            if file_size < 0:
                # a negative size would move the offset backwards
                logger.error("Negative member size %d at offset 0x%x", file_size, offset)
                break
            offset += 60
            if MIN_DATA_LEN <= file_size:
                try:
                    data = archive[offset:offset + file_size]
                    deb_content_provider = DataContentProvider(data=data,
                                                               file_path=data_provider.file_path,
                                                               file_type=data_provider.file_type,
                                                               info=f"{data_provider.info}|DEB:0x{offset:x}")
                    new_limit = recursive_limit_size - file_size
                    deb_candidates = self.recursive_scan(deb_content_provider, depth, new_limit)
                except Exception as exc:
                    # best effort: a member which failed to scan must not hide the others
                    logger.error(exc)
                    deb_candidates = None
                if deb_candidates is not None:
                    if candidates is None:
                        candidates = deb_candidates
                    else:
                        candidates.extend(deb_candidates)
            # data padding = 2: every member, skipped or scanned, ends on an even offset
            offset += file_size + (1 & file_size)
        return candidates