credsweeper 1.11.3__tar.gz → 1.11.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic (more details are available via the link on the original page).

Files changed (154)
  1. {credsweeper-1.11.3 → credsweeper-1.11.4}/PKG-INFO +1 -1
  2. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/__init__.py +1 -1
  3. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/__main__.py +1 -1
  4. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/app.py +21 -44
  5. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/common/constants.py +2 -5
  6. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/credentials/candidate_key.py +1 -1
  7. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/credentials/credential_manager.py +4 -3
  8. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/credentials/line_data.py +2 -5
  9. credsweeper-1.11.4/credsweeper/deep_scanner/deb_scanner.py +48 -0
  10. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/deep_scanner.py +47 -36
  11. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/gzip_scanner.py +1 -1
  12. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/byte_content_provider.py +2 -2
  13. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/content_provider.py +1 -1
  14. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/data_content_provider.py +2 -2
  15. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/diff_content_provider.py +2 -2
  16. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/file_path_extractor.py +1 -1
  17. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/files_provider.py +2 -4
  18. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/patches_provider.py +1 -1
  19. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/string_content_provider.py +2 -2
  20. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/struct_content_provider.py +1 -1
  21. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/text_content_provider.py +2 -2
  22. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_base64_encoded_pem_check.py +1 -1
  23. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_entropy_base64_check.py +2 -6
  24. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_pattern_check.py +64 -16
  25. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/file_extension.py +1 -1
  26. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/ml_validator.py +43 -21
  27. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/rules/config.yaml +3 -3
  28. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/rules/rule.py +3 -3
  29. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/utils/hop_stat.py +3 -3
  30. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/utils/pem_key_detector.py +5 -3
  31. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/utils/util.py +13 -6
  32. {credsweeper-1.11.3 → credsweeper-1.11.4}/.gitignore +0 -0
  33. {credsweeper-1.11.3 → credsweeper-1.11.4}/LICENSE +0 -0
  34. {credsweeper-1.11.3 → credsweeper-1.11.4}/README.md +0 -0
  35. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/common/__init__.py +0 -0
  36. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/common/keyword_checklist.py +0 -0
  37. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/common/keyword_checklist.txt +0 -0
  38. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/common/keyword_pattern.py +0 -0
  39. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/common/morpheme_checklist.txt +0 -0
  40. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/config/__init__.py +0 -0
  41. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/config/config.py +0 -0
  42. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/credentials/__init__.py +0 -0
  43. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/credentials/augment_candidates.py +0 -0
  44. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/credentials/candidate.py +0 -0
  45. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/credentials/candidate_group_generator.py +0 -0
  46. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/__init__.py +0 -0
  47. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/abstract_scanner.py +0 -0
  48. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/byte_scanner.py +0 -0
  49. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/bzip2_scanner.py +0 -0
  50. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/docx_scanner.py +0 -0
  51. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/eml_scanner.py +0 -0
  52. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/encoder_scanner.py +0 -0
  53. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/html_scanner.py +0 -0
  54. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/jks_scanner.py +0 -0
  55. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/lang_scanner.py +0 -0
  56. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/lzma_scanner.py +0 -0
  57. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/mxfile_scanner.py +0 -0
  58. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/pdf_scanner.py +0 -0
  59. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/pkcs12_scanner.py +0 -0
  60. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/pptx_scanner.py +0 -0
  61. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/tar_scanner.py +0 -0
  62. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/tmx_scanner.py +0 -0
  63. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/xlsx_scanner.py +0 -0
  64. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/xml_scanner.py +0 -0
  65. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/deep_scanner/zip_scanner.py +0 -0
  66. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/__init__.py +0 -0
  67. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/abstract_provider.py +0 -0
  68. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/analysis_target.py +0 -0
  69. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/file_handler/descriptor.py +0 -0
  70. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/__init__.py +0 -0
  71. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/filter.py +0 -0
  72. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/__init__.py +0 -0
  73. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/general_keyword.py +0 -0
  74. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/general_pattern.py +0 -0
  75. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/group.py +0 -0
  76. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/password_keyword.py +0 -0
  77. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/token_pattern.py +0 -0
  78. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/url_credentials_group.py +0 -0
  79. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/weird_base36_token.py +0 -0
  80. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/group/weird_base64_token.py +0 -0
  81. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/line_git_binary_check.py +0 -0
  82. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/line_specific_key_check.py +0 -0
  83. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/line_uue_part_check.py +0 -0
  84. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_allowlist_check.py +0 -0
  85. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_array_dictionary_check.py +0 -0
  86. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_atlassian_token_check.py +0 -0
  87. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_azure_token_check.py +0 -0
  88. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_base32_data_check.py +0 -0
  89. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_base64_data_check.py +0 -0
  90. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_base64_key_check.py +0 -0
  91. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_base64_part_check.py +0 -0
  92. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_blocklist_check.py +0 -0
  93. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_camel_case_check.py +0 -0
  94. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_couple_keyword_check.py +0 -0
  95. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_dictionary_keyword_check.py +0 -0
  96. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_dictionary_value_length_check.py +0 -0
  97. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_discord_bot_check.py +0 -0
  98. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_entropy_base32_check.py +0 -0
  99. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_entropy_base36_check.py +0 -0
  100. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_entropy_base_check.py +0 -0
  101. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_file_path_check.py +0 -0
  102. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_github_check.py +0 -0
  103. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_grafana_check.py +0 -0
  104. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_grafana_service_check.py +0 -0
  105. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_hex_number_check.py +0 -0
  106. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_jfrog_token_check.py +0 -0
  107. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_json_web_token_check.py +0 -0
  108. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_last_word_check.py +0 -0
  109. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_method_check.py +0 -0
  110. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_not_allowed_pattern_check.py +0 -0
  111. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_not_part_encoded_check.py +0 -0
  112. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_number_check.py +0 -0
  113. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_similarity_check.py +0 -0
  114. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_split_keyword_check.py +0 -0
  115. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_string_type_check.py +0 -0
  116. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_token_base32_check.py +0 -0
  117. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_token_base36_check.py +0 -0
  118. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_token_base64_check.py +0 -0
  119. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_token_base_check.py +0 -0
  120. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/filters/value_token_check.py +0 -0
  121. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/logger/__init__.py +0 -0
  122. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/logger/logger.py +0 -0
  123. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/__init__.py +0 -0
  124. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/__init__.py +0 -0
  125. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/entropy_evaluation.py +0 -0
  126. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/feature.py +0 -0
  127. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/has_html_tag.py +0 -0
  128. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/is_secret_numeric.py +0 -0
  129. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/length_of_attribute.py +0 -0
  130. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/morpheme_dense.py +0 -0
  131. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/rule_name.py +0 -0
  132. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/search_in_attribute.py +0 -0
  133. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in.py +0 -0
  134. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_path.py +0 -0
  135. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_postamble.py +0 -0
  136. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_preamble.py +0 -0
  137. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_transition.py +0 -0
  138. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_value.py +0 -0
  139. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/features/word_in_variable.py +0 -0
  140. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/ml_config.json +0 -0
  141. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/ml_model/ml_model.onnx +0 -0
  142. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/py.typed +0 -0
  143. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/rules/__init__.py +0 -0
  144. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/scanner/__init__.py +0 -0
  145. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/__init__.py +0 -0
  146. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/multi_pattern.py +0 -0
  147. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/pem_key_pattern.py +0 -0
  148. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/scan_type.py +0 -0
  149. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/scanner/scan_type/single_pattern.py +0 -0
  150. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/scanner/scanner.py +0 -0
  151. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/secret/config.json +0 -0
  152. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/secret/log.yaml +0 -0
  153. {credsweeper-1.11.3 → credsweeper-1.11.4}/credsweeper/utils/__init__.py +0 -0
  154. {credsweeper-1.11.3 → credsweeper-1.11.4}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: credsweeper
3
- Version: 1.11.3
3
+ Version: 1.11.4
4
4
  Summary: Credential Sweeper
5
5
  Project-URL: Homepage, https://github.com/Samsung/CredSweeper
6
6
  Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues
@@ -18,4 +18,4 @@ __all__ = [
18
18
  '__version__'
19
19
  ]
20
20
 
21
- __version__ = "1.11.3"
21
+ __version__ = "1.11.4"
@@ -63,7 +63,7 @@ def logger_levels(log_level: str) -> str:
63
63
  Returns True if log_level UPPERCASE is one of keys
64
64
  """
65
65
  val = log_level.upper()
66
- if any(val == i for i in Logger.LEVELS.keys()):
66
+ if val in Logger.LEVELS:
67
67
  return val
68
68
  raise ArgumentTypeError(f"Log level provided: {log_level} -- must be one of: {' | '.join(Logger.LEVELS.keys())}")
69
69
 
@@ -15,11 +15,13 @@ from credsweeper.common.constants import Severity, ThresholdPreset, DiffRowType,
15
15
  from credsweeper.config import Config
16
16
  from credsweeper.credentials import Candidate, CredentialManager, CandidateKey
17
17
  from credsweeper.deep_scanner.deep_scanner import DeepScanner
18
+ from credsweeper.file_handler.content_provider import ContentProvider
18
19
  from credsweeper.file_handler.diff_content_provider import DiffContentProvider
19
20
  from credsweeper.file_handler.file_path_extractor import FilePathExtractor
20
21
  from credsweeper.file_handler.abstract_provider import AbstractProvider
21
22
  from credsweeper.file_handler.text_content_provider import TextContentProvider
22
23
  from credsweeper.scanner import Scanner
24
+ from credsweeper.ml_model.ml_validator import MlValidator
23
25
  from credsweeper.utils import Util
24
26
 
25
27
  logger = logging.getLogger(__name__)
@@ -94,7 +96,7 @@ class CredSweeper:
94
96
  log_level: str - level for pool initializer according logging levels (UPPERCASE)
95
97
 
96
98
  """
97
- self.pool_count: int = int(pool_count) if int(pool_count) > 1 else 1
99
+ self.pool_count: int = max(1, int(pool_count))
98
100
  if not (_severity := Severity.get(severity)):
99
101
  raise RuntimeError(f"Severity level provided: {severity}"
100
102
  f" -- must be one of: {' | '.join([i.value for i in Severity])}")
@@ -123,9 +125,9 @@ class CredSweeper:
123
125
  self.ml_config = ml_config
124
126
  self.ml_model = ml_model
125
127
  self.ml_providers = ml_providers
126
- self.ml_validator = None
127
128
  self.__thrifty = thrifty
128
129
  self.__log_level = log_level
130
+ self.__ml_validator: Optional[MlValidator] = None
129
131
 
130
132
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
131
133
 
@@ -182,35 +184,22 @@ class CredSweeper:
182
184
 
183
185
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
184
186
 
185
- # the import cannot be done on top due
186
- # TypeError: cannot pickle 'onnxruntime.capi.onnxruntime_pybind11_state.InferenceSession' object
187
- from credsweeper.ml_model import MlValidator
188
-
189
- # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
190
-
191
187
  @property
192
188
  def ml_validator(self) -> MlValidator:
193
189
  """ml_validator getter"""
194
- from credsweeper.ml_model import MlValidator
195
190
  if not self.__ml_validator:
196
- self.__ml_validator: MlValidator = MlValidator(
191
+ self.__ml_validator = MlValidator(
197
192
  threshold=self.ml_threshold, #
198
193
  ml_config=self.ml_config, #
199
194
  ml_model=self.ml_model, #
200
195
  ml_providers=self.ml_providers, #
201
196
  )
202
- assert self.__ml_validator, "self.__ml_validator was not initialized"
197
+ if not self.__ml_validator:
198
+ raise RuntimeError("MlValidator was not initialized!")
203
199
  return self.__ml_validator
204
200
 
205
201
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
206
202
 
207
- @ml_validator.setter
208
- def ml_validator(self, _ml_validator: Optional[MlValidator]) -> None:
209
- """ml_validator setter"""
210
- self.__ml_validator = _ml_validator
211
-
212
- # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
213
-
214
203
  @staticmethod
215
204
  def pool_initializer(log_kwargs) -> None:
216
205
  """Ignore SIGINT in child processes."""
@@ -219,20 +208,6 @@ class CredSweeper:
219
208
 
220
209
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
221
210
 
222
- @property
223
- def config(self) -> Config:
224
- """config getter"""
225
- return self.__config
226
-
227
- # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
228
-
229
- @config.setter
230
- def config(self, config: Config) -> None:
231
- """config setter"""
232
- self.__config = config
233
-
234
- # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
235
-
236
211
  def run(self, content_provider: AbstractProvider) -> int:
237
212
  """Run an analysis of 'content_provider' object.
238
213
 
@@ -241,9 +216,10 @@ class CredSweeper:
241
216
 
242
217
  """
243
218
  _empty_list: Sequence[Union[DiffContentProvider, TextContentProvider]] = []
244
- file_extractors: Sequence[Union[DiffContentProvider, TextContentProvider]] = \
245
- content_provider.get_scannable_files(self.config) if content_provider else _empty_list
246
- logger.info(f"Start Scanner for {len(file_extractors)} providers")
219
+ file_extractors = content_provider.get_scannable_files(self.config) if content_provider else _empty_list
220
+ if not file_extractors:
221
+ logger.info(f"No scannable targets for {len(content_provider.paths)} paths")
222
+ return 0
247
223
  self.scan(file_extractors)
248
224
  self.post_processing()
249
225
  # PatchesProvider has the attribute. Circular import error appears with using the isinstance
@@ -260,7 +236,7 @@ class CredSweeper:
260
236
  content_providers: file objects to scan
261
237
 
262
238
  """
263
- if 1 < self.pool_count:
239
+ if 1 < self.pool_count and 1 < len(content_providers):
264
240
  self.__multi_jobs_scan(content_providers)
265
241
  else:
266
242
  self.__single_job_scan(content_providers)
@@ -269,6 +245,7 @@ class CredSweeper:
269
245
 
270
246
  def __single_job_scan(self, content_providers: Sequence[Union[DiffContentProvider, TextContentProvider]]) -> None:
271
247
  """Performs scan in main thread"""
248
+ logger.info(f"Scan for {len(content_providers)} providers")
272
249
  all_cred = self.files_scan(content_providers)
273
250
  self.credential_manager.set_credentials(all_cred)
274
251
 
@@ -284,12 +261,14 @@ class CredSweeper:
284
261
  if "SILENCE" == self.__log_level:
285
262
  logging.addLevelName(60, "SILENCE")
286
263
  log_kwargs["level"] = self.__log_level
287
- with multiprocessing.get_context("spawn").Pool(processes=self.pool_count,
288
- initializer=self.pool_initializer,
264
+ pool_count = min(self.pool_count, len(content_providers))
265
+ logger.info(f"Scan in {pool_count} processes for {len(content_providers)} providers")
266
+ with multiprocessing.get_context("spawn").Pool(processes=pool_count,
267
+ initializer=CredSweeper.pool_initializer,
289
268
  initargs=(log_kwargs, )) as pool:
290
269
  try:
291
- for scan_results in pool.imap_unordered(self.files_scan, (content_providers[x::self.pool_count]
292
- for x in range(self.pool_count))):
270
+ for scan_results in pool.imap_unordered(self.files_scan,
271
+ (content_providers[x::pool_count] for x in range(pool_count))):
293
272
  for cred in scan_results:
294
273
  self.credential_manager.add_credential(cred)
295
274
  except KeyboardInterrupt:
@@ -301,9 +280,7 @@ class CredSweeper:
301
280
 
302
281
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
303
282
 
304
- def files_scan(
305
- self, #
306
- content_providers: Sequence[Union[DiffContentProvider, TextContentProvider]]) -> List[Candidate]:
283
+ def files_scan(self, content_providers: Sequence[ContentProvider]) -> List[Candidate]:
307
284
  """Auxiliary method for scan one sequence"""
308
285
  all_cred: List[Candidate] = []
309
286
  for provider in content_providers:
@@ -316,7 +293,7 @@ class CredSweeper:
316
293
 
317
294
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
318
295
 
319
- def file_scan(self, content_provider: Union[DiffContentProvider, TextContentProvider]) -> List[Candidate]:
296
+ def file_scan(self, content_provider: ContentProvider) -> List[Candidate]:
320
297
  """Run scanning of file from 'file_provider'.
321
298
 
322
299
  Args:
@@ -96,10 +96,6 @@ class Chars(Enum):
96
96
  ASCII_PRINTABLE = string.printable
97
97
 
98
98
 
99
- ENTROPY_LIMIT_BASE64 = 4.5
100
- ENTROPY_LIMIT_BASE3x = 3
101
-
102
-
103
99
  class GroupType(Enum):
104
100
  """Group type - used in Group constructor for load predefined set of filters"""
105
101
  KEYWORD = "keyword"
@@ -148,7 +144,8 @@ OVERLAP_SIZE = 1000
148
144
  CHUNK_STEP_SIZE = CHUNK_SIZE - OVERLAP_SIZE
149
145
  # ML hunk size to limit of variable or value size and get substring near value
150
146
  ML_HUNK = 80
151
- """ values according https://docs.python.org/3/library/codecs.html """
147
+
148
+ # values according https://docs.python.org/3/library/codecs.html
152
149
  UTF_8 = "utf_8"
153
150
  UTF_16 = "utf_16"
154
151
  LATIN_1 = "latin_1"
@@ -24,7 +24,7 @@ class CandidateKey:
24
24
  return self.key == other.key
25
25
 
26
26
  def __ne__(self, other):
27
- return not (self == other)
27
+ return not bool(self == other)
28
28
 
29
29
  def __repr__(self) -> str:
30
30
  return f"{self.key}:{self.__line}"
@@ -110,7 +110,8 @@ class CredentialManager:
110
110
  # Match by file path+line num+value. Value required so two different credentials still be
111
111
  # processed independently
112
112
  candidate_key = CandidateKey(line_data)
113
- if candidate_key not in groups:
114
- groups[candidate_key] = list()
115
- groups[candidate_key].append(credential_candidate)
113
+ if candidate_key in groups:
114
+ groups[candidate_key].append(credential_candidate)
115
+ else:
116
+ groups[candidate_key] = [credential_candidate]
116
117
  return groups
@@ -327,11 +327,8 @@ class LineData:
327
327
  True if file require quotation, False otherwise
328
328
 
329
329
  """
330
- if not self.path:
331
- return False
332
- if Util.get_extension(self.path) in self.config.source_quote_ext:
333
- return True
334
- return False
330
+ file_type = self.file_type or Util.get_extension(self.path)
331
+ return bool(file_type) and file_type in self.config.source_quote_ext
335
332
 
336
333
  @staticmethod
337
334
  def get_hash_or_subtext(
@@ -0,0 +1,48 @@
1
+ import logging
2
+ from abc import ABC
3
+ from typing import List, Optional
4
+
5
+ from credsweeper.common.constants import ASCII, MIN_DATA_LEN
6
+ from credsweeper.credentials import Candidate
7
+ from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
8
+ from credsweeper.file_handler.data_content_provider import DataContentProvider
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ class DebScanner(AbstractScanner, ABC):
14
+ """Implements deb (ar) scanning"""
15
+
16
+ def data_scan(
17
+ self, #
18
+ data_provider: DataContentProvider, #
19
+ depth: int, #
20
+ recursive_limit_size: int) -> Optional[List[Candidate]]:
21
+ """Extracts data file from .ar (debian) archive and launches data_scan"""
22
+ candidates: Optional[List[Candidate]] = None
23
+ offset = 8 # b"!<arch>\n"
24
+ while offset < len(data_provider.data):
25
+ try:
26
+ file_size_data = data_provider.data[offset + 48:offset + 58]
27
+ file_size = int(file_size_data.decode(ASCII))
28
+ offset += 60
29
+ if file_size < MIN_DATA_LEN:
30
+ offset += file_size
31
+ continue
32
+ data = data_provider.data[offset:offset + file_size]
33
+ deb_content_provider = DataContentProvider(data=data,
34
+ file_path=data_provider.file_path,
35
+ file_type=data_provider.file_type,
36
+ info=f"{data_provider.info}|DEB:0x{offset:x}")
37
+ new_limit = recursive_limit_size - file_size
38
+ deb_candidates = self.recursive_scan(deb_content_provider, depth, new_limit)
39
+ if deb_candidates is not None:
40
+ if candidates:
41
+ candidates.extend(deb_candidates)
42
+ else:
43
+ candidates = deb_candidates
44
+ # data padding = 2
45
+ offset += 1 + file_size if 1 & file_size else file_size
46
+ except Exception as exc:
47
+ logger.error(exc)
48
+ return candidates
@@ -1,8 +1,9 @@
1
+ import contextlib
1
2
  import datetime
2
3
  import logging
3
4
  from typing import List, Optional, Any, Tuple, Union
4
5
 
5
- from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN
6
+ from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN, MIN_VALUE_LENGTH
6
7
  from credsweeper.config import Config
7
8
  from credsweeper.credentials import Candidate
8
9
  from credsweeper.credentials.augment_candidates import augment_candidates
@@ -16,6 +17,7 @@ from credsweeper.scanner import Scanner
16
17
  from credsweeper.utils import Util
17
18
  from .byte_scanner import ByteScanner
18
19
  from .bzip2_scanner import Bzip2Scanner
20
+ from .deb_scanner import DebScanner
19
21
  from .docx_scanner import DocxScanner
20
22
  from .eml_scanner import EmlScanner
21
23
  from .encoder_scanner import EncoderScanner
@@ -54,6 +56,7 @@ class DeepScanner(
54
56
  Pkcs12Scanner, #
55
57
  PptxScanner, #
56
58
  TarScanner, #
59
+ DebScanner, #
57
60
  XmlScanner, #
58
61
  XlsxScanner, #
59
62
  ZipScanner
@@ -114,6 +117,9 @@ class DeepScanner(
114
117
  elif Util.is_tar(data):
115
118
  if 0 < depth:
116
119
  deep_scanners.append(TarScanner)
120
+ elif Util.is_deb(data):
121
+ if 0 < depth:
122
+ deep_scanners.append(DebScanner)
117
123
  elif Util.is_gzip(data):
118
124
  if 0 < depth:
119
125
  deep_scanners.append(GzipScanner)
@@ -209,10 +215,10 @@ class DeepScanner(
209
215
  int) else RECURSIVE_SCAN_LIMITATION
210
216
  candidates: List[Candidate] = []
211
217
  data: Optional[bytes] = None
212
- if isinstance(content_provider, TextContentProvider) or isinstance(content_provider, ByteContentProvider):
218
+ if isinstance(content_provider, (TextContentProvider, ByteContentProvider)):
213
219
  # Feature to scan files which might be containers
214
220
  data = content_provider.data
215
- info = "FILE"
221
+ info = f"FILE:{content_provider.file_path}"
216
222
  elif isinstance(content_provider, DiffContentProvider) and content_provider.diff:
217
223
  candidates = self.scanner.scan(content_provider)
218
224
  # Feature to scan binary diffs
@@ -220,7 +226,7 @@ class DeepScanner(
220
226
  # the check for legal fix mypy issue
221
227
  if isinstance(diff, bytes):
222
228
  data = diff
223
- info = "DIFF"
229
+ info = f"DIFF:{content_provider.file_path}"
224
230
  else:
225
231
  logger.warning(f"Content provider {type(content_provider)} does not support deep scan")
226
232
  info = "NA"
@@ -298,7 +304,7 @@ class DeepScanner(
298
304
  items: List[Tuple[Union[int, str], Any]] = []
299
305
  struct_key: Optional[str] = None
300
306
  struct_value: Optional[str] = None
301
- line_for_keyword_rules = ""
307
+ lines_for_keyword_rules = []
302
308
  if isinstance(struct_provider.struct, dict):
303
309
  for key, value in struct_provider.struct.items():
304
310
  if isinstance(value, (list, tuple)) and 1 == len(value):
@@ -309,13 +315,13 @@ class DeepScanner(
309
315
  # for transformation {"key": "api_key", "value": "XXXXXXX"} -> {"api_key": "XXXXXXX"}
310
316
  struct_key = struct_provider.struct.get("key")
311
317
  struct_value = struct_provider.struct.get("value")
312
- elif isinstance(struct_provider.struct, list) or isinstance(struct_provider.struct, tuple):
318
+ elif isinstance(struct_provider.struct, (list, tuple)):
313
319
  items = list(enumerate(struct_provider.struct))
314
320
  else:
315
321
  logger.error("Not supported type:%s val:%s", str(type(struct_provider.struct)), str(struct_provider.struct))
316
322
 
317
323
  for key, value in items:
318
- if isinstance(value, dict) or isinstance(value, (list, tuple)) and 1 < len(value):
324
+ if isinstance(value, dict) or isinstance(value, (list, tuple)) and 1 <= len(value):
319
325
  val_struct_provider = StructContentProvider(struct=value,
320
326
  file_path=struct_provider.file_path,
321
327
  file_type=struct_provider.file_type,
@@ -324,52 +330,57 @@ class DeepScanner(
324
330
  candidates.extend(new_candidates)
325
331
 
326
332
  elif isinstance(value, bytes):
327
- bytes_struct_provider = DataContentProvider(data=value,
328
- file_path=struct_provider.file_path,
329
- file_type=struct_provider.file_type,
330
- info=f"{struct_provider.info}|BYTES:{key}")
331
- new_limit = recursive_limit_size - len(value)
332
- new_candidates = self.recursive_scan(bytes_struct_provider, depth, new_limit)
333
- candidates.extend(new_candidates)
333
+ if MIN_DATA_LEN <= len(value):
334
+ bytes_struct_provider = DataContentProvider(data=value,
335
+ file_path=struct_provider.file_path,
336
+ file_type=struct_provider.file_type,
337
+ info=f"{struct_provider.info}|BYTES:{key}")
338
+ new_limit = recursive_limit_size - len(value)
339
+ new_candidates = self.recursive_scan(bytes_struct_provider, depth, new_limit)
340
+ candidates.extend(new_candidates)
341
+ if MIN_VALUE_LENGTH <= len(value) and isinstance(key, str) \
342
+ and self.scanner.keywords_required_substrings_check(key.lower()):
343
+ str_val = str(value)
344
+ lines_for_keyword_rules.append(f"{key} = '{str_val}'" if '"' in str_val else f'{key} = "{str_val}"')
334
345
 
335
346
  elif isinstance(value, str):
336
- data = value.encode(encoding=DEFAULT_ENCODING, errors='replace')
337
- str_struct_provider = DataContentProvider(data=data,
338
- file_path=struct_provider.file_path,
339
- file_type=struct_provider.file_type,
340
- info=f"{struct_provider.info}|STRING:{key}")
341
- new_limit = recursive_limit_size - len(str_struct_provider.data)
342
- new_candidates = self.recursive_scan(str_struct_provider, depth, new_limit)
343
- candidates.extend(new_candidates)
344
-
347
+ if MIN_DATA_LEN <= len(value):
348
+ # recursive scan only for data which may be decoded at least
349
+ with contextlib.suppress(UnicodeError):
350
+ data = value.encode(encoding=DEFAULT_ENCODING, errors='strict')
351
+ str_struct_provider = DataContentProvider(data=data,
352
+ file_path=struct_provider.file_path,
353
+ file_type=struct_provider.file_type,
354
+ info=f"{struct_provider.info}|STRING:{key}")
355
+ new_limit = recursive_limit_size - len(str_struct_provider.data)
356
+ new_candidates = self.recursive_scan(str_struct_provider, depth, new_limit)
357
+ candidates.extend(new_candidates)
345
358
  # use key = "value" scan for common cases like in TOML
346
- if isinstance(key, str) and self.scanner.keywords_required_substrings_check(key):
347
- line_for_keyword_rules += f"{key} = \"{value}\"; "
359
+ if MIN_VALUE_LENGTH <= len(value) and isinstance(key, str) \
360
+ and self.scanner.keywords_required_substrings_check(key.lower()):
361
+ lines_for_keyword_rules.append(f"{key} = '{value}'" if '"' in value else f'{key} = "{value}"')
348
362
 
349
363
  elif isinstance(value, (int, float, datetime.date, datetime.datetime)):
350
- # use the fields only in case of matched keywords
351
- if isinstance(key, str) and self.scanner.keywords_required_substrings_check(key):
352
- line_for_keyword_rules += f"{key} = \"{value}\"; "
353
-
364
+ # skip useless types
365
+ pass
354
366
  else:
355
367
  logger.warning("Not supported type:%s value(%s)", str(type(value)), str(value))
356
368
 
357
- if line_for_keyword_rules:
358
- str_provider = StringContentProvider([line_for_keyword_rules],
369
+ if lines_for_keyword_rules:
370
+ str_provider = StringContentProvider(lines_for_keyword_rules,
359
371
  file_path=struct_provider.file_path,
360
- file_type=".toml",
361
- info=f"{struct_provider.info}|KEYWORD:`{line_for_keyword_rules}`")
372
+ file_type=".py",
373
+ info=f"{struct_provider.info}|KEYWORD:`{lines_for_keyword_rules}`")
362
374
  new_candidates = self.scanner.scan(str_provider)
363
375
  augment_candidates(candidates, new_candidates)
364
376
 
365
377
  # last check when dictionary is {"key": "api_key", "value": "XXXXXXX"} -> {"api_key": "XXXXXXX"}
366
378
  if isinstance(struct_key, str) and isinstance(struct_value, str):
367
- line_for_keyword_rules = f"{struct_key} = \"{struct_value}\""
368
379
  key_value_provider = StringContentProvider(
369
- [line_for_keyword_rules],
380
+ [f"{struct_key} = '{struct_value}'" if '"' in struct_value else f'{struct_key} = "{struct_value}"'],
370
381
  file_path=struct_provider.file_path,
371
382
  file_type=".toml",
372
- info=f"{struct_provider.info}|KEY_VALUE:`{line_for_keyword_rules}`")
383
+ info=f"{struct_provider.info}|KEY_VALUE:`{lines_for_keyword_rules}`")
373
384
  new_candidates = self.scanner.scan(key_value_provider)
374
385
  augment_candidates(candidates, new_candidates)
375
386
  return candidates
@@ -31,7 +31,7 @@ class GzipScanner(AbstractScanner, ABC):
31
31
  gzip_content_provider = DataContentProvider(data=f.read(),
32
32
  file_path=new_path,
33
33
  file_type=Util.get_extension(new_path),
34
- info=f"{data_provider.info}|GZIP:{file_path}")
34
+ info=f"{data_provider.info}|GZIP:{new_path}")
35
35
  new_limit = recursive_limit_size - len(gzip_content_provider.data)
36
36
  gzip_candidates = self.recursive_scan(gzip_content_provider, depth, new_limit)
37
37
  return gzip_candidates
@@ -32,10 +32,10 @@ class ByteContentProvider(ContentProvider):
32
32
  def free(self) -> None:
33
33
  """free data after scan to reduce memory usage"""
34
34
  self.__data = None
35
- if hasattr(self, "data"):
35
+ if "data" in self.__dict__:
36
36
  delattr(self, "data")
37
37
  self.__lines = None
38
- if hasattr(self, "lines"):
38
+ if "lines" in self.__dict__:
39
39
  delattr(self, "lines")
40
40
 
41
41
  @cached_property
@@ -93,7 +93,7 @@ class ContentProvider(ABC):
93
93
  if min_len > len(line.strip()):
94
94
  # Ignore target if stripped part is too short for all types
95
95
  continue
96
- elif MAX_LINE_LENGTH < len(line):
96
+ if MAX_LINE_LENGTH < len(line):
97
97
  for chunk_start, chunk_end in Util.get_chunks(len(line)):
98
98
  target = AnalysisTarget(
99
99
  line_pos=line_pos, #
@@ -54,10 +54,10 @@ class DataContentProvider(ContentProvider):
54
54
  def free(self) -> None:
55
55
  """free data after scan to reduce memory usage"""
56
56
  self.__data = None
57
- if hasattr(self, "data"):
57
+ if "data" in self.__dict__:
58
58
  delattr(self, "data")
59
59
  self.__text = None
60
- if hasattr(self, "text"):
60
+ if "text" in self.__dict__:
61
61
  delattr(self, "text")
62
62
  self.structure = None
63
63
  self.decoded = None
@@ -48,8 +48,8 @@ class DiffContentProvider(ContentProvider):
48
48
 
49
49
  def free(self) -> None:
50
50
  """free data after scan to reduce memory usage"""
51
- self.__diff = None
52
- if hasattr(self, "diff"):
51
+ self.__diff = []
52
+ if "diff" in self.__dict__:
53
53
  delattr(self, "diff")
54
54
 
55
55
  @staticmethod
@@ -162,7 +162,7 @@ class FilePathExtractor:
162
162
  True when the file is oversize or less than MIN_DATA_LEN, or unsupported
163
163
  """
164
164
  path = reference[1] if isinstance(reference, tuple) else reference
165
- if isinstance(path, str) or isinstance(path, Path):
165
+ if isinstance(path, (str, Path)):
166
166
  file_size = os.path.getsize(path)
167
167
  elif isinstance(path, io.BytesIO):
168
168
  current_pos = path.tell()
@@ -42,7 +42,7 @@ class FilesProvider(AbstractProvider):
42
42
  """
43
43
  text_content_provider_list: List[Union[DiffContentProvider, TextContentProvider]] = []
44
44
  for path in self.paths:
45
- if isinstance(path, str) or isinstance(path, Path):
45
+ if isinstance(path, (str, Path)):
46
46
  new_files = FilePathExtractor.get_file_paths(config, path)
47
47
  if self.skip_ignored:
48
48
  new_files = FilePathExtractor.apply_gitignore(new_files)
@@ -50,9 +50,7 @@ class FilesProvider(AbstractProvider):
50
50
  text_content_provider_list.append(TextContentProvider(_file))
51
51
  elif isinstance(path, io.BytesIO):
52
52
  text_content_provider_list.append(TextContentProvider((":memory:", path)))
53
- elif isinstance(path, tuple) \
54
- and (isinstance(path[0], str) or isinstance(path[0], Path)) \
55
- and isinstance(path[1], io.BytesIO):
53
+ elif isinstance(path, tuple) and (isinstance(path[0], (str, Path))) and isinstance(path[1], io.BytesIO):
56
54
  # suppose, all the files must be scanned
57
55
  text_content_provider_list.append(TextContentProvider(path))
58
56
  else:
@@ -37,7 +37,7 @@ class PatchesProvider(AbstractProvider):
37
37
  for file_path in self.paths:
38
38
  if FilePathExtractor.check_file_size(config, file_path):
39
39
  continue
40
- if isinstance(file_path, str) or isinstance(file_path, Path):
40
+ if isinstance(file_path, (str, Path)):
41
41
  raw_patches.append(Util.read_file(file_path))
42
42
  elif isinstance(file_path, io.BytesIO):
43
43
  the_patch = Util.decode_bytes(file_path.read())
@@ -38,10 +38,10 @@ class StringContentProvider(ContentProvider):
38
38
  def free(self) -> None:
39
39
  """free data after scan to reduce memory usage"""
40
40
  self.__lines = []
41
- if hasattr(self, "lines"):
41
+ if "lines" in self.__dict__:
42
42
  delattr(self, "lines")
43
43
  self.__line_numbers = []
44
- if hasattr(self, "line_numbers"):
44
+ if "line_numbers" in self.__dict__:
45
45
  delattr(self, "line_numbers")
46
46
 
47
47
  @cached_property
@@ -38,7 +38,7 @@ class StructContentProvider(ContentProvider):
38
38
  def free(self) -> None:
39
39
  """free data after scan to reduce memory usage"""
40
40
  self.__struct = None
41
- if hasattr(self, "struct"):
41
+ if "struct" in self.__dict__:
42
42
  delattr(self, "struct")
43
43
 
44
44
  def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTarget, None, None]:
@@ -42,10 +42,10 @@ class TextContentProvider(ContentProvider):
42
42
  def free(self) -> None:
43
43
  """free data after scan to reduce memory usage"""
44
44
  self.__data = None
45
- if hasattr(self, "data"):
45
+ if "data" in self.__dict__:
46
46
  delattr(self, "data")
47
47
  self.__lines = None
48
- if hasattr(self, "lines"):
48
+ if "lines" in self.__dict__:
49
49
  delattr(self, "lines")
50
50
  if isinstance(self.__io, io.BytesIO) and self.__io and not self.__io.closed:
51
51
  self.__io.close()
@@ -30,7 +30,7 @@ class ValueBase64EncodedPem(Filter):
30
30
  with contextlib.suppress(Exception):
31
31
  text = Util.decode_base64(line_data.value, padding_safe=True, urlsafe_detect=True)
32
32
  lines = text.decode(ASCII).splitlines()
33
- lines_pos = [x for x in range(len(lines))]
33
+ lines_pos = list(range(len(lines)))
34
34
  for line_pos, line in zip(lines_pos, lines):
35
35
  if PEM_BEGIN_PATTERN in line:
36
36
  new_target = AnalysisTarget(line_pos, lines, lines_pos, target.descriptor)
@@ -19,12 +19,8 @@ class ValueEntropyBase64Check(ValueEntropyBaseCheck):
19
19
  y = 0.944 * math.log2(x) - 0.009 * x - 0.04
20
20
  elif 65 <= x < 256:
21
21
  y = 0.621 * math.log2(x) - 0.003 * x + 1.54
22
- elif 256 <= x < 512:
23
- y = 5.77
24
- elif 512 <= x < 1024:
25
- y = 5.89
26
- elif 1024 <= x:
27
- y = 5.94
22
+ elif 256 <= x:
23
+ y = 6 - 64 / x
28
24
  else:
29
25
  y = 0
30
26
  return y