credsweeper 1.11.2__tar.gz → 1.11.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

Files changed (157) hide show
  1. {credsweeper-1.11.2 → credsweeper-1.11.3}/PKG-INFO +1 -1
  2. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/__init__.py +1 -1
  3. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/__main__.py +6 -4
  4. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/app.py +7 -3
  5. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/common/keyword_pattern.py +15 -9
  6. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/common/morpheme_checklist.txt +4 -2
  7. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/credentials/line_data.py +14 -10
  8. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/abstract_scanner.py +10 -1
  9. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/deep_scanner.py +19 -8
  10. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/docx_scanner.py +1 -1
  11. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/encoder_scanner.py +2 -2
  12. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/html_scanner.py +3 -3
  13. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/jks_scanner.py +2 -4
  14. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/lang_scanner.py +2 -2
  15. credsweeper-1.11.3/credsweeper/deep_scanner/lzma_scanner.py +40 -0
  16. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/pkcs12_scanner.py +3 -5
  17. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/xml_scanner.py +2 -2
  18. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/data_content_provider.py +21 -12
  19. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_array_dictionary_check.py +3 -1
  20. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_azure_token_check.py +1 -2
  21. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_base64_part_check.py +30 -21
  22. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_discord_bot_check.py +1 -2
  23. credsweeper-1.11.3/credsweeper/filters/value_entropy_base32_check.py +22 -0
  24. credsweeper-1.11.3/credsweeper/filters/value_entropy_base36_check.py +23 -0
  25. credsweeper-1.11.3/credsweeper/filters/value_entropy_base64_check.py +30 -0
  26. credsweeper-1.11.3/credsweeper/filters/value_entropy_base_check.py +37 -0
  27. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_file_path_check.py +1 -1
  28. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_hex_number_check.py +3 -3
  29. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_json_web_token_check.py +4 -5
  30. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_string_type_check.py +11 -3
  31. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_token_base32_check.py +0 -4
  32. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_token_base36_check.py +0 -4
  33. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_token_base64_check.py +0 -4
  34. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_token_check.py +1 -1
  35. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/file_extension.py +1 -1
  36. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/morpheme_dense.py +0 -4
  37. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/rule_name.py +1 -1
  38. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/word_in_path.py +0 -9
  39. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/word_in_postamble.py +0 -11
  40. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/word_in_preamble.py +0 -11
  41. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/word_in_transition.py +0 -11
  42. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/word_in_value.py +0 -11
  43. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/word_in_variable.py +0 -11
  44. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/ml_validator.py +4 -3
  45. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/rules/config.yaml +238 -208
  46. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/scanner/scan_type/scan_type.py +2 -3
  47. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/scanner/scanner.py +7 -1
  48. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/secret/config.json +16 -5
  49. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/utils/pem_key_detector.py +4 -5
  50. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/utils/util.py +67 -144
  51. credsweeper-1.11.2/credsweeper/filters/value_entropy_base32_check.py +0 -42
  52. credsweeper-1.11.2/credsweeper/filters/value_entropy_base36_check.py +0 -46
  53. credsweeper-1.11.2/credsweeper/filters/value_entropy_base64_check.py +0 -59
  54. credsweeper-1.11.2/credsweeper/utils/entropy_validator.py +0 -72
  55. {credsweeper-1.11.2 → credsweeper-1.11.3}/.gitignore +0 -0
  56. {credsweeper-1.11.2 → credsweeper-1.11.3}/LICENSE +0 -0
  57. {credsweeper-1.11.2 → credsweeper-1.11.3}/README.md +0 -0
  58. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/common/__init__.py +0 -0
  59. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/common/constants.py +0 -0
  60. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/common/keyword_checklist.py +0 -0
  61. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/common/keyword_checklist.txt +0 -0
  62. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/config/__init__.py +0 -0
  63. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/config/config.py +0 -0
  64. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/credentials/__init__.py +0 -0
  65. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/credentials/augment_candidates.py +0 -0
  66. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/credentials/candidate.py +0 -0
  67. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/credentials/candidate_group_generator.py +0 -0
  68. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/credentials/candidate_key.py +0 -0
  69. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/credentials/credential_manager.py +0 -0
  70. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/__init__.py +0 -0
  71. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/byte_scanner.py +0 -0
  72. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/bzip2_scanner.py +0 -0
  73. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/eml_scanner.py +0 -0
  74. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/gzip_scanner.py +0 -0
  75. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/mxfile_scanner.py +0 -0
  76. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/pdf_scanner.py +0 -0
  77. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/pptx_scanner.py +0 -0
  78. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/tar_scanner.py +0 -0
  79. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/tmx_scanner.py +0 -0
  80. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/xlsx_scanner.py +0 -0
  81. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/deep_scanner/zip_scanner.py +0 -0
  82. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/__init__.py +0 -0
  83. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/abstract_provider.py +0 -0
  84. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/analysis_target.py +0 -0
  85. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/byte_content_provider.py +0 -0
  86. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/content_provider.py +0 -0
  87. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/descriptor.py +0 -0
  88. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/diff_content_provider.py +0 -0
  89. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/file_path_extractor.py +0 -0
  90. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/files_provider.py +0 -0
  91. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/patches_provider.py +0 -0
  92. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/string_content_provider.py +0 -0
  93. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/struct_content_provider.py +0 -0
  94. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/file_handler/text_content_provider.py +0 -0
  95. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/__init__.py +0 -0
  96. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/filter.py +0 -0
  97. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/__init__.py +0 -0
  98. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/general_keyword.py +0 -0
  99. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/general_pattern.py +0 -0
  100. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/group.py +0 -0
  101. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/password_keyword.py +0 -0
  102. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/token_pattern.py +0 -0
  103. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/url_credentials_group.py +0 -0
  104. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/weird_base36_token.py +0 -0
  105. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/group/weird_base64_token.py +0 -0
  106. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/line_git_binary_check.py +0 -0
  107. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/line_specific_key_check.py +0 -0
  108. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/line_uue_part_check.py +0 -0
  109. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_allowlist_check.py +0 -0
  110. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_atlassian_token_check.py +0 -0
  111. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_base32_data_check.py +0 -0
  112. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_base64_data_check.py +0 -0
  113. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_base64_encoded_pem_check.py +0 -0
  114. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_base64_key_check.py +0 -0
  115. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_blocklist_check.py +0 -0
  116. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_camel_case_check.py +0 -0
  117. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_couple_keyword_check.py +0 -0
  118. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_dictionary_keyword_check.py +0 -0
  119. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_dictionary_value_length_check.py +0 -0
  120. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_github_check.py +0 -0
  121. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_grafana_check.py +0 -0
  122. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_grafana_service_check.py +0 -0
  123. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_jfrog_token_check.py +0 -0
  124. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_last_word_check.py +0 -0
  125. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_method_check.py +0 -0
  126. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_not_allowed_pattern_check.py +0 -0
  127. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_not_part_encoded_check.py +0 -0
  128. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_number_check.py +0 -0
  129. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_pattern_check.py +0 -0
  130. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_similarity_check.py +0 -0
  131. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_split_keyword_check.py +0 -0
  132. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/filters/value_token_base_check.py +0 -0
  133. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/logger/__init__.py +0 -0
  134. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/logger/logger.py +0 -0
  135. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/__init__.py +0 -0
  136. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/__init__.py +0 -0
  137. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/entropy_evaluation.py +0 -0
  138. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/feature.py +0 -0
  139. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/has_html_tag.py +0 -0
  140. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/is_secret_numeric.py +0 -0
  141. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/length_of_attribute.py +0 -0
  142. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/search_in_attribute.py +0 -0
  143. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/features/word_in.py +0 -0
  144. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/ml_config.json +0 -0
  145. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/ml_model/ml_model.onnx +0 -0
  146. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/py.typed +0 -0
  147. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/rules/__init__.py +0 -0
  148. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/rules/rule.py +0 -0
  149. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/scanner/__init__.py +0 -0
  150. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/scanner/scan_type/__init__.py +0 -0
  151. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/scanner/scan_type/multi_pattern.py +0 -0
  152. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/scanner/scan_type/pem_key_pattern.py +0 -0
  153. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/scanner/scan_type/single_pattern.py +0 -0
  154. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/secret/log.yaml +0 -0
  155. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/utils/__init__.py +0 -0
  156. {credsweeper-1.11.2 → credsweeper-1.11.3}/credsweeper/utils/hop_stat.py +0 -0
  157. {credsweeper-1.11.2 → credsweeper-1.11.3}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: credsweeper
3
- Version: 1.11.2
3
+ Version: 1.11.3
4
4
  Summary: Credential Sweeper
5
5
  Project-URL: Homepage, https://github.com/Samsung/CredSweeper
6
6
  Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues
@@ -18,4 +18,4 @@ __all__ = [
18
18
  '__version__'
19
19
  ]
20
20
 
21
- __version__ = "1.11.2"
21
+ __version__ = "1.11.3"
@@ -4,6 +4,7 @@ import os
4
4
  import sys
5
5
  import time
6
6
  from argparse import ArgumentParser, ArgumentTypeError, Namespace, BooleanOptionalAction
7
+ from pathlib import Path
7
8
  from typing import Any, Union, Dict
8
9
 
9
10
  from credsweeper import __version__
@@ -88,10 +89,11 @@ def check_integrity() -> int:
88
89
  Returns CRC32 of files in integer
89
90
  """
90
91
  crc32 = 0
91
- for root, dirs, files in os.walk(APP_PATH):
92
- for file_path in files:
93
- if Util.get_extension(file_path) in [".py", ".json", ".txt", ".yaml", ".onnx"]:
94
- data = Util.read_data(os.path.join(root, file_path))
92
+ for root, _dirs, files in os.walk(APP_PATH):
93
+ for file_name in files:
94
+ if Util.get_extension(file_name) in [".py", ".json", ".txt", ".yaml", ".onnx"]:
95
+ file_path = Path(root) / file_name
96
+ data = Util.read_data(file_path)
95
97
  if data:
96
98
  crc32 ^= binascii.crc32(data)
97
99
  return crc32
@@ -11,7 +11,7 @@ from colorama import Style
11
11
  # Directory of credsweeper sources MUST be placed before imports to avoid circular import error
12
12
  APP_PATH = Path(__file__).resolve().parent
13
13
 
14
- from credsweeper.common.constants import Severity, ThresholdPreset, DiffRowType
14
+ from credsweeper.common.constants import Severity, ThresholdPreset, DiffRowType, DEFAULT_ENCODING
15
15
  from credsweeper.config import Config
16
16
  from credsweeper.credentials import Candidate, CredentialManager, CandidateKey
17
17
  from credsweeper.deep_scanner.deep_scanner import DeepScanner
@@ -415,7 +415,7 @@ class CredSweeper:
415
415
  if isinstance(change_type, DiffRowType):
416
416
  # add suffix for appropriated reports to create two files for the patch scan
417
417
  json_path = json_path.with_suffix(f".{change_type.value}{json_path.suffix}")
418
- with open(json_path, 'w') as f:
418
+ with open(json_path, 'w', encoding=DEFAULT_ENCODING) as f:
419
419
  # use the approach to reduce total memory usage in case of huge data
420
420
  first_item = True
421
421
  f.write('[\n')
@@ -446,8 +446,12 @@ class CredSweeper:
446
446
  for credential in credentials:
447
447
  for line_data in credential.line_data_list:
448
448
  # bright rule name and path or info
449
+ if isinstance(credential.ml_probability, float):
450
+ ml_probability_info = f" {credential.ml_probability:.6f}"
451
+ else:
452
+ ml_probability_info = ""
449
453
  print(Style.BRIGHT + credential.rule_name +
450
- f" {line_data.info or line_data.path}:{line_data.line_num} {credential.ml_probability}" +
454
+ f" {line_data.info or line_data.path}:{line_data.line_num}{ml_probability_info}" +
451
455
  Style.RESET_ALL)
452
456
  print(line_data.get_colored_line(hashed=self.hashed, subtext=self.subtext))
453
457
 
@@ -3,25 +3,30 @@ import re
3
3
 
4
4
  class KeywordPattern:
5
5
  """Pattern set of keyword types"""
6
- key_left = r"(\\[nrt]|%[0-9a-f]{2})?" \
6
+ directive = r"(?P<directive>(?:(?:[#%]define|%global)(?:\s|\\t)|\bset))?"
7
+ key_left = r"(?:\\[nrt]|%[0-9a-f]{2}|\s)*" \
7
8
  r"(?P<variable>(([`'\"]{1,8}[^:='\"`}<>\\/&?]*|[^:='\"`}<>\s()\\/&?;,%]*)" \
8
9
  r"(?P<keyword>"
9
10
  # there will be inserted a keyword
10
11
  key_right = r")" \
11
- r"[^%:='\"`<>{?!&;\n]*" \
12
+ r"[^%:='\"`<>({?!&;\n]*" \
12
13
  r")" \
13
14
  r"(&(quot|apos);|%[0-9a-f]{2}|[`'\"])*" \
14
15
  r")" # <variable>
15
- separator = r"(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*" \
16
- r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=|%3d)" \
16
+ separator = r"(?(directive)|(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*)" \
17
+ r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=~|=" \
18
+ r"|(?(directive)(\\t|\s|\((?!\))){1,80}|%3d))" \
17
19
  r"(\s|\\{1,8}[tnr])*"
18
20
  # might be curly, square or parenthesis with words before
19
21
  wrap = r"(?P<wrap>(" \
20
22
  r"(new(\s|\\{1,8}[tnr]|byte|char|string|\[\]){1,8})?" \
23
+ r"(?P<get>([_a-z][0-9a-z_.\[\]]*\.)get|(os\.)?getenv)?" \
21
24
  r"([0-9a-z_.]|::|-(>|&gt;))*" \
22
- r"[\[\(\{]" \
25
+ r"\s*" \
26
+ r"(\[(?!\])|\((?!\))|\{(?!\}))" \
23
27
  r"(\s|\\{1,8}[tnr])*" \
24
- r"([0-9a-z_]{1,32}[:=]\s*)?" \
28
+ r"(?(get)('[^']+'|\"[^\"]+\")\s*,\s*|)" \
29
+ r"([0-9a-z_]{1,32}\s*[:=]\s*)?" \
25
30
  r"){1,8})?"
26
31
  string_prefix = r"(((b|r|br|rb|u|f|rf|fr|l|@)(?=(\\*[`'\"])))?"
27
32
  left_quote = r"(?P<value_leftquote>((?P<esq>\\{1,8})?([`'\"]|&(quot|apos);)){1,4}))?"
@@ -39,7 +44,7 @@ class KeywordPattern:
39
44
  r"(?P<url_esc>%[0-9a-f]{2})" \
40
45
  r"|" \
41
46
  r"(?(url_esc)[^\s`'\",;\\&]|[^\s`'\",;\\])" \
42
- r")"\
47
+ r")" \
43
48
  r"){4,8000}" \
44
49
  r"|" \
45
50
  r"(<[^>]{4,8000}>)" \
@@ -48,18 +53,19 @@ class KeywordPattern:
48
53
  r"|" \
49
54
  r"(\$?\{{1,3}[^}]{4,8000}\}{1,3})" \
50
55
  r"|" \
51
- r"(?(wrap)(?(value_leftquote)(?!\\(?P=value_leftquote))|[^\]\)\}]){16,8000})"\
56
+ r"(?(wrap)(?(value_leftquote)(?!\\(?P=value_leftquote))|[^\]\)\}]){16,8000})" \
52
57
  r")" # <value>
53
58
  right_quote = r"(?(value_leftquote)" \
54
59
  r"(?P<value_rightquote>(?<!\\)(?P=value_leftquote)|\\$|(?<=[0-9a-z+_/-])$)" \
55
60
  r"|" \
56
- r"(?(wrap)(\]|\)|\}|,|;|\\|$))" \
61
+ r"(?(wrap)(\]|\)|\}|;|\\|$))" \
57
62
  r")"
58
63
 
59
64
  @classmethod
60
65
  def get_keyword_pattern(cls, keyword: str) -> re.Pattern:
61
66
  """Returns compiled regex pattern"""
62
67
  expression = ''.join([ #
68
+ cls.directive, #
63
69
  cls.key_left, #
64
70
  keyword, #
65
71
  cls.key_right, #
@@ -885,7 +885,7 @@ mbler
885
885
  mean
886
886
  measur
887
887
  medi
888
- medusa
888
+ medus
889
889
  meet
890
890
  mem_
891
891
  memb
@@ -925,7 +925,7 @@ month
925
925
  morp
926
926
  mory
927
927
  mote
928
- motorola
928
+ motor
929
929
  mount
930
930
  move
931
931
  mpeg
@@ -1005,6 +1005,7 @@ origin
1005
1005
  orithm
1006
1006
  ormat
1007
1007
  orph
1008
+ otorola
1008
1009
  ottle
1009
1010
  ously
1010
1011
  out
@@ -1485,6 +1486,7 @@ up_
1485
1486
  updat
1486
1487
  upgrade
1487
1488
  url
1489
+ usa
1488
1490
  usb
1489
1491
  use
1490
1492
  usin
@@ -10,7 +10,6 @@ from colorama import Fore, Style
10
10
  from credsweeper.common.constants import MAX_LINE_LENGTH, UTF_8, StartEnd, ML_HUNK
11
11
  from credsweeper.config import Config
12
12
  from credsweeper.utils import Util
13
- from credsweeper.utils.entropy_validator import EntropyValidator
14
13
 
15
14
 
16
15
  class LineData:
@@ -32,7 +31,7 @@ class LineData:
32
31
  """
33
32
 
34
33
  quotation_marks = ('"', "'", '`')
35
- comment_starts = ("//", "* ", "#", "/*", "<!––", "%{", "%", "...", "(*", "--", "--[[", "#=")
34
+ comment_starts = ("//", "* ", "# ", "/*", "<!––", "%{", "%", "...", "(*", "--", "--[[", "#=")
36
35
  bash_param_split = re.compile("\\s+(\\-|\\||\\>|\\w+?\\>|\\&)")
37
36
  line_endings = re.compile(r"\\{1,8}[nr]")
38
37
  # https://en.wikipedia.org/wiki/Percent-encoding
@@ -87,8 +86,9 @@ class LineData:
87
86
  self.url_part = False
88
87
  self.wrap = None
89
88
  self._3d_escaped_separator = False
90
-
91
89
  self.initialize(match_obj)
90
+ # the line is very useful for debug breakpoint
91
+ pass # pylint: disable=W0107
92
92
 
93
93
  def compare(self, other: 'LineData') -> bool:
94
94
  """Comparison method - skip whole line and checks only when variable and value are the same"""
@@ -373,10 +373,10 @@ class LineData:
373
373
  def to_str(self, subtext: bool = False, hashed: bool = False) -> str:
374
374
  """Represent line_data with subtext or|and hashed values"""
375
375
  cut_pos = StartEnd(self.variable_start, self.value_end) if subtext else None
376
- return f"line: '{self.get_hash_or_subtext(self.line, hashed, cut_pos)}'" \
377
- f" | line_num: {self.line_num} | path: {self.path}" \
376
+ return f"path: {self.path}" \
377
+ f" | line_num: {self.line_num}" \
378
378
  f" | value: '{self.get_hash_or_subtext(self.value, hashed)}'" \
379
- f" | entropy_validation: {EntropyValidator(self.value)}"
379
+ f" | line: '{self.get_hash_or_subtext(self.line, hashed, cut_pos)}'"
380
380
 
381
381
  def __str__(self):
382
382
  return self.to_str()
@@ -393,6 +393,10 @@ class LineData:
393
393
  """
394
394
  cut_pos = StartEnd(self.variable_start if 0 <= self.variable_start else self.value_start,
395
395
  self.value_end) if subtext else None
396
+ if isinstance(self.value, str):
397
+ entropy = round(Util.get_shannon_entropy(self.value), 5)
398
+ else:
399
+ entropy = None
396
400
  full_output = {
397
401
  "key": self.key,
398
402
  "line": self.get_hash_or_subtext(self.line, hashed, cut_pos),
@@ -401,18 +405,18 @@ class LineData:
401
405
  # info may contain variable name - so let it be hashed if requested
402
406
  "info": self.get_hash_or_subtext(self.info, hashed),
403
407
  "pattern": self.pattern.pattern,
408
+ "variable": self.get_hash_or_subtext(self.variable, hashed),
409
+ "variable_start": self.variable_start,
410
+ "variable_end": self.variable_end,
404
411
  "separator": self.separator,
405
412
  "separator_start": self.separator_start,
406
413
  "separator_end": self.separator_end,
407
414
  "value": self.get_hash_or_subtext(self.value, hashed),
408
415
  "value_start": self.value_start,
409
416
  "value_end": self.value_end,
410
- "variable": self.get_hash_or_subtext(self.variable, hashed),
411
- "variable_start": self.variable_start,
412
- "variable_end": self.variable_end,
417
+ "entropy": entropy,
413
418
  "value_leftquote": self.value_leftquote,
414
419
  "value_rightquote": self.value_rightquote,
415
- "entropy_validation": EntropyValidator(self.value).to_dict()
416
420
  }
417
421
  reported_output = {k: v for k, v in full_output.items() if k in self.config.line_data_output}
418
422
  return reported_output
@@ -1,5 +1,5 @@
1
1
  from abc import abstractmethod, ABC
2
- from typing import List
2
+ from typing import List, Optional
3
3
 
4
4
  from credsweeper.config import Config
5
5
  from credsweeper.credentials import Candidate
@@ -40,3 +40,12 @@ class AbstractScanner(ABC):
40
40
  recursive_limit_size: int) -> List[Candidate]:
41
41
  """Abstract method to be defined in DeepScanner"""
42
42
  raise NotImplementedError(__name__)
43
+
44
+ @abstractmethod
45
+ def data_scan(
46
+ self, #
47
+ data_provider: DataContentProvider, #
48
+ depth: int, #
49
+ recursive_limit_size: int) -> Optional[List[Candidate]]:
50
+ """Abstract method to be defined in DeepScanner"""
51
+ raise NotImplementedError(__name__)
@@ -2,7 +2,7 @@ import datetime
2
2
  import logging
3
3
  from typing import List, Optional, Any, Tuple, Union
4
4
 
5
- from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION
5
+ from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN
6
6
  from credsweeper.config import Config
7
7
  from credsweeper.credentials import Candidate
8
8
  from credsweeper.credentials.augment_candidates import augment_candidates
@@ -23,6 +23,7 @@ from .gzip_scanner import GzipScanner
23
23
  from .html_scanner import HtmlScanner
24
24
  from .jks_scanner import JksScanner
25
25
  from .lang_scanner import LangScanner
26
+ from .lzma_scanner import LzmaScanner
26
27
  from .mxfile_scanner import MxfileScanner
27
28
  from .pdf_scanner import PdfScanner
28
29
  from .pkcs12_scanner import Pkcs12Scanner
@@ -48,6 +49,7 @@ class DeepScanner(
48
49
  HtmlScanner, #
49
50
  JksScanner, #
50
51
  LangScanner, #
52
+ LzmaScanner, #
51
53
  PdfScanner, #
52
54
  Pkcs12Scanner, #
53
55
  PptxScanner, #
@@ -106,6 +108,9 @@ class DeepScanner(
106
108
  elif Util.is_bzip2(data):
107
109
  if 0 < depth:
108
110
  deep_scanners.append(Bzip2Scanner)
111
+ elif Util.is_lzma(data):
112
+ if 0 < depth:
113
+ deep_scanners.append(LzmaScanner)
109
114
  elif Util.is_tar(data):
110
115
  if 0 < depth:
111
116
  deep_scanners.append(TarScanner)
@@ -140,13 +145,16 @@ class DeepScanner(
140
145
  else:
141
146
  fallback_scanners.append(EmlScanner)
142
147
  fallback_scanners.append(ByteScanner)
148
+ elif Util.is_known(data):
149
+ # the format is known but cannot be scanned
150
+ pass
143
151
  elif not Util.is_binary(data):
144
152
  if 0 < depth:
145
153
  deep_scanners.append(EncoderScanner)
146
154
  deep_scanners.append(LangScanner)
147
155
  deep_scanners.append(ByteScanner)
148
156
  else:
149
- logger.warning("Cannot apply a deep scanner for type %s", file_type)
157
+ logger.warning("Cannot apply a deep scanner for type %s prefix %s", file_type, str(data[:MIN_DATA_LEN]))
150
158
  return deep_scanners, fallback_scanners
151
159
 
152
160
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@@ -175,7 +183,7 @@ class DeepScanner(
175
183
  # this scan is successful, so fallback is not necessary
176
184
  fallback = False
177
185
  if fallback:
178
- for scan_class in deep_scanners:
186
+ for scan_class in fallback_scanners:
179
187
  fallback_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
180
188
  if fallback_candidates is None:
181
189
  continue
@@ -239,15 +247,18 @@ class DeepScanner(
239
247
  recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
240
248
  """
241
249
  candidates: List[Candidate] = []
242
- logger.debug("Start data_scan: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data), depth,
243
- recursive_limit_size, data_provider.file_path, data_provider.info)
244
-
245
250
  if 0 > depth:
246
251
  # break recursion if maximal depth is reached
247
- logger.debug("bottom reached %s recursive_limit_size:%d", data_provider.file_path, recursive_limit_size)
252
+ logger.debug("Bottom reached %s recursive_limit_size:%d", data_provider.file_path, recursive_limit_size)
248
253
  return candidates
249
-
250
254
  depth -= 1
255
+ if MIN_DATA_LEN > len(data_provider.data):
256
+ # break recursion for minimal data size
257
+ logger.debug("Too small data: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data),
258
+ depth, recursive_limit_size, data_provider.file_path, data_provider.info)
259
+ return candidates
260
+ logger.debug("Start data_scan: size=%d, depth=%d, limit=%d, path=%s, info=%s", len(data_provider.data), depth,
261
+ recursive_limit_size, data_provider.file_path, data_provider.info)
251
262
 
252
263
  if FilePathExtractor.is_find_by_ext_file(self.config, data_provider.file_type):
253
264
  # Skip scanning file and makes fake candidate due the extension is suspicious
@@ -42,7 +42,7 @@ class DocxScanner(AbstractScanner, ABC):
42
42
  yield from DocxScanner._iter_block_items(block.footer)
43
43
  return
44
44
  elif isinstance(block, _Cell):
45
- parent_elm = block._tc
45
+ parent_elm = block._tc # pylint: disable=W0212
46
46
  else:
47
47
  raise ValueError(f"unrecognised:{type(block)}")
48
48
 
@@ -18,11 +18,11 @@ class EncoderScanner(AbstractScanner, ABC):
18
18
  depth: int, #
19
19
  recursive_limit_size: int) -> Optional[List[Candidate]]:
20
20
  """Tries to decode data from base64 encode to bytes and scan as bytes again"""
21
- if data_provider.represent_as_encoded():
21
+ if result := data_provider.represent_as_encoded():
22
22
  decoded_data_provider = DataContentProvider(data=data_provider.decoded,
23
23
  file_path=data_provider.file_path,
24
24
  file_type=data_provider.file_type,
25
25
  info=f"{data_provider.info}|BASE64")
26
26
  new_limit = recursive_limit_size - len(decoded_data_provider.data)
27
27
  return self.recursive_scan(decoded_data_provider, depth, new_limit)
28
- return None
28
+ return None if result is None else []
@@ -19,12 +19,12 @@ class HtmlScanner(AbstractScanner, ABC):
19
19
  depth: int, #
20
20
  recursive_limit_size: int) -> Optional[List[Candidate]]:
21
21
  """Tries to represent data as html text and scan as text lines"""
22
- if data_provider.represent_as_html(depth, recursive_limit_size,
23
- self.scanner.keywords_required_substrings_check):
22
+ if result := data_provider.represent_as_html(depth, recursive_limit_size,
23
+ self.scanner.keywords_required_substrings_check):
24
24
  string_data_provider = StringContentProvider(lines=data_provider.lines,
25
25
  line_numbers=data_provider.line_numbers,
26
26
  file_path=data_provider.file_path,
27
27
  file_type=data_provider.file_type,
28
28
  info=f"{data_provider.info}|HTML")
29
29
  return self.scanner.scan(string_data_provider)
30
- return None
30
+ return None if result is None else []
@@ -20,7 +20,6 @@ class JksScanner(AbstractScanner, ABC):
20
20
  depth: int, #
21
21
  recursive_limit_size: int) -> Optional[List[Candidate]]:
22
22
  """Tries to scan JKS to open with standard password"""
23
- candidates = []
24
23
  for pw_probe in self.config.bruteforce_list:
25
24
  try:
26
25
  keystore = jks.KeyStore.loads(data_provider.data, pw_probe, try_decrypt_keys=True)
@@ -38,8 +37,7 @@ class JksScanner(AbstractScanner, ABC):
38
37
  candidate.line_data_list[0].value = pw_probe or "<EMPTY PASSWORD>"
39
38
  candidate.line_data_list[0].value_start = 1
40
39
  candidate.line_data_list[0].value_end = 1 + len(candidate.line_data_list[0].value)
41
- candidates.append(candidate)
42
- break
40
+ return [candidate]
43
41
  except Exception as jks_exc:
44
42
  logger.debug(f"{data_provider.file_path}:{pw_probe}:{jks_exc}")
45
- return candidates
43
+ return None
@@ -19,10 +19,10 @@ class LangScanner(AbstractScanner, ABC):
19
19
  depth: int, #
20
20
  recursive_limit_size: int) -> Optional[List[Candidate]]:
21
21
  """Tries to represent data as markup language and scan as structure"""
22
- if data_provider.represent_as_structure():
22
+ if result := data_provider.represent_as_structure():
23
23
  struct_data_provider = StructContentProvider(struct=data_provider.structure,
24
24
  file_path=data_provider.file_path,
25
25
  file_type=data_provider.file_type,
26
26
  info=f"{data_provider.info}|STRUCT")
27
27
  return self.structure_scan(struct_data_provider, depth, recursive_limit_size)
28
- return None
28
+ return None if result is None else []
@@ -0,0 +1,40 @@
1
+ import logging
2
+ import lzma
3
+ from abc import ABC
4
+ from pathlib import Path
5
+ from typing import List, Optional
6
+
7
+ from credsweeper.credentials import Candidate
8
+ from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
9
+ from credsweeper.file_handler.data_content_provider import DataContentProvider
10
+ from credsweeper.utils import Util
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class LzmaScanner(AbstractScanner, ABC):
    """Deep scanner for LZMA-compressed payloads (.xz / .lzma)"""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Decompresses the provider's data with lzma and scans the result recursively

        Args:
            data_provider: provider holding the compressed bytes and the source file path
            depth: recursion depth, passed through to recursive_scan unchanged
            recursive_limit_size: size budget; it is reduced by the decompressed length

        Return:
            the result of recursive_scan for the decompressed content,
            or None when any step raised an exception (the error is logged)

        """
        try:
            source_path = Path(data_provider.file_path)
            suffix = source_path.suffix
            inner_path = source_path.as_posix()
            # drop the archive extension so the inner file type is derived from the remaining name
            if suffix in (".xz", ".lzma"):
                inner_path = inner_path[:-len(suffix)]
            # NOTE(review): the whole payload is decompressed before the size budget is applied below
            unpacked = lzma.decompress(data_provider.data)
            inner_type = Util.get_extension(inner_path)
            inner_provider = DataContentProvider(data=unpacked,
                                                 file_path=inner_path,
                                                 file_type=inner_type,
                                                 info=f"{data_provider.info}|LZMA:{source_path}")
            remaining_limit = recursive_limit_size - len(inner_provider.data)
            return self.recursive_scan(inner_provider, depth, remaining_limit)
        except Exception as lzma_exc:
            # any failure (bad path, corrupt stream, nested scan error) is logged and reported as None
            logger.error(f"{data_provider.file_path}:{lzma_exc}")
            return None
@@ -20,10 +20,9 @@ class Pkcs12Scanner(AbstractScanner, ABC):
20
20
  depth: int, #
21
21
  recursive_limit_size: int) -> Optional[List[Candidate]]:
22
22
  """Tries to scan PKCS12 to open with standard password"""
23
- candidates = []
24
23
  for pw_probe in self.config.bruteforce_list:
25
24
  try:
26
- (private_key, certificate, additional_certificates) \
25
+ (private_key, _certificate, _additional_certificates) \
27
26
  = cryptography.hazmat.primitives.serialization.pkcs12.load_key_and_certificates(data_provider.data,
28
27
  pw_probe.encode())
29
28
  # the password probe has passed, it will be the value
@@ -40,8 +39,7 @@ class Pkcs12Scanner(AbstractScanner, ABC):
40
39
  candidate.line_data_list[0].value = value
41
40
  candidate.line_data_list[0].value_start = 1
42
41
  candidate.line_data_list[0].value_end = 1 + len(candidate.line_data_list[0].value)
43
- candidates.append(candidate)
44
- break
42
+ return [candidate]
45
43
  except Exception as pkcs_exc:
46
44
  logger.debug(f"{data_provider.file_path}:{pw_probe}:{pkcs_exc}")
47
- return candidates
45
+ return None
@@ -19,11 +19,11 @@ class XmlScanner(AbstractScanner, ABC):
19
19
  depth: int, #
20
20
  recursive_limit_size: int) -> Optional[List[Candidate]]:
21
21
  """Tries to represent data as xml text and scan as text lines"""
22
- if data_provider.represent_as_xml():
22
+ if result := data_provider.represent_as_xml():
23
23
  string_data_provider = StringContentProvider(lines=data_provider.lines,
24
24
  line_numbers=data_provider.line_numbers,
25
25
  file_path=data_provider.file_path,
26
26
  file_type=data_provider.file_type,
27
27
  info=f"{data_provider.info}|XML")
28
28
  return self.scanner.scan(string_data_provider)
29
- return None
29
+ return None if result is None else []
@@ -76,9 +76,14 @@ class DataContentProvider(ContentProvider):
76
76
  return self.structure is not None and (isinstance(self.structure, dict) and 0 < len(self.structure.keys())
77
77
  or isinstance(self.structure, list) and 0 < len(self.structure))
78
78
 
79
- def represent_as_structure(self) -> bool:
79
+ def represent_as_structure(self) -> Optional[bool]:
80
80
  """Tries to convert data with many parsers. Stores result to internal structure
81
- Return True if some structure found
81
+
82
+ Return:
83
+ True if some structure found
84
+ False if no data found
85
+ None if the format is not acceptable
86
+
82
87
  """
83
88
  if MIN_DATA_LEN > len(self.text):
84
89
  return False
@@ -134,13 +139,15 @@ class DataContentProvider(ContentProvider):
134
139
  if self.__is_structure():
135
140
  return True
136
141
  # # # None of above
137
- return False
142
+ return None
138
143
 
139
- def represent_as_xml(self) -> bool:
144
+ def represent_as_xml(self) -> Optional[bool]:
140
145
  """Tries to read data as xml
141
146
 
142
147
  Return:
143
148
  True if reading was successful
149
+ False if no data found
150
+ None if the format is not acceptable
144
151
 
145
152
  """
146
153
  if MIN_XML_LEN > len(self.text):
@@ -150,14 +157,12 @@ class DataContentProvider(ContentProvider):
150
157
  xml_text = self.text.splitlines()
151
158
  self.lines, self.line_numbers = Util.get_xml_from_lines(xml_text)
152
159
  logger.debug("CONVERTED from xml")
160
+ return bool(self.lines and self.line_numbers)
153
161
  else:
154
162
  logger.debug("Weak data to parse as XML")
155
- return False
156
163
  except Exception as exc:
157
164
  logger.debug("Cannot parse as XML:%s %s", exc, self.data)
158
- else:
159
- return bool(self.lines and self.line_numbers)
160
- return False
165
+ return None
161
166
 
162
167
  def _check_multiline_cell(self, cell: Tag) -> Optional[Tuple[int, str]]:
163
168
  """multiline cell will be analysed as text or return single line from cell
@@ -336,11 +341,13 @@ class DataContentProvider(ContentProvider):
336
341
  self, #
337
342
  depth: int, #
338
343
  recursive_limit_size: int, #
339
- keywords_required_substrings_check: Callable[[str], bool]) -> bool:
344
+ keywords_required_substrings_check: Callable[[str], bool]) -> Optional[bool]:
340
345
  """Tries to read data as html
341
346
 
342
347
  Return:
343
348
  True if reading was successful
349
+ False if no data found
350
+ None if the format is not acceptable
344
351
 
345
352
  """
346
353
  try:
@@ -361,13 +368,15 @@ class DataContentProvider(ContentProvider):
361
368
  logger.debug("Cannot parse as HTML:%s %s", exc, self.data)
362
369
  else:
363
370
  return bool(self.lines and self.line_numbers)
364
- return False
371
+ return None
365
372
 
366
- def represent_as_encoded(self) -> bool:
373
+ def represent_as_encoded(self) -> Optional[bool]:
367
374
  """Decodes data from base64. Stores result in decoded
368
375
 
369
376
  Return:
370
377
  True if the data correctly parsed and verified
378
+ False if no data found
379
+ None if the format is not acceptable
371
380
 
372
381
  """
373
382
  if len(self.data) < MIN_ENCODED_DATA_LEN \
@@ -383,7 +392,7 @@ class DataContentProvider(ContentProvider):
383
392
  logger.debug("Cannot decoded as base64:%s %s", exc, self.data)
384
393
  else:
385
394
  return self.decoded is not None and 0 < len(self.decoded)
386
- return False
395
+ return None
387
396
 
388
397
  def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTarget, None, None]:
389
398
  """Return nothing. The class provides only data storage.
@@ -14,7 +14,7 @@ class ValueArrayDictionaryCheck(Filter):
14
14
  `token = {'root'}` would be kept
15
15
  """
16
16
 
17
- PATTERN = re.compile(r"\[('|\")?.+('|\")?\]")
17
+ PATTERN = re.compile(r"\[('|\")?[^,]+('|\")?\]")
18
18
 
19
19
  def __init__(self, config: Config = None) -> None:
20
20
  pass
@@ -32,6 +32,8 @@ class ValueArrayDictionaryCheck(Filter):
32
32
  """
33
33
  if line_data.is_well_quoted_value:
34
34
  return False
35
+ if line_data.wrap and "byte" in line_data.wrap.lower():
36
+ return False
35
37
  if self.PATTERN.search(line_data.value):
36
38
  return True
37
39
  if line_data.wrap and not line_data.is_well_quoted_value and ('[' in line_data.wrap or '(' in line_data.wrap):
@@ -1,7 +1,6 @@
1
1
  import contextlib
2
2
  import json
3
3
 
4
- from credsweeper.common.constants import Chars
5
4
  from credsweeper.config import Config
6
5
  from credsweeper.credentials import LineData
7
6
  from credsweeper.file_handler.analysis_target import AnalysisTarget
@@ -45,7 +44,7 @@ class ValueAzureTokenCheck(Filter):
45
44
  # must be all parts in payload
46
45
  return True
47
46
  min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(parts[2]))
48
- entropy = Util.get_shannon_entropy(parts[2], Chars.BASE64URL_CHARS.value)
47
+ entropy = Util.get_shannon_entropy(parts[2])
49
48
  # good signature has to be like random bytes
50
49
  return entropy < min_entropy
51
50