credsweeper 1.12.2__tar.gz → 1.13.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

Files changed (165)
  1. {credsweeper-1.12.2 → credsweeper-1.13.0}/.gitignore +2 -0
  2. {credsweeper-1.12.2 → credsweeper-1.13.0}/PKG-INFO +7 -7
  3. {credsweeper-1.12.2 → credsweeper-1.13.0}/README.md +1 -1
  4. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/__init__.py +1 -1
  5. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/__main__.py +15 -8
  6. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/app.py +7 -2
  7. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/common/keyword_pattern.py +6 -3
  8. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/common/morpheme_checklist.txt +11 -1
  9. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/config/config.py +1 -0
  10. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/credentials/line_data.py +16 -0
  11. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/deep_scanner.py +12 -6
  12. credsweeper-1.13.0/credsweeper/deep_scanner/rtf_scanner.py +41 -0
  13. credsweeper-1.13.0/credsweeper/deep_scanner/strings_scanner.py +52 -0
  14. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/file_handler/byte_content_provider.py +10 -1
  15. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/file_handler/file_path_extractor.py +2 -0
  16. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/file_handler/text_content_provider.py +7 -1
  17. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/__init__.py +1 -1
  18. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/group/token_pattern.py +2 -2
  19. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/group/weird_base36_token.py +2 -2
  20. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/group/weird_base64_token.py +2 -2
  21. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_file_path_check.py +5 -3
  22. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_github_check.py +3 -2
  23. credsweeper-1.13.0/credsweeper/filters/value_morphemes_check.py +43 -0
  24. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_string_type_check.py +1 -0
  25. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/features/feature.py +1 -18
  26. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/features/file_extension.py +1 -1
  27. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/features/has_html_tag.py +10 -8
  28. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/features/is_secret_numeric.py +4 -3
  29. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/features/rule_name.py +1 -1
  30. credsweeper-1.13.0/credsweeper/ml_model/features/word_in.py +36 -0
  31. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/features/word_in_path.py +2 -3
  32. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/features/word_in_postamble.py +1 -4
  33. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/features/word_in_preamble.py +1 -4
  34. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/features/word_in_transition.py +1 -4
  35. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/features/word_in_value.py +2 -3
  36. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/features/word_in_variable.py +2 -3
  37. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/ml_config.json +15 -8
  38. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/ml_model.onnx +0 -0
  39. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/ml_validator.py +1 -1
  40. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/rules/config.yaml +129 -128
  41. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/scanner/scanner.py +12 -7
  42. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/secret/config.json +18 -5
  43. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/utils/util.py +19 -16
  44. {credsweeper-1.12.2 → credsweeper-1.13.0}/pyproject.toml +5 -5
  45. credsweeper-1.12.2/credsweeper/filters/value_couple_keyword_check.py +0 -28
  46. credsweeper-1.12.2/credsweeper/ml_model/features/word_in.py +0 -59
  47. {credsweeper-1.12.2 → credsweeper-1.13.0}/LICENSE +0 -0
  48. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/common/__init__.py +0 -0
  49. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/common/constants.py +0 -0
  50. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/common/keyword_checklist.py +0 -0
  51. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/common/keyword_checklist.txt +0 -0
  52. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/config/__init__.py +0 -0
  53. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/credentials/__init__.py +0 -0
  54. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/credentials/augment_candidates.py +0 -0
  55. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/credentials/candidate.py +0 -0
  56. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/credentials/candidate_group_generator.py +0 -0
  57. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/credentials/candidate_key.py +0 -0
  58. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/credentials/credential_manager.py +0 -0
  59. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/__init__.py +0 -0
  60. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/abstract_scanner.py +0 -0
  61. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/byte_scanner.py +0 -0
  62. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/bzip2_scanner.py +0 -0
  63. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/deb_scanner.py +0 -0
  64. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/docx_scanner.py +0 -0
  65. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/eml_scanner.py +0 -0
  66. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/encoder_scanner.py +0 -0
  67. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/gzip_scanner.py +0 -0
  68. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/html_scanner.py +0 -0
  69. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/jclass_scanner.py +0 -0
  70. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/jks_scanner.py +0 -0
  71. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/lang_scanner.py +0 -0
  72. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/lzma_scanner.py +0 -0
  73. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/mxfile_scanner.py +0 -0
  74. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/patch_scanner.py +0 -0
  75. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/pdf_scanner.py +0 -0
  76. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/pkcs_scanner.py +0 -0
  77. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/pptx_scanner.py +0 -0
  78. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/rpm_scanner.py +0 -0
  79. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/sqlite3_scanner.py +0 -0
  80. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/tar_scanner.py +0 -0
  81. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/tmx_scanner.py +0 -0
  82. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/xlsx_scanner.py +0 -0
  83. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/xml_scanner.py +0 -0
  84. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/deep_scanner/zip_scanner.py +0 -0
  85. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/file_handler/__init__.py +0 -0
  86. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/file_handler/abstract_provider.py +0 -0
  87. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/file_handler/analysis_target.py +0 -0
  88. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/file_handler/content_provider.py +0 -0
  89. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/file_handler/data_content_provider.py +0 -0
  90. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/file_handler/descriptor.py +0 -0
  91. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/file_handler/diff_content_provider.py +0 -0
  92. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/file_handler/files_provider.py +0 -0
  93. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/file_handler/patches_provider.py +0 -0
  94. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/file_handler/string_content_provider.py +0 -0
  95. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/file_handler/struct_content_provider.py +0 -0
  96. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/filter.py +0 -0
  97. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/group/__init__.py +0 -0
  98. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/group/general_keyword.py +0 -0
  99. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/group/general_pattern.py +0 -0
  100. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/group/group.py +0 -0
  101. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/group/password_keyword.py +0 -0
  102. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/group/url_credentials_group.py +0 -0
  103. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/line_git_binary_check.py +0 -0
  104. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/line_specific_key_check.py +0 -0
  105. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/line_uue_part_check.py +0 -0
  106. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_allowlist_check.py +0 -0
  107. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_array_dictionary_check.py +0 -0
  108. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_atlassian_token_check.py +0 -0
  109. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_azure_token_check.py +0 -0
  110. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_base32_data_check.py +0 -0
  111. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_base64_data_check.py +0 -0
  112. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_base64_encoded_pem_check.py +0 -0
  113. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_base64_key_check.py +0 -0
  114. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_base64_part_check.py +0 -0
  115. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_basic_auth_check.py +0 -0
  116. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_blocklist_check.py +0 -0
  117. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_camel_case_check.py +0 -0
  118. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_dictionary_keyword_check.py +0 -0
  119. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_discord_bot_check.py +0 -0
  120. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_entropy_base32_check.py +0 -0
  121. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_entropy_base36_check.py +0 -0
  122. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_entropy_base64_check.py +0 -0
  123. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_entropy_base_check.py +0 -0
  124. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_grafana_check.py +0 -0
  125. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_grafana_service_check.py +0 -0
  126. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_hex_number_check.py +0 -0
  127. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_jfrog_token_check.py +0 -0
  128. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_json_web_key_check.py +0 -0
  129. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_json_web_token_check.py +0 -0
  130. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_last_word_check.py +0 -0
  131. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_length_check.py +0 -0
  132. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_method_check.py +0 -0
  133. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_not_allowed_pattern_check.py +0 -0
  134. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_not_part_encoded_check.py +0 -0
  135. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_number_check.py +0 -0
  136. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_pattern_check.py +0 -0
  137. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_similarity_check.py +0 -0
  138. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_split_keyword_check.py +0 -0
  139. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_token_base32_check.py +0 -0
  140. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_token_base36_check.py +0 -0
  141. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_token_base64_check.py +0 -0
  142. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_token_base_check.py +0 -0
  143. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/filters/value_token_check.py +0 -0
  144. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/logger/__init__.py +0 -0
  145. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/logger/logger.py +0 -0
  146. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/__init__.py +0 -0
  147. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/features/__init__.py +0 -0
  148. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/features/entropy_evaluation.py +0 -0
  149. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/features/length_of_attribute.py +0 -0
  150. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/features/morpheme_dense.py +0 -0
  151. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/features/rule_severity.py +0 -0
  152. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/ml_model/features/search_in_attribute.py +0 -0
  153. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/py.typed +0 -0
  154. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/rules/__init__.py +0 -0
  155. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/rules/rule.py +0 -0
  156. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/scanner/__init__.py +0 -0
  157. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/scanner/scan_type/__init__.py +0 -0
  158. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/scanner/scan_type/multi_pattern.py +0 -0
  159. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/scanner/scan_type/pem_key_pattern.py +0 -0
  160. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/scanner/scan_type/scan_type.py +0 -0
  161. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/scanner/scan_type/single_pattern.py +0 -0
  162. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/secret/log.yaml +0 -0
  163. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/utils/__init__.py +0 -0
  164. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/utils/hop_stat.py +0 -0
  165. {credsweeper-1.12.2 → credsweeper-1.13.0}/credsweeper/utils/pem_key_detector.py +0 -0
@@ -149,3 +149,5 @@ fuzz/corpus/*
149
149
  # experiment result and data
150
150
  /experiment/results/*
151
151
  /experiment/data/*
152
+ *.out
153
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: credsweeper
3
- Version: 1.12.2
3
+ Version: 1.13.0
4
4
  Summary: Credential Sweeper
5
5
  Project-URL: Homepage, https://github.com/Samsung/CredSweeper
6
6
  Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues
@@ -10,13 +10,12 @@ Classifier: License :: OSI Approved :: MIT License
10
10
  Classifier: Operating System :: OS Independent
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Classifier: Programming Language :: Python :: 3 :: Only
13
- Classifier: Programming Language :: Python :: 3.9
14
13
  Classifier: Programming Language :: Python :: 3.10
15
14
  Classifier: Programming Language :: Python :: 3.11
16
15
  Classifier: Programming Language :: Python :: 3.12
17
16
  Classifier: Topic :: Security
18
17
  Classifier: Topic :: Software Development :: Quality Assurance
19
- Requires-Python: >=3.9
18
+ Requires-Python: >=3.10
20
19
  Requires-Dist: base58
21
20
  Requires-Dist: beautifulsoup4>=4.11.0
22
21
  Requires-Dist: colorama
@@ -24,10 +23,10 @@ Requires-Dist: cryptography
24
23
  Requires-Dist: gitpython
25
24
  Requires-Dist: humanfriendly
26
25
  Requires-Dist: lxml
27
- Requires-Dist: numpy<2.0.0
26
+ Requires-Dist: numpy
28
27
  Requires-Dist: odfpy
29
- Requires-Dist: onnxruntime; platform_system != 'Windows'
30
- Requires-Dist: onnxruntime==1.19.2; platform_system == 'Windows'
28
+ Requires-Dist: onnxruntime; platform_system != 'Windows' or python_version != '3.12'
29
+ Requires-Dist: onnxruntime==1.19.2; platform_system == 'Windows' and python_version == '3.12'
31
30
  Requires-Dist: openpyxl
32
31
  Requires-Dist: pandas
33
32
  Requires-Dist: pdfminer-six
@@ -38,6 +37,7 @@ Requires-Dist: python-docx
38
37
  Requires-Dist: python-pptx
39
38
  Requires-Dist: pyyaml
40
39
  Requires-Dist: rpmfile
40
+ Requires-Dist: striprtf
41
41
  Requires-Dist: whatthepatch
42
42
  Requires-Dist: xlrd
43
43
  Description-Content-Type: text/markdown
@@ -90,7 +90,7 @@ Full documentation can be found here: <https://credsweeper.readthedocs.io/>
90
90
 
91
91
  ### Main Requirements
92
92
 
93
- - Python 3.9, 3.10, 3.11, 3.12
93
+ - Python 3.10, 3.11, 3.12
94
94
 
95
95
  ### Installation
96
96
 
@@ -46,7 +46,7 @@ Full documentation can be found here: <https://credsweeper.readthedocs.io/>
46
46
 
47
47
  ### Main Requirements
48
48
 
49
- - Python 3.9, 3.10, 3.11, 3.12
49
+ - Python 3.10, 3.11, 3.12
50
50
 
51
51
  ### Installation
52
52
 
@@ -24,4 +24,4 @@ __all__ = [
24
24
  "__version__"
25
25
  ]
26
26
 
27
- __version__ = "1.12.2"
27
+ __version__ = "1.13.0"
@@ -1,4 +1,5 @@
1
1
  import binascii
2
+ import contextlib
2
3
  import logging
3
4
  import os
4
5
  import sys
@@ -34,24 +35,24 @@ def positive_int(value: Any) -> int:
34
35
  return int_value
35
36
 
36
37
 
37
- def threshold_or_float(arg: str) -> Union[float, ThresholdPreset]:
38
+ def threshold_or_float_or_zero(arg: str) -> Union[int, float, ThresholdPreset]:
38
39
  """Return ThresholdPreset or a float from the input string
39
40
 
40
41
  Args:
41
42
  arg: string that either a float or one of allowed values in ThresholdPreset
42
43
 
43
44
  Returns:
44
- float if arg convertible to float, ThresholdPreset if one of the allowed values
45
+ int = 0 to disable ML validator, float if arg convertible to float, ThresholdPreset if one of the allowed values
45
46
 
46
47
  Raises:
47
48
  ArgumentTypeError: if arg cannot be interpreted as float or ThresholdPreset
48
49
 
49
50
  """
50
51
  allowed_presents = [e.value for e in ThresholdPreset]
51
- try:
52
+ if '0' == arg:
53
+ return 0
54
+ with contextlib.suppress(ValueError):
52
55
  return float(arg) # try convert to float
53
- except ValueError:
54
- pass
55
56
  if arg in allowed_presents:
56
57
  return ThresholdPreset[arg]
57
58
  raise ArgumentTypeError(f"value must be a float or one of {allowed_presents}")
@@ -158,6 +159,10 @@ def get_arguments() -> Namespace:
158
159
  help="find files by predefined extension",
159
160
  dest="find_by_ext",
160
161
  action="store_true")
162
+ parser.add_argument("--pedantic",
163
+ help="process files without extension",
164
+ action=BooleanOptionalAction,
165
+ default=False)
161
166
  parser.add_argument("--depth",
162
167
  help="additional recursive search in data (experimental)",
163
168
  type=positive_int,
@@ -172,11 +177,11 @@ def get_arguments() -> Namespace:
172
177
  "The lower the threshold - the more credentials will be reported. "
173
178
  f"Allowed values: float between 0 and 1, or any of {[e.value for e in ThresholdPreset]} "
174
179
  "(default: medium)",
175
- type=threshold_or_float,
180
+ type=threshold_or_float_or_zero,
176
181
  default=ThresholdPreset.medium,
177
182
  dest="ml_threshold",
178
183
  required=False,
179
- metavar="FLOAT_OR_STR")
184
+ metavar="THRESHOLD_OR_FLOAT_OR_ZERO")
180
185
  parser.add_argument("--ml_batch_size",
181
186
  "-b",
182
187
  help="batch size for model inference (default: 16)",
@@ -299,6 +304,7 @@ def get_credsweeper(args: Namespace) -> CredSweeper:
299
304
  ml_model=args.ml_model,
300
305
  ml_providers=args.ml_providers,
301
306
  find_by_ext=args.find_by_ext,
307
+ pedantic=args.pedantic,
302
308
  depth=args.depth,
303
309
  doc=args.doc,
304
310
  severity=args.severity,
@@ -335,7 +341,8 @@ def scan(args: Namespace, content_provider: AbstractProvider) -> int:
335
341
  def get_commit_providers(commit: Commit, repo: Repo) -> Sequence[ByteContentProvider]:
336
342
  """Process a commit and for providers"""
337
343
  result = {}
338
- ancestors = commit.parents or [repo.tree()]
344
+ # use the hardcoded sha1 until sha256 objects are not supported by GitPython
345
+ ancestors = commit.parents or [repo.tree("4b825dc642cb6eb9a060e54bf8d69288fbee4904")]
339
346
  for parent in ancestors:
340
347
  for diff in parent.diff(commit):
341
348
  # only result files
@@ -52,11 +52,12 @@ class CredSweeper:
52
52
  use_filters: bool = True,
53
53
  pool_count: int = 1,
54
54
  ml_batch_size: Optional[int] = None,
55
- ml_threshold: Union[float, ThresholdPreset] = ThresholdPreset.medium,
55
+ ml_threshold: Union[int, float, ThresholdPreset] = ThresholdPreset.medium,
56
56
  ml_config: Union[None, str, Path] = None,
57
57
  ml_model: Union[None, str, Path] = None,
58
58
  ml_providers: Optional[str] = None,
59
59
  find_by_ext: bool = False,
60
+ pedantic: bool = False,
60
61
  depth: int = 0,
61
62
  doc: bool = False,
62
63
  severity: Union[Severity, str] = Severity.INFO,
@@ -86,6 +87,7 @@ class CredSweeper:
86
87
  ml_model: str or Path to set custom ml model
87
88
  ml_providers: str - comma separated list with providers
88
89
  find_by_ext: boolean - files will be reported by extension
90
+ pedantic: boolean - scan all files
89
91
  depth: int - how deep container files will be scanned
90
92
  doc: boolean - document-specific scanning
91
93
  severity: Severity - minimum severity level of rule
@@ -103,6 +105,7 @@ class CredSweeper:
103
105
  config_dict = self._get_config_dict(config_path=config_path,
104
106
  use_filters=use_filters,
105
107
  find_by_ext=find_by_ext,
108
+ pedantic=pedantic,
106
109
  depth=depth,
107
110
  doc=doc,
108
111
  severity=_severity,
@@ -145,6 +148,7 @@ class CredSweeper:
145
148
  config_path: Optional[str], #
146
149
  use_filters: bool, #
147
150
  find_by_ext: bool, #
151
+ pedantic: bool, #
148
152
  depth: int, #
149
153
  doc: bool, #
150
154
  severity: Severity, #
@@ -155,6 +159,7 @@ class CredSweeper:
155
159
  config_dict["use_filters"] = use_filters
156
160
  config_dict["find_by_ext"] = find_by_ext
157
161
  config_dict["size_limit"] = size_limit
162
+ config_dict["pedantic"] = pedantic
158
163
  config_dict["depth"] = depth
159
164
  config_dict["doc"] = doc
160
165
  config_dict["severity"] = severity.value
@@ -169,7 +174,7 @@ class CredSweeper:
169
174
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
170
175
 
171
176
  def _use_ml_validation(self) -> bool:
172
- if isinstance(self.ml_threshold, (float, int)) and 0 >= self.ml_threshold:
177
+ if isinstance(self.ml_threshold, int) and 0 == self.ml_threshold:
173
178
  logger.info("ML validation is disabled")
174
179
  return False
175
180
  if not self.credential_manager.candidates:
@@ -3,7 +3,10 @@ import re
3
3
 
4
4
  class KeywordPattern:
5
5
  """Pattern set of keyword types"""
6
- directive = r"(?P<directive>(?:(?:[#%]define|%global)(?:\s|\\t)|\bset))?"
6
+ directive = r"(?P<directive>(?:" \
7
+ r"(?:[#%]define|define(?=(\s|\\{1,8}[tnr])*\()|%global)" \
8
+ r"(?:\s?\(|\s|\\{1,8}[tnr]){1,8}|\bset(?=\b|\w*(\s|\\{1,8}[tnr])*\()" \
9
+ r"))?"
7
10
  key_left = r"(?:\\[nrt]|(\\\\*u00|%)[0-9a-f]{2}|\s)*" \
8
11
  r"(?P<variable>(([\"'`]{1,8}[^:=\"'`}<>\\/&?]*|[^:=\"'`}<>\s()\\/&?;,%]*)"
9
12
  # keyword will be inserted here
@@ -13,7 +16,7 @@ class KeywordPattern:
13
16
  r")" # <variable>
14
17
  separator = r"(?(directive)|(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*)" \
15
18
  r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=~|=" \
16
- r"|(?(directive)(\\t|\s|\((?!\))){1,80}|%3d))" \
19
+ r"|(?(directive)(,|\\t|\s|\((?!\))){1,80}|%3d))" \
17
20
  r"(\s|\\{1,8}[tnr])*"
18
21
  # might be curly, square or parenthesis with words before
19
22
  wrap = r"(?P<wrap>(" \
@@ -23,7 +26,7 @@ class KeywordPattern:
23
26
  r"\s*" \
24
27
  r"(\[(?!\])|\((?!\))|\{(?!\}))" \
25
28
  r"(\s|\\{1,8}[tnr])*" \
26
- r"(?(get)('[^']+'|\"[^\"]+\")\s*,\s*|)" \
29
+ r"(?(get)('[^']{1,31}'|\"[^\"]{1,31}\")\s*,\s*|)" \
27
30
  r"([0-9a-z_]{1,32}\s*[:=]\s*)?" \
28
31
  r"){1,8})?"
29
32
  string_prefix = r"(((b|r|br|rb|u|f|rf|fr|l|@)(?=(\\*[\"'`])))?"
@@ -14,11 +14,15 @@
14
14
  /var
15
15
  000
16
16
  111
17
+ 14159265
18
+ 18284590
17
19
  222
18
20
  333
19
21
  444
20
22
  555
23
+ 65358979
21
24
  666
25
+ 71828182
22
26
  777
23
27
  80211
24
28
  888
@@ -195,7 +199,7 @@ aux
195
199
  avail
196
200
  avatar
197
201
  aver
198
- awesome
202
+ awesom
199
203
  axis
200
204
  azure
201
205
  back
@@ -498,6 +502,7 @@ dust
498
502
  dvb
499
503
  dynamic
500
504
  dynamo
505
+ eadbee
501
506
  easin
502
507
  easy
503
508
  ecdhe
@@ -790,6 +795,7 @@ jpg_
790
795
  json
791
796
  jump
792
797
  justif
798
+ kafka
793
799
  kerberos
794
800
  kernel
795
801
  key
@@ -799,6 +805,7 @@ kind
799
805
  kinesis
800
806
  kirk
801
807
  know
808
+ knox
802
809
  kris
803
810
  lab
804
811
  lag
@@ -1318,6 +1325,7 @@ sock
1318
1325
  soft
1319
1326
  solid
1320
1327
  solve
1328
+ some
1321
1329
  sony
1322
1330
  sort
1323
1331
  sound
@@ -1430,6 +1438,7 @@ tio
1430
1438
  tish
1431
1439
  title
1432
1440
  titud
1441
+ tizen
1433
1442
  tmp/
1434
1443
  to_
1435
1444
  tod
@@ -1576,5 +1585,6 @@ you
1576
1585
  zeppelin
1577
1586
  zero
1578
1587
  zing
1588
+ zigbee
1579
1589
  zona
1580
1590
  zorro
@@ -35,6 +35,7 @@ class Config:
35
35
  self.candidate_output: List[str] = config["candidate_output"]
36
36
  self.find_by_ext: bool = config["find_by_ext"]
37
37
  self.size_limit: Optional[int] = parse_size(config["size_limit"]) if config["size_limit"] is not None else None
38
+ self.pedantic: bool = bool(config["pedantic"])
38
39
  self.depth: int = int(config["depth"])
39
40
  self.doc: bool = config["doc"]
40
41
  self.severity: Severity = Severity.get(config.get("severity"))
@@ -163,6 +163,7 @@ class LineData:
163
163
  self.clean_url_parameters()
164
164
  self.clean_bash_parameters()
165
165
  self.clean_toml_parameters()
166
+ self.clean_tag_parameters()
166
167
  if 0 <= self.value_start and 0 <= self.value_end and len(self.value) < len(_value):
167
168
  start = _value.find(self.value)
168
169
  self.value_start += start
@@ -232,6 +233,21 @@ class LineData:
232
233
  self.value = self.value[:-1]
233
234
  cleaning_required = True
234
235
 
236
+ def clean_tag_parameters(self) -> None:
237
+ """Remove closing tag from value if the opened is somewhere before in line"""
238
+ cleaning_required = self.value and self.value.endswith('>')
239
+ while cleaning_required:
240
+ closing_tag_pos = self.value.rfind("</")
241
+ if 0 <= closing_tag_pos:
242
+ # use `<a` to avoid tag parameters
243
+ opening_tag_prefix = f"<{self.value[closing_tag_pos + 2:-1]}"
244
+ if cleaning_required := (opening_tag_prefix not in self.value
245
+ and 0 <= self.line.find(opening_tag_prefix, 0, self.value_start)):
246
+ self.value = self.value[:closing_tag_pos]
247
+ cleaning_required = self.value and self.value.endswith('>')
248
+ else:
249
+ break
250
+
235
251
  def sanitize_variable(self) -> None:
236
252
  """Remove trailing spaces, dashes and quotations around the variable. Correct position."""
237
253
  sanitized_var_len = 0
@@ -1,7 +1,6 @@
1
1
  import logging
2
2
  from typing import List, Any, Tuple
3
3
 
4
- from credsweeper.common.constants import MIN_DATA_LEN
5
4
  from credsweeper.config.config import Config
6
5
  from credsweeper.scanner.scanner import Scanner
7
6
  from credsweeper.utils.util import Util
@@ -23,7 +22,9 @@ from .pdf_scanner import PdfScanner
23
22
  from .pkcs_scanner import PkcsScanner
24
23
  from .pptx_scanner import PptxScanner
25
24
  from .rpm_scanner import RpmScanner
25
+ from .rtf_scanner import RtfScanner
26
26
  from .sqlite3_scanner import Sqlite3Scanner
27
+ from .strings_scanner import StringsScanner
27
28
  from .tar_scanner import TarScanner
28
29
  from .tmx_scanner import TmxScanner
29
30
  from .xlsx_scanner import XlsxScanner
@@ -49,8 +50,10 @@ class DeepScanner(
49
50
  PdfScanner, #
50
51
  PkcsScanner, #
51
52
  PptxScanner, #
53
+ RtfScanner, #
52
54
  RpmScanner, #
53
55
  Sqlite3Scanner, #
56
+ StringsScanner, #
54
57
  TarScanner, #
55
58
  DebScanner, #
56
59
  XmlScanner, #
@@ -133,6 +136,9 @@ class DeepScanner(
133
136
  deep_scanners.append(Sqlite3Scanner)
134
137
  elif Util.is_asn1(data):
135
138
  deep_scanners.append(PkcsScanner)
139
+ elif Util.is_rtf(data):
140
+ deep_scanners.append(RtfScanner)
141
+ fallback_scanners.append(ByteScanner)
136
142
  elif Util.is_xml(data):
137
143
  if Util.is_html(data):
138
144
  deep_scanners.append(HtmlScanner)
@@ -158,9 +164,6 @@ class DeepScanner(
158
164
  deep_scanners.append(PatchScanner)
159
165
  fallback_scanners.append(EmlScanner)
160
166
  fallback_scanners.append(ByteScanner)
161
- elif Util.is_known(data):
162
- # the format is known but cannot be scanned
163
- pass
164
167
  elif not Util.is_binary(data):
165
168
  if 0 < depth:
166
169
  deep_scanners.append(PatchScanner)
@@ -168,6 +171,9 @@ class DeepScanner(
168
171
  deep_scanners.append(LangScanner)
169
172
  deep_scanners.append(ByteScanner)
170
173
  else:
171
- logger.warning("Cannot apply a deep scanner for type %s prefix %s %d", descriptor,
172
- repr(data[:MIN_DATA_LEN]), len(data))
174
+ if 0 < depth:
175
+ deep_scanners.append(StringsScanner)
176
+ else:
177
+ logger.warning("Cannot apply a deep scanner for type %s prefix %s %d", descriptor, repr(data[:32]),
178
+ len(data))
173
179
  return deep_scanners, fallback_scanners
@@ -0,0 +1,41 @@
1
+ import logging
2
+ from abc import ABC
3
+ from typing import List, Optional
4
+
5
+ from striprtf import striprtf
6
+
7
+ from credsweeper.credentials.candidate import Candidate
8
+ from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
9
+ from credsweeper.file_handler.data_content_provider import DataContentProvider
10
+ from credsweeper.file_handler.string_content_provider import StringContentProvider
11
+ from credsweeper.utils.util import Util
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class RtfScanner(AbstractScanner, ABC):
    """Implements RTF (Rich Text Format) document scanning"""

    @staticmethod
    def get_lines(text: str) -> List[str]:
        """Strips the RTF markup from the text and splits the plain result into lines.

        Args:
            text: raw RTF document content

        Return:
            list of lines produced by Util.split_text from the text returned by striprtf

        """
        return Util.split_text(striprtf.rtf_to_text(text))

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Scans data as RTF

        Args:
            data_provider: provider which text is treated as an RTF document
            depth: not used by this scanner
            recursive_limit_size: not used by this scanner

        Return:
            candidates found in the extracted lines or None when any exception was raised

        """
        try:
            # the text conversion goes first - a failure here must skip the provider creation
            plain_lines = RtfScanner.get_lines(data_provider.text)
            provider = StringContentProvider(
                lines=plain_lines,
                file_path=data_provider.file_path,
                file_type=data_provider.file_type,
                info=f"{data_provider.info}|RTF",
            )
            return self.scanner.scan(provider)
        except Exception as rtf_exc:
            # best effort: the error is logged and the caller gets None
            logger.error(f"{data_provider.file_path}:{rtf_exc}")
        return None
@@ -0,0 +1,52 @@
1
+ import logging
2
+ from abc import ABC
3
+ from typing import List, Optional, Tuple
4
+
5
+ from credsweeper.common.constants import MIN_DATA_LEN
6
+ from credsweeper.credentials.candidate import Candidate
7
+ from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
8
+ from credsweeper.file_handler.data_content_provider import DataContentProvider
9
+ from credsweeper.file_handler.string_content_provider import StringContentProvider
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class StringsScanner(AbstractScanner, ABC):
    """Implements binary data scanning with ASCII strings representations"""

    @staticmethod
    def get_strings(data: bytes) -> List[Tuple[str, int]]:
        """Finds runs of printable ASCII symbols in binary data. The offset is used instead of a line number.

        A run is a contiguous sequence of TAB, SPACE and visible ASCII bytes (0x09, 0x20..0x7E).
        Any other byte terminates the run. Runs shorter than MIN_DATA_LEN are dropped,
        so short fragments separated by binary bytes are never glued into one string.

        Args:
            data: binary data to process

        Return:
            list of tuples (string, offset of the first byte of the string in data)

        """
        strings: List[Tuple[str, int]] = []
        # None means "not inside a run" - 0 cannot be the marker because it is a valid offset
        start: Optional[int] = None
        for n, x in enumerate(data):
            if 0x09 == x or 0x20 <= x <= 0x7E:
                # TAB, SPACE and visible ASCII symbols
                if start is None:
                    start = n
                continue
            if start is not None:
                # the run is over - keep only long enough and always start from scratch
                if MIN_DATA_LEN <= n - start:
                    strings.append((bytes(data[start:n]).decode("ascii"), start))
                start = None
        if start is not None and MIN_DATA_LEN <= len(data) - start:
            # the data ends with a printable run
            strings.append((bytes(data[start:]).decode("ascii"), start))
        return strings

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Extracts ASCII strings from the binary data and scans them as text lines

        Args:
            data_provider: provider with the binary data
            depth: not used by this scanner
            recursive_limit_size: not used by this scanner

        Return:
            candidates found in the extracted strings or empty list when no strings were extracted

        """
        strings = StringsScanner.get_strings(data_provider.data)
        if not strings:
            # get_strings always returns a list, so there is nothing to scan
            return []
        string_data_provider = StringContentProvider(lines=[x[0] for x in strings],
                                                     line_numbers=[x[1] for x in strings],
                                                     file_path=data_provider.file_path,
                                                     file_type=data_provider.file_type,
                                                     info=f"{data_provider.info}|STRINGS")
        return self.scanner.scan(string_data_provider)
@@ -1,3 +1,4 @@
1
+ import logging
1
2
  from functools import cached_property
2
3
  from typing import List, Optional, Generator
3
4
 
@@ -5,6 +6,8 @@ from credsweeper.file_handler.analysis_target import AnalysisTarget
5
6
  from credsweeper.file_handler.content_provider import ContentProvider
6
7
  from credsweeper.utils.util import Util
7
8
 
9
+ logger = logging.getLogger(__name__)
10
+
8
11
 
9
12
  class ByteContentProvider(ContentProvider):
10
13
  """Allow to scan byte sequence instead of extra reading a file"""
@@ -42,7 +45,13 @@ class ByteContentProvider(ContentProvider):
42
45
  def lines(self) -> List[str]:
43
46
  """lines RO getter for ByteContentProvider"""
44
47
  if self.__lines is None:
45
- self.__lines = Util.decode_bytes(self.__data)
48
+ text = Util.decode_text(self.__data)
49
+ if text is None:
50
+ logger.warning("Binary data detected %s %s %s", self.file_path, self.info,
51
+ repr(self.__data[:32]) if isinstance(self.__data, bytes) else "NONE")
52
+ self.__lines = []
53
+ else:
54
+ self.__lines = Util.split_text(text)
46
55
  return self.__lines if self.__lines is not None else []
47
56
 
48
57
  def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTarget, None, None]:
@@ -127,6 +127,8 @@ class FilePathExtractor:
127
127
  Return:
128
128
  True when the file full path should be excluded according config
129
129
  """
130
+ if config.pedantic:
131
+ return False
130
132
  path = path.replace('\\', '/')
131
133
  lower_path = path.lower()
132
134
  if config.not_allowed_path_pattern.match(lower_path):
@@ -54,7 +54,13 @@ class TextContentProvider(ContentProvider):
54
54
  def lines(self) -> Optional[List[str]]:
55
55
  """lines getter for TextContentProvider"""
56
56
  if self.__lines is None:
57
- self.__lines = Util.decode_bytes(self.data)
57
+ text = Util.decode_text(self.data)
58
+ if text is None:
59
+ logger.warning("Binary file detected %s %s %s", self.file_path, self.info,
60
+ repr(self.__data[:32]) if isinstance(self.__data, bytes) else "NONE")
61
+ self.__lines = []
62
+ else:
63
+ self.__lines = Util.split_text(text)
58
64
  return self.__lines if self.__lines is not None else []
59
65
 
60
66
  def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTarget, None, None]:
@@ -13,7 +13,6 @@ from credsweeper.filters.value_base64_part_check import ValueBase64PartCheck
13
13
  from credsweeper.filters.value_basic_auth_check import ValueBasicAuthCheck
14
14
  from credsweeper.filters.value_blocklist_check import ValueBlocklistCheck
15
15
  from credsweeper.filters.value_camel_case_check import ValueCamelCaseCheck
16
- from credsweeper.filters.value_couple_keyword_check import ValueCoupleKeywordCheck
17
16
  from credsweeper.filters.value_dictionary_keyword_check import ValueDictionaryKeywordCheck
18
17
  from credsweeper.filters.value_discord_bot_check import ValueDiscordBotCheck
19
18
  from credsweeper.filters.value_entropy_base32_check import ValueEntropyBase32Check
@@ -30,6 +29,7 @@ from credsweeper.filters.value_json_web_token_check import ValueJsonWebTokenChec
30
29
  from credsweeper.filters.value_last_word_check import ValueLastWordCheck
31
30
  from credsweeper.filters.value_length_check import ValueLengthCheck
32
31
  from credsweeper.filters.value_method_check import ValueMethodCheck
32
+ from credsweeper.filters.value_morphemes_check import ValueMorphemesCheck
33
33
  from credsweeper.filters.value_not_allowed_pattern_check import ValueNotAllowedPatternCheck
34
34
  from credsweeper.filters.value_not_part_encoded_check import ValueNotPartEncodedCheck
35
35
  from credsweeper.filters.value_number_check import ValueNumberCheck
@@ -1,6 +1,6 @@
1
1
  from credsweeper.common.constants import GroupType
2
2
  from credsweeper.config.config import Config
3
- from credsweeper.filters import ValueCoupleKeywordCheck, ValueCamelCaseCheck, ValueNumberCheck, ValuePatternCheck
3
+ from credsweeper.filters import ValueMorphemesCheck, ValueCamelCaseCheck, ValueNumberCheck, ValuePatternCheck
4
4
  from credsweeper.filters.group.group import Group
5
5
 
6
6
 
@@ -10,7 +10,7 @@ class TokenPattern(Group):
10
10
  def __init__(self, config: Config) -> None:
11
11
  super().__init__(config, GroupType.DEFAULT)
12
12
  self.filters = [
13
- ValueCoupleKeywordCheck(),
13
+ ValueMorphemesCheck(),
14
14
  ValueNumberCheck(),
15
15
  ValueCamelCaseCheck(),
16
16
  ValuePatternCheck(),
@@ -1,6 +1,6 @@
1
1
  from credsweeper.common.constants import GroupType
2
2
  from credsweeper.config.config import Config
3
- from credsweeper.filters import ValueCoupleKeywordCheck, ValuePatternCheck, ValueNumberCheck, ValueEntropyBase36Check, \
3
+ from credsweeper.filters import ValueMorphemesCheck, ValuePatternCheck, ValueNumberCheck, ValueEntropyBase36Check, \
4
4
  ValueTokenBase36Check
5
5
  from credsweeper.filters.group.group import Group
6
6
 
@@ -11,7 +11,7 @@ class WeirdBase36Token(Group):
11
11
  def __init__(self, config: Config) -> None:
12
12
  super().__init__(config, GroupType.DEFAULT)
13
13
  self.filters = [
14
- ValueCoupleKeywordCheck(),
14
+ ValueMorphemesCheck(threshold=1),
15
15
  ValuePatternCheck(),
16
16
  ValueNumberCheck(),
17
17
  ValueTokenBase36Check(),
@@ -1,6 +1,6 @@
1
1
  from credsweeper.common.constants import GroupType
2
2
  from credsweeper.config.config import Config
3
- from credsweeper.filters import ValueCoupleKeywordCheck, ValueNotPartEncodedCheck, \
3
+ from credsweeper.filters import ValueMorphemesCheck, ValueNotPartEncodedCheck, \
4
4
  ValueBase64DataCheck, ValueEntropyBase64Check, ValuePatternCheck, ValueNumberCheck, ValueTokenBase64Check, \
5
5
  ValueBase64PartCheck
6
6
  from credsweeper.filters.group.group import Group
@@ -12,7 +12,7 @@ class WeirdBase64Token(Group):
12
12
  def __init__(self, config: Config) -> None:
13
13
  super().__init__(config, GroupType.DEFAULT)
14
14
  self.filters = [
15
- ValueCoupleKeywordCheck(),
15
+ ValueMorphemesCheck(threshold=1),
16
16
  ValueNumberCheck(),
17
17
  ValueBase64DataCheck(),
18
18
  ValueTokenBase64Check(),
@@ -35,6 +35,8 @@ class ValueFilePathCheck(Filter):
35
35
 
36
36
  """
37
37
  value = line_data.value
38
+ bit_length = len(value).bit_length()
39
+ morpheme_threshold = 1 if 6 > bit_length else bit_length - 4
38
40
  contains_unix_separator = '/' in value
39
41
  if contains_unix_separator:
40
42
  if ("://" in value #
@@ -45,14 +47,14 @@ class ValueFilePathCheck(Filter):
45
47
  or value.startswith("//") and ':' == line_data.separator):
46
48
  # common case for url definition or aliases
47
49
  # or _keyword_://example.com where : is the separator
48
- return static_keyword_checklist.check_morphemes(value.lower(), 1)
50
+ return static_keyword_checklist.check_morphemes(value.lower(), morpheme_threshold)
49
51
  # base64 encoded data might look like linux path
50
52
  min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(value))
51
53
  # get minimal entropy to compare with shannon entropy of found value
52
54
  # min_entropy == 0 means that the value cannot be checked with the entropy due high variance
53
55
  for i in value:
54
56
  if i not in self.base64stdpad_possible_set:
55
- # value contains wrong BASE64STDPAD_CHARS symbols like -_
57
+ # value contains wrong BASE64STDPAD_CHARS symbols like -_.
56
58
  break
57
59
  else:
58
60
  # all symbols are from base64 alphabet
@@ -74,5 +76,5 @@ class ValueFilePathCheck(Filter):
74
76
  break
75
77
  else:
76
78
  if contains_unix_separator ^ contains_windows_separator:
77
- return static_keyword_checklist.check_morphemes(value.lower(), 1)
79
+ return static_keyword_checklist.check_morphemes(value.lower(), morpheme_threshold)
78
80
  return False