credsweeper 1.12.2__tar.gz → 1.13.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

Files changed (165)
  1. {credsweeper-1.12.2 → credsweeper-1.13.1}/.gitignore +2 -0
  2. {credsweeper-1.12.2 → credsweeper-1.13.1}/PKG-INFO +7 -7
  3. {credsweeper-1.12.2 → credsweeper-1.13.1}/README.md +1 -1
  4. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/__init__.py +1 -1
  5. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/__main__.py +15 -8
  6. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/app.py +7 -2
  7. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/common/keyword_pattern.py +6 -3
  8. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/common/morpheme_checklist.txt +24 -6
  9. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/config/config.py +1 -0
  10. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/credentials/line_data.py +21 -6
  11. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/deep_scanner.py +12 -6
  12. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/jks_scanner.py +11 -2
  13. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/pkcs_scanner.py +4 -0
  14. credsweeper-1.13.1/credsweeper/deep_scanner/rtf_scanner.py +41 -0
  15. credsweeper-1.13.1/credsweeper/deep_scanner/strings_scanner.py +52 -0
  16. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/file_handler/byte_content_provider.py +10 -1
  17. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/file_handler/file_path_extractor.py +2 -0
  18. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/file_handler/text_content_provider.py +7 -1
  19. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/__init__.py +1 -1
  20. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/group/token_pattern.py +2 -2
  21. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/group/weird_base36_token.py +2 -2
  22. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/group/weird_base64_token.py +2 -2
  23. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_file_path_check.py +5 -3
  24. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_github_check.py +3 -2
  25. credsweeper-1.13.1/credsweeper/filters/value_morphemes_check.py +43 -0
  26. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_string_type_check.py +1 -0
  27. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/features/feature.py +1 -18
  28. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/features/file_extension.py +1 -1
  29. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/features/has_html_tag.py +10 -8
  30. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/features/is_secret_numeric.py +4 -3
  31. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/features/rule_name.py +1 -1
  32. credsweeper-1.13.1/credsweeper/ml_model/features/word_in.py +36 -0
  33. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/features/word_in_path.py +2 -3
  34. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/features/word_in_postamble.py +1 -4
  35. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/features/word_in_preamble.py +1 -4
  36. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/features/word_in_transition.py +1 -4
  37. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/features/word_in_value.py +2 -3
  38. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/features/word_in_variable.py +2 -3
  39. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/ml_config.json +15 -8
  40. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/ml_model.onnx +0 -0
  41. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/ml_validator.py +1 -1
  42. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/rules/config.yaml +129 -128
  43. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/scanner/scanner.py +12 -7
  44. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/secret/config.json +18 -5
  45. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/utils/util.py +19 -16
  46. {credsweeper-1.12.2 → credsweeper-1.13.1}/pyproject.toml +5 -5
  47. credsweeper-1.12.2/credsweeper/filters/value_couple_keyword_check.py +0 -28
  48. credsweeper-1.12.2/credsweeper/ml_model/features/word_in.py +0 -59
  49. {credsweeper-1.12.2 → credsweeper-1.13.1}/LICENSE +0 -0
  50. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/common/__init__.py +0 -0
  51. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/common/constants.py +0 -0
  52. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/common/keyword_checklist.py +0 -0
  53. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/common/keyword_checklist.txt +0 -0
  54. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/config/__init__.py +0 -0
  55. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/credentials/__init__.py +0 -0
  56. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/credentials/augment_candidates.py +0 -0
  57. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/credentials/candidate.py +0 -0
  58. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/credentials/candidate_group_generator.py +0 -0
  59. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/credentials/candidate_key.py +0 -0
  60. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/credentials/credential_manager.py +0 -0
  61. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/__init__.py +0 -0
  62. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/abstract_scanner.py +0 -0
  63. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/byte_scanner.py +0 -0
  64. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/bzip2_scanner.py +0 -0
  65. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/deb_scanner.py +0 -0
  66. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/docx_scanner.py +0 -0
  67. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/eml_scanner.py +0 -0
  68. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/encoder_scanner.py +0 -0
  69. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/gzip_scanner.py +0 -0
  70. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/html_scanner.py +0 -0
  71. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/jclass_scanner.py +0 -0
  72. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/lang_scanner.py +0 -0
  73. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/lzma_scanner.py +0 -0
  74. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/mxfile_scanner.py +0 -0
  75. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/patch_scanner.py +0 -0
  76. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/pdf_scanner.py +0 -0
  77. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/pptx_scanner.py +0 -0
  78. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/rpm_scanner.py +0 -0
  79. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/sqlite3_scanner.py +0 -0
  80. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/tar_scanner.py +0 -0
  81. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/tmx_scanner.py +0 -0
  82. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/xlsx_scanner.py +0 -0
  83. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/xml_scanner.py +0 -0
  84. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/deep_scanner/zip_scanner.py +0 -0
  85. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/file_handler/__init__.py +0 -0
  86. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/file_handler/abstract_provider.py +0 -0
  87. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/file_handler/analysis_target.py +0 -0
  88. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/file_handler/content_provider.py +0 -0
  89. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/file_handler/data_content_provider.py +0 -0
  90. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/file_handler/descriptor.py +0 -0
  91. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/file_handler/diff_content_provider.py +0 -0
  92. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/file_handler/files_provider.py +0 -0
  93. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/file_handler/patches_provider.py +0 -0
  94. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/file_handler/string_content_provider.py +0 -0
  95. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/file_handler/struct_content_provider.py +0 -0
  96. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/filter.py +0 -0
  97. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/group/__init__.py +0 -0
  98. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/group/general_keyword.py +0 -0
  99. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/group/general_pattern.py +0 -0
  100. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/group/group.py +0 -0
  101. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/group/password_keyword.py +0 -0
  102. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/group/url_credentials_group.py +0 -0
  103. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/line_git_binary_check.py +0 -0
  104. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/line_specific_key_check.py +0 -0
  105. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/line_uue_part_check.py +0 -0
  106. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_allowlist_check.py +0 -0
  107. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_array_dictionary_check.py +0 -0
  108. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_atlassian_token_check.py +0 -0
  109. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_azure_token_check.py +0 -0
  110. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_base32_data_check.py +0 -0
  111. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_base64_data_check.py +0 -0
  112. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_base64_encoded_pem_check.py +0 -0
  113. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_base64_key_check.py +0 -0
  114. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_base64_part_check.py +0 -0
  115. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_basic_auth_check.py +0 -0
  116. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_blocklist_check.py +0 -0
  117. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_camel_case_check.py +0 -0
  118. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_dictionary_keyword_check.py +0 -0
  119. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_discord_bot_check.py +0 -0
  120. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_entropy_base32_check.py +0 -0
  121. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_entropy_base36_check.py +0 -0
  122. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_entropy_base64_check.py +0 -0
  123. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_entropy_base_check.py +0 -0
  124. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_grafana_check.py +0 -0
  125. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_grafana_service_check.py +0 -0
  126. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_hex_number_check.py +0 -0
  127. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_jfrog_token_check.py +0 -0
  128. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_json_web_key_check.py +0 -0
  129. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_json_web_token_check.py +0 -0
  130. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_last_word_check.py +0 -0
  131. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_length_check.py +0 -0
  132. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_method_check.py +0 -0
  133. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_not_allowed_pattern_check.py +0 -0
  134. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_not_part_encoded_check.py +0 -0
  135. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_number_check.py +0 -0
  136. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_pattern_check.py +0 -0
  137. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_similarity_check.py +0 -0
  138. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_split_keyword_check.py +0 -0
  139. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_token_base32_check.py +0 -0
  140. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_token_base36_check.py +0 -0
  141. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_token_base64_check.py +0 -0
  142. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_token_base_check.py +0 -0
  143. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/filters/value_token_check.py +0 -0
  144. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/logger/__init__.py +0 -0
  145. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/logger/logger.py +0 -0
  146. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/__init__.py +0 -0
  147. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/features/__init__.py +0 -0
  148. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/features/entropy_evaluation.py +0 -0
  149. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/features/length_of_attribute.py +0 -0
  150. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/features/morpheme_dense.py +0 -0
  151. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/features/rule_severity.py +0 -0
  152. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/ml_model/features/search_in_attribute.py +0 -0
  153. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/py.typed +0 -0
  154. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/rules/__init__.py +0 -0
  155. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/rules/rule.py +0 -0
  156. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/scanner/__init__.py +0 -0
  157. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/scanner/scan_type/__init__.py +0 -0
  158. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/scanner/scan_type/multi_pattern.py +0 -0
  159. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/scanner/scan_type/pem_key_pattern.py +0 -0
  160. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/scanner/scan_type/scan_type.py +0 -0
  161. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/scanner/scan_type/single_pattern.py +0 -0
  162. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/secret/log.yaml +0 -0
  163. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/utils/__init__.py +0 -0
  164. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/utils/hop_stat.py +0 -0
  165. {credsweeper-1.12.2 → credsweeper-1.13.1}/credsweeper/utils/pem_key_detector.py +0 -0
@@ -149,3 +149,5 @@ fuzz/corpus/*
149
149
  # experiment result and data
150
150
  /experiment/results/*
151
151
  /experiment/data/*
152
+ *.out
153
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: credsweeper
3
- Version: 1.12.2
3
+ Version: 1.13.1
4
4
  Summary: Credential Sweeper
5
5
  Project-URL: Homepage, https://github.com/Samsung/CredSweeper
6
6
  Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues
@@ -10,13 +10,12 @@ Classifier: License :: OSI Approved :: MIT License
10
10
  Classifier: Operating System :: OS Independent
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Classifier: Programming Language :: Python :: 3 :: Only
13
- Classifier: Programming Language :: Python :: 3.9
14
13
  Classifier: Programming Language :: Python :: 3.10
15
14
  Classifier: Programming Language :: Python :: 3.11
16
15
  Classifier: Programming Language :: Python :: 3.12
17
16
  Classifier: Topic :: Security
18
17
  Classifier: Topic :: Software Development :: Quality Assurance
19
- Requires-Python: >=3.9
18
+ Requires-Python: >=3.10
20
19
  Requires-Dist: base58
21
20
  Requires-Dist: beautifulsoup4>=4.11.0
22
21
  Requires-Dist: colorama
@@ -24,10 +23,10 @@ Requires-Dist: cryptography
24
23
  Requires-Dist: gitpython
25
24
  Requires-Dist: humanfriendly
26
25
  Requires-Dist: lxml
27
- Requires-Dist: numpy<2.0.0
26
+ Requires-Dist: numpy
28
27
  Requires-Dist: odfpy
29
- Requires-Dist: onnxruntime; platform_system != 'Windows'
30
- Requires-Dist: onnxruntime==1.19.2; platform_system == 'Windows'
28
+ Requires-Dist: onnxruntime; platform_system != 'Windows' or python_version != '3.12'
29
+ Requires-Dist: onnxruntime==1.19.2; platform_system == 'Windows' and python_version == '3.12'
31
30
  Requires-Dist: openpyxl
32
31
  Requires-Dist: pandas
33
32
  Requires-Dist: pdfminer-six
@@ -38,6 +37,7 @@ Requires-Dist: python-docx
38
37
  Requires-Dist: python-pptx
39
38
  Requires-Dist: pyyaml
40
39
  Requires-Dist: rpmfile
40
+ Requires-Dist: striprtf
41
41
  Requires-Dist: whatthepatch
42
42
  Requires-Dist: xlrd
43
43
  Description-Content-Type: text/markdown
@@ -90,7 +90,7 @@ Full documentation can be found here: <https://credsweeper.readthedocs.io/>
90
90
 
91
91
  ### Main Requirements
92
92
 
93
- - Python 3.9, 3.10, 3.11, 3.12
93
+ - Python 3.10, 3.11, 3.12
94
94
 
95
95
  ### Installation
96
96
 
@@ -46,7 +46,7 @@ Full documentation can be found here: <https://credsweeper.readthedocs.io/>
46
46
 
47
47
  ### Main Requirements
48
48
 
49
- - Python 3.9, 3.10, 3.11, 3.12
49
+ - Python 3.10, 3.11, 3.12
50
50
 
51
51
  ### Installation
52
52
 
@@ -24,4 +24,4 @@ __all__ = [
24
24
  "__version__"
25
25
  ]
26
26
 
27
- __version__ = "1.12.2"
27
+ __version__ = "1.13.1"
@@ -1,4 +1,5 @@
1
1
  import binascii
2
+ import contextlib
2
3
  import logging
3
4
  import os
4
5
  import sys
@@ -34,24 +35,24 @@ def positive_int(value: Any) -> int:
34
35
  return int_value
35
36
 
36
37
 
37
- def threshold_or_float(arg: str) -> Union[float, ThresholdPreset]:
38
+ def threshold_or_float_or_zero(arg: str) -> Union[int, float, ThresholdPreset]:
38
39
  """Return ThresholdPreset or a float from the input string
39
40
 
40
41
  Args:
41
42
  arg: string that either a float or one of allowed values in ThresholdPreset
42
43
 
43
44
  Returns:
44
- float if arg convertible to float, ThresholdPreset if one of the allowed values
45
+ int = 0 to disable ML validator, float if arg convertible to float, ThresholdPreset if one of the allowed values
45
46
 
46
47
  Raises:
47
48
  ArgumentTypeError: if arg cannot be interpreted as float or ThresholdPreset
48
49
 
49
50
  """
50
51
  allowed_presents = [e.value for e in ThresholdPreset]
51
- try:
52
+ if '0' == arg:
53
+ return 0
54
+ with contextlib.suppress(ValueError):
52
55
  return float(arg) # try convert to float
53
- except ValueError:
54
- pass
55
56
  if arg in allowed_presents:
56
57
  return ThresholdPreset[arg]
57
58
  raise ArgumentTypeError(f"value must be a float or one of {allowed_presents}")
@@ -158,6 +159,10 @@ def get_arguments() -> Namespace:
158
159
  help="find files by predefined extension",
159
160
  dest="find_by_ext",
160
161
  action="store_true")
162
+ parser.add_argument("--pedantic",
163
+ help="process files without extension",
164
+ action=BooleanOptionalAction,
165
+ default=False)
161
166
  parser.add_argument("--depth",
162
167
  help="additional recursive search in data (experimental)",
163
168
  type=positive_int,
@@ -172,11 +177,11 @@ def get_arguments() -> Namespace:
172
177
  "The lower the threshold - the more credentials will be reported. "
173
178
  f"Allowed values: float between 0 and 1, or any of {[e.value for e in ThresholdPreset]} "
174
179
  "(default: medium)",
175
- type=threshold_or_float,
180
+ type=threshold_or_float_or_zero,
176
181
  default=ThresholdPreset.medium,
177
182
  dest="ml_threshold",
178
183
  required=False,
179
- metavar="FLOAT_OR_STR")
184
+ metavar="THRESHOLD_OR_FLOAT_OR_ZERO")
180
185
  parser.add_argument("--ml_batch_size",
181
186
  "-b",
182
187
  help="batch size for model inference (default: 16)",
@@ -299,6 +304,7 @@ def get_credsweeper(args: Namespace) -> CredSweeper:
299
304
  ml_model=args.ml_model,
300
305
  ml_providers=args.ml_providers,
301
306
  find_by_ext=args.find_by_ext,
307
+ pedantic=args.pedantic,
302
308
  depth=args.depth,
303
309
  doc=args.doc,
304
310
  severity=args.severity,
@@ -335,7 +341,8 @@ def scan(args: Namespace, content_provider: AbstractProvider) -> int:
335
341
  def get_commit_providers(commit: Commit, repo: Repo) -> Sequence[ByteContentProvider]:
336
342
  """Process a commit and for providers"""
337
343
  result = {}
338
- ancestors = commit.parents or [repo.tree()]
344
+ # use the hardcoded sha1 until sha256 objects are not supported by GitPython
345
+ ancestors = commit.parents or [repo.tree("4b825dc642cb6eb9a060e54bf8d69288fbee4904")]
339
346
  for parent in ancestors:
340
347
  for diff in parent.diff(commit):
341
348
  # only result files
@@ -52,11 +52,12 @@ class CredSweeper:
52
52
  use_filters: bool = True,
53
53
  pool_count: int = 1,
54
54
  ml_batch_size: Optional[int] = None,
55
- ml_threshold: Union[float, ThresholdPreset] = ThresholdPreset.medium,
55
+ ml_threshold: Union[int, float, ThresholdPreset] = ThresholdPreset.medium,
56
56
  ml_config: Union[None, str, Path] = None,
57
57
  ml_model: Union[None, str, Path] = None,
58
58
  ml_providers: Optional[str] = None,
59
59
  find_by_ext: bool = False,
60
+ pedantic: bool = False,
60
61
  depth: int = 0,
61
62
  doc: bool = False,
62
63
  severity: Union[Severity, str] = Severity.INFO,
@@ -86,6 +87,7 @@ class CredSweeper:
86
87
  ml_model: str or Path to set custom ml model
87
88
  ml_providers: str - comma separated list with providers
88
89
  find_by_ext: boolean - files will be reported by extension
90
+ pedantic: boolean - scan all files
89
91
  depth: int - how deep container files will be scanned
90
92
  doc: boolean - document-specific scanning
91
93
  severity: Severity - minimum severity level of rule
@@ -103,6 +105,7 @@ class CredSweeper:
103
105
  config_dict = self._get_config_dict(config_path=config_path,
104
106
  use_filters=use_filters,
105
107
  find_by_ext=find_by_ext,
108
+ pedantic=pedantic,
106
109
  depth=depth,
107
110
  doc=doc,
108
111
  severity=_severity,
@@ -145,6 +148,7 @@ class CredSweeper:
145
148
  config_path: Optional[str], #
146
149
  use_filters: bool, #
147
150
  find_by_ext: bool, #
151
+ pedantic: bool, #
148
152
  depth: int, #
149
153
  doc: bool, #
150
154
  severity: Severity, #
@@ -155,6 +159,7 @@ class CredSweeper:
155
159
  config_dict["use_filters"] = use_filters
156
160
  config_dict["find_by_ext"] = find_by_ext
157
161
  config_dict["size_limit"] = size_limit
162
+ config_dict["pedantic"] = pedantic
158
163
  config_dict["depth"] = depth
159
164
  config_dict["doc"] = doc
160
165
  config_dict["severity"] = severity.value
@@ -169,7 +174,7 @@ class CredSweeper:
169
174
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
170
175
 
171
176
  def _use_ml_validation(self) -> bool:
172
- if isinstance(self.ml_threshold, (float, int)) and 0 >= self.ml_threshold:
177
+ if isinstance(self.ml_threshold, int) and 0 == self.ml_threshold:
173
178
  logger.info("ML validation is disabled")
174
179
  return False
175
180
  if not self.credential_manager.candidates:
@@ -3,7 +3,10 @@ import re
3
3
 
4
4
  class KeywordPattern:
5
5
  """Pattern set of keyword types"""
6
- directive = r"(?P<directive>(?:(?:[#%]define|%global)(?:\s|\\t)|\bset))?"
6
+ directive = r"(?P<directive>(?:" \
7
+ r"(?:[#%]define|define(?=(\s|\\{1,8}[tnr])*\()|%global)" \
8
+ r"(?:\s?\(|\s|\\{1,8}[tnr]){1,8}|\bset(?=\b|\w*(\s|\\{1,8}[tnr])*\()" \
9
+ r"))?"
7
10
  key_left = r"(?:\\[nrt]|(\\\\*u00|%)[0-9a-f]{2}|\s)*" \
8
11
  r"(?P<variable>(([\"'`]{1,8}[^:=\"'`}<>\\/&?]*|[^:=\"'`}<>\s()\\/&?;,%]*)"
9
12
  # keyword will be inserted here
@@ -13,7 +16,7 @@ class KeywordPattern:
13
16
  r")" # <variable>
14
17
  separator = r"(?(directive)|(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*)" \
15
18
  r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=~|=" \
16
- r"|(?(directive)(\\t|\s|\((?!\))){1,80}|%3d))" \
19
+ r"|(?(directive)(,|\\t|\s|\((?!\))){1,80}|%3d))" \
17
20
  r"(\s|\\{1,8}[tnr])*"
18
21
  # might be curly, square or parenthesis with words before
19
22
  wrap = r"(?P<wrap>(" \
@@ -23,7 +26,7 @@ class KeywordPattern:
23
26
  r"\s*" \
24
27
  r"(\[(?!\])|\((?!\))|\{(?!\}))" \
25
28
  r"(\s|\\{1,8}[tnr])*" \
26
- r"(?(get)('[^']+'|\"[^\"]+\")\s*,\s*|)" \
29
+ r"(?(get)('[^']{1,31}'|\"[^\"]{1,31}\")\s*,\s*|)" \
27
30
  r"([0-9a-z_]{1,32}\s*[:=]\s*)?" \
28
31
  r"){1,8})?"
29
32
  string_prefix = r"(((b|r|br|rb|u|f|rf|fr|l|@)(?=(\\*[\"'`])))?"
@@ -14,11 +14,15 @@
14
14
  /var
15
15
  000
16
16
  111
17
+ 14159265
18
+ 18284590
17
19
  222
18
20
  333
19
21
  444
20
22
  555
23
+ 65358979
21
24
  666
25
+ 71828182
22
26
  777
23
27
  80211
24
28
  888
@@ -195,7 +199,7 @@ aux
195
199
  avail
196
200
  avatar
197
201
  aver
198
- awesome
202
+ awesom
199
203
  axis
200
204
  azure
201
205
  back
@@ -234,6 +238,7 @@ bless
234
238
  blic
235
239
  blish
236
240
  blob
241
+ blood
237
242
  blue
238
243
  board
239
244
  bob
@@ -244,7 +249,7 @@ boost
244
249
  boot
245
250
  boss
246
251
  bot
247
- bound
252
+ boun
248
253
  box
249
254
  branch
250
255
  break
@@ -498,6 +503,7 @@ dust
498
503
  dvb
499
504
  dynamic
500
505
  dynamo
506
+ eadbee
501
507
  easin
502
508
  easy
503
509
  ecdhe
@@ -608,6 +614,7 @@ fleet
608
614
  flick
609
615
  flix
610
616
  float
617
+ flood
611
618
  floor
612
619
  fluent
613
620
  fluid
@@ -616,7 +623,7 @@ focus
616
623
  foo
617
624
  for
618
625
  fossil
619
- found
626
+ foun
620
627
  fpga
621
628
  frame
622
629
  free
@@ -649,6 +656,7 @@ git
649
656
  given
650
657
  global
651
658
  gobble
659
+ good
652
660
  google
653
661
  grab
654
662
  grace
@@ -704,6 +712,7 @@ home
704
712
  hook
705
713
  horizon
706
714
  host
715
+ houn
707
716
  hours
708
717
  html
709
718
  http
@@ -790,6 +799,7 @@ jpg_
790
799
  json
791
800
  jump
792
801
  justif
802
+ kafka
793
803
  kerberos
794
804
  kernel
795
805
  key
@@ -799,6 +809,7 @@ kind
799
809
  kinesis
800
810
  kirk
801
811
  know
812
+ knox
802
813
  kris
803
814
  lab
804
815
  lag
@@ -855,7 +866,7 @@ local
855
866
  lock
856
867
  log
857
868
  long
858
- lookup
869
+ look
859
870
  loop
860
871
  loose
861
872
  lost
@@ -948,6 +959,7 @@ ndow
948
959
  ned
949
960
  need
950
961
  neigh
962
+ neo4j
951
963
  ner
952
964
  net
953
965
  neutr
@@ -992,6 +1004,7 @@ oncat
992
1004
  one
993
1005
  onfig
994
1006
  only
1007
+ ookup
995
1008
  open
996
1009
  opt/
997
1010
  opted
@@ -1009,6 +1022,7 @@ ormat
1009
1022
  orph
1010
1023
  otorola
1011
1024
  ottle
1025
+ ound
1012
1026
  ously
1013
1027
  out
1014
1028
  over
@@ -1068,6 +1082,7 @@ pose
1068
1082
  posit
1069
1083
  possib
1070
1084
  post
1085
+ poun
1071
1086
  power
1072
1087
  pre_
1073
1088
  pred
@@ -1212,7 +1227,7 @@ rotat
1212
1227
  rotocol
1213
1228
  rottl
1214
1229
  rough
1215
- round
1230
+ roun
1216
1231
  roup
1217
1232
  row
1218
1233
  rroga
@@ -1318,9 +1333,10 @@ sock
1318
1333
  soft
1319
1334
  solid
1320
1335
  solve
1336
+ some
1321
1337
  sony
1322
1338
  sort
1323
- sound
1339
+ soun
1324
1340
  source
1325
1341
  space
1326
1342
  spacing
@@ -1430,6 +1446,7 @@ tio
1430
1446
  tish
1431
1447
  title
1432
1448
  titud
1449
+ tizen
1433
1450
  tmp/
1434
1451
  to_
1435
1452
  tod
@@ -1575,6 +1592,7 @@ yield
1575
1592
  you
1576
1593
  zeppelin
1577
1594
  zero
1595
+ zigbee
1578
1596
  zing
1579
1597
  zona
1580
1598
  zorro
@@ -35,6 +35,7 @@ class Config:
35
35
  self.candidate_output: List[str] = config["candidate_output"]
36
36
  self.find_by_ext: bool = config["find_by_ext"]
37
37
  self.size_limit: Optional[int] = parse_size(config["size_limit"]) if config["size_limit"] is not None else None
38
+ self.pedantic: bool = bool(config["pedantic"])
38
39
  self.depth: int = int(config["depth"])
39
40
  self.doc: bool = config["doc"]
40
41
  self.severity: Severity = Severity.get(config.get("severity"))
@@ -163,6 +163,7 @@ class LineData:
163
163
  self.clean_url_parameters()
164
164
  self.clean_bash_parameters()
165
165
  self.clean_toml_parameters()
166
+ self.clean_tag_parameters()
166
167
  if 0 <= self.value_start and 0 <= self.value_end and len(self.value) < len(_value):
167
168
  start = _value.find(self.value)
168
169
  self.value_start += start
@@ -196,15 +197,14 @@ class LineData:
196
197
  If line seem to be a URL - split by & character.
197
198
  Variable should be right most value after & or ? ([-1]). And value should be left most before & ([0])
198
199
  """
199
- if self.check_url_part():
200
+ # skip sanitize in case of URL credential rule - the regex is mature enough
201
+ if self.check_url_part() and not self.variable.endswith("://"):
200
202
  # all checks have passed - line before the value may be a URL
201
203
  self.variable = self.variable.rsplit('&')[-1].rsplit('?')[-1].rsplit(';')[-1]
202
204
  self.value = self.value.split('&', maxsplit=1)[0].split(';', maxsplit=1)[0].split('#', maxsplit=1)[0]
203
- if not self.variable.endswith("://"):
204
- # skip sanitize in case of URL credential rule
205
- self.value = self.url_unicode_split.split(self.value)[0]
206
- if self._3d_escaped_separator:
207
- self.value = self.url_percent_split.split(self.value)[0]
205
+ self.value = self.url_unicode_split.split(self.value)[0]
206
+ if self._3d_escaped_separator:
207
+ self.value = self.url_percent_split.split(self.value)[0]
208
208
 
209
209
  def clean_bash_parameters(self) -> None:
210
210
  """Split variable and value by bash special characters, if line assumed to be CLI command."""
@@ -232,6 +232,21 @@ class LineData:
232
232
  self.value = self.value[:-1]
233
233
  cleaning_required = True
234
234
 
235
+ def clean_tag_parameters(self) -> None:
236
+ """Remove closing tag from value if the opened is somewhere before in line"""
237
+ cleaning_required = self.value and self.value.endswith('>')
238
+ while cleaning_required:
239
+ closing_tag_pos = self.value.rfind("</")
240
+ if 0 <= closing_tag_pos:
241
+ # use `<a` to avoid tag parameters
242
+ opening_tag_prefix = f"<{self.value[closing_tag_pos + 2:-1]}"
243
+ if cleaning_required := (opening_tag_prefix not in self.value
244
+ and 0 <= self.line.find(opening_tag_prefix, 0, self.value_start)):
245
+ self.value = self.value[:closing_tag_pos]
246
+ cleaning_required = self.value and self.value.endswith('>')
247
+ else:
248
+ break
249
+
235
250
  def sanitize_variable(self) -> None:
236
251
  """Remove trailing spaces, dashes and quotations around the variable. Correct position."""
237
252
  sanitized_var_len = 0
@@ -1,7 +1,6 @@
1
1
  import logging
2
2
  from typing import List, Any, Tuple
3
3
 
4
- from credsweeper.common.constants import MIN_DATA_LEN
5
4
  from credsweeper.config.config import Config
6
5
  from credsweeper.scanner.scanner import Scanner
7
6
  from credsweeper.utils.util import Util
@@ -23,7 +22,9 @@ from .pdf_scanner import PdfScanner
23
22
  from .pkcs_scanner import PkcsScanner
24
23
  from .pptx_scanner import PptxScanner
25
24
  from .rpm_scanner import RpmScanner
25
+ from .rtf_scanner import RtfScanner
26
26
  from .sqlite3_scanner import Sqlite3Scanner
27
+ from .strings_scanner import StringsScanner
27
28
  from .tar_scanner import TarScanner
28
29
  from .tmx_scanner import TmxScanner
29
30
  from .xlsx_scanner import XlsxScanner
@@ -49,8 +50,10 @@ class DeepScanner(
49
50
  PdfScanner, #
50
51
  PkcsScanner, #
51
52
  PptxScanner, #
53
+ RtfScanner, #
52
54
  RpmScanner, #
53
55
  Sqlite3Scanner, #
56
+ StringsScanner, #
54
57
  TarScanner, #
55
58
  DebScanner, #
56
59
  XmlScanner, #
@@ -133,6 +136,9 @@ class DeepScanner(
133
136
  deep_scanners.append(Sqlite3Scanner)
134
137
  elif Util.is_asn1(data):
135
138
  deep_scanners.append(PkcsScanner)
139
+ elif Util.is_rtf(data):
140
+ deep_scanners.append(RtfScanner)
141
+ fallback_scanners.append(ByteScanner)
136
142
  elif Util.is_xml(data):
137
143
  if Util.is_html(data):
138
144
  deep_scanners.append(HtmlScanner)
@@ -158,9 +164,6 @@ class DeepScanner(
158
164
  deep_scanners.append(PatchScanner)
159
165
  fallback_scanners.append(EmlScanner)
160
166
  fallback_scanners.append(ByteScanner)
161
- elif Util.is_known(data):
162
- # the format is known but cannot be scanned
163
- pass
164
167
  elif not Util.is_binary(data):
165
168
  if 0 < depth:
166
169
  deep_scanners.append(PatchScanner)
@@ -168,6 +171,9 @@ class DeepScanner(
168
171
  deep_scanners.append(LangScanner)
169
172
  deep_scanners.append(ByteScanner)
170
173
  else:
171
- logger.warning("Cannot apply a deep scanner for type %s prefix %s %d", descriptor,
172
- repr(data[:MIN_DATA_LEN]), len(data))
174
+ if 0 < depth:
175
+ deep_scanners.append(StringsScanner)
176
+ else:
177
+ logger.warning("Cannot apply a deep scanner for type %s prefix %s %d", descriptor, repr(data[:32]),
178
+ len(data))
173
179
  return deep_scanners, fallback_scanners
@@ -4,6 +4,7 @@ from typing import List, Optional
4
4
 
5
5
  import jks
6
6
 
7
+ from credsweeper.common.constants import Severity, Confidence
7
8
  from credsweeper.credentials.candidate import Candidate
8
9
  from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
9
10
  from credsweeper.file_handler.data_content_provider import DataContentProvider
@@ -24,14 +25,22 @@ class JksScanner(AbstractScanner, ABC):
24
25
  try:
25
26
  keystore = jks.KeyStore.loads(data_provider.data, pw_probe, try_decrypt_keys=True)
26
27
  # the password probe has passed, it will be the value
27
- info = (f"{data_provider.info}|JKS:"
28
- f"{'sensitive data' if keystore.private_keys or keystore.secret_keys else 'default password'}")
28
+ if keystore.private_keys or keystore.secret_keys:
29
+ severity = Severity.HIGH
30
+ confidence = Confidence.STRONG
31
+ info = f"{data_provider.info}|JKS:default password"
32
+ else:
33
+ severity = Severity.LOW
34
+ confidence = Confidence.WEAK
35
+ info = f"{data_provider.info}|JKS:sensitive data"
29
36
  candidate = Candidate.get_dummy_candidate(
30
37
  self.config, #
31
38
  data_provider.file_path, #
32
39
  data_provider.file_type, #
33
40
  info, #
34
41
  "Java Key Storage")
42
+ candidate.severity = severity
43
+ candidate.confidence = confidence
35
44
  value = pw_probe or "<EMPTY PASSWORD>"
36
45
  candidate.line_data_list[0].line = f"'{value}' is the password"
37
46
  candidate.line_data_list[0].value = pw_probe or "<EMPTY PASSWORD>"
@@ -3,6 +3,7 @@ import logging
3
3
  from abc import ABC
4
4
  from typing import List, Optional
5
5
 
6
+ from credsweeper.common.constants import Severity, Confidence
6
7
  from credsweeper.credentials.candidate import Candidate
7
8
  from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
8
9
  from credsweeper.file_handler.data_content_provider import DataContentProvider
@@ -35,6 +36,9 @@ class PkcsScanner(AbstractScanner, ABC):
35
36
  "PKCS")
36
37
  candidate.line_data_list[0].line = base64.b64encode(data_provider.data).decode()
37
38
  candidate.line_data_list[0].value = repr(password)
39
+ # high severity is assigned to private key rules
40
+ candidate.severity = Severity.HIGH
41
+ candidate.confidence = Confidence.STRONG
38
42
  return [candidate]
39
43
  except Exception as pkcs_exc:
40
44
  logger.debug(f"{data_provider.file_path}:{pw_probe}:{pkcs_exc}")
@@ -0,0 +1,41 @@
1
+ import logging
2
+ from abc import ABC
3
+ from typing import List, Optional
4
+
5
+ from striprtf import striprtf
6
+
7
+ from credsweeper.credentials.candidate import Candidate
8
+ from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
9
+ from credsweeper.file_handler.data_content_provider import DataContentProvider
10
+ from credsweeper.file_handler.string_content_provider import StringContentProvider
11
+ from credsweeper.utils.util import Util
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class RtfScanner(AbstractScanner, ABC):
    """Implements RTF (Rich Text Format) data scanning"""

    @staticmethod
    def get_lines(text: str) -> List[str]:
        """Converts RTF markup to plain text with striprtf and splits the result with Util.split_text"""
        return Util.split_text(striprtf.rtf_to_text(text))

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Scans the text extracted from RTF data. Returns None when the extraction or the scan raises."""
        try:
            plain_lines = RtfScanner.get_lines(data_provider.text)
            plain_text_provider = StringContentProvider(lines=plain_lines,
                                                        file_path=data_provider.file_path,
                                                        file_type=data_provider.file_type,
                                                        info=f"{data_provider.info}|RTF")
            return self.scanner.scan(plain_text_provider)
        except Exception as rtf_exc:
            # any failure is logged and reported to the caller as "not scanned"
            logger.error(f"{data_provider.file_path}:{rtf_exc}")
        return None
@@ -0,0 +1,52 @@
1
+ import logging
2
+ from abc import ABC
3
+ from typing import List, Optional, Tuple
4
+
5
+ from credsweeper.common.constants import MIN_DATA_LEN
6
+ from credsweeper.credentials.candidate import Candidate
7
+ from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
8
+ from credsweeper.file_handler.data_content_provider import DataContentProvider
9
+ from credsweeper.file_handler.string_content_provider import StringContentProvider
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class StringsScanner(AbstractScanner, ABC):
    """Implements binary data scanning with ASCII strings representations"""

    @staticmethod
    def get_strings(data: bytes) -> List[Tuple[str, int]]:
        """Finds runs of printable ASCII symbols in binary data.

        Args:
            data: raw bytes to process

        Return:
            list of (string, offset) pairs for every run of TAB, SPACE and visible ASCII symbols
            which is not shorter than MIN_DATA_LEN. The offset is the position of the first byte
            of the run in data and is used instead of a line number.

        """
        strings: List[Tuple[str, int]] = []
        # -1 marks "no run started" because 0 is a valid offset of a run
        offset = -1
        chars: List[str] = []
        for n, x in enumerate(data):
            if 0x09 == x or 0x20 <= x <= 0x7E:
                # TAB, SPACE and visible ASCII symbols
                if 0 > offset:
                    offset = n
                chars.append(chr(x))
                continue
            if MIN_DATA_LEN <= len(chars):
                strings.append((''.join(chars), offset))
            # any other byte ends the run - a short run is dropped instead of being glued to the next one
            offset = -1
            chars.clear()
        if MIN_DATA_LEN <= len(chars):
            # the data ends with a run
            strings.append((''.join(chars), offset))
        return strings

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Extracts ASCII strings from binary data and scans them. Offsets are passed as line numbers.

        Returns an empty list when no suitable strings were found in the data.
        """
        strings = StringsScanner.get_strings(data_provider.data)
        if not strings:
            return []
        string_data_provider = StringContentProvider(lines=[x[0] for x in strings],
                                                     line_numbers=[x[1] for x in strings],
                                                     file_path=data_provider.file_path,
                                                     file_type=data_provider.file_type,
                                                     info=f"{data_provider.info}|STRINGS")
        return self.scanner.scan(string_data_provider)