credsweeper 1.11.3__tar.gz → 1.11.5__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

Files changed (162)
  1. {credsweeper-1.11.3 → credsweeper-1.11.5}/PKG-INFO +3 -6
  2. {credsweeper-1.11.3 → credsweeper-1.11.5}/README.md +1 -5
  3. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/__init__.py +1 -1
  4. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/__main__.py +1 -1
  5. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/app.py +21 -44
  6. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/common/constants.py +2 -5
  7. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/credentials/candidate_key.py +1 -1
  8. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/credentials/credential_manager.py +4 -3
  9. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/credentials/line_data.py +2 -5
  10. credsweeper-1.11.5/credsweeper/deep_scanner/abstract_scanner.py +306 -0
  11. credsweeper-1.11.5/credsweeper/deep_scanner/deb_scanner.py +55 -0
  12. credsweeper-1.11.5/credsweeper/deep_scanner/deep_scanner.py +173 -0
  13. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/gzip_scanner.py +1 -1
  14. credsweeper-1.11.5/credsweeper/deep_scanner/jclass_scanner.py +74 -0
  15. credsweeper-1.11.5/credsweeper/deep_scanner/patch_scanner.py +48 -0
  16. credsweeper-1.11.5/credsweeper/deep_scanner/pkcs_scanner.py +41 -0
  17. credsweeper-1.11.5/credsweeper/deep_scanner/rpm_scanner.py +49 -0
  18. credsweeper-1.11.5/credsweeper/deep_scanner/sqlite3_scanner.py +79 -0
  19. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/byte_content_provider.py +2 -2
  20. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/content_provider.py +1 -1
  21. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/data_content_provider.py +3 -4
  22. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/diff_content_provider.py +2 -2
  23. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/file_path_extractor.py +1 -1
  24. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/files_provider.py +2 -4
  25. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/patches_provider.py +5 -2
  26. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/string_content_provider.py +2 -2
  27. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/struct_content_provider.py +1 -1
  28. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/text_content_provider.py +2 -2
  29. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/__init__.py +1 -0
  30. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_base64_encoded_pem_check.py +1 -1
  31. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_base64_key_check.py +9 -14
  32. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_entropy_base64_check.py +2 -6
  33. credsweeper-1.11.5/credsweeper/filters/value_json_web_key_check.py +37 -0
  34. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_pattern_check.py +64 -16
  35. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/file_extension.py +1 -1
  36. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/ml_validator.py +43 -21
  37. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/rules/config.yaml +51 -9
  38. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/rules/rule.py +3 -3
  39. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/scanner/scan_type/multi_pattern.py +1 -2
  40. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/secret/config.json +6 -6
  41. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/utils/hop_stat.py +3 -3
  42. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/utils/pem_key_detector.py +6 -4
  43. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/utils/util.py +154 -79
  44. {credsweeper-1.11.3 → credsweeper-1.11.5}/pyproject.toml +1 -0
  45. credsweeper-1.11.3/credsweeper/deep_scanner/abstract_scanner.py +0 -51
  46. credsweeper-1.11.3/credsweeper/deep_scanner/deep_scanner.py +0 -375
  47. credsweeper-1.11.3/credsweeper/deep_scanner/pkcs12_scanner.py +0 -45
  48. {credsweeper-1.11.3 → credsweeper-1.11.5}/.gitignore +0 -0
  49. {credsweeper-1.11.3 → credsweeper-1.11.5}/LICENSE +0 -0
  50. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/common/__init__.py +0 -0
  51. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/common/keyword_checklist.py +0 -0
  52. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/common/keyword_checklist.txt +0 -0
  53. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/common/keyword_pattern.py +0 -0
  54. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/common/morpheme_checklist.txt +0 -0
  55. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/config/__init__.py +0 -0
  56. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/config/config.py +0 -0
  57. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/credentials/__init__.py +0 -0
  58. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/credentials/augment_candidates.py +0 -0
  59. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/credentials/candidate.py +0 -0
  60. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/credentials/candidate_group_generator.py +0 -0
  61. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/__init__.py +0 -0
  62. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/byte_scanner.py +0 -0
  63. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/bzip2_scanner.py +0 -0
  64. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/docx_scanner.py +0 -0
  65. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/eml_scanner.py +0 -0
  66. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/encoder_scanner.py +0 -0
  67. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/html_scanner.py +0 -0
  68. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/jks_scanner.py +0 -0
  69. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/lang_scanner.py +0 -0
  70. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/lzma_scanner.py +0 -0
  71. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/mxfile_scanner.py +0 -0
  72. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/pdf_scanner.py +0 -0
  73. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/pptx_scanner.py +0 -0
  74. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/tar_scanner.py +0 -0
  75. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/tmx_scanner.py +0 -0
  76. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/xlsx_scanner.py +0 -0
  77. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/xml_scanner.py +0 -0
  78. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/deep_scanner/zip_scanner.py +0 -0
  79. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/__init__.py +0 -0
  80. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/abstract_provider.py +0 -0
  81. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/analysis_target.py +0 -0
  82. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/file_handler/descriptor.py +0 -0
  83. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/filter.py +0 -0
  84. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/__init__.py +0 -0
  85. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/general_keyword.py +0 -0
  86. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/general_pattern.py +0 -0
  87. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/group.py +0 -0
  88. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/password_keyword.py +0 -0
  89. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/token_pattern.py +0 -0
  90. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/url_credentials_group.py +0 -0
  91. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/weird_base36_token.py +0 -0
  92. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/group/weird_base64_token.py +0 -0
  93. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/line_git_binary_check.py +0 -0
  94. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/line_specific_key_check.py +0 -0
  95. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/line_uue_part_check.py +0 -0
  96. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_allowlist_check.py +0 -0
  97. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_array_dictionary_check.py +0 -0
  98. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_atlassian_token_check.py +0 -0
  99. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_azure_token_check.py +0 -0
  100. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_base32_data_check.py +0 -0
  101. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_base64_data_check.py +0 -0
  102. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_base64_part_check.py +0 -0
  103. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_blocklist_check.py +0 -0
  104. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_camel_case_check.py +0 -0
  105. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_couple_keyword_check.py +0 -0
  106. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_dictionary_keyword_check.py +0 -0
  107. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_dictionary_value_length_check.py +0 -0
  108. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_discord_bot_check.py +0 -0
  109. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_entropy_base32_check.py +0 -0
  110. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_entropy_base36_check.py +0 -0
  111. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_entropy_base_check.py +0 -0
  112. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_file_path_check.py +0 -0
  113. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_github_check.py +0 -0
  114. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_grafana_check.py +0 -0
  115. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_grafana_service_check.py +0 -0
  116. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_hex_number_check.py +0 -0
  117. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_jfrog_token_check.py +0 -0
  118. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_json_web_token_check.py +0 -0
  119. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_last_word_check.py +0 -0
  120. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_method_check.py +0 -0
  121. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_not_allowed_pattern_check.py +0 -0
  122. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_not_part_encoded_check.py +0 -0
  123. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_number_check.py +0 -0
  124. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_similarity_check.py +0 -0
  125. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_split_keyword_check.py +0 -0
  126. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_string_type_check.py +0 -0
  127. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_token_base32_check.py +0 -0
  128. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_token_base36_check.py +0 -0
  129. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_token_base64_check.py +0 -0
  130. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_token_base_check.py +0 -0
  131. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/filters/value_token_check.py +0 -0
  132. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/logger/__init__.py +0 -0
  133. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/logger/logger.py +0 -0
  134. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/__init__.py +0 -0
  135. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/__init__.py +0 -0
  136. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/entropy_evaluation.py +0 -0
  137. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/feature.py +0 -0
  138. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/has_html_tag.py +0 -0
  139. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/is_secret_numeric.py +0 -0
  140. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/length_of_attribute.py +0 -0
  141. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/morpheme_dense.py +0 -0
  142. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/rule_name.py +0 -0
  143. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/search_in_attribute.py +0 -0
  144. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in.py +0 -0
  145. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_path.py +0 -0
  146. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_postamble.py +0 -0
  147. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_preamble.py +0 -0
  148. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_transition.py +0 -0
  149. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_value.py +0 -0
  150. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/features/word_in_variable.py +0 -0
  151. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/ml_config.json +0 -0
  152. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/ml_model/ml_model.onnx +0 -0
  153. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/py.typed +0 -0
  154. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/rules/__init__.py +0 -0
  155. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/scanner/__init__.py +0 -0
  156. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/scanner/scan_type/__init__.py +0 -0
  157. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/scanner/scan_type/pem_key_pattern.py +0 -0
  158. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/scanner/scan_type/scan_type.py +0 -0
  159. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/scanner/scan_type/single_pattern.py +0 -0
  160. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/scanner/scanner.py +0 -0
  161. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/secret/log.yaml +0 -0
  162. {credsweeper-1.11.3 → credsweeper-1.11.5}/credsweeper/utils/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: credsweeper
3
- Version: 1.11.3
3
+ Version: 1.11.5
4
4
  Summary: Credential Sweeper
5
5
  Project-URL: Homepage, https://github.com/Samsung/CredSweeper
6
6
  Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues
@@ -37,6 +37,7 @@ Requires-Dist: python-dateutil
37
37
  Requires-Dist: python-docx
38
38
  Requires-Dist: python-pptx
39
39
  Requires-Dist: pyyaml
40
+ Requires-Dist: rpmfile
40
41
  Requires-Dist: whatthepatch
41
42
  Requires-Dist: xlrd
42
43
  Description-Content-Type: text/markdown
@@ -140,11 +141,7 @@ cat output.json
140
141
  "value_start": 12,
141
142
  "value_end": 19,
142
143
  "variable": "password",
143
- "entropy_validation": {
144
- "iterator": "BASE64_CHARS",
145
- "entropy": 2.120589933192232,
146
- "valid": false
147
- }
144
+ "entropy": 2.12059
148
145
  }
149
146
  ]
150
147
  }
@@ -97,11 +97,7 @@ cat output.json
97
97
  "value_start": 12,
98
98
  "value_end": 19,
99
99
  "variable": "password",
100
- "entropy_validation": {
101
- "iterator": "BASE64_CHARS",
102
- "entropy": 2.120589933192232,
103
- "valid": false
104
- }
100
+ "entropy": 2.12059
105
101
  }
106
102
  ]
107
103
  }
@@ -18,4 +18,4 @@ __all__ = [
18
18
  '__version__'
19
19
  ]
20
20
 
21
- __version__ = "1.11.3"
21
+ __version__ = "1.11.5"
@@ -63,7 +63,7 @@ def logger_levels(log_level: str) -> str:
63
63
  Returns True if log_level UPPERCASE is one of keys
64
64
  """
65
65
  val = log_level.upper()
66
- if any(val == i for i in Logger.LEVELS.keys()):
66
+ if val in Logger.LEVELS:
67
67
  return val
68
68
  raise ArgumentTypeError(f"Log level provided: {log_level} -- must be one of: {' | '.join(Logger.LEVELS.keys())}")
69
69
 
@@ -15,11 +15,13 @@ from credsweeper.common.constants import Severity, ThresholdPreset, DiffRowType,
15
15
  from credsweeper.config import Config
16
16
  from credsweeper.credentials import Candidate, CredentialManager, CandidateKey
17
17
  from credsweeper.deep_scanner.deep_scanner import DeepScanner
18
+ from credsweeper.file_handler.content_provider import ContentProvider
18
19
  from credsweeper.file_handler.diff_content_provider import DiffContentProvider
19
20
  from credsweeper.file_handler.file_path_extractor import FilePathExtractor
20
21
  from credsweeper.file_handler.abstract_provider import AbstractProvider
21
22
  from credsweeper.file_handler.text_content_provider import TextContentProvider
22
23
  from credsweeper.scanner import Scanner
24
+ from credsweeper.ml_model.ml_validator import MlValidator
23
25
  from credsweeper.utils import Util
24
26
 
25
27
  logger = logging.getLogger(__name__)
@@ -94,7 +96,7 @@ class CredSweeper:
94
96
  log_level: str - level for pool initializer according logging levels (UPPERCASE)
95
97
 
96
98
  """
97
- self.pool_count: int = int(pool_count) if int(pool_count) > 1 else 1
99
+ self.pool_count: int = max(1, int(pool_count))
98
100
  if not (_severity := Severity.get(severity)):
99
101
  raise RuntimeError(f"Severity level provided: {severity}"
100
102
  f" -- must be one of: {' | '.join([i.value for i in Severity])}")
@@ -123,9 +125,9 @@ class CredSweeper:
123
125
  self.ml_config = ml_config
124
126
  self.ml_model = ml_model
125
127
  self.ml_providers = ml_providers
126
- self.ml_validator = None
127
128
  self.__thrifty = thrifty
128
129
  self.__log_level = log_level
130
+ self.__ml_validator: Optional[MlValidator] = None
129
131
 
130
132
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
131
133
 
@@ -182,35 +184,22 @@ class CredSweeper:
182
184
 
183
185
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
184
186
 
185
- # the import cannot be done on top due
186
- # TypeError: cannot pickle 'onnxruntime.capi.onnxruntime_pybind11_state.InferenceSession' object
187
- from credsweeper.ml_model import MlValidator
188
-
189
- # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
190
-
191
187
  @property
192
188
  def ml_validator(self) -> MlValidator:
193
189
  """ml_validator getter"""
194
- from credsweeper.ml_model import MlValidator
195
190
  if not self.__ml_validator:
196
- self.__ml_validator: MlValidator = MlValidator(
191
+ self.__ml_validator = MlValidator(
197
192
  threshold=self.ml_threshold, #
198
193
  ml_config=self.ml_config, #
199
194
  ml_model=self.ml_model, #
200
195
  ml_providers=self.ml_providers, #
201
196
  )
202
- assert self.__ml_validator, "self.__ml_validator was not initialized"
197
+ if not self.__ml_validator:
198
+ raise RuntimeError("MlValidator was not initialized!")
203
199
  return self.__ml_validator
204
200
 
205
201
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
206
202
 
207
- @ml_validator.setter
208
- def ml_validator(self, _ml_validator: Optional[MlValidator]) -> None:
209
- """ml_validator setter"""
210
- self.__ml_validator = _ml_validator
211
-
212
- # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
213
-
214
203
  @staticmethod
215
204
  def pool_initializer(log_kwargs) -> None:
216
205
  """Ignore SIGINT in child processes."""
@@ -219,20 +208,6 @@ class CredSweeper:
219
208
 
220
209
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
221
210
 
222
- @property
223
- def config(self) -> Config:
224
- """config getter"""
225
- return self.__config
226
-
227
- # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
228
-
229
- @config.setter
230
- def config(self, config: Config) -> None:
231
- """config setter"""
232
- self.__config = config
233
-
234
- # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
235
-
236
211
  def run(self, content_provider: AbstractProvider) -> int:
237
212
  """Run an analysis of 'content_provider' object.
238
213
 
@@ -241,9 +216,10 @@ class CredSweeper:
241
216
 
242
217
  """
243
218
  _empty_list: Sequence[Union[DiffContentProvider, TextContentProvider]] = []
244
- file_extractors: Sequence[Union[DiffContentProvider, TextContentProvider]] = \
245
- content_provider.get_scannable_files(self.config) if content_provider else _empty_list
246
- logger.info(f"Start Scanner for {len(file_extractors)} providers")
219
+ file_extractors = content_provider.get_scannable_files(self.config) if content_provider else _empty_list
220
+ if not file_extractors:
221
+ logger.info(f"No scannable targets for {len(content_provider.paths)} paths")
222
+ return 0
247
223
  self.scan(file_extractors)
248
224
  self.post_processing()
249
225
  # PatchesProvider has the attribute. Circular import error appears with using the isinstance
@@ -260,7 +236,7 @@ class CredSweeper:
260
236
  content_providers: file objects to scan
261
237
 
262
238
  """
263
- if 1 < self.pool_count:
239
+ if 1 < self.pool_count and 1 < len(content_providers):
264
240
  self.__multi_jobs_scan(content_providers)
265
241
  else:
266
242
  self.__single_job_scan(content_providers)
@@ -269,6 +245,7 @@ class CredSweeper:
269
245
 
270
246
  def __single_job_scan(self, content_providers: Sequence[Union[DiffContentProvider, TextContentProvider]]) -> None:
271
247
  """Performs scan in main thread"""
248
+ logger.info(f"Scan for {len(content_providers)} providers")
272
249
  all_cred = self.files_scan(content_providers)
273
250
  self.credential_manager.set_credentials(all_cred)
274
251
 
@@ -284,12 +261,14 @@ class CredSweeper:
284
261
  if "SILENCE" == self.__log_level:
285
262
  logging.addLevelName(60, "SILENCE")
286
263
  log_kwargs["level"] = self.__log_level
287
- with multiprocessing.get_context("spawn").Pool(processes=self.pool_count,
288
- initializer=self.pool_initializer,
264
+ pool_count = min(self.pool_count, len(content_providers))
265
+ logger.info(f"Scan in {pool_count} processes for {len(content_providers)} providers")
266
+ with multiprocessing.get_context("spawn").Pool(processes=pool_count,
267
+ initializer=CredSweeper.pool_initializer,
289
268
  initargs=(log_kwargs, )) as pool:
290
269
  try:
291
- for scan_results in pool.imap_unordered(self.files_scan, (content_providers[x::self.pool_count]
292
- for x in range(self.pool_count))):
270
+ for scan_results in pool.imap_unordered(self.files_scan,
271
+ (content_providers[x::pool_count] for x in range(pool_count))):
293
272
  for cred in scan_results:
294
273
  self.credential_manager.add_credential(cred)
295
274
  except KeyboardInterrupt:
@@ -301,9 +280,7 @@ class CredSweeper:
301
280
 
302
281
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
303
282
 
304
- def files_scan(
305
- self, #
306
- content_providers: Sequence[Union[DiffContentProvider, TextContentProvider]]) -> List[Candidate]:
283
+ def files_scan(self, content_providers: Sequence[ContentProvider]) -> List[Candidate]:
307
284
  """Auxiliary method for scan one sequence"""
308
285
  all_cred: List[Candidate] = []
309
286
  for provider in content_providers:
@@ -316,7 +293,7 @@ class CredSweeper:
316
293
 
317
294
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
318
295
 
319
- def file_scan(self, content_provider: Union[DiffContentProvider, TextContentProvider]) -> List[Candidate]:
296
+ def file_scan(self, content_provider: ContentProvider) -> List[Candidate]:
320
297
  """Run scanning of file from 'file_provider'.
321
298
 
322
299
  Args:
@@ -96,10 +96,6 @@ class Chars(Enum):
96
96
  ASCII_PRINTABLE = string.printable
97
97
 
98
98
 
99
- ENTROPY_LIMIT_BASE64 = 4.5
100
- ENTROPY_LIMIT_BASE3x = 3
101
-
102
-
103
99
  class GroupType(Enum):
104
100
  """Group type - used in Group constructor for load predefined set of filters"""
105
101
  KEYWORD = "keyword"
@@ -148,7 +144,8 @@ OVERLAP_SIZE = 1000
148
144
  CHUNK_STEP_SIZE = CHUNK_SIZE - OVERLAP_SIZE
149
145
  # ML hunk size to limit of variable or value size and get substring near value
150
146
  ML_HUNK = 80
151
- """ values according https://docs.python.org/3/library/codecs.html """
147
+
148
+ # values according https://docs.python.org/3/library/codecs.html
152
149
  UTF_8 = "utf_8"
153
150
  UTF_16 = "utf_16"
154
151
  LATIN_1 = "latin_1"
@@ -24,7 +24,7 @@ class CandidateKey:
24
24
  return self.key == other.key
25
25
 
26
26
  def __ne__(self, other):
27
- return not (self == other)
27
+ return not bool(self == other)
28
28
 
29
29
  def __repr__(self) -> str:
30
30
  return f"{self.key}:{self.__line}"
@@ -110,7 +110,8 @@ class CredentialManager:
110
110
  # Match by file path+line num+value. Value required so two different credentials still be
111
111
  # processed independently
112
112
  candidate_key = CandidateKey(line_data)
113
- if candidate_key not in groups:
114
- groups[candidate_key] = list()
115
- groups[candidate_key].append(credential_candidate)
113
+ if candidate_key in groups:
114
+ groups[candidate_key].append(credential_candidate)
115
+ else:
116
+ groups[candidate_key] = [credential_candidate]
116
117
  return groups
@@ -327,11 +327,8 @@ class LineData:
327
327
  True if file require quotation, False otherwise
328
328
 
329
329
  """
330
- if not self.path:
331
- return False
332
- if Util.get_extension(self.path) in self.config.source_quote_ext:
333
- return True
334
- return False
330
+ file_type = self.file_type or Util.get_extension(self.path)
331
+ return bool(file_type) and file_type in self.config.source_quote_ext
335
332
 
336
333
  @staticmethod
337
334
  def get_hash_or_subtext(
@@ -0,0 +1,306 @@
1
+ import contextlib
2
+ import datetime
3
+ import logging
4
+ from abc import abstractmethod, ABC
5
+ from typing import List, Optional, Tuple, Any, Generator
6
+
7
+ from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN, DEFAULT_ENCODING, UTF_8, \
8
+ MIN_VALUE_LENGTH
9
+ from credsweeper.config import Config
10
+ from credsweeper.credentials import Candidate
11
+ from credsweeper.credentials.augment_candidates import augment_candidates
12
+ from credsweeper.file_handler.byte_content_provider import ByteContentProvider
13
+ from credsweeper.file_handler.content_provider import ContentProvider
14
+ from credsweeper.file_handler.data_content_provider import DataContentProvider
15
+ from credsweeper.file_handler.descriptor import Descriptor
16
+ from credsweeper.file_handler.diff_content_provider import DiffContentProvider
17
+ from credsweeper.file_handler.file_path_extractor import FilePathExtractor
18
+ from credsweeper.file_handler.string_content_provider import StringContentProvider
19
+ from credsweeper.file_handler.struct_content_provider import StructContentProvider
20
+ from credsweeper.file_handler.text_content_provider import TextContentProvider
21
+ from credsweeper.scanner import Scanner
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class AbstractScanner(ABC):
    """Base abstract class for all recursive scanners.

    Subclasses provide ``config``, ``scanner``, ``data_scan`` and ``get_deep_scanners``.
    The recursion helpers (``recursive_scan``, ``structure_scan``, ``deep_scan_with_fallback``
    and ``scan``) are implemented here on top of those members.
    """

    @property
    @abstractmethod
    def config(self) -> Config:
        """Abstract property to be defined in DeepScanner"""
        raise NotImplementedError(__name__)

    @property
    @abstractmethod
    def scanner(self) -> Scanner:
        """Abstract property to be defined in DeepScanner"""
        raise NotImplementedError(__name__)

    @abstractmethod
    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Abstract method to be defined in DeepScanner

        Returns:
            list of candidates, or None when the scanner did not recognise the content
            (this is how ``deep_scan_with_fallback`` interprets the result)

        """
        raise NotImplementedError(__name__)

    @staticmethod
    @abstractmethod
    def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[List[Any], List[Any]]:
        """Returns possibly scan methods for the data depends on content and fallback scanners

        Returns:
            pair ``(deep_scanners, fallback_scanners)`` as consumed by ``deep_scan_with_fallback``

        """
        # raise like the other abstract members instead of silently returning None
        raise NotImplementedError(__name__)

    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

    def recursive_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int = 0,  #
            recursive_limit_size: int = 0) -> List[Candidate]:
        """Recursive function to scan files which might be containers like ZIP archives

        Args:
            data_provider: DataContentProvider object may be a container
            depth: maximal level of recursion; a negative value stops the recursion
            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack

        Returns:
            list with found candidates; empty when the depth is exhausted or the data is too small

        """
        candidates: List[Candidate] = []
        if 0 > depth:
            # break recursion if maximal depth is reached
            logger.debug("Bottom reached %s recursive_limit_size:%d", data_provider.file_path, recursive_limit_size)
            return candidates
        # one level is consumed here, so the logs below show the remaining depth
        depth -= 1
        data_size = len(data_provider.data)
        if MIN_DATA_LEN > data_size:
            # break recursion for minimal data size
            logger.debug("Too small data: size=%d, depth=%d, limit=%d, path=%s, info=%s", data_size, depth,
                         recursive_limit_size, data_provider.file_path, data_provider.info)
            return candidates
        logger.debug("Start data_scan: size=%d, depth=%d, limit=%d, path=%s, info=%s", data_size, depth,
                     recursive_limit_size, data_provider.file_path, data_provider.info)

        if FilePathExtractor.is_find_by_ext_file(self.config, data_provider.file_type):
            # Skip scanning file and makes fake candidate due the extension is suspicious
            dummy_candidate = Candidate.get_dummy_candidate(self.config, data_provider.file_path,
                                                            data_provider.file_type, data_provider.info,
                                                            FilePathExtractor.FIND_BY_EXT_RULE)
            candidates.append(dummy_candidate)
        else:
            new_candidates = self.deep_scan_with_fallback(data_provider, depth, recursive_limit_size)
            augment_candidates(candidates, new_candidates)

        return candidates

    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

    @staticmethod
    def key_value_combination(structure: dict) -> Generator[Tuple[Any, Any], None, None]:
        """Combine items by `key` and `value` from a dictionary for augmentation
        {..., "key": "api_key", "value": "XXXXXXX", ...} -> ("api_key", "XXXXXXX")

        Only the first present spelling of the key field ("key", "KEY", "Key") is used.
        At most one pair is yielded: the first value field ("value", "VALUE", "Value")
        that holds a non-empty str or bytes.

        """
        struct_key = None
        for key_id in ("key", "KEY", "Key"):
            if key_id in structure:
                struct_key = structure.get(key_id)
                break
        if isinstance(struct_key, bytes):
            # sqlite table may produce bytes for `key`
            # when decoding fails the key stays bytes and is rejected by the str check below
            with contextlib.suppress(UnicodeError):
                struct_key = struct_key.decode(UTF_8)
        # only str type is common used for the augmentation
        if struct_key and isinstance(struct_key, str):
            for value_id in ("value", "VALUE", "Value"):
                if value_id in structure:
                    struct_value = structure.get(value_id)
                    if struct_value and isinstance(struct_value, (str, bytes)):
                        yield struct_key, struct_value
                        # break in successful case
                        break

    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

    @staticmethod
    def structure_processing(structure: Any) -> Generator[Tuple[Any, Any], None, None]:
        """Yields pair `key, value` from given structure if applicable

        A dict yields its non-empty items (a single-item list or tuple is unwrapped to that item)
        followed by the optional pair from ``key_value_combination``.
        A list or tuple yields ``index, item`` pairs.
        Any other type is logged as an error and yields nothing.

        """
        if isinstance(structure, dict):
            # transform dictionary to list
            for key, value in structure.items():
                if not value:
                    # skip empty values
                    continue
                if isinstance(value, (list, tuple)) and 1 == len(value):
                    # simplify some structures like YAML when single item in new line is a value
                    yield key, value[0]
                    continue
                # all other data will be processed in next code
                yield key, value
            yield from AbstractScanner.key_value_combination(structure)
        elif isinstance(structure, (list, tuple)):
            # enumerate the items to fit for return structure
            for key, value in enumerate(structure):
                yield key, value
        else:
            logger.error("Not supported type:%s val:%s", str(type(structure)), repr(structure))

    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

    def structure_scan(
            self,  #
            struct_provider: StructContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> List[Candidate]:
        """Recursive function to scan structured data

        Args:
            struct_provider: StructContentProvider object with a dict, list or tuple in `struct`
            depth: maximal level of recursion; a negative value stops the recursion
            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack

        Returns:
            list with candidates found in nested values and in the augmented `key = value` lines

        """
        candidates: List[Candidate] = []
        logger.debug("Start struct_scan: depth=%d, limit=%d, path=%s, info=%s", depth, recursive_limit_size,
                     struct_provider.file_path, struct_provider.info)

        if 0 > depth:
            # break recursion if maximal depth is reached
            logger.debug("bottom reached %s recursive_limit_size:%d", struct_provider.file_path, recursive_limit_size)
            return candidates

        depth -= 1

        augmented_lines_for_keyword_rules = []
        for key, value in AbstractScanner.structure_processing(struct_provider.struct):
            # a keyword rule may be applicable for `key` (str only) and `value` (str, bytes)
            keyword_match = bool(isinstance(key, str) and self.scanner.keywords_required_substrings_check(key.lower()))

            if isinstance(value, (dict, list, tuple)) and value:
                # recursive scan for not empty structured `value`
                val_struct_provider = StructContentProvider(struct=value,
                                                            file_path=struct_provider.file_path,
                                                            file_type=struct_provider.file_type,
                                                            info=f"{struct_provider.info}|STRUCT:{key}")
                new_candidates = self.structure_scan(val_struct_provider, depth, recursive_limit_size)
                candidates.extend(new_candidates)
            elif isinstance(value, bytes):
                # recursive data scan
                if MIN_DATA_LEN <= len(value):
                    bytes_struct_provider = DataContentProvider(data=value,
                                                                file_path=struct_provider.file_path,
                                                                file_type=struct_provider.file_type,
                                                                info=f"{struct_provider.info}|BYTES:{key}")
                    new_limit = recursive_limit_size - len(value)
                    new_candidates = self.recursive_scan(bytes_struct_provider, depth, new_limit)
                    candidates.extend(new_candidates)
                # the keyword augmentation has its own length threshold, separate from the data scan
                if keyword_match and MIN_VALUE_LENGTH <= len(value):
                    augmented_lines_for_keyword_rules.append(f"{key} = {repr(value)}")
            elif isinstance(value, str):
                # recursive text scan with transformation into bytes
                stripped_value = value.strip()
                if MIN_DATA_LEN <= len(stripped_value):
                    # recursive scan only for data which may be decoded at least
                    # NOTE: the suppression also covers UnicodeError raised inside the nested scan
                    with contextlib.suppress(UnicodeError):
                        data = stripped_value.encode(encoding=DEFAULT_ENCODING, errors='strict')
                        str_struct_provider = DataContentProvider(data=data,
                                                                  file_path=struct_provider.file_path,
                                                                  file_type=struct_provider.file_type,
                                                                  info=f"{struct_provider.info}|STRING:{key}")
                        new_limit = recursive_limit_size - len(str_struct_provider.data)
                        new_candidates = self.recursive_scan(str_struct_provider, depth, new_limit)
                        candidates.extend(new_candidates)
                if keyword_match and MIN_VALUE_LENGTH <= len(stripped_value):
                    augmented_lines_for_keyword_rules.append(f"{key} = {repr(stripped_value)}")
            elif value is None or isinstance(value, (int, float, datetime.date, datetime.datetime)):
                # skip useless types
                pass
            else:
                logger.warning("Not supported type:%s value(%s)", str(type(value)), str(value))

        if augmented_lines_for_keyword_rules:
            # the collected `key = value` lines are scanned once, as text, by the plain scanner
            str_provider = StringContentProvider(augmented_lines_for_keyword_rules,
                                                 file_path=struct_provider.file_path,
                                                 file_type=struct_provider.file_type,
                                                 info=f"{struct_provider.info}|KEYWORD")
            new_candidates = self.scanner.scan(str_provider)
            augment_candidates(candidates, new_candidates)

        return candidates

    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

    def deep_scan_with_fallback(self, data_provider: DataContentProvider, depth: int,
                                recursive_limit_size: int) -> List[Candidate]:
        """Scans with deep scanners and fallback scanners if possible

        Every deep scanner is tried. Fallback scanners are used only when no deep scanner
        returned a result, and only the first successful fallback is kept.

        Args:
            data_provider: DataContentProvider with raw data
            depth: maximal level of recursion
            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack

        Returns: list with candidates

        """
        candidates: List[Candidate] = []
        deep_scanners, fallback_scanners = self.get_deep_scanners(data_provider.data, data_provider.descriptor, depth)
        fallback = True
        for scan_class in deep_scanners:
            # scanners are classes: data_scan is called unbound with this instance as `self`
            new_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
            if new_candidates is None:
                # scanner did not recognise the content type
                continue
            augment_candidates(candidates, new_candidates)
            # this scan is successful, so fallback is not necessary
            fallback = False
        if fallback:
            for scan_class in fallback_scanners:
                fallback_candidates = scan_class.data_scan(self, data_provider, depth, recursive_limit_size)
                if fallback_candidates is None:
                    continue
                augment_candidates(candidates, fallback_candidates)
                # use only first successful fallback scanner
                break
        return candidates

    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

    def scan(self,
             content_provider: ContentProvider,
             depth: int,
             recursive_limit_size: Optional[int] = None) -> List[Candidate]:
        """Initial scan method to launch recursive scan. Skips ByteScanner to prevent extra scan

        Args:
            content_provider: ContentProvider that might contain raw data
            depth: maximal level of recursion
            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack;
                RECURSIVE_SCAN_LIMITATION is used when the value is not an int

        Returns:
            list with candidates; for a diff provider it also contains the plain scan results

        """
        if not isinstance(recursive_limit_size, int):
            recursive_limit_size = RECURSIVE_SCAN_LIMITATION
        candidates: List[Candidate] = []
        data: Optional[bytes] = None
        # always bound, so every branch leaves `info` defined
        info = "NA"
        if isinstance(content_provider, (TextContentProvider, ByteContentProvider)):
            # Feature to scan files which might be containers
            data = content_provider.data
            info = f"FILE:{content_provider.file_path}"
        elif isinstance(content_provider, DiffContentProvider) and content_provider.diff:
            candidates = self.scanner.scan(content_provider)
            # Feature to scan binary diffs
            diff = content_provider.diff[0].get("line")
            # the check for legal fix mypy issue
            if isinstance(diff, bytes):
                data = diff
                info = f"DIFF:{content_provider.file_path}"
        else:
            # lazy %-formatting, consistent with the other log calls of the class
            logger.warning("Content provider %s does not support deep scan", type(content_provider))

        if data:
            data_provider = DataContentProvider(data=data,
                                                file_path=content_provider.file_path,
                                                file_type=content_provider.file_type,
                                                info=content_provider.info or info)
            new_candidates = self.deep_scan_with_fallback(data_provider, depth, recursive_limit_size - len(data))
            augment_candidates(candidates, new_candidates)
        return candidates
@@ -0,0 +1,55 @@
1
+ import logging
2
+ import struct
3
+ from abc import ABC
4
+ from typing import List, Optional, Generator, Tuple
5
+
6
+ from credsweeper.common.constants import MIN_DATA_LEN, UTF_8
7
+ from credsweeper.credentials import Candidate
8
+ from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
9
+ from credsweeper.file_handler.data_content_provider import DataContentProvider
10
+ from credsweeper.utils.util import Util
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class DebScanner(AbstractScanner, ABC):
    """Implements deb (ar) scanning"""

    # size of one member header: name(16) + 32 skipped bytes + size(10) + 2 trailing bytes
    __header_size = 60

    @staticmethod
    def walk_deb(data: bytes) -> Generator[Tuple[int, str, bytes], None, None]:
        """Processes sequence of DEB archive and yields offset, name and data

        Args:
            data: raw bytes of the archive; the first 8 bytes are skipped as the global signature

        Yields:
            offset of the member data, member name (stripped, without trailing '/') and member data.
            Members not larger than MIN_DATA_LEN, or running past the end of `data`, are skipped.

        Raises:
            ValueError: the size field is not a number or is negative
            struct.error, UnicodeError: malformed header or name

        """
        offset = 8  # b"!<arch>\n"
        header_size = DebScanner.__header_size
        data_limit = len(data) - header_size
        while offset <= data_limit:
            # basic header structure, read in place without copying the header
            _name, _, _size, __ = struct.unpack_from('16s32s10s2s', data, offset)
            offset += header_size
            file_size = int(_size)
            if 0 > file_size:
                # a negative size would move the offset backwards and may loop forever on crafted input
                raise ValueError(f"Negative member size {file_size} before offset 0x{offset:x}")
            if MIN_DATA_LEN < file_size <= len(data) - offset:
                _data = data[offset:offset + file_size]
                yield offset, _name.decode(encoding=UTF_8).strip().rstrip('/'), _data
            # an odd size is followed by one padding byte
            offset += file_size + (1 & file_size)

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Extracts data file from .ar (debian) archive and launches data_scan

        Args:
            data_provider: DataContentProvider with the raw archive bytes
            depth: maximal level of recursion, passed to `recursive_scan`
            recursive_limit_size: remaining bytes budget; reduced by the size of each member

        Returns:
            list with candidates of all members, or None when any exception was raised
            (the exception is logged and candidates collected so far are dropped)

        """
        try:
            candidates: List[Candidate] = []
            for offset, name, data in DebScanner.walk_deb(data_provider.data):
                deb_content_provider = DataContentProvider(data=data,
                                                           file_path=f"{data_provider.file_path}/{name}",
                                                           file_type=Util.get_extension(name),
                                                           info=f"{data_provider.info}|DEB:0x{offset:x}")
                new_limit = recursive_limit_size - len(data)
                deb_candidates = self.recursive_scan(deb_content_provider, depth, new_limit)
                candidates.extend(deb_candidates)
            return candidates
        except Exception as exc:
            # deliberate best effort: any failure means "content not recognised" for the caller
            logger.error(exc)
        return None