credsweeper 1.10.8__tar.gz → 1.11.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

Files changed (153) hide show
  1. {credsweeper-1.10.8 → credsweeper-1.11.1}/PKG-INFO +1 -1
  2. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/__init__.py +1 -1
  3. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/common/morpheme_checklist.txt +2 -0
  4. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/deep_scanner.py +5 -0
  5. credsweeper-1.11.1/credsweeper/deep_scanner/tmx_scanner.py +45 -0
  6. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/__init__.py +3 -1
  7. credsweeper-1.11.1/credsweeper/ml_model/features/word_in_postamble.py +32 -0
  8. credsweeper-1.11.1/credsweeper/ml_model/features/word_in_preamble.py +37 -0
  9. credsweeper-1.10.8/credsweeper/ml_model/features/word_in_line.py → credsweeper-1.11.1/credsweeper/ml_model/features/word_in_transition.py +10 -7
  10. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/ml_config.json +214 -80
  11. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/ml_model.onnx +0 -0
  12. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/utils/util.py +16 -4
  13. {credsweeper-1.10.8 → credsweeper-1.11.1}/.gitignore +0 -0
  14. {credsweeper-1.10.8 → credsweeper-1.11.1}/LICENSE +0 -0
  15. {credsweeper-1.10.8 → credsweeper-1.11.1}/README.md +0 -0
  16. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/__main__.py +0 -0
  17. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/app.py +0 -0
  18. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/common/__init__.py +0 -0
  19. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/common/constants.py +0 -0
  20. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/common/keyword_checklist.py +0 -0
  21. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/common/keyword_checklist.txt +0 -0
  22. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/common/keyword_pattern.py +0 -0
  23. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/config/__init__.py +0 -0
  24. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/config/config.py +0 -0
  25. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/credentials/__init__.py +0 -0
  26. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/credentials/augment_candidates.py +0 -0
  27. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/credentials/candidate.py +0 -0
  28. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/credentials/candidate_group_generator.py +0 -0
  29. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/credentials/candidate_key.py +0 -0
  30. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/credentials/credential_manager.py +0 -0
  31. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/credentials/line_data.py +0 -0
  32. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/__init__.py +0 -0
  33. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/abstract_scanner.py +0 -0
  34. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/byte_scanner.py +0 -0
  35. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/bzip2_scanner.py +0 -0
  36. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/docx_scanner.py +0 -0
  37. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/eml_scanner.py +0 -0
  38. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/encoder_scanner.py +0 -0
  39. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/gzip_scanner.py +0 -0
  40. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/html_scanner.py +0 -0
  41. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/jks_scanner.py +0 -0
  42. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/lang_scanner.py +0 -0
  43. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/mxfile_scanner.py +0 -0
  44. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/pdf_scanner.py +0 -0
  45. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/pkcs12_scanner.py +0 -0
  46. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/pptx_scanner.py +0 -0
  47. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/tar_scanner.py +0 -0
  48. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/xlsx_scanner.py +0 -0
  49. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/xml_scanner.py +0 -0
  50. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/deep_scanner/zip_scanner.py +0 -0
  51. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/__init__.py +0 -0
  52. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/abstract_provider.py +0 -0
  53. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/analysis_target.py +0 -0
  54. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/byte_content_provider.py +0 -0
  55. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/content_provider.py +0 -0
  56. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/data_content_provider.py +0 -0
  57. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/descriptor.py +0 -0
  58. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/diff_content_provider.py +0 -0
  59. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/file_path_extractor.py +0 -0
  60. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/files_provider.py +0 -0
  61. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/patches_provider.py +0 -0
  62. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/string_content_provider.py +0 -0
  63. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/struct_content_provider.py +0 -0
  64. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/file_handler/text_content_provider.py +0 -0
  65. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/__init__.py +0 -0
  66. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/filter.py +0 -0
  67. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/__init__.py +0 -0
  68. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/general_keyword.py +0 -0
  69. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/general_pattern.py +0 -0
  70. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/group.py +0 -0
  71. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/password_keyword.py +0 -0
  72. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/token_pattern.py +0 -0
  73. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/url_credentials_group.py +0 -0
  74. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/weird_base36_token.py +0 -0
  75. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/group/weird_base64_token.py +0 -0
  76. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/line_git_binary_check.py +0 -0
  77. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/line_specific_key_check.py +0 -0
  78. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/line_uue_part_check.py +0 -0
  79. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_allowlist_check.py +0 -0
  80. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_array_dictionary_check.py +0 -0
  81. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_atlassian_token_check.py +0 -0
  82. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_azure_token_check.py +0 -0
  83. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_base32_data_check.py +0 -0
  84. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_base64_data_check.py +0 -0
  85. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_base64_encoded_pem_check.py +0 -0
  86. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_base64_key_check.py +0 -0
  87. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_base64_part_check.py +0 -0
  88. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_blocklist_check.py +0 -0
  89. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_camel_case_check.py +0 -0
  90. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_couple_keyword_check.py +0 -0
  91. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_dictionary_keyword_check.py +0 -0
  92. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_dictionary_value_length_check.py +0 -0
  93. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_discord_bot_check.py +0 -0
  94. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_entropy_base32_check.py +0 -0
  95. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_entropy_base36_check.py +0 -0
  96. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_entropy_base64_check.py +0 -0
  97. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_file_path_check.py +0 -0
  98. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_first_word_check.py +0 -0
  99. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_github_check.py +0 -0
  100. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_grafana_check.py +0 -0
  101. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_grafana_service_check.py +0 -0
  102. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_hex_number_check.py +0 -0
  103. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_jfrog_token_check.py +0 -0
  104. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_json_web_token_check.py +0 -0
  105. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_last_word_check.py +0 -0
  106. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_method_check.py +0 -0
  107. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_not_allowed_pattern_check.py +0 -0
  108. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_not_part_encoded_check.py +0 -0
  109. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_number_check.py +0 -0
  110. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_pattern_check.py +0 -0
  111. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_similarity_check.py +0 -0
  112. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_split_keyword_check.py +0 -0
  113. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_string_type_check.py +0 -0
  114. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_token_base32_check.py +0 -0
  115. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_token_base36_check.py +0 -0
  116. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_token_base64_check.py +0 -0
  117. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_token_base_check.py +0 -0
  118. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/filters/value_token_check.py +0 -0
  119. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/logger/__init__.py +0 -0
  120. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/logger/logger.py +0 -0
  121. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/__init__.py +0 -0
  122. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/entropy_evaluation.py +0 -0
  123. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/feature.py +0 -0
  124. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/file_extension.py +0 -0
  125. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/has_html_tag.py +0 -0
  126. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/is_secret_numeric.py +0 -0
  127. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/length_of_attribute.py +0 -0
  128. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/morpheme_dense.py +0 -0
  129. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/rule_name.py +0 -0
  130. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/search_in_attribute.py +0 -0
  131. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/word_in.py +0 -0
  132. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/word_in_path.py +0 -0
  133. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/word_in_value.py +0 -0
  134. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/features/word_in_variable.py +0 -0
  135. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/ml_model/ml_validator.py +0 -0
  136. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/py.typed +0 -0
  137. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/rules/__init__.py +0 -0
  138. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/rules/config.yaml +0 -0
  139. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/rules/rule.py +0 -0
  140. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/scanner/__init__.py +0 -0
  141. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/scanner/scan_type/__init__.py +0 -0
  142. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/scanner/scan_type/multi_pattern.py +0 -0
  143. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/scanner/scan_type/pem_key_pattern.py +0 -0
  144. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/scanner/scan_type/scan_type.py +0 -0
  145. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/scanner/scan_type/single_pattern.py +0 -0
  146. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/scanner/scanner.py +0 -0
  147. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/secret/config.json +0 -0
  148. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/secret/log.yaml +0 -0
  149. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/utils/__init__.py +0 -0
  150. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/utils/entropy_validator.py +0 -0
  151. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/utils/hop_stat.py +0 -0
  152. {credsweeper-1.10.8 → credsweeper-1.11.1}/credsweeper/utils/pem_key_detector.py +0 -0
  153. {credsweeper-1.10.8 → credsweeper-1.11.1}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: credsweeper
3
- Version: 1.10.8
3
+ Version: 1.11.1
4
4
  Summary: Credential Sweeper
5
5
  Project-URL: Homepage, https://github.com/Samsung/CredSweeper
6
6
  Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues
@@ -18,4 +18,4 @@ __all__ = [
18
18
  '__version__'
19
19
  ]
20
20
 
21
- __version__ = "1.10.8"
21
+ __version__ = "1.11.1"
@@ -960,6 +960,7 @@ nish
960
960
  nism
961
961
  node
962
962
  non
963
+ nope
963
964
  norm
964
965
  not
965
966
  nsive
@@ -1529,6 +1530,7 @@ warn
1529
1530
  watch
1530
1531
  wave
1531
1532
  way
1533
+ weak
1532
1534
  web
1533
1535
  week
1534
1536
  weight
@@ -28,6 +28,7 @@ from .pdf_scanner import PdfScanner
28
28
  from .pkcs12_scanner import Pkcs12Scanner
29
29
  from .pptx_scanner import PptxScanner
30
30
  from .tar_scanner import TarScanner
31
+ from .tmx_scanner import TmxScanner
31
32
  from .xlsx_scanner import XlsxScanner
32
33
  from .xml_scanner import XmlScanner
33
34
  from .zip_scanner import ZipScanner
@@ -126,6 +127,10 @@ class DeepScanner(
126
127
  deep_scanners.append(MxfileScanner)
127
128
  deep_scanners.append(XmlScanner)
128
129
  fallback_scanners.append(ByteScanner)
130
+ elif Util.is_tmx(data):
131
+ deep_scanners.append(TmxScanner)
132
+ fallback_scanners.append(XmlScanner)
133
+ fallback_scanners.append(ByteScanner)
129
134
  else:
130
135
  deep_scanners.append(XmlScanner)
131
136
  fallback_scanners.append(ByteScanner)
@@ -0,0 +1,45 @@
1
+ import logging
2
+ from abc import ABC
3
+ from typing import List, Optional
4
+
5
+ from lxml import etree
6
+
7
+ from credsweeper.common.constants import MIN_DATA_LEN
8
+ from credsweeper.credentials import Candidate
9
+ from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
10
+ from credsweeper.file_handler.data_content_provider import DataContentProvider
11
+ from credsweeper.file_handler.string_content_provider import StringContentProvider
12
+ from credsweeper.utils import Util
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class TmxScanner(AbstractScanner, ABC):
    """Implements tmX files scanning for values only. Elements with "Image" in the tag are skipped."""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Tries to represent data as xml and scan the text of its elements as text lines

        Args:
            data_provider: provider whose ``data`` holds the raw document to parse
            depth: not used in this method
            recursive_limit_size: not used in this method

        Return:
            result of ``self.scanner.scan`` for the collected lines or None if the data cannot be processed

        """
        try:
            # scanned content is untrusted: do not expand entities and never access the network while parsing
            parser = etree.XMLParser(resolve_entities=False, no_network=True)
            # the format is always in single line xml, so line numbers are not actual
            tree = etree.fromstring(data_provider.data, parser)
            lines = []
            for element in tree.iter():
                if "Image" in Util.extract_element_data(element, "tag"):
                    # image elements are skipped on purpose
                    continue
                text = Util.extract_element_data(element, "text")
                if len(text) < MIN_DATA_LEN:
                    # too short text is not worth scanning
                    continue
                lines.append(text)
            tmx_data_provider = StringContentProvider(lines=lines,
                                                      file_path=data_provider.file_path,
                                                      file_type=data_provider.file_type,
                                                      info=f"{data_provider.info}|TMX")
            return self.scanner.scan(tmx_data_provider)
        except Exception as exc:
            # best effort: a broken document must not interrupt the whole scan
            logger.warning("Cannot process tmX file %s %s", data_provider.file_path, exc)
        return None
@@ -6,7 +6,9 @@ from credsweeper.ml_model.features.length_of_attribute import LengthOfAttribute
6
6
  from credsweeper.ml_model.features.morpheme_dense import MorphemeDense
7
7
  from credsweeper.ml_model.features.rule_name import RuleName
8
8
  from credsweeper.ml_model.features.search_in_attribute import SearchInAttribute
9
- from credsweeper.ml_model.features.word_in_line import WordInLine
10
9
  from credsweeper.ml_model.features.word_in_path import WordInPath
10
+ from credsweeper.ml_model.features.word_in_postamble import WordInPostamble
11
+ from credsweeper.ml_model.features.word_in_preamble import WordInPreamble
12
+ from credsweeper.ml_model.features.word_in_transition import WordInTransition
11
13
  from credsweeper.ml_model.features.word_in_value import WordInValue
12
14
  from credsweeper.ml_model.features.word_in_variable import WordInVariable
@@ -0,0 +1,32 @@
1
+ from typing import List
2
+
3
+ import numpy as np
4
+
5
+ from credsweeper.common.constants import ML_HUNK
6
+ from credsweeper.credentials import Candidate
7
+ from credsweeper.ml_model.features.word_in import WordIn
8
+
9
+
10
class WordInPostamble(WordIn):
    """Feature looks for predefined words in the part of the line that follows the value."""

    def __init__(self, words: List[str]) -> None:
        """Passes the predefined words to the base feature

        Args:
            words: list of predefined words - MUST BE IN LOWER CASE

        """
        super().__init__(words)

    def extract(self, candidate: Candidate) -> np.ndarray:
        """Checks at most ML_HUNK symbols after the value of the first line data for the words"""
        line_data = candidate.line_data_list[0]
        # the hunk must not run past the end of the line
        hunk_end = min(len(line_data.line), line_data.value_end + ML_HUNK)
        hunk = line_data.line[line_data.value_end:hunk_end].strip()
        if not hunk:
            # nothing except whitespace after the value - a single all-zero row
            return np.zeros(shape=(1, self.dimension), dtype=np.int8)
        return self.word_in_str(hunk.lower())
@@ -0,0 +1,37 @@
1
+ from typing import List
2
+
3
+ import numpy as np
4
+
5
+ from credsweeper.common.constants import ML_HUNK
6
+ from credsweeper.credentials import Candidate
7
+ from credsweeper.ml_model.features.word_in import WordIn
8
+
9
+
10
class WordInPreamble(WordIn):
    """Feature looks for predefined words in the part of the line before the variable or the value."""

    def __init__(self, words: List[str]) -> None:
        """Passes the predefined words to the base feature

        Args:
            words: list of predefined words - MUST BE IN LOWER CASE

        """
        super().__init__(words)

    def extract(self, candidate: Candidate) -> np.ndarray:
        """Checks at most ML_HUNK symbols before the variable (or the value when no variable) for the words"""
        line_data = candidate.line_data_list[0]
        # a negative variable_start means the variable position is not set - the value start is used instead
        anchor = line_data.variable_start if 0 <= line_data.variable_start else line_data.value_start
        # the hunk must not start before the beginning of the line
        hunk_start = max(0, anchor - ML_HUNK)
        hunk = line_data.line[hunk_start:anchor].strip()
        if not hunk:
            # nothing except whitespace before the anchor - a single all-zero row
            return np.zeros(shape=(1, self.dimension), dtype=np.int8)
        return self.word_in_str(hunk.lower())
@@ -2,13 +2,11 @@ from typing import List
2
2
 
3
3
  import numpy as np
4
4
 
5
- from credsweeper.common.constants import CHUNK_SIZE
6
5
  from credsweeper.credentials import Candidate
7
6
  from credsweeper.ml_model.features.word_in import WordIn
8
- from credsweeper.utils import Util
9
7
 
10
8
 
11
- class WordInLine(WordIn):
9
+ class WordInTransition(WordIn):
12
10
  """Feature is true if line contains at least one word from predefined list."""
13
11
 
14
12
  def __init__(self, words: List[str]) -> None:
@@ -21,9 +19,14 @@ class WordInLine(WordIn):
21
19
  super().__init__(words)
22
20
 
23
21
  def extract(self, candidate: Candidate) -> np.ndarray:
24
- """Returns true if any words in first line"""
25
- subtext = Util.subtext(candidate.line_data_list[0].line, candidate.line_data_list[0].value_start, CHUNK_SIZE)
26
- if subtext:
27
- return self.word_in_str(subtext.lower())
22
+ """Returns true if any words between variable and value"""
23
+ if 0 <= candidate.line_data_list[0].variable_end < candidate.line_data_list[0].value_start:
24
+ transition = candidate.line_data_list[0].line[candidate.line_data_list[0].variable_end:candidate.
25
+ line_data_list[0].value_start].strip()
26
+ else:
27
+ transition = ''
28
+
29
+ if transition:
30
+ return self.word_in_str(transition.lower())
28
31
  else:
29
32
  return np.array([np.zeros(shape=[self.dimension], dtype=np.int8)])
@@ -70,6 +70,38 @@
70
70
  "attribute": "value"
71
71
  }
72
72
  },
73
+ {
74
+ "type": "SearchInAttribute",
75
+ "comment": "camelStyle naming detection",
76
+ "kwargs": {
77
+ "pattern": "^[a-z][a-z]{1,16}[0-9]*([A-Z]([a-z]{1,16}[0-9]*|[0-9]{1,16})){1,8}$",
78
+ "attribute": "value"
79
+ }
80
+ },
81
+ {
82
+ "type": "SearchInAttribute",
83
+ "comment": "PascalStyle naming detection",
84
+ "kwargs": {
85
+ "pattern": "^([A-Z]([a-z]{1,16}[0-9]*|[0-9]{1,16})){1,8}$",
86
+ "attribute": "value"
87
+ }
88
+ },
89
+ {
90
+ "type": "SearchInAttribute",
91
+ "comment": "UPPERCASE naming detection",
92
+ "kwargs": {
93
+ "pattern": "^(_+[0-9]{1,16}|_*[A-Z]{1,16}[0-9]*)(_+([0-9]{1,16}|[A-Z]{1,16}[0-9]*)){1,8}_*$",
94
+ "attribute": "value"
95
+ }
96
+ },
97
+ {
98
+ "type": "SearchInAttribute",
99
+ "comment": "lowercase naming detection",
100
+ "kwargs": {
101
+ "pattern": "^(_+[0-9]{1,16}|_*[a-z]{1,16}[0-9]*)(_+([0-9]{1,16}|[a-z]{1,16}[0-9]*)){1,8}_*$",
102
+ "attribute": "value"
103
+ }
104
+ },
73
105
  {
74
106
  "type": "SearchInAttribute",
75
107
  "comment": "VariableNotAllowedPatternCheck",
@@ -82,7 +114,7 @@
82
114
  "type": "SearchInAttribute",
83
115
  "comment": "VariableNotAllowedNameCheck",
84
116
  "kwargs": {
85
- "pattern": "(?i:pub(lic)?_?key)",
117
+ "pattern": "(?i:(filters?|pub(lic)?)_?key)",
86
118
  "attribute": "variable"
87
119
  }
88
120
  },
@@ -90,7 +122,15 @@
90
122
  "type": "SearchInAttribute",
91
123
  "comment": "VariableNotAllowedNameCheck",
92
124
  "kwargs": {
93
- "pattern": "(?i:_?id$|name$|type$)",
125
+ "pattern": "(?i:(id|size|name|type|manager)$)",
126
+ "attribute": "variable"
127
+ }
128
+ },
129
+ {
130
+ "type": "SearchInAttribute",
131
+ "comment": "PWD invocation",
132
+ "kwargs": {
133
+ "pattern": "(?i:(^\\$pwd$)|(^\\$\\{#?pwd[^}]*\\}$)|(^\\$\\(pwd\\)$)|(^`pwd`$))",
94
134
  "attribute": "variable"
95
135
  }
96
136
  },
@@ -98,44 +138,55 @@
98
138
  "type": "WordInVariable",
99
139
  "kwargs": {
100
140
  "words": [
101
- "/",
102
141
  " ",
142
+ "/",
103
143
  "_at",
104
144
  "_id",
105
- "obj",
145
+ "_len",
146
+ "access",
147
+ "cache",
148
+ "client",
149
+ "control",
150
+ "encrypted",
151
+ "example",
152
+ "expire",
153
+ "fake",
106
154
  "file",
107
- "path",
155
+ "filter",
156
+ "fingerprint",
108
157
  "hash",
158
+ "key",
159
+ "label",
160
+ "length",
161
+ "manager",
162
+ "mock",
109
163
  "name",
164
+ "native",
165
+ "obj",
166
+ "option",
167
+ "p/w",
168
+ "parameter",
169
+ "pass",
170
+ "path",
171
+ "project",
172
+ "public",
173
+ "pw",
174
+ "secret",
175
+ "size",
176
+ "space",
177
+ "status",
178
+ "sword",
179
+ "temp",
110
180
  "test",
181
+ "thumbprint",
111
182
  "time",
112
- "temp",
183
+ "timestamp",
184
+ "title",
185
+ "token",
113
186
  "type",
114
- "mock",
115
- "size",
116
187
  "uniq",
117
- "fake",
118
- "view",
119
- "cache",
120
188
  "valid",
121
- "label",
122
- "title",
123
- "access",
124
- "space",
125
- "filter",
126
- "native",
127
- "status",
128
- "expire",
129
- "client",
130
- "option",
131
- "public",
132
- "project",
133
- "control",
134
- "parameter",
135
- "encrypted",
136
- "timestamp",
137
- "thumbprint",
138
- "fingerprint"
189
+ "view"
139
190
  ]
140
191
  }
141
192
  },
@@ -144,76 +195,154 @@
144
195
  "kwargs": {
145
196
  "words": [
146
197
  " ",
198
+ "$(",
199
+ "${",
147
200
  "(",
148
- "[",
149
- ".",
150
201
  "->",
151
- "${",
152
- "$(",
202
+ ".",
153
203
  "...",
154
- "foo",
155
- "bar",
156
204
  "123",
205
+ "<",
206
+ ">",
207
+ "[",
208
+ "_id",
157
209
  "abc",
158
- "xyz",
159
- "xxx",
160
- "pwd",
161
- "passwd",
162
- "pswd",
163
- "psswd",
210
+ "allow",
211
+ "bar",
212
+ "disable",
213
+ "changeme",
214
+ "example",
215
+ "fake",
216
+ "file",
217
+ "foo",
218
+ "min",
219
+ "mock",
220
+ "my",
221
+ "nil",
164
222
  "pass",
223
+ "passwd",
165
224
  "password",
166
- "pasword",
167
- "null",
168
- "nil",
169
- "undefined",
170
- "none",
171
- "true",
172
- "false",
173
- "example",
225
+ "pswd",
174
226
  "public",
175
- "mock",
176
- "fake",
227
+ "pwd",
177
228
  "test",
178
- "allow",
179
- "my",
180
- "file",
181
- "id"
229
+ "xxx",
230
+ "xyz"
182
231
  ]
183
232
  }
184
233
  },
185
234
  {
186
- "type": "WordInLine",
235
+ "type": "WordInPreamble",
187
236
  "kwargs": {
188
237
  "words": [
238
+ "$",
239
+ "%2",
240
+ "%3",
241
+ "&",
242
+ "&amp;",
189
243
  "(",
190
- "[",
244
+ "->",
191
245
  ".",
192
- "$",
193
246
  "://",
247
+ "?",
194
248
  "@",
195
- "pwd",
196
- "passwd",
197
- "pswd",
198
- "psswd",
249
+ "[",
250
+ "approval",
251
+ "assert",
252
+ "case",
253
+ "circle",
254
+ "equal",
255
+ "example",
256
+ "expect",
257
+ "false",
258
+ "height",
259
+ "image",
260
+ "line",
261
+ "media",
262
+ "nil",
263
+ "none",
264
+ "null",
199
265
  "pass",
200
266
  "password",
201
- "pasword",
202
- "->",
203
- "null",
204
- "nil",
267
+ "path",
268
+ "pwd",
269
+ "sqa",
270
+ "test",
271
+ "true",
205
272
  "undefined",
206
- "none",
207
273
  "unit",
274
+ "width"
275
+ ]
276
+ }
277
+ },
278
+ {
279
+ "type": "WordInTransition",
280
+ "kwargs": {
281
+ "words": [
282
+ "%2",
283
+ "%3",
284
+ "&",
285
+ "(",
286
+ "->",
287
+ ".",
288
+ "?",
289
+ "@",
290
+ "[",
291
+ "bearer",
292
+ "equal",
293
+ "example",
294
+ "expect",
295
+ "line",
296
+ "media",
297
+ "pass",
298
+ "password",
299
+ "path",
208
300
  "test",
209
- "approval",
210
- "case",
211
- "true",
212
- "false",
301
+ "unit"
302
+ ]
303
+ }
304
+ },
305
+ {
306
+ "type": "WordInPostamble",
307
+ "kwargs": {
308
+ "words": [
309
+ "$",
310
+ "%2",
311
+ "%3",
312
+ "&",
313
+ "&amp;",
314
+ "(",
315
+ "->",
316
+ ".",
317
+ "://",
318
+ "?",
319
+ "@",
320
+ "[",
213
321
  "assert",
322
+ "case",
323
+ "circle",
214
324
  "equal",
215
325
  "example",
216
- "expect"
326
+ "expect",
327
+ "false",
328
+ "height",
329
+ "image",
330
+ "line",
331
+ "media",
332
+ "nil",
333
+ "none",
334
+ "null",
335
+ "pass",
336
+ "passwd",
337
+ "password",
338
+ "path",
339
+ "pwd",
340
+ "sqa",
341
+ "test",
342
+ "true",
343
+ "undefined",
344
+ "unit",
345
+ "width"
217
346
  ]
218
347
  }
219
348
  },
@@ -221,20 +350,22 @@
221
350
  "type": "WordInPath",
222
351
  "kwargs": {
223
352
  "words": [
224
- "/test",
225
- "/config",
226
- "/src/",
227
- "/record",
228
- "/usr/local/lib/python",
353
+ "/conf",
229
354
  "/dist-packages/",
355
+ "/example",
356
+ "/record",
357
+ "/script",
230
358
  "/site-packages/",
231
- "/example"
359
+ "/src/",
360
+ "/test",
361
+ "/tool",
362
+ "/usr/local/lib/python",
363
+ "/assets/"
232
364
  ]
233
365
  }
234
366
  },
235
367
  {
236
- "type": "MorphemeDense",
237
- "kwargs": {}
368
+ "type": "MorphemeDense"
238
369
  },
239
370
  {
240
371
  "type": "HasHtmlTag"
@@ -255,6 +386,7 @@
255
386
  ".bat",
256
387
  ".bats",
257
388
  ".bazel",
389
+ ".bin",
258
390
  ".build",
259
391
  ".bundle",
260
392
  ".bzl",
@@ -337,6 +469,7 @@
337
469
  ".nix",
338
470
  ".nolint",
339
471
  ".odd",
472
+ ".onnx",
340
473
  ".oracle",
341
474
  ".pan",
342
475
  ".patch",
@@ -396,6 +529,7 @@
396
529
  ".ts",
397
530
  ".tsx",
398
531
  ".txt",
532
+ ".var",
399
533
  ".vue",
400
534
  ".xaml",
401
535
  ".xib",
@@ -517,6 +517,18 @@ class Util:
517
517
  return True
518
518
  return False
519
519
 
520
@staticmethod
def is_tmx(data: Union[bytes, bytearray]) -> bool:
    """Used to detect tm7, tm6, etc. documents by ThreatModel or KnowledgeBase markup.

    Args:
        data: raw content to check

    Return:
        True if an opening tag is found within the first MAX_LINE_LENGTH bytes
        and the matching closing tag is found after it. False for any other input.

    """
    if not isinstance(data, (bytes, bytearray)):
        return False
    tag_pairs = (
        (b"<ThreatModel", b"</ThreatModel>"),
        (b"<KnowledgeBase", b"</KnowledgeBase>"),
    )
    for tag_open, tag_close in tag_pairs:
        open_pos = data.find(tag_open, 0, MAX_LINE_LENGTH)
        if open_pos < 0:
            # the opening tag is absent at the beginning of the data - try the next pair
            continue
        if open_pos < data.find(tag_close, open_pos):
            # opening and closing tags were found in the right order - suppose it is a tmX document
            return True
    return False
531
+
520
532
  # A well-formed XML must start from < or a whitespace character
521
533
  XML_FIRST_BRACKET_PATTERN = re.compile(rb"^\s*<")
522
534
  XML_OPENING_TAG_PATTERN = re.compile(rb"<([0-9A-Za-z_]{1,256})")
@@ -583,14 +595,14 @@ class Util:
583
595
  line_nums = []
584
596
  tree = etree.fromstringlist(xml_lines)
585
597
  for element in tree.iter():
586
- tag = Util._extract_element_data(element, "tag")
587
- text = Util._extract_element_data(element, "text")
598
+ tag = Util.extract_element_data(element, "tag")
599
+ text = Util.extract_element_data(element, "text")
588
600
  lines.append(f"{tag} : {text}")
589
601
  line_nums.append(element.sourceline)
590
602
  return lines, line_nums
591
603
 
592
604
  @staticmethod
593
- def _extract_element_data(element, attr) -> str:
605
+ def extract_element_data(element: Any, attr: str) -> str:
594
606
  """Extract xml element data to string.
595
607
 
596
608
  Try to extract the xml data and strip() the string.
@@ -605,7 +617,7 @@ class Util:
605
617
  """
606
618
  element_attr: Any = getattr(element, attr)
607
619
  if element_attr is None or not isinstance(element_attr, str):
608
- return ""
620
+ return ''
609
621
  return str(element_attr).strip()
610
622
 
611
623
  @staticmethod
File without changes
File without changes
File without changes