credsweeper 1.11.5__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

Files changed (145) hide show
  1. credsweeper/__init__.py +21 -15
  2. credsweeper/__main__.py +158 -42
  3. credsweeper/app.py +18 -13
  4. credsweeper/common/keyword_pattern.py +19 -18
  5. credsweeper/common/morpheme_checklist.txt +28 -6
  6. credsweeper/config/__init__.py +0 -1
  7. credsweeper/config/config.py +4 -3
  8. credsweeper/credentials/__init__.py +0 -5
  9. credsweeper/credentials/augment_candidates.py +1 -1
  10. credsweeper/credentials/candidate.py +1 -1
  11. credsweeper/credentials/credential_manager.py +1 -1
  12. credsweeper/credentials/line_data.py +43 -8
  13. credsweeper/deep_scanner/__init__.py +0 -1
  14. credsweeper/deep_scanner/abstract_scanner.py +4 -3
  15. credsweeper/deep_scanner/byte_scanner.py +1 -1
  16. credsweeper/deep_scanner/bzip2_scanner.py +2 -2
  17. credsweeper/deep_scanner/csv_scanner.py +71 -0
  18. credsweeper/deep_scanner/deb_scanner.py +1 -1
  19. credsweeper/deep_scanner/deep_scanner.py +22 -12
  20. credsweeper/deep_scanner/docx_scanner.py +1 -1
  21. credsweeper/deep_scanner/eml_scanner.py +1 -1
  22. credsweeper/deep_scanner/encoder_scanner.py +1 -1
  23. credsweeper/deep_scanner/gzip_scanner.py +2 -2
  24. credsweeper/deep_scanner/html_scanner.py +1 -1
  25. credsweeper/deep_scanner/jclass_scanner.py +1 -1
  26. credsweeper/deep_scanner/jks_scanner.py +12 -3
  27. credsweeper/deep_scanner/lang_scanner.py +1 -1
  28. credsweeper/deep_scanner/lzma_scanner.py +2 -2
  29. credsweeper/deep_scanner/mxfile_scanner.py +1 -1
  30. credsweeper/deep_scanner/pdf_scanner.py +1 -1
  31. credsweeper/deep_scanner/pkcs_scanner.py +6 -2
  32. credsweeper/deep_scanner/pptx_scanner.py +1 -1
  33. credsweeper/deep_scanner/rpm_scanner.py +1 -1
  34. credsweeper/deep_scanner/rtf_scanner.py +41 -0
  35. credsweeper/deep_scanner/strings_scanner.py +52 -0
  36. credsweeper/deep_scanner/tar_scanner.py +2 -2
  37. credsweeper/deep_scanner/tmx_scanner.py +2 -2
  38. credsweeper/deep_scanner/xlsx_scanner.py +2 -2
  39. credsweeper/deep_scanner/xml_scanner.py +1 -1
  40. credsweeper/deep_scanner/zip_scanner.py +2 -2
  41. credsweeper/file_handler/__init__.py +0 -15
  42. credsweeper/file_handler/abstract_provider.py +3 -4
  43. credsweeper/file_handler/byte_content_provider.py +11 -2
  44. credsweeper/file_handler/content_provider.py +1 -1
  45. credsweeper/file_handler/data_content_provider.py +1 -1
  46. credsweeper/file_handler/diff_content_provider.py +133 -3
  47. credsweeper/file_handler/file_path_extractor.py +4 -2
  48. credsweeper/file_handler/files_provider.py +4 -4
  49. credsweeper/file_handler/patches_provider.py +7 -8
  50. credsweeper/file_handler/text_content_provider.py +8 -2
  51. credsweeper/filters/__init__.py +3 -4
  52. credsweeper/filters/filter.py +5 -3
  53. credsweeper/filters/group/__init__.py +0 -2
  54. credsweeper/filters/group/general_keyword.py +2 -2
  55. credsweeper/filters/group/general_pattern.py +2 -2
  56. credsweeper/filters/group/group.py +38 -36
  57. credsweeper/filters/group/password_keyword.py +9 -8
  58. credsweeper/filters/group/token_pattern.py +5 -5
  59. credsweeper/filters/group/url_credentials_group.py +8 -8
  60. credsweeper/filters/group/weird_base36_token.py +6 -6
  61. credsweeper/filters/group/weird_base64_token.py +5 -5
  62. credsweeper/filters/line_git_binary_check.py +5 -4
  63. credsweeper/filters/line_specific_key_check.py +6 -5
  64. credsweeper/filters/line_uue_part_check.py +5 -4
  65. credsweeper/filters/value_allowlist_check.py +6 -5
  66. credsweeper/filters/value_array_dictionary_check.py +8 -6
  67. credsweeper/filters/value_atlassian_token_check.py +6 -5
  68. credsweeper/filters/value_azure_token_check.py +6 -5
  69. credsweeper/filters/value_base32_data_check.py +8 -5
  70. credsweeper/filters/value_base64_data_check.py +6 -5
  71. credsweeper/filters/value_base64_encoded_pem_check.py +6 -5
  72. credsweeper/filters/value_base64_key_check.py +6 -5
  73. credsweeper/filters/value_base64_part_check.py +6 -5
  74. credsweeper/filters/value_basic_auth_check.py +37 -0
  75. credsweeper/filters/value_blocklist_check.py +6 -4
  76. credsweeper/filters/value_camel_case_check.py +8 -7
  77. credsweeper/filters/value_dictionary_keyword_check.py +6 -4
  78. credsweeper/filters/value_discord_bot_check.py +6 -5
  79. credsweeper/filters/value_entropy_base_check.py +6 -5
  80. credsweeper/filters/value_file_path_check.py +13 -8
  81. credsweeper/filters/value_github_check.py +8 -6
  82. credsweeper/filters/value_grafana_check.py +6 -5
  83. credsweeper/filters/value_grafana_service_check.py +5 -4
  84. credsweeper/filters/value_hex_number_check.py +5 -4
  85. credsweeper/filters/value_jfrog_token_check.py +6 -5
  86. credsweeper/filters/value_json_web_key_check.py +6 -5
  87. credsweeper/filters/value_json_web_token_check.py +6 -5
  88. credsweeper/filters/value_last_word_check.py +6 -4
  89. credsweeper/filters/{value_dictionary_value_length_check.py → value_length_check.py} +12 -6
  90. credsweeper/filters/value_method_check.py +5 -4
  91. credsweeper/filters/value_morphemes_check.py +43 -0
  92. credsweeper/filters/value_not_allowed_pattern_check.py +6 -5
  93. credsweeper/filters/value_not_part_encoded_check.py +4 -4
  94. credsweeper/filters/value_number_check.py +5 -4
  95. credsweeper/filters/value_pattern_check.py +61 -41
  96. credsweeper/filters/value_similarity_check.py +6 -4
  97. credsweeper/filters/value_split_keyword_check.py +5 -4
  98. credsweeper/filters/value_string_type_check.py +10 -7
  99. credsweeper/filters/value_token_base_check.py +5 -4
  100. credsweeper/filters/value_token_check.py +6 -5
  101. credsweeper/logger/__init__.py +0 -1
  102. credsweeper/logger/logger.py +1 -1
  103. credsweeper/ml_model/__init__.py +0 -1
  104. credsweeper/ml_model/features/__init__.py +1 -0
  105. credsweeper/ml_model/features/entropy_evaluation.py +1 -1
  106. credsweeper/ml_model/features/feature.py +2 -19
  107. credsweeper/ml_model/features/file_extension.py +2 -2
  108. credsweeper/ml_model/features/has_html_tag.py +12 -10
  109. credsweeper/ml_model/features/is_secret_numeric.py +5 -4
  110. credsweeper/ml_model/features/length_of_attribute.py +1 -1
  111. credsweeper/ml_model/features/morpheme_dense.py +15 -8
  112. credsweeper/ml_model/features/rule_name.py +2 -2
  113. credsweeper/ml_model/features/rule_severity.py +21 -0
  114. credsweeper/ml_model/features/search_in_attribute.py +1 -1
  115. credsweeper/ml_model/features/word_in.py +10 -33
  116. credsweeper/ml_model/features/word_in_path.py +6 -4
  117. credsweeper/ml_model/features/word_in_postamble.py +2 -5
  118. credsweeper/ml_model/features/word_in_preamble.py +2 -5
  119. credsweeper/ml_model/features/word_in_transition.py +2 -5
  120. credsweeper/ml_model/features/word_in_value.py +3 -4
  121. credsweeper/ml_model/features/word_in_variable.py +3 -4
  122. credsweeper/ml_model/ml_config.json +140 -27
  123. credsweeper/ml_model/ml_model.onnx +0 -0
  124. credsweeper/ml_model/ml_validator.py +4 -3
  125. credsweeper/rules/__init__.py +0 -1
  126. credsweeper/rules/config.yaml +329 -239
  127. credsweeper/rules/rule.py +4 -3
  128. credsweeper/scanner/__init__.py +0 -1
  129. credsweeper/scanner/scan_type/__init__.py +0 -5
  130. credsweeper/scanner/scan_type/multi_pattern.py +4 -4
  131. credsweeper/scanner/scan_type/pem_key_pattern.py +4 -4
  132. credsweeper/scanner/scan_type/scan_type.py +4 -4
  133. credsweeper/scanner/scan_type/single_pattern.py +4 -4
  134. credsweeper/scanner/scanner.py +24 -15
  135. credsweeper/secret/config.json +19 -6
  136. credsweeper/utils/__init__.py +0 -1
  137. credsweeper/utils/pem_key_detector.py +3 -3
  138. credsweeper/utils/util.py +24 -150
  139. {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/METADATA +7 -7
  140. credsweeper-1.13.3.dist-info/RECORD +164 -0
  141. credsweeper/filters/value_couple_keyword_check.py +0 -26
  142. credsweeper-1.11.5.dist-info/RECORD +0 -159
  143. {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/WHEEL +0 -0
  144. {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/entry_points.txt +0 -0
  145. {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/licenses/LICENSE +0 -0
@@ -14,11 +14,15 @@
14
14
  /var
15
15
  000
16
16
  111
17
+ 14159265
18
+ 18284590
17
19
  222
18
20
  333
19
21
  444
20
22
  555
23
+ 65358979
21
24
  666
25
+ 71828182
22
26
  777
23
27
  80211
24
28
  888
@@ -195,7 +199,7 @@ aux
195
199
  avail
196
200
  avatar
197
201
  aver
198
- awesome
202
+ awesom
199
203
  axis
200
204
  azure
201
205
  back
@@ -227,12 +231,14 @@ bind
227
231
  bio
228
232
  bipol
229
233
  bit
234
+ bixby
230
235
  black
231
236
  blan
232
237
  bless
233
238
  blic
234
239
  blish
235
240
  blob
241
+ blood
236
242
  blue
237
243
  board
238
244
  bob
@@ -243,7 +249,7 @@ boost
243
249
  boot
244
250
  boss
245
251
  bot
246
- bound
252
+ boun
247
253
  box
248
254
  branch
249
255
  break
@@ -497,6 +503,7 @@ dust
497
503
  dvb
498
504
  dynamic
499
505
  dynamo
506
+ eadbee
500
507
  easin
501
508
  easy
502
509
  ecdhe
@@ -607,6 +614,7 @@ fleet
607
614
  flick
608
615
  flix
609
616
  float
617
+ flood
610
618
  floor
611
619
  fluent
612
620
  fluid
@@ -615,7 +623,7 @@ focus
615
623
  foo
616
624
  for
617
625
  fossil
618
- found
626
+ foun
619
627
  fpga
620
628
  frame
621
629
  free
@@ -648,6 +656,7 @@ git
648
656
  given
649
657
  global
650
658
  gobble
659
+ good
651
660
  google
652
661
  grab
653
662
  grace
@@ -703,6 +712,7 @@ home
703
712
  hook
704
713
  horizon
705
714
  host
715
+ houn
706
716
  hours
707
717
  html
708
718
  http
@@ -789,6 +799,7 @@ jpg_
789
799
  json
790
800
  jump
791
801
  justif
802
+ kafka
792
803
  kerberos
793
804
  kernel
794
805
  key
@@ -797,6 +808,8 @@ kill
797
808
  kind
798
809
  kinesis
799
810
  kirk
811
+ know
812
+ knox
800
813
  kris
801
814
  lab
802
815
  lag
@@ -853,7 +866,7 @@ local
853
866
  lock
854
867
  log
855
868
  long
856
- lookup
869
+ look
857
870
  loop
858
871
  loose
859
872
  lost
@@ -946,6 +959,7 @@ ndow
946
959
  ned
947
960
  need
948
961
  neigh
962
+ neo4j
949
963
  ner
950
964
  net
951
965
  neutr
@@ -990,6 +1004,7 @@ oncat
990
1004
  one
991
1005
  onfig
992
1006
  only
1007
+ ookup
993
1008
  open
994
1009
  opt/
995
1010
  opted
@@ -1007,6 +1022,7 @@ ormat
1007
1022
  orph
1008
1023
  otorola
1009
1024
  ottle
1025
+ ound
1010
1026
  ously
1011
1027
  out
1012
1028
  over
@@ -1066,6 +1082,7 @@ pose
1066
1082
  posit
1067
1083
  possib
1068
1084
  post
1085
+ poun
1069
1086
  power
1070
1087
  pre_
1071
1088
  pred
@@ -1210,7 +1227,7 @@ rotat
1210
1227
  rotocol
1211
1228
  rottl
1212
1229
  rough
1213
- round
1230
+ roun
1214
1231
  roup
1215
1232
  row
1216
1233
  rroga
@@ -1222,6 +1239,7 @@ run
1222
1239
  rxtx
1223
1240
  sabl
1224
1241
  sage
1242
+ salt
1225
1243
  same
1226
1244
  sampl
1227
1245
  sams
@@ -1315,9 +1333,10 @@ sock
1315
1333
  soft
1316
1334
  solid
1317
1335
  solve
1336
+ some
1318
1337
  sony
1319
1338
  sort
1320
- sound
1339
+ soun
1321
1340
  source
1322
1341
  space
1323
1342
  spacing
@@ -1427,6 +1446,7 @@ tio
1427
1446
  tish
1428
1447
  title
1429
1448
  titud
1449
+ tizen
1430
1450
  tmp/
1431
1451
  to_
1432
1452
  tod
@@ -1438,6 +1458,7 @@ topic
1438
1458
  tory
1439
1459
  total
1440
1460
  touch
1461
+ tour
1441
1462
  trace
1442
1463
  tract
1443
1464
  traffic
@@ -1571,6 +1592,7 @@ yield
1571
1592
  you
1572
1593
  zeppelin
1573
1594
  zero
1595
+ zigbee
1574
1596
  zing
1575
1597
  zona
1576
1598
  zorro
@@ -1 +0,0 @@
1
- from credsweeper.config.config import Config
@@ -4,7 +4,7 @@ from typing import Dict, List, Optional, Set, Any
4
4
  from humanfriendly import parse_size
5
5
 
6
6
  from credsweeper.common.constants import Severity, DEFAULT_PATTERN_LEN
7
- from credsweeper.utils import Util
7
+ from credsweeper.utils.util import Util
8
8
 
9
9
 
10
10
  class Config:
@@ -35,12 +35,13 @@ class Config:
35
35
  self.candidate_output: List[str] = config["candidate_output"]
36
36
  self.find_by_ext: bool = config["find_by_ext"]
37
37
  self.size_limit: Optional[int] = parse_size(config["size_limit"]) if config["size_limit"] is not None else None
38
+ self.pedantic: bool = bool(config["pedantic"])
38
39
  self.depth: int = int(config["depth"])
39
40
  self.doc: bool = config["doc"]
40
41
  self.severity: Severity = Severity.get(config.get("severity"))
41
42
 
42
- self.min_keyword_value_length: int = int(config["min_keyword_value_length"])
43
- self.min_pattern_value_length: int = int(config["min_pattern_value_length"])
43
+ self.max_url_cred_value_length: int = int(config["max_url_cred_value_length"])
44
+ self.max_password_value_length: int = int(config["max_password_value_length"])
44
45
 
45
46
  # Trim exclude patterns from space like characters
46
47
  self.exclude_lines = set(line.strip() for line in self.exclude_lines)
@@ -1,5 +0,0 @@
1
- from credsweeper.credentials.candidate import Candidate
2
- from credsweeper.credentials.candidate_group_generator import CandidateGroupGenerator
3
- from credsweeper.credentials.candidate_key import CandidateKey
4
- from credsweeper.credentials.credential_manager import CredentialManager
5
- from credsweeper.credentials.line_data import LineData
@@ -1,6 +1,6 @@
1
1
  from typing import List
2
2
 
3
- from credsweeper.credentials import Candidate
3
+ from credsweeper.credentials.candidate import Candidate
4
4
 
5
5
 
6
6
  def augment_candidates(candidates: List[Candidate], new_candidates: List[Candidate]):
@@ -4,7 +4,7 @@ from json.encoder import py_encode_basestring_ascii
4
4
  from typing import Any, Dict, List, Optional
5
5
 
6
6
  from credsweeper.common.constants import Severity, Confidence
7
- from credsweeper.config import Config
7
+ from credsweeper.config.config import Config
8
8
  from credsweeper.credentials.line_data import LineData
9
9
 
10
10
 
@@ -2,7 +2,7 @@ import logging
2
2
  from multiprocessing import Manager
3
3
  from typing import List, Dict, Tuple
4
4
 
5
- from credsweeper.credentials import Candidate
5
+ from credsweeper.credentials.candidate import Candidate
6
6
  from credsweeper.credentials.candidate_group_generator import CandidateGroupGenerator, CandidateKey
7
7
 
8
8
  logger = logging.getLogger(__name__)
@@ -8,8 +8,8 @@ from typing import Any, Dict, Optional, Tuple
8
8
  from colorama import Fore, Style
9
9
 
10
10
  from credsweeper.common.constants import MAX_LINE_LENGTH, UTF_8, StartEnd, ML_HUNK
11
- from credsweeper.config import Config
12
- from credsweeper.utils import Util
11
+ from credsweeper.config.config import Config
12
+ from credsweeper.utils.util import Util
13
13
 
14
14
 
15
15
  class LineData:
@@ -137,12 +137,33 @@ class LineData:
137
137
 
138
138
  def sanitize_value(self):
139
139
  """Clean found value from extra artifacts. Correct positions if changed."""
140
+ # process the quotation workaround before cached properties invocation
141
+ if not self.value_leftquote and not self.value_rightquote:
142
+ while self.value:
143
+ first_symbol_code = ord(self.value[0])
144
+ last_symbol_code = ord(self.value[-1])
145
+ if 0x2018 <= first_symbol_code <= 0x201B and 0x2018 <= last_symbol_code <= 0x201B:
146
+ self.value_leftquote = self.value_rightquote = "'"
147
+ self.value = self.value[:-1]
148
+ self.value_end -= 1
149
+ self.value = self.value[1:]
150
+ self.value_start += 1
151
+ elif 0x201C <= first_symbol_code <= 0x201F and 0x201C <= last_symbol_code <= 0x201F:
152
+ self.value_leftquote = self.value_rightquote = '"'
153
+ self.value = self.value[1:]
154
+ self.value_start += 1
155
+ self.value = self.value[:-1]
156
+ self.value_end -= 1
157
+ else:
158
+ break
159
+
140
160
  if self.variable and self.value and not self.is_well_quoted_value:
141
161
  # sanitize is actual step for keyword pattern only
142
162
  _value = self.value
143
163
  self.clean_url_parameters()
144
164
  self.clean_bash_parameters()
145
165
  self.clean_toml_parameters()
166
+ self.clean_tag_parameters()
146
167
  if 0 <= self.value_start and 0 <= self.value_end and len(self.value) < len(_value):
147
168
  start = _value.find(self.value)
148
169
  self.value_start += start
@@ -176,15 +197,14 @@ class LineData:
176
197
  If line seem to be a URL - split by & character.
177
198
  Variable should be right most value after & or ? ([-1]). And value should be left most before & ([0])
178
199
  """
179
- if self.check_url_part():
200
+ # skip sanitize in case of URL credential rule - the regex is mature enough
201
+ if self.check_url_part() and not self.variable.endswith("://"):
180
202
  # all checks have passed - line before the value may be a URL
181
203
  self.variable = self.variable.rsplit('&')[-1].rsplit('?')[-1].rsplit(';')[-1]
182
204
  self.value = self.value.split('&', maxsplit=1)[0].split(';', maxsplit=1)[0].split('#', maxsplit=1)[0]
183
- if not self.variable.endswith("://"):
184
- # skip sanitize in case of URL credential rule
185
- self.value = self.url_unicode_split.split(self.value)[0]
186
- if self._3d_escaped_separator:
187
- self.value = self.url_percent_split.split(self.value)[0]
205
+ self.value = self.url_unicode_split.split(self.value)[0]
206
+ if self._3d_escaped_separator:
207
+ self.value = self.url_percent_split.split(self.value)[0]
188
208
 
189
209
  def clean_bash_parameters(self) -> None:
190
210
  """Split variable and value by bash special characters, if line assumed to be CLI command."""
@@ -212,6 +232,21 @@ class LineData:
212
232
  self.value = self.value[:-1]
213
233
  cleaning_required = True
214
234
 
235
+ def clean_tag_parameters(self) -> None:
236
+ """Remove closing tag from value if the opened is somewhere before in line"""
237
+ cleaning_required = self.value and self.value.endswith('>')
238
+ while cleaning_required:
239
+ closing_tag_pos = self.value.rfind("</")
240
+ if 0 <= closing_tag_pos:
241
+ # use `<a` to avoid tag parameters
242
+ opening_tag_prefix = f"<{self.value[closing_tag_pos + 2:-1]}"
243
+ if cleaning_required := (opening_tag_prefix not in self.value
244
+ and 0 <= self.line.find(opening_tag_prefix, 0, self.value_start)):
245
+ self.value = self.value[:closing_tag_pos]
246
+ cleaning_required = self.value and self.value.endswith('>')
247
+ else:
248
+ break
249
+
215
250
  def sanitize_variable(self) -> None:
216
251
  """Remove trailing spaces, dashes and quotations around the variable. Correct position."""
217
252
  sanitized_var_len = 0
@@ -1 +0,0 @@
1
- from credsweeper.deep_scanner.deep_scanner import DeepScanner
@@ -6,9 +6,9 @@ from typing import List, Optional, Tuple, Any, Generator
6
6
 
7
7
  from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN, DEFAULT_ENCODING, UTF_8, \
8
8
  MIN_VALUE_LENGTH
9
- from credsweeper.config import Config
10
- from credsweeper.credentials import Candidate
9
+ from credsweeper.config.config import Config
11
10
  from credsweeper.credentials.augment_candidates import augment_candidates
11
+ from credsweeper.credentials.candidate import Candidate
12
12
  from credsweeper.file_handler.byte_content_provider import ByteContentProvider
13
13
  from credsweeper.file_handler.content_provider import ContentProvider
14
14
  from credsweeper.file_handler.data_content_provider import DataContentProvider
@@ -18,7 +18,7 @@ from credsweeper.file_handler.file_path_extractor import FilePathExtractor
18
18
  from credsweeper.file_handler.string_content_provider import StringContentProvider
19
19
  from credsweeper.file_handler.struct_content_provider import StructContentProvider
20
20
  from credsweeper.file_handler.text_content_provider import TextContentProvider
21
- from credsweeper.scanner import Scanner
21
+ from credsweeper.scanner.scanner import Scanner
22
22
 
23
23
  logger = logging.getLogger(__name__)
24
24
 
@@ -51,6 +51,7 @@ class AbstractScanner(ABC):
51
51
  @abstractmethod
52
52
  def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[List[Any], List[Any]]:
53
53
  """Returns possibly scan methods for the data depends on content and fallback scanners"""
54
+ raise NotImplementedError(__name__)
54
55
 
55
56
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
56
57
 
@@ -2,7 +2,7 @@ import logging
2
2
  from abc import ABC
3
3
  from typing import List, Optional
4
4
 
5
- from credsweeper.credentials import Candidate
5
+ from credsweeper.credentials.candidate import Candidate
6
6
  from credsweeper.file_handler.byte_content_provider import ByteContentProvider
7
7
  from credsweeper.file_handler.data_content_provider import DataContentProvider
8
8
  from .abstract_scanner import AbstractScanner
@@ -4,10 +4,10 @@ from abc import ABC
4
4
  from pathlib import Path
5
5
  from typing import List, Optional
6
6
 
7
- from credsweeper.credentials import Candidate
7
+ from credsweeper.credentials.candidate import Candidate
8
8
  from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
9
9
  from credsweeper.file_handler.data_content_provider import DataContentProvider
10
- from credsweeper.utils import Util
10
+ from credsweeper.utils.util import Util
11
11
 
12
12
  logger = logging.getLogger(__name__)
13
13
 
@@ -0,0 +1,71 @@
1
+ import csv
2
+ import io
3
+ import logging
4
+ from abc import ABC
5
+ from typing import List, Optional, Dict, Any
6
+
7
+ from credsweeper.common.constants import MAX_LINE_LENGTH
8
+ from credsweeper.credentials.candidate import Candidate
9
+ from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
10
+ from credsweeper.file_handler.data_content_provider import DataContentProvider
11
+ from credsweeper.file_handler.struct_content_provider import StructContentProvider
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ class CsvScanner(AbstractScanner, ABC):
17
+ """Implements CSV scanning"""
18
+
19
+ sniffer = csv.Sniffer()
20
+ # do not use space as separator to avoid hallucinations
21
+ delimiters = ",;\t|\x1F"
22
+
23
+ @classmethod
24
+ def get_structure(cls, text: str) -> List[Dict[str, Any]]:
25
+ """Reads a text as CSV standard with guessed dialect"""
26
+ # windows style \r\n
27
+ first_line_end = text.find('\r', 0, MAX_LINE_LENGTH)
28
+ line_terminator = "\r\n"
29
+ if 0 > first_line_end:
30
+ # unix style \n
31
+ first_line_end = text.find('\n', 0, MAX_LINE_LENGTH)
32
+ line_terminator = "\n"
33
+ if 0 > first_line_end:
34
+ raise ValueError(f"No suitable line end found in {MAX_LINE_LENGTH} symbols")
35
+
36
+ first_line = text[:first_line_end]
37
+ dialect = cls.sniffer.sniff(first_line, delimiters=cls.delimiters)
38
+ rows = []
39
+ reader = csv.DictReader(io.StringIO(text),
40
+ delimiter=dialect.delimiter,
41
+ lineterminator=line_terminator,
42
+ strict=True)
43
+ # check the constant columns number for all rows
44
+ fields_number = sum(1 for x in reader.fieldnames if x is not None)
45
+ for row in reader:
46
+ if not isinstance(row, dict):
47
+ raise ValueError(f"ERROR: wrong row '{row}'")
48
+ if len(row) != fields_number or any(x is None for x in row.values()):
49
+ # None means no separator used
50
+ raise ValueError(f"Different columns number in row '{row}' - mismatch {fields_number}")
51
+ rows.append(row)
52
+ return rows
53
+
54
+ def data_scan(
55
+ self, #
56
+ data_provider: DataContentProvider, #
57
+ depth: int, #
58
+ recursive_limit_size: int) -> Optional[List[Candidate]]:
59
+ """Tries to scan each row as structure with column name in key"""
60
+ try:
61
+ if rows := self.get_structure(data_provider.text):
62
+ struct_content_provider = StructContentProvider(struct=rows,
63
+ file_path=data_provider.file_path,
64
+ file_type=data_provider.file_type,
65
+ info=f"{data_provider.info}|CSV")
66
+ new_limit = recursive_limit_size - sum(len(x) for x in rows)
67
+ struct_candidates = self.structure_scan(struct_content_provider, depth, new_limit)
68
+ return struct_candidates
69
+ except Exception as csv_exc:
70
+ logger.debug(f"{data_provider.file_path}:{csv_exc}")
71
+ return None
@@ -4,7 +4,7 @@ from abc import ABC
4
4
  from typing import List, Optional, Generator, Tuple
5
5
 
6
6
  from credsweeper.common.constants import MIN_DATA_LEN, UTF_8
7
- from credsweeper.credentials import Candidate
7
+ from credsweeper.credentials.candidate import Candidate
8
8
  from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
9
9
  from credsweeper.file_handler.data_content_provider import DataContentProvider
10
10
  from credsweeper.utils.util import Util
@@ -1,12 +1,12 @@
1
1
  import logging
2
2
  from typing import List, Any, Tuple
3
3
 
4
- from credsweeper.common.constants import MIN_DATA_LEN
5
- from credsweeper.config import Config
6
- from credsweeper.scanner import Scanner
7
- from credsweeper.utils import Util
4
+ from credsweeper.config.config import Config
5
+ from credsweeper.scanner.scanner import Scanner
6
+ from credsweeper.utils.util import Util
8
7
  from .byte_scanner import ByteScanner
9
8
  from .bzip2_scanner import Bzip2Scanner
9
+ from .csv_scanner import CsvScanner
10
10
  from .deb_scanner import DebScanner
11
11
  from .docx_scanner import DocxScanner
12
12
  from .eml_scanner import EmlScanner
@@ -23,7 +23,9 @@ from .pdf_scanner import PdfScanner
23
23
  from .pkcs_scanner import PkcsScanner
24
24
  from .pptx_scanner import PptxScanner
25
25
  from .rpm_scanner import RpmScanner
26
+ from .rtf_scanner import RtfScanner
26
27
  from .sqlite3_scanner import Sqlite3Scanner
28
+ from .strings_scanner import StringsScanner
27
29
  from .tar_scanner import TarScanner
28
30
  from .tmx_scanner import TmxScanner
29
31
  from .xlsx_scanner import XlsxScanner
@@ -38,6 +40,7 @@ class DeepScanner(
38
40
  ByteScanner, #
39
41
  Bzip2Scanner, #
40
42
  DocxScanner, #
43
+ CsvScanner, #
41
44
  EncoderScanner, #
42
45
  GzipScanner, #
43
46
  HtmlScanner, #
@@ -49,8 +52,10 @@ class DeepScanner(
49
52
  PdfScanner, #
50
53
  PkcsScanner, #
51
54
  PptxScanner, #
55
+ RtfScanner, #
52
56
  RpmScanner, #
53
57
  Sqlite3Scanner, #
58
+ StringsScanner, #
54
59
  TarScanner, #
55
60
  DebScanner, #
56
61
  XmlScanner, #
@@ -133,6 +138,9 @@ class DeepScanner(
133
138
  deep_scanners.append(Sqlite3Scanner)
134
139
  elif Util.is_asn1(data):
135
140
  deep_scanners.append(PkcsScanner)
141
+ elif Util.is_rtf(data):
142
+ deep_scanners.append(RtfScanner)
143
+ fallback_scanners.append(ByteScanner)
136
144
  elif Util.is_xml(data):
137
145
  if Util.is_html(data):
138
146
  deep_scanners.append(HtmlScanner)
@@ -150,24 +158,26 @@ class DeepScanner(
150
158
  deep_scanners.append(XmlScanner)
151
159
  fallback_scanners.append(ByteScanner)
152
160
  elif Util.is_eml(data):
153
- if ".eml" == descriptor.extension:
161
+ if descriptor.extension in (".eml", ".mht"):
154
162
  deep_scanners.append(EmlScanner)
155
163
  else:
156
164
  if 0 < depth:
157
- # formal patch looks like an eml
165
+ # a formal patch looks like an eml
158
166
  deep_scanners.append(PatchScanner)
159
167
  fallback_scanners.append(EmlScanner)
160
168
  fallback_scanners.append(ByteScanner)
161
- elif Util.is_known(data):
162
- # the format is known but cannot be scanned
163
- pass
164
169
  elif not Util.is_binary(data):
170
+ # keep ByteScanner first to apply real value position if possible
171
+ deep_scanners.append(ByteScanner)
165
172
  if 0 < depth:
166
173
  deep_scanners.append(PatchScanner)
167
174
  deep_scanners.append(EncoderScanner)
168
175
  deep_scanners.append(LangScanner)
169
- deep_scanners.append(ByteScanner)
176
+ deep_scanners.append(CsvScanner)
170
177
  else:
171
- logger.warning("Cannot apply a deep scanner for type %s prefix %s %d", descriptor,
172
- repr(data[:MIN_DATA_LEN]), len(data))
178
+ if 0 < depth:
179
+ deep_scanners.append(StringsScanner)
180
+ else:
181
+ logger.warning("Cannot apply a deep scanner for type %s prefix %s %d", descriptor, repr(data[:32]),
182
+ len(data))
173
183
  return deep_scanners, fallback_scanners
@@ -11,7 +11,7 @@ from docx.table import _Cell, Table
11
11
  from docx.text.paragraph import Paragraph
12
12
  from lxml.etree import _Element
13
13
 
14
- from credsweeper.credentials import Candidate
14
+ from credsweeper.credentials.candidate import Candidate
15
15
  from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
16
16
  from credsweeper.file_handler.data_content_provider import DataContentProvider
17
17
  from credsweeper.file_handler.string_content_provider import StringContentProvider
@@ -3,7 +3,7 @@ import logging
3
3
  from abc import ABC
4
4
  from typing import List, Optional
5
5
 
6
- from credsweeper.credentials import Candidate
6
+ from credsweeper.credentials.candidate import Candidate
7
7
  from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
8
8
  from credsweeper.file_handler.byte_content_provider import ByteContentProvider
9
9
  from credsweeper.file_handler.data_content_provider import DataContentProvider
@@ -2,7 +2,7 @@ import logging
2
2
  from abc import ABC
3
3
  from typing import List, Optional
4
4
 
5
- from credsweeper.credentials import Candidate
5
+ from credsweeper.credentials.candidate import Candidate
6
6
  from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
7
7
  from credsweeper.file_handler.data_content_provider import DataContentProvider
8
8
 
@@ -5,10 +5,10 @@ from abc import ABC
5
5
  from pathlib import Path
6
6
  from typing import List, Optional
7
7
 
8
- from credsweeper.credentials import Candidate
8
+ from credsweeper.credentials.candidate import Candidate
9
9
  from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
10
10
  from credsweeper.file_handler.data_content_provider import DataContentProvider
11
- from credsweeper.utils import Util
11
+ from credsweeper.utils.util import Util
12
12
 
13
13
  logger = logging.getLogger(__name__)
14
14
 
@@ -2,7 +2,7 @@ import logging
2
2
  from abc import ABC
3
3
  from typing import List, Optional
4
4
 
5
- from credsweeper.credentials import Candidate
5
+ from credsweeper.credentials.candidate import Candidate
6
6
  from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
7
7
  from credsweeper.file_handler.data_content_provider import DataContentProvider
8
8
  from credsweeper.file_handler.string_content_provider import StringContentProvider
@@ -5,7 +5,7 @@ from abc import ABC
5
5
  from typing import List, Optional
6
6
 
7
7
  from credsweeper.common.constants import MIN_DATA_LEN, UTF_8
8
- from credsweeper.credentials import Candidate
8
+ from credsweeper.credentials.candidate import Candidate
9
9
  from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
10
10
  from credsweeper.file_handler.data_content_provider import DataContentProvider
11
11
  from credsweeper.file_handler.struct_content_provider import StructContentProvider
@@ -4,7 +4,8 @@ from typing import List, Optional
4
4
 
5
5
  import jks
6
6
 
7
- from credsweeper.credentials import Candidate
7
+ from credsweeper.common.constants import Severity, Confidence
8
+ from credsweeper.credentials.candidate import Candidate
8
9
  from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
9
10
  from credsweeper.file_handler.data_content_provider import DataContentProvider
10
11
 
@@ -24,14 +25,22 @@ class JksScanner(AbstractScanner, ABC):
24
25
  try:
25
26
  keystore = jks.KeyStore.loads(data_provider.data, pw_probe, try_decrypt_keys=True)
26
27
  # the password probe has passed, it will be the value
27
- info = (f"{data_provider.info}|JKS:"
28
- f"{'sensitive data' if keystore.private_keys or keystore.secret_keys else 'default password'}")
28
+ if keystore.private_keys or keystore.secret_keys:
29
+ severity = Severity.HIGH
30
+ confidence = Confidence.STRONG
31
+ info = f"{data_provider.info}|JKS:default password"
32
+ else:
33
+ severity = Severity.LOW
34
+ confidence = Confidence.WEAK
35
+ info = f"{data_provider.info}|JKS:sensitive data"
29
36
  candidate = Candidate.get_dummy_candidate(
30
37
  self.config, #
31
38
  data_provider.file_path, #
32
39
  data_provider.file_type, #
33
40
  info, #
34
41
  "Java Key Storage")
42
+ candidate.severity = severity
43
+ candidate.confidence = confidence
35
44
  value = pw_probe or "<EMPTY PASSWORD>"
36
45
  candidate.line_data_list[0].line = f"'{value}' is the password"
37
46
  candidate.line_data_list[0].value = pw_probe or "<EMPTY PASSWORD>"