credsweeper 1.11.5__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- credsweeper/__init__.py +21 -15
- credsweeper/__main__.py +158 -42
- credsweeper/app.py +18 -13
- credsweeper/common/keyword_pattern.py +19 -18
- credsweeper/common/morpheme_checklist.txt +28 -6
- credsweeper/config/__init__.py +0 -1
- credsweeper/config/config.py +4 -3
- credsweeper/credentials/__init__.py +0 -5
- credsweeper/credentials/augment_candidates.py +1 -1
- credsweeper/credentials/candidate.py +1 -1
- credsweeper/credentials/credential_manager.py +1 -1
- credsweeper/credentials/line_data.py +43 -8
- credsweeper/deep_scanner/__init__.py +0 -1
- credsweeper/deep_scanner/abstract_scanner.py +4 -3
- credsweeper/deep_scanner/byte_scanner.py +1 -1
- credsweeper/deep_scanner/bzip2_scanner.py +2 -2
- credsweeper/deep_scanner/csv_scanner.py +71 -0
- credsweeper/deep_scanner/deb_scanner.py +1 -1
- credsweeper/deep_scanner/deep_scanner.py +22 -12
- credsweeper/deep_scanner/docx_scanner.py +1 -1
- credsweeper/deep_scanner/eml_scanner.py +1 -1
- credsweeper/deep_scanner/encoder_scanner.py +1 -1
- credsweeper/deep_scanner/gzip_scanner.py +2 -2
- credsweeper/deep_scanner/html_scanner.py +1 -1
- credsweeper/deep_scanner/jclass_scanner.py +1 -1
- credsweeper/deep_scanner/jks_scanner.py +12 -3
- credsweeper/deep_scanner/lang_scanner.py +1 -1
- credsweeper/deep_scanner/lzma_scanner.py +2 -2
- credsweeper/deep_scanner/mxfile_scanner.py +1 -1
- credsweeper/deep_scanner/pdf_scanner.py +1 -1
- credsweeper/deep_scanner/pkcs_scanner.py +6 -2
- credsweeper/deep_scanner/pptx_scanner.py +1 -1
- credsweeper/deep_scanner/rpm_scanner.py +1 -1
- credsweeper/deep_scanner/rtf_scanner.py +41 -0
- credsweeper/deep_scanner/strings_scanner.py +52 -0
- credsweeper/deep_scanner/tar_scanner.py +2 -2
- credsweeper/deep_scanner/tmx_scanner.py +2 -2
- credsweeper/deep_scanner/xlsx_scanner.py +2 -2
- credsweeper/deep_scanner/xml_scanner.py +1 -1
- credsweeper/deep_scanner/zip_scanner.py +2 -2
- credsweeper/file_handler/__init__.py +0 -15
- credsweeper/file_handler/abstract_provider.py +3 -4
- credsweeper/file_handler/byte_content_provider.py +11 -2
- credsweeper/file_handler/content_provider.py +1 -1
- credsweeper/file_handler/data_content_provider.py +1 -1
- credsweeper/file_handler/diff_content_provider.py +133 -3
- credsweeper/file_handler/file_path_extractor.py +4 -2
- credsweeper/file_handler/files_provider.py +4 -4
- credsweeper/file_handler/patches_provider.py +7 -8
- credsweeper/file_handler/text_content_provider.py +8 -2
- credsweeper/filters/__init__.py +3 -4
- credsweeper/filters/filter.py +5 -3
- credsweeper/filters/group/__init__.py +0 -2
- credsweeper/filters/group/general_keyword.py +2 -2
- credsweeper/filters/group/general_pattern.py +2 -2
- credsweeper/filters/group/group.py +38 -36
- credsweeper/filters/group/password_keyword.py +9 -8
- credsweeper/filters/group/token_pattern.py +5 -5
- credsweeper/filters/group/url_credentials_group.py +8 -8
- credsweeper/filters/group/weird_base36_token.py +6 -6
- credsweeper/filters/group/weird_base64_token.py +5 -5
- credsweeper/filters/line_git_binary_check.py +5 -4
- credsweeper/filters/line_specific_key_check.py +6 -5
- credsweeper/filters/line_uue_part_check.py +5 -4
- credsweeper/filters/value_allowlist_check.py +6 -5
- credsweeper/filters/value_array_dictionary_check.py +8 -6
- credsweeper/filters/value_atlassian_token_check.py +6 -5
- credsweeper/filters/value_azure_token_check.py +6 -5
- credsweeper/filters/value_base32_data_check.py +8 -5
- credsweeper/filters/value_base64_data_check.py +6 -5
- credsweeper/filters/value_base64_encoded_pem_check.py +6 -5
- credsweeper/filters/value_base64_key_check.py +6 -5
- credsweeper/filters/value_base64_part_check.py +6 -5
- credsweeper/filters/value_basic_auth_check.py +37 -0
- credsweeper/filters/value_blocklist_check.py +6 -4
- credsweeper/filters/value_camel_case_check.py +8 -7
- credsweeper/filters/value_dictionary_keyword_check.py +6 -4
- credsweeper/filters/value_discord_bot_check.py +6 -5
- credsweeper/filters/value_entropy_base_check.py +6 -5
- credsweeper/filters/value_file_path_check.py +13 -8
- credsweeper/filters/value_github_check.py +8 -6
- credsweeper/filters/value_grafana_check.py +6 -5
- credsweeper/filters/value_grafana_service_check.py +5 -4
- credsweeper/filters/value_hex_number_check.py +5 -4
- credsweeper/filters/value_jfrog_token_check.py +6 -5
- credsweeper/filters/value_json_web_key_check.py +6 -5
- credsweeper/filters/value_json_web_token_check.py +6 -5
- credsweeper/filters/value_last_word_check.py +6 -4
- credsweeper/filters/{value_dictionary_value_length_check.py → value_length_check.py} +12 -6
- credsweeper/filters/value_method_check.py +5 -4
- credsweeper/filters/value_morphemes_check.py +43 -0
- credsweeper/filters/value_not_allowed_pattern_check.py +6 -5
- credsweeper/filters/value_not_part_encoded_check.py +4 -4
- credsweeper/filters/value_number_check.py +5 -4
- credsweeper/filters/value_pattern_check.py +61 -41
- credsweeper/filters/value_similarity_check.py +6 -4
- credsweeper/filters/value_split_keyword_check.py +5 -4
- credsweeper/filters/value_string_type_check.py +10 -7
- credsweeper/filters/value_token_base_check.py +5 -4
- credsweeper/filters/value_token_check.py +6 -5
- credsweeper/logger/__init__.py +0 -1
- credsweeper/logger/logger.py +1 -1
- credsweeper/ml_model/__init__.py +0 -1
- credsweeper/ml_model/features/__init__.py +1 -0
- credsweeper/ml_model/features/entropy_evaluation.py +1 -1
- credsweeper/ml_model/features/feature.py +2 -19
- credsweeper/ml_model/features/file_extension.py +2 -2
- credsweeper/ml_model/features/has_html_tag.py +12 -10
- credsweeper/ml_model/features/is_secret_numeric.py +5 -4
- credsweeper/ml_model/features/length_of_attribute.py +1 -1
- credsweeper/ml_model/features/morpheme_dense.py +15 -8
- credsweeper/ml_model/features/rule_name.py +2 -2
- credsweeper/ml_model/features/rule_severity.py +21 -0
- credsweeper/ml_model/features/search_in_attribute.py +1 -1
- credsweeper/ml_model/features/word_in.py +10 -33
- credsweeper/ml_model/features/word_in_path.py +6 -4
- credsweeper/ml_model/features/word_in_postamble.py +2 -5
- credsweeper/ml_model/features/word_in_preamble.py +2 -5
- credsweeper/ml_model/features/word_in_transition.py +2 -5
- credsweeper/ml_model/features/word_in_value.py +3 -4
- credsweeper/ml_model/features/word_in_variable.py +3 -4
- credsweeper/ml_model/ml_config.json +140 -27
- credsweeper/ml_model/ml_model.onnx +0 -0
- credsweeper/ml_model/ml_validator.py +4 -3
- credsweeper/rules/__init__.py +0 -1
- credsweeper/rules/config.yaml +329 -239
- credsweeper/rules/rule.py +4 -3
- credsweeper/scanner/__init__.py +0 -1
- credsweeper/scanner/scan_type/__init__.py +0 -5
- credsweeper/scanner/scan_type/multi_pattern.py +4 -4
- credsweeper/scanner/scan_type/pem_key_pattern.py +4 -4
- credsweeper/scanner/scan_type/scan_type.py +4 -4
- credsweeper/scanner/scan_type/single_pattern.py +4 -4
- credsweeper/scanner/scanner.py +24 -15
- credsweeper/secret/config.json +19 -6
- credsweeper/utils/__init__.py +0 -1
- credsweeper/utils/pem_key_detector.py +3 -3
- credsweeper/utils/util.py +24 -150
- {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/METADATA +7 -7
- credsweeper-1.13.3.dist-info/RECORD +164 -0
- credsweeper/filters/value_couple_keyword_check.py +0 -26
- credsweeper-1.11.5.dist-info/RECORD +0 -159
- {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/WHEEL +0 -0
- {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/entry_points.txt +0 -0
- {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -14,11 +14,15 @@
|
|
|
14
14
|
/var
|
|
15
15
|
000
|
|
16
16
|
111
|
|
17
|
+
14159265
|
|
18
|
+
18284590
|
|
17
19
|
222
|
|
18
20
|
333
|
|
19
21
|
444
|
|
20
22
|
555
|
|
23
|
+
65358979
|
|
21
24
|
666
|
|
25
|
+
71828182
|
|
22
26
|
777
|
|
23
27
|
80211
|
|
24
28
|
888
|
|
@@ -195,7 +199,7 @@ aux
|
|
|
195
199
|
avail
|
|
196
200
|
avatar
|
|
197
201
|
aver
|
|
198
|
-
|
|
202
|
+
awesom
|
|
199
203
|
axis
|
|
200
204
|
azure
|
|
201
205
|
back
|
|
@@ -227,12 +231,14 @@ bind
|
|
|
227
231
|
bio
|
|
228
232
|
bipol
|
|
229
233
|
bit
|
|
234
|
+
bixby
|
|
230
235
|
black
|
|
231
236
|
blan
|
|
232
237
|
bless
|
|
233
238
|
blic
|
|
234
239
|
blish
|
|
235
240
|
blob
|
|
241
|
+
blood
|
|
236
242
|
blue
|
|
237
243
|
board
|
|
238
244
|
bob
|
|
@@ -243,7 +249,7 @@ boost
|
|
|
243
249
|
boot
|
|
244
250
|
boss
|
|
245
251
|
bot
|
|
246
|
-
|
|
252
|
+
boun
|
|
247
253
|
box
|
|
248
254
|
branch
|
|
249
255
|
break
|
|
@@ -497,6 +503,7 @@ dust
|
|
|
497
503
|
dvb
|
|
498
504
|
dynamic
|
|
499
505
|
dynamo
|
|
506
|
+
eadbee
|
|
500
507
|
easin
|
|
501
508
|
easy
|
|
502
509
|
ecdhe
|
|
@@ -607,6 +614,7 @@ fleet
|
|
|
607
614
|
flick
|
|
608
615
|
flix
|
|
609
616
|
float
|
|
617
|
+
flood
|
|
610
618
|
floor
|
|
611
619
|
fluent
|
|
612
620
|
fluid
|
|
@@ -615,7 +623,7 @@ focus
|
|
|
615
623
|
foo
|
|
616
624
|
for
|
|
617
625
|
fossil
|
|
618
|
-
|
|
626
|
+
foun
|
|
619
627
|
fpga
|
|
620
628
|
frame
|
|
621
629
|
free
|
|
@@ -648,6 +656,7 @@ git
|
|
|
648
656
|
given
|
|
649
657
|
global
|
|
650
658
|
gobble
|
|
659
|
+
good
|
|
651
660
|
google
|
|
652
661
|
grab
|
|
653
662
|
grace
|
|
@@ -703,6 +712,7 @@ home
|
|
|
703
712
|
hook
|
|
704
713
|
horizon
|
|
705
714
|
host
|
|
715
|
+
houn
|
|
706
716
|
hours
|
|
707
717
|
html
|
|
708
718
|
http
|
|
@@ -789,6 +799,7 @@ jpg_
|
|
|
789
799
|
json
|
|
790
800
|
jump
|
|
791
801
|
justif
|
|
802
|
+
kafka
|
|
792
803
|
kerberos
|
|
793
804
|
kernel
|
|
794
805
|
key
|
|
@@ -797,6 +808,8 @@ kill
|
|
|
797
808
|
kind
|
|
798
809
|
kinesis
|
|
799
810
|
kirk
|
|
811
|
+
know
|
|
812
|
+
knox
|
|
800
813
|
kris
|
|
801
814
|
lab
|
|
802
815
|
lag
|
|
@@ -853,7 +866,7 @@ local
|
|
|
853
866
|
lock
|
|
854
867
|
log
|
|
855
868
|
long
|
|
856
|
-
|
|
869
|
+
look
|
|
857
870
|
loop
|
|
858
871
|
loose
|
|
859
872
|
lost
|
|
@@ -946,6 +959,7 @@ ndow
|
|
|
946
959
|
ned
|
|
947
960
|
need
|
|
948
961
|
neigh
|
|
962
|
+
neo4j
|
|
949
963
|
ner
|
|
950
964
|
net
|
|
951
965
|
neutr
|
|
@@ -990,6 +1004,7 @@ oncat
|
|
|
990
1004
|
one
|
|
991
1005
|
onfig
|
|
992
1006
|
only
|
|
1007
|
+
ookup
|
|
993
1008
|
open
|
|
994
1009
|
opt/
|
|
995
1010
|
opted
|
|
@@ -1007,6 +1022,7 @@ ormat
|
|
|
1007
1022
|
orph
|
|
1008
1023
|
otorola
|
|
1009
1024
|
ottle
|
|
1025
|
+
ound
|
|
1010
1026
|
ously
|
|
1011
1027
|
out
|
|
1012
1028
|
over
|
|
@@ -1066,6 +1082,7 @@ pose
|
|
|
1066
1082
|
posit
|
|
1067
1083
|
possib
|
|
1068
1084
|
post
|
|
1085
|
+
poun
|
|
1069
1086
|
power
|
|
1070
1087
|
pre_
|
|
1071
1088
|
pred
|
|
@@ -1210,7 +1227,7 @@ rotat
|
|
|
1210
1227
|
rotocol
|
|
1211
1228
|
rottl
|
|
1212
1229
|
rough
|
|
1213
|
-
|
|
1230
|
+
roun
|
|
1214
1231
|
roup
|
|
1215
1232
|
row
|
|
1216
1233
|
rroga
|
|
@@ -1222,6 +1239,7 @@ run
|
|
|
1222
1239
|
rxtx
|
|
1223
1240
|
sabl
|
|
1224
1241
|
sage
|
|
1242
|
+
salt
|
|
1225
1243
|
same
|
|
1226
1244
|
sampl
|
|
1227
1245
|
sams
|
|
@@ -1315,9 +1333,10 @@ sock
|
|
|
1315
1333
|
soft
|
|
1316
1334
|
solid
|
|
1317
1335
|
solve
|
|
1336
|
+
some
|
|
1318
1337
|
sony
|
|
1319
1338
|
sort
|
|
1320
|
-
|
|
1339
|
+
soun
|
|
1321
1340
|
source
|
|
1322
1341
|
space
|
|
1323
1342
|
spacing
|
|
@@ -1427,6 +1446,7 @@ tio
|
|
|
1427
1446
|
tish
|
|
1428
1447
|
title
|
|
1429
1448
|
titud
|
|
1449
|
+
tizen
|
|
1430
1450
|
tmp/
|
|
1431
1451
|
to_
|
|
1432
1452
|
tod
|
|
@@ -1438,6 +1458,7 @@ topic
|
|
|
1438
1458
|
tory
|
|
1439
1459
|
total
|
|
1440
1460
|
touch
|
|
1461
|
+
tour
|
|
1441
1462
|
trace
|
|
1442
1463
|
tract
|
|
1443
1464
|
traffic
|
|
@@ -1571,6 +1592,7 @@ yield
|
|
|
1571
1592
|
you
|
|
1572
1593
|
zeppelin
|
|
1573
1594
|
zero
|
|
1595
|
+
zigbee
|
|
1574
1596
|
zing
|
|
1575
1597
|
zona
|
|
1576
1598
|
zorro
|
credsweeper/config/__init__.py
CHANGED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from credsweeper.config.config import Config
|
credsweeper/config/config.py
CHANGED
|
@@ -4,7 +4,7 @@ from typing import Dict, List, Optional, Set, Any
|
|
|
4
4
|
from humanfriendly import parse_size
|
|
5
5
|
|
|
6
6
|
from credsweeper.common.constants import Severity, DEFAULT_PATTERN_LEN
|
|
7
|
-
from credsweeper.utils import Util
|
|
7
|
+
from credsweeper.utils.util import Util
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class Config:
|
|
@@ -35,12 +35,13 @@ class Config:
|
|
|
35
35
|
self.candidate_output: List[str] = config["candidate_output"]
|
|
36
36
|
self.find_by_ext: bool = config["find_by_ext"]
|
|
37
37
|
self.size_limit: Optional[int] = parse_size(config["size_limit"]) if config["size_limit"] is not None else None
|
|
38
|
+
self.pedantic: bool = bool(config["pedantic"])
|
|
38
39
|
self.depth: int = int(config["depth"])
|
|
39
40
|
self.doc: bool = config["doc"]
|
|
40
41
|
self.severity: Severity = Severity.get(config.get("severity"))
|
|
41
42
|
|
|
42
|
-
self.
|
|
43
|
-
self.
|
|
43
|
+
self.max_url_cred_value_length: int = int(config["max_url_cred_value_length"])
|
|
44
|
+
self.max_password_value_length: int = int(config["max_password_value_length"])
|
|
44
45
|
|
|
45
46
|
# Trim exclude patterns from space like characters
|
|
46
47
|
self.exclude_lines = set(line.strip() for line in self.exclude_lines)
|
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
from credsweeper.credentials.candidate import Candidate
|
|
2
|
-
from credsweeper.credentials.candidate_group_generator import CandidateGroupGenerator
|
|
3
|
-
from credsweeper.credentials.candidate_key import CandidateKey
|
|
4
|
-
from credsweeper.credentials.credential_manager import CredentialManager
|
|
5
|
-
from credsweeper.credentials.line_data import LineData
|
|
@@ -4,7 +4,7 @@ from json.encoder import py_encode_basestring_ascii
|
|
|
4
4
|
from typing import Any, Dict, List, Optional
|
|
5
5
|
|
|
6
6
|
from credsweeper.common.constants import Severity, Confidence
|
|
7
|
-
from credsweeper.config import Config
|
|
7
|
+
from credsweeper.config.config import Config
|
|
8
8
|
from credsweeper.credentials.line_data import LineData
|
|
9
9
|
|
|
10
10
|
|
|
@@ -2,7 +2,7 @@ import logging
|
|
|
2
2
|
from multiprocessing import Manager
|
|
3
3
|
from typing import List, Dict, Tuple
|
|
4
4
|
|
|
5
|
-
from credsweeper.credentials import Candidate
|
|
5
|
+
from credsweeper.credentials.candidate import Candidate
|
|
6
6
|
from credsweeper.credentials.candidate_group_generator import CandidateGroupGenerator, CandidateKey
|
|
7
7
|
|
|
8
8
|
logger = logging.getLogger(__name__)
|
|
@@ -8,8 +8,8 @@ from typing import Any, Dict, Optional, Tuple
|
|
|
8
8
|
from colorama import Fore, Style
|
|
9
9
|
|
|
10
10
|
from credsweeper.common.constants import MAX_LINE_LENGTH, UTF_8, StartEnd, ML_HUNK
|
|
11
|
-
from credsweeper.config import Config
|
|
12
|
-
from credsweeper.utils import Util
|
|
11
|
+
from credsweeper.config.config import Config
|
|
12
|
+
from credsweeper.utils.util import Util
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class LineData:
|
|
@@ -137,12 +137,33 @@ class LineData:
|
|
|
137
137
|
|
|
138
138
|
def sanitize_value(self):
|
|
139
139
|
"""Clean found value from extra artifacts. Correct positions if changed."""
|
|
140
|
+
# process the quotation workaround before cached properties invocation
|
|
141
|
+
if not self.value_leftquote and not self.value_rightquote:
|
|
142
|
+
while self.value:
|
|
143
|
+
first_symbol_code = ord(self.value[0])
|
|
144
|
+
last_symbol_code = ord(self.value[-1])
|
|
145
|
+
if 0x2018 <= first_symbol_code <= 0x201B and 0x2018 <= last_symbol_code <= 0x201B:
|
|
146
|
+
self.value_leftquote = self.value_rightquote = "'"
|
|
147
|
+
self.value = self.value[:-1]
|
|
148
|
+
self.value_end -= 1
|
|
149
|
+
self.value = self.value[1:]
|
|
150
|
+
self.value_start += 1
|
|
151
|
+
elif 0x201C <= first_symbol_code <= 0x201F and 0x201C <= last_symbol_code <= 0x201F:
|
|
152
|
+
self.value_leftquote = self.value_rightquote = '"'
|
|
153
|
+
self.value = self.value[1:]
|
|
154
|
+
self.value_start += 1
|
|
155
|
+
self.value = self.value[:-1]
|
|
156
|
+
self.value_end -= 1
|
|
157
|
+
else:
|
|
158
|
+
break
|
|
159
|
+
|
|
140
160
|
if self.variable and self.value and not self.is_well_quoted_value:
|
|
141
161
|
# sanitize is actual step for keyword pattern only
|
|
142
162
|
_value = self.value
|
|
143
163
|
self.clean_url_parameters()
|
|
144
164
|
self.clean_bash_parameters()
|
|
145
165
|
self.clean_toml_parameters()
|
|
166
|
+
self.clean_tag_parameters()
|
|
146
167
|
if 0 <= self.value_start and 0 <= self.value_end and len(self.value) < len(_value):
|
|
147
168
|
start = _value.find(self.value)
|
|
148
169
|
self.value_start += start
|
|
@@ -176,15 +197,14 @@ class LineData:
|
|
|
176
197
|
If line seem to be a URL - split by & character.
|
|
177
198
|
Variable should be right most value after & or ? ([-1]). And value should be left most before & ([0])
|
|
178
199
|
"""
|
|
179
|
-
|
|
200
|
+
# skip sanitize in case of URL credential rule - the regex is mature enough
|
|
201
|
+
if self.check_url_part() and not self.variable.endswith("://"):
|
|
180
202
|
# all checks have passed - line before the value may be a URL
|
|
181
203
|
self.variable = self.variable.rsplit('&')[-1].rsplit('?')[-1].rsplit(';')[-1]
|
|
182
204
|
self.value = self.value.split('&', maxsplit=1)[0].split(';', maxsplit=1)[0].split('#', maxsplit=1)[0]
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
self.value = self.
|
|
186
|
-
if self._3d_escaped_separator:
|
|
187
|
-
self.value = self.url_percent_split.split(self.value)[0]
|
|
205
|
+
self.value = self.url_unicode_split.split(self.value)[0]
|
|
206
|
+
if self._3d_escaped_separator:
|
|
207
|
+
self.value = self.url_percent_split.split(self.value)[0]
|
|
188
208
|
|
|
189
209
|
def clean_bash_parameters(self) -> None:
|
|
190
210
|
"""Split variable and value by bash special characters, if line assumed to be CLI command."""
|
|
@@ -212,6 +232,21 @@ class LineData:
|
|
|
212
232
|
self.value = self.value[:-1]
|
|
213
233
|
cleaning_required = True
|
|
214
234
|
|
|
235
|
+
def clean_tag_parameters(self) -> None:
|
|
236
|
+
"""Remove closing tag from value if the opened is somewhere before in line"""
|
|
237
|
+
cleaning_required = self.value and self.value.endswith('>')
|
|
238
|
+
while cleaning_required:
|
|
239
|
+
closing_tag_pos = self.value.rfind("</")
|
|
240
|
+
if 0 <= closing_tag_pos:
|
|
241
|
+
# use `<a` to avoid tag parameters
|
|
242
|
+
opening_tag_prefix = f"<{self.value[closing_tag_pos + 2:-1]}"
|
|
243
|
+
if cleaning_required := (opening_tag_prefix not in self.value
|
|
244
|
+
and 0 <= self.line.find(opening_tag_prefix, 0, self.value_start)):
|
|
245
|
+
self.value = self.value[:closing_tag_pos]
|
|
246
|
+
cleaning_required = self.value and self.value.endswith('>')
|
|
247
|
+
else:
|
|
248
|
+
break
|
|
249
|
+
|
|
215
250
|
def sanitize_variable(self) -> None:
|
|
216
251
|
"""Remove trailing spaces, dashes and quotations around the variable. Correct position."""
|
|
217
252
|
sanitized_var_len = 0
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from credsweeper.deep_scanner.deep_scanner import DeepScanner
|
|
@@ -6,9 +6,9 @@ from typing import List, Optional, Tuple, Any, Generator
|
|
|
6
6
|
|
|
7
7
|
from credsweeper.common.constants import RECURSIVE_SCAN_LIMITATION, MIN_DATA_LEN, DEFAULT_ENCODING, UTF_8, \
|
|
8
8
|
MIN_VALUE_LENGTH
|
|
9
|
-
from credsweeper.config import Config
|
|
10
|
-
from credsweeper.credentials import Candidate
|
|
9
|
+
from credsweeper.config.config import Config
|
|
11
10
|
from credsweeper.credentials.augment_candidates import augment_candidates
|
|
11
|
+
from credsweeper.credentials.candidate import Candidate
|
|
12
12
|
from credsweeper.file_handler.byte_content_provider import ByteContentProvider
|
|
13
13
|
from credsweeper.file_handler.content_provider import ContentProvider
|
|
14
14
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
@@ -18,7 +18,7 @@ from credsweeper.file_handler.file_path_extractor import FilePathExtractor
|
|
|
18
18
|
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
19
19
|
from credsweeper.file_handler.struct_content_provider import StructContentProvider
|
|
20
20
|
from credsweeper.file_handler.text_content_provider import TextContentProvider
|
|
21
|
-
from credsweeper.scanner import Scanner
|
|
21
|
+
from credsweeper.scanner.scanner import Scanner
|
|
22
22
|
|
|
23
23
|
logger = logging.getLogger(__name__)
|
|
24
24
|
|
|
@@ -51,6 +51,7 @@ class AbstractScanner(ABC):
|
|
|
51
51
|
@abstractmethod
|
|
52
52
|
def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[List[Any], List[Any]]:
|
|
53
53
|
"""Returns possibly scan methods for the data depends on content and fallback scanners"""
|
|
54
|
+
raise NotImplementedError(__name__)
|
|
54
55
|
|
|
55
56
|
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
|
56
57
|
|
|
@@ -2,7 +2,7 @@ import logging
|
|
|
2
2
|
from abc import ABC
|
|
3
3
|
from typing import List, Optional
|
|
4
4
|
|
|
5
|
-
from credsweeper.credentials import Candidate
|
|
5
|
+
from credsweeper.credentials.candidate import Candidate
|
|
6
6
|
from credsweeper.file_handler.byte_content_provider import ByteContentProvider
|
|
7
7
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
8
8
|
from .abstract_scanner import AbstractScanner
|
|
@@ -4,10 +4,10 @@ from abc import ABC
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import List, Optional
|
|
6
6
|
|
|
7
|
-
from credsweeper.credentials import Candidate
|
|
7
|
+
from credsweeper.credentials.candidate import Candidate
|
|
8
8
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
9
9
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
10
|
-
from credsweeper.utils import Util
|
|
10
|
+
from credsweeper.utils.util import Util
|
|
11
11
|
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
13
13
|
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import csv
|
|
2
|
+
import io
|
|
3
|
+
import logging
|
|
4
|
+
from abc import ABC
|
|
5
|
+
from typing import List, Optional, Dict, Any
|
|
6
|
+
|
|
7
|
+
from credsweeper.common.constants import MAX_LINE_LENGTH
|
|
8
|
+
from credsweeper.credentials.candidate import Candidate
|
|
9
|
+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
10
|
+
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
11
|
+
from credsweeper.file_handler.struct_content_provider import StructContentProvider
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class CsvScanner(AbstractScanner, ABC):
|
|
17
|
+
"""Implements CSV scanning"""
|
|
18
|
+
|
|
19
|
+
sniffer = csv.Sniffer()
|
|
20
|
+
# do not use space as separator to avoid hallucinations
|
|
21
|
+
delimiters = ",;\t|\x1F"
|
|
22
|
+
|
|
23
|
+
@classmethod
|
|
24
|
+
def get_structure(cls, text: str) -> List[Dict[str, Any]]:
|
|
25
|
+
"""Reads a text as CSV standard with guessed dialect"""
|
|
26
|
+
# windows style \r\n
|
|
27
|
+
first_line_end = text.find('\r', 0, MAX_LINE_LENGTH)
|
|
28
|
+
line_terminator = "\r\n"
|
|
29
|
+
if 0 > first_line_end:
|
|
30
|
+
# unix style \n
|
|
31
|
+
first_line_end = text.find('\n', 0, MAX_LINE_LENGTH)
|
|
32
|
+
line_terminator = "\n"
|
|
33
|
+
if 0 > first_line_end:
|
|
34
|
+
raise ValueError(f"No suitable line end found in {MAX_LINE_LENGTH} symbols")
|
|
35
|
+
|
|
36
|
+
first_line = text[:first_line_end]
|
|
37
|
+
dialect = cls.sniffer.sniff(first_line, delimiters=cls.delimiters)
|
|
38
|
+
rows = []
|
|
39
|
+
reader = csv.DictReader(io.StringIO(text),
|
|
40
|
+
delimiter=dialect.delimiter,
|
|
41
|
+
lineterminator=line_terminator,
|
|
42
|
+
strict=True)
|
|
43
|
+
# check the constant columns number for all rows
|
|
44
|
+
fields_number = sum(1 for x in reader.fieldnames if x is not None)
|
|
45
|
+
for row in reader:
|
|
46
|
+
if not isinstance(row, dict):
|
|
47
|
+
raise ValueError(f"ERROR: wrong row '{row}'")
|
|
48
|
+
if len(row) != fields_number or any(x is None for x in row.values()):
|
|
49
|
+
# None means no separator used
|
|
50
|
+
raise ValueError(f"Different columns number in row '{row}' - mismatch {fields_number}")
|
|
51
|
+
rows.append(row)
|
|
52
|
+
return rows
|
|
53
|
+
|
|
54
|
+
def data_scan(
|
|
55
|
+
self, #
|
|
56
|
+
data_provider: DataContentProvider, #
|
|
57
|
+
depth: int, #
|
|
58
|
+
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
59
|
+
"""Tries to scan each row as structure with column name in key"""
|
|
60
|
+
try:
|
|
61
|
+
if rows := self.get_structure(data_provider.text):
|
|
62
|
+
struct_content_provider = StructContentProvider(struct=rows,
|
|
63
|
+
file_path=data_provider.file_path,
|
|
64
|
+
file_type=data_provider.file_type,
|
|
65
|
+
info=f"{data_provider.info}|CSV")
|
|
66
|
+
new_limit = recursive_limit_size - sum(len(x) for x in rows)
|
|
67
|
+
struct_candidates = self.structure_scan(struct_content_provider, depth, new_limit)
|
|
68
|
+
return struct_candidates
|
|
69
|
+
except Exception as csv_exc:
|
|
70
|
+
logger.debug(f"{data_provider.file_path}:{csv_exc}")
|
|
71
|
+
return None
|
|
@@ -4,7 +4,7 @@ from abc import ABC
|
|
|
4
4
|
from typing import List, Optional, Generator, Tuple
|
|
5
5
|
|
|
6
6
|
from credsweeper.common.constants import MIN_DATA_LEN, UTF_8
|
|
7
|
-
from credsweeper.credentials import Candidate
|
|
7
|
+
from credsweeper.credentials.candidate import Candidate
|
|
8
8
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
9
9
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
10
10
|
from credsweeper.utils.util import Util
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from typing import List, Any, Tuple
|
|
3
3
|
|
|
4
|
-
from credsweeper.
|
|
5
|
-
from credsweeper.
|
|
6
|
-
from credsweeper.
|
|
7
|
-
from credsweeper.utils import Util
|
|
4
|
+
from credsweeper.config.config import Config
|
|
5
|
+
from credsweeper.scanner.scanner import Scanner
|
|
6
|
+
from credsweeper.utils.util import Util
|
|
8
7
|
from .byte_scanner import ByteScanner
|
|
9
8
|
from .bzip2_scanner import Bzip2Scanner
|
|
9
|
+
from .csv_scanner import CsvScanner
|
|
10
10
|
from .deb_scanner import DebScanner
|
|
11
11
|
from .docx_scanner import DocxScanner
|
|
12
12
|
from .eml_scanner import EmlScanner
|
|
@@ -23,7 +23,9 @@ from .pdf_scanner import PdfScanner
|
|
|
23
23
|
from .pkcs_scanner import PkcsScanner
|
|
24
24
|
from .pptx_scanner import PptxScanner
|
|
25
25
|
from .rpm_scanner import RpmScanner
|
|
26
|
+
from .rtf_scanner import RtfScanner
|
|
26
27
|
from .sqlite3_scanner import Sqlite3Scanner
|
|
28
|
+
from .strings_scanner import StringsScanner
|
|
27
29
|
from .tar_scanner import TarScanner
|
|
28
30
|
from .tmx_scanner import TmxScanner
|
|
29
31
|
from .xlsx_scanner import XlsxScanner
|
|
@@ -38,6 +40,7 @@ class DeepScanner(
|
|
|
38
40
|
ByteScanner, #
|
|
39
41
|
Bzip2Scanner, #
|
|
40
42
|
DocxScanner, #
|
|
43
|
+
CsvScanner, #
|
|
41
44
|
EncoderScanner, #
|
|
42
45
|
GzipScanner, #
|
|
43
46
|
HtmlScanner, #
|
|
@@ -49,8 +52,10 @@ class DeepScanner(
|
|
|
49
52
|
PdfScanner, #
|
|
50
53
|
PkcsScanner, #
|
|
51
54
|
PptxScanner, #
|
|
55
|
+
RtfScanner, #
|
|
52
56
|
RpmScanner, #
|
|
53
57
|
Sqlite3Scanner, #
|
|
58
|
+
StringsScanner, #
|
|
54
59
|
TarScanner, #
|
|
55
60
|
DebScanner, #
|
|
56
61
|
XmlScanner, #
|
|
@@ -133,6 +138,9 @@ class DeepScanner(
|
|
|
133
138
|
deep_scanners.append(Sqlite3Scanner)
|
|
134
139
|
elif Util.is_asn1(data):
|
|
135
140
|
deep_scanners.append(PkcsScanner)
|
|
141
|
+
elif Util.is_rtf(data):
|
|
142
|
+
deep_scanners.append(RtfScanner)
|
|
143
|
+
fallback_scanners.append(ByteScanner)
|
|
136
144
|
elif Util.is_xml(data):
|
|
137
145
|
if Util.is_html(data):
|
|
138
146
|
deep_scanners.append(HtmlScanner)
|
|
@@ -150,24 +158,26 @@ class DeepScanner(
|
|
|
150
158
|
deep_scanners.append(XmlScanner)
|
|
151
159
|
fallback_scanners.append(ByteScanner)
|
|
152
160
|
elif Util.is_eml(data):
|
|
153
|
-
if ".eml"
|
|
161
|
+
if descriptor.extension in (".eml", ".mht"):
|
|
154
162
|
deep_scanners.append(EmlScanner)
|
|
155
163
|
else:
|
|
156
164
|
if 0 < depth:
|
|
157
|
-
# formal patch looks like an eml
|
|
165
|
+
# a formal patch looks like an eml
|
|
158
166
|
deep_scanners.append(PatchScanner)
|
|
159
167
|
fallback_scanners.append(EmlScanner)
|
|
160
168
|
fallback_scanners.append(ByteScanner)
|
|
161
|
-
elif Util.is_known(data):
|
|
162
|
-
# the format is known but cannot be scanned
|
|
163
|
-
pass
|
|
164
169
|
elif not Util.is_binary(data):
|
|
170
|
+
# keep ByteScanner first to apply real value position if possible
|
|
171
|
+
deep_scanners.append(ByteScanner)
|
|
165
172
|
if 0 < depth:
|
|
166
173
|
deep_scanners.append(PatchScanner)
|
|
167
174
|
deep_scanners.append(EncoderScanner)
|
|
168
175
|
deep_scanners.append(LangScanner)
|
|
169
|
-
|
|
176
|
+
deep_scanners.append(CsvScanner)
|
|
170
177
|
else:
|
|
171
|
-
|
|
172
|
-
|
|
178
|
+
if 0 < depth:
|
|
179
|
+
deep_scanners.append(StringsScanner)
|
|
180
|
+
else:
|
|
181
|
+
logger.warning("Cannot apply a deep scanner for type %s prefix %s %d", descriptor, repr(data[:32]),
|
|
182
|
+
len(data))
|
|
173
183
|
return deep_scanners, fallback_scanners
|
|
@@ -11,7 +11,7 @@ from docx.table import _Cell, Table
|
|
|
11
11
|
from docx.text.paragraph import Paragraph
|
|
12
12
|
from lxml.etree import _Element
|
|
13
13
|
|
|
14
|
-
from credsweeper.credentials import Candidate
|
|
14
|
+
from credsweeper.credentials.candidate import Candidate
|
|
15
15
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
16
16
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
17
17
|
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
@@ -3,7 +3,7 @@ import logging
|
|
|
3
3
|
from abc import ABC
|
|
4
4
|
from typing import List, Optional
|
|
5
5
|
|
|
6
|
-
from credsweeper.credentials import Candidate
|
|
6
|
+
from credsweeper.credentials.candidate import Candidate
|
|
7
7
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
8
8
|
from credsweeper.file_handler.byte_content_provider import ByteContentProvider
|
|
9
9
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
@@ -2,7 +2,7 @@ import logging
|
|
|
2
2
|
from abc import ABC
|
|
3
3
|
from typing import List, Optional
|
|
4
4
|
|
|
5
|
-
from credsweeper.credentials import Candidate
|
|
5
|
+
from credsweeper.credentials.candidate import Candidate
|
|
6
6
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
7
7
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
8
8
|
|
|
@@ -5,10 +5,10 @@ from abc import ABC
|
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from typing import List, Optional
|
|
7
7
|
|
|
8
|
-
from credsweeper.credentials import Candidate
|
|
8
|
+
from credsweeper.credentials.candidate import Candidate
|
|
9
9
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
10
10
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
11
|
-
from credsweeper.utils import Util
|
|
11
|
+
from credsweeper.utils.util import Util
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
@@ -2,7 +2,7 @@ import logging
|
|
|
2
2
|
from abc import ABC
|
|
3
3
|
from typing import List, Optional
|
|
4
4
|
|
|
5
|
-
from credsweeper.credentials import Candidate
|
|
5
|
+
from credsweeper.credentials.candidate import Candidate
|
|
6
6
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
7
7
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
8
8
|
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
@@ -5,7 +5,7 @@ from abc import ABC
|
|
|
5
5
|
from typing import List, Optional
|
|
6
6
|
|
|
7
7
|
from credsweeper.common.constants import MIN_DATA_LEN, UTF_8
|
|
8
|
-
from credsweeper.credentials import Candidate
|
|
8
|
+
from credsweeper.credentials.candidate import Candidate
|
|
9
9
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
10
10
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
11
11
|
from credsweeper.file_handler.struct_content_provider import StructContentProvider
|
|
@@ -4,7 +4,8 @@ from typing import List, Optional
|
|
|
4
4
|
|
|
5
5
|
import jks
|
|
6
6
|
|
|
7
|
-
from credsweeper.
|
|
7
|
+
from credsweeper.common.constants import Severity, Confidence
|
|
8
|
+
from credsweeper.credentials.candidate import Candidate
|
|
8
9
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
9
10
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
10
11
|
|
|
@@ -24,14 +25,22 @@ class JksScanner(AbstractScanner, ABC):
|
|
|
24
25
|
try:
|
|
25
26
|
keystore = jks.KeyStore.loads(data_provider.data, pw_probe, try_decrypt_keys=True)
|
|
26
27
|
# the password probe has passed, it will be the value
|
|
27
|
-
|
|
28
|
-
|
|
28
|
+
if keystore.private_keys or keystore.secret_keys:
|
|
29
|
+
severity = Severity.HIGH
|
|
30
|
+
confidence = Confidence.STRONG
|
|
31
|
+
info = f"{data_provider.info}|JKS:default password"
|
|
32
|
+
else:
|
|
33
|
+
severity = Severity.LOW
|
|
34
|
+
confidence = Confidence.WEAK
|
|
35
|
+
info = f"{data_provider.info}|JKS:sensitive data"
|
|
29
36
|
candidate = Candidate.get_dummy_candidate(
|
|
30
37
|
self.config, #
|
|
31
38
|
data_provider.file_path, #
|
|
32
39
|
data_provider.file_type, #
|
|
33
40
|
info, #
|
|
34
41
|
"Java Key Storage")
|
|
42
|
+
candidate.severity = severity
|
|
43
|
+
candidate.confidence = confidence
|
|
35
44
|
value = pw_probe or "<EMPTY PASSWORD>"
|
|
36
45
|
candidate.line_data_list[0].line = f"'{value}' is the password"
|
|
37
46
|
candidate.line_data_list[0].value = pw_probe or "<EMPTY PASSWORD>"
|