credsweeper 1.11.3__py3-none-any.whl → 1.11.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of credsweeper might be problematic.
- credsweeper/__init__.py +1 -1
- credsweeper/__main__.py +1 -1
- credsweeper/app.py +21 -44
- credsweeper/common/constants.py +2 -5
- credsweeper/credentials/candidate_key.py +1 -1
- credsweeper/credentials/credential_manager.py +4 -3
- credsweeper/credentials/line_data.py +2 -5
- credsweeper/deep_scanner/abstract_scanner.py +269 -14
- credsweeper/deep_scanner/deb_scanner.py +55 -0
- credsweeper/deep_scanner/deep_scanner.py +39 -241
- credsweeper/deep_scanner/gzip_scanner.py +1 -1
- credsweeper/deep_scanner/jclass_scanner.py +74 -0
- credsweeper/deep_scanner/patch_scanner.py +48 -0
- credsweeper/deep_scanner/pkcs_scanner.py +41 -0
- credsweeper/deep_scanner/rpm_scanner.py +49 -0
- credsweeper/deep_scanner/sqlite3_scanner.py +79 -0
- credsweeper/file_handler/byte_content_provider.py +2 -2
- credsweeper/file_handler/content_provider.py +1 -1
- credsweeper/file_handler/data_content_provider.py +3 -4
- credsweeper/file_handler/diff_content_provider.py +2 -2
- credsweeper/file_handler/file_path_extractor.py +1 -1
- credsweeper/file_handler/files_provider.py +2 -4
- credsweeper/file_handler/patches_provider.py +5 -2
- credsweeper/file_handler/string_content_provider.py +2 -2
- credsweeper/file_handler/struct_content_provider.py +1 -1
- credsweeper/file_handler/text_content_provider.py +2 -2
- credsweeper/filters/__init__.py +1 -0
- credsweeper/filters/value_base64_encoded_pem_check.py +1 -1
- credsweeper/filters/value_base64_key_check.py +9 -14
- credsweeper/filters/value_entropy_base64_check.py +2 -6
- credsweeper/filters/value_json_web_key_check.py +37 -0
- credsweeper/filters/value_pattern_check.py +64 -16
- credsweeper/ml_model/features/file_extension.py +1 -1
- credsweeper/ml_model/ml_validator.py +43 -21
- credsweeper/rules/config.yaml +51 -9
- credsweeper/rules/rule.py +3 -3
- credsweeper/scanner/scan_type/multi_pattern.py +1 -2
- credsweeper/secret/config.json +6 -6
- credsweeper/utils/hop_stat.py +3 -3
- credsweeper/utils/pem_key_detector.py +6 -4
- credsweeper/utils/util.py +154 -79
- {credsweeper-1.11.3.dist-info → credsweeper-1.11.5.dist-info}/METADATA +3 -6
- {credsweeper-1.11.3.dist-info → credsweeper-1.11.5.dist-info}/RECORD +46 -40
- credsweeper/deep_scanner/pkcs12_scanner.py +0 -45
- {credsweeper-1.11.3.dist-info → credsweeper-1.11.5.dist-info}/WHEEL +0 -0
- {credsweeper-1.11.3.dist-info → credsweeper-1.11.5.dist-info}/entry_points.txt +0 -0
- {credsweeper-1.11.3.dist-info → credsweeper-1.11.5.dist-info}/licenses/LICENSE +0 -0
credsweeper/deep_scanner/sqlite3_scanner.py NEW

@@ -0,0 +1,79 @@
+import logging
+import os.path
+import sqlite3
+import sys
+import tempfile
+from abc import ABC
+from typing import List, Optional, Tuple, Any, Generator
+
+from credsweeper.credentials.candidate import Candidate
+from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
+from credsweeper.file_handler.data_content_provider import DataContentProvider
+from credsweeper.file_handler.struct_content_provider import StructContentProvider
+
+logger = logging.getLogger(__name__)
+
+
+class Sqlite3Scanner(AbstractScanner, ABC):
+    """Implements SQLite3 database scanning"""
+
+    @staticmethod
+    def __walk(sqlite3db) -> Generator[Tuple[str, Any], None, None]:
+        sqlite3db.row_factory = sqlite3.Row
+        cursor = sqlite3db.cursor()
+        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
+        for table in cursor.fetchall():
+            table_name = table[0]
+            try:
+                cursor.execute(f"SELECT * FROM {table_name}")
+                for row in cursor:
+                    yield table_name, dict(row)
+            except sqlite3.DatabaseError as exc:
+                print(f"Error reading table {table_name}: {exc}")
+
+    @staticmethod
+    def walk_sqlite(data: bytes) -> Generator[Tuple[str, Any], None, None]:
+        """Yields data from sqlite3 database"""
+        if 10 < sys.version_info.minor:
+            # Added in version 3.11
+            with sqlite3.connect(":memory:") as sqlite3db:
+                sqlite3db.deserialize(data)  # type: ignore
+                yield from Sqlite3Scanner.__walk(sqlite3db)
+        elif "nt" != os.name:
+            # a tmpfile has to be used. TODO: remove when 3.10 will deprecate
+            with tempfile.NamedTemporaryFile(suffix=".sqlite") as t:
+                t.write(data)
+                t.flush()
+                with sqlite3.connect(t.name) as sqlite3db:
+                    yield from Sqlite3Scanner.__walk(sqlite3db)
+        elif "nt" == os.name:
+            # windows trick. TODO: remove when 3.10 will deprecate
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".sqlite") as t:
+                t.write(data)
+                t.flush()
+            sqlite3db = sqlite3.connect(t.name)
+            yield from Sqlite3Scanner.__walk(sqlite3db)
+            sqlite3db.close()
+            if os.path.exists(t.name):
+                os.remove(t.name)
+
+    def data_scan(
+            self,  #
+            data_provider: DataContentProvider,  #
+            depth: int,  #
+            recursive_limit_size: int) -> Optional[List[Candidate]]:
+        """Extracts data file from .ar (debian) archive and launches data_scan"""
+        try:
+            candidates: List[Candidate] = []
+            new_limit = recursive_limit_size - len(data_provider.data)
+            for table, row in self.walk_sqlite(data_provider.data):
+                struct_content_provider = StructContentProvider(struct=row,
+                                                                file_path=data_provider.file_path,
+                                                                file_type=data_provider.file_type,
+                                                                info=f"{data_provider.info}|SQLite3.{table}")
+                if new_candidates := self.structure_scan(struct_content_provider, depth, new_limit):
+                    candidates.extend(new_candidates)
+            return candidates
+        except Exception as exc:
+            logger.error(exc)
+            return None
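The interesting part of the new scanner is sqlite3.Connection.deserialize (Python 3.11+), which loads a whole database image from raw bytes into an in-memory connection, so archive contents never have to touch the filesystem on modern interpreters. A minimal standalone sketch of that idea; the table and column names below are illustrative, not credsweeper code:

    import sqlite3
    import sys

    def walk_tables(blob: bytes):
        """Yield (table_name, row_dict) from a SQLite database given as raw bytes."""
        assert sys.version_info >= (3, 11), "Connection.deserialize needs Python 3.11+"
        with sqlite3.connect(":memory:") as db:
            db.deserialize(blob)
            db.row_factory = sqlite3.Row
            cur = db.cursor()
            cur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
            for (table_name,) in cur.fetchall():
                # the table name comes from sqlite_master, so the f-string query is over known names
                for row in db.execute(f"SELECT * FROM {table_name}"):
                    yield table_name, dict(row)

    # Build a tiny database, serialize it to bytes (also 3.11+), and walk it back.
    src = sqlite3.connect(":memory:")
    src.execute("CREATE TABLE secrets(name TEXT, value TEXT)")
    src.execute("INSERT INTO secrets VALUES('token', 'dummy-value')")
    src.commit()
    blob = src.serialize()
    for table, row in walk_tables(blob):
        print(table, row)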
credsweeper/file_handler/byte_content_provider.py CHANGED

@@ -32,10 +32,10 @@ class ByteContentProvider(ContentProvider):
     def free(self) -> None:
         """free data after scan to reduce memory usage"""
         self.__data = None
-        if
+        if "data" in self.__dict__:
             delattr(self, "data")
         self.__lines = None
-        if
+        if "lines" in self.__dict__:
             delattr(self, "lines")
 
     @cached_property
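The repeated if "data" in self.__dict__: delattr(self, "data") pattern across the providers is how a functools.cached_property is invalidated: the decorator caches its result in the instance __dict__ under the property name, and delattr raises AttributeError when nothing was cached yet, hence the membership test. A small self-contained sketch; the class and attribute names are illustrative, not from credsweeper:

    from functools import cached_property

    class Provider:
        def __init__(self, payload: bytes) -> None:
            self.__payload = payload

        @cached_property
        def data(self) -> bytes:
            # computed once, then stored in self.__dict__["data"]
            return self.__payload.upper()

        def free(self) -> None:
            """Drop the cached value so memory can be reclaimed."""
            self.__payload = b""
            if "data" in self.__dict__:   # a plain delattr would raise if never accessed
                delattr(self, "data")

    p = Provider(b"secret")
    p.free()          # safe even though p.data was never computed
    print(p.data)     # b'' - recomputed from the now-cleared payload
    assert "data" in p.__dict__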
credsweeper/file_handler/content_provider.py CHANGED

@@ -93,7 +93,7 @@ class ContentProvider(ABC):
             if min_len > len(line.strip()):
                 # Ignore target if stripped part is too short for all types
                 continue
-
+            if MAX_LINE_LENGTH < len(line):
                 for chunk_start, chunk_end in Util.get_chunks(len(line)):
                     target = AnalysisTarget(
                         line_pos=line_pos,  #
credsweeper/file_handler/data_content_provider.py CHANGED

@@ -1,6 +1,5 @@
 import json
 import logging
-import string
 import warnings
 from functools import cached_property
 from typing import List, Optional, Any, Generator, Callable, Tuple

@@ -54,10 +53,10 @@ class DataContentProvider(ContentProvider):
     def free(self) -> None:
         """free data after scan to reduce memory usage"""
         self.__data = None
-        if
+        if "data" in self.__dict__:
             delattr(self, "data")
         self.__text = None
-        if
+        if "text" in self.__dict__:
             delattr(self, "text")
         self.structure = None
         self.decoded = None

@@ -385,7 +384,7 @@ class DataContentProvider(ContentProvider):
             return False
         try:
             self.decoded = Util.decode_base64(  #
-
+                text=Util.PEM_CLEANING_PATTERN.sub(r'', self.text).replace('\\', ''),  #
                 padding_safe=True,  #
                 urlsafe_detect=True)  #
         except Exception as exc:
credsweeper/file_handler/diff_content_provider.py CHANGED

@@ -48,8 +48,8 @@ class DiffContentProvider(ContentProvider):
 
     def free(self) -> None:
         """free data after scan to reduce memory usage"""
-        self.__diff =
-        if
+        self.__diff = []
+        if "diff" in self.__dict__:
             delattr(self, "diff")
 
     @staticmethod
credsweeper/file_handler/file_path_extractor.py CHANGED

@@ -162,7 +162,7 @@ class FilePathExtractor:
             True when the file is oversize or less than MIN_DATA_LEN, or unsupported
         """
         path = reference[1] if isinstance(reference, tuple) else reference
-        if isinstance(path, str
+        if isinstance(path, (str, Path)):
             file_size = os.path.getsize(path)
         elif isinstance(path, io.BytesIO):
             current_pos = path.tell()
credsweeper/file_handler/files_provider.py CHANGED

@@ -42,7 +42,7 @@ class FilesProvider(AbstractProvider):
         """
         text_content_provider_list: List[Union[DiffContentProvider, TextContentProvider]] = []
         for path in self.paths:
-            if isinstance(path, str
+            if isinstance(path, (str, Path)):
                 new_files = FilePathExtractor.get_file_paths(config, path)
                 if self.skip_ignored:
                     new_files = FilePathExtractor.apply_gitignore(new_files)

@@ -50,9 +50,7 @@ class FilesProvider(AbstractProvider):
                     text_content_provider_list.append(TextContentProvider(_file))
             elif isinstance(path, io.BytesIO):
                 text_content_provider_list.append(TextContentProvider((":memory:", path)))
-            elif isinstance(path, tuple)
-                    and (isinstance(path[0], str) or isinstance(path[0], Path)) \
-                    and isinstance(path[1], io.BytesIO):
+            elif isinstance(path, tuple) and (isinstance(path[0], (str, Path))) and isinstance(path[1], io.BytesIO):
                 # suppose, all the files must be scanned
                 text_content_provider_list.append(TextContentProvider(path))
             else:
credsweeper/file_handler/patches_provider.py CHANGED

@@ -3,12 +3,12 @@ import logging
 from pathlib import Path
 from typing import List, Union, Tuple, Sequence
 
-from credsweeper import TextContentProvider
 from credsweeper.common.constants import DiffRowType
 from credsweeper.config import Config
 from credsweeper.file_handler.abstract_provider import AbstractProvider
 from credsweeper.file_handler.diff_content_provider import DiffContentProvider
 from credsweeper.file_handler.file_path_extractor import FilePathExtractor
+from credsweeper.file_handler.text_content_provider import TextContentProvider
 from credsweeper.utils import Util
 
 logger = logging.getLogger(__name__)

@@ -37,11 +37,14 @@ class PatchesProvider(AbstractProvider):
         for file_path in self.paths:
             if FilePathExtractor.check_file_size(config, file_path):
                 continue
-            if isinstance(file_path, str
+            if isinstance(file_path, (str, Path)):
                 raw_patches.append(Util.read_file(file_path))
             elif isinstance(file_path, io.BytesIO):
                 the_patch = Util.decode_bytes(file_path.read())
                 raw_patches.append(the_patch)
+            elif isinstance(file_path, tuple) and 1 < len(file_path) and isinstance(file_path[1], io.BytesIO):
+                the_patch = Util.decode_bytes(file_path[1].read())
+                raw_patches.append(the_patch)
             else:
                 logger.error(f"Unknown path type: {file_path}")
 
credsweeper/file_handler/string_content_provider.py CHANGED

@@ -38,10 +38,10 @@ class StringContentProvider(ContentProvider):
     def free(self) -> None:
         """free data after scan to reduce memory usage"""
         self.__lines = []
-        if
+        if "lines" in self.__dict__:
             delattr(self, "lines")
         self.__line_numbers = []
-        if
+        if "line_numbers" in self.__dict__:
             delattr(self, "line_numbers")
 
     @cached_property
credsweeper/file_handler/struct_content_provider.py CHANGED

@@ -38,7 +38,7 @@ class StructContentProvider(ContentProvider):
     def free(self) -> None:
         """free data after scan to reduce memory usage"""
         self.__struct = None
-        if
+        if "struct" in self.__dict__:
             delattr(self, "struct")
 
     def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTarget, None, None]:
credsweeper/file_handler/text_content_provider.py CHANGED

@@ -42,10 +42,10 @@ class TextContentProvider(ContentProvider):
     def free(self) -> None:
         """free data after scan to reduce memory usage"""
         self.__data = None
-        if
+        if "data" in self.__dict__:
             delattr(self, "data")
         self.__lines = None
-        if
+        if "lines" in self.__dict__:
             delattr(self, "lines")
         if isinstance(self.__io, io.BytesIO) and self.__io and not self.__io.closed:
             self.__io.close()
credsweeper/filters/__init__.py CHANGED

@@ -27,6 +27,7 @@ from credsweeper.filters.value_grafana_check import ValueGrafanaCheck
 from credsweeper.filters.value_grafana_service_check import ValueGrafanaServiceCheck
 from credsweeper.filters.value_hex_number_check import ValueHexNumberCheck
 from credsweeper.filters.value_jfrog_token_check import ValueJfrogTokenCheck
+from credsweeper.filters.value_json_web_key_check import ValueJsonWebKeyCheck
 from credsweeper.filters.value_json_web_token_check import ValueJsonWebTokenCheck
 from credsweeper.filters.value_last_word_check import ValueLastWordCheck
 from credsweeper.filters.value_method_check import ValueMethodCheck
credsweeper/filters/value_base64_encoded_pem_check.py CHANGED

@@ -30,7 +30,7 @@ class ValueBase64EncodedPem(Filter):
         with contextlib.suppress(Exception):
             text = Util.decode_base64(line_data.value, padding_safe=True, urlsafe_detect=True)
             lines = text.decode(ASCII).splitlines()
-            lines_pos =
+            lines_pos = list(range(len(lines)))
             for line_pos, line in zip(lines_pos, lines):
                 if PEM_BEGIN_PATTERN in line:
                     new_target = AnalysisTarget(line_pos, lines, lines_pos, target.descriptor)
credsweeper/filters/value_base64_key_check.py CHANGED

@@ -1,7 +1,4 @@
 import contextlib
-import string
-
-from cryptography.hazmat.primitives import serialization
 
 from credsweeper.config import Config
 from credsweeper.credentials import LineData

@@ -13,6 +10,8 @@ from credsweeper.utils import Util
 class ValueBase64KeyCheck(Filter):
     """Check that candidate contains base64 encoded private key"""
 
+    EXTRA_TRANS_TABLE = str.maketrans('', '', "\",'\\")
+
     def __init__(self, config: Config = None) -> None:
         self.config = config
 
@@ -29,12 +28,10 @@ class ValueBase64KeyCheck(Filter):
         """
 
         with contextlib.suppress(Exception):
-
-
-
-
-            for x in string.whitespace:
-                text = text.replace(x, '')
+            # remove backslash escaping sequences
+            text = Util.PEM_CLEANING_PATTERN.sub(r'', line_data.value)
+            # remove whitespaces
+            text = text.translate(Util.WHITESPACE_TRANS_TABLE)
             # clean sequence concatenation case:
             text = text.replace("'+'", '')
             text = text.replace('"+"', '')

@@ -43,12 +40,10 @@ class ValueBase64KeyCheck(Filter):
             text = text.replace('%2F', '/')
             text = text.replace('%3D', '=')
             # clean any other chars which should not appear
-
-            text = text.replace(x, "")
+            text = text.translate(ValueBase64KeyCheck.EXTRA_TRANS_TABLE)
             # only PEM standard encoding supported in regex pattern to cut off ending of the key
             key = Util.decode_base64(text, padding_safe=True, urlsafe_detect=False)
-            private_key =
-            if
-            # access to size field check - some types have no size
+            private_key = Util.load_pk(key, password=None)
+            if Util.check_pk(private_key):
                 return False
             return True
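The rewritten filter replaces per-character str.replace loops with translation tables: str.maketrans('', '', chars) builds a table that deletes every character in chars, and a single str.translate call applies it. Util.WHITESPACE_TRANS_TABLE is presumably an analogous table built over string.whitespace. A quick standalone illustration of the same trick:

    import string

    # Deletion tables: the third maketrans argument lists characters to remove.
    EXTRA_TRANS_TABLE = str.maketrans('', '', "\",'\\")            # removes "  '  ,  and backslash
    WHITESPACE_TRANS_TABLE = str.maketrans('', '', string.whitespace)

    raw = ' "MIIEvQIBADANBgkqhkiG9w0BAQEFAASC", '
    cleaned = raw.translate(WHITESPACE_TRANS_TABLE).translate(EXTRA_TRANS_TABLE)
    print(cleaned)  # MIIEvQIBADANBgkqhkiG9w0BAQEFAASC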
credsweeper/filters/value_entropy_base64_check.py CHANGED

@@ -19,12 +19,8 @@ class ValueEntropyBase64Check(ValueEntropyBaseCheck):
            y = 0.944 * math.log2(x) - 0.009 * x - 0.04
        elif 65 <= x < 256:
            y = 0.621 * math.log2(x) - 0.003 * x + 1.54
-       elif 256 <= x
-           y =
-       elif 512 <= x < 1024:
-           y = 5.89
-       elif 1024 <= x:
-           y = 5.94
+       elif 256 <= x:
+           y = 6 - 64 / x
        else:
            y = 0
        return y
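The replacement branch collapses the former fixed steps into one smooth expression: 6 - 64/256 = 5.75, 6 - 64/512 = 5.875 and 6 - 64/1024 = 5.9375, close to the old constants (5.89 and 5.94) and approaching the 6-bit ceiling of the base64 alphabet as the length grows; the 65 <= x < 256 branch evaluates to about 5.74 at x = 256, so the curve also stays nearly continuous at that boundary. A quick standalone check of the arithmetic, covering only the branches visible in this hunk:

    import math

    def min_entropy(x: int) -> float:
        """Threshold curve from ValueEntropyBase64Check (branches shown in the hunk only)."""
        if 65 <= x < 256:
            return 0.621 * math.log2(x) - 0.003 * x + 1.54
        if 256 <= x:
            return 6 - 64 / x
        return 0.0

    for length in (255, 256, 512, 1024, 4096):
        print(length, round(min_entropy(length), 4))
    # 255 -> ~5.7395, 256 -> 5.75, 512 -> 5.875, 1024 -> 5.9375, 4096 -> 5.9844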
credsweeper/filters/value_json_web_key_check.py NEW

@@ -0,0 +1,37 @@
+import contextlib
+
+from credsweeper.config import Config
+from credsweeper.credentials import LineData
+from credsweeper.file_handler.analysis_target import AnalysisTarget
+from credsweeper.filters import Filter
+from credsweeper.utils import Util
+
+
+class ValueJsonWebKeyCheck(Filter):
+    """
+    Check that candidate is JWK which starts usually from 'e'
+    and have private parts of the key
+    https://datatracker.ietf.org/doc/html/rfc7517
+    https://datatracker.ietf.org/doc/html/rfc7518
+    """
+
+    def __init__(self, config: Config = None) -> None:
+        pass
+
+    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
+        """Run filter checks on received key which might be structured.
+
+        Args:
+            line_data: credential candidate data
+            target: multiline target from which line data was obtained
+
+        Return:
+            True, when need to filter candidate and False if left
+
+        """
+        with contextlib.suppress(Exception):
+            if data := Util.decode_base64(line_data.value, padding_safe=True, urlsafe_detect=True):
+                if b'"kty":' in data and (b'"oct"' in data and b'"k":' in data or
+                                          (b'"EC"' in data or b'"RSA"' in data) and b'"d":' in data):
+                    return False
+        return True
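The new filter never parses JSON; it base64-decodes the candidate and looks for raw byte markers of a JWK (RFC 7517) that carries private material: a symmetric "oct" key with "k", or an "EC"/"RSA" key with the private member "d". A self-contained illustration of those markers; the sample key values are fabricated and the helper below is only a simplified mirror of the check:

    import base64
    import json

    def looks_like_private_jwk(value: str) -> bool:
        """Simplified sketch of the byte-marker test in ValueJsonWebKeyCheck."""
        data = base64.urlsafe_b64decode(value + "=" * (-len(value) % 4))
        return bool(b'"kty":' in data and (b'"oct"' in data and b'"k":' in data or
                                           (b'"EC"' in data or b'"RSA"' in data) and b'"d":' in data))

    # A fabricated EC private JWK: "d" is the private member, so the marker test fires.
    jwk = json.dumps({"kty": "EC", "crv": "P-256", "x": "...", "y": "...", "d": "FAKE"})
    encoded = base64.urlsafe_b64encode(jwk.encode()).decode().rstrip("=")
    print(looks_like_private_jwk(encoded))   # True  -> run() returns False and keeps the candidate

    # A public-only JWK has no "d"/"k" member, so run() returns True and the candidate is filtered.
    pub = base64.urlsafe_b64encode(json.dumps({"kty": "EC", "x": "...", "y": "..."}).encode()).decode()
    print(looks_like_private_jwk(pub))       # False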
credsweeper/filters/value_pattern_check.py CHANGED

@@ -33,33 +33,33 @@ class ValuePatternCheck(Filter):
         # use non whitespace symbol pattern
         self.pattern = re.compile(fr"(\S)\1{{{str(self.pattern_len - 1)},}}")
 
-    def equal_pattern_check(self,
+    def equal_pattern_check(self, value: str) -> bool:
         """Check if candidate value contain 4 and more same chars or numbers sequences.
 
         Args:
-
+            value: string variable, credential candidate value
 
         Return:
             True if contain and False if not
 
         """
-        if self.pattern.findall(
+        if self.pattern.findall(value):
             return True
         return False
 
-    def ascending_pattern_check(self,
+    def ascending_pattern_check(self, value: str) -> bool:
         """Check if candidate value contain 4 and more ascending chars or numbers sequences.
 
         Arg:
-
+            value: credential candidate value
 
         Return:
             True if contain and False if not
 
         """
         count = 1
-        for key in range(len(
-        if ord(
+        for key in range(len(value) - 1):
+            if ord(value[key + 1]) - ord(value[key]) == 1:
                 count += 1
             else:
                 count = 1

@@ -68,19 +68,19 @@ class ValuePatternCheck(Filter):
                 return True
         return False
 
-    def descending_pattern_check(self,
+    def descending_pattern_check(self, value: str) -> bool:
         """Check if candidate value contain 4 and more descending chars or numbers sequences.
 
         Arg:
-
+            value: string variable, credential candidate value
 
         Return:
             boolean variable. True if contain and False if not
 
         """
         count = 1
-        for key in range(len(
-        if ord(
+        for key in range(len(value) - 1):
+            if ord(value[key]) - ord(value[key + 1]) == 1:
                 count += 1
             else:
                 count = 1

@@ -89,6 +89,57 @@ class ValuePatternCheck(Filter):
                 return True
         return False
 
+    def check_val(self, value: str) -> bool:
+        """Cumulative value check.
+
+        Arg:
+            value: string variable, credential candidate value
+
+        Return:
+            boolean variable. True if contain and False if not
+
+        """
+        if self.equal_pattern_check(value):
+            return True
+        if self.ascending_pattern_check(value):
+            return True
+        if self.descending_pattern_check(value):
+            return True
+        return False
+
+    def duple_pattern_check(self, value: str) -> bool:
+        """Check if candidate value is a duplet value with possible patterns.
+
+        Arg:
+            value: string variable, credential candidate value
+
+        Return:
+            boolean variable. True if contain and False if not
+
+        """
+        # 001122334455... case
+        pair_duple = True
+        # 0102030405... case
+        even_duple = True
+        even_prev = value[0]
+        even_value = value[0::2]
+        # 1020304050... case
+        odd_duple = True
+        odd_prev = value[1]
+        odd_value = value[1::2]
+        for even_i, odd_i in zip(even_value, odd_value):
+            pair_duple &= even_i == odd_i
+            even_duple &= even_i == even_prev
+            odd_duple &= odd_i == odd_prev
+            if not pair_duple and not even_duple and not odd_duple:
+                break
+        else:
+            if pair_duple or odd_duple:
+                return self.check_val(even_value)
+            if even_duple:
+                return self.check_val(odd_value)
+        return False
+
     def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
         """Run filter checks on received credential candidate data 'line_data'.
 

@@ -103,13 +154,10 @@ class ValuePatternCheck(Filter):
         if len(line_data.value) < self.pattern_len:
             return True
 
-        if self.
-            return True
-
-        if self.ascending_pattern_check(line_data.value):
+        if self.check_val(line_data.value):
             return True
 
-        if self.
+        if 2 * self.pattern_len <= len(line_data.value) and self.duple_pattern_check(line_data.value):
             return True
 
         return False
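The new duple_pattern_check targets values whose characters repeat in pairs or alternate with a constant filler, which hides a trivial sequence from the plain equal/ascending/descending checks: it splits the value into its even- and odd-indexed slices and reruns the cumulative check on the informative half. A standalone illustration of the slicing; the example strings are made up:

    def halves(value: str):
        """Split a value into its even- and odd-indexed character slices."""
        return value[0::2], value[1::2]

    # "00112233": characters repeated in pairs, so either half is the ascending "0123".
    print(halves("00112233"))      # ('0123', '0123')

    # "0a1a2a3a": every odd position is the constant filler 'a';
    # the even half "0123" is ascending, so the duple check would flag it.
    print(halves("0a1a2a3a"))      # ('0123', 'aaaa')

    # "a0a1a2a3": constant filler on the even positions, sequence on the odd half.
    print(halves("a0a1a2a3"))      # ('aaaa', '0123')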
credsweeper/ml_model/features/file_extension.py CHANGED

@@ -18,7 +18,7 @@ class FileExtension(WordIn):
         super().__init__(words=extensions)
 
     def __call__(self, candidates: List[Candidate]) -> np.ndarray:
-        extension_set = set(
+        extension_set = set(candidate.line_data_list[0].file_type.lower() for candidate in candidates)
         return self.word_in_set(extension_set)
 
     def extract(self, candidate: Candidate) -> Any: