credsweeper 1.11.5__py3-none-any.whl → 1.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- credsweeper/__init__.py +21 -15
- credsweeper/__main__.py +158 -42
- credsweeper/app.py +18 -13
- credsweeper/common/keyword_pattern.py +19 -18
- credsweeper/common/morpheme_checklist.txt +28 -6
- credsweeper/config/__init__.py +0 -1
- credsweeper/config/config.py +4 -3
- credsweeper/credentials/__init__.py +0 -5
- credsweeper/credentials/augment_candidates.py +1 -1
- credsweeper/credentials/candidate.py +1 -1
- credsweeper/credentials/credential_manager.py +1 -1
- credsweeper/credentials/line_data.py +43 -8
- credsweeper/deep_scanner/__init__.py +0 -1
- credsweeper/deep_scanner/abstract_scanner.py +4 -3
- credsweeper/deep_scanner/byte_scanner.py +1 -1
- credsweeper/deep_scanner/bzip2_scanner.py +2 -2
- credsweeper/deep_scanner/csv_scanner.py +71 -0
- credsweeper/deep_scanner/deb_scanner.py +1 -1
- credsweeper/deep_scanner/deep_scanner.py +22 -12
- credsweeper/deep_scanner/docx_scanner.py +1 -1
- credsweeper/deep_scanner/eml_scanner.py +1 -1
- credsweeper/deep_scanner/encoder_scanner.py +1 -1
- credsweeper/deep_scanner/gzip_scanner.py +2 -2
- credsweeper/deep_scanner/html_scanner.py +1 -1
- credsweeper/deep_scanner/jclass_scanner.py +1 -1
- credsweeper/deep_scanner/jks_scanner.py +12 -3
- credsweeper/deep_scanner/lang_scanner.py +1 -1
- credsweeper/deep_scanner/lzma_scanner.py +2 -2
- credsweeper/deep_scanner/mxfile_scanner.py +1 -1
- credsweeper/deep_scanner/pdf_scanner.py +1 -1
- credsweeper/deep_scanner/pkcs_scanner.py +6 -2
- credsweeper/deep_scanner/pptx_scanner.py +1 -1
- credsweeper/deep_scanner/rpm_scanner.py +1 -1
- credsweeper/deep_scanner/rtf_scanner.py +41 -0
- credsweeper/deep_scanner/strings_scanner.py +52 -0
- credsweeper/deep_scanner/tar_scanner.py +2 -2
- credsweeper/deep_scanner/tmx_scanner.py +2 -2
- credsweeper/deep_scanner/xlsx_scanner.py +2 -2
- credsweeper/deep_scanner/xml_scanner.py +1 -1
- credsweeper/deep_scanner/zip_scanner.py +2 -2
- credsweeper/file_handler/__init__.py +0 -15
- credsweeper/file_handler/abstract_provider.py +3 -4
- credsweeper/file_handler/byte_content_provider.py +11 -2
- credsweeper/file_handler/content_provider.py +1 -1
- credsweeper/file_handler/data_content_provider.py +1 -1
- credsweeper/file_handler/diff_content_provider.py +133 -3
- credsweeper/file_handler/file_path_extractor.py +4 -2
- credsweeper/file_handler/files_provider.py +4 -4
- credsweeper/file_handler/patches_provider.py +7 -8
- credsweeper/file_handler/text_content_provider.py +8 -2
- credsweeper/filters/__init__.py +3 -4
- credsweeper/filters/filter.py +5 -3
- credsweeper/filters/group/__init__.py +0 -2
- credsweeper/filters/group/general_keyword.py +2 -2
- credsweeper/filters/group/general_pattern.py +2 -2
- credsweeper/filters/group/group.py +38 -36
- credsweeper/filters/group/password_keyword.py +9 -8
- credsweeper/filters/group/token_pattern.py +5 -5
- credsweeper/filters/group/url_credentials_group.py +8 -8
- credsweeper/filters/group/weird_base36_token.py +6 -6
- credsweeper/filters/group/weird_base64_token.py +5 -5
- credsweeper/filters/line_git_binary_check.py +5 -4
- credsweeper/filters/line_specific_key_check.py +6 -5
- credsweeper/filters/line_uue_part_check.py +5 -4
- credsweeper/filters/value_allowlist_check.py +6 -5
- credsweeper/filters/value_array_dictionary_check.py +8 -6
- credsweeper/filters/value_atlassian_token_check.py +6 -5
- credsweeper/filters/value_azure_token_check.py +6 -5
- credsweeper/filters/value_base32_data_check.py +8 -5
- credsweeper/filters/value_base64_data_check.py +6 -5
- credsweeper/filters/value_base64_encoded_pem_check.py +6 -5
- credsweeper/filters/value_base64_key_check.py +6 -5
- credsweeper/filters/value_base64_part_check.py +6 -5
- credsweeper/filters/value_basic_auth_check.py +37 -0
- credsweeper/filters/value_blocklist_check.py +6 -4
- credsweeper/filters/value_camel_case_check.py +8 -7
- credsweeper/filters/value_dictionary_keyword_check.py +6 -4
- credsweeper/filters/value_discord_bot_check.py +6 -5
- credsweeper/filters/value_entropy_base_check.py +6 -5
- credsweeper/filters/value_file_path_check.py +13 -8
- credsweeper/filters/value_github_check.py +8 -6
- credsweeper/filters/value_grafana_check.py +6 -5
- credsweeper/filters/value_grafana_service_check.py +5 -4
- credsweeper/filters/value_hex_number_check.py +5 -4
- credsweeper/filters/value_jfrog_token_check.py +6 -5
- credsweeper/filters/value_json_web_key_check.py +6 -5
- credsweeper/filters/value_json_web_token_check.py +6 -5
- credsweeper/filters/value_last_word_check.py +6 -4
- credsweeper/filters/{value_dictionary_value_length_check.py → value_length_check.py} +12 -6
- credsweeper/filters/value_method_check.py +5 -4
- credsweeper/filters/value_morphemes_check.py +43 -0
- credsweeper/filters/value_not_allowed_pattern_check.py +6 -5
- credsweeper/filters/value_not_part_encoded_check.py +4 -4
- credsweeper/filters/value_number_check.py +5 -4
- credsweeper/filters/value_pattern_check.py +61 -41
- credsweeper/filters/value_similarity_check.py +6 -4
- credsweeper/filters/value_split_keyword_check.py +5 -4
- credsweeper/filters/value_string_type_check.py +10 -7
- credsweeper/filters/value_token_base_check.py +5 -4
- credsweeper/filters/value_token_check.py +6 -5
- credsweeper/logger/__init__.py +0 -1
- credsweeper/logger/logger.py +1 -1
- credsweeper/ml_model/__init__.py +0 -1
- credsweeper/ml_model/features/__init__.py +1 -0
- credsweeper/ml_model/features/entropy_evaluation.py +1 -1
- credsweeper/ml_model/features/feature.py +2 -19
- credsweeper/ml_model/features/file_extension.py +2 -2
- credsweeper/ml_model/features/has_html_tag.py +12 -10
- credsweeper/ml_model/features/is_secret_numeric.py +5 -4
- credsweeper/ml_model/features/length_of_attribute.py +1 -1
- credsweeper/ml_model/features/morpheme_dense.py +15 -8
- credsweeper/ml_model/features/rule_name.py +2 -2
- credsweeper/ml_model/features/rule_severity.py +21 -0
- credsweeper/ml_model/features/search_in_attribute.py +1 -1
- credsweeper/ml_model/features/word_in.py +10 -33
- credsweeper/ml_model/features/word_in_path.py +6 -4
- credsweeper/ml_model/features/word_in_postamble.py +2 -5
- credsweeper/ml_model/features/word_in_preamble.py +2 -5
- credsweeper/ml_model/features/word_in_transition.py +2 -5
- credsweeper/ml_model/features/word_in_value.py +3 -4
- credsweeper/ml_model/features/word_in_variable.py +3 -4
- credsweeper/ml_model/ml_config.json +140 -27
- credsweeper/ml_model/ml_model.onnx +0 -0
- credsweeper/ml_model/ml_validator.py +4 -3
- credsweeper/rules/__init__.py +0 -1
- credsweeper/rules/config.yaml +329 -239
- credsweeper/rules/rule.py +4 -3
- credsweeper/scanner/__init__.py +0 -1
- credsweeper/scanner/scan_type/__init__.py +0 -5
- credsweeper/scanner/scan_type/multi_pattern.py +4 -4
- credsweeper/scanner/scan_type/pem_key_pattern.py +4 -4
- credsweeper/scanner/scan_type/scan_type.py +4 -4
- credsweeper/scanner/scan_type/single_pattern.py +4 -4
- credsweeper/scanner/scanner.py +24 -15
- credsweeper/secret/config.json +19 -6
- credsweeper/utils/__init__.py +0 -1
- credsweeper/utils/pem_key_detector.py +3 -3
- credsweeper/utils/util.py +24 -150
- {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/METADATA +7 -7
- credsweeper-1.13.3.dist-info/RECORD +164 -0
- credsweeper/filters/value_couple_keyword_check.py +0 -26
- credsweeper-1.11.5.dist-info/RECORD +0 -159
- {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/WHEEL +0 -0
- {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/entry_points.txt +0 -0
- {credsweeper-1.11.5.dist-info → credsweeper-1.13.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -2,7 +2,7 @@ import logging
|
|
|
2
2
|
from abc import ABC
|
|
3
3
|
from typing import List, Optional
|
|
4
4
|
|
|
5
|
-
from credsweeper.credentials import Candidate
|
|
5
|
+
from credsweeper.credentials.candidate import Candidate
|
|
6
6
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
7
7
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
8
8
|
from credsweeper.file_handler.struct_content_provider import StructContentProvider
|
|
@@ -4,10 +4,10 @@ from abc import ABC
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import List, Optional
|
|
6
6
|
|
|
7
|
-
from credsweeper.credentials import Candidate
|
|
7
|
+
from credsweeper.credentials.candidate import Candidate
|
|
8
8
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
9
9
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
10
|
-
from credsweeper.utils import Util
|
|
10
|
+
from credsweeper.utils.util import Util
|
|
11
11
|
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
13
13
|
|
|
@@ -5,7 +5,7 @@ from typing import List, Optional
|
|
|
5
5
|
from bs4 import BeautifulSoup
|
|
6
6
|
from lxml import etree
|
|
7
7
|
|
|
8
|
-
from credsweeper.credentials import Candidate
|
|
8
|
+
from credsweeper.credentials.candidate import Candidate
|
|
9
9
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
10
10
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
11
11
|
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
@@ -6,7 +6,7 @@ from typing import List, Optional
|
|
|
6
6
|
from pdfminer.high_level import extract_pages
|
|
7
7
|
from pdfminer.layout import LAParams, LTText, LTItem
|
|
8
8
|
|
|
9
|
-
from credsweeper.credentials import Candidate
|
|
9
|
+
from credsweeper.credentials.candidate import Candidate
|
|
10
10
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
11
11
|
from credsweeper.file_handler.data_content_provider import DataContentProvider, MIN_DATA_LEN
|
|
12
12
|
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
@@ -3,10 +3,11 @@ import logging
|
|
|
3
3
|
from abc import ABC
|
|
4
4
|
from typing import List, Optional
|
|
5
5
|
|
|
6
|
-
from credsweeper.credentials import Candidate
|
|
6
|
+
from credsweeper.common.constants import Severity, Confidence
|
|
7
|
+
from credsweeper.credentials.candidate import Candidate
|
|
7
8
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
8
9
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
9
|
-
from credsweeper.utils import Util
|
|
10
|
+
from credsweeper.utils.util import Util
|
|
10
11
|
|
|
11
12
|
logger = logging.getLogger(__name__)
|
|
12
13
|
|
|
@@ -35,6 +36,9 @@ class PkcsScanner(AbstractScanner, ABC):
|
|
|
35
36
|
"PKCS")
|
|
36
37
|
candidate.line_data_list[0].line = base64.b64encode(data_provider.data).decode()
|
|
37
38
|
candidate.line_data_list[0].value = repr(password)
|
|
39
|
+
# high severity is assigned to private key rules
|
|
40
|
+
candidate.severity = Severity.HIGH
|
|
41
|
+
candidate.confidence = Confidence.STRONG
|
|
38
42
|
return [candidate]
|
|
39
43
|
except Exception as pkcs_exc:
|
|
40
44
|
logger.debug(f"{data_provider.file_path}:{pw_probe}:{pkcs_exc}")
|
|
@@ -5,7 +5,7 @@ from typing import List, Optional
|
|
|
5
5
|
|
|
6
6
|
from pptx import Presentation
|
|
7
7
|
|
|
8
|
-
from credsweeper.credentials import Candidate
|
|
8
|
+
from credsweeper.credentials.candidate import Candidate
|
|
9
9
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
10
10
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
11
11
|
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
@@ -9,7 +9,7 @@ from credsweeper.credentials.candidate import Candidate
|
|
|
9
9
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
10
10
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
11
11
|
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
|
|
12
|
-
from credsweeper.utils import Util
|
|
12
|
+
from credsweeper.utils.util import Util
|
|
13
13
|
|
|
14
14
|
logger = logging.getLogger(__name__)
|
|
15
15
|
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from abc import ABC
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from striprtf import striprtf
|
|
6
|
+
|
|
7
|
+
from credsweeper.credentials.candidate import Candidate
|
|
8
|
+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
9
|
+
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
10
|
+
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
11
|
+
from credsweeper.utils.util import Util
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class RtfScanner(AbstractScanner, ABC):
|
|
17
|
+
"""Implements squash file system scanning"""
|
|
18
|
+
|
|
19
|
+
@staticmethod
|
|
20
|
+
def get_lines(text: str) -> List[str]:
|
|
21
|
+
"""Extracts text lines from RTF format"""
|
|
22
|
+
rtf_text = striprtf.rtf_to_text(text)
|
|
23
|
+
lines = Util.split_text(rtf_text)
|
|
24
|
+
return lines
|
|
25
|
+
|
|
26
|
+
def data_scan(
|
|
27
|
+
self, #
|
|
28
|
+
data_provider: DataContentProvider, #
|
|
29
|
+
depth: int, #
|
|
30
|
+
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
31
|
+
"""Scans data as RTF"""
|
|
32
|
+
try:
|
|
33
|
+
string_data_provider = StringContentProvider(lines=RtfScanner.get_lines(data_provider.text),
|
|
34
|
+
file_path=data_provider.file_path,
|
|
35
|
+
file_type=data_provider.file_type,
|
|
36
|
+
info=f"{data_provider.info}|RTF")
|
|
37
|
+
rtf_candidates = self.scanner.scan(string_data_provider)
|
|
38
|
+
return rtf_candidates
|
|
39
|
+
except Exception as rtf_exc:
|
|
40
|
+
logger.error(f"{data_provider.file_path}:{rtf_exc}")
|
|
41
|
+
return None
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from abc import ABC
|
|
3
|
+
from typing import List, Optional, Tuple
|
|
4
|
+
|
|
5
|
+
from credsweeper.common.constants import MIN_DATA_LEN
|
|
6
|
+
from credsweeper.credentials.candidate import Candidate
|
|
7
|
+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
8
|
+
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
9
|
+
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class StringsScanner(AbstractScanner, ABC):
|
|
15
|
+
"""Implements known binary file scanning with ASCII strings representations"""
|
|
16
|
+
|
|
17
|
+
@staticmethod
|
|
18
|
+
def get_strings(data: bytes) -> List[Tuple[str, int]]:
|
|
19
|
+
"""Processes binary to found ASCII strings. Use offset instead line number."""
|
|
20
|
+
strings = []
|
|
21
|
+
offset = 0
|
|
22
|
+
line = ''
|
|
23
|
+
for n, x in enumerate(data):
|
|
24
|
+
if 0x09 == x or 0x20 <= x <= 0x7E:
|
|
25
|
+
# TAB, SPACE and visible ASCII symbols
|
|
26
|
+
if not offset:
|
|
27
|
+
# for line number
|
|
28
|
+
offset = n
|
|
29
|
+
line += chr(x)
|
|
30
|
+
elif MIN_DATA_LEN <= len(line):
|
|
31
|
+
strings.append((line, offset))
|
|
32
|
+
offset = 0
|
|
33
|
+
line = ''
|
|
34
|
+
if MIN_DATA_LEN <= len(line):
|
|
35
|
+
strings.append((line, offset))
|
|
36
|
+
return strings
|
|
37
|
+
|
|
38
|
+
def data_scan(
|
|
39
|
+
self, #
|
|
40
|
+
data_provider: DataContentProvider, #
|
|
41
|
+
depth: int, #
|
|
42
|
+
recursive_limit_size: int) -> Optional[List[Candidate]]:
|
|
43
|
+
"""Extracts data file from .ar (debian) archive and launches data_scan"""
|
|
44
|
+
|
|
45
|
+
if strings := StringsScanner.get_strings(data_provider.data):
|
|
46
|
+
string_data_provider = StringContentProvider(lines=[x[0] for x in strings],
|
|
47
|
+
line_numbers=[x[1] for x in strings],
|
|
48
|
+
file_path=data_provider.file_path,
|
|
49
|
+
file_type=data_provider.file_type,
|
|
50
|
+
info=f"{data_provider.info}|STRINGS")
|
|
51
|
+
return self.scanner.scan(string_data_provider)
|
|
52
|
+
return None if strings is None else []
|
|
@@ -4,11 +4,11 @@ from abc import ABC
|
|
|
4
4
|
from tarfile import TarFile
|
|
5
5
|
from typing import List, Optional
|
|
6
6
|
|
|
7
|
-
from credsweeper.credentials import Candidate
|
|
7
|
+
from credsweeper.credentials.candidate import Candidate
|
|
8
8
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
9
9
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
10
10
|
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
|
|
11
|
-
from credsweeper.utils import Util
|
|
11
|
+
from credsweeper.utils.util import Util
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
@@ -5,11 +5,11 @@ from typing import List, Optional
|
|
|
5
5
|
from lxml import etree
|
|
6
6
|
|
|
7
7
|
from credsweeper.common.constants import MIN_DATA_LEN
|
|
8
|
-
from credsweeper.credentials import Candidate
|
|
8
|
+
from credsweeper.credentials.candidate import Candidate
|
|
9
9
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
10
10
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
11
11
|
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
12
|
-
from credsweeper.utils import Util
|
|
12
|
+
from credsweeper.utils.util import Util
|
|
13
13
|
|
|
14
14
|
logger = logging.getLogger(__name__)
|
|
15
15
|
|
|
@@ -5,12 +5,12 @@ from typing import List, Optional
|
|
|
5
5
|
|
|
6
6
|
import pandas as pd
|
|
7
7
|
|
|
8
|
-
from credsweeper.credentials import Candidate
|
|
9
8
|
from credsweeper.credentials.augment_candidates import augment_candidates
|
|
9
|
+
from credsweeper.credentials.candidate import Candidate
|
|
10
10
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
11
11
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
12
12
|
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
13
|
-
from credsweeper.utils import Util
|
|
13
|
+
from credsweeper.utils.util import Util
|
|
14
14
|
|
|
15
15
|
logger = logging.getLogger(__name__)
|
|
16
16
|
|
|
@@ -2,7 +2,7 @@ import logging
|
|
|
2
2
|
from abc import ABC
|
|
3
3
|
from typing import List, Optional
|
|
4
4
|
|
|
5
|
-
from credsweeper.credentials import Candidate
|
|
5
|
+
from credsweeper.credentials.candidate import Candidate
|
|
6
6
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
7
7
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
8
8
|
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
@@ -4,11 +4,11 @@ from abc import ABC
|
|
|
4
4
|
from typing import List, Optional
|
|
5
5
|
from zipfile import ZipFile
|
|
6
6
|
|
|
7
|
-
from credsweeper.credentials import Candidate
|
|
7
|
+
from credsweeper.credentials.candidate import Candidate
|
|
8
8
|
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
|
|
9
9
|
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
10
10
|
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
|
|
11
|
-
from credsweeper.utils import Util
|
|
11
|
+
from credsweeper.utils.util import Util
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
from credsweeper.file_handler.byte_content_provider import ByteContentProvider
|
|
2
|
-
from credsweeper.file_handler.content_provider import ContentProvider
|
|
3
|
-
from credsweeper.file_handler.data_content_provider import DataContentProvider
|
|
4
|
-
from credsweeper.file_handler.diff_content_provider import DiffContentProvider
|
|
5
|
-
from credsweeper.file_handler.string_content_provider import StringContentProvider
|
|
6
|
-
from credsweeper.file_handler.text_content_provider import TextContentProvider
|
|
7
|
-
|
|
8
|
-
__all__ = [
|
|
9
|
-
'ByteContentProvider', #
|
|
10
|
-
'ContentProvider', #
|
|
11
|
-
'DataContentProvider', #
|
|
12
|
-
'DiffContentProvider', #
|
|
13
|
-
'StringContentProvider', #
|
|
14
|
-
'TextContentProvider', #
|
|
15
|
-
]
|
|
@@ -3,9 +3,8 @@ from abc import ABC, abstractmethod
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import Union, Tuple, Sequence
|
|
5
5
|
|
|
6
|
-
from credsweeper.config import Config
|
|
7
|
-
from credsweeper.file_handler.diff_content_provider import DiffContentProvider
|
|
8
|
-
from credsweeper.file_handler.text_content_provider import TextContentProvider
|
|
6
|
+
from credsweeper.config.config import Config
|
|
7
|
+
from credsweeper.file_handler.content_provider import ContentProvider
|
|
9
8
|
|
|
10
9
|
|
|
11
10
|
class AbstractProvider(ABC):
|
|
@@ -31,7 +30,7 @@ class AbstractProvider(ABC):
|
|
|
31
30
|
self.__paths = paths
|
|
32
31
|
|
|
33
32
|
@abstractmethod
|
|
34
|
-
def get_scannable_files(self, config: Config) -> Sequence[Union[DiffContentProvider, TextContentProvider]]:
|
|
33
|
+
def get_scannable_files(self, config: Config) -> Sequence[ContentProvider]:
|
|
35
34
|
"""Get list of file object for analysis based on attribute "paths".
|
|
36
35
|
|
|
37
36
|
Args:
|
|
@@ -1,9 +1,12 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from functools import cached_property
|
|
2
3
|
from typing import List, Optional, Generator
|
|
3
4
|
|
|
4
5
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
5
6
|
from credsweeper.file_handler.content_provider import ContentProvider
|
|
6
|
-
from credsweeper.utils import Util
|
|
7
|
+
from credsweeper.utils.util import Util
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
7
10
|
|
|
8
11
|
|
|
9
12
|
class ByteContentProvider(ContentProvider):
|
|
@@ -42,7 +45,13 @@ class ByteContentProvider(ContentProvider):
|
|
|
42
45
|
def lines(self) -> List[str]:
|
|
43
46
|
"""lines RO getter for ByteContentProvider"""
|
|
44
47
|
if self.__lines is None:
|
|
45
|
-
|
|
48
|
+
text = Util.decode_text(self.__data)
|
|
49
|
+
if text is None:
|
|
50
|
+
logger.warning("Binary data detected %s %s %s", self.file_path, self.info,
|
|
51
|
+
repr(self.__data[:32]) if isinstance(self.__data, bytes) else "NONE")
|
|
52
|
+
self.__lines = []
|
|
53
|
+
else:
|
|
54
|
+
self.__lines = Util.split_text(text)
|
|
46
55
|
return self.__lines if self.__lines is not None else []
|
|
47
56
|
|
|
48
57
|
def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTarget, None, None]:
|
|
@@ -6,7 +6,7 @@ from typing import List, Optional, Generator
|
|
|
6
6
|
from credsweeper.common.constants import MAX_LINE_LENGTH
|
|
7
7
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
8
8
|
from credsweeper.file_handler.descriptor import Descriptor
|
|
9
|
-
from credsweeper.utils import Util
|
|
9
|
+
from credsweeper.utils.util import Util
|
|
10
10
|
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
12
12
|
|
|
@@ -10,7 +10,7 @@ from bs4 import BeautifulSoup, Tag, XMLParsedAsHTMLWarning
|
|
|
10
10
|
from credsweeper.common.constants import MIN_DATA_LEN
|
|
11
11
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
12
12
|
from credsweeper.file_handler.content_provider import ContentProvider
|
|
13
|
-
from credsweeper.utils import Util
|
|
13
|
+
from credsweeper.utils.util import Util
|
|
14
14
|
|
|
15
15
|
warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning, module='bs4')
|
|
16
16
|
logger = logging.getLogger(__name__)
|
|
@@ -1,14 +1,34 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from dataclasses import dataclass
|
|
2
3
|
from functools import cached_property
|
|
3
|
-
from typing import List, Tuple, Generator
|
|
4
|
+
from typing import List, Tuple, Generator, TypedDict, Optional, Union, Any, Dict
|
|
5
|
+
|
|
6
|
+
import whatthepatch
|
|
4
7
|
|
|
5
8
|
from credsweeper.common.constants import DiffRowType
|
|
6
9
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
7
10
|
from credsweeper.file_handler.content_provider import ContentProvider
|
|
8
|
-
from credsweeper.utils import DiffRowData, Util, DiffDict
|
|
9
11
|
|
|
10
12
|
logger = logging.getLogger(__name__)
|
|
11
13
|
|
|
14
|
+
DiffDict = TypedDict(
|
|
15
|
+
"DiffDict",
|
|
16
|
+
{
|
|
17
|
+
"old": Optional[int], #
|
|
18
|
+
"new": Optional[int], #
|
|
19
|
+
"line": Union[str, bytes], # bytes are possibly since whatthepatch v1.0.4
|
|
20
|
+
"hunk": Any # not used
|
|
21
|
+
})
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True)
|
|
25
|
+
class DiffRowData:
|
|
26
|
+
"""Class for keeping data of diff row."""
|
|
27
|
+
|
|
28
|
+
line_type: DiffRowType
|
|
29
|
+
line_numb: int
|
|
30
|
+
line: str
|
|
31
|
+
|
|
12
32
|
|
|
13
33
|
class DiffContentProvider(ContentProvider):
|
|
14
34
|
"""Provide data from a single `.patch` file.
|
|
@@ -76,6 +96,116 @@ class DiffContentProvider(ContentProvider):
|
|
|
76
96
|
all_lines.append(line_data.line)
|
|
77
97
|
return change_numbs, all_lines
|
|
78
98
|
|
|
99
|
+
@staticmethod
|
|
100
|
+
def patch2files_diff(raw_patch: List[str], change_type: DiffRowType) -> Dict[str, List[DiffDict]]:
|
|
101
|
+
"""Generate files changes from patch for added or deleted filepaths.
|
|
102
|
+
|
|
103
|
+
Args:
|
|
104
|
+
raw_patch: git patch file content
|
|
105
|
+
change_type: change type to select, DiffRowType.ADDED or DiffRowType.DELETED
|
|
106
|
+
|
|
107
|
+
Return:
|
|
108
|
+
return dict with ``{file paths: list of file row changes}``, where
|
|
109
|
+
elements of list of file row changes represented as::
|
|
110
|
+
|
|
111
|
+
{
|
|
112
|
+
"old": line number before diff,
|
|
113
|
+
"new": line number after diff,
|
|
114
|
+
"line": line text,
|
|
115
|
+
"hunk": diff hunk number
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
"""
|
|
119
|
+
if not raw_patch:
|
|
120
|
+
return {}
|
|
121
|
+
|
|
122
|
+
added_files, deleted_files = {}, {}
|
|
123
|
+
try:
|
|
124
|
+
for patch in whatthepatch.parse_patch(raw_patch):
|
|
125
|
+
if patch.changes is None:
|
|
126
|
+
logger.warning(f"Patch '{str(patch.header)}' cannot be scanned")
|
|
127
|
+
continue
|
|
128
|
+
changes = []
|
|
129
|
+
for change in patch.changes:
|
|
130
|
+
change_dict = change._asdict()
|
|
131
|
+
changes.append(change_dict)
|
|
132
|
+
|
|
133
|
+
added_files[patch.header.new_path] = changes
|
|
134
|
+
deleted_files[patch.header.old_path] = changes
|
|
135
|
+
if change_type == DiffRowType.ADDED:
|
|
136
|
+
return added_files
|
|
137
|
+
elif change_type == DiffRowType.DELETED:
|
|
138
|
+
return deleted_files
|
|
139
|
+
else:
|
|
140
|
+
logger.error(f"Change type should be one of: '{DiffRowType.ADDED}', '{DiffRowType.DELETED}';"
|
|
141
|
+
f" but received {change_type}")
|
|
142
|
+
except Exception as exc:
|
|
143
|
+
logger.exception(exc)
|
|
144
|
+
return {}
|
|
145
|
+
|
|
146
|
+
@staticmethod
|
|
147
|
+
def preprocess_diff_rows(
|
|
148
|
+
added_line_number: Optional[int], #
|
|
149
|
+
deleted_line_number: Optional[int], #
|
|
150
|
+
line: str) -> List[DiffRowData]:
|
|
151
|
+
"""Auxiliary function to extend diff changes.
|
|
152
|
+
|
|
153
|
+
Args:
|
|
154
|
+
added_line_number: number of added line or None
|
|
155
|
+
deleted_line_number: number of deleted line or None
|
|
156
|
+
line: the text line
|
|
157
|
+
|
|
158
|
+
Return:
|
|
159
|
+
diff rows data with as list of row change type, line number, row content
|
|
160
|
+
|
|
161
|
+
"""
|
|
162
|
+
rows_data: List[DiffRowData] = []
|
|
163
|
+
if isinstance(added_line_number, int):
|
|
164
|
+
# indicates line was inserted
|
|
165
|
+
rows_data.append(DiffRowData(DiffRowType.ADDED, added_line_number, line))
|
|
166
|
+
if isinstance(deleted_line_number, int):
|
|
167
|
+
# indicates line was removed
|
|
168
|
+
rows_data.append(DiffRowData(DiffRowType.DELETED, deleted_line_number, line))
|
|
169
|
+
return rows_data
|
|
170
|
+
|
|
171
|
+
@staticmethod
|
|
172
|
+
def wrong_change(change: DiffDict) -> bool:
|
|
173
|
+
"""Returns True if the change is wrong"""
|
|
174
|
+
for i in ["line", "new", "old"]:
|
|
175
|
+
if i not in change:
|
|
176
|
+
logger.error(f"Skipping wrong change {change}")
|
|
177
|
+
return True
|
|
178
|
+
return False
|
|
179
|
+
|
|
180
|
+
@staticmethod
|
|
181
|
+
def preprocess_file_diff(changes: List[DiffDict]) -> List[DiffRowData]:
|
|
182
|
+
"""Generate changed file rows from diff data with changed lines (e.g. marked + or - in diff).
|
|
183
|
+
|
|
184
|
+
Args:
|
|
185
|
+
changes: git diff by file rows data
|
|
186
|
+
|
|
187
|
+
Return:
|
|
188
|
+
diff rows data with as list of row change type, line number, row content
|
|
189
|
+
|
|
190
|
+
"""
|
|
191
|
+
if not changes:
|
|
192
|
+
return []
|
|
193
|
+
|
|
194
|
+
rows_data = []
|
|
195
|
+
# process diff to restore lines and their positions
|
|
196
|
+
for change in changes:
|
|
197
|
+
if DiffContentProvider.wrong_change(change):
|
|
198
|
+
continue
|
|
199
|
+
line = change["line"]
|
|
200
|
+
if isinstance(line, str):
|
|
201
|
+
rows_data.extend(DiffContentProvider.preprocess_diff_rows(change.get("new"), change.get("old"), line))
|
|
202
|
+
elif isinstance(line, (bytes, bytearray)):
|
|
203
|
+
logger.warning("The feature is available with the deep scan option")
|
|
204
|
+
else:
|
|
205
|
+
logger.error(f"Unknown type of line {type(line)}")
|
|
206
|
+
|
|
207
|
+
return rows_data
|
|
208
|
+
|
|
79
209
|
def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTarget, None, None]:
|
|
80
210
|
"""Preprocess file diff data to scan.
|
|
81
211
|
|
|
@@ -86,6 +216,6 @@ class DiffContentProvider(ContentProvider):
|
|
|
86
216
|
list of analysis targets of every row of file diff corresponding to change type "self.change_type"
|
|
87
217
|
|
|
88
218
|
"""
|
|
89
|
-
lines_data = Util.preprocess_file_diff(self.__diff)
|
|
219
|
+
lines_data = DiffContentProvider.preprocess_file_diff(self.__diff)
|
|
90
220
|
change_numbs, all_lines = self.parse_lines_data(self.__change_type, lines_data)
|
|
91
221
|
return self.lines_to_targets(min_len, all_lines, change_numbs)
|
|
@@ -7,8 +7,8 @@ from typing import List, Dict, Union, Tuple
|
|
|
7
7
|
from git import InvalidGitRepositoryError, NoSuchPathError, Repo
|
|
8
8
|
|
|
9
9
|
from credsweeper.common.constants import MIN_DATA_LEN
|
|
10
|
-
from credsweeper.config import Config
|
|
11
|
-
from credsweeper.utils import Util
|
|
10
|
+
from credsweeper.config.config import Config
|
|
11
|
+
from credsweeper.utils.util import Util
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
@@ -127,6 +127,8 @@ class FilePathExtractor:
|
|
|
127
127
|
Return:
|
|
128
128
|
True when the file full path should be excluded according config
|
|
129
129
|
"""
|
|
130
|
+
if config.pedantic:
|
|
131
|
+
return False
|
|
130
132
|
path = path.replace('\\', '/')
|
|
131
133
|
lower_path = path.lower()
|
|
132
134
|
if config.not_allowed_path_pattern.match(lower_path):
|
|
@@ -3,9 +3,9 @@ import logging
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import List, Optional, Union, Tuple, Sequence
|
|
5
5
|
|
|
6
|
-
from credsweeper import DiffContentProvider
|
|
7
|
-
from credsweeper.config import Config
|
|
6
|
+
from credsweeper.config.config import Config
|
|
8
7
|
from credsweeper.file_handler.abstract_provider import AbstractProvider
|
|
8
|
+
from credsweeper.file_handler.content_provider import ContentProvider
|
|
9
9
|
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
|
|
10
10
|
from credsweeper.file_handler.text_content_provider import TextContentProvider
|
|
11
11
|
|
|
@@ -30,7 +30,7 @@ class FilesProvider(AbstractProvider):
|
|
|
30
30
|
super().__init__(paths)
|
|
31
31
|
self.skip_ignored = skip_ignored
|
|
32
32
|
|
|
33
|
-
def get_scannable_files(self, config: Config) -> Sequence[
|
|
33
|
+
def get_scannable_files(self, config: Config) -> Sequence[ContentProvider]:
|
|
34
34
|
"""Get list of full text file object for analysis of files with parent paths from "paths".
|
|
35
35
|
|
|
36
36
|
Args:
|
|
@@ -40,7 +40,7 @@ class FilesProvider(AbstractProvider):
|
|
|
40
40
|
preprocessed file objects for analysis
|
|
41
41
|
|
|
42
42
|
"""
|
|
43
|
-
text_content_provider_list: List[
|
|
43
|
+
text_content_provider_list: List[ContentProvider] = []
|
|
44
44
|
for path in self.paths:
|
|
45
45
|
if isinstance(path, (str, Path)):
|
|
46
46
|
new_files = FilePathExtractor.get_file_paths(config, path)
|
|
@@ -4,12 +4,12 @@ from pathlib import Path
|
|
|
4
4
|
from typing import List, Union, Tuple, Sequence
|
|
5
5
|
|
|
6
6
|
from credsweeper.common.constants import DiffRowType
|
|
7
|
-
from credsweeper.config import Config
|
|
7
|
+
from credsweeper.config.config import Config
|
|
8
8
|
from credsweeper.file_handler.abstract_provider import AbstractProvider
|
|
9
|
+
from credsweeper.file_handler.content_provider import ContentProvider
|
|
9
10
|
from credsweeper.file_handler.diff_content_provider import DiffContentProvider
|
|
10
11
|
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
|
|
11
|
-
from credsweeper.
|
|
12
|
-
from credsweeper.utils import Util
|
|
12
|
+
from credsweeper.utils.util import Util
|
|
13
13
|
|
|
14
14
|
logger = logging.getLogger(__name__)
|
|
15
15
|
|
|
@@ -50,17 +50,16 @@ class PatchesProvider(AbstractProvider):
|
|
|
50
50
|
|
|
51
51
|
return raw_patches
|
|
52
52
|
|
|
53
|
-
def get_files_sequence(self,
|
|
54
|
-
raw_patches: List[List[str]]) -> Sequence[Union[DiffContentProvider, TextContentProvider]]:
|
|
53
|
+
def get_files_sequence(self, raw_patches: List[List[str]]) -> Sequence[ContentProvider]:
|
|
55
54
|
"""Returns sequence of files"""
|
|
56
|
-
files: List[
|
|
55
|
+
files: List[ContentProvider] = []
|
|
57
56
|
for raw_patch in raw_patches:
|
|
58
|
-
files_data =
|
|
57
|
+
files_data = DiffContentProvider.patch2files_diff(raw_patch, self.change_type)
|
|
59
58
|
for file_path, file_diff in files_data.items():
|
|
60
59
|
files.append(DiffContentProvider(file_path=file_path, change_type=self.change_type, diff=file_diff))
|
|
61
60
|
return files
|
|
62
61
|
|
|
63
|
-
def get_scannable_files(self, config: Config) -> Sequence[
|
|
62
|
+
def get_scannable_files(self, config: Config) -> Sequence[ContentProvider]:
|
|
64
63
|
"""Get files to scan. Output based on the `paths` field.
|
|
65
64
|
|
|
66
65
|
Args:
|
|
@@ -6,7 +6,7 @@ from typing import List, Optional, Union, Tuple, Generator
|
|
|
6
6
|
|
|
7
7
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
8
8
|
from credsweeper.file_handler.content_provider import ContentProvider
|
|
9
|
-
from credsweeper.utils import Util
|
|
9
|
+
from credsweeper.utils.util import Util
|
|
10
10
|
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
12
12
|
|
|
@@ -54,7 +54,13 @@ class TextContentProvider(ContentProvider):
|
|
|
54
54
|
def lines(self) -> Optional[List[str]]:
|
|
55
55
|
"""lines getter for TextContentProvider"""
|
|
56
56
|
if self.__lines is None:
|
|
57
|
-
|
|
57
|
+
text = Util.decode_text(self.data)
|
|
58
|
+
if text is None:
|
|
59
|
+
logger.warning("Binary file detected %s %s %s", self.file_path, self.info,
|
|
60
|
+
repr(self.__data[:32]) if isinstance(self.__data, bytes) else "NONE")
|
|
61
|
+
self.__lines = []
|
|
62
|
+
else:
|
|
63
|
+
self.__lines = Util.split_text(text)
|
|
58
64
|
return self.__lines if self.__lines is not None else []
|
|
59
65
|
|
|
60
66
|
def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTarget, None, None]:
|
credsweeper/filters/__init__.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from credsweeper.filters.filter import Filter # isort:skip
|
|
2
|
-
|
|
3
1
|
from credsweeper.filters.line_git_binary_check import LineGitBinaryCheck
|
|
4
2
|
from credsweeper.filters.line_specific_key_check import LineSpecificKeyCheck
|
|
5
3
|
from credsweeper.filters.line_uue_part_check import LineUUEPartCheck
|
|
@@ -12,11 +10,10 @@ from credsweeper.filters.value_base64_data_check import ValueBase64DataCheck
|
|
|
12
10
|
from credsweeper.filters.value_base64_encoded_pem_check import ValueBase64EncodedPem
|
|
13
11
|
from credsweeper.filters.value_base64_key_check import ValueBase64KeyCheck
|
|
14
12
|
from credsweeper.filters.value_base64_part_check import ValueBase64PartCheck
|
|
13
|
+
from credsweeper.filters.value_basic_auth_check import ValueBasicAuthCheck
|
|
15
14
|
from credsweeper.filters.value_blocklist_check import ValueBlocklistCheck
|
|
16
15
|
from credsweeper.filters.value_camel_case_check import ValueCamelCaseCheck
|
|
17
|
-
from credsweeper.filters.value_couple_keyword_check import ValueCoupleKeywordCheck
|
|
18
16
|
from credsweeper.filters.value_dictionary_keyword_check import ValueDictionaryKeywordCheck
|
|
19
|
-
from credsweeper.filters.value_dictionary_value_length_check import ValueDictionaryValueLengthCheck
|
|
20
17
|
from credsweeper.filters.value_discord_bot_check import ValueDiscordBotCheck
|
|
21
18
|
from credsweeper.filters.value_entropy_base32_check import ValueEntropyBase32Check
|
|
22
19
|
from credsweeper.filters.value_entropy_base36_check import ValueEntropyBase36Check
|
|
@@ -30,7 +27,9 @@ from credsweeper.filters.value_jfrog_token_check import ValueJfrogTokenCheck
|
|
|
30
27
|
from credsweeper.filters.value_json_web_key_check import ValueJsonWebKeyCheck
|
|
31
28
|
from credsweeper.filters.value_json_web_token_check import ValueJsonWebTokenCheck
|
|
32
29
|
from credsweeper.filters.value_last_word_check import ValueLastWordCheck
|
|
30
|
+
from credsweeper.filters.value_length_check import ValueLengthCheck
|
|
33
31
|
from credsweeper.filters.value_method_check import ValueMethodCheck
|
|
32
|
+
from credsweeper.filters.value_morphemes_check import ValueMorphemesCheck
|
|
34
33
|
from credsweeper.filters.value_not_allowed_pattern_check import ValueNotAllowedPatternCheck
|
|
35
34
|
from credsweeper.filters.value_not_part_encoded_check import ValueNotPartEncodedCheck
|
|
36
35
|
from credsweeper.filters.value_number_check import ValueNumberCheck
|