credsweeper 1.11.4__py3-none-any.whl → 1.11.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

Files changed (138) hide show
  1. credsweeper/__init__.py +21 -15
  2. credsweeper/__main__.py +141 -35
  3. credsweeper/app.py +11 -11
  4. credsweeper/config/__init__.py +0 -1
  5. credsweeper/config/config.py +1 -1
  6. credsweeper/credentials/__init__.py +0 -5
  7. credsweeper/credentials/augment_candidates.py +1 -1
  8. credsweeper/credentials/candidate.py +1 -1
  9. credsweeper/credentials/credential_manager.py +1 -1
  10. credsweeper/credentials/line_data.py +2 -2
  11. credsweeper/deep_scanner/__init__.py +0 -1
  12. credsweeper/deep_scanner/abstract_scanner.py +272 -17
  13. credsweeper/deep_scanner/byte_scanner.py +1 -1
  14. credsweeper/deep_scanner/bzip2_scanner.py +2 -2
  15. credsweeper/deep_scanner/deb_scanner.py +34 -27
  16. credsweeper/deep_scanner/deep_scanner.py +37 -250
  17. credsweeper/deep_scanner/docx_scanner.py +1 -1
  18. credsweeper/deep_scanner/eml_scanner.py +1 -1
  19. credsweeper/deep_scanner/encoder_scanner.py +1 -1
  20. credsweeper/deep_scanner/gzip_scanner.py +2 -2
  21. credsweeper/deep_scanner/html_scanner.py +1 -1
  22. credsweeper/deep_scanner/jclass_scanner.py +74 -0
  23. credsweeper/deep_scanner/jks_scanner.py +1 -1
  24. credsweeper/deep_scanner/lang_scanner.py +1 -1
  25. credsweeper/deep_scanner/lzma_scanner.py +2 -2
  26. credsweeper/deep_scanner/mxfile_scanner.py +1 -1
  27. credsweeper/deep_scanner/patch_scanner.py +48 -0
  28. credsweeper/deep_scanner/pdf_scanner.py +1 -1
  29. credsweeper/deep_scanner/pkcs_scanner.py +41 -0
  30. credsweeper/deep_scanner/pptx_scanner.py +1 -1
  31. credsweeper/deep_scanner/rpm_scanner.py +49 -0
  32. credsweeper/deep_scanner/sqlite3_scanner.py +79 -0
  33. credsweeper/deep_scanner/tar_scanner.py +2 -2
  34. credsweeper/deep_scanner/tmx_scanner.py +2 -2
  35. credsweeper/deep_scanner/xlsx_scanner.py +2 -2
  36. credsweeper/deep_scanner/xml_scanner.py +1 -1
  37. credsweeper/deep_scanner/zip_scanner.py +2 -2
  38. credsweeper/file_handler/__init__.py +0 -15
  39. credsweeper/file_handler/abstract_provider.py +3 -4
  40. credsweeper/file_handler/byte_content_provider.py +1 -1
  41. credsweeper/file_handler/content_provider.py +1 -1
  42. credsweeper/file_handler/data_content_provider.py +2 -3
  43. credsweeper/file_handler/diff_content_provider.py +133 -3
  44. credsweeper/file_handler/file_path_extractor.py +2 -2
  45. credsweeper/file_handler/files_provider.py +4 -4
  46. credsweeper/file_handler/patches_provider.py +10 -8
  47. credsweeper/file_handler/text_content_provider.py +1 -1
  48. credsweeper/filters/__init__.py +2 -2
  49. credsweeper/filters/filter.py +2 -2
  50. credsweeper/filters/group/__init__.py +0 -2
  51. credsweeper/filters/group/general_keyword.py +2 -2
  52. credsweeper/filters/group/general_pattern.py +2 -2
  53. credsweeper/filters/group/group.py +16 -5
  54. credsweeper/filters/group/password_keyword.py +2 -2
  55. credsweeper/filters/group/token_pattern.py +2 -2
  56. credsweeper/filters/group/url_credentials_group.py +2 -2
  57. credsweeper/filters/group/weird_base36_token.py +2 -2
  58. credsweeper/filters/group/weird_base64_token.py +2 -2
  59. credsweeper/filters/line_git_binary_check.py +3 -3
  60. credsweeper/filters/line_specific_key_check.py +4 -4
  61. credsweeper/filters/line_uue_part_check.py +3 -3
  62. credsweeper/filters/value_allowlist_check.py +4 -4
  63. credsweeper/filters/value_array_dictionary_check.py +3 -3
  64. credsweeper/filters/value_atlassian_token_check.py +4 -4
  65. credsweeper/filters/value_azure_token_check.py +4 -4
  66. credsweeper/filters/value_base32_data_check.py +4 -4
  67. credsweeper/filters/value_base64_data_check.py +4 -4
  68. credsweeper/filters/value_base64_encoded_pem_check.py +4 -4
  69. credsweeper/filters/value_base64_key_check.py +13 -18
  70. credsweeper/filters/value_base64_part_check.py +4 -4
  71. credsweeper/filters/value_basic_auth_check.py +36 -0
  72. credsweeper/filters/value_blocklist_check.py +3 -3
  73. credsweeper/filters/value_camel_case_check.py +4 -4
  74. credsweeper/filters/value_couple_keyword_check.py +3 -3
  75. credsweeper/filters/value_dictionary_keyword_check.py +3 -3
  76. credsweeper/filters/value_dictionary_value_length_check.py +3 -3
  77. credsweeper/filters/value_discord_bot_check.py +4 -4
  78. credsweeper/filters/value_entropy_base_check.py +4 -4
  79. credsweeper/filters/value_file_path_check.py +5 -4
  80. credsweeper/filters/value_github_check.py +3 -3
  81. credsweeper/filters/value_grafana_check.py +4 -4
  82. credsweeper/filters/value_grafana_service_check.py +3 -3
  83. credsweeper/filters/value_hex_number_check.py +3 -3
  84. credsweeper/filters/value_jfrog_token_check.py +4 -4
  85. credsweeper/filters/value_json_web_key_check.py +37 -0
  86. credsweeper/filters/value_json_web_token_check.py +4 -4
  87. credsweeper/filters/value_last_word_check.py +3 -3
  88. credsweeper/filters/value_method_check.py +3 -3
  89. credsweeper/filters/value_not_allowed_pattern_check.py +4 -4
  90. credsweeper/filters/value_not_part_encoded_check.py +3 -3
  91. credsweeper/filters/value_number_check.py +3 -3
  92. credsweeper/filters/value_pattern_check.py +3 -3
  93. credsweeper/filters/value_similarity_check.py +3 -3
  94. credsweeper/filters/value_split_keyword_check.py +3 -3
  95. credsweeper/filters/value_string_type_check.py +3 -3
  96. credsweeper/filters/value_token_base_check.py +3 -3
  97. credsweeper/filters/value_token_check.py +3 -3
  98. credsweeper/logger/__init__.py +0 -1
  99. credsweeper/logger/logger.py +1 -1
  100. credsweeper/ml_model/__init__.py +0 -1
  101. credsweeper/ml_model/features/entropy_evaluation.py +1 -1
  102. credsweeper/ml_model/features/feature.py +1 -1
  103. credsweeper/ml_model/features/file_extension.py +1 -1
  104. credsweeper/ml_model/features/has_html_tag.py +2 -2
  105. credsweeper/ml_model/features/is_secret_numeric.py +1 -1
  106. credsweeper/ml_model/features/length_of_attribute.py +1 -1
  107. credsweeper/ml_model/features/morpheme_dense.py +1 -1
  108. credsweeper/ml_model/features/rule_name.py +1 -1
  109. credsweeper/ml_model/features/search_in_attribute.py +1 -1
  110. credsweeper/ml_model/features/word_in.py +1 -1
  111. credsweeper/ml_model/features/word_in_path.py +1 -1
  112. credsweeper/ml_model/features/word_in_postamble.py +1 -1
  113. credsweeper/ml_model/features/word_in_preamble.py +1 -1
  114. credsweeper/ml_model/features/word_in_transition.py +1 -1
  115. credsweeper/ml_model/features/word_in_value.py +1 -1
  116. credsweeper/ml_model/features/word_in_variable.py +1 -1
  117. credsweeper/ml_model/ml_validator.py +3 -2
  118. credsweeper/rules/__init__.py +0 -1
  119. credsweeper/rules/config.yaml +114 -25
  120. credsweeper/rules/rule.py +4 -3
  121. credsweeper/scanner/__init__.py +0 -1
  122. credsweeper/scanner/scan_type/__init__.py +0 -5
  123. credsweeper/scanner/scan_type/multi_pattern.py +5 -6
  124. credsweeper/scanner/scan_type/pem_key_pattern.py +4 -4
  125. credsweeper/scanner/scan_type/scan_type.py +4 -4
  126. credsweeper/scanner/scan_type/single_pattern.py +4 -4
  127. credsweeper/scanner/scanner.py +8 -5
  128. credsweeper/secret/config.json +6 -6
  129. credsweeper/utils/__init__.py +0 -1
  130. credsweeper/utils/pem_key_detector.py +5 -5
  131. credsweeper/utils/util.py +143 -206
  132. {credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/METADATA +3 -6
  133. credsweeper-1.11.6.dist-info/RECORD +160 -0
  134. credsweeper/deep_scanner/pkcs12_scanner.py +0 -45
  135. credsweeper-1.11.4.dist-info/RECORD +0 -154
  136. {credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/WHEEL +0 -0
  137. {credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/entry_points.txt +0 -0
  138. {credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,49 @@
1
+ import io
2
+ import logging
3
+ from abc import ABC
4
+ from typing import List, Optional
5
+
6
+ import rpmfile
7
+
8
+ from credsweeper.credentials.candidate import Candidate
9
+ from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
10
+ from credsweeper.file_handler.data_content_provider import DataContentProvider
11
+ from credsweeper.file_handler.file_path_extractor import FilePathExtractor
12
+ from credsweeper.utils.util import Util
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class RpmScanner(AbstractScanner, ABC):
    """Implements rpm scanning"""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Extracts files one by one from the RPM package and launches a recursive scan for each.

        Args:
            data_provider: provider whose ``data`` holds the raw bytes of the package
            depth: recursion depth, passed through to ``recursive_scan``
            recursive_limit_size: remaining size budget; a member bigger than it is skipped

        Return:
            candidates collected from all scanned members (may be empty),
            or None when the package could not be processed

        """
        try:
            candidates: List[Candidate] = []
            with rpmfile.open(fileobj=io.BytesIO(data_provider.data)) as rpm_file:
                for member in rpm_file.getmembers():
                    # skip directory
                    if 0 != member.isdir:
                        continue
                    if FilePathExtractor.check_exclude_file(self.config, member.name):
                        continue
                    if 0 > recursive_limit_size - member.size:
                        # `name` is the attribute used everywhere else in this method. The former
                        # `filename` lookup presumably does not exist on rpmfile members, so an
                        # oversized member aborted the whole scan through the outer except
                        # instead of being skipped - NOTE(review): confirm against rpmfile.
                        logger.error(f"{member.name}: size {member.size}"
                                     f" is over limit {recursive_limit_size} depth:{depth}")
                        continue
                    rpm_content_provider = DataContentProvider(data=rpm_file.extractfile(member).read(),
                                                               file_path=data_provider.file_path,
                                                               file_type=Util.get_extension(member.name),
                                                               info=f"{data_provider.info}|RPM:{member.name}")
                    # the budget for nested content shrinks by the size of the extracted member
                    new_limit = recursive_limit_size - len(rpm_content_provider.data)
                    rpm_candidates = self.recursive_scan(rpm_content_provider, depth, new_limit)
                    candidates.extend(rpm_candidates)
            return candidates
        except Exception as rpm_exc:
            # best effort: a broken package is reported and yields no result
            logger.error(f"{data_provider.file_path}:{rpm_exc}")
        return None
@@ -0,0 +1,79 @@
1
+ import logging
2
+ import os.path
3
+ import sqlite3
4
+ import sys
5
+ import tempfile
6
+ from abc import ABC
7
+ from typing import List, Optional, Tuple, Any, Generator
8
+
9
+ from credsweeper.credentials.candidate import Candidate
10
+ from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
11
+ from credsweeper.file_handler.data_content_provider import DataContentProvider
12
+ from credsweeper.file_handler.struct_content_provider import StructContentProvider
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class Sqlite3Scanner(AbstractScanner, ABC):
    """Implements SQLite3 database scanning"""

    @staticmethod
    def __walk(sqlite3db) -> Generator[Tuple[str, Any], None, None]:
        """Yields ``(table name, row as dict)`` for every row of every non-internal table.

        Args:
            sqlite3db: an open sqlite3 connection

        """
        sqlite3db.row_factory = sqlite3.Row
        cursor = sqlite3db.cursor()
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
        # fetchall() materializes the table list, so the cursor may be reused for the row queries
        for table in cursor.fetchall():
            table_name = table[0]
            # The name comes from the scanned (untrusted) database: quote it as an identifier so
            # that names with spaces, quotes or reserved words are read instead of breaking the SQL.
            quoted_name = '"' + table_name.replace('"', '""') + '"'
            try:
                cursor.execute(f"SELECT * FROM {quoted_name}")
                for row in cursor:
                    yield table_name, dict(row)
            except sqlite3.DatabaseError as exc:
                # one unreadable table must not stop the others; report via the module logger
                logger.warning(f"Error reading table {table_name}: {exc}")

    @staticmethod
    def walk_sqlite(data: bytes) -> Generator[Tuple[str, Any], None, None]:
        """Yields data from sqlite3 database

        Args:
            data: raw bytes of a SQLite3 database file

        Return:
            generator of ``(table name, row as dict)`` pairs

        """
        if (3, 11) <= sys.version_info[:2]:
            # Connection.deserialize was added in version 3.11
            sqlite3db = sqlite3.connect(":memory:")
            try:
                sqlite3db.deserialize(data)  # type: ignore
                yield from Sqlite3Scanner.__walk(sqlite3db)
            finally:
                # the connection context manager does not close, so close explicitly
                sqlite3db.close()
        else:
            # a tmpfile has to be used. TODO: remove when support of 3.10 is dropped
            # delete=False with the handle closed before connecting works on Windows as well
            t = tempfile.NamedTemporaryFile(delete=False, suffix=".sqlite")
            try:
                with t:
                    t.write(data)
                sqlite3db = sqlite3.connect(t.name)
                try:
                    yield from Sqlite3Scanner.__walk(sqlite3db)
                finally:
                    sqlite3db.close()
            finally:
                # runs also when the generator is abandoned or raises, so the file never leaks
                if os.path.exists(t.name):
                    os.remove(t.name)

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Reads every table row of the SQLite3 database and launches structure_scan for each row.

        Args:
            data_provider: provider whose ``data`` holds the raw bytes of the database
            depth: recursion depth, passed through to ``structure_scan``
            recursive_limit_size: remaining size budget before the database size is subtracted

        Return:
            candidates collected from all rows (may be empty),
            or None when the database could not be processed

        """
        try:
            candidates: List[Candidate] = []
            new_limit = recursive_limit_size - len(data_provider.data)
            for table, row in self.walk_sqlite(data_provider.data):
                struct_content_provider = StructContentProvider(struct=row,
                                                                file_path=data_provider.file_path,
                                                                file_type=data_provider.file_type,
                                                                info=f"{data_provider.info}|SQLite3.{table}")
                if new_candidates := self.structure_scan(struct_content_provider, depth, new_limit):
                    candidates.extend(new_candidates)
            return candidates
        except Exception as exc:
            # best effort: a broken database is reported and yields no result
            logger.error(exc)
        return None
@@ -4,11 +4,11 @@ from abc import ABC
4
4
  from tarfile import TarFile
5
5
  from typing import List, Optional
6
6
 
7
- from credsweeper.credentials import Candidate
7
+ from credsweeper.credentials.candidate import Candidate
8
8
  from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
9
9
  from credsweeper.file_handler.data_content_provider import DataContentProvider
10
10
  from credsweeper.file_handler.file_path_extractor import FilePathExtractor
11
- from credsweeper.utils import Util
11
+ from credsweeper.utils.util import Util
12
12
 
13
13
  logger = logging.getLogger(__name__)
14
14
 
@@ -5,11 +5,11 @@ from typing import List, Optional
5
5
  from lxml import etree
6
6
 
7
7
  from credsweeper.common.constants import MIN_DATA_LEN
8
- from credsweeper.credentials import Candidate
8
+ from credsweeper.credentials.candidate import Candidate
9
9
  from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
10
10
  from credsweeper.file_handler.data_content_provider import DataContentProvider
11
11
  from credsweeper.file_handler.string_content_provider import StringContentProvider
12
- from credsweeper.utils import Util
12
+ from credsweeper.utils.util import Util
13
13
 
14
14
  logger = logging.getLogger(__name__)
15
15
 
@@ -5,12 +5,12 @@ from typing import List, Optional
5
5
 
6
6
  import pandas as pd
7
7
 
8
- from credsweeper.credentials import Candidate
9
8
  from credsweeper.credentials.augment_candidates import augment_candidates
9
+ from credsweeper.credentials.candidate import Candidate
10
10
  from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
11
11
  from credsweeper.file_handler.data_content_provider import DataContentProvider
12
12
  from credsweeper.file_handler.string_content_provider import StringContentProvider
13
- from credsweeper.utils import Util
13
+ from credsweeper.utils.util import Util
14
14
 
15
15
  logger = logging.getLogger(__name__)
16
16
 
@@ -2,7 +2,7 @@ import logging
2
2
  from abc import ABC
3
3
  from typing import List, Optional
4
4
 
5
- from credsweeper.credentials import Candidate
5
+ from credsweeper.credentials.candidate import Candidate
6
6
  from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
7
7
  from credsweeper.file_handler.data_content_provider import DataContentProvider
8
8
  from credsweeper.file_handler.string_content_provider import StringContentProvider
@@ -4,11 +4,11 @@ from abc import ABC
4
4
  from typing import List, Optional
5
5
  from zipfile import ZipFile
6
6
 
7
- from credsweeper.credentials import Candidate
7
+ from credsweeper.credentials.candidate import Candidate
8
8
  from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
9
9
  from credsweeper.file_handler.data_content_provider import DataContentProvider
10
10
  from credsweeper.file_handler.file_path_extractor import FilePathExtractor
11
- from credsweeper.utils import Util
11
+ from credsweeper.utils.util import Util
12
12
 
13
13
  logger = logging.getLogger(__name__)
14
14
 
@@ -1,15 +0,0 @@
1
- from credsweeper.file_handler.byte_content_provider import ByteContentProvider
2
- from credsweeper.file_handler.content_provider import ContentProvider
3
- from credsweeper.file_handler.data_content_provider import DataContentProvider
4
- from credsweeper.file_handler.diff_content_provider import DiffContentProvider
5
- from credsweeper.file_handler.string_content_provider import StringContentProvider
6
- from credsweeper.file_handler.text_content_provider import TextContentProvider
7
-
8
- __all__ = [
9
- 'ByteContentProvider', #
10
- 'ContentProvider', #
11
- 'DataContentProvider', #
12
- 'DiffContentProvider', #
13
- 'StringContentProvider', #
14
- 'TextContentProvider', #
15
- ]
@@ -3,9 +3,8 @@ from abc import ABC, abstractmethod
3
3
  from pathlib import Path
4
4
  from typing import Union, Tuple, Sequence
5
5
 
6
- from credsweeper.config import Config
7
- from credsweeper.file_handler.diff_content_provider import DiffContentProvider
8
- from credsweeper.file_handler.text_content_provider import TextContentProvider
6
+ from credsweeper.config.config import Config
7
+ from credsweeper.file_handler.content_provider import ContentProvider
9
8
 
10
9
 
11
10
  class AbstractProvider(ABC):
@@ -31,7 +30,7 @@ class AbstractProvider(ABC):
31
30
  self.__paths = paths
32
31
 
33
32
  @abstractmethod
34
- def get_scannable_files(self, config: Config) -> Sequence[Union[DiffContentProvider, TextContentProvider]]:
33
+ def get_scannable_files(self, config: Config) -> Sequence[ContentProvider]:
35
34
  """Get list of file object for analysis based on attribute "paths".
36
35
 
37
36
  Args:
@@ -3,7 +3,7 @@ from typing import List, Optional, Generator
3
3
 
4
4
  from credsweeper.file_handler.analysis_target import AnalysisTarget
5
5
  from credsweeper.file_handler.content_provider import ContentProvider
6
- from credsweeper.utils import Util
6
+ from credsweeper.utils.util import Util
7
7
 
8
8
 
9
9
  class ByteContentProvider(ContentProvider):
@@ -6,7 +6,7 @@ from typing import List, Optional, Generator
6
6
  from credsweeper.common.constants import MAX_LINE_LENGTH
7
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
8
8
  from credsweeper.file_handler.descriptor import Descriptor
9
- from credsweeper.utils import Util
9
+ from credsweeper.utils.util import Util
10
10
 
11
11
  logger = logging.getLogger(__name__)
12
12
 
@@ -1,6 +1,5 @@
1
1
  import json
2
2
  import logging
3
- import string
4
3
  import warnings
5
4
  from functools import cached_property
6
5
  from typing import List, Optional, Any, Generator, Callable, Tuple
@@ -11,7 +10,7 @@ from bs4 import BeautifulSoup, Tag, XMLParsedAsHTMLWarning
11
10
  from credsweeper.common.constants import MIN_DATA_LEN
12
11
  from credsweeper.file_handler.analysis_target import AnalysisTarget
13
12
  from credsweeper.file_handler.content_provider import ContentProvider
14
- from credsweeper.utils import Util
13
+ from credsweeper.utils.util import Util
15
14
 
16
15
  warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning, module='bs4')
17
16
  logger = logging.getLogger(__name__)
@@ -385,7 +384,7 @@ class DataContentProvider(ContentProvider):
385
384
  return False
386
385
  try:
387
386
  self.decoded = Util.decode_base64( #
388
- self.text.translate(str.maketrans('', '', string.whitespace)), #
387
+ text=Util.PEM_CLEANING_PATTERN.sub(r'', self.text).replace('\\', ''), #
389
388
  padding_safe=True, #
390
389
  urlsafe_detect=True) #
391
390
  except Exception as exc:
@@ -1,14 +1,34 @@
1
1
  import logging
2
+ from dataclasses import dataclass
2
3
  from functools import cached_property
3
- from typing import List, Tuple, Generator
4
+ from typing import List, Tuple, Generator, TypedDict, Optional, Union, Any, Dict
5
+
6
+ import whatthepatch
4
7
 
5
8
  from credsweeper.common.constants import DiffRowType
6
9
  from credsweeper.file_handler.analysis_target import AnalysisTarget
7
10
  from credsweeper.file_handler.content_provider import ContentProvider
8
- from credsweeper.utils import DiffRowData, Util, DiffDict
9
11
 
10
12
  logger = logging.getLogger(__name__)
11
13
 
14
class DiffDict(TypedDict):
    """Dictionary form of a single diff row."""

    old: Optional[int]  # line number before diff
    new: Optional[int]  # line number after diff
    line: Union[str, bytes]  # line text; bytes are possible since whatthepatch v1.0.4
    hunk: Any  # diff hunk number, not used
22
+
23
+
24
@dataclass(frozen=True)
class DiffRowData:
    """Immutable record that describes one row of a diff."""

    # row change type
    line_type: DiffRowType
    # line number of the row
    line_numb: int
    # row content
    line: str
31
+
12
32
 
13
33
  class DiffContentProvider(ContentProvider):
14
34
  """Provide data from a single `.patch` file.
@@ -76,6 +96,116 @@ class DiffContentProvider(ContentProvider):
76
96
  all_lines.append(line_data.line)
77
97
  return change_numbs, all_lines
78
98
 
99
@staticmethod
def patch2files_diff(raw_patch: List[str], change_type: DiffRowType) -> Dict[str, List[DiffDict]]:
    """Generate files changes from patch for added or deleted filepaths.

    Args:
        raw_patch: git patch file content
        change_type: change type to select, DiffRowType.ADDED or DiffRowType.DELETED

    Return:
        return dict with ``{file paths: list of file row changes}``, where
        elements of list of file row changes represented as::

            {
                "old": line number before diff,
                "new": line number after diff,
                "line": line text,
                "hunk": diff hunk number
            }

        An empty dict is returned for an empty patch, an unsupported change type
        or when parsing of the patch fails.

    """
    if not raw_patch:
        return {}

    # validate the requested change type before any parsing work is spent
    if change_type == DiffRowType.ADDED:
        path_attr = "new_path"
    elif change_type == DiffRowType.DELETED:
        path_attr = "old_path"
    else:
        logger.error(f"Change type should be one of: '{DiffRowType.ADDED}', '{DiffRowType.DELETED}';"
                     f" but received {change_type}")
        return {}

    files: Dict[str, List[DiffDict]] = {}
    try:
        for patch in whatthepatch.parse_patch(raw_patch):
            if patch.changes is None:
                logger.warning(f"Patch '{str(patch.header)}' cannot be scanned")
                continue
            if patch.header is None:
                # without a header there is no file path to attribute the changes to;
                # skip only this patch instead of losing every file through the except below
                logger.warning("Patch without header cannot be scanned")
                continue
            files[getattr(patch.header, path_attr)] = [change._asdict() for change in patch.changes]
    except Exception as exc:
        logger.exception(exc)
        return {}
    return files
145
+
146
@staticmethod
def preprocess_diff_rows(
        added_line_number: Optional[int],  #
        deleted_line_number: Optional[int],  #
        line: str) -> List[DiffRowData]:
    """Build diff row records for one text line.

    Args:
        added_line_number: number of added line or None
        deleted_line_number: number of deleted line or None
        line: the text line

    Return:
        a list with an ADDED row when the added number is an int, followed by
        a DELETED row when the deleted number is an int

    """
    # the order matters: the ADDED row always precedes the DELETED row
    numbered_types = (
        (DiffRowType.ADDED, added_line_number),
        (DiffRowType.DELETED, deleted_line_number),
    )
    return [DiffRowData(row_type, number, line) for row_type, number in numbered_types if isinstance(number, int)]
170
+
171
@staticmethod
def wrong_change(change: DiffDict) -> bool:
    """Tell whether the change lacks any of the keys "line", "new", "old" (logged as an error)."""
    if all(key in change for key in ("line", "new", "old")):
        return False
    logger.error(f"Skipping wrong change {change}")
    return True
179
+
180
@staticmethod
def preprocess_file_diff(changes: List[DiffDict]) -> List[DiffRowData]:
    """Convert the diff rows of one file into a flat list of row records.

    Args:
        changes: git diff by file rows data

    Return:
        row records (change type, line number, content) of all text rows;
        rows that fail the key check or carry non-text lines are left out

    """
    collected = []
    if changes:
        # walk the diff to restore lines and their positions
        for item in changes:
            if DiffContentProvider.wrong_change(item):
                continue
            text = item["line"]
            if isinstance(text, (bytes, bytearray)):
                logger.warning("The feature is available with the deep scan option")
            elif isinstance(text, str):
                collected += DiffContentProvider.preprocess_diff_rows(item.get("new"), item.get("old"), text)
            else:
                logger.error(f"Unknown type of line {type(text)}")
    return collected
208
+
79
209
  def yield_analysis_target(self, min_len: int) -> Generator[AnalysisTarget, None, None]:
80
210
  """Preprocess file diff data to scan.
81
211
 
@@ -86,6 +216,6 @@ class DiffContentProvider(ContentProvider):
86
216
  list of analysis targets of every row of file diff corresponding to change type "self.change_type"
87
217
 
88
218
  """
89
- lines_data = Util.preprocess_file_diff(self.__diff)
219
+ lines_data = DiffContentProvider.preprocess_file_diff(self.__diff)
90
220
  change_numbs, all_lines = self.parse_lines_data(self.__change_type, lines_data)
91
221
  return self.lines_to_targets(min_len, all_lines, change_numbs)
@@ -7,8 +7,8 @@ from typing import List, Dict, Union, Tuple
7
7
  from git import InvalidGitRepositoryError, NoSuchPathError, Repo
8
8
 
9
9
  from credsweeper.common.constants import MIN_DATA_LEN
10
- from credsweeper.config import Config
11
- from credsweeper.utils import Util
10
+ from credsweeper.config.config import Config
11
+ from credsweeper.utils.util import Util
12
12
 
13
13
  logger = logging.getLogger(__name__)
14
14
 
@@ -3,9 +3,9 @@ import logging
3
3
  from pathlib import Path
4
4
  from typing import List, Optional, Union, Tuple, Sequence
5
5
 
6
- from credsweeper import DiffContentProvider
7
- from credsweeper.config import Config
6
+ from credsweeper.config.config import Config
8
7
  from credsweeper.file_handler.abstract_provider import AbstractProvider
8
+ from credsweeper.file_handler.content_provider import ContentProvider
9
9
  from credsweeper.file_handler.file_path_extractor import FilePathExtractor
10
10
  from credsweeper.file_handler.text_content_provider import TextContentProvider
11
11
 
@@ -30,7 +30,7 @@ class FilesProvider(AbstractProvider):
30
30
  super().__init__(paths)
31
31
  self.skip_ignored = skip_ignored
32
32
 
33
- def get_scannable_files(self, config: Config) -> Sequence[Union[DiffContentProvider, TextContentProvider]]:
33
+ def get_scannable_files(self, config: Config) -> Sequence[ContentProvider]:
34
34
  """Get list of full text file object for analysis of files with parent paths from "paths".
35
35
 
36
36
  Args:
@@ -40,7 +40,7 @@ class FilesProvider(AbstractProvider):
40
40
  preprocessed file objects for analysis
41
41
 
42
42
  """
43
- text_content_provider_list: List[Union[DiffContentProvider, TextContentProvider]] = []
43
+ text_content_provider_list: List[ContentProvider] = []
44
44
  for path in self.paths:
45
45
  if isinstance(path, (str, Path)):
46
46
  new_files = FilePathExtractor.get_file_paths(config, path)
@@ -3,13 +3,13 @@ import logging
3
3
  from pathlib import Path
4
4
  from typing import List, Union, Tuple, Sequence
5
5
 
6
- from credsweeper import TextContentProvider
7
6
  from credsweeper.common.constants import DiffRowType
8
- from credsweeper.config import Config
7
+ from credsweeper.config.config import Config
9
8
  from credsweeper.file_handler.abstract_provider import AbstractProvider
9
+ from credsweeper.file_handler.content_provider import ContentProvider
10
10
  from credsweeper.file_handler.diff_content_provider import DiffContentProvider
11
11
  from credsweeper.file_handler.file_path_extractor import FilePathExtractor
12
- from credsweeper.utils import Util
12
+ from credsweeper.utils.util import Util
13
13
 
14
14
  logger = logging.getLogger(__name__)
15
15
 
@@ -42,22 +42,24 @@ class PatchesProvider(AbstractProvider):
42
42
  elif isinstance(file_path, io.BytesIO):
43
43
  the_patch = Util.decode_bytes(file_path.read())
44
44
  raw_patches.append(the_patch)
45
+ elif isinstance(file_path, tuple) and 1 < len(file_path) and isinstance(file_path[1], io.BytesIO):
46
+ the_patch = Util.decode_bytes(file_path[1].read())
47
+ raw_patches.append(the_patch)
45
48
  else:
46
49
  logger.error(f"Unknown path type: {file_path}")
47
50
 
48
51
  return raw_patches
49
52
 
50
- def get_files_sequence(self,
51
- raw_patches: List[List[str]]) -> Sequence[Union[DiffContentProvider, TextContentProvider]]:
53
+ def get_files_sequence(self, raw_patches: List[List[str]]) -> Sequence[ContentProvider]:
52
54
  """Returns sequence of files"""
53
- files: List[Union[DiffContentProvider, TextContentProvider]] = []
55
+ files: List[ContentProvider] = []
54
56
  for raw_patch in raw_patches:
55
- files_data = Util.patch2files_diff(raw_patch, self.change_type)
57
+ files_data = DiffContentProvider.patch2files_diff(raw_patch, self.change_type)
56
58
  for file_path, file_diff in files_data.items():
57
59
  files.append(DiffContentProvider(file_path=file_path, change_type=self.change_type, diff=file_diff))
58
60
  return files
59
61
 
60
- def get_scannable_files(self, config: Config) -> Sequence[Union[DiffContentProvider, TextContentProvider]]:
62
+ def get_scannable_files(self, config: Config) -> Sequence[ContentProvider]:
61
63
  """Get files to scan. Output based on the `paths` field.
62
64
 
63
65
  Args:
@@ -6,7 +6,7 @@ from typing import List, Optional, Union, Tuple, Generator
6
6
 
7
7
  from credsweeper.file_handler.analysis_target import AnalysisTarget
8
8
  from credsweeper.file_handler.content_provider import ContentProvider
9
- from credsweeper.utils import Util
9
+ from credsweeper.utils.util import Util
10
10
 
11
11
  logger = logging.getLogger(__name__)
12
12
 
@@ -1,5 +1,3 @@
1
- from credsweeper.filters.filter import Filter # isort:skip
2
-
3
1
  from credsweeper.filters.line_git_binary_check import LineGitBinaryCheck
4
2
  from credsweeper.filters.line_specific_key_check import LineSpecificKeyCheck
5
3
  from credsweeper.filters.line_uue_part_check import LineUUEPartCheck
@@ -12,6 +10,7 @@ from credsweeper.filters.value_base64_data_check import ValueBase64DataCheck
12
10
  from credsweeper.filters.value_base64_encoded_pem_check import ValueBase64EncodedPem
13
11
  from credsweeper.filters.value_base64_key_check import ValueBase64KeyCheck
14
12
  from credsweeper.filters.value_base64_part_check import ValueBase64PartCheck
13
+ from credsweeper.filters.value_basic_auth_check import ValueBasicAuthCheck
15
14
  from credsweeper.filters.value_blocklist_check import ValueBlocklistCheck
16
15
  from credsweeper.filters.value_camel_case_check import ValueCamelCaseCheck
17
16
  from credsweeper.filters.value_couple_keyword_check import ValueCoupleKeywordCheck
@@ -27,6 +26,7 @@ from credsweeper.filters.value_grafana_check import ValueGrafanaCheck
27
26
  from credsweeper.filters.value_grafana_service_check import ValueGrafanaServiceCheck
28
27
  from credsweeper.filters.value_hex_number_check import ValueHexNumberCheck
29
28
  from credsweeper.filters.value_jfrog_token_check import ValueJfrogTokenCheck
29
+ from credsweeper.filters.value_json_web_key_check import ValueJsonWebKeyCheck
30
30
  from credsweeper.filters.value_json_web_token_check import ValueJsonWebTokenCheck
31
31
  from credsweeper.filters.value_last_word_check import ValueLastWordCheck
32
32
  from credsweeper.filters.value_method_check import ValueMethodCheck
@@ -1,7 +1,7 @@
1
1
  from abc import abstractmethod, ABC
2
2
 
3
- from credsweeper.config import Config
4
- from credsweeper.credentials import LineData
3
+ from credsweeper.config.config import Config
4
+ from credsweeper.credentials.line_data import LineData
5
5
  from credsweeper.file_handler.analysis_target import AnalysisTarget
6
6
 
7
7
 
@@ -1,5 +1,3 @@
1
- from credsweeper.filters.group.group import Group # isort:skip
2
-
3
1
  from credsweeper.filters.group.general_keyword import GeneralKeyword
4
2
  from credsweeper.filters.group.general_pattern import GeneralPattern
5
3
  from credsweeper.filters.group.password_keyword import PasswordKeyword
@@ -1,7 +1,7 @@
1
1
  from credsweeper.common.constants import GroupType
2
- from credsweeper.config import Config
2
+ from credsweeper.config.config import Config
3
3
  from credsweeper.filters import ValueDictionaryKeywordCheck
4
- from credsweeper.filters.group import Group
4
+ from credsweeper.filters.group.group import Group
5
5
 
6
6
 
7
7
  class GeneralKeyword(Group):
@@ -1,6 +1,6 @@
1
1
  from credsweeper.common.constants import GroupType
2
- from credsweeper.config import Config
3
- from credsweeper.filters.group import Group
2
+ from credsweeper.config.config import Config
3
+ from credsweeper.filters.group.group import Group
4
4
 
5
5
 
6
6
  class GeneralPattern(Group):
@@ -2,11 +2,22 @@ from abc import ABC
2
2
  from typing import List
3
3
 
4
4
  from credsweeper.common.constants import GroupType
5
- from credsweeper.config import Config
6
- from credsweeper.filters import (Filter, LineSpecificKeyCheck, ValueAllowlistCheck, ValueArrayDictionaryCheck,
7
- ValueBlocklistCheck, ValueCamelCaseCheck, ValueFilePathCheck, ValueLastWordCheck,
8
- ValueMethodCheck, ValueNotAllowedPatternCheck, ValuePatternCheck, ValueSimilarityCheck,
9
- ValueStringTypeCheck, ValueTokenCheck, ValueHexNumberCheck)
5
+ from credsweeper.config.config import Config
6
+ from credsweeper.filters.filter import Filter
7
+ from credsweeper.filters.line_specific_key_check import LineSpecificKeyCheck
8
+ from credsweeper.filters.value_allowlist_check import ValueAllowlistCheck
9
+ from credsweeper.filters.value_array_dictionary_check import ValueArrayDictionaryCheck
10
+ from credsweeper.filters.value_blocklist_check import ValueBlocklistCheck
11
+ from credsweeper.filters.value_camel_case_check import ValueCamelCaseCheck
12
+ from credsweeper.filters.value_file_path_check import ValueFilePathCheck
13
+ from credsweeper.filters.value_hex_number_check import ValueHexNumberCheck
14
+ from credsweeper.filters.value_last_word_check import ValueLastWordCheck
15
+ from credsweeper.filters.value_method_check import ValueMethodCheck
16
+ from credsweeper.filters.value_not_allowed_pattern_check import ValueNotAllowedPatternCheck
17
+ from credsweeper.filters.value_pattern_check import ValuePatternCheck
18
+ from credsweeper.filters.value_similarity_check import ValueSimilarityCheck
19
+ from credsweeper.filters.value_string_type_check import ValueStringTypeCheck
20
+ from credsweeper.filters.value_token_check import ValueTokenCheck
10
21
 
11
22
 
12
23
  class Group(ABC):
@@ -1,8 +1,8 @@
1
1
  from credsweeper.common.constants import GroupType
2
- from credsweeper.config import Config
2
+ from credsweeper.config.config import Config
3
3
  from credsweeper.filters import ValueDictionaryValueLengthCheck, LineGitBinaryCheck
4
4
  from credsweeper.filters import ValueSplitKeywordCheck
5
- from credsweeper.filters.group import Group
5
+ from credsweeper.filters.group.group import Group
6
6
  from credsweeper.filters.line_uue_part_check import LineUUEPartCheck
7
7
 
8
8
 
@@ -1,7 +1,7 @@
1
1
  from credsweeper.common.constants import GroupType
2
- from credsweeper.config import Config
2
+ from credsweeper.config.config import Config
3
3
  from credsweeper.filters import ValueCoupleKeywordCheck, ValueCamelCaseCheck, ValueNumberCheck, ValuePatternCheck
4
- from credsweeper.filters.group import Group
4
+ from credsweeper.filters.group.group import Group
5
5
 
6
6
 
7
7
  class TokenPattern(Group):