credsweeper 1.13.1__py3-none-any.whl → 1.13.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

credsweeper/__init__.py CHANGED
@@ -24,4 +24,4 @@ __all__ = [
24
24
  "__version__"
25
25
  ]
26
26
 
27
- __version__ = "1.13.1"
27
+ __version__ = "1.13.2"
@@ -51,6 +51,7 @@ class AbstractScanner(ABC):
51
51
  @abstractmethod
52
52
  def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[List[Any], List[Any]]:
53
53
  """Returns possibly scan methods for the data depends on content and fallback scanners"""
54
+ raise NotImplementedError(__name__)
54
55
 
55
56
  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
56
57
 
@@ -0,0 +1,71 @@
1
+ import csv
2
+ import io
3
+ import logging
4
+ from abc import ABC
5
+ from typing import List, Optional, Dict, Any
6
+
7
+ from credsweeper.common.constants import MAX_LINE_LENGTH
8
+ from credsweeper.credentials.candidate import Candidate
9
+ from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
10
+ from credsweeper.file_handler.data_content_provider import DataContentProvider
11
+ from credsweeper.file_handler.struct_content_provider import StructContentProvider
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class CsvScanner(AbstractScanner, ABC):
    """Implements CSV scanning

    The delimiter is guessed from the first line of the text, the text is parsed
    into one dict per row keyed by the header names, and the rows are passed on
    to ``structure_scan`` wrapped in a ``StructContentProvider``.
    """

    sniffer = csv.Sniffer()
    # do not use space as separator to avoid hallucinations
    delimiters = ",;\t|\x1F"

    @classmethod
    def get_structure(cls, text: str) -> List[Dict[str, Any]]:
        """Reads a text as CSV standard with guessed dialect

        Args:
            text: the whole CSV document; its first line is used as the header

        Return:
            list of rows, each row is a dict with the header names as keys

        Raises:
            ValueError: no line break within the first MAX_LINE_LENGTH symbols,
                or a row with a number of values different from the header
            csv.Error: the delimiter cannot be guessed or strict parsing fails

        """
        # The header ends at the EARLIEST line break of either style.
        # Searching for '\r' alone first would pick up a carriage return that
        # belongs to a later line and sniff the delimiter from several lines.
        cr_pos = text.find('\r', 0, MAX_LINE_LENGTH)
        lf_pos = text.find('\n', 0, MAX_LINE_LENGTH)
        if 0 <= cr_pos and (0 > lf_pos or cr_pos < lf_pos):
            # windows style \r\n
            first_line_end = cr_pos
            line_terminator = "\r\n"
        elif 0 <= lf_pos:
            # unix style \n
            first_line_end = lf_pos
            line_terminator = "\n"
        else:
            raise ValueError(f"No suitable line end found in {MAX_LINE_LENGTH} symbols")

        first_line = text[:first_line_end]
        dialect = cls.sniffer.sniff(first_line, delimiters=cls.delimiters)
        # NOTE: the csv documentation states that the reader recognises both '\r' and '\n'
        # as end-of-line and ignores lineterminator; the argument is kept for explicitness
        reader = csv.DictReader(io.StringIO(text),
                                delimiter=dialect.delimiter,
                                lineterminator=line_terminator,
                                strict=True)
        # check the constant columns number for all rows
        # fieldnames is None when the reader has no header row to consume
        fields_number = sum(1 for x in (reader.fieldnames or ()) if x is not None)
        rows: List[Dict[str, Any]] = []
        for row in reader:
            # DictReader keeps surplus values under the key None and fills missing values
            # with None, so both kinds of malformed row are detected by this condition
            if len(row) != fields_number or any(x is None for x in row.values()):
                # None means no separator used
                raise ValueError(f"Different columns number in row '{row}' - mismatch {fields_number}")
            rows.append(row)
        return rows

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Tries to scan each row as structure with column name in key

        Args:
            data_provider: provider whose ``text`` is parsed as CSV
            depth: passed through unchanged to ``structure_scan``
            recursive_limit_size: size budget; reduced before it is passed to ``structure_scan``

        Return:
            the result of ``structure_scan`` for the parsed rows, or None when the text
            yields no rows or any exception is raised while parsing or scanning

        """
        try:
            if rows := self.get_structure(data_provider.text):
                struct_content_provider = StructContentProvider(struct=rows,
                                                                file_path=data_provider.file_path,
                                                                file_type=data_provider.file_type,
                                                                info=f"{data_provider.info}|CSV")
                # len(x) of a row dict is its number of columns, not the size of its values
                # NOTE(review): confirm that counting columns is the intended budget reduction
                new_limit = recursive_limit_size - sum(len(x) for x in rows)
                struct_candidates = self.structure_scan(struct_content_provider, depth, new_limit)
                return struct_candidates
        except Exception as csv_exc:
            # best effort: presumably most scanned text is not CSV, so a failure is only logged
            logger.debug(f"{data_provider.file_path}:{csv_exc}")
        return None
@@ -6,6 +6,7 @@ from credsweeper.scanner.scanner import Scanner
6
6
  from credsweeper.utils.util import Util
7
7
  from .byte_scanner import ByteScanner
8
8
  from .bzip2_scanner import Bzip2Scanner
9
+ from .csv_scanner import CsvScanner
9
10
  from .deb_scanner import DebScanner
10
11
  from .docx_scanner import DocxScanner
11
12
  from .eml_scanner import EmlScanner
@@ -39,6 +40,7 @@ class DeepScanner(
39
40
  ByteScanner, #
40
41
  Bzip2Scanner, #
41
42
  DocxScanner, #
43
+ CsvScanner, #
42
44
  EncoderScanner, #
43
45
  GzipScanner, #
44
46
  HtmlScanner, #
@@ -160,16 +162,18 @@ class DeepScanner(
160
162
  deep_scanners.append(EmlScanner)
161
163
  else:
162
164
  if 0 < depth:
163
- # formal patch looks like an eml
165
+ # a formal patch looks like an eml
164
166
  deep_scanners.append(PatchScanner)
165
167
  fallback_scanners.append(EmlScanner)
166
168
  fallback_scanners.append(ByteScanner)
167
169
  elif not Util.is_binary(data):
170
+ # keep ByteScanner first to apply real value position if possible
171
+ deep_scanners.append(ByteScanner)
168
172
  if 0 < depth:
169
173
  deep_scanners.append(PatchScanner)
170
174
  deep_scanners.append(EncoderScanner)
171
175
  deep_scanners.append(LangScanner)
172
- deep_scanners.append(ByteScanner)
176
+ deep_scanners.append(CsvScanner)
173
177
  else:
174
178
  if 0 < depth:
175
179
  deep_scanners.append(StringsScanner)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: credsweeper
3
- Version: 1.13.1
3
+ Version: 1.13.2
4
4
  Summary: Credential Sweeper
5
5
  Project-URL: Homepage, https://github.com/Samsung/CredSweeper
6
6
  Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues
@@ -1,4 +1,4 @@
1
- credsweeper/__init__.py,sha256=yLDEjKoQ6mpJua6hiONEwDSmOcatmytLF1x3VW2ElxE,992
1
+ credsweeper/__init__.py,sha256=3B7CNbsl5AZXQ1OcJTQF-1OaFkOV-m5TBbnHJucu8EQ,992
2
2
  credsweeper/__main__.py,sha256=cxbrvejofMAhaiOi9fO0qEKVME_HLGTeZAVmcppFsW8,22591
3
3
  credsweeper/app.py,sha256=U6V-LQ7OiqNjtP45ih-0HwJqTz5dpgkrG3sRCoWU21A,21001
4
4
  credsweeper/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -18,11 +18,12 @@ credsweeper/credentials/candidate_key.py,sha256=NsYGPqqjfm6z2MGtcQbiUGiWmKUCuzfl
18
18
  credsweeper/credentials/credential_manager.py,sha256=Au5oCtz0HY33e83P5EN5v6LQMLYIbtZGvaClOc-uKZ0,4197
19
19
  credsweeper/credentials/line_data.py,sha256=3Y9cuUGt9equShHlFGfumZG_S5_EHDFplBESYD2IFqs,22525
20
20
  credsweeper/deep_scanner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
- credsweeper/deep_scanner/abstract_scanner.py,sha256=Dnfb4JpkOP1eF4MTxcGAXZ4E4H8T-KpFybYVqhjyGrE,15819
21
+ credsweeper/deep_scanner/abstract_scanner.py,sha256=AVeTfIa602bgvL5ao4ck1ucgSLJh4rK-TI9birvjpTM,15863
22
22
  credsweeper/deep_scanner/byte_scanner.py,sha256=U1XTqFmfJ71GQs16n1KZ-Grw2pJ3_K7ozHWDzI_vCbo,1135
23
23
  credsweeper/deep_scanner/bzip2_scanner.py,sha256=-Ops6s3MqurtPdrl56qgqYCHLMyWeP-TZ477FcpKMaA,1634
24
+ credsweeper/deep_scanner/csv_scanner.py,sha256=iI8DalkVsx_Fl-zXd5bSGS_Q2aOqu7gvoQxnR4qeTbA,3187
24
25
  credsweeper/deep_scanner/deb_scanner.py,sha256=cu2jCMUdctn3ezoOo7kHAC-85NCaY9m_b_rI3aNMmuQ,2495
25
- credsweeper/deep_scanner/deep_scanner.py,sha256=ZZuNs4mOITR00gYD8aQ1yn6QhvkssaVDWF5MKJHyvJA,6502
26
+ credsweeper/deep_scanner/deep_scanner.py,sha256=NI5E3GhogLGnNOWYejchdpdK_yyB9HvlK0IE9nvkHZ0,6686
26
27
  credsweeper/deep_scanner/docx_scanner.py,sha256=yL4IvSbiU9AkGHXUHYNV9rOlzfgPJ-dtMn7VxDg6pkw,4163
27
28
  credsweeper/deep_scanner/eml_scanner.py,sha256=zZSFRSX-5h5iMwofGNRYRjRxr_RdwnQyjP1D6ULz2P8,3522
28
29
  credsweeper/deep_scanner/encoder_scanner.py,sha256=3bip1bV7dW9BKeG7aIwWWyoi3whVuXxose5eLk3oYlA,1325
@@ -156,8 +157,8 @@ credsweeper/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
156
157
  credsweeper/utils/hop_stat.py,sha256=vMd_1lcpDo4yaFhi61X0tJeeE83qUbzPckvxZcrgsgs,3010
157
158
  credsweeper/utils/pem_key_detector.py,sha256=5BwapI5Ub0DyK_X1v8rcTLe6U4uDjFjgWgitxBpHx9E,7588
158
159
  credsweeper/utils/util.py,sha256=EihI-BmLH6_qvjuIle32JQlz7L9lZPSFjs7NmRYZago,28117
159
- credsweeper-1.13.1.dist-info/METADATA,sha256=0IWfuJdX1FJdvsvOtz2Wu7-Lv2hg1X5RKTB6iROd-_U,10392
160
- credsweeper-1.13.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
161
- credsweeper-1.13.1.dist-info/entry_points.txt,sha256=SLGNZshvi3zpWPhVmRP-oDXRMRPBS4tzRDy6xYOXwqA,58
162
- credsweeper-1.13.1.dist-info/licenses/LICENSE,sha256=aU7mGjBKbmRHNLVXXzcPdKmTtBxRwDPtjflQRfN7fFg,1065
163
- credsweeper-1.13.1.dist-info/RECORD,,
160
+ credsweeper-1.13.2.dist-info/METADATA,sha256=SAkuZqAvcQ0C8xArhVDsAC905xfz2mwIU5RSp0mYA5Q,10392
161
+ credsweeper-1.13.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
162
+ credsweeper-1.13.2.dist-info/entry_points.txt,sha256=SLGNZshvi3zpWPhVmRP-oDXRMRPBS4tzRDy6xYOXwqA,58
163
+ credsweeper-1.13.2.dist-info/licenses/LICENSE,sha256=aU7mGjBKbmRHNLVXXzcPdKmTtBxRwDPtjflQRfN7fFg,1065
164
+ credsweeper-1.13.2.dist-info/RECORD,,