credsweeper-1.11.2-py3-none-any.whl → credsweeper-1.11.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of credsweeper might be problematic. Click here for more details.

Files changed (55)
  1. credsweeper/__init__.py +1 -1
  2. credsweeper/__main__.py +6 -4
  3. credsweeper/app.py +7 -3
  4. credsweeper/common/keyword_pattern.py +15 -9
  5. credsweeper/common/morpheme_checklist.txt +4 -2
  6. credsweeper/credentials/line_data.py +14 -10
  7. credsweeper/deep_scanner/abstract_scanner.py +10 -1
  8. credsweeper/deep_scanner/deep_scanner.py +19 -8
  9. credsweeper/deep_scanner/docx_scanner.py +1 -1
  10. credsweeper/deep_scanner/encoder_scanner.py +2 -2
  11. credsweeper/deep_scanner/html_scanner.py +3 -3
  12. credsweeper/deep_scanner/jks_scanner.py +2 -4
  13. credsweeper/deep_scanner/lang_scanner.py +2 -2
  14. credsweeper/deep_scanner/lzma_scanner.py +40 -0
  15. credsweeper/deep_scanner/pkcs12_scanner.py +3 -5
  16. credsweeper/deep_scanner/xml_scanner.py +2 -2
  17. credsweeper/file_handler/data_content_provider.py +21 -12
  18. credsweeper/filters/value_array_dictionary_check.py +3 -1
  19. credsweeper/filters/value_azure_token_check.py +1 -2
  20. credsweeper/filters/value_base64_part_check.py +30 -21
  21. credsweeper/filters/value_discord_bot_check.py +1 -2
  22. credsweeper/filters/value_entropy_base32_check.py +11 -31
  23. credsweeper/filters/value_entropy_base36_check.py +11 -34
  24. credsweeper/filters/value_entropy_base64_check.py +19 -48
  25. credsweeper/filters/value_entropy_base_check.py +37 -0
  26. credsweeper/filters/value_file_path_check.py +1 -1
  27. credsweeper/filters/value_hex_number_check.py +3 -3
  28. credsweeper/filters/value_json_web_token_check.py +4 -5
  29. credsweeper/filters/value_string_type_check.py +11 -3
  30. credsweeper/filters/value_token_base32_check.py +0 -4
  31. credsweeper/filters/value_token_base36_check.py +0 -4
  32. credsweeper/filters/value_token_base64_check.py +0 -4
  33. credsweeper/filters/value_token_check.py +1 -1
  34. credsweeper/ml_model/features/file_extension.py +1 -1
  35. credsweeper/ml_model/features/morpheme_dense.py +0 -4
  36. credsweeper/ml_model/features/rule_name.py +1 -1
  37. credsweeper/ml_model/features/word_in_path.py +0 -9
  38. credsweeper/ml_model/features/word_in_postamble.py +0 -11
  39. credsweeper/ml_model/features/word_in_preamble.py +0 -11
  40. credsweeper/ml_model/features/word_in_transition.py +0 -11
  41. credsweeper/ml_model/features/word_in_value.py +0 -11
  42. credsweeper/ml_model/features/word_in_variable.py +0 -11
  43. credsweeper/ml_model/ml_validator.py +4 -3
  44. credsweeper/rules/config.yaml +238 -208
  45. credsweeper/scanner/scan_type/scan_type.py +2 -3
  46. credsweeper/scanner/scanner.py +7 -1
  47. credsweeper/secret/config.json +16 -5
  48. credsweeper/utils/pem_key_detector.py +4 -5
  49. credsweeper/utils/util.py +67 -144
  50. {credsweeper-1.11.2.dist-info → credsweeper-1.11.3.dist-info}/METADATA +1 -1
  51. {credsweeper-1.11.2.dist-info → credsweeper-1.11.3.dist-info}/RECORD +54 -53
  52. credsweeper/utils/entropy_validator.py +0 -72
  53. {credsweeper-1.11.2.dist-info → credsweeper-1.11.3.dist-info}/WHEEL +0 -0
  54. {credsweeper-1.11.2.dist-info → credsweeper-1.11.3.dist-info}/entry_points.txt +0 -0
  55. {credsweeper-1.11.2.dist-info → credsweeper-1.11.3.dist-info}/licenses/LICENSE +0 -0
credsweeper/scanner/scan_type/scan_type.py CHANGED
@@ -38,13 +38,12 @@ class ScanType(ABC):
38
38
  raise NotImplementedError()
39
39
 
40
40
  @classmethod
41
- def filtering(cls, config: Config, target: AnalysisTarget, line_data: LineData, filters: List[Filter]) -> bool:
41
+ def filtering(cls, target: AnalysisTarget, line_data: LineData, filters: List[Filter]) -> bool:
42
42
  """Check if line data should be removed based on filters.
43
43
 
44
44
  If `use_filters` option is false, always return False
45
45
 
46
46
  Args:
47
- config: dict of credsweeper configuration
48
47
  target: AnalysisTarget from which `line_data` was obtained
49
48
  line_data: Line data to check with `filters`
50
49
  filters: Filters to use
@@ -112,7 +111,7 @@ class ScanType(ABC):
112
111
  bypass_start = line_data.value_end
113
112
  bypass_end = offset_end
114
113
 
115
- if config.use_filters and cls.filtering(config, target, line_data, filters):
114
+ if config.use_filters and cls.filtering(target, line_data, filters):
116
115
  if line_data.variable and 0 <= line_data.variable_start < line_data.variable_end:
117
116
  # may be next matched item will be not filtered - let search it after variable
118
117
  bypass_start = line_data.variable_end
credsweeper/scanner/scanner.py CHANGED
@@ -146,7 +146,13 @@ class Scanner:
146
146
  # "cache" - YAPF and pycharm formatters ...
147
147
  matched_keyword = \
148
148
  target_line_stripped_len >= self.min_keyword_len and ( #
149
- '=' in target_line_stripped or ':' in target_line_stripped) #
149
+ '=' in target_line_stripped
150
+ or ':' in target_line_stripped
151
+ or "set" in target_line_stripped
152
+ or "#define" in target_line_stripped
153
+ or "%define" in target_line_stripped
154
+ or "%global" in target_line_stripped
155
+ ) #
150
156
  matched_pem_key = \
151
157
  target_line_stripped_len >= self.min_pem_key_len \
152
158
  and PEM_BEGIN_PATTERN in target_line_stripped and "PRIVATE" in target_line_stripped
credsweeper/secret/config.json CHANGED
@@ -2,10 +2,13 @@
2
2
  "exclude": {
3
3
  "pattern": [],
4
4
  "containers": [
5
+ ".aar",
5
6
  ".apk",
6
7
  ".bz2",
7
8
  ".gz",
9
+ ".lzma",
8
10
  ".tar",
11
+ ".xz",
9
12
  ".zip"
10
13
  ],
11
14
  "documents": [
@@ -20,17 +23,20 @@
20
23
  ],
21
24
  "extension": [
22
25
  ".7z",
26
+ ".a",
23
27
  ".aac",
24
- ".aar",
25
28
  ".avi",
29
+ ".bin",
26
30
  ".bmp",
27
31
  ".class",
28
32
  ".css",
29
33
  ".dmg",
30
34
  ".ear",
31
35
  ".eot",
36
+ ".elf",
32
37
  ".exe",
33
38
  ".gif",
39
+ ".gmo",
34
40
  ".ico",
35
41
  ".img",
36
42
  ".info",
@@ -45,6 +51,7 @@
45
51
  ".mp4",
46
52
  ".npy",
47
53
  ".npz",
54
+ ".obj",
48
55
  ".ogg",
49
56
  ".pak",
50
57
  ".png",
@@ -52,10 +59,13 @@
52
59
  ".pyc",
53
60
  ".pyd",
54
61
  ".pyo",
62
+ ".rar",
55
63
  ".rc",
56
64
  ".rc2",
57
65
  ".rar",
58
66
  ".realm",
67
+ ".res",
68
+ ".rpm",
59
69
  ".s7z",
60
70
  ".scss",
61
71
  ".so",
@@ -70,6 +80,7 @@
70
80
  ".wav",
71
81
  ".webm",
72
82
  ".webp",
83
+ ".wma",
73
84
  ".woff",
74
85
  ".yuv"
75
86
  ],
@@ -160,13 +171,13 @@
160
171
  "line_num",
161
172
  "path",
162
173
  "info",
163
- "value",
164
- "value_start",
165
- "value_end",
166
174
  "variable",
167
175
  "variable_start",
168
176
  "variable_end",
169
- "entropy_validation"
177
+ "value",
178
+ "value_start",
179
+ "value_end",
180
+ "entropy"
170
181
  ],
171
182
  "candidate_output": [
172
183
  "rule",
credsweeper/utils/pem_key_detector.py CHANGED
@@ -4,12 +4,11 @@ import re
4
4
  import string
5
5
  from typing import List
6
6
 
7
- from credsweeper.common.constants import PEM_BEGIN_PATTERN, PEM_END_PATTERN, Chars
7
+ from credsweeper.common.constants import PEM_BEGIN_PATTERN, PEM_END_PATTERN, ENTROPY_LIMIT_BASE64
8
8
  from credsweeper.config import Config
9
9
  from credsweeper.credentials import LineData
10
10
  from credsweeper.file_handler.analysis_target import AnalysisTarget
11
11
  from credsweeper.utils import Util
12
- from credsweeper.utils.entropy_validator import EntropyValidator
13
12
 
14
13
  logger = logging.getLogger(__name__)
15
14
 
@@ -68,10 +67,10 @@ class PemKeyDetector:
68
67
  elif PEM_END_PATTERN in subline:
69
68
  if "PGP" in target.line_strip:
70
69
  # Check if entropy is high enough for base64 set with padding sign
71
- entropy_validator = EntropyValidator(key_data, Chars.BASE64STDPAD_CHARS)
72
- if entropy_validator.valid:
70
+ entropy = Util.get_shannon_entropy(key_data)
71
+ if ENTROPY_LIMIT_BASE64 <= entropy:
73
72
  return line_data
74
- logger.debug("Filtered with entropy %f '%s'", entropy_validator.entropy, key_data)
73
+ logger.debug("Filtered with entropy %f '%s'", entropy, key_data)
75
74
  if "OPENSSH" in target.line_strip:
76
75
  # Check whether the key is encrypted
77
76
  with contextlib.suppress(Exception):
credsweeper/utils/util.py CHANGED
@@ -12,13 +12,14 @@ from dataclasses import dataclass
12
12
  from pathlib import Path
13
13
  from typing import Any, Dict, List, Tuple, Optional, Union
14
14
 
15
+ import numpy as np
15
16
  import whatthepatch
16
17
  import yaml
17
18
  from lxml import etree
18
19
  from typing_extensions import TypedDict
19
20
 
20
21
  from credsweeper.common.constants import DiffRowType, AVAILABLE_ENCODINGS, \
21
- DEFAULT_ENCODING, LATIN_1, CHUNK_SIZE, MAX_LINE_LENGTH, CHUNK_STEP_SIZE
22
+ DEFAULT_ENCODING, LATIN_1, CHUNK_SIZE, MAX_LINE_LENGTH, CHUNK_STEP_SIZE, ASCII
22
23
 
23
24
  logger = logging.getLogger(__name__)
24
25
 
@@ -65,21 +66,17 @@ class Util:
65
66
  return result
66
67
 
67
68
  @staticmethod
68
- def get_shannon_entropy(data: str, iterator: str) -> float:
69
+ def get_shannon_entropy(data: Union[str, bytes]) -> float:
69
70
  """Borrowed from http://blog.dkbza.org/2007/05/scanning-data-for-entropy-anomalies.html."""
70
71
  if not data:
71
- return 0
72
-
73
- entropy = 0.
74
- data_len = float(len(data))
75
- for x in iterator:
76
- p_x = data.count(x) / data_len
77
- if p_x > 0:
78
- entropy += -p_x * math.log(p_x, 2)
79
-
72
+ return 0.
73
+ size = len(data)
74
+ _uniq, counts = np.unique(list(data), return_counts=True)
75
+ probabilities = counts / size
76
+ entropy = float(-np.sum(probabilities * np.log2(probabilities)))
80
77
  return entropy
81
78
 
82
- """Precalculated data for speedup"""
79
+ # Precalculated data for speedup
83
80
  MIN_DATA_ENTROPY: Dict[int, float] = {
84
81
  16: 1.66973671780348,
85
82
  20: 2.07723544540831,
@@ -153,41 +150,39 @@ class Util:
153
150
  return entropy < min_entropy
154
151
 
155
152
  @staticmethod
156
- def is_known(data: bytes) -> bool:
157
- """
158
- Returns true if any recognized binary format found
159
- """
160
- if Util.is_zip(data) \
161
- or Util.is_gzip(data) \
162
- or Util.is_tar(data) \
163
- or Util.is_bzip2(data) \
164
- or Util.is_com(data) \
165
- or Util.is_pdf(data) \
166
- or Util.is_elf(data):
167
- return True
153
+ def is_known(data: Union[bytes, bytearray]) -> bool:
154
+ """Returns True if any known binary format is found to prevent extra scan a file without an extension."""
155
+ if isinstance(data, (bytes, bytearray)):
156
+ if 127 <= len(data) and data.startswith(b"\x7f\x45\x4c\x46"):
157
+ # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
158
+ # minimal ELF is 127 bytes https://github.com/tchajed/minimal-elf
159
+ return True
168
160
  return False
169
161
 
170
162
  @staticmethod
171
- def is_binary(data: bytes) -> bool:
163
+ def is_binary(data: Union[bytes, bytearray]) -> bool:
172
164
  """
173
- Returns True when two zeroes sequence is found which never exists in text format (UTF-8, UTF-16)
174
- UTF-32 is not supported
165
+ Returns True when two zeroes sequence is found in begin of data.
166
+ The sequence never exists in text format (UTF-8, UTF-16). UTF-32 is not supported.
175
167
  """
176
168
  if 0 <= data.find(b"\0\0", 0, MAX_LINE_LENGTH):
177
169
  return True
178
- non_ascii_cnt = 0
179
- for n in range(min([len(data), MAX_LINE_LENGTH])):
180
- i = data[n]
181
- if 0x20 > i and i not in (0x09, 0x0A, 0x0D) or 0x7E < i < 0xA0:
182
- # less than space and not tab, line feed, line end
183
- non_ascii_cnt += 1
170
+ else:
171
+ return False
172
+
173
+ NOT_LATIN1_PRINTABLE_SET = (set(range(0,
174
+ 256)).difference(set(x for x in string.printable.encode(ASCII))).difference(
175
+ set(x for x in range(0xA0, 0x100))))
176
+
177
+ @staticmethod
178
+ def is_latin1(data: Union[bytes, bytearray]) -> bool:
179
+ """Returns True when data looks like LATIN-1 for first MAX_LINE_LENGTH bytes."""
180
+ result = False
184
181
  if data:
182
+ non_latin1_cnt = sum(1 for x in data[:MAX_LINE_LENGTH] if x in Util.NOT_LATIN1_PRINTABLE_SET)
185
183
  # experiment for 255217 binary files shown avg = 0.268264 ± 0.168767, so let choose minimal
186
- chunk_len = float(MAX_LINE_LENGTH if MAX_LINE_LENGTH < len(data) else len(data))
187
- result = 0.1 < non_ascii_cnt / chunk_len
188
- else:
189
- # empty data case
190
- result = False
184
+ chunk_len = min(MAX_LINE_LENGTH, len(data))
185
+ result = 0.1 > non_latin1_cnt / chunk_len
191
186
  return result
192
187
 
193
188
  @staticmethod
@@ -231,7 +226,7 @@ class Util:
231
226
  encodings = AVAILABLE_ENCODINGS
232
227
  for encoding in encodings:
233
228
  try:
234
- if binary_suggest and LATIN_1 == encoding and (Util.is_known(content) or Util.is_binary(content)):
229
+ if binary_suggest and LATIN_1 == encoding and (Util.is_binary(content) or not Util.is_latin1(content)):
235
230
  # LATIN_1 may convert data (bytes in range 0x80:0xFF are transformed)
236
231
  # so skip this encoding when checking binaries
237
232
  logger.warning("Binary file detected")
@@ -374,7 +369,7 @@ class Util:
374
369
  line = change["line"]
375
370
  if isinstance(line, str):
376
371
  rows_data.extend(Util.preprocess_diff_rows(change.get("new"), change.get("old"), line))
377
- elif isinstance(line, bytes):
372
+ elif isinstance(line, (bytes, bytearray)):
378
373
  logger.warning("The feature is available with the deep scan option")
379
374
  else:
380
375
  logger.error(f"Unknown type of line {type(line)}")
@@ -382,9 +377,9 @@ class Util:
382
377
  return rows_data
383
378
 
384
379
  @staticmethod
385
- def is_zip(data: bytes) -> bool:
380
+ def is_zip(data: Union[bytes, bytearray]) -> bool:
386
381
  """According https://en.wikipedia.org/wiki/List_of_file_signatures"""
387
- if isinstance(data, bytes) and 3 < len(data):
382
+ if isinstance(data, (bytes, bytearray)) and 3 < len(data):
388
383
  # PK
389
384
  if data.startswith(b"PK"):
390
385
  if 0x03 == data[2] and 0x04 == data[3]:
@@ -398,18 +393,18 @@ class Util:
398
393
  return False
399
394
 
400
395
  @staticmethod
401
- def is_com(data: bytes) -> bool:
396
+ def is_com(data: Union[bytes, bytearray]) -> bool:
402
397
  """According https://en.wikipedia.org/wiki/List_of_file_signatures"""
403
- if isinstance(data, bytes) and 8 < len(data):
398
+ if isinstance(data, (bytes, bytearray)) and 8 < len(data):
404
399
  if data.startswith(b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"):
405
400
  # Compound File Binary Format: doc, xls, ppt, msi, msg
406
401
  return True
407
402
  return False
408
403
 
409
404
  @staticmethod
410
- def is_tar(data: bytes) -> bool:
405
+ def is_tar(data: Union[bytes, bytearray]) -> bool:
411
406
  """According https://en.wikipedia.org/wiki/List_of_file_signatures"""
412
- if isinstance(data, bytes) and 512 <= len(data):
407
+ if isinstance(data, (bytes, bytearray)) and 512 <= len(data):
413
408
  if 0x75 == data[257] and 0x73 == data[258] and 0x74 == data[259] \
414
409
  and 0x61 == data[260] and 0x72 == data[261] and (
415
410
  0x00 == data[262] and 0x30 == data[263] and 0x30 == data[264]
@@ -425,9 +420,9 @@ class Util:
425
420
  return False
426
421
 
427
422
  @staticmethod
428
- def is_bzip2(data: bytes) -> bool:
423
+ def is_bzip2(data: Union[bytes, bytearray]) -> bool:
429
424
  """According https://en.wikipedia.org/wiki/Bzip2"""
430
- if isinstance(data, bytes) and 10 <= len(data):
425
+ if isinstance(data, (bytes, bytearray)) and 10 <= len(data):
431
426
  if data.startswith(b"\x42\x5A\x68") \
432
427
  and 0x31 <= data[3] <= 0x39 \
433
428
  and 0x31 == data[4] and 0x41 == data[5] and 0x59 == data[6] \
@@ -436,34 +431,41 @@ class Util:
436
431
  return False
437
432
 
438
433
  @staticmethod
439
- def is_gzip(data: bytes) -> bool:
434
+ def is_gzip(data: Union[bytes, bytearray]) -> bool:
440
435
  """According https://www.rfc-editor.org/rfc/rfc1952"""
441
- if isinstance(data, bytes) and 3 <= len(data):
436
+ if isinstance(data, (bytes, bytearray)) and 3 <= len(data):
442
437
  if data.startswith(b"\x1F\x8B\x08"):
443
438
  return True
444
439
  return False
445
440
 
446
441
  @staticmethod
447
- def is_pdf(data: bytes) -> bool:
442
+ def is_pdf(data: Union[bytes, bytearray]) -> bool:
448
443
  """According https://en.wikipedia.org/wiki/List_of_file_signatures - pdf"""
449
- if isinstance(data, bytes) and 5 <= len(data):
444
+ if isinstance(data, (bytes, bytearray)) and 5 <= len(data):
450
445
  if data.startswith(b"\x25\x50\x44\x46\x2D"):
451
446
  return True
452
447
  return False
453
448
 
454
449
  @staticmethod
455
- def is_jks(data: bytes) -> bool:
450
+ def is_jks(data: Union[bytes, bytearray]) -> bool:
456
451
  """According https://en.wikipedia.org/wiki/List_of_file_signatures - jks"""
457
- if isinstance(data, bytes) and 4 <= len(data):
452
+ if isinstance(data, (bytes, bytearray)) and 4 <= len(data):
458
453
  if data.startswith(b"\xFE\xED\xFE\xED"):
459
454
  return True
460
455
  return False
461
456
 
462
457
  @staticmethod
463
- def is_asn1(data: bytes) -> bool:
458
+ def is_lzma(data: Union[bytes, bytearray]) -> bool:
459
+ """According https://en.wikipedia.org/wiki/List_of_file_signatures - lzma also xz"""
460
+ if isinstance(data, (bytes, bytearray)) and 6 <= len(data):
461
+ if data.startswith(b"\xFD\x37\x7A\x58\x5A\x00") or data.startswith(b"\x5D\x00\x00"):
462
+ return True
463
+ return False
464
+
465
+ @staticmethod
466
+ def is_asn1(data: Union[bytes, bytearray]) -> bool:
464
467
  """Only sequence type 0x30 and size correctness is checked"""
465
- data_length = len(data)
466
- if isinstance(data, bytes) and 4 <= data_length:
468
+ if isinstance(data, (bytes, bytearray)) and 4 <= len(data):
467
469
  # sequence
468
470
  if 0x30 == data[0]:
469
471
  # https://www.oss.com/asn1/resources/asn1-made-simple/asn1-quick-reference/basic-encoding-rules.html#Lengths
@@ -471,7 +473,7 @@ class Util:
471
473
  byte_len = (0x7F & length)
472
474
  if 0x80 == length and data.endswith(b"\x00\x00"):
473
475
  return True
474
- elif 0x80 < length and 1 < byte_len < data_length: # additional check
476
+ elif 0x80 < length and 1 < byte_len < len(data): # additional check
475
477
  len_bytes = data[2:2 + byte_len]
476
478
  try:
477
479
  long_size = struct.unpack(">h", len_bytes)
@@ -482,26 +484,17 @@ class Util:
482
484
  length = data[2]
483
485
  else:
484
486
  byte_len = 0
485
- return data_length == length + 2 + byte_len
486
- return False
487
-
488
- @staticmethod
489
- def is_elf(data: Union[bytes, bytearray]) -> bool:
490
- """According to https://en.wikipedia.org/wiki/Executable_and_Linkable_Format use only 5 bytes"""
491
- if isinstance(data, (bytes, bytearray)) and 127 <= len(data):
492
- # minimal is 127 bytes https://github.com/tchajed/minimal-elf
493
- if data.startswith(b"\x7f\x45\x4c\x46") and (0x01 == data[5] or 0x02 == data[5]):
494
- return True
487
+ return len(data) == length + 2 + byte_len
495
488
  return False
496
489
 
497
490
  @staticmethod
498
491
  def is_html(data: Union[bytes, bytearray]) -> bool:
499
492
  """Used to detect html format. Suppose, invocation of is_xml() was True before."""
500
493
  if isinstance(data, (bytes, bytearray)):
501
- for opening_tag, closing_tag in [(b"<html>", b"</html>"), (b"<table", b"</table>"), (b"<p>", b"</p>"),
502
- (b"<span>", b"</span>"), (b"<div>", b"</div>"), (b"<li>", b"</li>"),
503
- (b"<ol>", b"</ol>"), (b"<ul>", b"</ul>"), (b"<th>", b"</th>"),
504
- (b"<tr>", b"</tr>"), (b"<td>", b"</td>")]:
494
+ for opening_tag, closing_tag in [(b"<html", b"</html>"), (b"<body", b"</body>"), (b"<table", b"</table>"),
495
+ (b"<p>", b"</p>"), (b"<span>", b"</span>"), (b"<div>", b"</div>"),
496
+ (b"<li>", b"</li>"), (b"<ol>", b"</ol>"), (b"<ul>", b"</ul>"),
497
+ (b"<th>", b"</th>"), (b"<tr>", b"</tr>"), (b"<td>", b"</td>")]:
505
498
  opening_pos = data.find(opening_tag, 0, MAX_LINE_LENGTH)
506
499
  if 0 <= opening_pos < data.find(closing_tag, opening_pos):
507
500
  # opening and closing tags were found - suppose it is an HTML
@@ -658,81 +651,11 @@ class Util:
658
651
  except Exception as exc:
659
652
  logging.error(f"Failed to write: {file_path} {exc}")
660
653
 
661
- @staticmethod
662
- def __extract_value(node: Any, value: Any) -> List[Any]:
663
- result = []
664
- for i in getattr(node, "targets"):
665
- if hasattr(i, "id"):
666
- result.append({getattr(i, "id"): value})
667
- else:
668
- logger.error(f"{str(i)} has no 'id'")
669
- return result
670
-
671
- @staticmethod
672
- def __extract_assign(node: Any) -> List[Any]:
673
- result = []
674
- if hasattr(node, "value") and hasattr(node, "targets"):
675
- value = getattr(node, "value")
676
- if hasattr(value, "value"):
677
- # python 3.8 - 3.10
678
- result.extend(Util.__extract_value(node, getattr(value, "value")))
679
- else:
680
- logger.error(f"value.{value} has no 'value' {dir(value)}")
681
- else:
682
- logger.error(f"{str(node)} has no 'value' {dir(node)}")
683
- return result
684
-
685
- @staticmethod
686
- def ast_to_dict(node: Any) -> List[Any]:
687
- """Recursive parsing AST tree of python source to list with strings"""
688
- result: List[Any] = []
689
- if hasattr(node, "value") and isinstance(node.value, str):
690
- result.append(node.value)
691
-
692
- if isinstance(node, ast.Module) \
693
- or isinstance(node, ast.FunctionDef):
694
- if hasattr(node, "body"):
695
- for i in node.body:
696
- x = Util.ast_to_dict(i)
697
- if x:
698
- result.extend(x)
699
- elif isinstance(node, ast.Import):
700
- logger.debug("Import:%s", str(node))
701
- elif isinstance(node, ast.Assign):
702
- result.extend(Util.__extract_assign(node))
703
- elif isinstance(node, ast.Expr) \
704
- or isinstance(node, ast.AnnAssign) \
705
- or isinstance(node, ast.AugAssign) \
706
- or isinstance(node, ast.Call) \
707
- or isinstance(node, ast.JoinedStr) \
708
- or isinstance(node, ast.Return) \
709
- or isinstance(node, ast.ImportFrom) \
710
- or isinstance(node, ast.Assert) \
711
- or isinstance(node, ast.Pass) \
712
- or isinstance(node, ast.Raise) \
713
- or isinstance(node, ast.Str) \
714
- or isinstance(node, ast.Name) \
715
- or isinstance(node, ast.FormattedValue) \
716
- or isinstance(node, ast.Global):
717
- if hasattr(node, "value"):
718
- result.extend(Util.ast_to_dict(getattr(node, "value")))
719
- if hasattr(node, "args"):
720
- for i in getattr(node, "args"):
721
- result.extend(Util.ast_to_dict(i))
722
- if hasattr(node, "values"):
723
- for i in getattr(node, "values"):
724
- result.extend(Util.ast_to_dict(i))
725
- else:
726
- logger.debug(f"skip:{str(node)}")
727
- else:
728
- logger.debug(f"unknown:{str(node)}")
729
- return result
730
-
731
654
  @staticmethod
732
655
  def parse_python(source: str) -> List[Any]:
733
- """Parse python source to list of strings and assignments"""
656
+ """Parse python source and back to remove strings merge and line wrap"""
734
657
  src = ast.parse(source)
735
- result = Util.ast_to_dict(src)
658
+ result = ast.unparse(src).splitlines()
736
659
  return result
737
660
 
738
661
  @staticmethod
{credsweeper-1.11.2.dist-info → credsweeper-1.11.3.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: credsweeper
3
- Version: 1.11.2
3
+ Version: 1.11.3
4
4
  Summary: Credential Sweeper
5
5
  Project-URL: Homepage, https://github.com/Samsung/CredSweeper
6
6
  Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues