credsweeper 1.11.4__py3-none-any.whl → 1.11.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- credsweeper/__init__.py +1 -1
- credsweeper/deep_scanner/abstract_scanner.py +269 -14
- credsweeper/deep_scanner/deb_scanner.py +33 -26
- credsweeper/deep_scanner/deep_scanner.py +34 -247
- credsweeper/deep_scanner/jclass_scanner.py +74 -0
- credsweeper/deep_scanner/patch_scanner.py +48 -0
- credsweeper/deep_scanner/pkcs_scanner.py +41 -0
- credsweeper/deep_scanner/rpm_scanner.py +49 -0
- credsweeper/deep_scanner/sqlite3_scanner.py +79 -0
- credsweeper/file_handler/data_content_provider.py +1 -2
- credsweeper/file_handler/patches_provider.py +4 -1
- credsweeper/filters/__init__.py +1 -0
- credsweeper/filters/value_base64_key_check.py +9 -14
- credsweeper/filters/value_json_web_key_check.py +37 -0
- credsweeper/rules/config.yaml +48 -6
- credsweeper/scanner/scan_type/multi_pattern.py +1 -2
- credsweeper/secret/config.json +6 -6
- credsweeper/utils/pem_key_detector.py +2 -2
- credsweeper/utils/util.py +143 -75
- {credsweeper-1.11.4.dist-info → credsweeper-1.11.5.dist-info}/METADATA +3 -6
- {credsweeper-1.11.4.dist-info → credsweeper-1.11.5.dist-info}/RECORD +24 -19
- credsweeper/deep_scanner/pkcs12_scanner.py +0 -45
- {credsweeper-1.11.4.dist-info → credsweeper-1.11.5.dist-info}/WHEEL +0 -0
- {credsweeper-1.11.4.dist-info → credsweeper-1.11.5.dist-info}/entry_points.txt +0 -0
- {credsweeper-1.11.4.dist-info → credsweeper-1.11.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
-
import string
|
|
4
3
|
import warnings
|
|
5
4
|
from functools import cached_property
|
|
6
5
|
from typing import List, Optional, Any, Generator, Callable, Tuple
|
|
@@ -385,7 +384,7 @@ class DataContentProvider(ContentProvider):
|
|
|
385
384
|
return False
|
|
386
385
|
try:
|
|
387
386
|
self.decoded = Util.decode_base64( #
|
|
388
|
-
|
|
387
|
+
text=Util.PEM_CLEANING_PATTERN.sub(r'', self.text).replace('\\', ''), #
|
|
389
388
|
padding_safe=True, #
|
|
390
389
|
urlsafe_detect=True) #
|
|
391
390
|
except Exception as exc:
|
|
@@ -3,12 +3,12 @@ import logging
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import List, Union, Tuple, Sequence
|
|
5
5
|
|
|
6
|
-
from credsweeper import TextContentProvider
|
|
7
6
|
from credsweeper.common.constants import DiffRowType
|
|
8
7
|
from credsweeper.config import Config
|
|
9
8
|
from credsweeper.file_handler.abstract_provider import AbstractProvider
|
|
10
9
|
from credsweeper.file_handler.diff_content_provider import DiffContentProvider
|
|
11
10
|
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
|
|
11
|
+
from credsweeper.file_handler.text_content_provider import TextContentProvider
|
|
12
12
|
from credsweeper.utils import Util
|
|
13
13
|
|
|
14
14
|
logger = logging.getLogger(__name__)
|
|
@@ -42,6 +42,9 @@ class PatchesProvider(AbstractProvider):
|
|
|
42
42
|
elif isinstance(file_path, io.BytesIO):
|
|
43
43
|
the_patch = Util.decode_bytes(file_path.read())
|
|
44
44
|
raw_patches.append(the_patch)
|
|
45
|
+
elif isinstance(file_path, tuple) and 1 < len(file_path) and isinstance(file_path[1], io.BytesIO):
|
|
46
|
+
the_patch = Util.decode_bytes(file_path[1].read())
|
|
47
|
+
raw_patches.append(the_patch)
|
|
45
48
|
else:
|
|
46
49
|
logger.error(f"Unknown path type: {file_path}")
|
|
47
50
|
|
credsweeper/filters/__init__.py
CHANGED
|
@@ -27,6 +27,7 @@ from credsweeper.filters.value_grafana_check import ValueGrafanaCheck
|
|
|
27
27
|
from credsweeper.filters.value_grafana_service_check import ValueGrafanaServiceCheck
|
|
28
28
|
from credsweeper.filters.value_hex_number_check import ValueHexNumberCheck
|
|
29
29
|
from credsweeper.filters.value_jfrog_token_check import ValueJfrogTokenCheck
|
|
30
|
+
from credsweeper.filters.value_json_web_key_check import ValueJsonWebKeyCheck
|
|
30
31
|
from credsweeper.filters.value_json_web_token_check import ValueJsonWebTokenCheck
|
|
31
32
|
from credsweeper.filters.value_last_word_check import ValueLastWordCheck
|
|
32
33
|
from credsweeper.filters.value_method_check import ValueMethodCheck
|
|
@@ -1,7 +1,4 @@
|
|
|
1
1
|
import contextlib
|
|
2
|
-
import string
|
|
3
|
-
|
|
4
|
-
from cryptography.hazmat.primitives import serialization
|
|
5
2
|
|
|
6
3
|
from credsweeper.config import Config
|
|
7
4
|
from credsweeper.credentials import LineData
|
|
@@ -13,6 +10,8 @@ from credsweeper.utils import Util
|
|
|
13
10
|
class ValueBase64KeyCheck(Filter):
|
|
14
11
|
"""Check that candidate contains base64 encoded private key"""
|
|
15
12
|
|
|
13
|
+
EXTRA_TRANS_TABLE = str.maketrans('', '', "\",'\\")
|
|
14
|
+
|
|
16
15
|
def __init__(self, config: Config = None) -> None:
|
|
17
16
|
self.config = config
|
|
18
17
|
|
|
@@ -29,12 +28,10 @@ class ValueBase64KeyCheck(Filter):
|
|
|
29
28
|
"""
|
|
30
29
|
|
|
31
30
|
with contextlib.suppress(Exception):
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
for x in string.whitespace:
|
|
37
|
-
text = text.replace(x, '')
|
|
31
|
+
# remove backslash escaping sequences
|
|
32
|
+
text = Util.PEM_CLEANING_PATTERN.sub(r'', line_data.value)
|
|
33
|
+
# remove whitespaces
|
|
34
|
+
text = text.translate(Util.WHITESPACE_TRANS_TABLE)
|
|
38
35
|
# clean sequence concatenation case:
|
|
39
36
|
text = text.replace("'+'", '')
|
|
40
37
|
text = text.replace('"+"', '')
|
|
@@ -43,12 +40,10 @@ class ValueBase64KeyCheck(Filter):
|
|
|
43
40
|
text = text.replace('%2F', '/')
|
|
44
41
|
text = text.replace('%3D', '=')
|
|
45
42
|
# clean any other chars which should not appear
|
|
46
|
-
|
|
47
|
-
text = text.replace(x, "")
|
|
43
|
+
text = text.translate(ValueBase64KeyCheck.EXTRA_TRANS_TABLE)
|
|
48
44
|
# only PEM standard encoding supported in regex pattern to cut off ending of the key
|
|
49
45
|
key = Util.decode_base64(text, padding_safe=True, urlsafe_detect=False)
|
|
50
|
-
private_key =
|
|
51
|
-
if
|
|
52
|
-
# access to size field check - some types have no size
|
|
46
|
+
private_key = Util.load_pk(key, password=None)
|
|
47
|
+
if Util.check_pk(private_key):
|
|
53
48
|
return False
|
|
54
49
|
return True
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import contextlib
|
|
2
|
+
|
|
3
|
+
from credsweeper.config import Config
|
|
4
|
+
from credsweeper.credentials import LineData
|
|
5
|
+
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
6
|
+
from credsweeper.filters import Filter
|
|
7
|
+
from credsweeper.utils import Util
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ValueJsonWebKeyCheck(Filter):
|
|
11
|
+
"""
|
|
12
|
+
Check that candidate is JWK which starts usually from 'e'
|
|
13
|
+
and have private parts of the key
|
|
14
|
+
https://datatracker.ietf.org/doc/html/rfc7517
|
|
15
|
+
https://datatracker.ietf.org/doc/html/rfc7518
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
def __init__(self, config: Config = None) -> None:
|
|
19
|
+
pass
|
|
20
|
+
|
|
21
|
+
def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
|
|
22
|
+
"""Run filter checks on received key which might be structured.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
line_data: credential candidate data
|
|
26
|
+
target: multiline target from which line data was obtained
|
|
27
|
+
|
|
28
|
+
Return:
|
|
29
|
+
True, when need to filter candidate and False if left
|
|
30
|
+
|
|
31
|
+
"""
|
|
32
|
+
with contextlib.suppress(Exception):
|
|
33
|
+
if data := Util.decode_base64(line_data.value, padding_safe=True, urlsafe_detect=True):
|
|
34
|
+
if b'"kty":' in data and (b'"oct"' in data and b'"k":' in data or
|
|
35
|
+
(b'"EC"' in data or b'"RSA"' in data) and b'"d":' in data):
|
|
36
|
+
return False
|
|
37
|
+
return True
|
credsweeper/rules/config.yaml
CHANGED
|
@@ -375,16 +375,16 @@
|
|
|
375
375
|
- code
|
|
376
376
|
- doc
|
|
377
377
|
|
|
378
|
-
- name: Heroku
|
|
378
|
+
- name: Heroku Credentials
|
|
379
379
|
severity: high
|
|
380
|
-
confidence:
|
|
380
|
+
confidence: strong
|
|
381
381
|
type: pattern
|
|
382
382
|
values:
|
|
383
|
-
- (?
|
|
383
|
+
- (?P<value>HRKU-([0-9A-Za-z_-]{60}|[0-9A-Fa-f]{8}(-[0-9A-Fa-f]{4}){3}-[0-9A-Fa-f]{12}))
|
|
384
384
|
filter_type: GeneralPattern
|
|
385
385
|
required_substrings:
|
|
386
|
-
-
|
|
387
|
-
min_line_len:
|
|
386
|
+
- HRKU-
|
|
387
|
+
min_line_len: 41
|
|
388
388
|
target:
|
|
389
389
|
- code
|
|
390
390
|
- doc
|
|
@@ -413,7 +413,49 @@
|
|
|
413
413
|
- ValueJsonWebTokenCheck
|
|
414
414
|
required_substrings:
|
|
415
415
|
- eyJ
|
|
416
|
-
min_line_len:
|
|
416
|
+
min_line_len: 64
|
|
417
|
+
target:
|
|
418
|
+
- code
|
|
419
|
+
- doc
|
|
420
|
+
|
|
421
|
+
- name: JSON Web Key
|
|
422
|
+
severity: medium
|
|
423
|
+
confidence: strong
|
|
424
|
+
type: pattern
|
|
425
|
+
values:
|
|
426
|
+
- (?P<value>\b(e(yJ|yAi|woi|wog|w0K)|W(yJ|3si|wp7|wog|w0K|3sK))[0-9A-Za-z_+/-]{60,8000})
|
|
427
|
+
filter_type:
|
|
428
|
+
- ValueJsonWebKeyCheck
|
|
429
|
+
required_substrings:
|
|
430
|
+
- eyJ
|
|
431
|
+
- eyAi
|
|
432
|
+
- ewoi
|
|
433
|
+
- ewog
|
|
434
|
+
- ew0K
|
|
435
|
+
- WyJ
|
|
436
|
+
- W3si
|
|
437
|
+
- Wwp7
|
|
438
|
+
- Wwog
|
|
439
|
+
- Ww0K
|
|
440
|
+
- W3sK
|
|
441
|
+
min_line_len: 64
|
|
442
|
+
target:
|
|
443
|
+
- code
|
|
444
|
+
- doc
|
|
445
|
+
|
|
446
|
+
- name: JWK
|
|
447
|
+
severity: medium
|
|
448
|
+
confidence: moderate
|
|
449
|
+
type: multi
|
|
450
|
+
values:
|
|
451
|
+
- (?P<value>['"]?\b(?P<variable>kty)[^0-9A-Za-z_-]{1,8}(RSA|EC|oct)\b['"]?)
|
|
452
|
+
- (?P<variable>\b[dk])[^0-9A-Za-z_-]{1,8}(?P<value>[0-9A-Za-z_-]{22,8000})(?![=0-9A-Za-z_-])
|
|
453
|
+
filter_type:
|
|
454
|
+
- ValuePatternCheck
|
|
455
|
+
- ValueCoupleKeywordCheck(3)
|
|
456
|
+
required_substrings:
|
|
457
|
+
- kty
|
|
458
|
+
min_line_len: 8
|
|
417
459
|
target:
|
|
418
460
|
- code
|
|
419
461
|
- doc
|
|
@@ -37,8 +37,7 @@ class MultiPattern(ScanType):
|
|
|
37
37
|
"Rules provided to MultiPattern.run should have pattern_type equal to MULTI_PATTERN"
|
|
38
38
|
|
|
39
39
|
candidates = cls._get_candidates(config, rule, target)
|
|
40
|
-
|
|
41
|
-
return candidates
|
|
40
|
+
|
|
42
41
|
for candidate in candidates:
|
|
43
42
|
line_pos_margin = 1
|
|
44
43
|
while line_pos_margin <= cls.MAX_SEARCH_MARGIN:
|
credsweeper/secret/config.json
CHANGED
|
@@ -5,9 +5,13 @@
|
|
|
5
5
|
".aar",
|
|
6
6
|
".apk",
|
|
7
7
|
".bz2",
|
|
8
|
+
".class",
|
|
8
9
|
".gz",
|
|
10
|
+
".jar",
|
|
9
11
|
".lzma",
|
|
12
|
+
".rpm",
|
|
10
13
|
".tar",
|
|
14
|
+
".war",
|
|
11
15
|
".xz",
|
|
12
16
|
".zip"
|
|
13
17
|
],
|
|
@@ -28,7 +32,6 @@
|
|
|
28
32
|
".avi",
|
|
29
33
|
".bin",
|
|
30
34
|
".bmp",
|
|
31
|
-
".class",
|
|
32
35
|
".css",
|
|
33
36
|
".dmg",
|
|
34
37
|
".ear",
|
|
@@ -40,7 +43,6 @@
|
|
|
40
43
|
".ico",
|
|
41
44
|
".img",
|
|
42
45
|
".info",
|
|
43
|
-
".jar",
|
|
44
46
|
".jpeg",
|
|
45
47
|
".jpg",
|
|
46
48
|
".map",
|
|
@@ -62,10 +64,8 @@
|
|
|
62
64
|
".rar",
|
|
63
65
|
".rc",
|
|
64
66
|
".rc2",
|
|
65
|
-
".rar",
|
|
66
67
|
".realm",
|
|
67
68
|
".res",
|
|
68
|
-
".rpm",
|
|
69
69
|
".s7z",
|
|
70
70
|
".scss",
|
|
71
71
|
".so",
|
|
@@ -76,7 +76,6 @@
|
|
|
76
76
|
".ttf",
|
|
77
77
|
".vcxproj",
|
|
78
78
|
".vdproj",
|
|
79
|
-
".war",
|
|
80
79
|
".wav",
|
|
81
80
|
".webm",
|
|
82
81
|
".webp",
|
|
@@ -161,7 +160,8 @@
|
|
|
161
160
|
"bruteforce_list": [
|
|
162
161
|
"",
|
|
163
162
|
"changeit",
|
|
164
|
-
"changeme"
|
|
163
|
+
"changeme",
|
|
164
|
+
"tizen"
|
|
165
165
|
],
|
|
166
166
|
"check_for_literals": true,
|
|
167
167
|
"min_pattern_value_length": 12,
|
|
@@ -4,7 +4,7 @@ import re
|
|
|
4
4
|
import string
|
|
5
5
|
from typing import List
|
|
6
6
|
|
|
7
|
-
from credsweeper.common.constants import PEM_BEGIN_PATTERN, PEM_END_PATTERN
|
|
7
|
+
from credsweeper.common.constants import PEM_BEGIN_PATTERN, PEM_END_PATTERN, Chars
|
|
8
8
|
from credsweeper.config import Config
|
|
9
9
|
from credsweeper.credentials import LineData
|
|
10
10
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
@@ -17,7 +17,7 @@ ENTROPY_LIMIT_BASE64 = 4.5
|
|
|
17
17
|
|
|
18
18
|
class PemKeyDetector:
|
|
19
19
|
"""Class to detect PEM PRIVATE keys only"""
|
|
20
|
-
base64set = set(
|
|
20
|
+
base64set = set(Chars.BASE64STDPAD_CHARS.value)
|
|
21
21
|
|
|
22
22
|
ignore_starts = [PEM_BEGIN_PATTERN, "Proc-Type", "Version", "DEK-Info"]
|
|
23
23
|
wrap_characters = "\\'\";,[]#*!"
|
credsweeper/utils/util.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import ast
|
|
2
2
|
import base64
|
|
3
|
+
import contextlib
|
|
3
4
|
import json
|
|
4
5
|
import logging
|
|
5
6
|
import math
|
|
6
7
|
import os
|
|
8
|
+
import random
|
|
7
9
|
import re
|
|
8
10
|
import string
|
|
9
|
-
import struct
|
|
10
11
|
import tarfile
|
|
11
12
|
from dataclasses import dataclass
|
|
12
13
|
from pathlib import Path
|
|
@@ -15,6 +16,18 @@ from typing import Any, Dict, List, Tuple, Optional, Union
|
|
|
15
16
|
import numpy as np
|
|
16
17
|
import whatthepatch
|
|
17
18
|
import yaml
|
|
19
|
+
from cryptography.hazmat.primitives import hashes
|
|
20
|
+
from cryptography.hazmat.primitives.asymmetric import padding
|
|
21
|
+
from cryptography.hazmat.primitives.asymmetric.dh import DHPrivateKey, DHPublicKey
|
|
22
|
+
from cryptography.hazmat.primitives.asymmetric.dsa import DSAPrivateKey, DSAPublicKey
|
|
23
|
+
from cryptography.hazmat.primitives.asymmetric.ec import EllipticCurvePrivateKey, EllipticCurvePublicKey
|
|
24
|
+
from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey, Ed25519PublicKey
|
|
25
|
+
from cryptography.hazmat.primitives.asymmetric.ed448 import Ed448PrivateKey, Ed448PublicKey
|
|
26
|
+
from cryptography.hazmat.primitives.asymmetric.types import PrivateKeyTypes
|
|
27
|
+
from cryptography.hazmat.primitives.asymmetric.x25519 import X25519PublicKey, X25519PrivateKey
|
|
28
|
+
from cryptography.hazmat.primitives.asymmetric.x448 import X448PublicKey, X448PrivateKey
|
|
29
|
+
from cryptography.hazmat.primitives.serialization import load_der_private_key
|
|
30
|
+
from cryptography.hazmat.primitives.serialization.pkcs12 import load_key_and_certificates
|
|
18
31
|
from lxml import etree
|
|
19
32
|
from typing_extensions import TypedDict
|
|
20
33
|
|
|
@@ -152,11 +165,10 @@ class Util:
|
|
|
152
165
|
@staticmethod
|
|
153
166
|
def is_known(data: Union[bytes, bytearray]) -> bool:
|
|
154
167
|
"""Returns True if any known binary format is found to prevent extra scan a file without an extension."""
|
|
155
|
-
if isinstance(data, (bytes, bytearray)):
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
return True
|
|
168
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"\x7f\x45\x4c\x46") and 127 <= len(data):
|
|
169
|
+
# https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
|
|
170
|
+
# minimal ELF is 127 bytes https://github.com/tchajed/minimal-elf
|
|
171
|
+
return True
|
|
160
172
|
return False
|
|
161
173
|
|
|
162
174
|
@staticmethod
|
|
@@ -165,10 +177,9 @@ class Util:
|
|
|
165
177
|
Returns True when two zeroes sequence is found in begin of data.
|
|
166
178
|
The sequence never exists in text format (UTF-8, UTF-16). UTF-32 is not supported.
|
|
167
179
|
"""
|
|
168
|
-
if 0 <= data.find(b"\0\0", 0, MAX_LINE_LENGTH):
|
|
180
|
+
if isinstance(data, (bytes, bytearray)) and 0 <= data.find(b"\0\0", 0, MAX_LINE_LENGTH):
|
|
169
181
|
return True
|
|
170
|
-
|
|
171
|
-
return False
|
|
182
|
+
return False
|
|
172
183
|
|
|
173
184
|
NOT_LATIN1_PRINTABLE_SET = set(range(0, 256)) \
|
|
174
185
|
.difference(set(x for x in string.printable.encode(ASCII))) \
|
|
@@ -182,7 +193,7 @@ class Util:
|
|
|
182
193
|
non_latin1_cnt = sum(1 for x in data[:MAX_LINE_LENGTH] if x in Util.NOT_LATIN1_PRINTABLE_SET)
|
|
183
194
|
# experiment for 255217 binary files shown avg = 0.268264 ± 0.168767, so let choose minimal
|
|
184
195
|
chunk_len = min(MAX_LINE_LENGTH, len(data))
|
|
185
|
-
result = 0.1 > non_latin1_cnt / chunk_len
|
|
196
|
+
result = bool(0.1 > non_latin1_cnt / chunk_len)
|
|
186
197
|
return result
|
|
187
198
|
|
|
188
199
|
@staticmethod
|
|
@@ -379,26 +390,31 @@ class Util:
|
|
|
379
390
|
@staticmethod
|
|
380
391
|
def is_zip(data: Union[bytes, bytearray]) -> bool:
|
|
381
392
|
"""According https://en.wikipedia.org/wiki/List_of_file_signatures"""
|
|
382
|
-
if isinstance(data, (bytes, bytearray)) and
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
# empty archive - no sense to scan
|
|
388
|
-
|
|
389
|
-
|
|
393
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"PK") and 4 <= len(data):
|
|
394
|
+
if 0x03 == data[2] and 0x04 == data[3]:
|
|
395
|
+
# normal PK
|
|
396
|
+
return True
|
|
397
|
+
elif 0x05 == data[2] and 0x06 == data[3]:
|
|
398
|
+
# empty archive - no sense to scan in other scanners, so let it be a zip
|
|
399
|
+
return True
|
|
400
|
+
elif 0x07 == data[2] and 0x08 == data[3]:
|
|
390
401
|
# spanned archive - NOT SUPPORTED
|
|
391
|
-
|
|
392
|
-
return False
|
|
402
|
+
return False
|
|
393
403
|
return False
|
|
394
404
|
|
|
395
405
|
@staticmethod
|
|
396
406
|
def is_com(data: Union[bytes, bytearray]) -> bool:
|
|
397
407
|
"""According https://en.wikipedia.org/wiki/List_of_file_signatures"""
|
|
398
|
-
if isinstance(data, (bytes, bytearray)) and
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
408
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"):
|
|
409
|
+
# Compound File Binary Format: doc, xls, ppt, msi, msg
|
|
410
|
+
return True
|
|
411
|
+
return False
|
|
412
|
+
|
|
413
|
+
@staticmethod
|
|
414
|
+
def is_rpm(data: Union[bytes, bytearray]) -> bool:
|
|
415
|
+
"""According https://en.wikipedia.org/wiki/List_of_file_signatures"""
|
|
416
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"\xED\xAB\xEE\xDB"):
|
|
417
|
+
return True
|
|
402
418
|
return False
|
|
403
419
|
|
|
404
420
|
@staticmethod
|
|
@@ -411,88 +427,105 @@ class Util:
|
|
|
411
427
|
or
|
|
412
428
|
0x20 == data[262] and 0x20 == data[263] and 0x00 == data[264]
|
|
413
429
|
):
|
|
414
|
-
|
|
430
|
+
with contextlib.suppress(Exception):
|
|
415
431
|
chksum = tarfile.nti(data[148:156]) # type: ignore
|
|
416
432
|
unsigned_chksum, signed_chksum = tarfile.calc_chksums(data) # type: ignore
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
logger.exception(f"Corrupted TAR ? {exc}")
|
|
433
|
+
if chksum == unsigned_chksum or chksum == signed_chksum:
|
|
434
|
+
return True
|
|
420
435
|
return False
|
|
421
436
|
|
|
422
437
|
@staticmethod
|
|
423
438
|
def is_deb(data: Union[bytes, bytearray]) -> bool:
|
|
424
439
|
"""According https://en.wikipedia.org/wiki/Deb_(file_format)"""
|
|
425
|
-
if isinstance(data, (bytes, bytearray)) and
|
|
440
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"!<arch>\n"):
|
|
426
441
|
return True
|
|
427
442
|
return False
|
|
428
443
|
|
|
429
444
|
@staticmethod
|
|
430
445
|
def is_bzip2(data: Union[bytes, bytearray]) -> bool:
|
|
431
446
|
"""According https://en.wikipedia.org/wiki/Bzip2"""
|
|
432
|
-
if isinstance(data, (bytes, bytearray)) and 10 <= len(data)
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
return True
|
|
447
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"\x42\x5A\x68") and 10 <= len(data) \
|
|
448
|
+
and 0x31 <= data[3] <= 0x39 \
|
|
449
|
+
and 0x31 == data[4] and 0x41 == data[5] and 0x59 == data[6] \
|
|
450
|
+
and 0x26 == data[7] and 0x53 == data[8] and 0x59 == data[9]:
|
|
451
|
+
return True
|
|
438
452
|
return False
|
|
439
453
|
|
|
440
454
|
@staticmethod
|
|
441
455
|
def is_gzip(data: Union[bytes, bytearray]) -> bool:
|
|
442
456
|
"""According https://www.rfc-editor.org/rfc/rfc1952"""
|
|
443
|
-
if isinstance(data, (bytes, bytearray)) and
|
|
444
|
-
|
|
445
|
-
return True
|
|
457
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"\x1F\x8B\x08"):
|
|
458
|
+
return True
|
|
446
459
|
return False
|
|
447
460
|
|
|
448
461
|
@staticmethod
|
|
449
462
|
def is_pdf(data: Union[bytes, bytearray]) -> bool:
|
|
450
463
|
"""According https://en.wikipedia.org/wiki/List_of_file_signatures - pdf"""
|
|
451
|
-
if isinstance(data, (bytes, bytearray)) and
|
|
452
|
-
|
|
453
|
-
|
|
464
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"%PDF-"):
|
|
465
|
+
return True
|
|
466
|
+
return False
|
|
467
|
+
|
|
468
|
+
@staticmethod
|
|
469
|
+
def is_jclass(data: Union[bytes, bytearray]) -> bool:
|
|
470
|
+
"""According https://en.wikipedia.org/wiki/List_of_file_signatures - java class"""
|
|
471
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"\xCA\xFE\xBA\xBE"):
|
|
472
|
+
return True
|
|
454
473
|
return False
|
|
455
474
|
|
|
456
475
|
@staticmethod
|
|
457
476
|
def is_jks(data: Union[bytes, bytearray]) -> bool:
|
|
458
477
|
"""According https://en.wikipedia.org/wiki/List_of_file_signatures - jks"""
|
|
459
|
-
if isinstance(data, (bytes, bytearray)) and
|
|
460
|
-
|
|
461
|
-
return True
|
|
478
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"\xFE\xED\xFE\xED"):
|
|
479
|
+
return True
|
|
462
480
|
return False
|
|
463
481
|
|
|
464
482
|
@staticmethod
|
|
465
483
|
def is_lzma(data: Union[bytes, bytearray]) -> bool:
|
|
466
484
|
"""According https://en.wikipedia.org/wiki/List_of_file_signatures - lzma also xz"""
|
|
467
|
-
if isinstance(data, (bytes, bytearray)) and
|
|
468
|
-
|
|
469
|
-
|
|
485
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith((b"\xFD7zXZ\x00", b"\x5D\x00\x00")):
|
|
486
|
+
return True
|
|
487
|
+
return False
|
|
488
|
+
|
|
489
|
+
@classmethod
|
|
490
|
+
def is_sqlite3(cls, data):
|
|
491
|
+
"""According https://en.wikipedia.org/wiki/List_of_file_signatures - SQLite Database"""
|
|
492
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"SQLite format 3\0"):
|
|
493
|
+
return True
|
|
470
494
|
return False
|
|
471
495
|
|
|
472
496
|
@staticmethod
|
|
473
|
-
def is_asn1(data: Union[bytes, bytearray]) ->
|
|
474
|
-
"""Only sequence type 0x30 and size correctness
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
497
|
+
def is_asn1(data: Union[bytes, bytearray]) -> int:
|
|
498
|
+
"""Only sequence type 0x30 and size correctness are checked
|
|
499
|
+
Returns size of ASN1 data over 128 bytes or 0 if no interested data
|
|
500
|
+
"""
|
|
501
|
+
if isinstance(data, (bytes, bytearray)) and 2 <= len(data) and 0x30 == data[0]:
|
|
502
|
+
# https://www.oss.com/asn1/resources/asn1-made-simple/asn1-quick-reference/basic-encoding-rules.html#Lengths
|
|
503
|
+
length = data[1]
|
|
504
|
+
if 0x80 == length:
|
|
505
|
+
if data.endswith(b"\x00\x00"):
|
|
506
|
+
# assume, all data are ASN1 of various size
|
|
507
|
+
return len(data)
|
|
508
|
+
else:
|
|
509
|
+
# skip the case where the ASN1 size is smaller than the actual data
|
|
510
|
+
return 0
|
|
511
|
+
elif 0x80 < length:
|
|
480
512
|
byte_len = 0x7F & length
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
length = long_size[0]
|
|
490
|
-
elif 0x80 < length and 1 == byte_len: # small size
|
|
491
|
-
length = data[2]
|
|
513
|
+
len_limit = 2 + byte_len
|
|
514
|
+
if 4 >= byte_len and len(data) >= len_limit:
|
|
515
|
+
length = 0
|
|
516
|
+
for i in range(2, len_limit):
|
|
517
|
+
length <<= 8
|
|
518
|
+
length |= data[i]
|
|
519
|
+
if len(data) >= length + len_limit:
|
|
520
|
+
return length + len_limit
|
|
492
521
|
else:
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
522
|
+
# unsupported huge size
|
|
523
|
+
return 0
|
|
524
|
+
else:
|
|
525
|
+
# less than 0x80
|
|
526
|
+
if len(data) >= length + 2:
|
|
527
|
+
return length + 2
|
|
528
|
+
return 0
|
|
496
529
|
|
|
497
530
|
@staticmethod
|
|
498
531
|
def is_html(data: Union[bytes, bytearray]) -> bool:
|
|
@@ -547,12 +580,12 @@ class Util:
|
|
|
547
580
|
@staticmethod
|
|
548
581
|
def is_eml(data: Union[bytes, bytearray]) -> bool:
|
|
549
582
|
"""According to https://datatracker.ietf.org/doc/html/rfc822 lookup the fields: Date, From, To or Subject"""
|
|
550
|
-
if isinstance(data, (bytes, bytearray))
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
583
|
+
if isinstance(data, (bytes, bytearray)) \
|
|
584
|
+
and (b"\nDate:" in data or data.startswith(b"Date:")) \
|
|
585
|
+
and (b"\nFrom:" in data or data.startswith(b"From:")) \
|
|
586
|
+
and (b"\nTo:" in data or data.startswith(b"To:")) \
|
|
587
|
+
and (b"\nSubject:" in data or data.startswith(b"Subject:")):
|
|
588
|
+
return True
|
|
556
589
|
return False
|
|
557
590
|
|
|
558
591
|
@staticmethod
|
|
@@ -665,10 +698,13 @@ class Util:
|
|
|
665
698
|
result = ast.unparse(src).splitlines()
|
|
666
699
|
return result
|
|
667
700
|
|
|
701
|
+
PEM_CLEANING_PATTERN = re.compile(r"\\[tnrvf]")
|
|
702
|
+
WHITESPACE_TRANS_TABLE = str.maketrans('', '', string.whitespace)
|
|
703
|
+
|
|
668
704
|
@staticmethod
|
|
669
705
|
def decode_base64(text: str, padding_safe: bool = False, urlsafe_detect=False) -> bytes:
|
|
670
706
|
"""decode text to bytes with / without padding detect and urlsafe symbols"""
|
|
671
|
-
value = text
|
|
707
|
+
value = text.translate(Util.WHITESPACE_TRANS_TABLE)
|
|
672
708
|
if padding_safe:
|
|
673
709
|
pad_num = 0x3 & len(value)
|
|
674
710
|
if pad_num:
|
|
@@ -679,6 +715,38 @@ class Util:
|
|
|
679
715
|
decoded = base64.b64decode(value, validate=True)
|
|
680
716
|
return decoded
|
|
681
717
|
|
|
718
|
+
@staticmethod
|
|
719
|
+
def load_pk(data: bytes, password: Optional[bytes] = None) -> Optional[PrivateKeyTypes]:
|
|
720
|
+
"""Try to load private key from PKCS1, PKCS8 and PKCS12 formats"""
|
|
721
|
+
with contextlib.suppress(Exception):
|
|
722
|
+
# PKCS1, PKCS8 probes
|
|
723
|
+
private_key = load_der_private_key(data, password)
|
|
724
|
+
return private_key
|
|
725
|
+
with contextlib.suppress(Exception):
|
|
726
|
+
# PKCS12 probe
|
|
727
|
+
private_key, _certificate, _additional_certificates = load_key_and_certificates(data, password)
|
|
728
|
+
return private_key
|
|
729
|
+
return None
|
|
730
|
+
|
|
731
|
+
RANDOM_DATA = random.randbytes(20)
|
|
732
|
+
|
|
733
|
+
@staticmethod
|
|
734
|
+
def check_pk(pkey: PrivateKeyTypes) -> bool:
|
|
735
|
+
"""Check private key with encrypt-decrypt random data"""
|
|
736
|
+
if isinstance(pkey, (EllipticCurvePrivateKey, DSAPrivateKey, Ed448PrivateKey, Ed25519PrivateKey, DHPrivateKey,
|
|
737
|
+
X448PrivateKey, X25519PrivateKey)):
|
|
738
|
+
# One does not simply perform check the keys
|
|
739
|
+
return True
|
|
740
|
+
if isinstance(pkey, (EllipticCurvePublicKey, DSAPublicKey, Ed448PublicKey, Ed25519PublicKey, DHPublicKey,
|
|
741
|
+
X448PublicKey, X25519PublicKey)) or not pkey:
|
|
742
|
+
# These aren't the keys we're looking for
|
|
743
|
+
return False
|
|
744
|
+
# DSA, RSA
|
|
745
|
+
pd = padding.OAEP(mgf=padding.MGF1(algorithm=hashes.SHA1()), algorithm=hashes.SHA1(), label=None)
|
|
746
|
+
ciphertext = pkey.public_key().encrypt(Util.RANDOM_DATA, padding=pd)
|
|
747
|
+
refurb = pkey.decrypt(ciphertext, padding=pd)
|
|
748
|
+
return bool(refurb == Util.RANDOM_DATA)
|
|
749
|
+
|
|
682
750
|
@staticmethod
|
|
683
751
|
def get_chunks(line_len: int) -> List[Tuple[int, int]]:
|
|
684
752
|
"""Returns chunks positions for given line length"""
|