credsweeper 1.11.4__py3-none-any.whl → 1.11.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of credsweeper might be problematic. Click here for more details.
- credsweeper/__init__.py +21 -15
- credsweeper/__main__.py +141 -35
- credsweeper/app.py +11 -11
- credsweeper/config/__init__.py +0 -1
- credsweeper/config/config.py +1 -1
- credsweeper/credentials/__init__.py +0 -5
- credsweeper/credentials/augment_candidates.py +1 -1
- credsweeper/credentials/candidate.py +1 -1
- credsweeper/credentials/credential_manager.py +1 -1
- credsweeper/credentials/line_data.py +2 -2
- credsweeper/deep_scanner/__init__.py +0 -1
- credsweeper/deep_scanner/abstract_scanner.py +272 -17
- credsweeper/deep_scanner/byte_scanner.py +1 -1
- credsweeper/deep_scanner/bzip2_scanner.py +2 -2
- credsweeper/deep_scanner/deb_scanner.py +34 -27
- credsweeper/deep_scanner/deep_scanner.py +37 -250
- credsweeper/deep_scanner/docx_scanner.py +1 -1
- credsweeper/deep_scanner/eml_scanner.py +1 -1
- credsweeper/deep_scanner/encoder_scanner.py +1 -1
- credsweeper/deep_scanner/gzip_scanner.py +2 -2
- credsweeper/deep_scanner/html_scanner.py +1 -1
- credsweeper/deep_scanner/jclass_scanner.py +74 -0
- credsweeper/deep_scanner/jks_scanner.py +1 -1
- credsweeper/deep_scanner/lang_scanner.py +1 -1
- credsweeper/deep_scanner/lzma_scanner.py +2 -2
- credsweeper/deep_scanner/mxfile_scanner.py +1 -1
- credsweeper/deep_scanner/patch_scanner.py +48 -0
- credsweeper/deep_scanner/pdf_scanner.py +1 -1
- credsweeper/deep_scanner/pkcs_scanner.py +41 -0
- credsweeper/deep_scanner/pptx_scanner.py +1 -1
- credsweeper/deep_scanner/rpm_scanner.py +49 -0
- credsweeper/deep_scanner/sqlite3_scanner.py +79 -0
- credsweeper/deep_scanner/tar_scanner.py +2 -2
- credsweeper/deep_scanner/tmx_scanner.py +2 -2
- credsweeper/deep_scanner/xlsx_scanner.py +2 -2
- credsweeper/deep_scanner/xml_scanner.py +1 -1
- credsweeper/deep_scanner/zip_scanner.py +2 -2
- credsweeper/file_handler/__init__.py +0 -15
- credsweeper/file_handler/abstract_provider.py +3 -4
- credsweeper/file_handler/byte_content_provider.py +1 -1
- credsweeper/file_handler/content_provider.py +1 -1
- credsweeper/file_handler/data_content_provider.py +2 -3
- credsweeper/file_handler/diff_content_provider.py +133 -3
- credsweeper/file_handler/file_path_extractor.py +2 -2
- credsweeper/file_handler/files_provider.py +4 -4
- credsweeper/file_handler/patches_provider.py +10 -8
- credsweeper/file_handler/text_content_provider.py +1 -1
- credsweeper/filters/__init__.py +2 -2
- credsweeper/filters/filter.py +2 -2
- credsweeper/filters/group/__init__.py +0 -2
- credsweeper/filters/group/general_keyword.py +2 -2
- credsweeper/filters/group/general_pattern.py +2 -2
- credsweeper/filters/group/group.py +16 -5
- credsweeper/filters/group/password_keyword.py +2 -2
- credsweeper/filters/group/token_pattern.py +2 -2
- credsweeper/filters/group/url_credentials_group.py +2 -2
- credsweeper/filters/group/weird_base36_token.py +2 -2
- credsweeper/filters/group/weird_base64_token.py +2 -2
- credsweeper/filters/line_git_binary_check.py +3 -3
- credsweeper/filters/line_specific_key_check.py +4 -4
- credsweeper/filters/line_uue_part_check.py +3 -3
- credsweeper/filters/value_allowlist_check.py +4 -4
- credsweeper/filters/value_array_dictionary_check.py +3 -3
- credsweeper/filters/value_atlassian_token_check.py +4 -4
- credsweeper/filters/value_azure_token_check.py +4 -4
- credsweeper/filters/value_base32_data_check.py +4 -4
- credsweeper/filters/value_base64_data_check.py +4 -4
- credsweeper/filters/value_base64_encoded_pem_check.py +4 -4
- credsweeper/filters/value_base64_key_check.py +13 -18
- credsweeper/filters/value_base64_part_check.py +4 -4
- credsweeper/filters/value_basic_auth_check.py +36 -0
- credsweeper/filters/value_blocklist_check.py +3 -3
- credsweeper/filters/value_camel_case_check.py +4 -4
- credsweeper/filters/value_couple_keyword_check.py +3 -3
- credsweeper/filters/value_dictionary_keyword_check.py +3 -3
- credsweeper/filters/value_dictionary_value_length_check.py +3 -3
- credsweeper/filters/value_discord_bot_check.py +4 -4
- credsweeper/filters/value_entropy_base_check.py +4 -4
- credsweeper/filters/value_file_path_check.py +5 -4
- credsweeper/filters/value_github_check.py +3 -3
- credsweeper/filters/value_grafana_check.py +4 -4
- credsweeper/filters/value_grafana_service_check.py +3 -3
- credsweeper/filters/value_hex_number_check.py +3 -3
- credsweeper/filters/value_jfrog_token_check.py +4 -4
- credsweeper/filters/value_json_web_key_check.py +37 -0
- credsweeper/filters/value_json_web_token_check.py +4 -4
- credsweeper/filters/value_last_word_check.py +3 -3
- credsweeper/filters/value_method_check.py +3 -3
- credsweeper/filters/value_not_allowed_pattern_check.py +4 -4
- credsweeper/filters/value_not_part_encoded_check.py +3 -3
- credsweeper/filters/value_number_check.py +3 -3
- credsweeper/filters/value_pattern_check.py +3 -3
- credsweeper/filters/value_similarity_check.py +3 -3
- credsweeper/filters/value_split_keyword_check.py +3 -3
- credsweeper/filters/value_string_type_check.py +3 -3
- credsweeper/filters/value_token_base_check.py +3 -3
- credsweeper/filters/value_token_check.py +3 -3
- credsweeper/logger/__init__.py +0 -1
- credsweeper/logger/logger.py +1 -1
- credsweeper/ml_model/__init__.py +0 -1
- credsweeper/ml_model/features/entropy_evaluation.py +1 -1
- credsweeper/ml_model/features/feature.py +1 -1
- credsweeper/ml_model/features/file_extension.py +1 -1
- credsweeper/ml_model/features/has_html_tag.py +2 -2
- credsweeper/ml_model/features/is_secret_numeric.py +1 -1
- credsweeper/ml_model/features/length_of_attribute.py +1 -1
- credsweeper/ml_model/features/morpheme_dense.py +1 -1
- credsweeper/ml_model/features/rule_name.py +1 -1
- credsweeper/ml_model/features/search_in_attribute.py +1 -1
- credsweeper/ml_model/features/word_in.py +1 -1
- credsweeper/ml_model/features/word_in_path.py +1 -1
- credsweeper/ml_model/features/word_in_postamble.py +1 -1
- credsweeper/ml_model/features/word_in_preamble.py +1 -1
- credsweeper/ml_model/features/word_in_transition.py +1 -1
- credsweeper/ml_model/features/word_in_value.py +1 -1
- credsweeper/ml_model/features/word_in_variable.py +1 -1
- credsweeper/ml_model/ml_validator.py +3 -2
- credsweeper/rules/__init__.py +0 -1
- credsweeper/rules/config.yaml +114 -25
- credsweeper/rules/rule.py +4 -3
- credsweeper/scanner/__init__.py +0 -1
- credsweeper/scanner/scan_type/__init__.py +0 -5
- credsweeper/scanner/scan_type/multi_pattern.py +5 -6
- credsweeper/scanner/scan_type/pem_key_pattern.py +4 -4
- credsweeper/scanner/scan_type/scan_type.py +4 -4
- credsweeper/scanner/scan_type/single_pattern.py +4 -4
- credsweeper/scanner/scanner.py +8 -5
- credsweeper/secret/config.json +6 -6
- credsweeper/utils/__init__.py +0 -1
- credsweeper/utils/pem_key_detector.py +5 -5
- credsweeper/utils/util.py +143 -206
- {credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/METADATA +3 -6
- credsweeper-1.11.6.dist-info/RECORD +160 -0
- credsweeper/deep_scanner/pkcs12_scanner.py +0 -45
- credsweeper-1.11.4.dist-info/RECORD +0 -154
- {credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/WHEEL +0 -0
- {credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/entry_points.txt +0 -0
- {credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/licenses/LICENSE +0 -0
credsweeper/secret/config.json
CHANGED
|
@@ -5,9 +5,13 @@
|
|
|
5
5
|
".aar",
|
|
6
6
|
".apk",
|
|
7
7
|
".bz2",
|
|
8
|
+
".class",
|
|
8
9
|
".gz",
|
|
10
|
+
".jar",
|
|
9
11
|
".lzma",
|
|
12
|
+
".rpm",
|
|
10
13
|
".tar",
|
|
14
|
+
".war",
|
|
11
15
|
".xz",
|
|
12
16
|
".zip"
|
|
13
17
|
],
|
|
@@ -28,7 +32,6 @@
|
|
|
28
32
|
".avi",
|
|
29
33
|
".bin",
|
|
30
34
|
".bmp",
|
|
31
|
-
".class",
|
|
32
35
|
".css",
|
|
33
36
|
".dmg",
|
|
34
37
|
".ear",
|
|
@@ -40,7 +43,6 @@
|
|
|
40
43
|
".ico",
|
|
41
44
|
".img",
|
|
42
45
|
".info",
|
|
43
|
-
".jar",
|
|
44
46
|
".jpeg",
|
|
45
47
|
".jpg",
|
|
46
48
|
".map",
|
|
@@ -62,10 +64,8 @@
|
|
|
62
64
|
".rar",
|
|
63
65
|
".rc",
|
|
64
66
|
".rc2",
|
|
65
|
-
".rar",
|
|
66
67
|
".realm",
|
|
67
68
|
".res",
|
|
68
|
-
".rpm",
|
|
69
69
|
".s7z",
|
|
70
70
|
".scss",
|
|
71
71
|
".so",
|
|
@@ -76,7 +76,6 @@
|
|
|
76
76
|
".ttf",
|
|
77
77
|
".vcxproj",
|
|
78
78
|
".vdproj",
|
|
79
|
-
".war",
|
|
80
79
|
".wav",
|
|
81
80
|
".webm",
|
|
82
81
|
".webp",
|
|
@@ -161,7 +160,8 @@
|
|
|
161
160
|
"bruteforce_list": [
|
|
162
161
|
"",
|
|
163
162
|
"changeit",
|
|
164
|
-
"changeme"
|
|
163
|
+
"changeme",
|
|
164
|
+
"tizen"
|
|
165
165
|
],
|
|
166
166
|
"check_for_literals": true,
|
|
167
167
|
"min_pattern_value_length": 12,
|
credsweeper/utils/__init__.py
CHANGED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from credsweeper.utils.util import DiffRowData, Util, DiffDict
|
|
credsweeper/utils/pem_key_detector.py
CHANGED
|
@@ -4,11 +4,11 @@ import re
|
|
|
4
4
|
import string
|
|
5
5
|
from typing import List
|
|
6
6
|
|
|
7
|
-
from credsweeper.common.constants import PEM_BEGIN_PATTERN, PEM_END_PATTERN
|
|
8
|
-
from credsweeper.config import Config
|
|
9
|
-
from credsweeper.credentials import LineData
|
|
7
|
+
from credsweeper.common.constants import PEM_BEGIN_PATTERN, PEM_END_PATTERN, Chars
|
|
8
|
+
from credsweeper.config.config import Config
|
|
9
|
+
from credsweeper.credentials.line_data import LineData
|
|
10
10
|
from credsweeper.file_handler.analysis_target import AnalysisTarget
|
|
11
|
-
from credsweeper.utils import Util
|
|
11
|
+
from credsweeper.utils.util import Util
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
@@ -17,7 +17,7 @@ ENTROPY_LIMIT_BASE64 = 4.5
|
|
|
17
17
|
|
|
18
18
|
class PemKeyDetector:
|
|
19
19
|
"""Class to detect PEM PRIVATE keys only"""
|
|
20
|
-
base64set = set(
|
|
20
|
+
base64set = set(Chars.BASE64STDPAD_CHARS.value)
|
|
21
21
|
|
|
22
22
|
ignore_starts = [PEM_BEGIN_PATTERN, "Proc-Type", "Version", "DEK-Info"]
|
|
23
23
|
wrap_characters = "\\'\";,[]#*!"
|
credsweeper/utils/util.py
CHANGED
|
@@ -1,46 +1,38 @@
|
|
|
1
1
|
import ast
|
|
2
2
|
import base64
|
|
3
|
+
import contextlib
|
|
3
4
|
import json
|
|
4
5
|
import logging
|
|
5
6
|
import math
|
|
6
7
|
import os
|
|
8
|
+
import random
|
|
7
9
|
import re
|
|
8
10
|
import string
|
|
9
|
-
import struct
|
|
10
11
|
import tarfile
|
|
11
|
-
from dataclasses import dataclass
|
|
12
12
|
from pathlib import Path
|
|
13
13
|
from typing import Any, Dict, List, Tuple, Optional, Union
|
|
14
14
|
|
|
15
15
|
import numpy as np
|
|
16
|
-
import whatthepatch
|
|
17
16
|
import yaml
|
|
17
|
+
from cryptography.hazmat.primitives import hashes
|
|
18
|
+
from cryptography.hazmat.primitives.asymmetric import padding
|
|
19
|
+
from cryptography.hazmat.primitives.asymmetric.dh import DHPrivateKey, DHPublicKey
|
|
20
|
+
from cryptography.hazmat.primitives.asymmetric.dsa import DSAPrivateKey, DSAPublicKey
|
|
21
|
+
from cryptography.hazmat.primitives.asymmetric.ec import EllipticCurvePrivateKey, EllipticCurvePublicKey
|
|
22
|
+
from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey, Ed25519PublicKey
|
|
23
|
+
from cryptography.hazmat.primitives.asymmetric.ed448 import Ed448PrivateKey, Ed448PublicKey
|
|
24
|
+
from cryptography.hazmat.primitives.asymmetric.types import PrivateKeyTypes
|
|
25
|
+
from cryptography.hazmat.primitives.asymmetric.x25519 import X25519PublicKey, X25519PrivateKey
|
|
26
|
+
from cryptography.hazmat.primitives.asymmetric.x448 import X448PublicKey, X448PrivateKey
|
|
27
|
+
from cryptography.hazmat.primitives.serialization import load_der_private_key
|
|
28
|
+
from cryptography.hazmat.primitives.serialization.pkcs12 import load_key_and_certificates
|
|
18
29
|
from lxml import etree
|
|
19
|
-
from typing_extensions import TypedDict
|
|
20
30
|
|
|
21
|
-
from credsweeper.common.constants import
|
|
31
|
+
from credsweeper.common.constants import AVAILABLE_ENCODINGS, \
|
|
22
32
|
DEFAULT_ENCODING, LATIN_1, CHUNK_SIZE, MAX_LINE_LENGTH, CHUNK_STEP_SIZE, ASCII
|
|
23
33
|
|
|
24
34
|
logger = logging.getLogger(__name__)
|
|
25
35
|
|
|
26
|
-
DiffDict = TypedDict(
|
|
27
|
-
"DiffDict",
|
|
28
|
-
{
|
|
29
|
-
"old": Optional[int], #
|
|
30
|
-
"new": Optional[int], #
|
|
31
|
-
"line": Union[str, bytes], # bytes are possibly since whatthepatch v1.0.4
|
|
32
|
-
"hunk": Any # not used
|
|
33
|
-
})
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
@dataclass(frozen=True)
|
|
37
|
-
class DiffRowData:
|
|
38
|
-
"""Class for keeping data of diff row."""
|
|
39
|
-
|
|
40
|
-
line_type: DiffRowType
|
|
41
|
-
line_numb: int
|
|
42
|
-
line: str
|
|
43
|
-
|
|
44
36
|
|
|
45
37
|
class Util:
|
|
46
38
|
"""Class that contains different useful methods."""
|
|
@@ -152,11 +144,10 @@ class Util:
|
|
|
152
144
|
@staticmethod
|
|
153
145
|
def is_known(data: Union[bytes, bytearray]) -> bool:
|
|
154
146
|
"""Returns True if any known binary format is found to prevent extra scan a file without an extension."""
|
|
155
|
-
if isinstance(data, (bytes, bytearray)):
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
return True
|
|
147
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"\x7f\x45\x4c\x46") and 127 <= len(data):
|
|
148
|
+
# https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
|
|
149
|
+
# minimal ELF is 127 bytes https://github.com/tchajed/minimal-elf
|
|
150
|
+
return True
|
|
160
151
|
return False
|
|
161
152
|
|
|
162
153
|
@staticmethod
|
|
@@ -165,10 +156,9 @@ class Util:
|
|
|
165
156
|
Returns True when two zeroes sequence is found in begin of data.
|
|
166
157
|
The sequence never exists in text format (UTF-8, UTF-16). UTF-32 is not supported.
|
|
167
158
|
"""
|
|
168
|
-
if 0 <= data.find(b"\0\0", 0, MAX_LINE_LENGTH):
|
|
159
|
+
if isinstance(data, (bytes, bytearray)) and 0 <= data.find(b"\0\0", 0, MAX_LINE_LENGTH):
|
|
169
160
|
return True
|
|
170
|
-
|
|
171
|
-
return False
|
|
161
|
+
return False
|
|
172
162
|
|
|
173
163
|
NOT_LATIN1_PRINTABLE_SET = set(range(0, 256)) \
|
|
174
164
|
.difference(set(x for x in string.printable.encode(ASCII))) \
|
|
@@ -182,7 +172,7 @@ class Util:
|
|
|
182
172
|
non_latin1_cnt = sum(1 for x in data[:MAX_LINE_LENGTH] if x in Util.NOT_LATIN1_PRINTABLE_SET)
|
|
183
173
|
# experiment for 255217 binary files shown avg = 0.268264 ± 0.168767, so let choose minimal
|
|
184
174
|
chunk_len = min(MAX_LINE_LENGTH, len(data))
|
|
185
|
-
result = 0.1 > non_latin1_cnt / chunk_len
|
|
175
|
+
result = bool(0.1 > non_latin1_cnt / chunk_len)
|
|
186
176
|
return result
|
|
187
177
|
|
|
188
178
|
@staticmethod
|
|
@@ -267,138 +257,33 @@ class Util:
|
|
|
267
257
|
return lines
|
|
268
258
|
|
|
269
259
|
@staticmethod
|
|
270
|
-
def
|
|
271
|
-
"""
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
change_type: change type to select, DiffRowType.ADDED or DiffRowType.DELETED
|
|
276
|
-
|
|
277
|
-
Return:
|
|
278
|
-
return dict with ``{file paths: list of file row changes}``, where
|
|
279
|
-
elements of list of file row changes represented as::
|
|
280
|
-
|
|
281
|
-
{
|
|
282
|
-
"old": line number before diff,
|
|
283
|
-
"new": line number after diff,
|
|
284
|
-
"line": line text,
|
|
285
|
-
"hunk": diff hunk number
|
|
286
|
-
}
|
|
287
|
-
|
|
288
|
-
"""
|
|
289
|
-
if not raw_patch:
|
|
290
|
-
return {}
|
|
291
|
-
|
|
292
|
-
added_files, deleted_files = {}, {}
|
|
293
|
-
try:
|
|
294
|
-
for patch in whatthepatch.parse_patch(raw_patch):
|
|
295
|
-
if patch.changes is None:
|
|
296
|
-
logger.warning(f"Patch '{str(patch.header)}' cannot be scanned")
|
|
297
|
-
continue
|
|
298
|
-
changes = []
|
|
299
|
-
for change in patch.changes:
|
|
300
|
-
change_dict = change._asdict()
|
|
301
|
-
changes.append(change_dict)
|
|
302
|
-
|
|
303
|
-
added_files[patch.header.new_path] = changes
|
|
304
|
-
deleted_files[patch.header.old_path] = changes
|
|
305
|
-
if change_type == DiffRowType.ADDED:
|
|
306
|
-
return added_files
|
|
307
|
-
elif change_type == DiffRowType.DELETED:
|
|
308
|
-
return deleted_files
|
|
309
|
-
else:
|
|
310
|
-
logger.error(f"Change type should be one of: '{DiffRowType.ADDED}', '{DiffRowType.DELETED}';"
|
|
311
|
-
f" but received {change_type}")
|
|
312
|
-
except Exception as exc:
|
|
313
|
-
logger.exception(exc)
|
|
314
|
-
return {}
|
|
315
|
-
|
|
316
|
-
@staticmethod
|
|
317
|
-
def preprocess_diff_rows(
|
|
318
|
-
added_line_number: Optional[int], #
|
|
319
|
-
deleted_line_number: Optional[int], #
|
|
320
|
-
line: str) -> List[DiffRowData]:
|
|
321
|
-
"""Auxiliary function to extend diff changes.
|
|
322
|
-
|
|
323
|
-
Args:
|
|
324
|
-
added_line_number: number of added line or None
|
|
325
|
-
deleted_line_number: number of deleted line or None
|
|
326
|
-
line: the text line
|
|
327
|
-
|
|
328
|
-
Return:
|
|
329
|
-
diff rows data with as list of row change type, line number, row content
|
|
330
|
-
|
|
331
|
-
"""
|
|
332
|
-
rows_data: List[DiffRowData] = []
|
|
333
|
-
if isinstance(added_line_number, int):
|
|
334
|
-
# indicates line was inserted
|
|
335
|
-
rows_data.append(DiffRowData(DiffRowType.ADDED, added_line_number, line))
|
|
336
|
-
if isinstance(deleted_line_number, int):
|
|
337
|
-
# indicates line was removed
|
|
338
|
-
rows_data.append(DiffRowData(DiffRowType.DELETED, deleted_line_number, line))
|
|
339
|
-
return rows_data
|
|
340
|
-
|
|
341
|
-
@staticmethod
|
|
342
|
-
def wrong_change(change: DiffDict) -> bool:
|
|
343
|
-
"""Returns True if the change is wrong"""
|
|
344
|
-
for i in ["line", "new", "old"]:
|
|
345
|
-
if i not in change:
|
|
346
|
-
logger.error(f"Skipping wrong change {change}")
|
|
260
|
+
def is_zip(data: Union[bytes, bytearray]) -> bool:
|
|
261
|
+
"""According https://en.wikipedia.org/wiki/List_of_file_signatures"""
|
|
262
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"PK") and 4 <= len(data):
|
|
263
|
+
if 0x03 == data[2] and 0x04 == data[3]:
|
|
264
|
+
# normal PK
|
|
347
265
|
return True
|
|
266
|
+
elif 0x05 == data[2] and 0x06 == data[3]:
|
|
267
|
+
# empty archive - no sense to scan in other scanners, so let it be a zip
|
|
268
|
+
return True
|
|
269
|
+
elif 0x07 == data[2] and 0x08 == data[3]:
|
|
270
|
+
# spanned archive - NOT SUPPORTED
|
|
271
|
+
return False
|
|
348
272
|
return False
|
|
349
273
|
|
|
350
274
|
@staticmethod
|
|
351
|
-
def
|
|
352
|
-
"""Generate changed file rows from diff data with changed lines (e.g. marked + or - in diff).
|
|
353
|
-
|
|
354
|
-
Args:
|
|
355
|
-
changes: git diff by file rows data
|
|
356
|
-
|
|
357
|
-
Return:
|
|
358
|
-
diff rows data with as list of row change type, line number, row content
|
|
359
|
-
|
|
360
|
-
"""
|
|
361
|
-
if not changes:
|
|
362
|
-
return []
|
|
363
|
-
|
|
364
|
-
rows_data = []
|
|
365
|
-
# process diff to restore lines and their positions
|
|
366
|
-
for change in changes:
|
|
367
|
-
if Util.wrong_change(change):
|
|
368
|
-
continue
|
|
369
|
-
line = change["line"]
|
|
370
|
-
if isinstance(line, str):
|
|
371
|
-
rows_data.extend(Util.preprocess_diff_rows(change.get("new"), change.get("old"), line))
|
|
372
|
-
elif isinstance(line, (bytes, bytearray)):
|
|
373
|
-
logger.warning("The feature is available with the deep scan option")
|
|
374
|
-
else:
|
|
375
|
-
logger.error(f"Unknown type of line {type(line)}")
|
|
376
|
-
|
|
377
|
-
return rows_data
|
|
378
|
-
|
|
379
|
-
@staticmethod
|
|
380
|
-
def is_zip(data: Union[bytes, bytearray]) -> bool:
|
|
275
|
+
def is_com(data: Union[bytes, bytearray]) -> bool:
|
|
381
276
|
"""According https://en.wikipedia.org/wiki/List_of_file_signatures"""
|
|
382
|
-
if isinstance(data, (bytes, bytearray)) and
|
|
383
|
-
#
|
|
384
|
-
|
|
385
|
-
if 0x03 == data[2] and 0x04 == data[3]:
|
|
386
|
-
return True
|
|
387
|
-
# empty archive - no sense to scan
|
|
388
|
-
elif 0x05 == data[2] and 0x06 == data[3]:
|
|
389
|
-
return True
|
|
390
|
-
# spanned archive - NOT SUPPORTED
|
|
391
|
-
elif 0x07 == data[2] and 0x08 == data[3]:
|
|
392
|
-
return False
|
|
277
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"):
|
|
278
|
+
# Compound File Binary Format: doc, xls, ppt, msi, msg
|
|
279
|
+
return True
|
|
393
280
|
return False
|
|
394
281
|
|
|
395
282
|
@staticmethod
|
|
396
|
-
def is_com(data: Union[bytes, bytearray]) -> bool:
|
|
283
|
+
def is_rpm(data: Union[bytes, bytearray]) -> bool:
|
|
397
284
|
"""According https://en.wikipedia.org/wiki/List_of_file_signatures"""
|
|
398
|
-
if isinstance(data, (bytes, bytearray)) and
|
|
399
|
-
|
|
400
|
-
# Compound File Binary Format: doc, xls, ppt, msi, msg
|
|
401
|
-
return True
|
|
285
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"\xED\xAB\xEE\xDB"):
|
|
286
|
+
return True
|
|
402
287
|
return False
|
|
403
288
|
|
|
404
289
|
@staticmethod
|
|
@@ -411,88 +296,105 @@ class Util:
|
|
|
411
296
|
or
|
|
412
297
|
0x20 == data[262] and 0x20 == data[263] and 0x00 == data[264]
|
|
413
298
|
):
|
|
414
|
-
|
|
299
|
+
with contextlib.suppress(Exception):
|
|
415
300
|
chksum = tarfile.nti(data[148:156]) # type: ignore
|
|
416
301
|
unsigned_chksum, signed_chksum = tarfile.calc_chksums(data) # type: ignore
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
logger.exception(f"Corrupted TAR ? {exc}")
|
|
302
|
+
if chksum == unsigned_chksum or chksum == signed_chksum:
|
|
303
|
+
return True
|
|
420
304
|
return False
|
|
421
305
|
|
|
422
306
|
@staticmethod
|
|
423
307
|
def is_deb(data: Union[bytes, bytearray]) -> bool:
|
|
424
308
|
"""According https://en.wikipedia.org/wiki/Deb_(file_format)"""
|
|
425
|
-
if isinstance(data, (bytes, bytearray)) and
|
|
309
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"!<arch>\n"):
|
|
426
310
|
return True
|
|
427
311
|
return False
|
|
428
312
|
|
|
429
313
|
@staticmethod
|
|
430
314
|
def is_bzip2(data: Union[bytes, bytearray]) -> bool:
|
|
431
315
|
"""According https://en.wikipedia.org/wiki/Bzip2"""
|
|
432
|
-
if isinstance(data, (bytes, bytearray)) and 10 <= len(data)
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
return True
|
|
316
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"\x42\x5A\x68") and 10 <= len(data) \
|
|
317
|
+
and 0x31 <= data[3] <= 0x39 \
|
|
318
|
+
and 0x31 == data[4] and 0x41 == data[5] and 0x59 == data[6] \
|
|
319
|
+
and 0x26 == data[7] and 0x53 == data[8] and 0x59 == data[9]:
|
|
320
|
+
return True
|
|
438
321
|
return False
|
|
439
322
|
|
|
440
323
|
@staticmethod
|
|
441
324
|
def is_gzip(data: Union[bytes, bytearray]) -> bool:
|
|
442
325
|
"""According https://www.rfc-editor.org/rfc/rfc1952"""
|
|
443
|
-
if isinstance(data, (bytes, bytearray)) and
|
|
444
|
-
|
|
445
|
-
return True
|
|
326
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"\x1F\x8B\x08"):
|
|
327
|
+
return True
|
|
446
328
|
return False
|
|
447
329
|
|
|
448
330
|
@staticmethod
|
|
449
331
|
def is_pdf(data: Union[bytes, bytearray]) -> bool:
|
|
450
332
|
"""According https://en.wikipedia.org/wiki/List_of_file_signatures - pdf"""
|
|
451
|
-
if isinstance(data, (bytes, bytearray)) and
|
|
452
|
-
|
|
453
|
-
|
|
333
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"%PDF-"):
|
|
334
|
+
return True
|
|
335
|
+
return False
|
|
336
|
+
|
|
337
|
+
@staticmethod
|
|
338
|
+
def is_jclass(data: Union[bytes, bytearray]) -> bool:
|
|
339
|
+
"""According https://en.wikipedia.org/wiki/List_of_file_signatures - java class"""
|
|
340
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"\xCA\xFE\xBA\xBE"):
|
|
341
|
+
return True
|
|
454
342
|
return False
|
|
455
343
|
|
|
456
344
|
@staticmethod
|
|
457
345
|
def is_jks(data: Union[bytes, bytearray]) -> bool:
|
|
458
346
|
"""According https://en.wikipedia.org/wiki/List_of_file_signatures - jks"""
|
|
459
|
-
if isinstance(data, (bytes, bytearray)) and
|
|
460
|
-
|
|
461
|
-
return True
|
|
347
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"\xFE\xED\xFE\xED"):
|
|
348
|
+
return True
|
|
462
349
|
return False
|
|
463
350
|
|
|
464
351
|
@staticmethod
|
|
465
352
|
def is_lzma(data: Union[bytes, bytearray]) -> bool:
|
|
466
353
|
"""According https://en.wikipedia.org/wiki/List_of_file_signatures - lzma also xz"""
|
|
467
|
-
if isinstance(data, (bytes, bytearray)) and
|
|
468
|
-
|
|
469
|
-
|
|
354
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith((b"\xFD7zXZ\x00", b"\x5D\x00\x00")):
|
|
355
|
+
return True
|
|
356
|
+
return False
|
|
357
|
+
|
|
358
|
+
@classmethod
|
|
359
|
+
def is_sqlite3(cls, data):
|
|
360
|
+
"""According https://en.wikipedia.org/wiki/List_of_file_signatures - SQLite Database"""
|
|
361
|
+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"SQLite format 3\0"):
|
|
362
|
+
return True
|
|
470
363
|
return False
|
|
471
364
|
|
|
472
365
|
@staticmethod
|
|
473
|
-
def is_asn1(data: Union[bytes, bytearray]) -> bool:
|
|
474
|
-
"""Only sequence type 0x30 and size correctness
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
366
|
+
def is_asn1(data: Union[bytes, bytearray]) -> int:
|
|
367
|
+
"""Only sequence type 0x30 and size correctness are checked
|
|
368
|
+
Returns size of ASN1 data over 128 bytes or 0 if no interested data
|
|
369
|
+
"""
|
|
370
|
+
if isinstance(data, (bytes, bytearray)) and 2 <= len(data) and 0x30 == data[0]:
|
|
371
|
+
# https://www.oss.com/asn1/resources/asn1-made-simple/asn1-quick-reference/basic-encoding-rules.html#Lengths
|
|
372
|
+
length = data[1]
|
|
373
|
+
if 0x80 == length:
|
|
374
|
+
if data.endswith(b"\x00\x00"):
|
|
375
|
+
# assume, all data are ASN1 of various size
|
|
376
|
+
return len(data)
|
|
377
|
+
else:
|
|
378
|
+
# skip the case where the ASN1 size is smaller than the actual data
|
|
379
|
+
return 0
|
|
380
|
+
elif 0x80 < length:
|
|
480
381
|
byte_len = 0x7F & length
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
length = long_size[0]
|
|
490
|
-
elif 0x80 < length and 1 == byte_len: # small size
|
|
491
|
-
length = data[2]
|
|
382
|
+
len_limit = 2 + byte_len
|
|
383
|
+
if 4 >= byte_len and len(data) >= len_limit:
|
|
384
|
+
length = 0
|
|
385
|
+
for i in range(2, len_limit):
|
|
386
|
+
length <<= 8
|
|
387
|
+
length |= data[i]
|
|
388
|
+
if len(data) >= length + len_limit:
|
|
389
|
+
return length + len_limit
|
|
492
390
|
else:
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
391
|
+
# unsupported huge size
|
|
392
|
+
return 0
|
|
393
|
+
else:
|
|
394
|
+
# less than 0x80
|
|
395
|
+
if len(data) >= length + 2:
|
|
396
|
+
return length + 2
|
|
397
|
+
return 0
|
|
496
398
|
|
|
497
399
|
@staticmethod
|
|
498
400
|
def is_html(data: Union[bytes, bytearray]) -> bool:
|
|
@@ -547,12 +449,12 @@ class Util:
|
|
|
547
449
|
@staticmethod
|
|
548
450
|
def is_eml(data: Union[bytes, bytearray]) -> bool:
|
|
549
451
|
"""According to https://datatracker.ietf.org/doc/html/rfc822 lookup the fields: Date, From, To or Subject"""
|
|
550
|
-
if isinstance(data, (bytes, bytearray))
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
452
|
+
if isinstance(data, (bytes, bytearray)) \
|
|
453
|
+
and (b"\nDate:" in data or data.startswith(b"Date:")) \
|
|
454
|
+
and (b"\nFrom:" in data or data.startswith(b"From:")) \
|
|
455
|
+
and (b"\nTo:" in data or data.startswith(b"To:")) \
|
|
456
|
+
and (b"\nSubject:" in data or data.startswith(b"Subject:")):
|
|
457
|
+
return True
|
|
556
458
|
return False
|
|
557
459
|
|
|
558
460
|
@staticmethod
|
|
@@ -665,10 +567,13 @@ class Util:
|
|
|
665
567
|
result = ast.unparse(src).splitlines()
|
|
666
568
|
return result
|
|
667
569
|
|
|
570
|
+
PEM_CLEANING_PATTERN = re.compile(r"\\[tnrvf]")
|
|
571
|
+
WHITESPACE_TRANS_TABLE = str.maketrans('', '', string.whitespace)
|
|
572
|
+
|
|
668
573
|
@staticmethod
|
|
669
574
|
def decode_base64(text: str, padding_safe: bool = False, urlsafe_detect=False) -> bytes:
|
|
670
575
|
"""decode text to bytes with / without padding detect and urlsafe symbols"""
|
|
671
|
-
value = text
|
|
576
|
+
value = text.translate(Util.WHITESPACE_TRANS_TABLE)
|
|
672
577
|
if padding_safe:
|
|
673
578
|
pad_num = 0x3 & len(value)
|
|
674
579
|
if pad_num:
|
|
@@ -679,6 +584,38 @@ class Util:
|
|
|
679
584
|
decoded = base64.b64decode(value, validate=True)
|
|
680
585
|
return decoded
|
|
681
586
|
|
|
587
|
+
@staticmethod
|
|
588
|
+
def load_pk(data: bytes, password: Optional[bytes] = None) -> Optional[PrivateKeyTypes]:
|
|
589
|
+
"""Try to load private key from PKCS1, PKCS8 and PKCS12 formats"""
|
|
590
|
+
with contextlib.suppress(Exception):
|
|
591
|
+
# PKCS1, PKCS8 probes
|
|
592
|
+
private_key = load_der_private_key(data, password)
|
|
593
|
+
return private_key
|
|
594
|
+
with contextlib.suppress(Exception):
|
|
595
|
+
# PKCS12 probe
|
|
596
|
+
private_key, _certificate, _additional_certificates = load_key_and_certificates(data, password)
|
|
597
|
+
return private_key
|
|
598
|
+
return None
|
|
599
|
+
|
|
600
|
+
RANDOM_DATA = random.randbytes(20)
|
|
601
|
+
|
|
602
|
+
@staticmethod
|
|
603
|
+
def check_pk(pkey: PrivateKeyTypes) -> bool:
|
|
604
|
+
"""Check private key with encrypt-decrypt random data"""
|
|
605
|
+
if isinstance(pkey, (EllipticCurvePrivateKey, DSAPrivateKey, Ed448PrivateKey, Ed25519PrivateKey, DHPrivateKey,
|
|
606
|
+
X448PrivateKey, X25519PrivateKey)):
|
|
607
|
+
# One does not simply perform check the keys
|
|
608
|
+
return True
|
|
609
|
+
if isinstance(pkey, (EllipticCurvePublicKey, DSAPublicKey, Ed448PublicKey, Ed25519PublicKey, DHPublicKey,
|
|
610
|
+
X448PublicKey, X25519PublicKey)) or not pkey:
|
|
611
|
+
# These aren't the keys we're looking for
|
|
612
|
+
return False
|
|
613
|
+
# DSA, RSA
|
|
614
|
+
pd = padding.OAEP(mgf=padding.MGF1(algorithm=hashes.SHA1()), algorithm=hashes.SHA1(), label=None)
|
|
615
|
+
ciphertext = pkey.public_key().encrypt(Util.RANDOM_DATA, padding=pd)
|
|
616
|
+
refurb = pkey.decrypt(ciphertext, padding=pd)
|
|
617
|
+
return bool(refurb == Util.RANDOM_DATA)
|
|
618
|
+
|
|
682
619
|
@staticmethod
|
|
683
620
|
def get_chunks(line_len: int) -> List[Tuple[int, int]]:
|
|
684
621
|
"""Returns chunks positions for given line length"""
|
|
{credsweeper-1.11.4.dist-info → credsweeper-1.11.6.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: credsweeper
|
|
3
|
-
Version: 1.11.4
|
|
3
|
+
Version: 1.11.6
|
|
4
4
|
Summary: Credential Sweeper
|
|
5
5
|
Project-URL: Homepage, https://github.com/Samsung/CredSweeper
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/Samsung/CredSweeper/issues
|
|
@@ -37,6 +37,7 @@ Requires-Dist: python-dateutil
|
|
|
37
37
|
Requires-Dist: python-docx
|
|
38
38
|
Requires-Dist: python-pptx
|
|
39
39
|
Requires-Dist: pyyaml
|
|
40
|
+
Requires-Dist: rpmfile
|
|
40
41
|
Requires-Dist: whatthepatch
|
|
41
42
|
Requires-Dist: xlrd
|
|
42
43
|
Description-Content-Type: text/markdown
|
|
@@ -140,11 +141,7 @@ cat output.json
|
|
|
140
141
|
"value_start": 12,
|
|
141
142
|
"value_end": 19,
|
|
142
143
|
"variable": "password",
|
|
143
|
-
"entropy_validation": {
|
|
144
|
-
"iterator": "BASE64_CHARS",
|
|
145
|
-
"entropy": 2.120589933192232,
|
|
146
|
-
"valid": false
|
|
147
|
-
}
|
|
144
|
+
"entropy": 2.12059
|
|
148
145
|
}
|
|
149
146
|
]
|
|
150
147
|
}
|