hashsmith-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/MANIFEST.in +2 -0
- package/README.md +256 -0
- package/bin/index.js +10 -0
- package/dist/hashsmith_cli-0.1.0-py3-none-any.whl +0 -0
- package/dist/hashsmith_cli-0.1.0.tar.gz +0 -0
- package/hashsmith/__init__.py +3 -0
- package/hashsmith/__main__.py +4 -0
- package/hashsmith/algorithms/__init__.py +1 -0
- package/hashsmith/algorithms/cracking.py +276 -0
- package/hashsmith/algorithms/decoding.py +317 -0
- package/hashsmith/algorithms/encoding.py +203 -0
- package/hashsmith/algorithms/hashing.py +223 -0
- package/hashsmith/algorithms/morse.py +64 -0
- package/hashsmith/cli.py +1014 -0
- package/hashsmith/utils/__init__.py +1 -0
- package/hashsmith/utils/banner.py +20 -0
- package/hashsmith/utils/clipboard.py +37 -0
- package/hashsmith/utils/hashdetect.py +33 -0
- package/hashsmith/utils/identify.py +629 -0
- package/hashsmith/utils/io.py +30 -0
- package/hashsmith/utils/metrics.py +20 -0
- package/hashsmith/utils/wordlist.py +11 -0
- package/hashsmith_cli.egg-info/PKG-INFO +272 -0
- package/hashsmith_cli.egg-info/SOURCES.txt +29 -0
- package/hashsmith_cli.egg-info/dependency_links.txt +1 -0
- package/hashsmith_cli.egg-info/entry_points.txt +2 -0
- package/hashsmith_cli.egg-info/requires.txt +4 -0
- package/hashsmith_cli.egg-info/top_level.txt +1 -0
- package/package.json +15 -0
- package/pyproject.toml +3 -0
- package/requirements.txt +4 -0
- package/setup.cfg +23 -0
- package/setup.py +5 -0
- package/wordlists/common.txt +230931 -0
|
@@ -0,0 +1,629 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import binascii
|
|
3
|
+
import re
|
|
4
|
+
import string
|
|
5
|
+
from typing import List, Tuple
|
|
6
|
+
|
|
7
|
+
from ..algorithms.decoding import (
|
|
8
|
+
decode_base58,
|
|
9
|
+
decode_hex,
|
|
10
|
+
decode_binary,
|
|
11
|
+
decode_decimal,
|
|
12
|
+
decode_octal,
|
|
13
|
+
decode_base64,
|
|
14
|
+
decode_base32,
|
|
15
|
+
decode_base85,
|
|
16
|
+
decode_base64url,
|
|
17
|
+
decode_morse_code,
|
|
18
|
+
decode_baconian,
|
|
19
|
+
decode_polybius,
|
|
20
|
+
decode_unicode_escaped,
|
|
21
|
+
decode_url,
|
|
22
|
+
decode_rot13,
|
|
23
|
+
decode_atbash,
|
|
24
|
+
decode_caesar,
|
|
25
|
+
decode_leet_speak,
|
|
26
|
+
decode_reverse,
|
|
27
|
+
decode_brainfuck,
|
|
28
|
+
decode_rail_fence,
|
|
29
|
+
decode_vigenere,
|
|
30
|
+
decode_xor,
|
|
31
|
+
)
|
|
32
|
+
from ..algorithms.encoding import (
|
|
33
|
+
encode_base58,
|
|
34
|
+
encode_hex,
|
|
35
|
+
encode_binary,
|
|
36
|
+
encode_decimal,
|
|
37
|
+
encode_octal,
|
|
38
|
+
encode_base64,
|
|
39
|
+
encode_base32,
|
|
40
|
+
encode_base85,
|
|
41
|
+
encode_base64url,
|
|
42
|
+
encode_morse_code,
|
|
43
|
+
encode_baconian,
|
|
44
|
+
encode_polybius,
|
|
45
|
+
encode_unicode_escaped,
|
|
46
|
+
encode_url,
|
|
47
|
+
encode_caesar,
|
|
48
|
+
)
|
|
49
|
+
from ..algorithms.morse import REVERSE_MORSE
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Frequent English letter pairs, used to score how English-like a candidate
# plaintext looks (see _bigram_score).
COMMON_BIGRAMS = (
    "th", "he", "in", "er", "an", "re", "nd", "on", "en", "at",
    "ou", "ed", "ha", "to", "or", "it", "is", "hi", "es", "ng",
    "st", "ar", "te", "se", "me", "ve", "of",
)

# Frequent English words (plus a few classic crypto-demo words) used by
# _word_hit to confirm a decoded candidate reads as real text.
COMMON_WORDS = (
    "the", "and", "you", "that", "have", "for", "not", "with",
    "this", "but", "from", "hello", "secret", "message", "attack", "dawn",
)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _normalize_spaces(value: str) -> str:
|
|
103
|
+
return " ".join(value.strip().split())
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _is_hex(value: str) -> bool:
|
|
107
|
+
return bool(value) and all(ch in "0123456789abcdefABCDEF" for ch in value)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _vowel_ratio(value: str) -> float:
|
|
111
|
+
letters = [ch.lower() for ch in value if ch.isalpha()]
|
|
112
|
+
if not letters:
|
|
113
|
+
return 0.0
|
|
114
|
+
vowels = sum(1 for ch in letters if ch in "aeiou")
|
|
115
|
+
return vowels / len(letters)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _alpha_count(value: str) -> int:
|
|
119
|
+
return sum(1 for ch in value if ch.isalpha())
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _alpha_ratio(value: str) -> float:
|
|
123
|
+
if not value:
|
|
124
|
+
return 0.0
|
|
125
|
+
return _alpha_count(value) / len(value)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _printable_ratio(value: bytes) -> float:
|
|
129
|
+
if not value:
|
|
130
|
+
return 0.0
|
|
131
|
+
printable = sum(1 for ch in value if chr(ch) in string.printable)
|
|
132
|
+
return printable / len(value)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _alnum_space_ratio(value: bytes) -> float:
|
|
136
|
+
if not value:
|
|
137
|
+
return 0.0
|
|
138
|
+
allowed = set(string.ascii_letters + string.digits + " ")
|
|
139
|
+
count = sum(1 for ch in value if chr(ch) in allowed)
|
|
140
|
+
return count / len(value)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _bigram_score(value: str) -> float:
    """Density of common English bigrams among adjacent letter pairs.

    All non-letters are stripped before pairing; returns 0.0 when fewer
    than two letters remain.
    """
    letters = re.sub(r"[^a-z]", "", value.lower())
    if len(letters) < 2:
        return 0.0
    pair_total = len(letters) - 1
    hits = sum(1 for i in range(pair_total) if letters[i : i + 2] in COMMON_BIGRAMS)
    return hits / max(pair_total, 1)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _word_hit(value: str) -> bool:
    """True when *value* contains a long (>=4 chars) common word, or two common words."""
    cleaned = re.sub(r"[^a-z ]", " ", value.lower())
    tokens = cleaned.split()
    matched = [word for word in COMMON_WORDS if word in tokens]
    if any(len(word) >= 4 for word in matched):
        return True
    return len(matched) >= 2
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _text_score(value: str) -> float:
    """Composite English-likeness score: bigram density + word bonus + vowel balance."""
    score = _bigram_score(value)
    if _word_hit(value):
        score += 0.6
    score += _vowel_ratio(value) * 0.4
    return score
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _index_of_coincidence(value: str) -> float:
|
|
168
|
+
letters = [ch.lower() for ch in value if ch.isalpha()]
|
|
169
|
+
n = len(letters)
|
|
170
|
+
if n < 2:
|
|
171
|
+
return 0.0
|
|
172
|
+
counts = {}
|
|
173
|
+
for ch in letters:
|
|
174
|
+
counts[ch] = counts.get(ch, 0) + 1
|
|
175
|
+
numerator = sum(count * (count - 1) for count in counts.values())
|
|
176
|
+
return numerator / (n * (n - 1))
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _best_shift_score(value: str) -> Tuple[int, float, float]:
    """Try every Caesar shift 1-25 against *value*.

    Returns:
        (best_shift, original_bigram_score, best_bigram_score); best_shift
        stays 0 when no shift scores above 0.0.
    """
    baseline = _bigram_score(value)
    winner = 0
    top = 0.0
    for candidate in range(1, 26):
        attempt = _bigram_score(decode_caesar(value, candidate))
        if attempt > top:
            top = attempt
            winner = candidate
    return winner, baseline, top
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _best_shift_vowel_ratio(value: str) -> Tuple[int, float, float]:
    """Try every Caesar shift 1-25 and rank candidates by vowel ratio.

    Returns:
        (best_shift, original_vowel_ratio, best_vowel_ratio); best_shift
        stays 0 when no shift yields a ratio above 0.0.
    """
    baseline = _vowel_ratio(value)
    winner = 0
    top = 0.0
    for candidate in range(1, 26):
        ratio = _vowel_ratio(decode_caesar(value, candidate))
        if ratio > top:
            top = ratio
            winner = candidate
    return winner, baseline, top
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _try_single_byte_xor(hex_text: str) -> Tuple[float, float]:
    """Brute-force single-byte XOR keys over the bytes behind *hex_text*.

    Returns:
        Tuple of (raw_printable, best_score): the printable ratio of the
        un-XORed bytes, and the highest bigram score found among keys whose
        decoded output is at least 90% printable. Returns (0.0, 0.0) when
        *hex_text* is not valid hex.
    """
    try:
        raw = binascii.unhexlify(hex_text)
    except (binascii.Error, ValueError):
        # Not hex at all -- nothing to brute-force.
        return 0.0, 0.0
    raw_printable = _printable_ratio(raw)
    best_printable = 0.0
    best_score = 0.0
    # NOTE(review): best_printable is tracked but never returned -- confirm
    # whether it was meant to be part of the result tuple.
    for key in range(256):
        decoded = bytes(b ^ key for b in raw)
        printable_ratio = _printable_ratio(decoded)
        if printable_ratio > best_printable:
            best_printable = printable_ratio
        # Only mostly-printable decodings are worth scoring for English-ness.
        if printable_ratio >= 0.9:
            text = decoded.decode("utf-8", errors="ignore")
            best_score = max(best_score, _bigram_score(text))
    return raw_printable, best_score
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def detect_encoding_types(text: str) -> List[str]:
    """Identify plausible encodings or classical ciphers for *text*.

    Two-phase strategy:
      1. "Strong" detections: formats verified by an exact decode/encode
         round-trip (binary, decimal, octal, polybius, baconian, morse,
         unicode escapes, hex, base64/32/85/58, URL, brainfuck). If any
         match, only those are returned (with a hex-vs-XOR disambiguation).
      2. Heuristic detections: statistical English-likeness scoring that
         suggests caesar/rot13/atbash/reverse/railfence/vigenere/leet/xor.

    Returns:
        De-duplicated list of detected encoding names; empty when the input
        looks like a password hash or nothing matches.
    """
    value = text.strip()
    if not value:
        return []

    # Password-hash formats are not encodings; bail out early.
    if value.startswith(("$2a$", "$2b$", "$2y$", "$argon2", "scrypt$")):
        return []
    if value.startswith("*") and len(value) == 41:  # MySQL 4.1+ hash shape
        return []
    if value.startswith("md5") and len(value) == 35:  # PostgreSQL md5 hash shape
        return []
    if value.lower().startswith("0x0100"):  # MSSQL hash prefix
        return []

    strong_results: List[str] = []
    heuristic_results: List[str] = []

    # Binary (8-bit groups)
    normalized = _normalize_spaces(value)
    if re.fullmatch(r"[01]{8}( [01]{8})*", normalized):
        try:
            decoded = decode_binary(normalized)
            if _normalize_spaces(encode_binary(decoded)) == normalized:
                strong_results.append("binary")
        except Exception:
            pass

    # Decimal (space-separated 0-255)
    if re.fullmatch(r"\d{1,3}( \d{1,3})*", normalized):
        try:
            decoded = decode_decimal(normalized)
            if _normalize_spaces(encode_decimal(decoded)) == normalized:
                strong_results.append("decimal")
        except Exception:
            pass

    # Octal (space-separated 0-7)
    if re.fullmatch(r"[0-7]{1,3}( [0-7]{1,3})*", normalized):
        try:
            decoded = decode_octal(normalized)
            if _normalize_spaces(encode_octal(decoded)) == normalized:
                strong_results.append("octal")
        except Exception:
            pass

    # Polybius (1-5 pairs and / for spaces)
    if re.fullmatch(r"[1-5/ ]+", normalized):
        try:
            decoded = decode_polybius(normalized)
            if _normalize_spaces(encode_polybius(decoded)) == normalized:
                strong_results.append("polybius")
        except Exception:
            pass

    # Baconian (A/B tokens)
    if re.fullmatch(r"[ABab/ ]+", normalized):
        try:
            decoded = decode_baconian(normalized)
            if _normalize_spaces(encode_baconian(decoded).upper()) == normalized.upper():
                strong_results.append("baconian")
        except Exception:
            pass

    # Morse (every token must be a known morse code)
    if re.fullmatch(r"[.\-/ ]+", normalized):
        tokens = normalized.split()
        if tokens and all(token in REVERSE_MORSE for token in tokens):
            try:
                decoded = decode_morse_code(normalized)
                if _normalize_spaces(encode_morse_code(decoded)) == normalized:
                    strong_results.append("morse")
            except Exception:
                pass

    # Unicode escaped (\uXXXX)
    if re.fullmatch(r"(?:\\u[0-9a-fA-F]{4})+", value):
        try:
            decoded = decode_unicode_escaped(value)
            if encode_unicode_escaped(decoded) == value.lower():
                strong_results.append("unicode")
        except Exception:
            pass

    # Hex (strict, UTF-8 round-trip). The hex_* features feed the hex/XOR
    # disambiguation further down.
    hex_bytes = None
    hex_printable = 0.0
    hex_text_score = 0.0
    has_hex_alpha = False
    # Fix: initialize alongside its siblings. Previously this name was bound
    # only on the fully-successful hex path, leaving a latent NameError if an
    # exception fired after hex_bytes had already been assigned.
    hex_alnum_space = 0.0
    if len(value) % 2 == 0 and _is_hex(value):
        try:
            decoded = decode_hex(value)
            if encode_hex(decoded).lower() == value.lower():
                strong_results.append("hex")
                hex_bytes = binascii.unhexlify(value)
                hex_printable = _printable_ratio(hex_bytes)
                hex_text_score = _bigram_score(hex_bytes.decode("utf-8", errors="ignore"))
                has_hex_alpha = any(ch in "abcdef" for ch in value.lower())
                hex_alnum_space = _alnum_space_ratio(hex_bytes)
        except Exception:
            pass

    # Long pure-hex strings that failed every round-trip are most likely
    # digests, not encodings.
    if _is_hex(value) and not strong_results and len(value) >= 16:
        return []

    # Base64 (strict, padded)
    try:
        decoded = decode_base64(value)
        if encode_base64(decoded) == value:
            strong_results.append("base64")
    except Exception:
        pass

    # Base64URL (unpadded, URL-safe)
    if "=" not in value and re.fullmatch(r"[A-Za-z0-9_-]+", value):
        try:
            decoded = decode_base64url(value)
            if encode_base64url(decoded) == value:
                strong_results.append("base64url")
        except Exception:
            pass

    # Base32 (strict, padded)
    try:
        decoded = decode_base32(value)
        if encode_base32(decoded) == value.upper():
            strong_results.append("base32")
    except Exception:
        pass

    # Base85 (strict round-trip)
    try:
        decoded = decode_base85(value)
        if encode_base85(decoded) == value:
            strong_results.append("base85")
    except Exception:
        pass

    # Base58 (strict round-trip)
    try:
        decoded = decode_base58(value)
        if encode_base58(decoded) == value:
            strong_results.append("base58")
    except Exception:
        pass

    # URL encoding (must include at least one valid %XX)
    if re.search(r"%[0-9A-Fa-f]{2}", value):
        try:
            decoded = decode_url(value)
            if encode_url(decoded) == value:
                strong_results.append("url")
        except Exception:
            pass
    # Brainfuck (alphabet check; decoding must produce some output)
    if re.fullmatch(r"[+\-<>\[\].,]+", value):
        try:
            decoded = decode_brainfuck(value)
            if decoded:
                strong_results.append("brainf*ck")
        except Exception:
            pass

    # Prefer polybius when present to avoid overlap with decimal/octal
    if "polybius" in strong_results:
        return ["polybius"]

    # If any strong format matched, return only those -- except that valid
    # hex may really be single-byte-XOR'ed data, so disambiguate first.
    if strong_results:
        if "hex" in strong_results and hex_bytes is not None:
            raw_printable, xor_score = _try_single_byte_xor(value)
            raw_text_score = _bigram_score(hex_bytes.decode("utf-8", errors="ignore"))
            decoded_text = hex_bytes.decode("utf-8", errors="ignore")
            # Readable plaintext behind the hex -> report plain hex.
            if hex_printable >= 0.9 and (
                _word_hit(decoded_text)
                or (_alpha_ratio(decoded_text) >= 0.6 and _vowel_ratio(decoded_text) >= 0.25)
                or raw_text_score >= 0.1
            ):
                return ["hex"]
            # Unreadable raw bytes but a XOR key produces English -> XOR.
            if (
                (has_hex_alpha or hex_alnum_space < 0.85)
                and (
                    (raw_printable < 0.6 and xor_score - raw_text_score >= 0.05)
                    or (raw_text_score < 0.01 and xor_score - raw_text_score >= 0.1)
                )
            ):
                return ["xor"]
        return list(dict.fromkeys(strong_results))

    # ROT13 short-word check (avoid false positives by requiring word hit)
    if re.fullmatch(r"[A-Za-z ]+", value) and 4 <= len(value.strip()) < 6:
        rot13_decoded = decode_rot13(value)
        if _word_hit(rot13_decoded) and not _word_hit(value):
            return ["rot13"]

    # ROT13 / Caesar / Atbash / Reverse / Rail fence heuristics
    if re.fullmatch(r"[A-Za-z ]+", value) and len(value.strip()) >= 6:
        base_score = _text_score(value)
        base_word_hit = _word_hit(value)
        base_vowel = _vowel_ratio(value)
        base_alpha = _alpha_ratio(value)
        candidate_scores: dict[str, float] = {}
        candidate_texts: dict[str, str] = {}

        rot13_decoded = decode_rot13(value)
        candidate_scores["rot13"] = _text_score(rot13_decoded)
        candidate_texts["rot13"] = rot13_decoded

        # Track the best and second-best caesar shift (13 == rot13, skipped);
        # a narrow gap between them later suggests vigenere, not caesar.
        best_shift = 0
        best_caesar_score = 0.0
        second_caesar_score = 0.0
        best_caesar_text = value
        for shift in range(1, 26):
            if shift == 13:
                continue
            decoded = decode_caesar(value, shift)
            score = _text_score(decoded)
            if score > best_caesar_score:
                second_caesar_score = best_caesar_score
                best_caesar_score = score
                best_shift = shift
                best_caesar_text = decoded
            elif score > second_caesar_score:
                second_caesar_score = score
        if best_shift:
            candidate_scores["caesar"] = best_caesar_score
            candidate_texts["caesar"] = best_caesar_text

        atbash_decoded = decode_atbash(value)
        candidate_scores["atbash"] = _text_score(atbash_decoded)
        candidate_texts["atbash"] = atbash_decoded

        reverse_decoded = decode_reverse(value)
        candidate_scores["reverse"] = _text_score(reverse_decoded)
        candidate_texts["reverse"] = reverse_decoded

        best_rf_score = 0.0
        best_rf_text = ""
        for rails in range(2, 6):
            try:
                decoded = decode_rail_fence(value, rails)
            except Exception:
                continue
            score = _text_score(decoded)
            if score > best_rf_score:
                best_rf_score = score
                best_rf_text = decoded
        if best_rf_text and (_word_hit(best_rf_text) or _bigram_score(best_rf_text) >= 0.2):
            candidate_scores["railfence"] = best_rf_score
            candidate_texts["railfence"] = best_rf_text

        best_match, best_score = max(candidate_scores.items(), key=lambda item: item[1])
        ic_value = _index_of_coincidence(value)
        best_text = candidate_texts.get(best_match, "")
        any_word_hit = any(_word_hit(text) for text in candidate_texts.values())
        score_delta = best_score - base_score
        # Only emit heuristic guesses when the input does not already read
        # as English, or a candidate is decisively better than the original.
        allow_heuristics = (
            (not base_word_hit and base_score < 0.18)
            or (score_delta >= 0.15 and best_score >= 0.25)
        )

        if allow_heuristics:
            if best_match == "caesar" and score_delta >= 0.05 and len(value.strip()) > 12:
                heuristic_results.append("caesar")
            elif best_match in {"caesar", "rot13", "atbash"} and not any_word_hit and len(value.strip()) <= 12:
                if not base_word_hit and base_score < 0.18:
                    heuristic_results.append("vigenere")
                elif best_match == "caesar" and (best_score - base_score) >= 0.12:
                    heuristic_results.append("caesar")
            elif (
                best_match in {"caesar", "rot13", "atbash"}
                and not any_word_hit
                and score_delta < 0.05
                and len(value.strip()) >= 8
                and not base_word_hit
                and base_score < 0.18
            ):
                heuristic_results.append("vigenere")
            elif (
                best_match == "caesar"
                and not any_word_hit
                and (best_caesar_score - second_caesar_score) < 0.15
                and len(value.strip()) >= 8
                and not base_word_hit
                and base_score < 0.18
            ):
                heuristic_results.append("vigenere")
            elif (
                ic_value < 0.06
                and not any_word_hit
                and score_delta < 0.05
                and len(value.strip()) >= 8
                and not base_word_hit
                and base_score < 0.18
            ):
                heuristic_results.append("vigenere")
            elif (
                best_match in {"caesar", "rot13", "atbash"}
                and not _word_hit(best_text)
                and ic_value < 0.06
                and best_score < 0.35
                and len(value.strip()) >= 8
                and not base_word_hit
                and base_score < 0.18
            ):
                heuristic_results.append("vigenere")
            elif (
                ic_value < 0.055
                and (best_score - base_score) < 0.08
                and len(value.strip()) >= 8
                and not base_word_hit
                and base_score < 0.18
            ):
                heuristic_results.append("vigenere")
            elif best_match == "reverse":
                reverse_bigram = _bigram_score(best_text)
                if (
                    _word_hit(best_text)
                    or (
                        reverse_bigram >= 0.25
                        and _vowel_ratio(best_text) >= 0.3
                        and base_score <= 0.1
                        and (best_score - base_score) >= 0.12
                    )
                ):
                    heuristic_results.append("reverse")
            elif best_score >= max(0.2, base_score + 0.08):
                if not (base_alpha >= 0.85 and base_vowel >= 0.28 and base_score >= 0.18):
                    heuristic_results.append(best_match)

    # Leet (heuristic, require mixed letters+digits)
    if not _is_hex(value) and any(ch.isalpha() for ch in value) and any(ch in "013457" for ch in value):
        decoded = decode_leet_speak(value)
        original_score = _bigram_score(value)
        decoded_score = _bigram_score(decoded)
        if _word_hit(decoded) or (decoded_score >= 0.12 and original_score <= 0.05):
            heuristic_results.append("leet")

    # XOR (single-byte key heuristic on hex)
    # NOTE(review): hex_bytes is only non-None when "hex" was a strong result,
    # and strong results return earlier -- confirm whether this branch is
    # reachable or was meant to brute-force even when the round-trip failed.
    if len(value) % 2 == 0 and _is_hex(value):
        if hex_bytes is not None:
            raw_printable, xor_score = _try_single_byte_xor(value)
            raw_text_score = _bigram_score(hex_bytes.decode("utf-8", errors="ignore"))
            if (
                (has_hex_alpha or hex_alnum_space < 0.85)
                and (
                    (raw_printable < 0.6 and xor_score - raw_text_score >= 0.05)
                    or (raw_text_score < 0.01 and xor_score - raw_text_score >= 0.1)
                )
            ):
                heuristic_results.append("xor")

    return list(dict.fromkeys(heuristic_results))
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
def _weights_for_hex_length(length: int) -> List[Tuple[str, float]]:
|
|
579
|
+
return {
|
|
580
|
+
16: [("mysql323", 1.0)],
|
|
581
|
+
32: [("md5", 0.7), ("ntlm", 0.2), ("md4", 0.1)],
|
|
582
|
+
40: [("sha1", 0.85), ("mssql2000", 0.15)],
|
|
583
|
+
56: [("sha224", 0.8), ("sha3_224", 0.2)],
|
|
584
|
+
64: [("sha256", 0.7), ("sha3_256", 0.2), ("blake2s", 0.1)],
|
|
585
|
+
96: [("sha384", 1.0)],
|
|
586
|
+
128: [("sha512", 0.7), ("sha3_512", 0.2), ("blake2b", 0.1)],
|
|
587
|
+
}.get(length, [])
|
|
588
|
+
|
|
589
|
+
|
|
590
|
+
def _normalize_percentages(items: List[Tuple[str, float]]) -> List[Tuple[str, int]]:
|
|
591
|
+
if not items:
|
|
592
|
+
return []
|
|
593
|
+
total = sum(weight for _, weight in items) or 1.0
|
|
594
|
+
raw = [(name, weight / total * 100.0) for name, weight in items]
|
|
595
|
+
rounded = [(name, int(round(pct))) for name, pct in raw]
|
|
596
|
+
diff = 100 - sum(pct for _, pct in rounded)
|
|
597
|
+
if diff != 0:
|
|
598
|
+
name, pct = rounded[0]
|
|
599
|
+
rounded[0] = (name, pct + diff)
|
|
600
|
+
return rounded
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
def detect_hash_probabilities(value: str, top: int = 3) -> List[Tuple[str, int]]:
    """Guess which hash algorithm produced *value*.

    Scheme-prefixed formats (bcrypt, argon2, scrypt, MSSQL, postgres,
    MySQL 4.1) are identified outright; otherwise the hex digest length is
    mapped to weighted candidates.

    Returns:
        Up to *top* (algorithm, percent) pairs summing to 100, or an empty
        list when nothing matches.
    """
    text = value.strip()
    if not text:
        return []

    # Unambiguous, self-describing formats first.
    if text.startswith(("$2a$", "$2b$", "$2y$")):
        return [("bcrypt", 100)]
    if text.startswith("$argon2"):
        return [("argon2", 100)]
    if text.startswith("scrypt$"):
        return [("scrypt", 100)]
    if text.lower().startswith("0x0100"):
        return _normalize_percentages([("mssql2005", 0.5), ("mssql2012", 0.5)])
    if text.startswith("md5") and len(text) == 35:
        return [("postgres", 100)]
    if text.startswith("*") and len(text) == 41:
        return [("mysql41", 100)]

    # Everything else must be a raw hex digest.
    if not _is_hex(text):
        return []

    candidates = _weights_for_hex_length(len(text))
    if not candidates:
        return []

    ranked = sorted(candidates, key=lambda item: item[1], reverse=True)[:top]
    return _normalize_percentages(ranked)
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def read_text_from_file(path: str) -> str:
    """Read the file at *path* as UTF-8 text.

    Args:
        path: File path; ``~`` is expanded and the path is resolved.

    Returns:
        The file's contents.

    Raises:
        ValueError: If the path is missing, is a directory, or is unreadable.
    """
    file_path = Path(path).expanduser().resolve()
    if not file_path.exists():
        raise ValueError(f"File not found: {path}")
    if file_path.is_dir():
        raise ValueError(f"Expected a file but got a directory: {path}")
    try:
        return file_path.read_text(encoding="utf-8")
    except PermissionError as exc:
        # Chain the OS-level error so tracebacks keep the original cause.
        raise ValueError(f"Permission denied for file: {path}") from exc
    except IsADirectoryError as exc:
        raise ValueError(f"Expected a file but got a directory: {path}") from exc
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def write_text_to_file(path: str, content: str) -> None:
    """Write *content* to *path* as UTF-8, creating parent directories as needed."""
    target = Path(path).expanduser().resolve()
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(content, encoding="utf-8")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def resolve_input(text: Optional[str], file_path: Optional[str]) -> str:
    """Return inline *text* when provided, otherwise read from *file_path*.

    Raises:
        ValueError: When neither input source is given.
    """
    if not text and not file_path:
        raise ValueError("Provide --text or --file")
    return text if text else read_text_from_file(file_path)
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
class RateCounter:
    """Derives an items-per-second rate from a monotonically growing total."""

    # perf_counter timestamp of the previous observation; 0.0 means unset.
    last_tick: float = 0.0
    # Total count recorded at the previous observation.
    last_count: int = 0

    def __post_init__(self) -> None:
        # Stamp the start time lazily so a default-constructed counter
        # measures from its moment of creation.
        if not self.last_tick:
            self.last_tick = time.perf_counter()

    def rate(self, total_count: int) -> float:
        """Return the rate since the previous call, then advance the baseline."""
        current = time.perf_counter()
        # Clamp the interval to avoid division by zero on back-to-back calls.
        elapsed = max(current - self.last_tick, 1e-9)
        gained = total_count - self.last_count
        self.last_tick, self.last_count = current, total_count
        return gained / elapsed
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Iterable
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def iter_wordlist(path: str) -> Iterable[str]:
    """Lazily yield the non-empty, stripped lines of the wordlist at *path*.

    Undecodable bytes are ignored so partially-binary wordlists still work.
    """
    wordlist_path = Path(path).expanduser().resolve()
    with wordlist_path.open("r", encoding="utf-8", errors="ignore") as handle:
        for raw_line in handle:
            candidate = raw_line.strip()
            if candidate:
                yield candidate
|