pawpy-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pawpy/generator/gpu.py ADDED
@@ -0,0 +1,64 @@
1
+ """Optional GPU acceleration via CuPy.
2
+
3
+ When ``cupy`` is installed and ``--gpu`` is passed, rule application
4
+ is offloaded to the GPU for parallel execution.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ from typing import List
11
+
12
+ logger = logging.getLogger("pawpy.generator.gpu")
13
+
14
# Probe for the optional CuPy dependency once, at import time.  When it is
# missing, ``cp`` is bound to ``None`` so the module still imports cleanly
# and ``_CUPY_AVAILABLE`` records the outcome for later checks.
try:
    import cupy as cp
except ImportError:
    cp = None  # type: ignore[assignment]
    _CUPY_AVAILABLE = False
else:
    _CUPY_AVAILABLE = True
21
+
22
+
23
def is_gpu_available() -> bool:
    """Report whether CuPy could be imported when this module was loaded.

    Returns:
        The module-level ``_CUPY_AVAILABLE`` flag: ``True`` if ``import cupy``
        succeeded, ``False`` otherwise.
    """
    return _CUPY_AVAILABLE
26
+
27
+
28
def gpu_apply_rules(words: List[str], rules: List[str]) -> List[str]:
    """Apply hashcat rules to *words*, using the GPU path when available.

    This is an experimental feature.  The GPU kernel is not implemented yet,
    so rule application currently always runs on the CPU via
    ``pawpy.mutations.mangle.apply_hashcat_rules``.  When CuPy is missing the
    function now really does fall back to the CPU (as its warning message
    says) instead of returning an empty list.

    Args:
        words: List of base words.
        rules: List of hashcat-style rule strings.

    Returns:
        Sorted list of unique mutated words.
    """
    # Imported lazily so that importing this module does not pull in the
    # mutation engine.
    from pawpy.mutations.mangle import apply_hashcat_rules

    if _CUPY_AVAILABLE:
        logger.info(
            "GPU acceleration: processing %d words with %d rules",
            len(words),
            len(rules),
        )
    else:
        logger.warning("CuPy not installed. Falling back to CPU.")

    # Placeholder for the real GPU implementation, which would:
    #   1. Encode all words into a fixed-width character array on the GPU
    #   2. Encode rules into operation arrays
    #   3. Use CuPy kernels to apply rules in parallel
    #   4. Transfer results back to the CPU
    candidates = set()
    for word in words:
        candidates.update(apply_hashcat_rules(word, rules))

    logger.info("GPU mode complete (CPU fallback): %d candidates", len(candidates))
    return sorted(candidates)
@@ -0,0 +1,99 @@
1
+ """Hybrid attack mode – combine base words with mask patterns.
2
+
3
+ Simulates hashcat -a 6 (word + right mask) and -a 7 (left mask + word).
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import string
10
+ from itertools import product
11
+ from typing import List, Optional
12
+
13
+ logger = logging.getLogger("pawpy.generator.hybrid")
14
+
15
# Punctuation set shared by the ``?s`` and ``?a`` mask classes.
_SPECIAL_CHARS = "!@#$%^&*()_+-=[]{}|;':\",./<>?`~"

# Two-character mask token -> string of characters it expands to.
_MASK_MAP = {
    "?l": string.ascii_lowercase,
    "?u": string.ascii_uppercase,
    "?d": string.digits,
    "?s": _SPECIAL_CHARS,
    "?a": string.ascii_letters + string.digits + _SPECIAL_CHARS,
}
23
+
24
+
25
def _parse_mask(mask: str) -> List[str]:
    """Parse a hashcat-style mask string into a list of character sets.

    Supports the class tokens ``?l``, ``?u``, ``?d``, ``?s`` and ``?a``;
    every other character (including a ``?`` that does not start a known
    token) is treated as a literal.

    Args:
        mask: The mask string to parse.

    Returns:
        One entry per mask position.  Each entry is a *string* whose
        characters are the allowed values at that position; a literal
        yields a one-character string, so the result matches the
        ``List[str]`` annotation for every kind of position.
    """
    charsets: List[str] = []
    pos = 0
    while pos < len(mask):
        if mask[pos] == "?":
            token = mask[pos : pos + 2]
            if token in _MASK_MAP:
                charsets.append(_MASK_MAP[token])
                pos += 2
                continue
        # Literal character: a single-character set.
        charsets.append(mask[pos])
        pos += 1
    return charsets
43
+
44
+
45
def _expand_mask(mask_parts: List[List[str]], max_results: int = 100_000) -> List[str]:
    """Expand a parsed mask into its combinations, with a safety cap.

    Args:
        mask_parts: One character set per mask position.  Each set may be
            any sequence of single-character strings (a plain string works).
        max_results: Maximum number of combinations to return.  A value of
            zero or less yields an empty list.

    Returns:
        Up to *max_results* strings, in ``itertools.product`` order.
    """
    if max_results <= 0:
        return []

    # Compute the true total so the warning reports the real size of the
    # mask rather than a partial product.
    total = 1
    for part in mask_parts:
        total *= len(part)
    if total > max_results:
        logger.warning(
            "Mask produces %d combinations (capped to %d). "
            "Consider using a shorter mask.",
            total,
            max_results,
        )

    results: List[str] = []
    for combo in product(*mask_parts):
        results.append("".join(combo))
        if len(results) >= max_results:
            break
    return results
66
+
67
+
68
def hybrid_generate(
    words: List[str],
    left_mask: Optional[str] = None,
    right_mask: Optional[str] = None,
) -> List[str]:
    """Build hybrid-attack candidates from base words and mask expansions.

    Args:
        words: Base word list.
        left_mask: Hashcat-style mask whose expansions are prepended to each
            word (simulates -a 7).
        right_mask: Hashcat-style mask whose expansions are appended to each
            word (simulates -a 6).

    Returns:
        All word+suffix candidates (if *right_mask* is given) followed by all
        prefix+word candidates (if *left_mask* is given).  Each mask is
        expanded to at most 1,000 values.
    """
    candidates: List[str] = []

    if right_mask:
        suffixes = _expand_mask(_parse_mask(right_mask), max_results=1_000)
        candidates.extend(f"{base}{suffix}" for base in words for suffix in suffixes)

    if left_mask:
        prefixes = _expand_mask(_parse_mask(left_mask), max_results=1_000)
        candidates.extend(f"{prefix}{base}" for base in words for prefix in prefixes)

    return candidates
@@ -0,0 +1,136 @@
1
+ """Billion-scale external merge sort for wordlists.
2
+
3
+ When the candidate set exceeds available memory, this module streams
4
+ candidates to temporary sorted chunks and then merge-sorts them into
5
+ the final output.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import heapq
11
+ import logging
12
+ import os
13
+ import tempfile
14
+ from typing import Generator, List
15
+
16
+ logger = logging.getLogger("pawpy.generator.sorter")
17
+
18
+
19
def _sort_and_write_chunk(lines: List[str], chunk_path: str) -> None:
    """Sort *lines* in place and write them, one per line, to *chunk_path*.

    The file is written as UTF-8; characters that cannot be encoded are
    dropped (``errors="ignore"``).
    """
    lines.sort()
    with open(chunk_path, "w", encoding="utf-8", errors="ignore") as chunk_fh:
        chunk_fh.writelines(entry + "\n" for entry in lines)
25
+
26
+
27
def _iter_chunk_lines(fh) -> Generator[str, None, None]:
    """Yield the non-empty, whitespace-stripped lines of an open chunk file."""
    for raw in fh:
        entry = raw.strip()
        if entry:
            yield entry


def _write_unique(sorted_lines, out_fh) -> int:
    """Write sorted lines to *out_fh*, skipping adjacent duplicates.

    Returns the number of lines written.
    """
    written = 0
    prev = None
    for entry in sorted_lines:
        if entry != prev:
            out_fh.write(entry + "\n")
            written += 1
            prev = entry
    return written


def external_merge_sort(
    lines: Generator[str, None, None],
    output_path: str,
    memory_threshold: int = 500_000_000,
    chunk_line_count: int = 5_000_000,
) -> int:
    """Sort and deduplicate a stream of lines using external merge sort.

    Input lines are stripped and blank lines are skipped.  Lines are
    buffered in memory; whenever the buffer's estimated size reaches
    *memory_threshold* bytes or it holds *chunk_line_count* lines, it is
    sorted and flushed to a temporary chunk file.  If no chunk was ever
    flushed, the buffer is sorted and written directly.  Otherwise the
    sorted chunks are k-way merged into *output_path*, dropping duplicates
    that appear in more than one chunk.

    Args:
        lines: Iterable yielding candidate password strings.
        output_path: Final output file path.
        memory_threshold: Approximate buffer limit in bytes (UTF-8 size of
            the buffered lines plus one byte per line).
        chunk_line_count: Maximum lines per sorted chunk.

    Returns:
        Number of unique lines written.
    """
    buffer: List[str] = []
    chunk_files: List[str] = []
    seen_in_buffer: set = set()
    est_size = 0

    def flush_buffer() -> None:
        nonlocal buffer, seen_in_buffer, est_size
        if not buffer:
            return
        fd, chunk_path = tempfile.mkstemp(suffix=".chunk", prefix="pawpy_")
        # mkstemp returns an open descriptor; close it so it is not leaked
        # (the chunk is reopened by path below).
        os.close(fd)
        # Register the path before writing so a failed write is still
        # cleaned up by the ``finally`` block.
        chunk_files.append(chunk_path)
        _sort_and_write_chunk(buffer, chunk_path)
        logger.info(
            "Flushed chunk %d: %d lines -> %s",
            len(chunk_files),
            len(buffer),
            chunk_path,
        )
        buffer = []
        seen_in_buffer = set()
        est_size = 0

    file_handles = []
    try:
        for line in lines:
            line = line.strip()
            if not line:
                continue

            # Cheap per-chunk deduplication; cross-chunk duplicates are
            # removed during the merge.
            if line not in seen_in_buffer:
                buffer.append(line)
                seen_in_buffer.add(line)
                est_size += len(line.encode("utf-8")) + 1

            if est_size >= memory_threshold or len(buffer) >= chunk_line_count:
                flush_buffer()

        if not chunk_files:
            # Everything fit in memory - sort and write without touching
            # any temporary file.
            buffer.sort()
            with open(output_path, "w", encoding="utf-8") as out_fh:
                return _write_unique(buffer, out_fh)

        # Flush the remainder so it takes part in the merge.
        flush_buffer()

        for path in chunk_files:
            file_handles.append(open(path, "r", encoding="utf-8", errors="ignore"))

        # heapq.merge compares only the line strings, so identical lines in
        # different chunks never force a comparison between file objects.
        merged = heapq.merge(*(_iter_chunk_lines(fh) for fh in file_handles))
        with open(output_path, "w", encoding="utf-8") as out_fh:
            return _write_unique(merged, out_fh)
    finally:
        # Best-effort cleanup of chunk handles and temp files, also when the
        # input iterator or the merge raised.
        for fh in file_handles:
            try:
                fh.close()
            except OSError:
                pass
        for path in chunk_files:
            try:
                os.unlink(path)
            except OSError:
                pass
@@ -0,0 +1,20 @@
1
+ """Mutation engine – transforms base words into password candidates."""
2
+
3
+ from pawpy.mutations.dates import date_permutations
4
+ from pawpy.mutations.keyboard import dynamic_keyboard_walks, static_keyboard_walks
5
+ from pawpy.mutations.leet import leet_speak
6
+ from pawpy.mutations.mangle import apply_hashcat_rules, mangle_rules
7
+ from pawpy.mutations.markov import generate_markov_words, train_markov
8
+ from pawpy.mutations.templates import expand_templates
9
+
10
# Public API of the mutations package, grouped by the module that provides
# each name.
__all__ = [
    # dates
    "date_permutations",
    # keyboard
    "dynamic_keyboard_walks",
    "static_keyboard_walks",
    # leet
    "leet_speak",
    # mangle
    "apply_hashcat_rules",
    "mangle_rules",
    # markov
    "generate_markov_words",
    "train_markov",
    # templates
    "expand_templates",
]
@@ -0,0 +1,72 @@
1
+ """Date permutation engine.
2
+
3
+ Takes dates in DDMMYYYY format and produces many common sub-string
4
+ variants: day, month, year, two-digit year, reversed, and combinations.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from datetime import datetime
10
+ from typing import List, Optional
11
+
12
+
13
def _parse_date(date_str: str) -> Optional[datetime]:
    """Convert a DDMMYYYY string to a ``datetime``.

    Surrounding whitespace is ignored.  Returns ``None`` when the input is
    not exactly eight digits or does not name a valid calendar date.
    """
    candidate = date_str.strip()
    if len(candidate) == 8 and candidate.isdigit():
        try:
            return datetime.strptime(candidate, "%d%m%Y")
        except ValueError:
            pass
    return None
22
+
23
+
24
def date_permutations(date_str: str) -> List[str]:
    """Generate date-based password fragments from a DDMMYYYY date string.

    The fragments are, in order: DD, MM, YYYY, YY, DDMM, MMDD, DDMMYY,
    MMDDYY, DDMMYYYY, MMDDYYYY, YYYYMMDD, YYYYDDMM, then the day-first and
    month-first full dates joined with ``/``, ``-`` and ``.``.  Fragments
    that coincide (for example when day equals month) appear only once, at
    their first position.

    Returns:
        The ordered, de-duplicated fragment list, or an empty list when
        *date_str* cannot be parsed.
    """
    parsed = _parse_date(date_str)
    if parsed is None:
        return []

    day = f"{parsed.day:02d}"
    month = f"{parsed.month:02d}"
    year4 = str(parsed.year)
    year2 = year4[-2:]

    fragments = [
        day,
        month,
        year4,
        year2,
        day + month,
        month + day,
        day + month + year2,
        month + day + year2,
        day + month + year4,
        month + day + year4,
        year4 + month + day,
        year4 + day + month,
    ]
    for sep in ("/", "-", "."):
        fragments.append(sep.join((day, month, year4)))
        fragments.append(sep.join((month, day, year4)))

    # dict preserves insertion order, so this drops repeats while keeping
    # the first occurrence of each fragment.
    return list(dict.fromkeys(fragments))
@@ -0,0 +1,99 @@
1
+ """Keyboard walk generator.
2
+
3
+ Produces password candidates from continuous keyboard walks on QWERTY
4
+ and other common layouts.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Dict, List, Set, Tuple
10
+
11
# QWERTY rows, top to bottom.  A character's index within its row string is
# its column; space characters are placeholders, not keys.
_QWERTY_LAYOUT = [
    "`1234567890-=",
    " qwertyuiop[]\\",
    " asdfghjkl;'",
    " zxcvbnm,./",
]

# key -> (row, col) position in the layout grid.
_QWERTY_POS: Dict[str, Tuple[int, int]] = {
    _ch: (_r, _c)
    for _r, _row in enumerate(_QWERTY_LAYOUT)
    for _c, _ch in enumerate(_row)
    if _ch != " "
}

# Reverse index so each neighbour lookup is a single dict access instead of
# a scan over every key.
_QWERTY_KEY_AT: Dict[Tuple[int, int], str] = {
    _pos: _key for _key, _pos in _QWERTY_POS.items()
}

# The eight surrounding grid offsets, in (row delta, col delta) order.
_NEIGHBOUR_OFFSETS: List[Tuple[int, int]] = [
    (_dr, _dc) for _dr in (-1, 0, 1) for _dc in (-1, 0, 1) if (_dr, _dc) != (0, 0)
]

# key -> neighbouring keys (up to 8), ordered by offset.  Built with
# comprehensions so no loop variables leak into the module namespace.
_QWERTY_ADJ: Dict[str, List[str]] = {
    _key: [
        _QWERTY_KEY_AT[(_kr + _dr, _kc + _dc)]
        for _dr, _dc in _NEIGHBOUR_OFFSETS
        if (_kr + _dr, _kc + _dc) in _QWERTY_KEY_AT
    ]
    for _key, (_kr, _kc) in _QWERTY_POS.items()
}
40
+
41
+
42
# Built-in catalogue of well-known fixed keyboard-walk passwords.
STATIC_WALKS = [
    "qwerty", "qwert", "asdf", "asdfgh", "zxcvbn", "zxcv",
    "qazwsx", "1qaz2wsx", "qweasd", "!@#$%",
    "1234567890", "0987654321",
    "qwertyuiop", "asdfghjkl", "zxcvbnm",
    "1q2w3e4r", "q1w2e3r4", "zaq1xsw2", "1234qwer",
    "poiuytrewq", "lkjhgfdsa", "mnbvcxz",
    "!qaz2wsx3edc", "1qaz!QAZ",
]
69
+
70
+
71
def static_keyboard_walks() -> List[str]:
    """Return a fresh copy of the built-in ``STATIC_WALKS`` patterns.

    A new list is returned on every call, so callers may modify it without
    affecting the module-level catalogue.
    """
    return STATIC_WALKS[:]
74
+
75
+
76
def dynamic_keyboard_walks(min_len: int = 4, max_len: int = 8) -> List[str]:
    """Generate all continuous keyboard walks up to *max_len* characters.

    Starting from every key in the QWERTY layout, a walk is extended one
    adjacent key at a time (keys may be revisited).  Every walk whose length
    is at least *min_len* is collected; walks stop growing once they reach
    *max_len*.

    The traversal uses an explicit stack instead of a list-based FIFO
    queue: ``list.pop(0)`` is linear in the queue length, which is
    quadratic overall for the millions of walks produced.  The visiting
    order does not matter because the result is sorted before returning.

    Note: This can produce a very large number of candidates.  For
    max_len=8 the count is in the millions.  Use with caution.

    Args:
        min_len: Minimum walk length to include in the result.
        max_len: Length at which a walk is no longer extended.

    Returns:
        Sorted list of unique walks.
    """
    results: Set[str] = set()

    # Each stack entry is a walk; its last character is the current key.
    pending: List[str] = list(_QWERTY_POS)
    while pending:
        walk = pending.pop()
        if len(walk) >= min_len:
            results.add(walk)
        if len(walk) >= max_len:
            continue
        for neighbour in _QWERTY_ADJ.get(walk[-1], []):
            pending.append(walk + neighbour)

    return sorted(results)
@@ -0,0 +1,65 @@
1
+ """Leet-speak mutation engine.
2
+
3
+ Supports multiple substitution tables and generates all possible
4
+ combinations of leet substitutions for a given word.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import itertools
10
+ from typing import Dict, List, Set
11
+
12
# Leet substitution tables, one per aggressiveness level.  Each maps a
# lowercase letter to the list of characters that may replace it.
_LEET_BASIC: Dict[str, List[str]] = {
    "a": ["@"],
    "e": ["3"],
    "i": ["1"],
    "o": ["0"],
    "s": ["$"],
    "t": ["7"],
}

_LEET_EXTENDED: Dict[str, List[str]] = {
    "a": ["@"],
    "e": ["3"],
    "i": ["1", "!"],
    "o": ["0"],
    "s": ["$", "5"],
    "t": ["7"],
}

_LEET_AGGRESSIVE: Dict[str, List[str]] = {
    "a": ["@", "4"],
    "e": ["3"],
    "i": ["1", "!"],
    "o": ["0"],
    "s": ["$", "5"],
    "t": ["7"],
    "l": ["1"],
    "b": ["8"],
    "g": ["9"],
    "h": ["#"],
}

# Index 0 is level 1 (basic), index 1 is level 2 (extended), index 2 is
# level 3 (aggressive).
_LEET_MAPS: List[Dict[str, List[str]]] = [
    _LEET_BASIC,
    _LEET_EXTENDED,
    _LEET_AGGRESSIVE,
]
32
+
33
+
34
def leet_speak(word: str, level: int = 2) -> List[str]:
    """Generate leet-speak variations of *word*.

    The word is lower-cased, then for each character that has substitutions
    in the selected table every combination of original vs. substituted
    forms is produced.  The unmodified input *word* is always part of the
    result, even when it contains uppercase characters that lower-casing
    would otherwise discard.

    Args:
        word: The base word to leetify.
        level: Substitution table to use (1=basic, 2=extended, 3=aggressive).
            Values outside that range are clamped to the nearest level.

    Returns:
        Sorted list of unique leet-speak variants.
    """
    level = max(1, min(level, len(_LEET_MAPS)))
    table = _LEET_MAPS[level - 1]

    # One option list per character position: the character itself followed
    # by its substitutions (if any).
    char_options: List[List[str]] = [
        [ch, *table.get(ch, [])] for ch in word.lower()
    ]

    results: Set[str] = {"".join(combo) for combo in itertools.product(*char_options)}
    # Honour the documented contract: the original spelling is always included.
    results.add(word)

    return sorted(results)