pawpy-cli 1.0.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pawpy/config.py ADDED
@@ -0,0 +1,60 @@
1
+ """Global configuration object for Pawpy."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import multiprocessing
6
+ from dataclasses import dataclass, field
7
+ from typing import List, Optional
8
+
9
+
10
def _default_thread_count() -> int:
    """Return the host CPU count, or 4 when it cannot be determined."""
    try:
        return multiprocessing.cpu_count()
    except NotImplementedError:
        # cpu_count() raises instead of returning a falsy value, so the
        # fallback must be an exception handler rather than ``or 4``.
        return 4


@dataclass
class PawpyConfig:
    """Central configuration holding every runtime setting.

    Every field has a default, so ``PawpyConfig()`` is a valid configuration.
    The trailing comments name the command-line flag noted for each field.
    """

    # --- I/O ---
    output_file: str = "pawpy_wordlist.txt"
    profile_json: Optional[str] = None  # -j / --import-json
    multi_json: Optional[str] = None  # --multi
    rule_file: Optional[str] = None  # --rules
    templates: List[str] = field(default_factory=list)  # --template (repeatable)

    # --- Hybrid masks ---
    hybrid_left: Optional[str] = None  # --hybrid-left
    hybrid_right: Optional[str] = None  # --hybrid-right

    # --- Markov ---
    markov: bool = False
    markov_order: int = 2
    markov_count: int = 5000

    # --- Scoring / filtering ---
    min_strength: Optional[int] = None  # --min-strength (0-4)
    min_length: Optional[int] = None
    require_upper: bool = False
    require_lower: bool = False
    require_digit: bool = False
    require_special: bool = False

    # --- Modes ---
    lite: bool = False  # --lite
    extreme: bool = False  # --extreme
    gpu: bool = False  # --gpu

    # --- Performance ---
    threads: int = field(default_factory=_default_thread_count)

    # --- Internal ---
    memory_threshold: int = 500_000_000  # bytes — switch to external sort above this
    chunk_size: int = 10_000_000  # lines per sorted chunk

    @property
    def is_lite(self) -> bool:
        """True when lite mode is requested and extreme mode is not."""
        # Extreme wins when both flags are set.
        return self.lite and not self.extreme

    @property
    def is_extreme(self) -> bool:
        """True when extreme mode is requested."""
        return self.extreme

    def effective_threads(self) -> int:
        """Return ``threads`` clamped to the range 1 .. 4 x CPU count."""
        return max(1, min(self.threads, _default_thread_count() * 4))
pawpy/data/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Embedded data and auto-update utilities."""
2
+
3
+ from pawpy.data.common_passwords import TOP_10K, get_common_passwords
4
+ from pawpy.data.updater import update_common_passwords
5
+
6
+ __all__ = ["get_common_passwords", "TOP_10K", "update_common_passwords"]
@@ -0,0 +1,139 @@
1
+ """Embedded top common passwords list.
2
+
3
+ This module contains a curated list of the most common passwords derived
4
+ from publicly available breach data (SecLists). The full 10,000-entry
5
+ list can be downloaded via ``pawpy update-passwords`` which fetches the
6
+ latest version from the SecLists repository.
7
+
8
+ Only the top 100 entries are embedded here to keep the package size
9
+ reasonable. The updater stores the full list alongside this file.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from pathlib import Path
15
+ from typing import List
16
+
17
+ # Top 100 most common passwords (from public breach analyses)
18
+ TOP_10K: List[str] = [
19
+ "123456",
20
+ "password",
21
+ "12345678",
22
+ "qwerty",
23
+ "123456789",
24
+ "12345",
25
+ "1234",
26
+ "111111",
27
+ "1234567",
28
+ "dragon",
29
+ "123123",
30
+ "baseball",
31
+ "abc123",
32
+ "football",
33
+ "monkey",
34
+ "letmein",
35
+ "696969",
36
+ "shadow",
37
+ "master",
38
+ "666666",
39
+ "qwertyuiop",
40
+ "123321",
41
+ "mustang",
42
+ "1234567890",
43
+ "michael",
44
+ "654321",
45
+ "pussy",
46
+ "superman",
47
+ "1qaz2wsx",
48
+ "7777777",
49
+ "fuckyou",
50
+ "121212",
51
+ "000000",
52
+ "qazwsx",
53
+ "123qwe",
54
+ "killer",
55
+ "trustno1",
56
+ "jordan",
57
+ "jennifer",
58
+ "zxcvbnm",
59
+ "asdfgh",
60
+ "hunter",
61
+ "buster",
62
+ "soccer",
63
+ "harley",
64
+ "batman",
65
+ "andrew",
66
+ "tigger",
67
+ "sunshine",
68
+ "iloveyou",
69
+ "fuckme",
70
+ "2000",
71
+ "charlie",
72
+ "robert",
73
+ "thomas",
74
+ "hockey",
75
+ "ranger",
76
+ "daniel",
77
+ "starwars",
78
+ "klaster",
79
+ "112233",
80
+ "george",
81
+ "asshole",
82
+ "computer",
83
+ "michelle",
84
+ "jessica",
85
+ "pepper",
86
+ "1111",
87
+ "zxcvbn",
88
+ "555555",
89
+ "131313",
90
+ "freedom",
91
+ "777777",
92
+ "pass",
93
+ "fuck",
94
+ "maggie",
95
+ "159753",
96
+ "aaaaaa",
97
+ "ginger",
98
+ "princess",
99
+ "joshua",
100
+ "cheese",
101
+ "amanda",
102
+ "summer",
103
+ "love",
104
+ "ashley",
105
+ "6969",
106
+ "nicole",
107
+ "chelsea",
108
+ "biteme",
109
+ "matthew",
110
+ "access",
111
+ "yankees",
112
+ "987654321",
113
+ "dallas",
114
+ "austin",
115
+ "thunder",
116
+ "taylor",
117
+ "matrix",
118
+ ]
119
+
120
+ # Path where the updater stores the full SecLists top passwords
121
+ _SECLISTS_CACHE = Path(__file__).parent / "_seclists_top10k.txt"
122
+
123
+
124
def get_common_passwords() -> List[str]:
    """Return the common passwords list.

    If the full SecLists file has been downloaded (via
    ``pawpy update-passwords``) and contains at least one non-blank line,
    its stripped lines are returned. Otherwise — the file is missing,
    unreadable, or empty — the embedded list is returned.

    Returns:
        A new list on every call, so callers may mutate the result without
        affecting the module-level ``TOP_10K`` constant.
    """
    try:
        with open(_SECLISTS_CACHE, "r", encoding="utf-8", errors="ignore") as fh:
            passwords = [entry for entry in map(str.strip, fh) if entry]
    except OSError:
        # Covers a missing cache file as well as one that cannot be read;
        # either way the embedded list is still usable.
        passwords = []
    return passwords or list(TOP_10K)
pawpy/data/updater.py ADDED
@@ -0,0 +1,49 @@
1
+ """Auto-update utility for common password lists.
2
+
3
+ Downloads the latest top passwords from the SecLists repository.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import urllib.request
10
+ from pathlib import Path
11
+ from typing import Optional
12
+
13
logger = logging.getLogger("pawpy.updater")

_SECLISTS_URL = (
    "https://raw.githubusercontent.com/danielmiessler/SecLists/"
    "master/Passwords/Common-Credentials/10k-most-common.txt"
)
_CACHE_PATH = Path(__file__).parent / "_seclists_top10k.txt"


def update_common_passwords(
    url: Optional[str] = None, output: Optional[str] = None
) -> str:
    """Download the latest common passwords list from SecLists.

    The payload is first written to a ``.part`` file next to the destination
    and only moved into place once the download has completed, so a failed or
    interrupted transfer never replaces an existing list with a partial one.

    Args:
        url: Override URL (default: SecLists 10k-most-common.txt).
        output: Override output path (default: alongside this module).

    Returns:
        Path to the downloaded file.

    Raises:
        Exception: Any error raised while downloading or writing is logged
            and re-raised unchanged.
    """
    import os
    import shutil

    url = url or _SECLISTS_URL
    output_path = Path(output) if output else _CACHE_PATH
    partial_path = output_path.with_name(output_path.name + ".part")

    logger.info("Downloading common passwords from: %s", url)
    try:
        # The timeout keeps a stalled connection from hanging the CLI forever.
        with urllib.request.urlopen(url, timeout=30) as response:
            with open(partial_path, "wb") as tmp:
                shutil.copyfileobj(response, tmp)

        # Count lines
        with open(partial_path, "r", encoding="utf-8", errors="ignore") as fh:
            count = sum(1 for _ in fh)

        # Atomic swap: readers see either the old list or the complete new one.
        os.replace(partial_path, output_path)
        logger.info("Downloaded %d passwords to %s", count, output_path)
        return str(output_path)
    except Exception as e:
        logger.error("Failed to download: %s", e)
        raise
    finally:
        # After a successful replace the partial file no longer exists;
        # after a failure this removes whatever was written so far.
        try:
            os.unlink(partial_path)
        except OSError:
            pass
@@ -0,0 +1 @@
1
+ """Password policy filtering subsystem."""
@@ -0,0 +1,59 @@
1
+ """Password complexity policy filter.
2
+
3
+ Enforces common password policy requirements such as minimum length
4
+ and character class requirements.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import re
10
+ from typing import Optional
11
+
12
+
13
class PolicyFilter:
    """Check whether a password meets a given complexity policy.

    All checks are optional – only enabled checks are enforced. Character
    classes are ASCII-based: upper is ``A-Z``, lower is ``a-z``, digit is
    ``0-9`` and "special" is any character outside those three ranges.
    """

    # Compiled once for the class; check() is called once per candidate.
    _UPPER_RE = re.compile(r"[A-Z]")
    _LOWER_RE = re.compile(r"[a-z]")
    _DIGIT_RE = re.compile(r"[0-9]")
    _SPECIAL_RE = re.compile(r"[^A-Za-z0-9]")

    def __init__(
        self,
        min_length: Optional[int] = None,
        require_upper: bool = False,
        require_lower: bool = False,
        require_digit: bool = False,
        require_special: bool = False,
    ) -> None:
        """Store the policy.

        Args:
            min_length: Minimum accepted length, or None to skip the check.
            require_upper: Require at least one character in ``A-Z``.
            require_lower: Require at least one character in ``a-z``.
            require_digit: Require at least one character in ``0-9``.
            require_special: Require at least one character outside
                ``A-Za-z0-9``.
        """
        self.min_length = min_length
        self.require_upper = require_upper
        self.require_lower = require_lower
        self.require_digit = require_digit
        self.require_special = require_special

    def check(self, password: str) -> bool:
        """Return True if *password* passes all enabled policy checks."""
        if self.min_length is not None and len(password) < self.min_length:
            return False
        if self.require_upper and not self._UPPER_RE.search(password):
            return False
        if self.require_lower and not self._LOWER_RE.search(password):
            return False
        if self.require_digit and not self._DIGIT_RE.search(password):
            return False
        if self.require_special and not self._SPECIAL_RE.search(password):
            return False
        return True

    def __repr__(self) -> str:
        """Return ``PolicyFilter(...)`` listing only the enabled checks."""
        checks = []
        # Same ``is not None`` test as check(), so min_length=0 is reported.
        if self.min_length is not None:
            checks.append(f"min_len={self.min_length}")
        if self.require_upper:
            checks.append("upper")
        if self.require_lower:
            checks.append("lower")
        if self.require_digit:
            checks.append("digit")
        if self.require_special:
            checks.append("special")
        return f"PolicyFilter({', '.join(checks)})"
@@ -0,0 +1,5 @@
1
+ """Password generation pipeline."""
2
+
3
+ from pawpy.generator.core import PipelineOrchestrator
4
+
5
+ __all__ = ["PipelineOrchestrator"]
@@ -0,0 +1,314 @@
1
+ """Main pipeline orchestrator – coordinates all mutation and output stages."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ from datetime import datetime
8
+ from typing import Any, Dict, List, Optional, Set
9
+
10
+ from rich.console import Console
11
+
12
+ from pawpy.config import PawpyConfig
13
+ from pawpy.generator.hybrid import hybrid_generate
14
+ from pawpy.mutations.dates import date_permutations
15
+ from pawpy.mutations.keyboard import dynamic_keyboard_walks, static_keyboard_walks
16
+ from pawpy.mutations.leet import leet_speak
17
+ from pawpy.mutations.mangle import apply_hashcat_rules, load_rules_file, mangle_rules
18
+ from pawpy.mutations.markov import generate_markov_words
19
+ from pawpy.mutations.templates import expand_templates
20
+ from pawpy.utils import make_progress
21
+
22
+ console = Console()
23
+ logger = logging.getLogger("pawpy.generator")
24
+
25
+
26
class PipelineOrchestrator:
    """Orchestrates the full password generation pipeline.

    Stages:
        1. Collect base words + dates from profile
        2. Generate mutations (leet, mangle, dates, keyboard, templates,
           rules, Markov, hybrid, year blends)
        3. Merge common passwords
        4. Optional scoring & policy filtering
        5. Sort and deduplicate
        6. Write output
    """

    def __init__(self, config: PawpyConfig, profile: Dict[str, Any]) -> None:
        """Store the configuration and the profile the wordlist is built from."""
        self.config = config
        self.profile = profile
        self._candidates: Set[str] = set()
        self._temp_dir: Optional[str] = None

    def _get_base_words(self) -> List[str]:
        """Extract base words from the profile.

        Every non-blank string value becomes one lowercase word. List values
        contribute each non-blank string item. For the ``children`` and
        ``keywords`` keys a plain string is additionally split on commas.
        Non-string values are ignored.

        Returns:
            The unique words, sorted.
        """
        # Handle both single-profile and merged multi-profile formats
        list_fields = {"children", "keywords"}
        words: Set[str] = set()
        for key, value in self.profile.items():
            if isinstance(value, list):
                items = [item for item in value if isinstance(item, str)]
            elif isinstance(value, str):
                items = value.split(",") if key in list_fields else [value]
            else:
                continue
            for item in items:
                cleaned = item.strip()
                if cleaned:
                    words.add(cleaned.lower())
        return sorted(words)

    def _get_dates(self) -> List[str]:
        """Extract date strings from the profile.

        Reads the ``birthdate`` and ``partner_bdate`` keys; each may hold a
        single string or a list of strings.

        Returns:
            The unique, stripped, non-blank date strings, sorted.
        """
        dates = set()
        for field in ("birthdate", "partner_bdate"):
            val = self.profile.get(field, "")
            if isinstance(val, str) and val.strip():
                dates.add(val.strip())
            elif isinstance(val, list):
                for d in val:
                    if isinstance(d, str) and d.strip():
                        dates.add(d.strip())
        return sorted(dates)

    def _add_candidates(self, candidates: List[str]) -> int:
        """Add non-empty candidates to the in-memory set.

        Returns:
            The number of candidates that were not already present.
        """
        before = len(self._candidates)
        self._candidates.update(c for c in candidates if c)
        return len(self._candidates) - before

    def _year_blend(self, word: str) -> List[str]:
        """Append/prepend years 1990..current year to a word.

        Each year is used in both its four-digit and two-digit form.
        """
        current_year = datetime.now().year
        results = []
        for year in range(1990, current_year + 1):
            short_year = str(year)[-2:]
            results.append(f"{word}{year}")
            results.append(f"{word}{short_year}")
            results.append(f"{year}{word}")
            results.append(f"{short_year}{word}")
        return results

    def _two_word_combos(self, words: List[str]) -> List[str]:
        """Generate two-word combinations with separators.

        Only the first 30 words are used. Each unordered pair is emitted in
        list order (earlier word first) with every separator and with each
        word both as given and capitalized.
        """
        if len(words) > 30:
            words = words[:30]  # cap to prevent explosion
        separators = ["", "_", "-", ".", "@", "#", "!", " "]
        results = []
        for i, w1 in enumerate(words):
            w1_cap = w1.capitalize()
            for w2 in words[i + 1 :]:
                w2_cap = w2.capitalize()
                for sep in separators:
                    results.append(f"{w1}{sep}{w2}")
                    results.append(f"{w1}{sep}{w2_cap}")
                    results.append(f"{w1_cap}{sep}{w2}")
                    results.append(f"{w1_cap}{sep}{w2_cap}")
        return results

    def run(self) -> str:
        """Execute the full pipeline. Returns the output file path.

        Candidates are accumulated in memory, optionally scored and filtered,
        then sorted, deduplicated and written one per line to
        ``config.output_file``.
        """
        from pawpy.data.common_passwords import get_common_passwords

        progress = make_progress()
        # Resolved once and shared by the Markov corpus and the final merge.
        # get_common_passwords() prefers the downloaded SecLists file over the
        # embedded list, which is what makes ``pawpy update-passwords`` count.
        common_passwords = get_common_passwords()

        # --- Stage 1: Base words ---
        base_words = self._get_base_words()
        dates = self._get_dates()
        logger.info("Base words: %d, Dates: %d", len(base_words), len(dates))

        task_base = progress.add_task("[cyan]Collecting base words...", total=None)
        self._add_candidates(base_words)
        self._add_candidates([w.capitalize() for w in base_words])
        self._add_candidates([w.upper() for w in base_words])
        progress.update(task_base, completed=True)

        # --- Stage 2: Date permutations ---
        task_dates = progress.add_task("[cyan]Date permutations...", total=None)
        date_frags = []
        for d in dates:
            date_frags.extend(date_permutations(d))
        self._add_candidates(date_frags)
        # Combine dates with base words
        for word in base_words:
            for frag in date_frags[:20]:  # limit to prevent explosion
                self._candidates.add(f"{word}{frag}")
                self._candidates.add(f"{frag}{word}")
        progress.update(task_dates, completed=True)

        # --- Stage 3: Leet speak ---
        task_leet = progress.add_task("[cyan]Leet speak...", total=None)
        leet_variants = []
        for word in base_words:
            leet_variants.extend(leet_speak(word, level=2))
        self._add_candidates(leet_variants)
        progress.update(task_leet, completed=True)

        # --- Stage 4: Common mangle rules ---
        task_mangle = progress.add_task("[cyan]Common mangle rules...", total=None)
        mangled = []
        for word in base_words:
            mangled.extend(mangle_rules(word))
        self._add_candidates(mangled)
        progress.update(task_mangle, completed=True)

        # --- Stage 5: Keyboard walks ---
        task_kb = progress.add_task("[cyan]Keyboard walks...", total=None)
        self._add_candidates(static_keyboard_walks())
        if self.config.is_extreme and not self.config.is_lite:
            walks = dynamic_keyboard_walks(min_len=4, max_len=6)
            self._add_candidates(walks)
        progress.update(task_kb, completed=True)

        # --- Stage 6: Hashcat rules ---
        if self.config.rule_file:
            task_rules = progress.add_task(
                "[cyan]Applying hashcat rules...", total=None
            )
            rules = load_rules_file(self.config.rule_file)
            for word in base_words:
                self._add_candidates(apply_hashcat_rules(word, rules))
            progress.update(task_rules, completed=True)

        # --- Stage 7: Custom templates ---
        if self.config.templates:
            task_tmpl = progress.add_task("[cyan]Expanding templates...", total=None)
            expanded = expand_templates(self.config.templates, self.profile)
            self._add_candidates(expanded)
            progress.update(task_tmpl, completed=True)

        # --- Stage 8: Markov blending ---
        if self.config.markov and not self.config.is_lite:
            task_markov = progress.add_task("[cyan]Markov blending...", total=None)
            # Use base words + common passwords as training corpus
            corpus = base_words + common_passwords[:2000]
            markov_words = generate_markov_words(
                corpus,
                count=self.config.markov_count,
                order=self.config.markov_order,
            )
            self._add_candidates(markov_words)
            progress.update(task_markov, completed=True)

        # --- Stage 9: Year-word blends ---
        if self.config.is_extreme or not self.config.is_lite:
            task_years = progress.add_task("[cyan]Year-word blends...", total=None)
            year_blends = []
            for word in base_words:
                year_blends.extend(self._year_blend(word))
            self._add_candidates(year_blends)
            progress.update(task_years, completed=True)

        # --- Stage 10: Two-word combinations ---
        if not self.config.is_lite:
            task_combo = progress.add_task("[cyan]Two-word combinations...", total=None)
            combos = self._two_word_combos(base_words)
            self._add_candidates(combos)
            progress.update(task_combo, completed=True)

        # --- Stage 11: Hybrid attacks ---
        if self.config.hybrid_left or self.config.hybrid_right:
            task_hybrid = progress.add_task("[cyan]Hybrid mask attacks...", total=None)
            hybrid_cands = hybrid_generate(
                base_words,
                left_mask=self.config.hybrid_left,
                right_mask=self.config.hybrid_right,
            )
            self._add_candidates(hybrid_cands)
            progress.update(task_hybrid, completed=True)

        # --- Stage 12: Common passwords ---
        task_common = progress.add_task("[cyan]Merging common passwords...", total=None)
        self._add_candidates(common_passwords)
        progress.update(task_common, completed=True)

        progress.stop()
        console.print(
            f"\n[green]✓[/green] Total candidates before filtering: [bold]{len(self._candidates):,}[/bold]"
        )

        # --- Stage 13: Scoring & Filtering ---
        candidates_list = list(self._candidates)

        if self.config.min_strength is not None:
            from pawpy.scoring.scorer import score_and_prune

            task_score = progress.add_task("[cyan]Scoring passwords...", total=None)
            candidates_list = score_and_prune(candidates_list, self.config.min_strength)
            progress.update(task_score, completed=True)
            progress.stop()
            console.print(
                f"[green]✓[/green] After scoring: [bold]{len(candidates_list):,}[/bold]"
            )

        if any(
            (
                self.config.min_length,
                self.config.require_upper,
                self.config.require_lower,
                self.config.require_digit,
                self.config.require_special,
            )
        ):
            from pawpy.filters.policy import PolicyFilter

            pf = PolicyFilter(
                min_length=self.config.min_length,
                require_upper=self.config.require_upper,
                require_lower=self.config.require_lower,
                require_digit=self.config.require_digit,
                require_special=self.config.require_special,
            )
            task_policy = progress.add_task(
                "[cyan]Applying policy filter...", total=None
            )
            candidates_list = [c for c in candidates_list if pf.check(c)]
            progress.update(task_policy, completed=True)
            progress.stop()
            console.print(
                f"[green]✓[/green] After policy filter: [bold]{len(candidates_list):,}[/bold]"
            )

        # --- Stage 14: Sort & Dedup ---
        # Duplicates shouldn't occur (the source is a set), but the scorer's
        # output is deduplicated again as a safety net.
        candidates_list = sorted(set(candidates_list))

        # --- Stage 15: Write output ---
        output_path = self.config.output_file
        task_write = progress.add_task(
            "[cyan]Writing wordlist...", total=len(candidates_list)
        )
        with open(output_path, "w", encoding="utf-8") as fh:
            for i, word in enumerate(candidates_list):
                fh.write(word + "\n")
                if i % 100_000 == 0:
                    progress.update(task_write, completed=i)
        progress.update(task_write, completed=len(candidates_list))
        progress.stop()

        size_mb = os.path.getsize(output_path) / (1024 * 1024)
        console.print(
            f"\n[bold green]✓ Wordlist generated![/bold green]\n"
            f"  File: [cyan]{output_path}[/cyan]\n"
            f"  Entries: [bold]{len(candidates_list):,}[/bold]\n"
            f"  Size: [bold]{size_mb:.2f} MB[/bold]\n"
        )
        return output_path