pointblank 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pointblank/__init__.py +44 -1
- pointblank/_utils_llms_txt.py +20 -0
- pointblank/data/api-docs.txt +793 -1
- pointblank/field.py +1507 -0
- pointblank/generate/__init__.py +17 -0
- pointblank/generate/base.py +49 -0
- pointblank/generate/generators.py +573 -0
- pointblank/generate/regex.py +217 -0
- pointblank/locales/__init__.py +1476 -0
- pointblank/locales/data/AR/address.json +73 -0
- pointblank/locales/data/AR/company.json +60 -0
- pointblank/locales/data/AR/internet.json +19 -0
- pointblank/locales/data/AR/misc.json +7 -0
- pointblank/locales/data/AR/person.json +39 -0
- pointblank/locales/data/AR/text.json +38 -0
- pointblank/locales/data/AT/address.json +84 -0
- pointblank/locales/data/AT/company.json +65 -0
- pointblank/locales/data/AT/internet.json +20 -0
- pointblank/locales/data/AT/misc.json +8 -0
- pointblank/locales/data/AT/person.json +17 -0
- pointblank/locales/data/AT/text.json +35 -0
- pointblank/locales/data/AU/address.json +83 -0
- pointblank/locales/data/AU/company.json +65 -0
- pointblank/locales/data/AU/internet.json +20 -0
- pointblank/locales/data/AU/misc.json +8 -0
- pointblank/locales/data/AU/person.json +17 -0
- pointblank/locales/data/AU/text.json +35 -0
- pointblank/locales/data/BE/address.json +225 -0
- pointblank/locales/data/BE/company.json +129 -0
- pointblank/locales/data/BE/internet.json +36 -0
- pointblank/locales/data/BE/misc.json +6 -0
- pointblank/locales/data/BE/person.json +62 -0
- pointblank/locales/data/BE/text.json +38 -0
- pointblank/locales/data/BG/address.json +75 -0
- pointblank/locales/data/BG/company.json +60 -0
- pointblank/locales/data/BG/internet.json +19 -0
- pointblank/locales/data/BG/misc.json +7 -0
- pointblank/locales/data/BG/person.json +40 -0
- pointblank/locales/data/BG/text.json +38 -0
- pointblank/locales/data/BR/address.json +98 -0
- pointblank/locales/data/BR/company.json +65 -0
- pointblank/locales/data/BR/internet.json +20 -0
- pointblank/locales/data/BR/misc.json +8 -0
- pointblank/locales/data/BR/person.json +17 -0
- pointblank/locales/data/BR/text.json +35 -0
- pointblank/locales/data/CA/address.json +747 -0
- pointblank/locales/data/CA/company.json +120 -0
- pointblank/locales/data/CA/internet.json +24 -0
- pointblank/locales/data/CA/misc.json +11 -0
- pointblank/locales/data/CA/person.json +1033 -0
- pointblank/locales/data/CA/text.json +58 -0
- pointblank/locales/data/CH/address.json +184 -0
- pointblank/locales/data/CH/company.json +112 -0
- pointblank/locales/data/CH/internet.json +20 -0
- pointblank/locales/data/CH/misc.json +10 -0
- pointblank/locales/data/CH/person.json +64 -0
- pointblank/locales/data/CH/text.json +45 -0
- pointblank/locales/data/CL/address.json +71 -0
- pointblank/locales/data/CL/company.json +60 -0
- pointblank/locales/data/CL/internet.json +19 -0
- pointblank/locales/data/CL/misc.json +7 -0
- pointblank/locales/data/CL/person.json +38 -0
- pointblank/locales/data/CL/text.json +38 -0
- pointblank/locales/data/CN/address.json +124 -0
- pointblank/locales/data/CN/company.json +76 -0
- pointblank/locales/data/CN/internet.json +20 -0
- pointblank/locales/data/CN/misc.json +8 -0
- pointblank/locales/data/CN/person.json +50 -0
- pointblank/locales/data/CN/text.json +38 -0
- pointblank/locales/data/CO/address.json +76 -0
- pointblank/locales/data/CO/company.json +60 -0
- pointblank/locales/data/CO/internet.json +19 -0
- pointblank/locales/data/CO/misc.json +7 -0
- pointblank/locales/data/CO/person.json +38 -0
- pointblank/locales/data/CO/text.json +38 -0
- pointblank/locales/data/CY/address.json +62 -0
- pointblank/locales/data/CY/company.json +60 -0
- pointblank/locales/data/CY/internet.json +19 -0
- pointblank/locales/data/CY/misc.json +7 -0
- pointblank/locales/data/CY/person.json +38 -0
- pointblank/locales/data/CY/text.json +38 -0
- pointblank/locales/data/CZ/address.json +70 -0
- pointblank/locales/data/CZ/company.json +61 -0
- pointblank/locales/data/CZ/internet.json +19 -0
- pointblank/locales/data/CZ/misc.json +7 -0
- pointblank/locales/data/CZ/person.json +40 -0
- pointblank/locales/data/CZ/text.json +38 -0
- pointblank/locales/data/DE/address.json +756 -0
- pointblank/locales/data/DE/company.json +101 -0
- pointblank/locales/data/DE/internet.json +22 -0
- pointblank/locales/data/DE/misc.json +11 -0
- pointblank/locales/data/DE/person.json +1026 -0
- pointblank/locales/data/DE/text.json +50 -0
- pointblank/locales/data/DK/address.json +231 -0
- pointblank/locales/data/DK/company.json +65 -0
- pointblank/locales/data/DK/internet.json +20 -0
- pointblank/locales/data/DK/misc.json +7 -0
- pointblank/locales/data/DK/person.json +45 -0
- pointblank/locales/data/DK/text.json +43 -0
- pointblank/locales/data/EE/address.json +69 -0
- pointblank/locales/data/EE/company.json +60 -0
- pointblank/locales/data/EE/internet.json +19 -0
- pointblank/locales/data/EE/misc.json +7 -0
- pointblank/locales/data/EE/person.json +39 -0
- pointblank/locales/data/EE/text.json +38 -0
- pointblank/locales/data/ES/address.json +3086 -0
- pointblank/locales/data/ES/company.json +644 -0
- pointblank/locales/data/ES/internet.json +25 -0
- pointblank/locales/data/ES/misc.json +11 -0
- pointblank/locales/data/ES/person.json +488 -0
- pointblank/locales/data/ES/text.json +49 -0
- pointblank/locales/data/FI/address.json +93 -0
- pointblank/locales/data/FI/company.json +65 -0
- pointblank/locales/data/FI/internet.json +20 -0
- pointblank/locales/data/FI/misc.json +8 -0
- pointblank/locales/data/FI/person.json +17 -0
- pointblank/locales/data/FI/text.json +35 -0
- pointblank/locales/data/FR/address.json +619 -0
- pointblank/locales/data/FR/company.json +111 -0
- pointblank/locales/data/FR/internet.json +22 -0
- pointblank/locales/data/FR/misc.json +11 -0
- pointblank/locales/data/FR/person.json +1066 -0
- pointblank/locales/data/FR/text.json +50 -0
- pointblank/locales/data/GB/address.json +5759 -0
- pointblank/locales/data/GB/company.json +131 -0
- pointblank/locales/data/GB/internet.json +24 -0
- pointblank/locales/data/GB/misc.json +45 -0
- pointblank/locales/data/GB/person.json +578 -0
- pointblank/locales/data/GB/text.json +61 -0
- pointblank/locales/data/GR/address.json +68 -0
- pointblank/locales/data/GR/company.json +61 -0
- pointblank/locales/data/GR/internet.json +19 -0
- pointblank/locales/data/GR/misc.json +7 -0
- pointblank/locales/data/GR/person.json +39 -0
- pointblank/locales/data/GR/text.json +38 -0
- pointblank/locales/data/HK/address.json +79 -0
- pointblank/locales/data/HK/company.json +69 -0
- pointblank/locales/data/HK/internet.json +19 -0
- pointblank/locales/data/HK/misc.json +7 -0
- pointblank/locales/data/HK/person.json +42 -0
- pointblank/locales/data/HK/text.json +38 -0
- pointblank/locales/data/HR/address.json +73 -0
- pointblank/locales/data/HR/company.json +60 -0
- pointblank/locales/data/HR/internet.json +19 -0
- pointblank/locales/data/HR/misc.json +7 -0
- pointblank/locales/data/HR/person.json +38 -0
- pointblank/locales/data/HR/text.json +38 -0
- pointblank/locales/data/HU/address.json +70 -0
- pointblank/locales/data/HU/company.json +61 -0
- pointblank/locales/data/HU/internet.json +19 -0
- pointblank/locales/data/HU/misc.json +7 -0
- pointblank/locales/data/HU/person.json +40 -0
- pointblank/locales/data/HU/text.json +38 -0
- pointblank/locales/data/ID/address.json +68 -0
- pointblank/locales/data/ID/company.json +61 -0
- pointblank/locales/data/ID/internet.json +19 -0
- pointblank/locales/data/ID/misc.json +7 -0
- pointblank/locales/data/ID/person.json +40 -0
- pointblank/locales/data/ID/text.json +38 -0
- pointblank/locales/data/IE/address.json +643 -0
- pointblank/locales/data/IE/company.json +140 -0
- pointblank/locales/data/IE/internet.json +24 -0
- pointblank/locales/data/IE/misc.json +44 -0
- pointblank/locales/data/IE/person.json +55 -0
- pointblank/locales/data/IE/text.json +60 -0
- pointblank/locales/data/IN/address.json +92 -0
- pointblank/locales/data/IN/company.json +65 -0
- pointblank/locales/data/IN/internet.json +20 -0
- pointblank/locales/data/IN/misc.json +8 -0
- pointblank/locales/data/IN/person.json +52 -0
- pointblank/locales/data/IN/text.json +39 -0
- pointblank/locales/data/IS/address.json +63 -0
- pointblank/locales/data/IS/company.json +61 -0
- pointblank/locales/data/IS/internet.json +19 -0
- pointblank/locales/data/IS/misc.json +7 -0
- pointblank/locales/data/IS/person.json +44 -0
- pointblank/locales/data/IS/text.json +38 -0
- pointblank/locales/data/IT/address.json +192 -0
- pointblank/locales/data/IT/company.json +137 -0
- pointblank/locales/data/IT/internet.json +20 -0
- pointblank/locales/data/IT/misc.json +10 -0
- pointblank/locales/data/IT/person.json +70 -0
- pointblank/locales/data/IT/text.json +44 -0
- pointblank/locales/data/JP/address.json +713 -0
- pointblank/locales/data/JP/company.json +113 -0
- pointblank/locales/data/JP/internet.json +22 -0
- pointblank/locales/data/JP/misc.json +10 -0
- pointblank/locales/data/JP/person.json +1057 -0
- pointblank/locales/data/JP/text.json +51 -0
- pointblank/locales/data/KR/address.json +77 -0
- pointblank/locales/data/KR/company.json +68 -0
- pointblank/locales/data/KR/internet.json +19 -0
- pointblank/locales/data/KR/misc.json +7 -0
- pointblank/locales/data/KR/person.json +40 -0
- pointblank/locales/data/KR/text.json +38 -0
- pointblank/locales/data/LT/address.json +66 -0
- pointblank/locales/data/LT/company.json +60 -0
- pointblank/locales/data/LT/internet.json +19 -0
- pointblank/locales/data/LT/misc.json +7 -0
- pointblank/locales/data/LT/person.json +42 -0
- pointblank/locales/data/LT/text.json +38 -0
- pointblank/locales/data/LU/address.json +66 -0
- pointblank/locales/data/LU/company.json +60 -0
- pointblank/locales/data/LU/internet.json +19 -0
- pointblank/locales/data/LU/misc.json +7 -0
- pointblank/locales/data/LU/person.json +38 -0
- pointblank/locales/data/LU/text.json +38 -0
- pointblank/locales/data/LV/address.json +62 -0
- pointblank/locales/data/LV/company.json +60 -0
- pointblank/locales/data/LV/internet.json +19 -0
- pointblank/locales/data/LV/misc.json +7 -0
- pointblank/locales/data/LV/person.json +40 -0
- pointblank/locales/data/LV/text.json +38 -0
- pointblank/locales/data/MT/address.json +61 -0
- pointblank/locales/data/MT/company.json +60 -0
- pointblank/locales/data/MT/internet.json +19 -0
- pointblank/locales/data/MT/misc.json +7 -0
- pointblank/locales/data/MT/person.json +38 -0
- pointblank/locales/data/MT/text.json +38 -0
- pointblank/locales/data/MX/address.json +100 -0
- pointblank/locales/data/MX/company.json +65 -0
- pointblank/locales/data/MX/internet.json +20 -0
- pointblank/locales/data/MX/misc.json +8 -0
- pointblank/locales/data/MX/person.json +18 -0
- pointblank/locales/data/MX/text.json +39 -0
- pointblank/locales/data/NL/address.json +1517 -0
- pointblank/locales/data/NL/company.json +133 -0
- pointblank/locales/data/NL/internet.json +44 -0
- pointblank/locales/data/NL/misc.json +55 -0
- pointblank/locales/data/NL/person.json +365 -0
- pointblank/locales/data/NL/text.json +210 -0
- pointblank/locales/data/NO/address.json +86 -0
- pointblank/locales/data/NO/company.json +66 -0
- pointblank/locales/data/NO/internet.json +20 -0
- pointblank/locales/data/NO/misc.json +8 -0
- pointblank/locales/data/NO/person.json +17 -0
- pointblank/locales/data/NO/text.json +35 -0
- pointblank/locales/data/NZ/address.json +90 -0
- pointblank/locales/data/NZ/company.json +65 -0
- pointblank/locales/data/NZ/internet.json +20 -0
- pointblank/locales/data/NZ/misc.json +8 -0
- pointblank/locales/data/NZ/person.json +17 -0
- pointblank/locales/data/NZ/text.json +39 -0
- pointblank/locales/data/PH/address.json +67 -0
- pointblank/locales/data/PH/company.json +61 -0
- pointblank/locales/data/PH/internet.json +19 -0
- pointblank/locales/data/PH/misc.json +7 -0
- pointblank/locales/data/PH/person.json +40 -0
- pointblank/locales/data/PH/text.json +38 -0
- pointblank/locales/data/PL/address.json +91 -0
- pointblank/locales/data/PL/company.json +65 -0
- pointblank/locales/data/PL/internet.json +20 -0
- pointblank/locales/data/PL/misc.json +8 -0
- pointblank/locales/data/PL/person.json +17 -0
- pointblank/locales/data/PL/text.json +35 -0
- pointblank/locales/data/PT/address.json +90 -0
- pointblank/locales/data/PT/company.json +65 -0
- pointblank/locales/data/PT/internet.json +20 -0
- pointblank/locales/data/PT/misc.json +8 -0
- pointblank/locales/data/PT/person.json +17 -0
- pointblank/locales/data/PT/text.json +35 -0
- pointblank/locales/data/RO/address.json +73 -0
- pointblank/locales/data/RO/company.json +61 -0
- pointblank/locales/data/RO/internet.json +19 -0
- pointblank/locales/data/RO/misc.json +7 -0
- pointblank/locales/data/RO/person.json +40 -0
- pointblank/locales/data/RO/text.json +38 -0
- pointblank/locales/data/RU/address.json +74 -0
- pointblank/locales/data/RU/company.json +60 -0
- pointblank/locales/data/RU/internet.json +19 -0
- pointblank/locales/data/RU/misc.json +7 -0
- pointblank/locales/data/RU/person.json +38 -0
- pointblank/locales/data/RU/text.json +38 -0
- pointblank/locales/data/SE/address.json +247 -0
- pointblank/locales/data/SE/company.json +65 -0
- pointblank/locales/data/SE/internet.json +20 -0
- pointblank/locales/data/SE/misc.json +7 -0
- pointblank/locales/data/SE/person.json +45 -0
- pointblank/locales/data/SE/text.json +43 -0
- pointblank/locales/data/SI/address.json +67 -0
- pointblank/locales/data/SI/company.json +60 -0
- pointblank/locales/data/SI/internet.json +19 -0
- pointblank/locales/data/SI/misc.json +7 -0
- pointblank/locales/data/SI/person.json +38 -0
- pointblank/locales/data/SI/text.json +38 -0
- pointblank/locales/data/SK/address.json +64 -0
- pointblank/locales/data/SK/company.json +60 -0
- pointblank/locales/data/SK/internet.json +19 -0
- pointblank/locales/data/SK/misc.json +7 -0
- pointblank/locales/data/SK/person.json +38 -0
- pointblank/locales/data/SK/text.json +38 -0
- pointblank/locales/data/TR/address.json +105 -0
- pointblank/locales/data/TR/company.json +65 -0
- pointblank/locales/data/TR/internet.json +20 -0
- pointblank/locales/data/TR/misc.json +8 -0
- pointblank/locales/data/TR/person.json +17 -0
- pointblank/locales/data/TR/text.json +35 -0
- pointblank/locales/data/TW/address.json +86 -0
- pointblank/locales/data/TW/company.json +69 -0
- pointblank/locales/data/TW/internet.json +19 -0
- pointblank/locales/data/TW/misc.json +7 -0
- pointblank/locales/data/TW/person.json +42 -0
- pointblank/locales/data/TW/text.json +38 -0
- pointblank/locales/data/US/address.json +996 -0
- pointblank/locales/data/US/company.json +131 -0
- pointblank/locales/data/US/internet.json +22 -0
- pointblank/locales/data/US/misc.json +11 -0
- pointblank/locales/data/US/person.json +1092 -0
- pointblank/locales/data/US/text.json +56 -0
- pointblank/locales/data/_shared/misc.json +42 -0
- pointblank/schema.py +339 -2
- {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/METADATA +45 -1
- pointblank-0.20.0.dist-info/RECORD +366 -0
- {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/WHEEL +1 -1
- pointblank-0.19.0.dist-info/RECORD +0 -59
- {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/entry_points.txt +0 -0
- {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/licenses/LICENSE +0 -0
- {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import random
|
|
4
|
+
import string
|
|
5
|
+
from re import U
|
|
6
|
+
|
|
7
|
+
try:
|
|
8
|
+
import re._parser as sre_parse # Python 3.11+
|
|
9
|
+
except ImportError:
|
|
10
|
+
from re import sre_parse # type: ignore[attr-defined]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Character categories for regex character classes
|
|
14
|
+
def _build_categories() -> dict:
    """Return the table of candidate characters for each regex character category.

    The keys are the `sre_parse` category constants for whitespace, digits and word
    characters (and their negations), plus the string key `"category_any"`. Every value
    is a list of single-character strings.
    """
    # Only printable ASCII (codes 32..126) is used so that the output stays readable
    printable = [chr(code) for code in range(32, 127)]

    def _complement(members: list) -> list:
        # Printable characters that are not part of `members`, in code-point order
        return [ch for ch in printable if ch not in members]

    whitespace = list(" \t\n\r\f\v")
    digits = list(string.digits)
    word = list(string.ascii_letters + string.digits + "_")

    table: dict = {}
    table[sre_parse.CATEGORY_SPACE] = whitespace
    table[sre_parse.CATEGORY_NOT_SPACE] = _complement(whitespace)
    table[sre_parse.CATEGORY_DIGIT] = digits
    table[sre_parse.CATEGORY_NOT_DIGIT] = _complement(digits)
    table[sre_parse.CATEGORY_WORD] = word
    table[sre_parse.CATEGORY_NOT_WORD] = _complement(word)
    table["category_any"] = list(printable)
    return table
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Lookup table built once at import time: maps each sre_parse category constant
# (and the "category_any" key) to the list of characters that may be generated for it.
CATEGORIES = _build_categories()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _parse_in(items: list, rng: random.Random) -> str:
    """Pick one random character for a parsed `[...]` character class.

    Parameters
    ----------
    items
        The `(op, argument)` entries of the character class as produced by the regex
        parser.
    rng
        Random number generator used to draw the character.

    Returns
    -------
    str
        One character from the resulting pool, or an empty string when the pool is empty.
    """
    pool: list[str] = []
    inverted = False

    for entry in items:
        kind = entry[0]

        if kind == sre_parse.NEGATE:
            # A negated class starts from every printable character; the entries that
            # follow are then carved out of the pool instead of being added to it
            pool = list(CATEGORIES["category_any"])
            inverted = True
            continue

        if kind == sre_parse.RANGE:
            # Inclusive code-point range such as a-z
            first, last = entry[1][0], entry[1][1]
            members = [chr(code) for code in range(first, last + 1)]
        elif kind == sre_parse.LITERAL:
            members = [chr(entry[1])]
        elif kind == sre_parse.CATEGORY:
            members = CATEGORIES.get(entry[1], [""])
        else:
            # Any other entry kind does not affect the pool
            continue

        if inverted:
            pool = [ch for ch in pool if ch not in members]
        else:
            pool.extend(members)

    if not pool:
        return ""
    return rng.choice(pool)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _generate_one(
    parsed: list, rng: random.Random, limit: int = 20, grouprefs: dict | None = None
) -> str:
    """Generate a single random string from a parsed regex pattern.

    Parameters
    ----------
    parsed
        The parsed regex structure from sre_parse.parse(): a sequence of `(op, argument)`
        items.
    rng
        Random number generator for reproducibility.
    limit
        Cap for wide quantifiers: when the spread between a quantifier's minimum and
        maximum is at least `limit`, the maximum is lowered to `minimum + limit - 1`
        (but never below the minimum, so `limit <= 0` yields exactly the minimum).
    grouprefs
        Dictionary to store captured group values for backreferences. A new dictionary is
        created when omitted; recursive calls share the same dictionary.

    Returns
    -------
    str
        The concatenation of the text generated for every item of `parsed`. Anchors,
        negative assertions and unrecognized operations contribute nothing.
    """
    if grouprefs is None:
        grouprefs = {}

    # Possessive quantifiers (a++, a*+, a{2,3}+) and atomic groups (?>...) only exist in
    # the Python 3.11+ parser, so their opcodes are looked up defensively
    possessive_repeat = getattr(sre_parse, "POSSESSIVE_REPEAT", None)
    atomic_group = getattr(sre_parse, "ATOMIC_GROUP", None)

    repeat_ops = [sre_parse.MAX_REPEAT, sre_parse.MIN_REPEAT]
    if possessive_repeat is not None:
        repeat_ops.append(possessive_repeat)

    pieces: list[str] = []

    for item in parsed:
        op = item[0]

        if op == sre_parse.IN:
            # Character class [...]
            pieces.append(_parse_in(item[1], rng))

        elif op == sre_parse.LITERAL:
            # Literal character
            pieces.append(chr(item[1]))

        elif op == sre_parse.CATEGORY:
            # Character category like \d, \w, \s
            chars = CATEGORIES.get(item[1], [""])
            pieces.append(rng.choice(chars))

        elif op == sre_parse.ANY:
            # . (any character)
            pieces.append(rng.choice(CATEGORIES["category_any"]))

        elif op in repeat_ops:
            # Quantifiers: *, +, ?, {n}, {n,m} (greedy, lazy and possessive forms)
            min_count, max_count = item[1][0], item[1][1]
            sub_pattern = list(item[1][2])

            # Limit wide/unbounded repetitions; max() keeps the range non-empty when
            # `limit` is zero or negative (randint would otherwise raise ValueError)
            if max_count - min_count >= limit:
                max_count = max(min_count, min_count + limit - 1)

            count = rng.randint(min_count, max_count)
            for _ in range(count):
                pieces.append(_generate_one(sub_pattern, rng, limit, grouprefs))

        elif op == sre_parse.BRANCH:
            # Alternation: a|b|c
            branches = item[1][1]
            chosen_branch = rng.choice(branches)
            pieces.append(_generate_one(chosen_branch, rng, limit, grouprefs))

        elif op == sre_parse.SUBPATTERN:
            # Group (...); the sub-pattern position depends on the tuple length
            group_id = item[1][0]
            sub_pattern = item[1][3] if len(item[1]) > 3 else item[1][1]
            sub_result = _generate_one(sub_pattern, rng, limit, grouprefs)
            if group_id:
                # Remember the generated text so a later backreference can repeat it
                grouprefs[group_id] = sub_result
            pieces.append(sub_result)

        elif atomic_group is not None and op == atomic_group:
            # Atomic group (?>...): the argument is the sub-pattern itself
            pieces.append(_generate_one(item[1], rng, limit, grouprefs))

        elif op == sre_parse.ASSERT:
            # Positive assertion - generate the content
            # NOTE(review): text emitted for a lookahead is not reconciled with the items
            # that follow it, so the output may not satisfy patterns relying on lookahead
            sub_pattern = item[1][1]
            pieces.append(_generate_one(sub_pattern, rng, limit, grouprefs))

        elif op == sre_parse.AT:
            # Anchors ^ and $ - ignore them
            continue

        elif op == sre_parse.NOT_LITERAL:
            # [^x] - any character except x
            chars = list(CATEGORIES["category_any"])
            excluded = chr(item[1])
            if excluded in chars:
                chars.remove(excluded)
            pieces.append(rng.choice(chars))

        elif op == sre_parse.GROUPREF:
            # Backreference \1, \2, etc.; empty when the group has not been generated
            pieces.append(grouprefs.get(item[1], ""))

        elif op == sre_parse.ASSERT_NOT:
            # Negative assertion (?!...) - contributes no text
            continue

        else:
            # Unknown operation - silently skipped (no warning is emitted)
            continue

    return "".join(pieces)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def generate_from_regex(pattern: str, rng: random.Random, limit: int = 20) -> str:
    """Produce a random string from a regular expression pattern.

    The pattern is parsed with the standard library regex parser (using the `re.U` flag)
    and the parsed items are then expanded into text one by one.

    Parameters
    ----------
    pattern
        The regular expression pattern to generate text for.
    rng
        Random number generator instance; passing a seeded generator makes the output
        reproducible.
    limit
        Cap for open-ended quantifiers such as `*` and `+`: the repetition count is drawn
        between the quantifier's minimum and `minimum + limit - 1`. Default is 20.

    Returns
    -------
    str
        A random string generated from the pattern.

    Examples
    --------
    >>> import random
    >>> rng = random.Random(23)
    >>> generate_from_regex(r"[A-Z]{3}-\\d{4}", rng)
    'CAS-6685'
    >>> generate_from_regex(r"(foo|bar|baz)", rng)
    'foo'
    >>> generate_from_regex(r"\\w+@\\w+\\.com", rng)
    'rCaoND5@g.com'
    """
    tokens = sre_parse.parse(pattern, flags=U)
    return _generate_one(list(tokens), rng, limit)
|