StringGenerator 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- strgen/__init__.py +797 -0
- strgen/countries.py +212 -0
- strgen/tests.py +497 -0
- stringgenerator-0.5.0.dist-info/METADATA +189 -0
- stringgenerator-0.5.0.dist-info/RECORD +8 -0
- stringgenerator-0.5.0.dist-info/WHEEL +5 -0
- stringgenerator-0.5.0.dist-info/licenses/LICENSE +29 -0
- stringgenerator-0.5.0.dist-info/top_level.txt +1 -0
strgen/countries.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
# Country names and country-related keywords, kept in their original order.
# Besides sovereign-state names the list also holds partial names
# ("Bissau", "Herzegovina"), demonyms ("British", "scottish", "welsh"),
# spelling variants ("St. Kitts" / "St Kitts" / "Saint Kitts") and a few
# free-form phrases ("country debt", "in usa"); casing is not uniform.
countries = [
    # A
    "Afghanistan", "Albania", "Algeria", "America", "Andorra", "Angola",
    "Antigua", "Argentina", "Armenia", "Australia", "Austria", "Azerbaijan",
    # B
    "Bahamas", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium",
    "Belize", "Benin", "Bhutan", "Bissau", "Bolivia", "Bosnia", "Botswana",
    "Brazil", "British", "Brunei", "Bulgaria", "Burkina", "Burma", "Burundi",
    # C
    "Cambodia", "Cameroon", "Canada", "Cape Verde", "Central African Republic",
    "Chad", "Chile", "China", "Colombia", "Comoros", "Congo", "Costa Rica",
    "country debt", "Croatia", "Cuba", "Cyprus", "Czech",
    # D
    "Denmark", "Djibouti", "Dominica",
    # E
    "East Timor", "Ecuador", "Egypt", "El Salvador", "Emirate", "England",
    "Eritrea", "Estonia", "Ethiopia",
    # F
    "Fiji", "Finland", "France",
    # G
    "Gabon", "Gambia", "Georgia", "Germany", "Ghana", "Great Britain",
    "Greece", "Grenada", "Grenadines", "Guatemala", "Guinea", "Guyana",
    # H
    "Haiti", "Herzegovina", "Honduras", "Hungary",
    # I
    "Iceland", "in usa", "India", "Indonesia", "Iran", "Iraq", "Ireland",
    "Israel", "Italy", "Ivory Coast",
    # J
    "Jamaica", "Japan", "Jordan",
    # K
    "Kazakhstan", "Kenya", "Kiribati", "Korea", "Kosovo", "Kuwait",
    "Kyrgyzstan",
    # L
    "Laos", "Latvia", "Lebanon", "Lesotho", "Liberia", "Libya",
    "Liechtenstein", "Lithuania", "Luxembourg",
    # M
    "Macedonia", "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali",
    "Malta", "Marshall", "Mauritania", "Mauritius", "Mexico", "Micronesia",
    "Moldova", "Monaco", "Mongolia", "Montenegro", "Morocco", "Mozambique",
    "Myanmar",
    # N
    "Namibia", "Nauru", "Nepal", "Netherlands", "New Zealand", "Nicaragua",
    "Niger", "Nigeria", "Norway",
    # O
    "Oman",
    # P
    "Pakistan", "Palau", "Panama", "Papua", "Paraguay", "Peru", "Philippines",
    "Poland", "Portugal",
    # Q
    "Qatar",
    # R
    "Romania", "Russia", "Rwanda",
    # S (original order preserved; it is not strictly alphabetical)
    "Samoa", "San Marino", "Sao Tome", "Saudi Arabia", "scotland", "scottish",
    "Senegal", "Serbia", "Seychelles", "Sierra Leone", "Singapore", "Slovakia",
    "Slovenia", "Solomon", "Somalia", "South Africa", "South Sudan", "Spain",
    "Sri Lanka", "St. Kitts", "St. Lucia", "St Kitts", "St Lucia",
    "Saint Kitts", "Santa Lucia", "Sudan", "Suriname", "Swaziland", "Sweden",
    "Switzerland", "Syria",
    # T
    "Taiwan", "Tajikistan", "Tanzania", "Thailand", "Tobago", "Togo", "Tonga",
    "Trinidad", "Tunisia", "Turkey", "Turkmenistan", "Tuvalu",
    # U
    "Uganda", "Ukraine", "United Kingdom", "United States", "Uruguay", "USA",
    "Uzbekistan",
    # V
    "Vanuatu", "Vatican", "Venezuela", "Vietnam",
    # W
    "wales", "welsh",
    # Y
    "Yemen",
    # Z
    "Zambia", "Zimbabwe",
]
|
strgen/tests.py
ADDED
|
@@ -0,0 +1,497 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import random
|
|
3
|
+
import collections
|
|
4
|
+
import statistics
|
|
5
|
+
|
|
6
|
+
from hypothesis import given
|
|
7
|
+
import hypothesis.strategies as st
|
|
8
|
+
from hypothesis import find, settings, Verbosity
|
|
9
|
+
from hypothesis.strategies import lists, integers
|
|
10
|
+
|
|
11
|
+
import unittest
|
|
12
|
+
from strgen import StringGenerator as SG
|
|
13
|
+
|
|
14
|
+
# Characters removed from hypothesis-generated text before that text is
# placed inside a template character class by the tests in this module.
SPECIAL_CHARACTERS = "{}[]()|&$-\\"


def remove_special(s) -> str:
    """Return *s* with every character listed in ``SPECIAL_CHARACTERS`` deleted."""
    # A deletion-only translation table drops all listed characters in one pass.
    deletion_table = str.maketrans("", "", SPECIAL_CHARACTERS)
    return s.translate(deletion_table)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class CustomBadRandomizer:
    """Empty stand-in that defines none of the methods a randomizer needs.

    ``test_custom_bad_randomizer`` passes an instance as ``randomizer`` and
    expects rendering to fail with ``AttributeError``.
    """
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class CustomRandomizer(random.Random):
    """``random.Random`` subclass whose overrides defer straight to the base.

    ``choice``, ``shuffle`` and ``randint`` are re-declared explicitly and
    forward their arguments unchanged to the parent implementation.
    """

    def choice(self, s):
        """Return the element of *s* picked by the parent ``choice``."""
        picked = super(CustomRandomizer, self).choice(s)
        return picked

    def shuffle(self, s):
        """Shuffle *s* in place via the parent ``shuffle`` and return its result."""
        outcome = super(CustomRandomizer, self).shuffle(s)
        return outcome

    def randint(self, a, b):
        """Return the integer produced by the parent ``randint(a, b)``."""
        drawn = super(CustomRandomizer, self).randint(a, b)
        return drawn
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class TestSG(unittest.TestCase):
    """Tests for the ``StringGenerator`` template API (imported as ``SG``).

    Covers rendering single strings, lists and sets, template syntax errors,
    escaping, external sources, randomizer selection and counting.
    """

    def _assert_render_list(self, template, list_length=10):
        """Render ``list_length`` strings from ``template`` and check the result.

        Asserts that ``render_list`` returns a ``list`` holding exactly
        ``list_length`` items.
        """
        result = SG(template).render_list(list_length)
        self.assertIsInstance(result, list)
        self.assertEqual(len(result), list_length)

    @given(st.text(min_size=2, max_size=100), st.integers(min_value=10, max_value=1000))
    @settings(verbosity=Verbosity.verbose)
    def test_unicode_strings(self, s, i):
        """Arbitrary text, minus the special characters, works as a class.

        ``i`` is supplied by the second strategy but is not used by the test.
        """
        s = remove_special(s)
        if not s:
            # Nothing left once the special characters are stripped.
            return
        r = SG(f"[{s}]{{10}}").render()
        self.assertTrue(r)

    @given(st.characters(blacklist_characters=SPECIAL_CHARACTERS), st.integers())
    @settings(verbosity=Verbosity.verbose)
    def test_single_characters(self, s, i):
        """Any single non-special character works as a one-member class.

        ``i`` is supplied by the second strategy but is not used by the test.
        """
        r = SG(f"[{s}]{{10}}").render()
        self.assertTrue(r)

    def test_string_generator(self):
        """Test various templates."""
        test_list = [
            r"[a-z][\c]{10}(.|_)[\c]{5:10}@[\c]{3:12}.(com|net|org)",
            r"[\[\]\(\)\{\}\&\|\-\$_+=;'\"<>,.?:!#%^`~*@\\\]OC",
            r"[a-z\d\d\d\d]{8}",
            r"[\l]{6:10}&[\d]{2}",
            r"([a-z]{4}|[0-9]{9})",
            r"[\d]&[\c]&[\w\p]{6}",
        ]

        for t in test_list:
            # subTest reports every failing template, not just the first one.
            with self.subTest(template=t):
                self.assertIsNotNone(SG(t).render())

    def test_unicode_literals(self):
        """Test that Unicode literal strings are rendered correctly."""
        unicode_strings = [
            r"idzie wąż wąską dróżką",
            r"༣ཁངཱུངྵ",
            r"ᚠᚳᛦᛰ",
        ]

        for template in unicode_strings:
            with self.subTest(template=template):
                result = SG(template).render()
                self.assertEqual(result, template)
                self.assertIsInstance(result, str)

    def test_unicode_character_class(self):
        """Test Unicode character class ranges."""
        template = r"[ą-ż]{8}"
        result = SG(template).render()

        self.assertIsInstance(result, str)
        self.assertEqual(len(result), 8)

        # Verify all characters are in the expected Unicode range
        for char in result:
            self.assertGreaterEqual(ord(char), ord("ą"))
            self.assertLessEqual(ord(char), ord("ż"))

    def test_unicode_escape_sequences(self):
        """Test Unicode escape sequences in templates.

        Only the result type is checked; the decoded content of the hex
        escape sequences is not asserted here.
        """
        template = r"\xe6\xbf\xe5, \xe9\xe5\xa9\xe5\xe5\xad\xe6\xaf\xe7\xe9\xba\xbc\xe6\xe6"
        result = SG(template).render()

        # A str result is necessarily not None, so one assertion suffices.
        self.assertIsInstance(result, str)

    def test_render_list_email_template(self):
        """Test render_list with email-like template."""
        self._assert_render_list(r"[a-z][\c]{10}(.|_)[\c]{5:10}@[\c]{3:12}.(com|net|org)")

    def test_render_list_mixed_alphanumeric(self):
        """Test render_list with mixed alphanumeric template."""
        self._assert_render_list(r"[a-z\d\d\d\d]{8}")

    def test_render_list_letter_digit_combination_colon(self):
        """Test render_list with letter-digit combination using colon range syntax."""
        self._assert_render_list(r"[\l]{6:10}&[\d]{2}")

    def test_render_list_letter_digit_combination_hyphen(self):
        """Test render_list with letter-digit combination using hyphen range syntax."""
        # support both hyphen and colon for ranges
        self._assert_render_list(r"[\l]{6-10}&[\d]{2}")

    def test_render_list_alternation(self):
        """Test render_list with alternation (OR) operator."""
        self._assert_render_list(r"([a-z]{4}|[0-9]{9})")

    def test_render_list_mixed_character_classes(self):
        """Test render_list with mixed character classes and & operator."""
        self._assert_render_list(r"[\d]&[\c]&[\w\p]{6}")

    def test_render_list_single_character_class(self):
        """Test render_list with single character class."""
        self._assert_render_list(r"[\w\p]")

    def test_render_list_fixed_length_character_class(self):
        """Test render_list with fixed length character class."""
        self._assert_render_list(r"[\w\p]{6}")

    def test_render_list_negative_quantifier(self):
        """Test render_list with negative quantifier."""
        self._assert_render_list(r"[\w\p]{-6}")

    def test_render_list_open_ended_quantifier(self):
        """Test render_list with open-ended quantifier."""
        self._assert_render_list(r"[\w\p]{:6}")

    def test_render_list_zero_to_n_quantifier(self):
        """Test render_list with zero-to-n quantifier."""
        self._assert_render_list(r"[\w\p]{0:6}")

    def test_render_set(self):
        """render_set returns a set with exactly the requested number of items."""
        set_length = 100
        p = r"[a-z][\c]{10}(.|_)[\c]{5:10}@[\c]{3:12}.(com|net|org)"
        result = SG(p).render_set(set_length)
        self.assertIsInstance(result, set)
        self.assertEqual(len(result), set_length)

    def test_list_progress(self):
        """Check if the progress indicator actually works"""

        list_length = 10

        progress_states = []

        def progress_callback(current, total):
            progress_states.append(f"{current}/{total}")

        SG(r"[a-z\d\d\d\d]{8}").render_list(list_length, progress_callback=progress_callback)

        # Length of list of progress states should match length of
        # requested strings
        self.assertEqual(len(progress_states), list_length)

        # Check the first and the last item for the sake of completeness
        self.assertEqual(progress_states[0], "1/10")
        self.assertEqual(progress_states[-1], "10/10")

    def test_syntax_exception(self):
        """Make sure syntax errors in template are caught."""
        test_list = [
            r"[a-z]{a}",  # not a valid quantifier
            r"[a-]",  # invalid class range
            r"[[1-9]",  # unescaped chars
            r"((foo)(bar)))",  # extra parens
            # NOTE: r"foo&" and r"|foo" (binary operator errors) are
            # deliberately left out of this list, as in the original test.
            r"[\w]{10:}",  # cannot have open range in quantifier
        ]
        for t in test_list:
            with self.subTest(template=t):
                with self.assertRaises(SG.SyntaxError):
                    SG(t).render()

    def test_uniqueness_error(self):
        """Make sure we throw an exception if we can't generate list."""
        t = "[123]"
        with self.assertRaises(SG.UniquenessError):
            SG(t).render_list(100, unique=True)

    def test_escaping(self):
        """Escaped special characters are accepted in classes and literals."""
        test_list = [
            r"[\[\]]",
            r"\{\}",
            r"[\[\]\(\)\{\}\&\|\-\$_+=;'\"<>,.?:!#%^`~*@]{10}",
        ]

        for t in test_list:
            with self.subTest(template=t):
                self.assertIsNotNone(SG(t).render())

    def test_literals(self):
        """Test various literals."""
        test_list = [r"hel-lo[\w]{8}", r"hello:[\w]{8}", r"-hello-[\w]{8}"]

        for t in test_list:
            with self.subTest(template=t):
                self.assertIsNotNone(SG(t).render())

    def test_forward_slash(self):
        """An escaped backslash inside a class renders a single backslash.

        Despite the method name, the character under test is a backslash.
        """
        result = SG(r"[\\]").render()
        self.assertEqual(result, "\\")

    def test_capital_u(self):
        """``[\\U]{10}`` renders ten characters that are upper case."""
        result = SG(r"[\U]{10}").render()
        self.assertEqual(len(result), 10)
        self.assertTrue(result.isupper())

    def test_source(self):
        """A ``${names}`` source may be a callable, a generator or a list."""
        # you can pass a function
        SG("blah${names}").render(names=lambda: random.choice(["1", "2", "3"]))

        # you can pass a generator
        SG("generator: ${names}").render(names=(lambda: (yield "somestring"))())

        # range to list
        SG("generator: ${names}").render(names=list(range(10)))

    def test_unseeded_randomizer(self):
        """Without a seed, separate generators produce different output."""
        pattern = r"[\w]{10}&([\d]{10}|M3W9MF_lH3906I14O50)"

        s1 = SG(pattern).render()
        s2 = SG(pattern).render()
        self.assertNotEqual(s1, s2)

        list1 = SG(pattern).render_list(100)
        list2 = SG(pattern).render_list(100)
        self.assertNotEqual(collections.Counter(list1), collections.Counter(list2))

    def test_seeded_randomizer(self):
        """The same seed gives the same output from separate generators."""
        # provide a seed to get consistent results
        pattern = r"[\w]{10}&([\d]{10}|M3W9MF_lH3906I14O50)"

        for seed in [random.randint(1, 100000000) for _ in range(100)]:
            # The seeds are random, so name the failing one in the report.
            with self.subTest(seed=seed):
                s1 = SG(pattern, seed=seed).render()
                s2 = SG(pattern, seed=seed).render()
                self.assertEqual(s1, s2)

                list1 = SG(pattern, seed=seed).render_list(100)
                list2 = SG(pattern, seed=seed).render_list(100)
                self.assertEqual(collections.Counter(list1), collections.Counter(list2))

    def test_buffered_secure_randomizer(self):
        """BufferedSecureRandom is reachable via SG and drives every path."""
        # Reachable without an extra import.
        rng = SG.BufferedSecureRandom
        from strgen import BufferedSecureRandom as ModuleLevel

        self.assertIs(rng, ModuleLevel)

        # Satisfies the randomizer contract.
        instance = rng()
        for method in ("randint", "choice", "choices", "shuffle"):
            self.assertTrue(hasattr(instance, method), method)

        # Fixed-length class (choices path).
        result = SG(r"[\d]{10}", randomizer=rng()).render_set(2000)
        self.assertEqual(len(result), 2000)
        self.assertTrue(all(len(s) == 10 and s.isdigit() for s in result))

        # Range quantifier (randint -> getrandbits path). A tiny buffer on a
        # single reused instance forces many refills across the 200 renders.
        ranged_sg = SG(r"[\d]{2:5}", randomizer=rng(bufsize=64))
        ranged = [ranged_sg.render() for _ in range(200)]
        self.assertTrue(all(2 <= len(s) <= 5 for s in ranged))

        # Seeding is a no-op: two instances still differ.
        a = SG(r"[\w]{16}", randomizer=rng()).render()
        b = SG(r"[\w]{16}", randomizer=rng()).render()
        self.assertNotEqual(a, b)

        # The fast choices override (byte rejection sampling) is unbiased.
        # Reuse one generator so the 1 MB buffer is allocated once.
        digit_sg = SG(r"[\d]", randomizer=rng())
        counts = collections.Counter(digit_sg.render() for _ in range(20000))
        self.assertEqual(set(counts), set("0123456789"))
        # each digit within a generous band around the 2000 expected
        self.assertTrue(all(1700 < c < 2300 for c in counts.values()))

        # Alphabets larger than one byte fall back to the standard path.
        big = SG(r"[Ā-Ԁ]{4}", randomizer=rng()).render()
        self.assertEqual(len(big), 4)

    def test_randomizer_is_per_instance(self):
        """Each generator owns its randomizer.

        Previously the randomizer was a class attribute, so constructing a
        second generator clobbered the RNG of every existing one. This
        interleaves two seeded instances and asserts each stays independent.
        """
        pattern = r"[\w]{20}"

        a = SG(pattern, seed=1)
        b = SG(pattern, seed=2)

        # Interleave renders across the two instances.
        a_first = a.render()
        b.render()
        a_second = a.render()

        # 'a' must behave exactly like a standalone seed=1 generator,
        # unaffected by 'b' having been constructed and used in between.
        ref = SG(pattern, seed=1)
        self.assertEqual(a_first, ref.render())
        self.assertEqual(a_second, ref.render())

    def test_custom_bad_randomizer(self):
        """A randomizer lacking the required methods fails with AttributeError."""
        pattern = r"[\w]{10}&([\d]{10}|M3W9MF_lH3906I14O50)"
        sg = SG(pattern, randomizer=CustomBadRandomizer())
        with self.assertRaises(AttributeError):
            sg.render()

    def test_custom_randomizer(self):
        """A ``random.Random`` subclass is accepted as the randomizer."""
        pattern = r"[\w]{10}&([\d]{10}|M3W9MF_lH3906I14O50)"
        sg = SG(pattern, randomizer=CustomRandomizer())
        self.assertTrue(len(sg.render()))

    def test_dump(self):
        """make sure dump method works."""
        SG(r"[\w]{8}").dump()

    def test_str(self):
        """str() of a generator does not raise."""
        str(SG(r"[\w]{8}"))

    def test_repr(self):
        """repr() of a generator does not raise."""
        repr(SG(r"[\w]{8}"))

    def test_counts(self):
        """count() matches the number of unique strings that can be rendered."""
        self.assertEqual(
            SG(r"1&abc").count(),
            len(SG(r"1&abc").render_set(24)),
        )
        self.assertEqual(
            SG(r"[\u\d]{2}|[abc]{3}", uaf=100).count(),
            len(SG(r"[\u\d]{2}|[abc]{3}", uaf=100).render_list(1323, unique=True)),
        )

    def test_probabilistic_or(self):
        """Alternation over the ten digits yields each digit and nothing else."""
        rendered = SG("0|1|2|3|4|5|6|7|8|9").render_list(10000)
        digits = [int(item) for item in rendered]

        # Every alternative should show up in 10000 draws, and only those.
        self.assertEqual(set(digits), set(range(10)))

        # A stricter check on the distribution was disabled because
        # statistics.quantiles is not available in python 3.6, 3.7:
        #   q = statistics.quantiles(digits, n=4)
        #   expected q == [2.0, 4.0, 7.0], where the middle quantile can be
        #   4.0 or 5.0 because 4.5 is the mean.

        # Correct:
        # SG('1|2|3[abc]{1}'
        # ['1c', '2b', '1b', '2c', '2c', '3c', '3c', '2b', '1c', '1c']

        # NOT Correct:
        # SG('1|2|[abc]{1}'
        # ['1c', '2b', '1b', '2c', '2c', '3c', '3c', '2b', '1c', '1c']
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
class TestParserRegressions(unittest.TestCase):
    """Regression tests for parser defects.

    These assert the *intended* behavior and are expected to fail until the
    parser/escape handling is fixed. Each maps to a demonstrated bug.
    """

    def _assert_all_raise_syntax_error(self, templates):
        """Assert that rendering each template raises ``SG.SyntaxError``.

        Every template runs in its own ``subTest`` so that one failure does
        not hide the others and the failing template is named in the report.
        """
        for template in templates:
            with self.subTest(template=template):
                with self.assertRaises(SG.SyntaxError):
                    SG(template).render()

    def test_escaped_non_meta_in_literal_not_doubled(self):
        """`\\a` should be a literal 'a', not 'aa'.

        getLiteral re-adds the escaped char because the loop variable still
        holds it after consumption.
        """
        self.assertEqual(SG(r"\a").render(), "a")
        self.assertEqual(SG(r"foo\bar").render(), "foobar")

    def test_escaped_backslash_literal(self):
        """An escaped backslash is a single literal backslash."""
        self.assertEqual(SG(r"\\x").render(), "\\x")
        self.assertEqual(SG(r"a\\b").render(), "a\\b")

    def test_escaped_backslash_before_class(self):
        """A literal backslash immediately before a character class must parse.

        `\\[abc]` => one backslash followed by one of a/b/c.
        """
        result = SG(r"\\[abc]").render()
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0], "\\")
        self.assertIn(result[1], "abc")

    def test_escaped_backslash_before_class_with_prefix(self):
        """Same as above, with a literal prefix before the escaped backslash."""
        result = SG(r"x\\[ab]").render()
        self.assertEqual(len(result), 3)
        self.assertEqual(result[:2], "x\\")
        self.assertIn(result[2], "ab")

    def test_source_at_start_of_pattern(self):
        """A `${name}` source at index 0 must render (last() is None there)."""
        self.assertEqual(SG(r"${names}").render(names=["A"]), "A")

    def test_empty_class_raises_syntax_error(self):
        """An empty character class is a template error, not a randrange crash."""
        self._assert_all_raise_syntax_error((r"[]", r"[]{3}"))

    def test_parse_errors_are_syntax_errors(self):
        """Malformed templates raise SG.SyntaxError, not bare Exception/TypeError."""
        self._assert_all_raise_syntax_error((r"[a]{", r"[a]{1", r"${ }", r"${1bad}"))

    def test_existing_syntax_errors_still_raise(self):
        """Baselines that already raise SG.SyntaxError; lock them in."""
        self._assert_all_raise_syntax_error((r"[a]{x}", r"[a-]", r"[\w]{10:}"))
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
# Run the test cases defined above when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()
|