regex 2026.1.14__cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4540 @@
1
+ from weakref import proxy
2
+ import copy
3
+ import pickle
4
+ import regex
5
+ import string
6
+ import sys
7
+ import unittest
8
+
9
+ # String subclasses for issue 18468.
10
class StrSubclass(str):
    """A str subclass whose indexing and slicing return StrSubclass instances."""

    def __getitem__(self, index):
        # Let str do the lookup, then re-wrap the result in this subclass.
        piece = super().__getitem__(index)
        return StrSubclass(piece)
13
+
14
class BytesSubclass(bytes):
    """A bytes subclass whose slicing returns BytesSubclass instances.

    NOTE: with an integer index, bytes.__getitem__ yields an int, so the
    constructor below receives an int rather than a bytes object.
    """

    def __getitem__(self, index):
        # Let bytes do the lookup, then re-wrap the result in this subclass.
        piece = super().__getitem__(index)
        return BytesSubclass(piece)
17
+
18
+ class RegexTests(unittest.TestCase):
19
# repr() of the type of a compiled pattern object.
PATTERN_CLASS = "<class '_regex.Pattern'>"

# Expected error messages.  The tests in this file pass them to
# assertRaisesRegex, so they are treated as regular expressions (hence the
# escaped parentheses and backslashes in some of them).

# -- flags and argument-type errors
FLAGS_WITH_COMPILED_PAT = "cannot process flags argument with a compiled pattern"
STR_PAT_ON_BYTES = "cannot use a string pattern on a bytes-like object"
BYTES_PAT_ON_STR = "cannot use a bytes pattern on a string-like object"
STR_PAT_BYTES_TEMPL = "expected str instance, bytes found"
BYTES_PAT_STR_TEMPL = "expected a bytes-like object, str found"
BYTES_PAT_UNI_FLAG = "cannot use UNICODE flag with a bytes pattern"
MIXED_FLAGS = "ASCII, LOCALE and UNICODE flags are mutually incompatible"
CANT_TURN_OFF = "bad inline flags: cannot turn flags off"

# -- group references and group names
INVALID_GROUP_REF = "invalid group reference"
MISSING_GT = "missing >"
BAD_GROUP_NAME = "bad character in group name"
MISSING_GROUP_NAME = "missing group name"
MISSING_LT = "missing <"
UNKNOWN_GROUP_I = "unknown group"
UNKNOWN_GROUP = UNKNOWN_GROUP_I
OPEN_GROUP = "cannot refer to an open group"
DUPLICATE_GROUP = "duplicate group"

# -- escapes, sets and general syntax
BAD_ESCAPE = "bad escape \\(end of pattern\\)"
BAD_OCTAL_ESCAPE = "bad escape \\\\"
BAD_SET = "unterminated character set"
MISSING_RPAREN = r"missing \)"
TRAILING_CHARS = "unbalanced parenthesis"
BAD_CHAR_RANGE = "bad character range"
NOTHING_TO_REPEAT = "nothing to repeat"
MULTIPLE_REPEAT = "multiple repeat"
UNDEF_CHAR_NAME = "undefined character name"
46
+
47
def assertTypedEqual(self, actual, expect, msg=None):
    """Assert that actual equals expect and that their leaf types match.

    After the ordinary equality check, tuples and lists in *expect* are
    walked in parallel with *actual*; every other value must have exactly
    the same type as its counterpart.  *msg* is forwarded to the
    underlying assertions.
    """
    self.assertEqual(actual, expect, msg)

    def check_types(got, wanted):
        # Leaf value: the concrete types must be identical.
        if not isinstance(wanted, (tuple, list)):
            self.assertIs(type(got), type(wanted), msg)
            return
        # Container: descend pairwise into the elements.
        for got_item, wanted_item in zip(got, wanted):
            check_types(got_item, wanted_item)

    check_types(actual, expect)
58
+
59
def test_weakref(self):
    # A weakref proxy of a compiled pattern must give the same findall()
    # result as the pattern itself.
    s = 'QabbbcR'
    x = regex.compile('ab+c')
    y = proxy(x)
    # assertEqual reports both results on failure, unlike a bare fail().
    self.assertEqual(x.findall(s), y.findall(s))
65
+
66
def test_search_star_plus(self):
    # Spans produced by '*' and '+' with search() and match().
    found = regex.search('a*', 'xxx')
    self.assertEqual(found.span(0), (0, 0))
    found = regex.search('x*', 'axx')
    self.assertEqual(found.span(), (0, 0))
    found = regex.search('x+', 'axx')
    self.assertEqual(found.span(0), (1, 3))
    self.assertEqual(found.span(), (1, 3))
    self.assertEqual(regex.search('x', 'aaa'), None)

    anchored = regex.match('a*', 'xxx')
    self.assertEqual(anchored.span(0), (0, 0))
    self.assertEqual(anchored.span(), (0, 0))
    anchored = regex.match('x*', 'xxxa')
    self.assertEqual(anchored.span(0), (0, 3))
    self.assertEqual(anchored.span(), (0, 3))
    self.assertEqual(regex.match('a+', 'xxx'), None)
77
+
78
def bump_num(self, matchobj):
    """Return, as a string, the integer in matchobj[0] incremented by one."""
    return str(int(matchobj[0]) + 1)
81
+
82
def test_basic_regex_sub(self):
    # Basic sub() behaviour: callables, group references and escapes in
    # replacement templates.
    sub = regex.sub
    eq = self.assertEqual

    eq(sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    # A callable replacement is given each match; the fourth positional
    # argument limits the number of replacements.
    eq(sub(r'\d+', self.bump_num, '08.2 -2 23x99y'), '9.3 -3 24x100y')
    eq(sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3), '9.3 -3 23x99y')

    # The string returned by a callable is not escape-processed, whereas a
    # template string is.
    eq(sub('.', lambda m: r"\n", 'x'), "\\n")
    eq(sub('.', r"\n", 'x'), "\n")

    # Named and numbered group references in the template.
    group_cases = (
        ('(?P<a>x)', r'\g<a>\g<a>'),
        ('(?P<a>x)', r'\g<a>\g<1>'),
        ('(?P<unk>x)', r'\g<unk>\g<unk>'),
        ('(?P<unk>x)', r'\g<1>\g<1>'),
    )
    for pattern, template in group_cases:
        eq(sub(pattern, template, 'xx'), 'xxxx')

    # Control-character escapes, both escaped in the template and literal.
    eq(sub('a', r'\t\n\v\r\f\a\b', 'a'), "\t\n\v\r\f\a\b")
    eq(sub('a', '\t\n\v\r\f\a', 'a'), "\t\n\v\r\f\a")
    eq(sub('a', '\t\n\v\r\f\a', 'a'),
       chr(9) + chr(10) + chr(11) + chr(13) + chr(12) + chr(7))

    eq(sub(r'^\s*', 'X', 'test'), 'Xtest')

    # Hexadecimal, Unicode and named-character escapes in the template.
    for template in (r"\x0A", r"\u000A", r"\U0000000A"):
        eq(sub(r"x", template, "x"), "\n")
    eq(sub(r"x", r"\N{LATIN CAPITAL LETTER A}", "x"), "A")

    eq(sub(br"x", br"\x0A", b"x"), b"\n")
112
+
113
def test_bug_449964(self):
    # A group reference followed by another escape in the template.
    replaced = regex.sub(r'(?P<unk>x)', r'\g<1>\g<1>\b', 'xx')
    self.assertEqual(replaced, "xx\bxx\b")
117
+
118
def test_bug_449000(self):
    # sub() on escaped characters: every raw/non-raw combination of pattern
    # and template must give the same result.
    subject = 'abc\r\ndef\r\n'
    for template in (r'\n', '\n'):
        for pattern in (r'\r\n', '\r\n'):
            self.assertEqual(regex.sub(pattern, template, subject),
                             "abc\ndef\n")
128
+
129
def test_bug_1661(self):
    # Flags passed together with an already-compiled pattern must raise
    # ValueError instead of being silently ignored.
    pattern = regex.compile('.')
    for func in (regex.match, regex.search, regex.findall):
        with self.assertRaisesRegex(ValueError,
                                    self.FLAGS_WITH_COMPILED_PAT):
            func(pattern, 'A', regex.I)
    with self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT):
        regex.compile(pattern, regex.I)
140
+
141
def test_bug_3629(self):
    # This pattern once triggered a bug in the sre-code validator; here it
    # only has to compile to a pattern object.
    compiled = regex.compile("(?P<quote>)(?(quote))")
    self.assertEqual(repr(type(compiled)), self.PATTERN_CLASS)
145
+
146
def test_sub_template_numeric_escape(self):
    # Bug 776311 and friends: numeric escapes in replacement templates.

    # Templates that are octal escapes (possibly followed by literal text).
    octal_cases = [
        (r'\0', "\0"),
        (r'\000', "\000"),
        (r'\001', "\001"),
        (r'\008', "\0" + "8"),
        (r'\009', "\0" + "9"),
        (r'\111', "\111"),
        (r'\117', "\117"),
        (r'\1111', "\1111"),
        (r'\1111', "\111" + "1"),
        (r'\00', '\x00'),
        (r'\07', '\x07'),
        (r'\08', "\0" + "8"),
        (r'\09', "\0" + "9"),
        (r'\0a', "\0" + "a"),
        (r'\400', "\u0100"),
        (r'\777', "\u01FF"),
    ]
    for template, expected in octal_cases:
        self.assertEqual(regex.sub('x', template, 'x'), expected)

    # The same large octal values with a bytes pattern.
    self.assertEqual(regex.sub(b'x', br'\400', b'x'), b"\x00")
    self.assertEqual(regex.sub(b'x', br'\777', b'x'), b"\xFF")

    # Templates that refer to a group the pattern 'x' does not have.
    bad_templates = [
        r'\1',
        r'\8',
        r'\9',
        r'\11',
        r'\18',
        r'\1a',
        r'\90',
        r'\99',
        r'\118',  # r'\11' + '8'
        r'\11a',
        r'\181',  # r'\18' + '1'
        r'\800',  # r'\80' + '0'
    ]
    for template in bad_templates:
        with self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF):
            regex.sub('x', template, 'x')

    # In Python 2.3 (etc), these loop endlessly in sre_parser.py.
    eleven_groups = '(((((((((((x)))))))))))'
    self.assertEqual(regex.sub(eleven_groups, r'\11', 'x'), 'x')
    ten_plus_one = '((((((((((y))))))))))(.)'
    self.assertEqual(regex.sub(ten_plus_one, r'\118', 'xyz'), 'xz8')
    self.assertEqual(regex.sub(ten_plus_one, r'\11a', 'xyz'), 'xza')
202
+
203
def test_qualified_re_sub(self):
    # sub() without and with a replacement count.
    subject = 'aaaaa'
    everything = regex.sub('a', 'b', subject)
    self.assertEqual(everything, 'bbbbb')
    first_only = regex.sub('a', 'b', subject, 1)
    self.assertEqual(first_only, 'baaaa')
206
+
207
def test_bug_114660(self):
    # Whitespace between two non-space characters is replaced by one space.
    result = regex.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there')
    self.assertEqual(result, 'hello there')
210
+
211
def test_bug_462270(self):
    # Test for empty sub() behaviour, see SF bug #462270
    # The expected (?V0) result depends on the running Python version.
    if sys.version_info >= (3, 7, 0):
        v0_expected = '-a-b--d-'
    else:
        v0_expected = '-a-b-d-'
    self.assertEqual(regex.sub('(?V0)x*', '-', 'abxd'), v0_expected)
    self.assertEqual(regex.sub('(?V1)x*', '-', 'abxd'), '-a-b--d-')
    self.assertEqual(regex.sub('x+', '-', 'abxd'), 'ab-d')
219
+
220
def test_bug_14462(self):
    # chr(255) is a valid identifier in Python 3.
    group_name = '\xFF'
    pattern = r'(?P<' + group_name + '>a)'
    found = regex.search(pattern, 'abc')
    self.assertEqual(found.group(group_name), 'a')
225
+
226
def test_symbolic_refs(self):
    # Malformed or unknown \g<...> references in a replacement template.
    pattern = '(?P<a>x)'
    failing = [
        (regex.error, self.MISSING_GT, r'\g<a'),
        (regex.error, self.MISSING_GROUP_NAME, r'\g<'),
        (regex.error, self.MISSING_LT, r'\g'),
        (regex.error, self.BAD_GROUP_NAME, r'\g<a a>'),
        (regex.error, self.BAD_GROUP_NAME, r'\g<1a1>'),
        (IndexError, self.UNKNOWN_GROUP_I, r'\g<ab>'),
    ]
    for exc_type, message, template in failing:
        with self.assertRaisesRegex(exc_type, message):
            regex.sub(pattern, template, 'xx')

    # The new behaviour of unmatched but valid groups is to treat them like
    # empty matches in the replacement template, like in Perl.
    for template in (r'\g<b>', r'\2'):
        self.assertEqual(regex.sub('(?P<a>x)|(?P<b>y)', template, 'xx'),
                         '')

    # A negative group number is reported as a bad group name.
    with self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME):
        regex.sub(pattern, r'\g<-1>', 'xx')
248
+
249
def test_re_subn(self):
    # subn() returns (new_string, number_of_substitutions).
    cases = [
        (("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)),
        (("b+", "x", "bbbb BBBB"), ('x BBBB', 1)),
        (("b+", "x", "xyz"), ('xyz', 0)),
        (("b*", "x", "xyz"), ('xxxyxzx', 4)),
        (("b*", "x", "xyz", 2), ('xxxyz', 2)),
    ]
    for args, expected in cases:
        self.assertEqual(regex.subn(*args), expected)
255
+
256
def test_re_split(self):
    # split() and splititer(), including patterns that can match the empty
    # string, capture groups, reverse matching and word boundaries.
    text = ":a:b::c"
    eq = self.assertEqual

    eq(regex.split(":", text), ['', 'a', 'b', '', 'c'])

    # Patterns that can match the empty string: the expected result
    # depends on the running Python version.
    if sys.version_info >= (3, 7, 0):
        empty_match_cases = [
            (":*", ['', '', 'a', '', 'b', '', 'c', '']),
            ("(:*)", ['', ':', '', '', 'a', ':', '', '', 'b', '::', '', '',
                      'c', '', '']),
            ("(?::*)", ['', '', 'a', '', 'b', '', 'c', '']),
            ("(:)*", ['', ':', '', None, 'a', ':', '', None, 'b', ':', '',
                      None, 'c', None, '']),
        ]
    else:
        empty_match_cases = [
            (":*", ['', 'a', 'b', 'c']),
            ("(:*)", ['', ':', 'a', ':', 'b', '::', 'c']),
            ("(?::*)", ['', 'a', 'b', 'c']),
            ("(:)*", ['', ':', 'a', ':', 'b', ':', 'c']),
        ]
    for pattern, expected in empty_match_cases:
        eq(regex.split(pattern, text), expected)

    eq(regex.split("([b:]+)", text), ['', ':', 'a', ':b::', 'c'])
    eq(regex.split("(b)|(:+)", text),
       ['', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c'])
    eq(regex.split("(?:b)|(?::+)", text), ['', 'a', '', '', 'c'])

    # split() and splititer() must agree, forwards and in reverse.
    paired_cases = [
        ("x", ['', 'a', 'b', 'c']),
        ("(?r)x", ['c', 'b', 'a', '']),
        ("(x)|(y)",
         ['', 'x', None, 'a', 'x', None, 'b', 'x', None, 'c']),
        ("(?r)(x)|(y)",
         ['c', 'x', None, 'b', 'x', None, 'a', 'x', None, '']),
    ]
    for pattern, expected in paired_cases:
        eq(regex.split(pattern, "xaxbxc"), expected)
        eq(list(regex.splititer(pattern, "xaxbxc")), expected)

    # Zero-width word-boundary splits under version 1 behaviour.
    eq(regex.split(r"(?V1)\b", "a b c"), ['', 'a', ' ', 'b', ' ', 'c', ''])
    eq(regex.split(r"(?V1)\m", "a b c"), ['', 'a ', 'b ', 'c'])
    eq(regex.split(r"(?V1)\M", "a b c"), ['a', ' b', ' c', ''])
306
+
307
def test_qualified_re_split(self):
    # split() with a maximum number of splits.
    limit = 2
    self.assertEqual(regex.split(":", ":a:b::c", limit), ['', 'a', 'b::c'])
    self.assertEqual(regex.split(':', 'a:b:c:d', limit), ['a', 'b', 'c:d'])
    self.assertEqual(regex.split("(:)", ":a:b::c", limit),
                     ['', ':', 'a', ':', 'b::c'])

    # With a pattern that can match the empty string the expected result
    # depends on the running Python version.
    if sys.version_info >= (3, 7, 0):
        expected = ['', ':', '', '', 'a:b::c']
    else:
        expected = ['', ':', 'a', ':', 'b::c']
    self.assertEqual(regex.split("(:*)", ":a:b::c", limit), expected)
319
+
320
+ def test_re_findall(self):
321
+ self.assertEqual(regex.findall(":+", "abc"), [])
322
+ self.assertEqual(regex.findall(":+", "a:b::c:::d"), [':', '::', ':::'])
323
+ self.assertEqual(regex.findall("(:+)", "a:b::c:::d"), [':', '::',
324
+ ':::'])
325
+ self.assertEqual(regex.findall("(:)(:*)", "a:b::c:::d"), [(':', ''),
326
+ (':', ':'), (':', '::')])
327
+
328
+ self.assertEqual(regex.findall(r"\((?P<test>.{0,5}?TEST)\)",
329
+ "(MY TEST)"), ["MY TEST"])
330
+ self.assertEqual(regex.findall(r"\((?P<test>.{0,3}?TEST)\)",
331
+ "(MY TEST)"), ["MY TEST"])
332
+ self.assertEqual(regex.findall(r"\((?P<test>.{0,3}?T)\)", "(MY T)"),
333
+ ["MY T"])
334
+
335
+ self.assertEqual(regex.findall(r"[^a]{2}[A-Z]", "\n S"), [' S'])
336
+ self.assertEqual(regex.findall(r"[^a]{2,3}[A-Z]", "\n S"), ['\n S'])
337
+ self.assertEqual(regex.findall(r"[^a]{2,3}[A-Z]", "\n S"), [' S'])
338
+
339
+ self.assertEqual(regex.findall(r"X(Y[^Y]+?){1,2}( |Q)+DEF",
340
+ "XYABCYPPQ\nQ DEF"), [('YPPQ\n', ' ')])
341
+
342
+ self.assertEqual(regex.findall(r"(\nTest(\n+.+?){0,2}?)?\n+End",
343
+ "\nTest\nxyz\nxyz\nEnd"), [('\nTest\nxyz\nxyz', '\nxyz')])
344
+
345
def test_bug_117612(self):
    # A group that did not participate shows up as '' in findall() tuples.
    expected = [('a', ''), ('b', 'b'), ('a', '')]
    self.assertEqual(regex.findall(r"(a|(b))", "aba"), expected)
348
+
349
def test_re_match(self):
    # Match objects: slicing, indexing and group() with several arguments.
    self.assertEqual(regex.match('a', 'a')[:], ('a',))
    self.assertEqual(regex.match('(a)', 'a')[:], ('a', 'a'))
    raw_match = regex.match(r'(a)', 'a')
    self.assertEqual(raw_match[0], 'a')
    self.assertEqual(raw_match[1], 'a')
    self.assertEqual(raw_match.group(1, 1), ('a', 'a'))

    # Optional and alternative groups that do not participate are None.
    pat = regex.compile('((a)|(b))(c)?')
    slice_cases = [
        ('a', ('a', 'a', 'a', None, None)),
        ('b', ('b', 'b', None, 'b', None)),
        ('ac', ('ac', 'a', 'a', None, 'c')),
        ('bc', ('bc', 'b', None, 'b', 'c')),
        ('bc', ('bc', 'b', None, 'b', 'c')),
    ]
    for subject, expected in slice_cases:
        self.assertEqual(pat.match(subject)[:], expected)

    # A single group.
    m = regex.match('(a)', 'a')
    for args, expected in (((), 'a'), ((0,), 'a'), ((1,), 'a'),
                           ((1, 1), ('a', 'a'))):
        self.assertEqual(m.group(*args), expected)

    # Groups addressed by number, by name and by a mixture of both.
    pat = regex.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
    self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
    self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
                     (None, 'b', None))
    self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
375
+
376
def test_re_groupref_exists(self):
    # Conditional subpatterns of the form (?(group)yes|no).
    parens = r'^(\()?([^()]+)(?(1)\))$'
    self.assertEqual(regex.match(parens, '(a)')[:], ('(a)', '(', 'a'))
    self.assertEqual(regex.match(parens, 'a')[:], ('a', None, 'a'))
    self.assertEqual(regex.match(parens, 'a)'), None)
    self.assertEqual(regex.match(parens, '(a'), None)

    both_branches = '^(?:(a)|c)((?(1)b|d))$'
    self.assertEqual(regex.match(both_branches, 'ab')[:], ('ab', 'a', 'b'))
    self.assertEqual(regex.match(both_branches, 'cd')[:],
                     ('cd', None, 'd'))

    empty_yes_branch = '^(?:(a)|c)((?(1)|d))$'
    self.assertEqual(regex.match(empty_yes_branch, 'cd')[:],
                     ('cd', None, 'd'))
    self.assertEqual(regex.match(empty_yes_branch, 'a')[:], ('a', 'a', ''))

    # Tests for bug #1177831: exercise groups other than the first group.
    p = regex.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
    self.assertEqual(p.match('abc')[:], ('abc', 'a', 'b', 'c'))
    self.assertEqual(p.match('ad')[:], ('ad', 'a', None, 'd'))
    for rejected in ('abd', 'ac'):
        self.assertEqual(p.match(rejected), None)
398
+
399
def test_re_groupref(self):
    # Numbered back-references inside a pattern.
    eq = self.assertEqual
    match = regex.match

    eq(match(r'^(\|)?([^()]+)\1$', '|a|')[:], ('|a|', '|', 'a'))
    eq(match(r'^(\|)?([^()]+)\1?$', 'a')[:], ('a', None, 'a'))
    eq(match(r'^(\|)?([^()]+)\1$', 'a|'), None)
    eq(match(r'^(\|)?([^()]+)\1$', '|a'), None)
    eq(match(r'^(?:(a)|c)(\1)$', 'aa')[:], ('aa', 'a', 'a'))
    eq(match(r'^(?:(a)|c)(\1)?$', 'c')[:], ('c', None, None))

    # Case-insensitive back-references combined with lazy repeats.
    pattern = (r"(?i)(.{1,40}?),(.{1,40}?)(?:;)+(.{1,80}).{1,40}?\3"
               r"(\ |;)+(.{1,80}?)\1")
    eq(regex.findall(pattern, "TEST, BEST; LEST ; Lest 123 Test, Best"),
       [('TEST', ' BEST', ' LEST', ' ', '123 ')])
414
+
415
def test_groupdict(self):
    # groupdict() maps each group name to the text it matched.
    m = regex.match('(?P<first>first) (?P<second>second)', 'first second')
    self.assertEqual(m.groupdict(), {'first': 'first', 'second': 'second'})
418
+
419
def test_expand(self):
    # expand() accepts numbered and named group references.
    m = regex.match("(?P<first>first) (?P<second>second)", "first second")
    expanded = m.expand(r"\2 \1 \g<second> \g<first>")
    self.assertEqual(expanded, 'second first second first')
423
+
424
def test_repeat_minmax(self):
    # Bounded repeats {m}, {m,n} and their lazy forms.
    match = regex.match

    # Too few repetitions allowed for the anchored three-character subject.
    for pattern in (r"^(\w){1}$", r"^(\w){1}?$", r"^(\w){1,2}$",
                    r"^(\w){1,2}?$"):
        self.assertEqual(match(pattern, "abc"), None)

    # A repeated group holds its last iteration.
    for pattern in (r"^(\w){3}$", r"^(\w){1,3}$", r"^(\w){1,4}$",
                    r"^(\w){3,4}?$", r"^(\w){3}?$", r"^(\w){1,3}?$",
                    r"^(\w){1,4}?$", r"^(\w){3,4}?$"):
        self.assertEqual(match(pattern, "abc")[1], 'c')

    for pattern in ("^x{1}$", "^x{1}?$", "^x{1,2}$", "^x{1,2}?$"):
        self.assertEqual(match(pattern, "xxx"), None)

    # Without the end anchor, greedy and lazy forms differ only for {0,1}.
    prefix_cases = [
        ("^x{1}", 'x'),
        ("^x{1}?", 'x'),
        ("^x{0,1}", 'x'),
        ("^x{0,1}?", ''),
    ]
    for pattern, expected in prefix_cases:
        self.assertEqual(match(pattern, "xxx")[0], expected)

    for pattern in ("^x{3}$", "^x{1,3}$", "^x{1,4}$", "^x{3,4}?$",
                    "^x{3}?$", "^x{1,3}?$", "^x{1,4}?$", "^x{3,4}?$"):
        self.assertEqual(bool(match(pattern, "xxx")), True)

    # Empty braces are not a repeat: they match themselves literally.
    self.assertEqual(match("^x{}$", "xxx"), None)
    self.assertEqual(bool(match("^x{}$", "x{}")), True)
460
+
461
def test_getattr(self):
    # Attributes of pattern objects.
    str_pat = regex.compile("(?i)(a)(b)")
    self.assertEqual(str_pat.pattern, '(?i)(a)(b)')
    self.assertEqual(str_pat.flags,
                     regex.I | regex.U | regex.DEFAULT_VERSION)
    bytes_pat = regex.compile(b"(?i)(a)(b)")
    self.assertEqual(bytes_pat.flags,
                     regex.A | regex.I | regex.DEFAULT_VERSION)
    self.assertEqual(str_pat.groups, 2)
    self.assertEqual(str_pat.groupindex, {})

    named_pat = regex.compile("(?i)(?P<first>a)(?P<other>b)")
    self.assertEqual(named_pat.groupindex, {'first': 1, 'other': 2})

    # Attributes of match objects.
    matched = regex.match("(a)", "a")
    self.assertEqual(matched.pos, 0)
    self.assertEqual(matched.endpos, 1)

    searched = regex.search("b(c)", "abcdef")
    self.assertEqual(searched.pos, 0)
    self.assertEqual(searched.endpos, 6)
    self.assertEqual(searched.span(), (1, 3))
    self.assertEqual(searched.span(1), (2, 3))

    matched = regex.match("(a)", "a")
    self.assertEqual(matched.string, 'a')
    self.assertEqual(matched.regs, ((0, 1), (0, 1)))
    self.assertEqual(repr(type(matched.re)), self.PATTERN_CLASS)

    # Issue 14260.
    # Assigning into the mapping returned by groupindex must not change
    # what the pattern reports afterwards.
    p = regex.compile(r'abc(?P<n>def)')
    p.groupindex["n"] = 0
    self.assertEqual(p.groupindex["n"], 1)
490
+
491
def test_special_escapes(self):
    # \b, \B, \A, \Z and the character-class escapes, for str and bytes
    # and with explicit LOCALE / UNICODE flags.
    search = regex.search
    eq = self.assertEqual

    eq(search(r"\b(b.)\b", "abcd abc bcd bx")[1], 'bx')
    eq(search(r"\B(b.)\B", "abc bcd bc abxd")[1], 'bx')
    eq(search(br"\b(b.)\b", b"abcd abc bcd bx", regex.LOCALE)[1], b'bx')
    eq(search(br"\B(b.)\B", b"abc bcd bc abxd", regex.LOCALE)[1], b'bx')
    eq(search(r"\b(b.)\b", "abcd abc bcd bx", regex.UNICODE)[1], 'bx')
    eq(search(r"\B(b.)\B", "abc bcd bc abxd", regex.UNICODE)[1], 'bx')

    # ^ and $ follow MULTILINE; \A and \Z only match at the string ends.
    eq(search(r"^abc$", "\nabc\n", regex.M)[0], 'abc')
    eq(search(r"^\Aabc\Z$", "abc", regex.M)[0], 'abc')
    eq(search(r"^\Aabc\Z$", "\nabc\n", regex.M), None)

    # The same checks with bytes patterns and no explicit flag.
    eq(search(br"\b(b.)\b", b"abcd abc bcd bx")[1], b'bx')
    eq(search(br"\B(b.)\B", b"abc bcd bc abxd")[1], b'bx')
    eq(search(br"^abc$", b"\nabc\n", regex.M)[0], b'abc')
    eq(search(br"^\Aabc\Z$", b"abc", regex.M)[0], b'abc')
    eq(search(br"^\Aabc\Z$", b"\nabc\n", regex.M), None)

    eq(search(r"\d\D\w\W\s\S", "1aa! a")[0], '1aa! a')
    eq(search(br"\d\D\w\W\s\S", b"1aa! a", regex.LOCALE)[0], b'1aa! a')
    eq(search(r"\d\D\w\W\s\S", "1aa! a", regex.UNICODE)[0], '1aa! a')
523
+
524
def test_bigcharset(self):
    # Character sets and alternations containing non-ASCII code points.
    self.assertEqual(regex.match(r"([\u2222\u2223])", "\u2222")[1],
                     '\u2222')
    self.assertEqual(regex.match(r"([\u2222\u2223])", "\u2222",
                                 regex.UNICODE)[1], '\u2222')

    # Each of these patterns must find every character of the subject.
    subject = "e\xe8\xe9\xea\xeb\u0113\u011b\u0117"
    patterns = (
        ".",
        r"[e\xe8\xe9\xea\xeb\u0113\u011b\u0117]",
        r"e|\xe8|\xe9|\xea|\xeb|\u0113|\u011b|\u0117",
    )
    for pattern in patterns:
        pieces = regex.findall(pattern, subject, flags=regex.UNICODE)
        self.assertEqual("".join(pieces),
                         'e\xe8\xe9\xea\xeb\u0113\u011b\u0117')
538
+
539
def test_anyall(self):
    # With DOTALL, '.' also matches a newline.
    single = regex.match("a.b", "a\nb", regex.DOTALL)
    self.assertEqual(single[0], "a\nb")
    repeated = regex.match("a.*b", "a\n\nb", regex.DOTALL)
    self.assertEqual(repeated[0], "a\n\nb")
543
+
544
def test_non_consuming(self):
    # Lookaheads do not consume text: group 1 is always just 'a'.
    positive = [
        (r"(a(?=\s[^a]))", "a b"),
        (r"(a(?=\s[^a]*))", "a b"),
        (r"(a(?=\s[abc]))", "a b"),
        (r"(a(?=\s[abc]*))", "a bc"),
        (r"(a)(?=\s\1)", "a a"),
        (r"(a)(?=\s\1*)", "a aa"),
        (r"(a)(?=\s(abc|a))", "a a"),
    ]
    negative = [
        (r"(a(?!\s[^a]))", "a a"),
        (r"(a(?!\s[abc]))", "a d"),
        (r"(a)(?!\s\1)", "a b"),
        (r"(a)(?!\s(abc|a))", "a b"),
    ]
    for pattern, subject in positive + negative:
        self.assertEqual(regex.match(pattern, subject)[1], 'a')
557
+
558
def test_ignore_case(self):
    # Case-insensitive matching for str and bytes.
    self.assertEqual(regex.match("abc", "ABC", regex.I)[0], 'ABC')
    self.assertEqual(regex.match(b"abc", b"ABC", regex.I)[0], b'ABC')

    # IGNORECASE combined with sets, back-references and alternation.
    group_cases = [
        (r"(a\s[^a]*)", "a bb", 'a bb'),
        (r"(a\s[abc])", "a b", 'a b'),
        (r"(a\s[abc]*)", "a bb", 'a bb'),
        (r"((a)\s\2)", "a a", 'a a'),
        (r"((a)\s\2*)", "a aa", 'a aa'),
        (r"((a)\s(abc|a))", "a a", 'a a'),
        (r"((a)\s(abc|a)*)", "a aa", 'a aa'),
    ]
    for pattern, subject, expected in group_cases:
        self.assertEqual(regex.match(pattern, subject, regex.I)[1],
                         expected)

    # Issue 3511.
    for pattern in (r"[Z-a]", r"(?i)[Z-a]"):
        self.assertEqual(regex.match(pattern, "_").span(), (0, 1))

    inline_cases = [
        (r"(?i)nao", "nAo"),
        (r"(?i)n\xE3o", "n\xC3o"),
        (r"(?i)n\xE3o", "N\xC3O"),
        (r"(?i)s", "\u017F"),
    ]
    for pattern, subject in inline_cases:
        self.assertEqual(bool(regex.match(pattern, subject)), True)
583
+
584
def test_case_folding(self):
    # Full case-folding ((?fi)): one character may correspond to several
    # characters and vice versa, e.g. sharp s and the ligatures.
    span_cases = [
        (r"(?fi)ss", "SS", (0, 2)),
        (r"(?fi)SS", "ss", (0, 2)),
        (r"(?fi)SS", "\N{LATIN SMALL LETTER SHARP S}", (0, 1)),
        (r"(?fi)\N{LATIN SMALL LETTER SHARP S}", "SS", (0, 2)),

        (r"(?fi)\N{LATIN SMALL LIGATURE ST}", "ST", (0, 2)),
        (r"(?fi)ST", "\N{LATIN SMALL LIGATURE ST}", (0, 1)),
        (r"(?fi)ST", "\N{LATIN SMALL LIGATURE LONG S T}", (0, 1)),

        (r"(?fi)SST", "\N{LATIN SMALL LETTER SHARP S}t", (0, 2)),
        (r"(?fi)SST", "s\N{LATIN SMALL LIGATURE LONG S T}", (0, 2)),
        (r"(?fi)SST", "s\N{LATIN SMALL LIGATURE ST}", (0, 2)),
        (r"(?fi)\N{LATIN SMALL LIGATURE ST}", "SST", (1, 3)),
        (r"(?fi)SST", "s\N{LATIN SMALL LIGATURE ST}", (0, 2)),

        (r"(?fi)FFI", "\N{LATIN SMALL LIGATURE FFI}", (0, 1)),
        (r"(?fi)FFI", "\N{LATIN SMALL LIGATURE FF}i", (0, 2)),
        (r"(?fi)FFI", "f\N{LATIN SMALL LIGATURE FI}", (0, 2)),
        (r"(?fi)\N{LATIN SMALL LIGATURE FFI}", "FFI", (0, 3)),
        (r"(?fi)\N{LATIN SMALL LIGATURE FF}i", "FFI", (0, 3)),
        (r"(?fi)f\N{LATIN SMALL LIGATURE FI}", "FFI", (0, 3)),
    ]
    for pattern, subject, expected in span_cases:
        self.assertEqual(regex.search(pattern, subject).span(), expected)

    # The three forms of sigma must all match one another.
    sigma = "\u03A3\u03C3\u03C2"
    for ch1 in sigma:
        for ch2 in sigma:
            # assertTrue with a message names the failing pair, unlike a
            # bare fail().
            self.assertTrue(regex.match(r"(?fi)" + ch1, ch2),
                            "%r does not match %r" % (ch1, ch2))

    # Simple case-insensitivity under version 1 against ligatures.
    v1_cases = [
        (r"(?iV1)ff", "\uFB00\uFB01"),
        (r"(?iV1)ff", "\uFB01\uFB00"),
        (r"(?iV1)fi", "\uFB00\uFB01"),
        (r"(?iV1)fi", "\uFB01\uFB00"),
        (r"(?iV1)fffi", "\uFB00\uFB01"),
        (r"(?iV1)f\uFB03", "\uFB00\uFB01"),
        (r"(?iV1)ff", "\uFB00\uFB01"),
        (r"(?iV1)fi", "\uFB00\uFB01"),
        (r"(?iV1)fffi", "\uFB00\uFB01"),
        (r"(?iV1)f\uFB03", "\uFB00\uFB01"),
        (r"(?iV1)f\uFB01", "\uFB00i"),
        (r"(?iV1)f\uFB01", "\uFB00i"),
    ]
    for pattern, subject in v1_cases:
        self.assertEqual(bool(regex.search(pattern, subject)), True)

    # Fuzzy matching with a lookbehind, under both version behaviours.
    for version in ("V0", "V1"):
        pattern = (r"(?i" + version +
                   r")\m(?:word){e<=3}\M(?<!\m(?:word){e<=1}\M)")
        self.assertEqual(
            regex.findall(pattern,
                          "word word2 word word3 word word234 word23 word"),
            ["word234", "word23"])

    # The subject needs two spaces of padding on each side for the match to
    # start at offset 2 and end at offset 8; with a single space the span
    # would be (1, 7).
    padded = "  affine  "
    for pattern in (r"(?fi)a\N{LATIN SMALL LIGATURE FFI}ne",
                    r"(?fi)a(?:\N{LATIN SMALL LIGATURE FFI}|x)ne",
                    r"(?fi)a(?:\N{LATIN SMALL LIGATURE FFI}|xy)ne"):
        self.assertEqual(regex.search(pattern, padded).span(), (2, 8))

    # Named lists (\L<...>) also take part in full case-folding.
    self.assertEqual(regex.search(r"(?fi)a\L<options>ne", "affine",
                                  options=["\N{LATIN SMALL LIGATURE FFI}"]
                                  ).span(), (0, 6))
    self.assertEqual(regex.search(r"(?fi)a\L<options>ne",
                                  "a\N{LATIN SMALL LIGATURE FFI}ne",
                                  options=["ffi"]).span(), (0, 4))
671
+
672
def test_category(self):
    # \s inside a group captures a single space.
    captured = regex.match(r"(\s)", " ")[1]
    self.assertEqual(captured, ' ')
674
+
675
def test_not_literal(self):
    # Negated single-character sets, alone and repeated.
    for pattern, subject, expected in ((r"\s([^a])", " b", 'b'),
                                       (r"\s([^a]*)", " bb", 'bb')):
        self.assertEqual(regex.search(pattern, subject)[1], expected)
678
+
679
def test_search_coverage(self):
    # Whitespace escape before a captured literal.
    leading = regex.search(r"\s(b)", " b")
    self.assertEqual(leading[1], 'b')

    # Whitespace escape after a literal; group 0 is the whole match.
    trailing = regex.search(r"a\s", "a ")
    self.assertEqual(trailing[0], 'a ')
682
+
683
def test_re_escape(self):
    # Escaping the empty string leaves it unchanged.
    self.assertEqual(regex.escape(""), "")

    # Each of the first 256 code points, once escaped, must match itself
    # and nothing more.
    chars = [chr(code) for code in range(0, 256)]
    for ch in chars:
        self.assertEqual(bool(regex.match(regex.escape(ch), ch)), True)
        self.assertEqual(regex.match(regex.escape(ch), ch).span(), (0, 1))

    # The escaped concatenation of all of them must match the whole string.
    everything = "".join(chars)
    compiled = regex.compile(regex.escape(everything))
    self.assertEqual(compiled.match(everything).span(), (0, 256))
695
+
696
def test_re_escape_byte(self):
    # Escaping the empty bytes object leaves it unchanged.
    self.assertEqual(regex.escape(b""), b"")

    # Each single byte value, once escaped, must match itself and nothing
    # more.
    singles = [bytes([value]) for value in range(0, 256)]
    for one in singles:
        self.assertEqual(bool(regex.match(regex.escape(one), one)), True)
        self.assertEqual(regex.match(regex.escape(one), one).span(), (0, 1))

    # The escaped concatenation of all byte values must match as a whole.
    everything = b"".join(singles)
    compiled = regex.compile(regex.escape(everything))
    self.assertEqual(compiled.match(everything).span(), (0, 256))
707
+
708
def test_constants(self):
    # Every one-letter flag must equal its long-named counterpart.
    # assertEqual with a message is used rather than a bare fail() so that
    # a mismatch reports which pair of names disagreed.
    alias_pairs = (
        ("I", "IGNORECASE"),
        ("L", "LOCALE"),
        ("M", "MULTILINE"),
        ("S", "DOTALL"),
        ("X", "VERBOSE"),
    )
    for short_name, long_name in alias_pairs:
        self.assertEqual(getattr(regex, short_name),
          getattr(regex, long_name),
          "regex.{} != regex.{}".format(short_name, long_name))
719
+
720
def test_flags(self):
    # Compiling with any one of these flags must yield a Pattern object.
    for flag in (regex.I, regex.M, regex.X, regex.S, regex.L):
        compiled = regex.compile('^pattern$', flag)
        self.assertEqual(repr(type(compiled)), self.PATTERN_CLASS)
724
+
725
def test_sre_character_literals(self):
    # Each template is an octal or hex escape, optionally followed by a
    # literal character that must not be absorbed into the escape; the
    # second item is that trailing character in the subject.
    templates = (
        (r"\%03o", ""),
        (r"\%03o0", "0"),
        (r"\%03o8", "8"),
        (r"\x%02x", ""),
        (r"\x%02x0", "0"),
        (r"\x%02xz", "z"),
    )
    for code in (0, 8, 16, 32, 64, 127, 128, 255):
        ch = chr(code)
        for template, tail in templates:
            self.assertEqual(bool(regex.match(template % code, ch + tail)),
              True)

    # Outside a set, "\911" is rejected as a group reference.
    with self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF):
        regex.match(r"\911", "")
740
+
741
def test_sre_character_class_literals(self):
    # Octal and hex escapes inside a character set, alone and followed by
    # an extra set member; the escaped character must always be matched.
    templates = (
        r"[\%03o]",
        r"[\%03o0]",
        r"[\%03o8]",
        r"[\x%02x]",
        r"[\x%02x0]",
        r"[\x%02xz]",
    )
    for code in (0, 8, 16, 32, 64, 127, 128, 255):
        ch = chr(code)
        for template in templates:
            self.assertEqual(bool(regex.match(template % code, ch)), True)

    # Inside a set, "\911" is rejected as a bad escape.
    with self.assertRaisesRegex(regex.error, self.BAD_OCTAL_ESCAPE):
        regex.match(r"[\911]", "")
752
+
753
def test_bug_113254(self):
    # Group 1 takes no part in the match, so its positions are all -1.
    found = regex.match(r'(a)|(b)', 'b')
    self.assertEqual(found.start(1), -1)
    self.assertEqual(found.end(1), -1)
    self.assertEqual(found.span(1), (-1, -1))
757
+
758
def test_bug_527371(self):
    # Bug described in patches 527371/672491: lastindex and lastgroup must
    # refer to the last group that actually matched.
    cases = (
        (r'(a)?a', 'a', "lastindex", None),
        (r'(a)(b)?b', 'ab', "lastindex", 1),
        (r'(?P<a>a)(?P<b>b)?b', 'ab', "lastgroup", 'a'),
        ("(?P<a>a(b))", "ab", "lastgroup", 'a'),
        ("((a))", "a", "lastindex", 1),
    )
    for pattern, text, attribute, expected in cases:
        found = regex.match(pattern, text)
        self.assertEqual(getattr(found, attribute), expected)
766
+
767
def test_bug_545855(self):
    # Bug 545855 -- an unterminated character set must be reported as a
    # compile error (regex.error), not as a TypeError.
    with self.assertRaisesRegex(regex.error, self.BAD_SET):
        regex.compile('foo[a-')
772
+
773
def test_bug_418626(self):
    # Bugs 418626 et al. -- lazy repeats ('*?') over long strings, which
    # once exhausted the recursion limit.
    ab_then_cd = 10000 * 'ab' + 'cd'

    # Simple single-character lazy repeats.
    simple_cases = (
        ('.*?c', ab_then_cd, 20001),
        ('.*?cd', 5000 * 'ab' + 'c' + 5000 * 'ab' + 'cde', 20003),
        ('.*?cd', 20000 * 'abc' + 'de', 60001),
    )
    for pattern, text, end in simple_cases:
        self.assertEqual(regex.match(pattern, text).end(0), end)

    # A non-simple '*?' (repeating a group) used to hit the recursion
    # limit before the non-recursive scheme was implemented.
    grouped = regex.search('(a|b)*?c', ab_then_cd)
    self.assertEqual(grouped.end(0), 20001)
787
+
788
def test_bug_612074(self):
    # An escaped non-ASCII character inside a set must compile.
    char_class = "[" + regex.escape("\u2039") + "]"
    compiled = regex.compile(char_class)
    self.assertEqual(compiled and 1, 1)
791
+
792
def test_stack_overflow(self):
    # Repeated groups over long input used to overflow the straightforward
    # recursive implementation.
    cases = (
        ('(x)*', 50000 * 'x'),
        ('(x)*y', 50000 * 'x' + 'y'),
        ('(x)*?y', 50000 * 'x' + 'y'),
    )
    for pattern, text in cases:
        self.assertEqual(regex.match(pattern, text)[1], 'x')
798
+
799
def test_scanner(self):
    # Token callbacks receive the scanner and the matched text; a None
    # callback discards the token.
    lexicon = [
        (r"[a-zA-Z_]\w*", lambda _scanner, text: text),
        (r"\d+\.\d*", lambda _scanner, text: float(text)),
        (r"\d+", lambda _scanner, text: int(text)),
        (r"=|\+|-|\*|/", lambda _scanner, text: "op%s" % text),
        (r"\s+", None),
    ]
    scanner = regex.Scanner(lexicon)

    # The scanner is backed by a compiled Pattern object.
    inner_pattern = scanner.scanner.scanner("").pattern
    self.assertEqual(repr(type(inner_pattern)), self.PATTERN_CLASS)

    # scan() returns the token list and the unconsumed remainder.
    tokens = ['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5, 'op+', 'bar']
    self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
      (tokens, ''))
814
+
815
def test_bug_448951(self):
    # Bug 448951 (similar to 429357, but with single char match).
    # (Also test greedy matches.)
    for quantifier in ('', '?', '*'):
        pattern = r'((.%s):)?z' % quantifier
        self.assertEqual(regex.match(pattern, 'z')[:], ('z', None, None))
        self.assertEqual(regex.match(pattern, 'a:z')[:], ('a:z', 'a:',
          'a'))
823
+
824
def test_bug_725106(self):
    # Capturing groups in alternatives in repeats: (pattern, subject,
    # expected groups including group 0).
    cases = (
        ('^((a)|b)*', 'abc', ('ab', 'b', 'a')),
        ('^(([ab])|c)*', 'abc', ('abc', 'c', 'b')),
        ('^((d)|[ab])*', 'abc', ('ab', 'b', None)),
        ('^((a)c|[ab])*', 'abc', ('ab', 'b', None)),
        ('^((a)|b)*?c', 'abc', ('abc', 'b', 'a')),
        ('^(([ab])|c)*?d', 'abcd', ('abcd', 'c', 'b')),
        ('^((d)|[ab])*?c', 'abc', ('abc', 'b', None)),
        ('^((a)c|[ab])*?c', 'abc', ('abc', 'b', None)),
    )
    for pattern, text, groups in cases:
        self.assertEqual(regex.match(pattern, text)[:], groups)
841
+
842
def test_bug_725149(self):
    # Mark_stack_base restoring before restoring marks: groups inside a
    # lookaround within a repeat must end up unset.
    cases = (
        ('(a)(?:(?=(b)*)c)*', ('a', 'a', None)),
        ('(a)((?!(b)*))*', ('a', 'a', None, None)),
    )
    for pattern, groups in cases:
        self.assertEqual(regex.match(pattern, 'abb')[:], groups)
848
+
849
def test_bug_764548(self):
    # Bug 764548: regex.compile() must accept instances of str subclasses.
    class my_unicode(str):
        pass

    compiled = regex.compile(my_unicode("abc"))
    self.assertEqual(compiled.match("xyz"), None)
854
+
855
def test_finditer(self):
    # finditer yields one match per run of colons, in order.
    runs = [found[0] for found in regex.finditer(r":+", "a:b::c:::d")]
    self.assertEqual(runs, [':', '::', ':::'])
858
+
859
def test_bug_926075(self):
    # A str pattern and a bytes pattern with the same text must not be the
    # same compiled object. assertIsNot is used instead of a bare fail()
    # so that a failure shows the offending object.
    str_pattern = regex.compile('bug_926075')
    bytes_pattern = regex.compile(b'bug_926075')
    self.assertIsNot(str_pattern, bytes_pattern)
862
+
863
def test_bug_931848(self):
    # A set of several non-ASCII full stops plus the ASCII one must still
    # split on the ASCII one.
    dots = regex.compile("[\u002E\u3002\uFF0E\uFF61]")
    self.assertEqual(dots.split("a.b.c"), ['a', 'b', 'c'])
867
+
868
def test_bug_581080(self):
    # finditer: exactly one whitespace match, then exhaustion.
    matches = regex.finditer(r"\s", "a b")
    self.assertEqual(next(matches).span(), (1, 2))
    with self.assertRaises(StopIteration):
        next(matches)

    # The scanner interface: one match, then None.
    scanner = regex.compile(r"\s").scanner("a b")
    first = scanner.search()
    self.assertEqual(first.span(), (1, 2))
    self.assertEqual(scanner.search(), None)
876
+
877
def test_bug_817234(self):
    # ".*" matches the whole string, then an empty match at the end, and
    # then the iterator is exhausted.
    matches = regex.finditer(r".*", "asdf")
    for span in ((0, 4), (4, 4)):
        self.assertEqual(next(matches).span(), span)
    with self.assertRaises(StopIteration):
        next(matches)
882
+
883
def test_empty_array(self):
    # SF bug 1647541: matching against empty arrays of every typecode.
    import array

    for typecode in 'bBhHiIlLfd':
        empty = array.array(typecode)
        # A non-empty pattern cannot match an empty buffer.
        self.assertEqual(regex.compile(b"bla").match(empty), None)
        # The empty pattern matches and has no capture groups.
        self.assertEqual(regex.compile(b"").match(empty)[1 : ], ())
890
+
891
def test_inline_flags(self):
    # Bug #1700.
    upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Below
    lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Below

    # Case-insensitive matching requested through flag arguments, inline
    # flags, or a mix of both: (pattern, flags argument, subject).
    cases = (
        (upper_char, regex.I | regex.U, lower_char),
        (lower_char, regex.I | regex.U, upper_char),
        ('(?i)' + upper_char, regex.U, lower_char),
        ('(?i)' + lower_char, regex.U, upper_char),
        ('(?iu)' + upper_char, 0, lower_char),
        ('(?iu)' + lower_char, 0, upper_char),
    )
    for pattern, flags, subject in cases:
        compiled = regex.compile(pattern, flags)
        self.assertEqual(bool(compiled.match(subject)), True)

    # Changed to positional flags in regex 2023.12.23: an inline flag only
    # affects what follows it.
    self.assertEqual(bool(regex.match(r"(?i)a", "A")), True)
    self.assertEqual(regex.match(r"a(?i)", "A"), None)
917
+
918
def test_dollar_matches_twice(self):
    # $ matches the end of string, and just before the terminating \n.
    end_of_string = regex.compile('$')
    for text, expected in (
        ('a\nb\n', 'a\nb#\n#'),
        ('a\nb\nc', 'a\nb\nc#'),
        ('\n', '#\n#'),
    ):
        self.assertEqual(end_of_string.sub('#', text), expected)

    # With MULTILINE it also matches before every newline.
    end_of_line = regex.compile('$', regex.MULTILINE)
    for text, expected in (
        ('a\nb\n', 'a#\nb#\n#'),
        ('a\nb\nc', 'a#\nb#\nc#'),
        ('\n', '#\n#'),
    ):
        self.assertEqual(end_of_line.sub('#', text), expected)
929
+
930
def test_bytes_str_mixing(self):
    # Mixing str and bytes is disallowed.
    pat = regex.compile('.')
    bpat = regex.compile(b'.')

    # Pattern/subject/template type mismatches raise TypeError.
    type_errors = (
        (self.STR_PAT_ON_BYTES, lambda: pat.match(b'b')),
        (self.BYTES_PAT_ON_STR, lambda: bpat.match('b')),
        (self.STR_PAT_BYTES_TEMPL, lambda: pat.sub(b'b', 'c')),
        (self.STR_PAT_ON_BYTES, lambda: pat.sub('b', b'c')),
        (self.STR_PAT_ON_BYTES, lambda: pat.sub(b'b', b'c')),
        (self.BYTES_PAT_ON_STR, lambda: bpat.sub(b'b', 'c')),
        (self.BYTES_PAT_STR_TEMPL, lambda: bpat.sub('b', b'c')),
        (self.BYTES_PAT_ON_STR, lambda: bpat.sub('b', 'c')),
    )
    for message, attempt in type_errors:
        self.assertRaisesRegex(TypeError, message, attempt)

    # Incompatible flag combinations raise ValueError at compile time.
    value_errors = (
        (self.BYTES_PAT_UNI_FLAG,
          lambda: regex.compile(br'\w', regex.UNICODE)),
        (self.BYTES_PAT_UNI_FLAG, lambda: regex.compile(br'(?u)\w')),
        (self.MIXED_FLAGS,
          lambda: regex.compile(r'\w', regex.UNICODE | regex.ASCII)),
        (self.MIXED_FLAGS, lambda: regex.compile(r'(?u)\w', regex.ASCII)),
        (self.MIXED_FLAGS, lambda: regex.compile(r'(?a)\w', regex.UNICODE)),
        (self.MIXED_FLAGS, lambda: regex.compile(r'(?au)\w')),
    )
    for message, attempt in value_errors:
        self.assertRaisesRegex(ValueError, message, attempt)
963
+
964
def test_ascii_and_unicode_flag(self):
    # String patterns: Unicode semantics by default and with UNICODE.
    for flags in (0, regex.UNICODE):
        pat = regex.compile('\xc0', flags | regex.IGNORECASE)
        self.assertEqual(bool(pat.match('\xe0')), True)
        pat = regex.compile(r'\w', flags)
        self.assertEqual(bool(pat.match('\xe0')), True)

    # String patterns with ASCII semantics, requested by flag argument or
    # inline: non-ASCII letters neither case-fold nor count as word chars.
    pat = regex.compile('\xc0', regex.ASCII | regex.IGNORECASE)
    self.assertEqual(pat.match('\xe0'), None)
    pat = regex.compile('(?a)\xc0', regex.IGNORECASE)
    self.assertEqual(pat.match('\xe0'), None)
    pat = regex.compile(r'\w', regex.ASCII)
    self.assertEqual(pat.match('\xe0'), None)
    pat = regex.compile(r'(?a)\w')
    self.assertEqual(pat.match('\xe0'), None)

    # Bytes patterns: ASCII semantics by default and with ASCII.
    for flags in (0, regex.ASCII):
        pat = regex.compile(b'\xc0', flags | regex.IGNORECASE)
        self.assertEqual(pat.match(b'\xe0'), None)
        # The loop's flags are passed here too; previously this compile
        # ignored them, so the explicit-ASCII case for \w was never run.
        pat = regex.compile(br'\w', flags)
        self.assertEqual(pat.match(b'\xe0'), None)

    # ASCII and UNICODE cannot be combined.
    self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda:
      regex.compile(r'(?au)\w'))
990
+
991
def test_subscripting_match(self):
    # Subscripting a match object must agree with group(), and slicing
    # must return the groups (including group 0) as a tuple.
    m = regex.match(r'(?<a>\w)', 'xy')

    # One up-front guard; the original repeated this check in branches
    # that could never be reached once it had passed.
    if not m:
        self.fail("Failed: expected match but returned None")

    if m[0] != m.group(0) or m[1] != m.group(1):
        self.fail("Failed")

    if m[:] != ('x', 'x'):
        self.fail("Failed: expected \"('x', 'x')\" but got {} instead".format(ascii(m[:])))
1001
+
1002
def test_new_named_groups(self):
    # The (?P<name>...) and (?<name>...) spellings of a named group must
    # both match and give the same groups.
    p_style = regex.match(r'(?P<a>\w)', 'x')
    bare_style = regex.match(r'(?<a>\w)', 'x')
    if p_style and bare_style and p_style[:] == bare_style[:]:
        return
    self.fail("Failed")
1007
+
1008
def test_properties(self):
    # Character properties and classes under ASCII, LOCALE and Unicode
    # semantics. The two helpers below keep the original assertion forms.
    def expect_match(pattern, text):
        self.assertEqual(bool(regex.match(pattern, text)), True)

    def expect_no_match(pattern, text):
        self.assertEqual(regex.match(pattern, text), None)

    expect_no_match(b'(?ai)\xC0', b'\xE0')
    expect_no_match(br'(?ai)\xC0', b'\xE0')
    expect_no_match(br'(?a)\w', b'\xE0')
    expect_match(r'\w', '\xE0')

    # A check of br'(?L)\w' against b'\xE0' was dropped here: it's not
    # possible to determine what the correct result should be in the
    # general case.

    expect_match(br'(?L)\d', b'0')
    expect_match(br'(?L)\s', b' ')
    expect_match(br'(?L)\w', b'a')
    expect_no_match(br'(?L)\d', b'?')
    expect_no_match(br'(?L)\s', b'?')
    expect_no_match(br'(?L)\w', b'?')

    expect_no_match(br'(?L)\D', b'0')
    expect_no_match(br'(?L)\S', b' ')
    expect_no_match(br'(?L)\W', b'a')
    expect_match(br'(?L)\D', b'?')
    expect_match(br'(?L)\S', b'?')
    expect_match(br'(?L)\W', b'?')

    # Every spelling of "is Cyrillic" must accept a Cyrillic letter.
    cyrillic_patterns = (
        r'\p{Cyrillic}',
        r'(?i)\p{Cyrillic}',
        r'\p{IsCyrillic}',
        r'\p{Script=Cyrillic}',
        r'\p{InCyrillic}',
        r'\p{Block=Cyrillic}',
        r'[[:Cyrillic:]]',
        r'[[:IsCyrillic:]]',
        r'[[:Script=Cyrillic:]]',
        r'[[:InCyrillic:]]',
        r'[[:Block=Cyrillic:]]',
    )
    for pattern in cyrillic_patterns:
        expect_match(pattern, '\N{CYRILLIC CAPITAL LETTER A}')

    # Every spelling of "is not Cyrillic" must accept a Latin letter.
    not_cyrillic_patterns = (
        r'\P{Cyrillic}',
        r'\P{IsCyrillic}',
        r'\P{Script=Cyrillic}',
        r'\P{InCyrillic}',
        r'\P{Block=Cyrillic}',
        r'\p{^Cyrillic}',
        r'\p{^IsCyrillic}',
        r'\p{^Script=Cyrillic}',
        r'\p{^InCyrillic}',
        r'\p{^Block=Cyrillic}',
        r'[[:^Cyrillic:]]',
        r'[[:^IsCyrillic:]]',
        r'[[:^Script=Cyrillic:]]',
        r'[[:^InCyrillic:]]',
        r'[[:^Block=Cyrillic:]]',
    )
    for pattern in not_cyrillic_patterns:
        expect_match(pattern, '\N{LATIN CAPITAL LETTER A}')

    expect_match(r'\d', '0')
    expect_match(r'\s', ' ')
    expect_match(r'\w', 'A')
    expect_no_match(r"\d", "?")
    expect_no_match(r"\s", "?")
    expect_no_match(r"\w", "?")
    expect_no_match(r"\D", "0")
    expect_no_match(r"\S", " ")
    expect_no_match(r"\W", "A")
    expect_match(r'\D', '?')
    expect_match(r'\S', '?')
    expect_match(r'\W', '?')

    expect_match(r'\p{L}', 'A')
    expect_match(r'\p{L}', 'a')
    expect_match(r'\p{Lu}', 'A')
    expect_match(r'\p{Ll}', 'a')

    expect_match(r'(?i)a', 'a')
    expect_match(r'(?i)a', 'A')

    expect_match(r'\w', '0')
    expect_match(r'\w', 'a')
    expect_match(r'\w', '_')

    # \X matches a base character together with its combining marks.
    self.assertEqual(regex.match(r"\X", "\xE0").span(), (0, 1))
    self.assertEqual(regex.match(r"\X", "a\u0300").span(), (0, 2))
    self.assertEqual(regex.findall(r"\X",
      "a\xE0a\u0300e\xE9e\u0301"), ['a', '\xe0', 'a\u0300', 'e',
      '\xe9', 'e\u0301'])
    self.assertEqual(regex.findall(r"\X{3}",
      "a\xE0a\u0300e\xE9e\u0301"), ['a\xe0a\u0300', 'e\xe9e\u0301'])
    self.assertEqual(regex.findall(r"\X", "\r\r\n\u0301A\u0301"),
      ['\r', '\r\n', '\u0301', 'A\u0301'])

    expect_match(r'\p{Ll}', 'a')

    chars_u = "-09AZaz_\u0393\u03b3"
    chars_b = b"-09AZaz_"

    # (pattern, subject, concatenation of all single-character matches).
    tests = [
        (r"\w", chars_u, "09AZaz_\u0393\u03b3"),
        (r"[[:word:]]", chars_u, "09AZaz_\u0393\u03b3"),
        (r"\W", chars_u, "-"),
        (r"[[:^word:]]", chars_u, "-"),
        (r"\d", chars_u, "09"),
        (r"[[:digit:]]", chars_u, "09"),
        (r"\D", chars_u, "-AZaz_\u0393\u03b3"),
        (r"[[:^digit:]]", chars_u, "-AZaz_\u0393\u03b3"),
        (r"[[:alpha:]]", chars_u, "AZaz\u0393\u03b3"),
        (r"[[:^alpha:]]", chars_u, "-09_"),
        (r"[[:alnum:]]", chars_u, "09AZaz\u0393\u03b3"),
        (r"[[:^alnum:]]", chars_u, "-_"),
        (r"[[:xdigit:]]", chars_u, "09Aa"),
        (r"[[:^xdigit:]]", chars_u, "-Zz_\u0393\u03b3"),
        (r"\p{InBasicLatin}", "a\xE1", "a"),
        (r"\P{InBasicLatin}", "a\xE1", "\xE1"),
        (r"(?i)\p{InBasicLatin}", "a\xE1", "a"),
        (r"(?i)\P{InBasicLatin}", "a\xE1", "\xE1"),

        (br"(?L)\w", chars_b, b"09AZaz_"),
        (br"(?L)[[:word:]]", chars_b, b"09AZaz_"),
        (br"(?L)\W", chars_b, b"-"),
        (br"(?L)[[:^word:]]", chars_b, b"-"),
        (br"(?L)\d", chars_b, b"09"),
        (br"(?L)[[:digit:]]", chars_b, b"09"),
        (br"(?L)\D", chars_b, b"-AZaz_"),
        (br"(?L)[[:^digit:]]", chars_b, b"-AZaz_"),
        (br"(?L)[[:alpha:]]", chars_b, b"AZaz"),
        (br"(?L)[[:^alpha:]]", chars_b, b"-09_"),
        (br"(?L)[[:alnum:]]", chars_b, b"09AZaz"),
        (br"(?L)[[:^alnum:]]", chars_b, b"-_"),
        (br"(?L)[[:xdigit:]]", chars_b, b"09Aa"),
        (br"(?L)[[:^xdigit:]]", chars_b, b"-Zz_"),

        (br"(?a)\w", chars_b, b"09AZaz_"),
        (br"(?a)[[:word:]]", chars_b, b"09AZaz_"),
        (br"(?a)\W", chars_b, b"-"),
        (br"(?a)[[:^word:]]", chars_b, b"-"),
        (br"(?a)\d", chars_b, b"09"),
        (br"(?a)[[:digit:]]", chars_b, b"09"),
        (br"(?a)\D", chars_b, b"-AZaz_"),
        (br"(?a)[[:^digit:]]", chars_b, b"-AZaz_"),
        (br"(?a)[[:alpha:]]", chars_b, b"AZaz"),
        (br"(?a)[[:^alpha:]]", chars_b, b"-09_"),
        (br"(?a)[[:alnum:]]", chars_b, b"09AZaz"),
        (br"(?a)[[:^alnum:]]", chars_b, b"-_"),
        (br"(?a)[[:xdigit:]]", chars_b, b"09Aa"),
        (br"(?a)[[:^xdigit:]]", chars_b, b"-Zz_"),
    ]
    for pattern, chars, expected in tests:
        # Only the matching itself is guarded. The comparison is done
        # outside the try block so that a wrong result is not caught by
        # "except Exception" and re-reported as a raised AssertionError.
        try:
            found = regex.findall(pattern, chars)
        except Exception as e:
            self.fail("Failed: {} raised {}".format(pattern, ascii(e)))
        # chars[ : 0] is an empty str or bytes of the right type to join.
        if chars[ : 0].join(found) != expected:
            self.fail("Failed: {}".format(pattern))

    expect_match(r"\p{NumericValue=0}", "0")
    expect_match(r"\p{NumericValue=1/2}", "\N{VULGAR FRACTION ONE HALF}")
    expect_match(r"\p{NumericValue=0.5}", "\N{VULGAR FRACTION ONE HALF}")
1191
+
1192
def test_word_class(self):
    # A word made of letters and combining marks (\u0939...\u0940),
    # preceded by a space and followed by a comma.
    subject = " \u0939\u093f\u0928\u094d\u0926\u0940,"
    word = '\u0939\u093f\u0928\u094d\u0926\u0940'

    # Combining marks count as word characters.
    self.assertEqual(regex.findall(r"\w+", subject), [word])
    self.assertEqual(regex.findall(r"\W+", subject), [' ', ','])

    # Splitting on word boundaries isolates the word; splitting on
    # non-boundaries breaks it between every pair of characters.
    self.assertEqual(regex.split(r"(?V1)\b", subject), [' ', word, ','])
    self.assertEqual(regex.split(r"(?V1)\B", subject), ['', ' \u0939',
      '\u093f', '\u0928', '\u094d', '\u0926', '\u0940,', ''])
1204
+
1205
def test_search_anchor(self):
    # \G anchors each match at the search position, so matching stops at
    # the space.
    pairs = regex.findall(r"\G\w{2}", "abcd ef")
    self.assertEqual(pairs, ['ab', 'cd'])
1207
+
1208
def test_search_reverse(self):
    # Forward versus reverse ((?r)) searching. The helpers take the
    # expected result first, then the arguments for the regex function.
    def findall_gives(expected, *args, **kwargs):
        self.assertEqual(regex.findall(*args, **kwargs), expected)

    def finditer_gives(expected, *args, **kwargs):
        self.assertEqual([m[0] for m in regex.finditer(*args, **kwargs)],
          expected)

    def search_spans(expected, pattern, text):
        self.assertEqual(regex.search(pattern, text).span(), expected)

    findall_gives(['c', 'b', 'a'], r"(?r).", "abc")
    findall_gives(['c', 'b', 'a'], r"(?r).", "abc", overlapped=True)
    findall_gives(['de', 'bc'], r"(?r)..", "abcde")
    findall_gives(['de', 'cd', 'bc', 'ab'], r"(?r)..", "abcde",
      overlapped=True)
    findall_gives([("b", "-", "c"), ("a", "-", "b")], r"(?r)(.)(-)(.)",
      "a-b-c", overlapped=True)

    # This pair of checks is performed twice, as in the original.
    for _ in range(2):
        finditer_gives(['c', 'b', 'a'], r"(?r).", "abc")
        finditer_gives(['de', 'cd', 'bc', 'ab'], r"(?r)..", "abcde",
          overlapped=True)

    # An empty match at the start of the string comes first when
    # searching forwards and last when searching backwards.
    findall_gives(['', 'foo', 'bar'], r"^|\w+", "foo bar")
    findall_gives(['', 'foo', 'bar'], r"(?V1)^|\w+", "foo bar")
    findall_gives(['bar', 'foo', ''], r"(?r)^|\w+", "foo bar")
    findall_gives(['bar', 'foo', ''], r"(?rV1)^|\w+", "foo bar")

    finditer_gives(['', 'foo', 'bar'], r"^|\w+", "foo bar")
    finditer_gives(['', 'foo', 'bar'], r"(?V1)^|\w+", "foo bar")
    finditer_gives(['bar', 'foo', ''], r"(?r)^|\w+", "foo bar")
    finditer_gives(['bar', 'foo', ''], r"(?rV1)^|\w+", "foo bar")

    # The search anchor \G in both directions.
    findall_gives(['ab', 'cd'], r"\G\w{2}", "abcd ef")
    findall_gives(['ab', 'cd'], r".{2}(?<=\G.*)", "abcd")
    findall_gives([], r"(?r)\G\w{2}", "abcd ef")
    findall_gives(['ef'], r"(?r)\w{2}\G", "abcd ef")

    findall_gives(['qq', '', '', ''], r"q*", "qqwe")
    findall_gives(['qq', '', '', ''], r"(?V1)q*", "qqwe")
    findall_gives(['', '', 'qq', ''], r"(?r)q*", "qqwe")
    findall_gives(['', '', 'qq', ''], r"(?rV1)q*", "qqwe")

    # pos/endpos limits, including a negative endpos.
    findall_gives(['b', 'c'], ".", "abcd", pos=1, endpos=3)
    findall_gives(['b', 'c'], ".", "abcd", pos=1, endpos=-1)
    finditer_gives(['b', 'c'], ".", "abcd", pos=1, endpos=3)
    finditer_gives(['b', 'c'], ".", "abcd", pos=1, endpos=-1)

    finditer_gives(['c', 'b'], "(?r).", "abcd", pos=1, endpos=3)
    finditer_gives(['c', 'b'], "(?r).", "abcd", pos=1, endpos=-1)
    findall_gives(['c', 'b'], "(?r).", "abcd", pos=1, endpos=3)
    findall_gives(['c', 'b'], "(?r).", "abcd", pos=1, endpos=-1)

    findall_gives(['a', 'B'], r"[ab]", "aB", regex.I)
    findall_gives(['B', 'a'], r"(?r)[ab]", "aB", regex.I)

    findall_gives(['bc'], r"(?r).{2}", "abc")
    findall_gives(['bc', 'ab'], r"(?r).{2}", "abc", overlapped=True)
    findall_gives([('first', 'second'), ('third', 'fourth')],
      r"(\w+) (\w+)", "first second third fourth fifth")
    findall_gives([('fourth', 'fifth'), ('second', 'third')],
      r"(?r)(\w+) (\w+)", "first second third fourth fifth")

    finditer_gives(['bc'], r"(?r).{2}", "abc")
    finditer_gives(['bc', 'ab'], r"(?r).{2}", "abc", overlapped=True)
    finditer_gives(['first second', 'third fourth'], r"(\w+) (\w+)",
      "first second third fourth fifth")
    finditer_gives(['fourth fifth', 'second third'], r"(?r)(\w+) (\w+)",
      "first second third fourth fifth")

    search_spans((0, 6), "abcdef", "abcdef")
    search_spans((0, 6), "(?r)abcdef", "abcdef")
    search_spans((0, 6), "(?i)abcdef", "ABCDEF")
    search_spans((0, 6), "(?ir)abcdef", "ABCDEF")

    # Substitution rebuilds the same string in either direction.
    self.assertEqual(regex.sub(r"(.)", r"\1", "abc"), 'abc')
    self.assertEqual(regex.sub(r"(?r)(.)", r"\1", "abc"), 'abc')
1305
+
1306
def test_atomic(self):
    # Issue 433030: an atomic group does not give back what it consumed,
    # so the trailing "a" can never match.
    outcome = regex.search(r"(?>a*)a", "aa")
    self.assertEqual(outcome, None)
1309
+
1310
def test_possessive(self):
    # Non-possessive repeats backtrack, so the trailing item still
    # matches: (pattern, subject, expected span).
    backtracking_cases = (
        # Single-character non-possessive.
        (r"a?a", "a", (0, 1)),
        (r"a*a", "aaa", (0, 3)),
        (r"a+a", "aaa", (0, 3)),
        (r"a{1,3}a", "aaa", (0, 3)),
        # Multiple-character non-possessive.
        (r"(?:ab)?ab", "ab", (0, 2)),
        (r"(?:ab)*ab", "ababab", (0, 6)),
        (r"(?:ab)+ab", "ababab", (0, 6)),
        (r"(?:ab){1,3}ab", "ababab", (0, 6)),
    )
    for pattern, text, span in backtracking_cases:
        self.assertEqual(regex.search(pattern, text).span(), span)

    # Possessive repeats keep everything they consumed, so the trailing
    # item cannot match and the search fails.
    possessive_cases = (
        # Single-character possessive.
        (r"a?+a", "a"),
        (r"a*+a", "aaa"),
        (r"a++a", "aaa"),
        (r"a{1,3}+a", "aaa"),
        # Multiple-character possessive.
        (r"(?:ab)?+ab", "ab"),
        (r"(?:ab)*+ab", "ababab"),
        (r"(?:ab)++ab", "ababab"),
        (r"(?:ab){1,3}+ab", "ababab"),
    )
    for pattern, text in possessive_cases:
        self.assertEqual(regex.search(pattern, text), None)
1335
+
1336
def test_zerowidth(self):
    # Version 0 behaviour for splitting on zero-width matches depends on
    # the running Python version.
    splits_on_empty = sys.version_info >= (3, 7, 0)

    forward_pieces = ['', 'x', 'a', 'x', 'b', 'x', 'c', '']
    reverse_pieces = ['', 'c', 'x', 'b', 'x', 'a', 'x', '']

    def findall_gives(expected, pattern, text):
        self.assertEqual(regex.findall(pattern, text), expected)

    def finditer_gives(expected, pattern, text):
        self.assertEqual([m[0] for m in regex.finditer(pattern, text)],
          expected)

    def split_gives(expected, pattern, text):
        # Checks split() and then splititer() against the same result.
        self.assertEqual(regex.split(pattern, text), expected)
        self.assertEqual(list(regex.splititer(pattern, text)), expected)

    # Issue 3262.
    if splits_on_empty:
        self.assertEqual(regex.split(r"\b", "a b"), ['', 'a', ' ', 'b',
          ''])
    else:
        self.assertEqual(regex.split(r"\b", "a b"), ['a b'])
    self.assertEqual(regex.split(r"(?V1)\b", "a b"), ['', 'a', ' ', 'b',
      ''])

    # Issue 1647489.
    findall_gives(['', 'foo', 'bar'], r"^|\w+", "foo bar")
    finditer_gives(['', 'foo', 'bar'], r"^|\w+", "foo bar")
    findall_gives(['bar', 'foo', ''], r"(?r)^|\w+", "foo bar")
    finditer_gives(['bar', 'foo', ''], r"(?r)^|\w+", "foo bar")
    findall_gives(['', 'foo', 'bar'], r"(?V1)^|\w+", "foo bar")
    finditer_gives(['', 'foo', 'bar'], r"(?V1)^|\w+", "foo bar")
    findall_gives(['bar', 'foo', ''], r"(?rV1)^|\w+", "foo bar")
    finditer_gives(['bar', 'foo', ''], r"(?rV1)^|\w+", "foo bar")

    # Splitting on the empty pattern, forwards then backwards.
    if splits_on_empty:
        split_gives(forward_pieces, "", "xaxbxc")
    else:
        split_gives(['xaxbxc'], "", "xaxbxc")

    if splits_on_empty:
        split_gives(reverse_pieces, "(?r)", "xaxbxc")
    else:
        split_gives(['xaxbxc'], "(?r)", "xaxbxc")

    # Version 1 always splits on zero-width matches.
    split_gives(forward_pieces, "(?V1)", "xaxbxc")
    split_gives(reverse_pieces, "(?rV1)", "xaxbxc")
1393
+
1394
def test_scoped_and_inline_flags(self):
    # Issues 433028, 433024, 433027.
    # Exercises the case-insensitivity flag given inline at the start of
    # the pattern, scoped to a group, placed mid-pattern, and switched
    # off again when it was passed through the flags argument.
    whole = (0, 2)

    for pattern in (r"(?i)Ab", r"(?i:A)b"):
        self.assertEqual(regex.search(pattern, "ab").span(), whole)

    # Changed to positional flags in regex 2023.12.23.
    self.assertEqual(regex.search(r"A(?i)b", "ab"), None)

    # Selecting a version alone does not make the match case-insensitive.
    for pattern in (r"(?V0)Ab", r"(?V1)Ab"):
        self.assertEqual(regex.search(pattern, "ab"), None)

    # "(?-i)" cancels the IGNORECASE flag for the part that contains "A".
    for pattern in (r"(?-i)Ab", r"(?-i:A)b"):
        self.assertEqual(regex.search(pattern, "ab", flags=regex.I), None)

    # Here "A" precedes "(?-i)", so it is still matched ignoring case.
    found = regex.search(r"A(?-i)b", "ab", flags=regex.I)
    self.assertEqual(found.span(), whole)
1407
+
1408
def test_repeated_repeats(self):
    # A repeat applied to a group that itself ends in a repeat; in every
    # case the expected span covers the whole subject string.
    cases = (
        # Issue 2537.
        (r"(?:a+)+", "aaa", (0, 3)),
        (r"(?:(?:ab)+c)+", "abcabc", (0, 6)),
        # Hg issue 286.
        (r"(?:a+){2,}", "aaa", (0, 3)),
    )
    for pattern, subject, span in cases:
        self.assertEqual(regex.search(pattern, subject).span(), span)
1416
+
1417
def test_lookbehind(self):
    # Variable-length lookbehinds: after "123" has been matched, the
    # assertion looks back over "a\d+".  A span of None means that no
    # match is expected.
    digits_after_letter = (
        (r"123(?<=a\d+)", "a123", (1, 4)),
        (r"123(?<=a\d+)", "b123", None),
        (r"123(?<!a\d+)", "a123", None),
        (r"123(?<!a\d+)", "b123", (1, 4)),
    )
    for pattern, subject, span in digits_after_letter:
        found = regex.search(pattern, subject)
        if span is None:
            self.assertEqual(found, None)
        else:
            self.assertEqual(found.span(), span)

    # Every pattern below is matched against "abc"; the boolean says
    # whether a match is expected.
    against_abc = (
        # Plain lookbehind / lookahead right after the "b".
        ("(a)b(?<=b)(c)", True),
        ("(a)b(?<=c)(c)", False),
        ("(a)b(?=c)(c)", True),
        ("(a)b(?=b)(c)", False),

        # Conditional inside a lookbehind, with alternative groups.
        ("(?:(a)|(x))b(?<=(?(2)x|c))c", False),
        ("(?:(a)|(x))b(?<=(?(2)b|x))c", False),
        ("(?:(a)|(x))b(?<=(?(2)x|b))c", True),
        ("(?:(a)|(x))b(?<=(?(1)c|x))c", False),
        ("(?:(a)|(x))b(?<=(?(1)b|x))c", True),

        # Conditional inside a lookahead, with alternative groups.
        ("(?:(a)|(x))b(?=(?(2)x|c))c", True),
        ("(?:(a)|(x))b(?=(?(2)c|x))c", False),
        ("(?:(a)|(x))b(?=(?(2)x|c))c", True),
        ("(?:(a)|(x))b(?=(?(1)b|x))c", False),
        ("(?:(a)|(x))b(?=(?(1)c|x))c", True),

        # Conditional inside a lookbehind; group 2 comes after it.
        ("(a)b(?<=(?(2)x|c))(c)", False),
        ("(a)b(?<=(?(2)b|x))(c)", False),
        ("(a)b(?<=(?(1)c|x))(c)", False),
        ("(a)b(?<=(?(1)b|x))(c)", True),

        # Conditional inside a lookahead; group 2 comes after it.
        ("(a)b(?=(?(2)x|c))(c)", True),
        ("(a)b(?=(?(2)b|x))(c)", False),
        ("(a)b(?=(?(1)c|x))(c)", True),
    )
    for pattern, expect_match in against_abc:
        found = regex.match(pattern, "abc")
        if expect_match:
            self.assertEqual(bool(found), True)
        else:
            self.assertEqual(found, None)

    # A reference to group 2 written before that group must still compile.
    compiled = regex.compile(r"(a)\2(b)")
    self.assertEqual(repr(type(compiled)), self.PATTERN_CLASS)
1464
+
1465
def test_unmatched_in_sub(self):
    # Issue 1519638.
    # Groups that did not take part in the match are substituted as empty
    # strings.  Under (?V0) the expected text depends on the running
    # Python version; under (?V1) it does not.
    modern = sys.version_info >= (3, 7, 0)
    template = r"\2-\1"

    # (subject, expected under V1, expected under V0 before Python 3.7)
    cases = (
        ("xy", 'y-x-', 'y-x'),
        ("x", '-x-', '-x'),
        ("y", 'y--', 'y-'),
    )
    for subject, v1_text, old_v0_text in cases:
        v0_text = v1_text if modern else old_v0_text
        self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", template, subject),
                         v0_text)
        self.assertEqual(regex.sub(r"(?V1)(x)?(y)?", template, subject),
                         v1_text)
1485
+
1486
def test_bug_10328(self):
    # Issue 10328.
    # The pattern has two alternatives: trailing whitespace at the end of
    # a line, and a zero-width match at the end of a string that does not
    # end with a newline.
    def tag_last_group(found):
        # Replace the match with the name of the group that matched.
        return '<' + found.lastgroup + '>'

    both_replaced = ('foobar<trailing_ws><no_final_newline>', 2)
    if sys.version_info >= (3, 7, 0):
        v0_result = both_replaced
    else:
        v0_result = ('foobar<trailing_ws>', 1)

    cases = (
        (r'(?mV0)(?P<trailing_ws>[ \t]+\r*$)|(?P<no_final_newline>(?<=[^\n])\Z)',
         v0_result),
        (r'(?mV1)(?P<trailing_ws>[ \t]+\r*$)|(?P<no_final_newline>(?<=[^\n])\Z)',
         both_replaced),
    )
    for source, subn_result in cases:
        pat = regex.compile(source)
        self.assertEqual(pat.subn(tag_last_group, 'foobar '), subn_result)
        # finditer reports both matches under either version.
        self.assertEqual([m.group() for m in pat.finditer('foobar ')],
                         [' ', ''])
1502
+
1503
def test_overlapped(self):
    # Two-character matches over "abcde", for the plain and the "(?r)"
    # pattern, each with and without overlapped=True.  The same table is
    # used for findall() and finditer().
    two_chars = (
        (r"..", {}, ['ab', 'cd']),
        (r"..", {"overlapped": True}, ['ab', 'bc', 'cd', 'de']),
        (r"(?r)..", {}, ['de', 'bc']),
        (r"(?r)..", {"overlapped": True}, ['de', 'cd', 'bc', 'ab']),
    )
    forward_triples = [("a", "-", "b"), ("b", "-", "c")]

    for pattern, options, expected in two_chars:
        self.assertEqual(regex.findall(pattern, "abcde", **options),
                         expected)
    self.assertEqual(regex.findall(r"(.)(-)(.)", "a-b-c", overlapped=True),
                     forward_triples)

    for pattern, options, expected in two_chars:
        whole_matches = [m[0] for m in
                         regex.finditer(pattern, "abcde", **options)]
        self.assertEqual(whole_matches, expected)

    # Overlapping matches that carry capture groups.
    captured = [m.groups() for m in
                regex.finditer(r"(.)(-)(.)", "a-b-c", overlapped=True)]
    self.assertEqual(captured, forward_triples)

    captured = [m.groups() for m in
                regex.finditer(r"(?r)(.)(-)(.)", "a-b-c", overlapped=True)]
    self.assertEqual(captured, [("b", "-", "c"), ("a", "-", "b")])
1526
+
1527
def test_splititer(self):
    # split() and splititer() are expected to produce the same pieces,
    # including the empty strings between and after the commas.
    subject = "a,b,,c,"
    pieces = ['a', 'b', '', 'c', '']

    self.assertEqual(regex.split(r",", subject), pieces)
    self.assertEqual(list(regex.splititer(r",", subject)), pieces)
1531
+
1532
def test_grapheme(self):
    # \X matches a precomposed letter as one code point and a base letter
    # plus combining mark as two.
    for subject, span in (("\xE0", (0, 1)), ("a\u0300", (0, 2))):
        self.assertEqual(regex.match(r"\X", subject).span(), span)

    accented = "a\xE0a\u0300e\xE9e\u0301"
    self.assertEqual(
        regex.findall(r"\X", accented),
        ['a', '\xe0', 'a\u0300', 'e', '\xe9', 'e\u0301'])
    self.assertEqual(
        regex.findall(r"\X{3}", accented),
        ['a\xe0a\u0300', 'e\xe9e\u0301'])

    # "\r\n" stays together, and a combining mark with no base letter
    # before it is returned on its own.
    self.assertEqual(
        regex.findall(r"\X", "\r\r\n\u0301A\u0301"),
        ['\r', '\r\n', '\u0301', 'A\u0301'])
1543
+
1544
def test_word_boundary(self):
    # Split each sample on \b twice: once with (?V1) and once with
    # (?V1w).  Per the expected lists, the "w" variant keeps "can't" and
    # "32.3" in one piece and splits runs of punctuation apart.
    samples = (
        (
            'The quick ("brown") fox can\'t jump 32.3 feet, right?',
            ['', 'The', ' ', 'quick', ' ("', 'brown', '") ', 'fox', ' ',
             'can', "'", 't', ' ', 'jump', ' ', '32', '.', '3', ' ',
             'feet', ', ', 'right', '?'],
            ['', 'The', ' ', 'quick', ' ', '(', '"', 'brown', '"', ')',
             ' ', 'fox', ' ', "can't", ' ', 'jump', ' ', '32.3', ' ',
             'feet', ',', ' ', 'right', '?', ''],
        ),
        (
            "The fox",
            ['', 'The', ' ', 'fox', ''],
            ['', 'The', ' ', 'fox', ''],
        ),
        (
            "can't aujourd'hui l'objectif",
            ['', 'can', "'", 't', ' ', 'aujourd', "'", 'hui', ' ', 'l',
             "'", 'objectif', ''],
            ['', "can't", ' ', "aujourd'hui", ' ', "l'objectif", ''],
        ),
    )
    for text, plain_pieces, word_flag_pieces in samples:
        self.assertEqual(regex.split(r'(?V1)\b', text), plain_pieces)
        self.assertEqual(regex.split(r'(?V1w)\b', text), word_flag_pieces)
1567
+
1568
def test_line_boundary(self):
    # Which characters end a line for ".", "^" and "$", with and without
    # the "w" flag.  Per the expectations below, "\r" counts as a line
    # separator only when "w" is on.
    lf_text = "Line 1\nLine 2\n"
    cr_text = "Line 1\rLine 2\r"
    crlf_text = "Line 1\r\nLine 2\r\n"
    dot_runs = (
        (r".+", lf_text, ["Line 1", "Line 2"]),
        (r".+", cr_text, ["Line 1\rLine 2\r"]),
        (r".+", crlf_text, ["Line 1\r", "Line 2\r"]),
        (r"(?w).+", lf_text, ["Line 1", "Line 2"]),
        (r"(?w).+", cr_text, ["Line 1", "Line 2"]),
        (r"(?w).+", crlf_text, ["Line 1", "Line 2"]),
    )
    for pattern, subject, lines in dot_runs:
        self.assertEqual(regex.findall(pattern, subject), lines)

    # (pattern, subject, expected start of the match or None for no match)
    anchors = (
        (r"^abc", "abc", 0),
        (r"^abc", "\nabc", None),
        (r"^abc", "\rabc", None),
        (r"(?w)^abc", "abc", 0),
        (r"(?w)^abc", "\nabc", None),
        (r"(?w)^abc", "\rabc", None),

        (r"abc$", "abc", 0),
        (r"abc$", "abc\n", 0),
        (r"abc$", "abc\r", None),
        (r"(?w)abc$", "abc", 0),
        (r"(?w)abc$", "abc\n", 0),
        (r"(?w)abc$", "abc\r", 0),

        (r"(?m)^abc", "abc", 0),
        (r"(?m)^abc", "\nabc", 1),
        (r"(?m)^abc", "\rabc", None),
        (r"(?mw)^abc", "abc", 0),
        (r"(?mw)^abc", "\nabc", 1),
        (r"(?mw)^abc", "\rabc", 1),

        (r"(?m)abc$", "abc", 0),
        (r"(?m)abc$", "abc\n", 0),
        (r"(?m)abc$", "abc\r", None),
        (r"(?mw)abc$", "abc", 0),
        (r"(?mw)abc$", "abc\n", 0),
        (r"(?mw)abc$", "abc\r", 0),
    )
    for pattern, subject, start in anchors:
        found = regex.search(pattern, subject)
        if start is None:
            self.assertEqual(found, None)
        else:
            self.assertEqual(found.start(), start)
1609
+
1610
def test_branch_reset(self):
    # Group numbering with ordinary alternation "(?:...|...)" compared
    # with the branch-reset form "(?|...|...)".
    # Each case is (pattern, subject, expected groups()).
    cases = (
        (r"(?:(a)|(b))(c)", "ac", ('a', None, 'c')),
        (r"(?:(a)|(b))(c)", "bc", (None, 'b', 'c')),
        (r"(?:(?<a>a)|(?<b>b))(?<c>c)", "ac", ('a', None, 'c')),
        (r"(?:(?<a>a)|(?<b>b))(?<c>c)", "bc", (None, 'b', 'c')),

        (r"(?<a>a)(?:(?<b>b)|(?<c>c))(?<d>d)", "abd",
         ('a', 'b', None, 'd')),
        (r"(?<a>a)(?:(?<b>b)|(?<c>c))(?<d>d)", "acd",
         ('a', None, 'c', 'd')),
        (r"(a)(?:(b)|(c))(d)", "abd", ('a', 'b', None, 'd')),

        (r"(a)(?:(b)|(c))(d)", "acd", ('a', None, 'c', 'd')),
        (r"(a)(?|(b)|(b))(d)", "abd", ('a', 'b', 'd')),
        (r"(?|(?<a>a)|(?<b>b))(c)", "ac", ('a', None, 'c')),
        (r"(?|(?<a>a)|(?<b>b))(c)", "bc", (None, 'b', 'c')),
        (r"(?|(?<a>a)|(?<a>b))(c)", "ac", ('a', 'c')),

        (r"(?|(?<a>a)|(?<a>b))(c)", "bc", ('b', 'c')),

        (r"(?|(?<a>a)(?<b>b)|(?<b>c)(?<a>d))(e)", "abe",
         ('a', 'b', 'e')),
        (r"(?|(?<a>a)(?<b>b)|(?<b>c)(?<a>d))(e)", "cde",
         ('d', 'c', 'e')),
        (r"(?|(?<a>a)(?<b>b)|(?<b>c)(d))(e)", "abe", ('a', 'b', 'e')),
        (r"(?|(?<a>a)(?<b>b)|(?<b>c)(d))(e)", "cde", ('d', 'c', 'e')),
        (r"(?|(?<a>a)(?<b>b)|(c)(d))(e)", "abe", ('a', 'b', 'e')),
        (r"(?|(?<a>a)(?<b>b)|(c)(d))(e)", "cde", ('c', 'd', 'e')),
    )
    for pattern, subject, groups in cases:
        self.assertEqual(regex.match(pattern, subject).groups(), groups)

    # Hg issue 87: Allow duplicate names of groups
    duplicate_names = (
        ("abe", ("a", "b", "e"), {"a": ["a"], "b": ["b"]}),
        ("cde", ("d", None, "e"), {"a": ["c", "d"], "b": []}),
    )
    for subject, groups, captures in duplicate_names:
        self.assertEqual(
            regex.match(r"(?|(?<a>a)(?<b>b)|(c)(?<a>d))(e)",
                        subject).groups(),
            groups)
        self.assertEqual(
            regex.match(r"(?|(?<a>a)(?<b>b)|(c)(?<a>d))(e)",
                        subject).capturesdict(),
            captures)
1663
+
1664
def test_set(self):
    # Character sets: literals, ranges, properties, negation and the
    # (?V1) set operators.

    # Single-member and range sets, with and without IGNORECASE.
    span_cases = (
        (r"[a]", "a"),
        (r"(?i)[a]", "A"),
        (r"[a-b]", r"a"),
        (r"(?i)[a-b]", r"A"),
    )
    for pattern, subject in span_cases:
        self.assertEqual(regex.match(pattern, subject).span(), (0, 1))

    # Under (?V0) both "]" and "[" are literal members of the set here.
    self.assertEqual(regex.sub(r"(?V0)([][])", r"-", "a[b]c"), "a-b-c")

    # Properties inside sets, alone or mixed with literals and ranges.
    findall_cases = (
        (r"[\p{Alpha}]", "a0", ["a"]),
        (r"(?i)[\p{Alpha}]", "A0", ["A"]),

        (r"[a\p{Alpha}]", "ab0", ["a", "b"]),
        (r"[a\P{Alpha}]", "ab0", ["a", "0"]),
        (r"(?i)[a\p{Alpha}]", "ab0", ["a", "b"]),
        (r"(?i)[a\P{Alpha}]", "ab0", ["a", "0"]),

        (r"[a-b\p{Alpha}]", "abC0", ["a", "b", "C"]),
        (r"(?i)[a-b\p{Alpha}]", "AbC0", ["A", "b", "C"]),

        (r"[\p{Alpha}]", "a0", ["a"]),
        (r"[\P{Alpha}]", "a0", ["0"]),
        (r"[^\p{Alpha}]", "a0", ["0"]),
        (r"[^\P{Alpha}]", "a0", ["a"]),
    )
    for pattern, subject, found in findall_cases:
        self.assertEqual(regex.findall(pattern, subject), found)

    # Negated sets containing class escapes; the matches are joined back
    # into one string for comparison.
    joined_cases = (
        (r"[^\d-h]", "a^b12c-h", 'a^bc'),
        (r"[^\dh]", "a^b12c-h", 'a^bc-'),
        (r"[^h\s\db]", "a^b 12c-h", 'a^c-'),
        (r"[^b\w]", "a b", ' '),
        (r"[^b\S]", "a b", ' '),
        (r"[^8\d]", "a 1b2", 'a b'),
    )
    for pattern, subject, kept in joined_cases:
        self.assertEqual("".join(regex.findall(pattern, subject)), kept)

    # Count how many of the first 256 code points each pattern matches.
    all_chars = "".join(map(chr, range(0x100)))
    count_cases = (
        (r"\p{ASCII}", 128),
        (r"\p{Letter}", 117),
        (r"\p{Digit}", 10),

        # Set operators
        (r"(?V1)[\p{ASCII}&&\p{Letter}]", 52),
        (r"(?V1)[\p{ASCII}&&\p{Alnum}&&\p{Letter}]", 52),
        (r"(?V1)[\p{ASCII}&&\p{Alnum}&&\p{Digit}]", 10),
        (r"(?V1)[\p{ASCII}&&\p{Cc}]", 33),
        (r"(?V1)[\p{ASCII}&&\p{Graph}]", 94),
        (r"(?V1)[\p{ASCII}--\p{Cc}]", 95),
        (r"[\p{Letter}\p{Digit}]", 127),
        (r"(?V1)[\p{Letter}||\p{Digit}]", 127),
        (r"\p{HexDigit}", 22),
        (r"(?V1)[\p{HexDigit}~~\p{Digit}]", 12),
        (r"(?V1)[\p{Digit}~~\p{HexDigit}]", 12),
    )
    for pattern, count in count_cases:
        self.assertEqual(len(regex.findall(pattern, all_chars)), count)

    # Under (?V0) this set of "]", "[" and "-" must compile.
    compiled = regex.compile(r"(?V0)([][-])")
    self.assertEqual(repr(type(compiled)), self.PATTERN_CLASS)

    # Set difference under (?V1), with a nested set or a class escape on
    # the left-hand side.
    difference_patterns = (
        r"(?V1)[[a-z]--[aei]]",
        r"(?iV1)[[a-z]--[aei]]",
        r"(?V1)[\w--a]",
        r"(?iV1)[\w--a]",
    )
    for pattern in difference_patterns:
        self.assertEqual(regex.findall(pattern, "abc"), ["b", "c"])
1740
+
1741
+ def test_various(self):
1742
+ tests = [
1743
+ # Test ?P< and ?P= extensions.
1744
+ ('(?P<foo_123', '', '', regex.error, self.MISSING_GT), # Unterminated group identifier.
1745
+ ('(?P<1>a)', '', '', regex.error, self.BAD_GROUP_NAME), # Begins with a digit.
1746
+ ('(?P<!>a)', '', '', regex.error, self.BAD_GROUP_NAME), # Begins with an illegal char.
1747
+ ('(?P<foo!>a)', '', '', regex.error, self.BAD_GROUP_NAME), # Begins with an illegal char.
1748
+
1749
+ # Same tests, for the ?P= form.
1750
+ ('(?P<foo_123>a)(?P=foo_123', 'aa', '', regex.error,
1751
+ self.MISSING_RPAREN),
1752
+ ('(?P<foo_123>a)(?P=1)', 'aa', '1', ascii('a')),
1753
+ ('(?P<foo_123>a)(?P=0)', 'aa', '', regex.error,
1754
+ self.BAD_GROUP_NAME),
1755
+ ('(?P<foo_123>a)(?P=-1)', 'aa', '', regex.error,
1756
+ self.BAD_GROUP_NAME),
1757
+ ('(?P<foo_123>a)(?P=!)', 'aa', '', regex.error,
1758
+ self.BAD_GROUP_NAME),
1759
+ ('(?P<foo_123>a)(?P=foo_124)', 'aa', '', regex.error,
1760
+ self.UNKNOWN_GROUP), # Backref to undefined group.
1761
+
1762
+ ('(?P<foo_123>a)', 'a', '1', ascii('a')),
1763
+ ('(?P<foo_123>a)(?P=foo_123)', 'aa', '1', ascii('a')),
1764
+
1765
+ # Mal-formed \g in pattern treated as literal for compatibility.
1766
+ (r'(?<foo_123>a)\g<foo_123', 'aa', '', ascii(None)),
1767
+ (r'(?<foo_123>a)\g<1>', 'aa', '1', ascii('a')),
1768
+ (r'(?<foo_123>a)\g<!>', 'aa', '', ascii(None)),
1769
+ (r'(?<foo_123>a)\g<foo_124>', 'aa', '', regex.error,
1770
+ self.UNKNOWN_GROUP), # Backref to undefined group.
1771
+
1772
+ ('(?<foo_123>a)', 'a', '1', ascii('a')),
1773
+ (r'(?<foo_123>a)\g<foo_123>', 'aa', '1', ascii('a')),
1774
+
1775
+ # Test octal escapes.
1776
+ ('\\1', 'a', '', regex.error, self.INVALID_GROUP_REF), # Backreference.
1777
+ ('[\\1]', '\1', '0', "'\\x01'"), # Character.
1778
+ ('\\09', chr(0) + '9', '0', ascii(chr(0) + '9')),
1779
+ ('\\141', 'a', '0', ascii('a')),
1780
+ ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9',
1781
+ '0,11', ascii(('abcdefghijklk9', 'k'))),
1782
+
1783
+ # Test \0 is handled everywhere.
1784
+ (r'\0', '\0', '0', ascii('\0')),
1785
+ (r'[\0a]', '\0', '0', ascii('\0')),
1786
+ (r'[a\0]', '\0', '0', ascii('\0')),
1787
+ (r'[^a\0]', '\0', '', ascii(None)),
1788
+
1789
+ # Test various letter escapes.
1790
+ (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', '0',
1791
+ ascii('\a\b\f\n\r\t\v')),
1792
+ (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', '0',
1793
+ ascii('\a\b\f\n\r\t\v')),
1794
+ (r'\xff', '\377', '0', ascii(chr(255))),
1795
+
1796
+ # New \x semantics.
1797
+ (r'\x00ffffffffffffff', '\377', '', ascii(None)),
1798
+ (r'\x00f', '\017', '', ascii(None)),
1799
+ (r'\x00fe', '\376', '', ascii(None)),
1800
+
1801
+ (r'\x00ff', '\377', '', ascii(None)),
1802
+ (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', '0', ascii('\t\n\v\r\f\ag')),
1803
+ ('\t\n\v\r\f\a\\g', '\t\n\v\r\f\ag', '0', ascii('\t\n\v\r\f\ag')),
1804
+ (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', '0', ascii(chr(9) + chr(10) +
1805
+ chr(11) + chr(13) + chr(12) + chr(7))),
1806
+ (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', '0',
1807
+ ascii('\t\n\v\r\f\b')),
1808
+
1809
+ (r"^\w+=(\\[\000-\277]|[^\n\\])*",
1810
+ "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", '0',
1811
+ ascii("SRC=eval.c g.c blah blah blah \\\\")),
1812
+
1813
+ # Test that . only matches \n in DOTALL mode.
1814
+ ('a.b', 'acb', '0', ascii('acb')),
1815
+ ('a.b', 'a\nb', '', ascii(None)),
1816
+ ('a.*b', 'acc\nccb', '', ascii(None)),
1817
+ ('a.{4,5}b', 'acc\nccb', '', ascii(None)),
1818
+ ('a.b', 'a\rb', '0', ascii('a\rb')),
1819
+ # Changed to positional flags in regex 2023.12.23.
1820
+ ('a.b(?s)', 'a\nb', '', ascii(None)),
1821
+ ('(?s)a.b', 'a\nb', '0', ascii('a\nb')),
1822
+ ('a.*(?s)b', 'acc\nccb', '', ascii(None)),
1823
+ ('(?s)a.*b', 'acc\nccb', '0', ascii('acc\nccb')),
1824
+ ('(?s)a.{4,5}b', 'acc\nccb', '0', ascii('acc\nccb')),
1825
+
1826
+ (')', '', '', regex.error, self.TRAILING_CHARS), # Unmatched right bracket.
1827
+ ('', '', '0', "''"), # Empty pattern.
1828
+ ('abc', 'abc', '0', ascii('abc')),
1829
+ ('abc', 'xbc', '', ascii(None)),
1830
+ ('abc', 'axc', '', ascii(None)),
1831
+ ('abc', 'abx', '', ascii(None)),
1832
+ ('abc', 'xabcy', '0', ascii('abc')),
1833
+ ('abc', 'ababc', '0', ascii('abc')),
1834
+ ('ab*c', 'abc', '0', ascii('abc')),
1835
+ ('ab*bc', 'abc', '0', ascii('abc')),
1836
+
1837
+ ('ab*bc', 'abbc', '0', ascii('abbc')),
1838
+ ('ab*bc', 'abbbbc', '0', ascii('abbbbc')),
1839
+ ('ab+bc', 'abbc', '0', ascii('abbc')),
1840
+ ('ab+bc', 'abc', '', ascii(None)),
1841
+ ('ab+bc', 'abq', '', ascii(None)),
1842
+ ('ab+bc', 'abbbbc', '0', ascii('abbbbc')),
1843
+ ('ab?bc', 'abbc', '0', ascii('abbc')),
1844
+ ('ab?bc', 'abc', '0', ascii('abc')),
1845
+ ('ab?bc', 'abbbbc', '', ascii(None)),
1846
+ ('ab?c', 'abc', '0', ascii('abc')),
1847
+
1848
+ ('^abc$', 'abc', '0', ascii('abc')),
1849
+ ('^abc$', 'abcc', '', ascii(None)),
1850
+ ('^abc', 'abcc', '0', ascii('abc')),
1851
+ ('^abc$', 'aabc', '', ascii(None)),
1852
+ ('abc$', 'aabc', '0', ascii('abc')),
1853
+ ('^', 'abc', '0', ascii('')),
1854
+ ('$', 'abc', '0', ascii('')),
1855
+ ('a.c', 'abc', '0', ascii('abc')),
1856
+ ('a.c', 'axc', '0', ascii('axc')),
1857
+ ('a.*c', 'axyzc', '0', ascii('axyzc')),
1858
+
1859
+ ('a.*c', 'axyzd', '', ascii(None)),
1860
+ ('a[bc]d', 'abc', '', ascii(None)),
1861
+ ('a[bc]d', 'abd', '0', ascii('abd')),
1862
+ ('a[b-d]e', 'abd', '', ascii(None)),
1863
+ ('a[b-d]e', 'ace', '0', ascii('ace')),
1864
+ ('a[b-d]', 'aac', '0', ascii('ac')),
1865
+ ('a[-b]', 'a-', '0', ascii('a-')),
1866
+ ('a[\\-b]', 'a-', '0', ascii('a-')),
1867
+ ('a[b-]', 'a-', '0', ascii('a-')),
1868
+ ('a[]b', '-', '', regex.error, self.BAD_SET),
1869
+
1870
+ ('a[', '-', '', regex.error, self.BAD_SET),
1871
+ ('a\\', '-', '', regex.error, self.BAD_ESCAPE),
1872
+ ('abc)', '-', '', regex.error, self.TRAILING_CHARS),
1873
+ ('(abc', '-', '', regex.error, self.MISSING_RPAREN),
1874
+ ('a]', 'a]', '0', ascii('a]')),
1875
+ ('a[]]b', 'a]b', '0', ascii('a]b')),
1876
+ ('a[]]b', 'a]b', '0', ascii('a]b')),
1877
+ ('a[^bc]d', 'aed', '0', ascii('aed')),
1878
+ ('a[^bc]d', 'abd', '', ascii(None)),
1879
+ ('a[^-b]c', 'adc', '0', ascii('adc')),
1880
+
1881
+ ('a[^-b]c', 'a-c', '', ascii(None)),
1882
+ ('a[^]b]c', 'a]c', '', ascii(None)),
1883
+ ('a[^]b]c', 'adc', '0', ascii('adc')),
1884
+ ('\\ba\\b', 'a-', '0', ascii('a')),
1885
+ ('\\ba\\b', '-a', '0', ascii('a')),
1886
+ ('\\ba\\b', '-a-', '0', ascii('a')),
1887
+ ('\\by\\b', 'xy', '', ascii(None)),
1888
+ ('\\by\\b', 'yz', '', ascii(None)),
1889
+ ('\\by\\b', 'xyz', '', ascii(None)),
1890
+ ('x\\b', 'xyz', '', ascii(None)),
1891
+
1892
+ ('x\\B', 'xyz', '0', ascii('x')),
1893
+ ('\\Bz', 'xyz', '0', ascii('z')),
1894
+ ('z\\B', 'xyz', '', ascii(None)),
1895
+ ('\\Bx', 'xyz', '', ascii(None)),
1896
+ ('\\Ba\\B', 'a-', '', ascii(None)),
1897
+ ('\\Ba\\B', '-a', '', ascii(None)),
1898
+ ('\\Ba\\B', '-a-', '', ascii(None)),
1899
+ ('\\By\\B', 'xy', '', ascii(None)),
1900
+ ('\\By\\B', 'yz', '', ascii(None)),
1901
+ ('\\By\\b', 'xy', '0', ascii('y')),
1902
+
1903
+ ('\\by\\B', 'yz', '0', ascii('y')),
1904
+ ('\\By\\B', 'xyz', '0', ascii('y')),
1905
+ ('ab|cd', 'abc', '0', ascii('ab')),
1906
+ ('ab|cd', 'abcd', '0', ascii('ab')),
1907
+ ('()ef', 'def', '0,1', ascii(('ef', ''))),
1908
+ ('$b', 'b', '', ascii(None)),
1909
+ ('a\\(b', 'a(b', '', ascii(('a(b',))),
1910
+ ('a\\(*b', 'ab', '0', ascii('ab')),
1911
+ ('a\\(*b', 'a((b', '0', ascii('a((b')),
1912
+ ('a\\\\b', 'a\\b', '0', ascii('a\\b')),
1913
+
1914
+ ('((a))', 'abc', '0,1,2', ascii(('a', 'a', 'a'))),
1915
+ ('(a)b(c)', 'abc', '0,1,2', ascii(('abc', 'a', 'c'))),
1916
+ ('a+b+c', 'aabbabc', '0', ascii('abc')),
1917
+ ('(a+|b)*', 'ab', '0,1', ascii(('ab', 'b'))),
1918
+ ('(a+|b)+', 'ab', '0,1', ascii(('ab', 'b'))),
1919
+ ('(a+|b)?', 'ab', '0,1', ascii(('a', 'a'))),
1920
+ (')(', '-', '', regex.error, self.TRAILING_CHARS),
1921
+ ('[^ab]*', 'cde', '0', ascii('cde')),
1922
+ ('abc', '', '', ascii(None)),
1923
+ ('a*', '', '0', ascii('')),
1924
+
1925
+ ('a|b|c|d|e', 'e', '0', ascii('e')),
1926
+ ('(a|b|c|d|e)f', 'ef', '0,1', ascii(('ef', 'e'))),
1927
+ ('abcd*efg', 'abcdefg', '0', ascii('abcdefg')),
1928
+ ('ab*', 'xabyabbbz', '0', ascii('ab')),
1929
+ ('ab*', 'xayabbbz', '0', ascii('a')),
1930
+ ('(ab|cd)e', 'abcde', '0,1', ascii(('cde', 'cd'))),
1931
+ ('[abhgefdc]ij', 'hij', '0', ascii('hij')),
1932
+ ('^(ab|cd)e', 'abcde', '', ascii(None)),
1933
+ ('(abc|)ef', 'abcdef', '0,1', ascii(('ef', ''))),
1934
+ ('(a|b)c*d', 'abcd', '0,1', ascii(('bcd', 'b'))),
1935
+
1936
+ ('(ab|ab*)bc', 'abc', '0,1', ascii(('abc', 'a'))),
1937
+ ('a([bc]*)c*', 'abc', '0,1', ascii(('abc', 'bc'))),
1938
+ ('a([bc]*)(c*d)', 'abcd', '0,1,2', ascii(('abcd', 'bc', 'd'))),
1939
+ ('a([bc]+)(c*d)', 'abcd', '0,1,2', ascii(('abcd', 'bc', 'd'))),
1940
+ ('a([bc]*)(c+d)', 'abcd', '0,1,2', ascii(('abcd', 'b', 'cd'))),
1941
+ ('a[bcd]*dcdcde', 'adcdcde', '0', ascii('adcdcde')),
1942
+ ('a[bcd]+dcdcde', 'adcdcde', '', ascii(None)),
1943
+ ('(ab|a)b*c', 'abc', '0,1', ascii(('abc', 'ab'))),
1944
+ ('((a)(b)c)(d)', 'abcd', '1,2,3,4', ascii(('abc', 'a', 'b', 'd'))),
1945
+ ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', '0', ascii('alpha')),
1946
+
1947
+ ('^a(bc+|b[eh])g|.h$', 'abh', '0,1', ascii(('bh', None))),
1948
+ ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', '0,1,2', ascii(('effgz',
1949
+ 'effgz', None))),
1950
+ ('(bc+d$|ef*g.|h?i(j|k))', 'ij', '0,1,2', ascii(('ij', 'ij',
1951
+ 'j'))),
1952
+ ('(bc+d$|ef*g.|h?i(j|k))', 'effg', '', ascii(None)),
1953
+ ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', '', ascii(None)),
1954
+ ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', '0,1,2', ascii(('effgz',
1955
+ 'effgz', None))),
1956
+ ('(((((((((a)))))))))', 'a', '0', ascii('a')),
1957
+ ('multiple words of text', 'uh-uh', '', ascii(None)),
1958
+ ('multiple words', 'multiple words, yeah', '0',
1959
+ ascii('multiple words')),
1960
+ ('(.*)c(.*)', 'abcde', '0,1,2', ascii(('abcde', 'ab', 'de'))),
1961
+
1962
+ ('\\((.*), (.*)\\)', '(a, b)', '2,1', ascii(('b', 'a'))),
1963
+ ('[k]', 'ab', '', ascii(None)),
1964
+ ('a[-]?c', 'ac', '0', ascii('ac')),
1965
+ ('(abc)\\1', 'abcabc', '1', ascii('abc')),
1966
+ ('([a-c]*)\\1', 'abcabc', '1', ascii('abc')),
1967
+ ('^(.+)?B', 'AB', '1', ascii('A')),
1968
+ ('(a+).\\1$', 'aaaaa', '0,1', ascii(('aaaaa', 'aa'))),
1969
+ ('^(a+).\\1$', 'aaaa', '', ascii(None)),
1970
+ ('(abc)\\1', 'abcabc', '0,1', ascii(('abcabc', 'abc'))),
1971
+ ('([a-c]+)\\1', 'abcabc', '0,1', ascii(('abcabc', 'abc'))),
1972
+
1973
+ ('(a)\\1', 'aa', '0,1', ascii(('aa', 'a'))),
1974
+ ('(a+)\\1', 'aa', '0,1', ascii(('aa', 'a'))),
1975
+ ('(a+)+\\1', 'aa', '0,1', ascii(('aa', 'a'))),
1976
+ ('(a).+\\1', 'aba', '0,1', ascii(('aba', 'a'))),
1977
+ ('(a)ba*\\1', 'aba', '0,1', ascii(('aba', 'a'))),
1978
+ ('(aa|a)a\\1$', 'aaa', '0,1', ascii(('aaa', 'a'))),
1979
+ ('(a|aa)a\\1$', 'aaa', '0,1', ascii(('aaa', 'a'))),
1980
+ ('(a+)a\\1$', 'aaa', '0,1', ascii(('aaa', 'a'))),
1981
+ ('([abc]*)\\1', 'abcabc', '0,1', ascii(('abcabc', 'abc'))),
1982
+ ('(a)(b)c|ab', 'ab', '0,1,2', ascii(('ab', None, None))),
1983
+
1984
+ ('(a)+x', 'aaax', '0,1', ascii(('aaax', 'a'))),
1985
+ ('([ac])+x', 'aacx', '0,1', ascii(('aacx', 'c'))),
1986
+ ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', '0,1',
1987
+ ascii(('d:msgs/tdir/sub1/', 'tdir/'))),
1988
+ ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah',
1989
+ '0,1,2,3', ascii(('track1.title:TBlah blah blah', 'track1',
1990
+ 'title', 'Blah blah blah'))),
1991
+ ('([^N]*N)+', 'abNNxyzN', '0,1', ascii(('abNNxyzN', 'xyzN'))),
1992
+ ('([^N]*N)+', 'abNNxyz', '0,1', ascii(('abNN', 'N'))),
1993
+ ('([abc]*)x', 'abcx', '0,1', ascii(('abcx', 'abc'))),
1994
+ ('([abc]*)x', 'abc', '', ascii(None)),
1995
+ ('([xyz]*)x', 'abcx', '0,1', ascii(('x', ''))),
1996
+ ('(a)+b|aac', 'aac', '0,1', ascii(('aac', None))),
1997
+
1998
+ # Test symbolic groups.
1999
+ ('(?P<i d>aaa)a', 'aaaa', '', regex.error, self.BAD_GROUP_NAME),
2000
+ ('(?P<id>aaa)a', 'aaaa', '0,id', ascii(('aaaa', 'aaa'))),
2001
+ ('(?P<id>aa)(?P=id)', 'aaaa', '0,id', ascii(('aaaa', 'aa'))),
2002
+ ('(?P<id>aa)(?P=xd)', 'aaaa', '', regex.error, self.UNKNOWN_GROUP),
2003
+
2004
+ # Character properties.
2005
+ (r"\g", "g", '0', ascii('g')),
2006
+ (r"\g<1>", "g", '', regex.error, self.INVALID_GROUP_REF),
2007
+ (r"(.)\g<1>", "gg", '0', ascii('gg')),
2008
+ (r"(.)\g<1>", "gg", '', ascii(('gg', 'g'))),
2009
+ (r"\N", "N", '0', ascii('N')),
2010
+ (r"\N{LATIN SMALL LETTER A}", "a", '0', ascii('a')),
2011
+ (r"\p", "p", '0', ascii('p')),
2012
+ (r"\p{Ll}", "a", '0', ascii('a')),
2013
+ (r"\P", "P", '0', ascii('P')),
2014
+ (r"\P{Lu}", "p", '0', ascii('p')),
2015
+
2016
+ # All tests from Perl.
2017
+ ('abc', 'abc', '0', ascii('abc')),
2018
+ ('abc', 'xbc', '', ascii(None)),
2019
+ ('abc', 'axc', '', ascii(None)),
2020
+ ('abc', 'abx', '', ascii(None)),
2021
+ ('abc', 'xabcy', '0', ascii('abc')),
2022
+ ('abc', 'ababc', '0', ascii('abc')),
2023
+
2024
+ ('ab*c', 'abc', '0', ascii('abc')),
2025
+ ('ab*bc', 'abc', '0', ascii('abc')),
2026
+ ('ab*bc', 'abbc', '0', ascii('abbc')),
2027
+ ('ab*bc', 'abbbbc', '0', ascii('abbbbc')),
2028
+ ('ab{0,}bc', 'abbbbc', '0', ascii('abbbbc')),
2029
+ ('ab+bc', 'abbc', '0', ascii('abbc')),
2030
+ ('ab+bc', 'abc', '', ascii(None)),
2031
+ ('ab+bc', 'abq', '', ascii(None)),
2032
+ ('ab{1,}bc', 'abq', '', ascii(None)),
2033
+ ('ab+bc', 'abbbbc', '0', ascii('abbbbc')),
2034
+
2035
+ ('ab{1,}bc', 'abbbbc', '0', ascii('abbbbc')),
2036
+ ('ab{1,3}bc', 'abbbbc', '0', ascii('abbbbc')),
2037
+ ('ab{3,4}bc', 'abbbbc', '0', ascii('abbbbc')),
2038
+ ('ab{4,5}bc', 'abbbbc', '', ascii(None)),
2039
+ ('ab?bc', 'abbc', '0', ascii('abbc')),
2040
+ ('ab?bc', 'abc', '0', ascii('abc')),
2041
+ ('ab{0,1}bc', 'abc', '0', ascii('abc')),
2042
+ ('ab?bc', 'abbbbc', '', ascii(None)),
2043
+ ('ab?c', 'abc', '0', ascii('abc')),
2044
+ ('ab{0,1}c', 'abc', '0', ascii('abc')),
2045
+
2046
+ ('^abc$', 'abc', '0', ascii('abc')),
2047
+ ('^abc$', 'abcc', '', ascii(None)),
2048
+ ('^abc', 'abcc', '0', ascii('abc')),
2049
+ ('^abc$', 'aabc', '', ascii(None)),
2050
+ ('abc$', 'aabc', '0', ascii('abc')),
2051
+ ('^', 'abc', '0', ascii('')),
2052
+ ('$', 'abc', '0', ascii('')),
2053
+ ('a.c', 'abc', '0', ascii('abc')),
2054
+ ('a.c', 'axc', '0', ascii('axc')),
2055
+ ('a.*c', 'axyzc', '0', ascii('axyzc')),
2056
+
2057
+ ('a.*c', 'axyzd', '', ascii(None)),
2058
+ ('a[bc]d', 'abc', '', ascii(None)),
2059
+ ('a[bc]d', 'abd', '0', ascii('abd')),
2060
+ ('a[b-d]e', 'abd', '', ascii(None)),
2061
+ ('a[b-d]e', 'ace', '0', ascii('ace')),
2062
+ ('a[b-d]', 'aac', '0', ascii('ac')),
2063
+ ('a[-b]', 'a-', '0', ascii('a-')),
2064
+ ('a[b-]', 'a-', '0', ascii('a-')),
2065
+ ('a[b-a]', '-', '', regex.error, self.BAD_CHAR_RANGE),
2066
+ ('a[]b', '-', '', regex.error, self.BAD_SET),
2067
+
2068
+ ('a[', '-', '', regex.error, self.BAD_SET),
2069
+ ('a]', 'a]', '0', ascii('a]')),
2070
+ ('a[]]b', 'a]b', '0', ascii('a]b')),
2071
+ ('a[^bc]d', 'aed', '0', ascii('aed')),
2072
+ ('a[^bc]d', 'abd', '', ascii(None)),
2073
+ ('a[^-b]c', 'adc', '0', ascii('adc')),
2074
+ ('a[^-b]c', 'a-c', '', ascii(None)),
2075
+ ('a[^]b]c', 'a]c', '', ascii(None)),
2076
+ ('a[^]b]c', 'adc', '0', ascii('adc')),
2077
+ ('ab|cd', 'abc', '0', ascii('ab')),
2078
+
2079
+ ('ab|cd', 'abcd', '0', ascii('ab')),
2080
+ ('()ef', 'def', '0,1', ascii(('ef', ''))),
2081
+ ('*a', '-', '', regex.error, self.NOTHING_TO_REPEAT),
2082
+ ('(*)b', '-', '', regex.error, self.NOTHING_TO_REPEAT),
2083
+ ('$b', 'b', '', ascii(None)),
2084
+ ('a\\', '-', '', regex.error, self.BAD_ESCAPE),
2085
+ ('a\\(b', 'a(b', '', ascii(('a(b',))),
2086
+ ('a\\(*b', 'ab', '0', ascii('ab')),
2087
+ ('a\\(*b', 'a((b', '0', ascii('a((b')),
2088
+ ('a\\\\b', 'a\\b', '0', ascii('a\\b')),
2089
+
2090
+ ('abc)', '-', '', regex.error, self.TRAILING_CHARS),
2091
+ ('(abc', '-', '', regex.error, self.MISSING_RPAREN),
2092
+ ('((a))', 'abc', '0,1,2', ascii(('a', 'a', 'a'))),
2093
+ ('(a)b(c)', 'abc', '0,1,2', ascii(('abc', 'a', 'c'))),
2094
+ ('a+b+c', 'aabbabc', '0', ascii('abc')),
2095
+ ('a{1,}b{1,}c', 'aabbabc', '0', ascii('abc')),
2096
+ ('a**', '-', '', regex.error, self.MULTIPLE_REPEAT),
2097
+ ('a.+?c', 'abcabc', '0', ascii('abc')),
2098
+ ('(a+|b)*', 'ab', '0,1', ascii(('ab', 'b'))),
2099
+ ('(a+|b){0,}', 'ab', '0,1', ascii(('ab', 'b'))),
2100
+
2101
+ ('(a+|b)+', 'ab', '0,1', ascii(('ab', 'b'))),
2102
+ ('(a+|b){1,}', 'ab', '0,1', ascii(('ab', 'b'))),
2103
+ ('(a+|b)?', 'ab', '0,1', ascii(('a', 'a'))),
2104
+ ('(a+|b){0,1}', 'ab', '0,1', ascii(('a', 'a'))),
2105
+ (')(', '-', '', regex.error, self.TRAILING_CHARS),
2106
+ ('[^ab]*', 'cde', '0', ascii('cde')),
2107
+ ('abc', '', '', ascii(None)),
2108
+ ('a*', '', '0', ascii('')),
2109
+ ('([abc])*d', 'abbbcd', '0,1', ascii(('abbbcd', 'c'))),
2110
+ ('([abc])*bcd', 'abcd', '0,1', ascii(('abcd', 'a'))),
2111
+
2112
+ ('a|b|c|d|e', 'e', '0', ascii('e')),
2113
+ ('(a|b|c|d|e)f', 'ef', '0,1', ascii(('ef', 'e'))),
2114
+ ('abcd*efg', 'abcdefg', '0', ascii('abcdefg')),
2115
+ ('ab*', 'xabyabbbz', '0', ascii('ab')),
2116
+ ('ab*', 'xayabbbz', '0', ascii('a')),
2117
+ ('(ab|cd)e', 'abcde', '0,1', ascii(('cde', 'cd'))),
2118
+ ('[abhgefdc]ij', 'hij', '0', ascii('hij')),
2119
+ ('^(ab|cd)e', 'abcde', '', ascii(None)),
2120
+ ('(abc|)ef', 'abcdef', '0,1', ascii(('ef', ''))),
2121
+ ('(a|b)c*d', 'abcd', '0,1', ascii(('bcd', 'b'))),
2122
+
2123
+ ('(ab|ab*)bc', 'abc', '0,1', ascii(('abc', 'a'))),
2124
+ ('a([bc]*)c*', 'abc', '0,1', ascii(('abc', 'bc'))),
2125
+ ('a([bc]*)(c*d)', 'abcd', '0,1,2', ascii(('abcd', 'bc', 'd'))),
2126
+ ('a([bc]+)(c*d)', 'abcd', '0,1,2', ascii(('abcd', 'bc', 'd'))),
2127
+ ('a([bc]*)(c+d)', 'abcd', '0,1,2', ascii(('abcd', 'b', 'cd'))),
2128
+ ('a[bcd]*dcdcde', 'adcdcde', '0', ascii('adcdcde')),
2129
+ ('a[bcd]+dcdcde', 'adcdcde', '', ascii(None)),
2130
+ ('(ab|a)b*c', 'abc', '0,1', ascii(('abc', 'ab'))),
2131
+ ('((a)(b)c)(d)', 'abcd', '1,2,3,4', ascii(('abc', 'a', 'b', 'd'))),
2132
+ ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', '0', ascii('alpha')),
2133
+
2134
+ ('^a(bc+|b[eh])g|.h$', 'abh', '0,1', ascii(('bh', None))),
2135
+ ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', '0,1,2', ascii(('effgz',
2136
+ 'effgz', None))),
2137
+ ('(bc+d$|ef*g.|h?i(j|k))', 'ij', '0,1,2', ascii(('ij', 'ij',
2138
+ 'j'))),
2139
+ ('(bc+d$|ef*g.|h?i(j|k))', 'effg', '', ascii(None)),
2140
+ ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', '', ascii(None)),
2141
+ ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', '0,1,2', ascii(('effgz',
2142
+ 'effgz', None))),
2143
+ ('((((((((((a))))))))))', 'a', '10', ascii('a')),
2144
+ ('((((((((((a))))))))))\\10', 'aa', '0', ascii('aa')),
2145
+
2146
+ # Python does not have the same rules for \\41 so this is a syntax error
2147
+ # ('((((((((((a))))))))))\\41', 'aa', '', ascii(None)),
2148
+ # ('((((((((((a))))))))))\\41', 'a!', '0', ascii('a!')),
2149
+ ('((((((((((a))))))))))\\41', '', '', regex.error,
2150
+ self.INVALID_GROUP_REF),
2151
+ ('(?i)((((((((((a))))))))))\\41', '', '', regex.error,
2152
+ self.INVALID_GROUP_REF),
2153
+
2154
+ ('(((((((((a)))))))))', 'a', '0', ascii('a')),
2155
+ ('multiple words of text', 'uh-uh', '', ascii(None)),
2156
+ ('multiple words', 'multiple words, yeah', '0',
2157
+ ascii('multiple words')),
2158
+ ('(.*)c(.*)', 'abcde', '0,1,2', ascii(('abcde', 'ab', 'de'))),
2159
+ ('\\((.*), (.*)\\)', '(a, b)', '2,1', ascii(('b', 'a'))),
2160
+ ('[k]', 'ab', '', ascii(None)),
2161
+ ('a[-]?c', 'ac', '0', ascii('ac')),
2162
+ ('(abc)\\1', 'abcabc', '1', ascii('abc')),
2163
+ ('([a-c]*)\\1', 'abcabc', '1', ascii('abc')),
2164
+ ('(?i)abc', 'ABC', '0', ascii('ABC')),
2165
+
2166
+ ('(?i)abc', 'XBC', '', ascii(None)),
2167
+ ('(?i)abc', 'AXC', '', ascii(None)),
2168
+ ('(?i)abc', 'ABX', '', ascii(None)),
2169
+ ('(?i)abc', 'XABCY', '0', ascii('ABC')),
2170
+ ('(?i)abc', 'ABABC', '0', ascii('ABC')),
2171
+ ('(?i)ab*c', 'ABC', '0', ascii('ABC')),
2172
+ ('(?i)ab*bc', 'ABC', '0', ascii('ABC')),
2173
+ ('(?i)ab*bc', 'ABBC', '0', ascii('ABBC')),
2174
+ ('(?i)ab*?bc', 'ABBBBC', '0', ascii('ABBBBC')),
2175
+ ('(?i)ab{0,}?bc', 'ABBBBC', '0', ascii('ABBBBC')),
2176
+
2177
+ ('(?i)ab+?bc', 'ABBC', '0', ascii('ABBC')),
2178
+ ('(?i)ab+bc', 'ABC', '', ascii(None)),
2179
+ ('(?i)ab+bc', 'ABQ', '', ascii(None)),
2180
+ ('(?i)ab{1,}bc', 'ABQ', '', ascii(None)),
2181
+ ('(?i)ab+bc', 'ABBBBC', '0', ascii('ABBBBC')),
2182
+ ('(?i)ab{1,}?bc', 'ABBBBC', '0', ascii('ABBBBC')),
2183
+ ('(?i)ab{1,3}?bc', 'ABBBBC', '0', ascii('ABBBBC')),
2184
+ ('(?i)ab{3,4}?bc', 'ABBBBC', '0', ascii('ABBBBC')),
2185
+ ('(?i)ab{4,5}?bc', 'ABBBBC', '', ascii(None)),
2186
+ ('(?i)ab??bc', 'ABBC', '0', ascii('ABBC')),
2187
+
2188
+ ('(?i)ab??bc', 'ABC', '0', ascii('ABC')),
2189
+ ('(?i)ab{0,1}?bc', 'ABC', '0', ascii('ABC')),
2190
+ ('(?i)ab??bc', 'ABBBBC', '', ascii(None)),
2191
+ ('(?i)ab??c', 'ABC', '0', ascii('ABC')),
2192
+ ('(?i)ab{0,1}?c', 'ABC', '0', ascii('ABC')),
2193
+ ('(?i)^abc$', 'ABC', '0', ascii('ABC')),
2194
+ ('(?i)^abc$', 'ABCC', '', ascii(None)),
2195
+ ('(?i)^abc', 'ABCC', '0', ascii('ABC')),
2196
+ ('(?i)^abc$', 'AABC', '', ascii(None)),
2197
+ ('(?i)abc$', 'AABC', '0', ascii('ABC')),
2198
+
2199
+ ('(?i)^', 'ABC', '0', ascii('')),
2200
+ ('(?i)$', 'ABC', '0', ascii('')),
2201
+ ('(?i)a.c', 'ABC', '0', ascii('ABC')),
2202
+ ('(?i)a.c', 'AXC', '0', ascii('AXC')),
2203
+ ('(?i)a.*?c', 'AXYZC', '0', ascii('AXYZC')),
2204
+ ('(?i)a.*c', 'AXYZD', '', ascii(None)),
2205
+ ('(?i)a[bc]d', 'ABC', '', ascii(None)),
2206
+ ('(?i)a[bc]d', 'ABD', '0', ascii('ABD')),
2207
+ ('(?i)a[b-d]e', 'ABD', '', ascii(None)),
2208
+ ('(?i)a[b-d]e', 'ACE', '0', ascii('ACE')),
2209
+
2210
+ ('(?i)a[b-d]', 'AAC', '0', ascii('AC')),
2211
+ ('(?i)a[-b]', 'A-', '0', ascii('A-')),
2212
+ ('(?i)a[b-]', 'A-', '0', ascii('A-')),
2213
+ ('(?i)a[b-a]', '-', '', regex.error, self.BAD_CHAR_RANGE),
2214
+ ('(?i)a[]b', '-', '', regex.error, self.BAD_SET),
2215
+ ('(?i)a[', '-', '', regex.error, self.BAD_SET),
2216
+ ('(?i)a]', 'A]', '0', ascii('A]')),
2217
+ ('(?i)a[]]b', 'A]B', '0', ascii('A]B')),
2218
+ ('(?i)a[^bc]d', 'AED', '0', ascii('AED')),
2219
+ ('(?i)a[^bc]d', 'ABD', '', ascii(None)),
2220
+
2221
+ ('(?i)a[^-b]c', 'ADC', '0', ascii('ADC')),
2222
+ ('(?i)a[^-b]c', 'A-C', '', ascii(None)),
2223
+ ('(?i)a[^]b]c', 'A]C', '', ascii(None)),
2224
+ ('(?i)a[^]b]c', 'ADC', '0', ascii('ADC')),
2225
+ ('(?i)ab|cd', 'ABC', '0', ascii('AB')),
2226
+ ('(?i)ab|cd', 'ABCD', '0', ascii('AB')),
2227
+ ('(?i)()ef', 'DEF', '0,1', ascii(('EF', ''))),
2228
+ ('(?i)*a', '-', '', regex.error, self.NOTHING_TO_REPEAT),
2229
+ ('(?i)(*)b', '-', '', regex.error, self.NOTHING_TO_REPEAT),
2230
+ ('(?i)$b', 'B', '', ascii(None)),
2231
+
2232
+ ('(?i)a\\', '-', '', regex.error, self.BAD_ESCAPE),
2233
+ ('(?i)a\\(b', 'A(B', '', ascii(('A(B',))),
2234
+ ('(?i)a\\(*b', 'AB', '0', ascii('AB')),
2235
+ ('(?i)a\\(*b', 'A((B', '0', ascii('A((B')),
2236
+ ('(?i)a\\\\b', 'A\\B', '0', ascii('A\\B')),
2237
+ ('(?i)abc)', '-', '', regex.error, self.TRAILING_CHARS),
2238
+ ('(?i)(abc', '-', '', regex.error, self.MISSING_RPAREN),
2239
+ ('(?i)((a))', 'ABC', '0,1,2', ascii(('A', 'A', 'A'))),
2240
+ ('(?i)(a)b(c)', 'ABC', '0,1,2', ascii(('ABC', 'A', 'C'))),
2241
+ ('(?i)a+b+c', 'AABBABC', '0', ascii('ABC')),
2242
+
2243
+ ('(?i)a{1,}b{1,}c', 'AABBABC', '0', ascii('ABC')),
2244
+ ('(?i)a**', '-', '', regex.error, self.MULTIPLE_REPEAT),
2245
+ ('(?i)a.+?c', 'ABCABC', '0', ascii('ABC')),
2246
+ ('(?i)a.*?c', 'ABCABC', '0', ascii('ABC')),
2247
+ ('(?i)a.{0,5}?c', 'ABCABC', '0', ascii('ABC')),
2248
+ ('(?i)(a+|b)*', 'AB', '0,1', ascii(('AB', 'B'))),
2249
+ ('(?i)(a+|b){0,}', 'AB', '0,1', ascii(('AB', 'B'))),
2250
+ ('(?i)(a+|b)+', 'AB', '0,1', ascii(('AB', 'B'))),
2251
+ ('(?i)(a+|b){1,}', 'AB', '0,1', ascii(('AB', 'B'))),
2252
+ ('(?i)(a+|b)?', 'AB', '0,1', ascii(('A', 'A'))),
2253
+
2254
+ ('(?i)(a+|b){0,1}', 'AB', '0,1', ascii(('A', 'A'))),
2255
+ ('(?i)(a+|b){0,1}?', 'AB', '0,1', ascii(('', None))),
2256
+ ('(?i))(', '-', '', regex.error, self.TRAILING_CHARS),
2257
+ ('(?i)[^ab]*', 'CDE', '0', ascii('CDE')),
2258
+ ('(?i)abc', '', '', ascii(None)),
2259
+ ('(?i)a*', '', '0', ascii('')),
2260
+ ('(?i)([abc])*d', 'ABBBCD', '0,1', ascii(('ABBBCD', 'C'))),
2261
+ ('(?i)([abc])*bcd', 'ABCD', '0,1', ascii(('ABCD', 'A'))),
2262
+ ('(?i)a|b|c|d|e', 'E', '0', ascii('E')),
2263
+ ('(?i)(a|b|c|d|e)f', 'EF', '0,1', ascii(('EF', 'E'))),
2264
+
2265
+ ('(?i)abcd*efg', 'ABCDEFG', '0', ascii('ABCDEFG')),
2266
+ ('(?i)ab*', 'XABYABBBZ', '0', ascii('AB')),
2267
+ ('(?i)ab*', 'XAYABBBZ', '0', ascii('A')),
2268
+ ('(?i)(ab|cd)e', 'ABCDE', '0,1', ascii(('CDE', 'CD'))),
2269
+ ('(?i)[abhgefdc]ij', 'HIJ', '0', ascii('HIJ')),
2270
+ ('(?i)^(ab|cd)e', 'ABCDE', '', ascii(None)),
2271
+ ('(?i)(abc|)ef', 'ABCDEF', '0,1', ascii(('EF', ''))),
2272
+ ('(?i)(a|b)c*d', 'ABCD', '0,1', ascii(('BCD', 'B'))),
2273
+ ('(?i)(ab|ab*)bc', 'ABC', '0,1', ascii(('ABC', 'A'))),
2274
+ ('(?i)a([bc]*)c*', 'ABC', '0,1', ascii(('ABC', 'BC'))),
2275
+
2276
+ ('(?i)a([bc]*)(c*d)', 'ABCD', '0,1,2', ascii(('ABCD', 'BC', 'D'))),
2277
+ ('(?i)a([bc]+)(c*d)', 'ABCD', '0,1,2', ascii(('ABCD', 'BC', 'D'))),
2278
+ ('(?i)a([bc]*)(c+d)', 'ABCD', '0,1,2', ascii(('ABCD', 'B', 'CD'))),
2279
+ ('(?i)a[bcd]*dcdcde', 'ADCDCDE', '0', ascii('ADCDCDE')),
2280
+ ('(?i)a[bcd]+dcdcde', 'ADCDCDE', '', ascii(None)),
2281
+ ('(?i)(ab|a)b*c', 'ABC', '0,1', ascii(('ABC', 'AB'))),
2282
+ ('(?i)((a)(b)c)(d)', 'ABCD', '1,2,3,4', ascii(('ABC', 'A', 'B',
2283
+ 'D'))),
2284
+ ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', '0', ascii('ALPHA')),
2285
+ ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', '0,1', ascii(('BH', None))),
2286
+ ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', '0,1,2', ascii(('EFFGZ',
2287
+ 'EFFGZ', None))),
2288
+
2289
+ ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', '0,1,2', ascii(('IJ', 'IJ',
2290
+ 'J'))),
2291
+ ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', '', ascii(None)),
2292
+ ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', '', ascii(None)),
2293
+ ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', '0,1,2', ascii(('EFFGZ',
2294
+ 'EFFGZ', None))),
2295
+ ('(?i)((((((((((a))))))))))', 'A', '10', ascii('A')),
2296
+ ('(?i)((((((((((a))))))))))\\10', 'AA', '0', ascii('AA')),
2297
+ #('(?i)((((((((((a))))))))))\\41', 'AA', '', ascii(None)),
2298
+ #('(?i)((((((((((a))))))))))\\41', 'A!', '0', ascii('A!')),
2299
+ ('(?i)(((((((((a)))))))))', 'A', '0', ascii('A')),
2300
+ ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', '1',
2301
+ ascii('A')),
2302
+ ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', '1',
2303
+ ascii('C')),
2304
+ ('(?i)multiple words of text', 'UH-UH', '', ascii(None)),
2305
+
2306
+ ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', '0',
2307
+ ascii('MULTIPLE WORDS')),
2308
+ ('(?i)(.*)c(.*)', 'ABCDE', '0,1,2', ascii(('ABCDE', 'AB', 'DE'))),
2309
+ ('(?i)\\((.*), (.*)\\)', '(A, B)', '2,1', ascii(('B', 'A'))),
2310
+ ('(?i)[k]', 'AB', '', ascii(None)),
2311
+ # ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', ascii(ABCD-$&-\\ABCD)),
2312
+ # ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', ascii(BC-$1-\\BC)),
2313
+ ('(?i)a[-]?c', 'AC', '0', ascii('AC')),
2314
+ ('(?i)(abc)\\1', 'ABCABC', '1', ascii('ABC')),
2315
+ ('(?i)([a-c]*)\\1', 'ABCABC', '1', ascii('ABC')),
2316
+ ('a(?!b).', 'abad', '0', ascii('ad')),
2317
+ ('a(?=d).', 'abad', '0', ascii('ad')),
2318
+ ('a(?=c|d).', 'abad', '0', ascii('ad')),
2319
+
2320
+ ('a(?:b|c|d)(.)', 'ace', '1', ascii('e')),
2321
+ ('a(?:b|c|d)*(.)', 'ace', '1', ascii('e')),
2322
+ ('a(?:b|c|d)+?(.)', 'ace', '1', ascii('e')),
2323
+ ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', '1,2', ascii(('c', 'e'))),
2324
+
2325
+ # Lookbehind: split by : but not if it is escaped by -.
2326
+ ('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', '1', ascii('bc-:de')),
2327
+ # Escaping with \ as we know it.
2328
+ ('(?<!\\\\):(.*?)(?<!\\\\):', 'a:bc\\:de:f', '1', ascii('bc\\:de')),
2329
+ # Terminating with ' and escaping with ? as in edifact.
2330
+ ("(?<!\\?)'(.*?)(?<!\\?)'", "a'bc?'de'f", '1', ascii("bc?'de")),
2331
+
2332
+ # Comments using the (?#...) syntax.
2333
+
2334
+ ('w(?# comment', 'w', '', regex.error, self.MISSING_RPAREN),
2335
+ ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', '0', ascii('wxyz')),
2336
+
2337
+ # Check odd placement of embedded pattern modifiers.
2338
+
2339
+ # Not an error under PCRE/PRE:
2340
+ # When the new behaviour is turned on positional inline flags affect
2341
+ # only what follows.
2342
+ ('w(?i)', 'W', '0', ascii(None)),
2343
+ ('w(?i)', 'w', '0', ascii('w')),
2344
+ ('(?i)w', 'W', '0', ascii('W')),
2345
+
2346
+ # Comments using the x embedded pattern modifier.
2347
+ ("""(?x)w# comment 1
2348
+ x y
2349
+ # comment 2
2350
+ z""", 'wxyz', '0', ascii('wxyz')),
2351
+
2352
+ # Using the m embedded pattern modifier.
2353
+ ('^abc', """jkl
2354
+ abc
2355
+ xyz""", '', ascii(None)),
2356
+ ('(?m)^abc', """jkl
2357
+ abc
2358
+ xyz""", '0', ascii('abc')),
2359
+
2360
+ ('(?m)abc$', """jkl
2361
+ xyzabc
2362
+ 123""", '0', ascii('abc')),
2363
+
2364
+ # Using the s embedded pattern modifier.
2365
+ ('a.b', 'a\nb', '', ascii(None)),
2366
+ ('(?s)a.b', 'a\nb', '0', ascii('a\nb')),
2367
+
2368
+ # Test \w, etc. both inside and outside character classes.
2369
+ ('\\w+', '--ab_cd0123--', '0', ascii('ab_cd0123')),
2370
+ ('[\\w]+', '--ab_cd0123--', '0', ascii('ab_cd0123')),
2371
+ ('\\D+', '1234abc5678', '0', ascii('abc')),
2372
+ ('[\\D]+', '1234abc5678', '0', ascii('abc')),
2373
+ ('[\\da-fA-F]+', '123abc', '0', ascii('123abc')),
2374
+ # Not an error under PCRE/PRE:
2375
+ # ('[\\d-x]', '-', '', regex.error, self.BAD_CHAR_RANGE),
2376
+ (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', '3,2,1', ascii(('',
2377
+ 'testing!1972', ' '))),
2378
+ (r'(\s*)(\S*)(\s*)', ' testing!1972', '3,2,1', ascii(('',
2379
+ 'testing!1972', ' '))),
2380
+
2381
+ #
2382
+ # Post-1.5.2 additions.
2383
+
2384
+ # xmllib problem.
2385
+ (r'(([a-z]+):)?([a-z]+)$', 'smil', '1,2,3', ascii((None, None,
2386
+ 'smil'))),
2387
+ # Bug 110866: reference to undefined group.
2388
+ (r'((.)\1+)', '', '', regex.error, self.OPEN_GROUP),
2389
+ # Bug 111869: search (PRE/PCRE fails on this one, SRE doesn't).
2390
+ (r'.*d', 'abc\nabd', '0', ascii('abd')),
2391
+ # Bug 112468: various expected syntax errors.
2392
+ (r'(', '', '', regex.error, self.MISSING_RPAREN),
2393
+ (r'[\41]', '!', '0', ascii('!')),
2394
+ # Bug 114033: nothing to repeat.
2395
+ (r'(x?)?', 'x', '0', ascii('x')),
2396
+ # Bug 115040: rescan if flags are modified inside pattern.
2397
+ # Changed to positional flags in regex 2023.12.23.
2398
+ (r' (?x)foo ', 'foo', '0', ascii(None)),
2399
+ (r'(?x) foo ', 'foo', '0', ascii('foo')),
2400
+ (r'(?x)foo ', 'foo', '0', ascii('foo')),
2401
+ # Bug 115618: negative lookahead.
2402
+ (r'(?<!abc)(d.f)', 'abcdefdof', '0', ascii('dof')),
2403
+ # Bug 116251: character class bug.
2404
+ (r'[\w-]+', 'laser_beam', '0', ascii('laser_beam')),
2405
+ # Bug 123769+127259: non-greedy backtracking bug.
2406
+ (r'.*?\S *:', 'xx:', '0', ascii('xx:')),
2407
+ (r'a[ ]*?\ (\d+).*', 'a 10', '0', ascii('a 10')),
2408
+ (r'a[ ]*?\ (\d+).*', 'a 10', '0', ascii('a 10')),
2409
+ # Bug 127259: \Z shouldn't depend on multiline mode.
2410
+ (r'(?ms).*?x\s*\Z(.*)','xx\nx\n', '1', ascii('')),
2411
+ # Bug 128899: uppercase literals under the ignorecase flag.
2412
+ (r'(?i)M+', 'MMM', '0', ascii('MMM')),
2413
+ (r'(?i)m+', 'MMM', '0', ascii('MMM')),
2414
+ (r'(?i)[M]+', 'MMM', '0', ascii('MMM')),
2415
+ (r'(?i)[m]+', 'MMM', '0', ascii('MMM')),
2416
+ # Bug 130748: ^* should be an error (nothing to repeat).
2417
+ # In 'regex' we won't bother to complain about this.
2418
+ # (r'^*', '', '', regex.error, self.NOTHING_TO_REPEAT),
2419
+ # Bug 133283: minimizing repeat problem.
2420
+ (r'"(?:\\"|[^"])*?"', r'"\""', '0', ascii(r'"\""')),
2421
+ # Bug 477728: minimizing repeat problem.
2422
+ (r'^.*?$', 'one\ntwo\nthree\n', '', ascii(None)),
2423
+ # Bug 483789: minimizing repeat problem.
2424
+ (r'a[^>]*?b', 'a>b', '', ascii(None)),
2425
+ # Bug 490573: minimizing repeat problem.
2426
+ (r'^a*?$', 'foo', '', ascii(None)),
2427
+ # Bug 470582: nested groups problem.
2428
+ (r'^((a)c)?(ab)$', 'ab', '1,2,3', ascii((None, None, 'ab'))),
2429
+ # Another minimizing repeat problem (capturing groups in assertions).
2430
+ ('^([ab]*?)(?=(b)?)c', 'abc', '1,2', ascii(('ab', None))),
2431
+ ('^([ab]*?)(?!(b))c', 'abc', '1,2', ascii(('ab', None))),
2432
+ ('^([ab]*?)(?<!(a))c', 'abc', '1,2', ascii(('ab', None))),
2433
+ # Bug 410271: \b broken under locales.
2434
+ (r'\b.\b', 'a', '0', ascii('a')),
2435
+ (r'\b.\b', '\N{LATIN CAPITAL LETTER A WITH DIAERESIS}', '0',
2436
+ ascii('\xc4')),
2437
+ (r'\w', '\N{LATIN CAPITAL LETTER A WITH DIAERESIS}', '0',
2438
+ ascii('\xc4')),
2439
+ ]
2440
+
2441
+ for t in tests:
2442
+ excval = None
2443
+ try:
2444
+ if len(t) == 4:
2445
+ pattern, string, groups, expected = t
2446
+ else:
2447
+ pattern, string, groups, expected, excval = t
2448
+ except ValueError:
2449
+ fields = ", ".join([ascii(f) for f in t[ : 3]] + ["..."])
2450
+ self.fail("Incorrect number of test fields: ({})".format(fields))
2451
+ else:
2452
+ group_list = []
2453
+ if groups:
2454
+ for group in groups.split(","):
2455
+ try:
2456
+ group_list.append(int(group))
2457
+ except ValueError:
2458
+ group_list.append(group)
2459
+
2460
+ if excval is not None:
2461
+ with self.subTest(pattern=pattern, string=string):
2462
+ self.assertRaisesRegex(expected, excval, regex.search,
2463
+ pattern, string)
2464
+ else:
2465
+ m = regex.search(pattern, string)
2466
+ if m:
2467
+ if group_list:
2468
+ actual = ascii(m.group(*group_list))
2469
+ else:
2470
+ actual = ascii(m[:])
2471
+ else:
2472
+ actual = ascii(m)
2473
+
2474
+ self.assertEqual(actual, expected)
2475
+
2476
def test_replacement(self):
    """Exercise regex.sub() with template strings and a callable."""
    # This template is expected to come back from sub() exactly as it
    # was written, including its backslash sequences.
    template = "result\\?\\.\a\n"
    substituted = regex.sub(r"test\?", template, "test?")
    self.assertEqual(substituted, "result\\?\\.\a\n")

    # (replacement, expected result) for substituting into 'x' via '(.)'.
    cases = (
        (r"\1\1", 'xx'),
        (regex.escape(r"\1\1"), r"\1\1"),
        (r"\\1\\1", r"\1\1"),
        (lambda m: r"\1\1", r"\1\1"),
    )
    for replacement, wanted in cases:
        self.assertEqual(regex.sub('(.)', replacement, 'x'), wanted)
2484
+
2485
def test_common_prefix(self):
    """Compile an alternation whose two branches are the same long literal."""
    # Lowercase letters, digits and uppercase letters, repeated four times,
    # give a very long prefix shared by both branches.
    alphabet = string.ascii_lowercase + string.digits + string.ascii_uppercase
    branch = alphabet * 4
    pattern = "({0}|{0})".format(branch)

    compiled = regex.compile(pattern)
    self.assertEqual(repr(type(compiled)), self.PATTERN_CLASS)
2491
+
2492
def test_captures(self):
    """Compare Match.captures() output with the expected capture lists."""
    # A single group index yields one list.
    found = regex.search(r"(\w)+", "abc")
    self.assertEqual(found.captures(1), ['a', 'b', 'c'])

    # Several group indexes yield a tuple of lists, one per group.
    found = regex.search(r"(\w{3})+", "abcdef")
    self.assertEqual(found.captures(0, 1), (['abcdef'], ['abc', 'def']))

    found = regex.search(r"^(\d{1,3})(?:\.(\d{1,3})){3}$", "192.168.0.1")
    self.assertEqual(found.captures(1, 2), (['192', ], ['168', '0', '1']))

    found = regex.match(r"^([0-9A-F]{2}){4} ([a-z]\d){5}$",
                        "3FB52A0C a2c4g3k9d3")
    self.assertEqual(
        found.captures(1, 2),
        (['3F', 'B5', '2A', '0C'], ['a2', 'c4', 'g3', 'k9', 'd3']),
    )

    found = regex.match("([a-z]W)([a-z]X)+([a-z]Y)", "aWbXcXdXeXfY")
    self.assertEqual(
        found.captures(1, 2, 3),
        (['aW'], ['bX', 'cX', 'dX', 'eX'], ['fY']),
    )

    # Groups that sit inside a lookahead or an atomic group.
    found = regex.search(r".*?(?=(.)+)b", "ab")
    self.assertEqual(found.captures(1), ['b'])

    found = regex.search(r".*?(?>(.){0,2})d", "abcd")
    self.assertEqual(found.captures(1), ['b', 'c'])

    found = regex.search(r"(.)+", "a")
    self.assertEqual(found.captures(1), ['a'])
2511
+
2512
def test_guards(self):
    """Pin the spans reported by a handful of searches.

    NOTE(review): the method name suggests these target the matcher's
    repeat guards; that cannot be confirmed from this method alone.
    """
    # The first two searches share one subject and differ only in the
    # repeat bound ({3} versus {3,}).
    subject = "XY\nX Y\nX Y\nXY\nXX AB:"

    exact_repeat = regex.search(r"(X.*?Y\s*){3}(X\s*)+AB:", subject)
    self.assertEqual(exact_repeat.span(0, 1, 2),
                     ((3, 21), (12, 15), (16, 18)))

    open_repeat = regex.search(r"(X.*?Y\s*){3,}(X\s*)+AB:", subject)
    self.assertEqual(open_repeat.span(0, 1, 2),
                     ((0, 21), (12, 15), (16, 18)))

    # Group 1 is expected to report (-1, -1) here.
    digits = regex.search(r'\d{4}(\s*\w)?\W*((?!\d)\w){2}', "9999XX")
    self.assertEqual(digits.span(0, 1, 2), ((0, 6), (-1, -1), (5, 6)))

    multiline = regex.search(r'A\s*?.*?(\n+.*?\s*?){0,2}\(X',
                             'A\n1\nS\n1 (X')
    self.assertEqual(multiline.span(0, 1), ((0, 10), (5, 8)))

    # Same pattern against subjects whose first line differs by one 'a'.
    label = r'Derde\s*:'
    self.assertEqual(regex.search(label, 'aaaaaa:\nDerde:').span(), (8, 14))
    self.assertEqual(regex.search(label, 'aaaaa:\nDerde:').span(), (7, 13))
2531
+
2532
def test_turkic(self):
    """Check which dotted/dotless I characters match case-insensitively.

    Every character named in ``pairs`` is tried as a pattern against every
    other one; a match must occur exactly when the two characters are the
    same or are listed together as a pair.
    """
    # Turkish has dotted and dotless I/i.
    pairs = "I=i;I=\u0131;i=\u0130"

    all_chars = set()
    matching = set()
    for pair in pairs.split(";"):
        left, right = pair.split("=")
        all_chars.update((left, right))
        # Each pair matches itself and its partner, in both directions.
        matching.update((a, b) for a in (left, right) for b in (left, right))

    for pattern_char in all_chars:
        pattern = r"(?i)\A" + pattern_char + r"\Z"
        for text_char in all_chars:
            matched = bool(regex.match(pattern, text_char))
            expected = (pattern_char, text_char) in matching
            if matched and not expected:
                self.fail("{} matching {}".format(ascii(pattern_char),
                                                  ascii(text_char)))
            if expected and not matched:
                self.fail("{} not matching {}".format(ascii(pattern_char),
                                                      ascii(text_char)))
2557
+
2558
def test_named_lists(self):
    """Exercise named lists given to the pattern as keyword arguments."""
    # The same three checks are run for str and for bytes.
    variants = (
        (["one", "two", "three"], r"333\L<bar>444", r"(?i)333\L<bar>444",
         "333one444", "333TWO444", "333four444"),
        ([b"one", b"two", b"three"], br"333\L<bar>444",
         br"(?i)333\L<bar>444", b"333one444", b"333TWO444", b"333four444"),
    )
    for choices, pattern, pattern_ic, exact, upper, absent in variants:
        self.assertEqual(regex.match(pattern, exact, bar=choices).group(),
                         exact)
        self.assertEqual(regex.match(pattern_ic, upper, bar=choices).group(),
                         upper)
        self.assertEqual(regex.match(pattern, absent, bar=choices), None)

    # One list referenced twice in a pattern must still compile.
    compiled = regex.compile(r"3\L<bar>4\L<bar>+5",
                             bar=["one", "two", "three"])
    self.assertEqual(repr(type(compiled)), self.PATTERN_CLASS)

    # List items containing regex metacharacters.
    found = regex.findall(r"^\L<options>", "solid QWERT",
                          options={'good', 'brilliant', '+s\\ol[i}d'})
    self.assertEqual(found, [])
    found = regex.findall(r"^\L<options>", "+solid QWERT",
                          options={'good', 'brilliant', '+solid'})
    self.assertEqual(found, ['+solid'])

    # Sharp-s versus "SS" under the (?fi) flags, in both directions.
    sharp_s_word = "stra\N{LATIN SMALL LETTER SHARP S}e"

    options = ["STRASSE"]
    m = regex.match(r"(?fi)\L<words>", sharp_s_word, words=options)
    self.assertEqual(m.span(), (0, 6))

    options = ["STRASSE", "stress"]
    m = regex.match(r"(?fi)\L<words>", sharp_s_word, words=options)
    self.assertEqual(m.span(), (0, 6))

    options = ["stra\N{LATIN SMALL LETTER SHARP S}e"]
    m = regex.match(r"(?fi)\L<words>", "STRASSE", words=options)
    self.assertEqual(m.span(), (0, 7))

    # Plain (?i) search with ASCII I and with dotted capital I.
    options = ["kit"]
    m = regex.search(r"(?i)\L<words>", "SKITS", words=options)
    self.assertEqual(m.span(), (1, 4))
    m = regex.search(r"(?i)\L<words>",
                     "SK\N{LATIN CAPITAL LETTER I WITH DOT ABOVE}TS",
                     words=options)
    self.assertEqual(m.span(), (1, 4))

    # Back-reference under (?fi); these two use no named list.
    repeated_word = r"(?fi)\b(\w+) +\1\b"
    m = regex.search(repeated_word,
                     " stra\N{LATIN SMALL LETTER SHARP S}e STRASSE ")
    self.assertEqual(m.span(), (1, 15))
    m = regex.search(repeated_word,
                     " STRASSE stra\N{LATIN SMALL LETTER SHARP S}e ")
    self.assertEqual(m.span(), (1, 15))

    # An empty named list is expected to match the empty string.
    m = regex.search(r"^\L<options>$", "", options=[])
    self.assertEqual(m.span(), (0, 0))
2611
+
2612
+ def test_fuzzy(self):
2613
+ # Some tests borrowed from TRE library tests.
2614
+ self.assertEqual(repr(type(regex.compile('(fou){s,e<=1}'))),
2615
+ self.PATTERN_CLASS)
2616
+ self.assertEqual(repr(type(regex.compile('(fuu){s}'))),
2617
+ self.PATTERN_CLASS)
2618
+ self.assertEqual(repr(type(regex.compile('(fuu){s,e}'))),
2619
+ self.PATTERN_CLASS)
2620
+ self.assertEqual(repr(type(regex.compile('(anaconda){1i+1d<1,s<=1}'))),
2621
+ self.PATTERN_CLASS)
2622
+ self.assertEqual(repr(type(regex.compile('(anaconda){1i+1d<1,s<=1,e<=10}'))),
2623
+ self.PATTERN_CLASS)
2624
+ self.assertEqual(repr(type(regex.compile('(anaconda){s<=1,e<=1,1i+1d<1}'))),
2625
+ self.PATTERN_CLASS)
2626
+
2627
+ text = 'molasses anaconda foo bar baz smith anderson '
2628
+ self.assertEqual(regex.search('(znacnda){s<=1,e<=3,1i+1d<1}', text),
2629
+ None)
2630
+ self.assertEqual(regex.search('(znacnda){s<=1,e<=3,1i+1d<2}',
2631
+ text).span(0, 1), ((9, 17), (9, 17)))
2632
+ self.assertEqual(regex.search('(ananda){1i+1d<2}', text), None)
2633
+ self.assertEqual(regex.search(r"(?:\bznacnda){e<=2}", text)[0],
2634
+ "anaconda")
2635
+ self.assertEqual(regex.search(r"(?:\bnacnda){e<=2}", text)[0],
2636
+ "anaconda")
2637
+
2638
+ text = 'anaconda foo bar baz smith anderson'
2639
+ self.assertEqual(regex.search('(fuu){i<=3,d<=3,e<=5}', text).span(0,
2640
+ 1), ((0, 0), (0, 0)))
2641
+ self.assertEqual(regex.search('(?b)(fuu){i<=3,d<=3,e<=5}',
2642
+ text).span(0, 1), ((9, 10), (9, 10)))
2643
+ self.assertEqual(regex.search('(fuu){i<=2,d<=2,e<=5}', text).span(0,
2644
+ 1), ((7, 10), (7, 10)))
2645
+ self.assertEqual(regex.search('(?e)(fuu){i<=2,d<=2,e<=5}',
2646
+ text).span(0, 1), ((9, 10), (9, 10)))
2647
+ self.assertEqual(regex.search('(fuu){i<=3,d<=3,e}', text).span(0, 1),
2648
+ ((0, 0), (0, 0)))
2649
+ self.assertEqual(regex.search('(?b)(fuu){i<=3,d<=3,e}', text).span(0,
2650
+ 1), ((9, 10), (9, 10)))
2651
+
2652
+ self.assertEqual(repr(type(regex.compile('(approximate){s<=3,1i+1d<3}'))),
2653
+ self.PATTERN_CLASS)
2654
+
2655
+ # No cost limit.
2656
+ self.assertEqual(regex.search('(foobar){e}',
2657
+ 'xirefoabralfobarxie').span(0, 1), ((0, 6), (0, 6)))
2658
+ self.assertEqual(regex.search('(?e)(foobar){e}',
2659
+ 'xirefoabralfobarxie').span(0, 1), ((0, 3), (0, 3)))
2660
+ self.assertEqual(regex.search('(?b)(foobar){e}',
2661
+ 'xirefoabralfobarxie').span(0, 1), ((11, 16), (11, 16)))
2662
+
2663
+ # At most two errors.
2664
+ self.assertEqual(regex.search('(foobar){e<=2}',
2665
+ 'xirefoabrzlfd').span(0, 1), ((4, 9), (4, 9)))
2666
+ self.assertEqual(regex.search('(foobar){e<=2}', 'xirefoabzlfd'), None)
2667
+
2668
+ # At most two inserts or substitutions and max two errors total.
2669
+ self.assertEqual(regex.search('(foobar){i<=2,s<=2,e<=2}',
2670
+ 'oobargoobaploowap').span(0, 1), ((5, 11), (5, 11)))
2671
+
2672
+ # Find best whole word match for "foobar".
2673
+ self.assertEqual(regex.search('\\b(foobar){e}\\b', 'zfoobarz').span(0,
2674
+ 1), ((0, 8), (0, 8)))
2675
+ self.assertEqual(regex.search('\\b(foobar){e}\\b',
2676
+ 'boing zfoobarz goobar woop').span(0, 1), ((0, 6), (0, 6)))
2677
+ self.assertEqual(regex.search('(?b)\\b(foobar){e}\\b',
2678
+ 'boing zfoobarz goobar woop').span(0, 1), ((15, 21), (15, 21)))
2679
+
2680
+ # Match whole string, allow only 1 error.
2681
+ self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobar').span(0, 1),
2682
+ ((0, 6), (0, 6)))
2683
+ self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoobar').span(0,
2684
+ 1), ((0, 7), (0, 7)))
2685
+ self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobarx').span(0,
2686
+ 1), ((0, 7), (0, 7)))
2687
+ self.assertEqual(regex.search('^(foobar){e<=1}$', 'fooxbar').span(0,
2688
+ 1), ((0, 7), (0, 7)))
2689
+ self.assertEqual(regex.search('^(foobar){e<=1}$', 'foxbar').span(0, 1),
2690
+ ((0, 6), (0, 6)))
2691
+ self.assertEqual(regex.search('^(foobar){e<=1}$', 'xoobar').span(0, 1),
2692
+ ((0, 6), (0, 6)))
2693
+ self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobax').span(0, 1),
2694
+ ((0, 6), (0, 6)))
2695
+ self.assertEqual(regex.search('^(foobar){e<=1}$', 'oobar').span(0, 1),
2696
+ ((0, 5), (0, 5)))
2697
+ self.assertEqual(regex.search('^(foobar){e<=1}$', 'fobar').span(0, 1),
2698
+ ((0, 5), (0, 5)))
2699
+ self.assertEqual(regex.search('^(foobar){e<=1}$', 'fooba').span(0, 1),
2700
+ ((0, 5), (0, 5)))
2701
+ self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoobarx'), None)
2702
+ self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobarxx'), None)
2703
+ self.assertEqual(regex.search('^(foobar){e<=1}$', 'xxfoobar'), None)
2704
+ self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoxbar'), None)
2705
+ self.assertEqual(regex.search('^(foobar){e<=1}$', 'foxbarx'), None)
2706
+
2707
+ # At most one insert, two deletes, and three substitutions.
2708
+ # Additionally, deletes cost two and substitutes one, and total
2709
+ # cost must be less than 4.
2710
+ self.assertEqual(regex.search('(foobar){i<=1,d<=2,s<=3,2d+1s<4}',
2711
+ '3oifaowefbaoraofuiebofasebfaobfaorfeoaro').span(0, 1), ((6, 13), (6,
2712
+ 13)))
2713
+ self.assertEqual(regex.search('(?b)(foobar){i<=1,d<=2,s<=3,2d+1s<4}',
2714
+ '3oifaowefbaoraofuiebofasebfaobfaorfeoaro').span(0, 1), ((34, 39),
2715
+ (34, 39)))
2716
+
2717
+ # Partially fuzzy matches.
2718
+ self.assertEqual(regex.search('foo(bar){e<=1}zap', 'foobarzap').span(0,
2719
+ 1), ((0, 9), (3, 6)))
2720
+ self.assertEqual(regex.search('foo(bar){e<=1}zap', 'fobarzap'), None)
2721
+ self.assertEqual(regex.search('foo(bar){e<=1}zap', 'foobrzap').span(0,
2722
+ 1), ((0, 8), (3, 5)))
2723
+
2724
+ text = ('www.cnn.com 64.236.16.20\nwww.slashdot.org 66.35.250.150\n'
2725
+ 'For useful information, use www.slashdot.org\nthis is demo data!\n')
2726
+ self.assertEqual(regex.search(r'(?s)^.*(dot.org){e}.*$', text).span(0,
2727
+ 1), ((0, 120), (120, 120)))
2728
+ self.assertEqual(regex.search(r'(?es)^.*(dot.org){e}.*$', text).span(0,
2729
+ 1), ((0, 120), (93, 100)))
2730
+ self.assertEqual(regex.search(r'^.*(dot.org){e}.*$', text).span(0, 1),
2731
+ ((0, 119), (24, 101)))
2732
+
2733
+ # Behaviour is unexpected, but arguably not wrong. It first finds the
2734
+ # best match, then the best in what follows, etc.
2735
+ self.assertEqual(regex.findall(r"\b\L<words>{e<=1}\b",
2736
+ " book cot dog desk ", words="cat dog".split()), ["cot", "dog"])
2737
+ self.assertEqual(regex.findall(r"\b\L<words>{e<=1}\b",
2738
+ " book dog cot desk ", words="cat dog".split()), [" dog", "cot"])
2739
+ self.assertEqual(regex.findall(r"(?e)\b\L<words>{e<=1}\b",
2740
+ " book dog cot desk ", words="cat dog".split()), ["dog", "cot"])
2741
+ self.assertEqual(regex.findall(r"(?r)\b\L<words>{e<=1}\b",
2742
+ " book cot dog desk ", words="cat dog".split()), ["dog ", "cot"])
2743
+ self.assertEqual(regex.findall(r"(?er)\b\L<words>{e<=1}\b",
2744
+ " book cot dog desk ", words="cat dog".split()), ["dog", "cot"])
2745
+ self.assertEqual(regex.findall(r"(?r)\b\L<words>{e<=1}\b",
2746
+ " book dog cot desk ", words="cat dog".split()), ["cot", "dog"])
2747
+ self.assertEqual(regex.findall(br"\b\L<words>{e<=1}\b",
2748
+ b" book cot dog desk ", words=b"cat dog".split()), [b"cot", b"dog"])
2749
+ self.assertEqual(regex.findall(br"\b\L<words>{e<=1}\b",
2750
+ b" book dog cot desk ", words=b"cat dog".split()), [b" dog", b"cot"])
2751
+ self.assertEqual(regex.findall(br"(?e)\b\L<words>{e<=1}\b",
2752
+ b" book dog cot desk ", words=b"cat dog".split()), [b"dog", b"cot"])
2753
+ self.assertEqual(regex.findall(br"(?r)\b\L<words>{e<=1}\b",
2754
+ b" book cot dog desk ", words=b"cat dog".split()), [b"dog ", b"cot"])
2755
+ self.assertEqual(regex.findall(br"(?er)\b\L<words>{e<=1}\b",
2756
+ b" book cot dog desk ", words=b"cat dog".split()), [b"dog", b"cot"])
2757
+ self.assertEqual(regex.findall(br"(?r)\b\L<words>{e<=1}\b",
2758
+ b" book dog cot desk ", words=b"cat dog".split()), [b"cot", b"dog"])
2759
+
2760
+ self.assertEqual(regex.search(r"(\w+) (\1{e<=1})", "foo fou").groups(),
2761
+ ("foo", "fou"))
2762
+ self.assertEqual(regex.search(r"(?r)(\2{e<=1}) (\w+)",
2763
+ "foo fou").groups(), ("foo", "fou"))
2764
+ self.assertEqual(regex.search(br"(\w+) (\1{e<=1})",
2765
+ b"foo fou").groups(), (b"foo", b"fou"))
2766
+
2767
+ self.assertEqual(regex.findall(r"(?:(?:QR)+){e}", "abcde"), ["abcde",
2768
+ ""])
2769
+ self.assertEqual(regex.findall(r"(?:Q+){e}", "abc"), ["abc", ""])
2770
+
2771
+ # Hg issue 41: = for fuzzy matches
2772
+ self.assertEqual(regex.match(r"(?:service detection){0<e<5}",
2773
+ "servic detection").span(), (0, 16))
2774
+ self.assertEqual(regex.match(r"(?:service detection){0<e<5}",
2775
+ "service detect").span(), (0, 14))
2776
+ self.assertEqual(regex.match(r"(?:service detection){0<e<5}",
2777
+ "service detecti").span(), (0, 15))
2778
+ self.assertEqual(regex.match(r"(?:service detection){0<e<5}",
2779
+ "service detection"), None)
2780
+ self.assertEqual(regex.match(r"(?:service detection){0<e<5}",
2781
+ "in service detection").span(), (0, 20))
2782
+
2783
+ # Hg issue 109: Edit distance of fuzzy match
2784
+ self.assertEqual(regex.fullmatch(r"(?:cats|cat){e<=1}",
2785
+ "cat").fuzzy_counts, (0, 0, 1))
2786
+ self.assertEqual(regex.fullmatch(r"(?e)(?:cats|cat){e<=1}",
2787
+ "cat").fuzzy_counts, (0, 0, 0))
2788
+
2789
+ self.assertEqual(regex.fullmatch(r"(?:cat|cats){e<=1}",
2790
+ "cats").fuzzy_counts, (0, 1, 0))
2791
+ self.assertEqual(regex.fullmatch(r"(?e)(?:cat|cats){e<=1}",
2792
+ "cats").fuzzy_counts, (0, 0, 0))
2793
+
2794
+ self.assertEqual(regex.fullmatch(r"(?:cat){e<=1} (?:cat){e<=1}",
2795
+ "cat cot").fuzzy_counts, (1, 0, 0))
2796
+
2797
+ # Incorrect fuzzy changes
2798
+ self.assertEqual(regex.search(r"(?e)(GTTTTCATTCCTCATA){i<=4,d<=4,s<=4,i+d+s<=8}",
2799
+ "ATTATTTATTTTTCATA").fuzzy_changes, ([0, 6, 10, 11], [3], []))
2800
+
2801
+ # Fuzzy constraints ignored when checking for prefix/suffix in branches
2802
+ self.assertEqual(bool(regex.match('(?:fo){e<=1}|(?:fo){e<=2}', 'FO')),
2803
+ True)
2804
+
2805
def test_recursive(self):
    """Check recursive subpattern calls: (?R), (?1) and (?2).

    Most patterns are exercised twice: once as written and once as a
    mirrored variant carrying the (?r) flag, whose expected groups are
    mirrored accordingly.
    """
    # Palindrome-style patterns.  An expectation of None means that the
    # search must not match at all; otherwise the whole match plus every
    # group (match[:]) is compared.
    palindrome_runs = (
        (r"(\w)(?:(?R)|(\w?))\1", (
            ("xx", ("xx", "x", "")),
            ("aba", ("aba", "a", "b")),
            ("abba", ("abba", "a", None)),
            ("kayak", ("kayak", "k", None)),
            ("paper", ("pap", "p", "a")),
            ("dontmatchme", None),
        )),
        (r"(?r)\2(?:(\w?)|(?R))(\w)", (
            ("xx", ("xx", "", "x")),
            ("aba", ("aba", "b", "a")),
            ("abba", ("abba", None, "a")),
            ("kayak", ("kayak", None, "k")),
            ("paper", ("pap", "a", "p")),
            ("dontmatchme", None),
        )),
    )
    for pattern, expectations in palindrome_runs:
        for subject, wanted in expectations:
            found = regex.search(pattern, subject)
            if wanted is not None:
                found = found[:]
            self.assertEqual(found, wanted)

    # Balanced parentheses with an atomic group: check both the final
    # groups and every capture recorded for group 1.
    nested = "(ab(cd)ef)"
    balanced_runs = (
        (r"\(((?>[^()]+)|(?R))*\)",
          ("(ab(cd)ef)", "ef"), ["ab", "cd", "(cd)", "ef"]),
        (r"(?r)\(((?R)|(?>[^()]+))*\)",
          ("(ab(cd)ef)", "ab"), ["ef", "cd", "(cd)", "ab"]),
    )
    for pattern, groups, captured in balanced_runs:
        self.assertEqual(regex.search(pattern, nested)[:], groups)
        self.assertEqual(regex.search(pattern, nested).captures(1),
          captured)

    # Balanced parentheses located inside surrounding text.
    embedded = "some text (a(b(c)d)e) more text"
    self.assertEqual(regex.search(r"\(([^()]+|(?R))*\)", embedded)[:],
      ("(a(b(c)d)e)", "e"))
    self.assertEqual(regex.search(r"(?r)\(((?R)|[^()]+)*\)", embedded)[:],
      ("(a(b(c)d)e)", "a"))

    # Recursion into a numbered group, (?2), rather than the whole pattern.
    call = "foo(bar(baz)+baz(bop))"
    call_groups = ("foo(bar(baz)+baz(bop))", "foo(bar(baz)+baz(bop))",
      "(bar(baz)+baz(bop))", "bar(baz)+baz(bop)")
    for pattern in (r"(foo(\(((?:(?>[^()]+)|(?2))*)\)))",
      r"(?r)(foo(\(((?:(?2)|(?>[^()]+))*)\)))"):
        self.assertEqual(regex.search(pattern, call)[:], call_groups)

    # A recursive pattern for nested markup elements, combining (?1)
    # with a conditional on group 3 and a backreference to group 2.
    rgx = regex.compile(r"""^\s*(<\s*([a-zA-Z:]+)(?:\s*[a-zA-Z:]*\s*=\s*(?:'[^']*'|"[^"]*"))*\s*(/\s*)?>(?:[^<>]*|(?1))*(?(3)|<\s*/\s*\2\s*>))\s*$""")
    # NOTE: '<foo/>foo' is deliberately absent from the table below.  Per
    # the original note it should and does match (Perl 5.14 agrees), so
    # asserting that it fails would be wrong.
    markup_cases = (
        ('<foo><bar></bar></foo>', True),
        ('<foo><bar></foo></bar>', False),
        ('<foo><bar/></foo>', True),
        ('<foo><bar></foo>', False),
        ('<foo bar=baz/>', False),
        ('<foo bar="baz">', False),
        ('<foo bar="baz"/>', True),
        ('< fooo / >', True),
        ('foo<foo/>', False),
        ('<foo>foo</foo>', True),
        ('<foo><bar/>foo</foo>', True),
        ('<a><b><c></c></b></a>', True),
    )
    for markup, should_match in markup_cases:
        self.assertEqual(bool(rgx.search(markup)), should_match)
2875
+
2876
def test_copy(self):
    """Check copy.copy/copy.deepcopy on patterns, matches and iterators.

    Identity checks use assertIs/assertIsNot/assertIsNone/assertIsNotNone
    so that a failure reports the objects involved instead of a bare
    "False is not true".
    """
    # PatternObjects are immutable, therefore there's no need to clone
    # them: copying hands back the very same object.
    r = regex.compile("a")
    self.assertIs(copy.copy(r), r)
    self.assertIs(copy.deepcopy(r), r)

    # MatchObjects are normally mutable because the target string can be
    # detached. However, after the target string has been detached, a
    # MatchObject becomes immutable, so there's no need to clone it.
    m = r.match("a")
    self.assertIsNot(copy.copy(m), m)
    self.assertIsNot(copy.deepcopy(m), m)

    # Detaching the string on a copy must leave the original untouched.
    self.assertIsNotNone(m.string)
    m2 = copy.copy(m)
    m2.detach_string()
    self.assertIsNotNone(m.string)
    self.assertIsNone(m2.string)

    # The following behaviour matches that of the re module: after the
    # original iterator has yielded "a", its copy yields "b".
    for duplicate in (copy.copy, copy.deepcopy):
        it = regex.finditer(".", "ab")
        it2 = duplicate(it)
        self.assertEqual(next(it).group(), "a")
        self.assertEqual(next(it2).group(), "b")

    # The following behaviour is designed to match that of copying
    # 'finditer'.
    for duplicate in (copy.copy, copy.deepcopy):
        it = regex.splititer(" ", "a b")
        it2 = duplicate(it)
        self.assertEqual(next(it), "a")
        self.assertEqual(next(it2), "b")
2918
+
2919
def test_format(self):
    """Check the format-string replacement APIs: subf, subfn and expandf."""
    subject = "foo bar"
    # (pattern, replacement template, expected text) for positional and
    # for named group references.
    templates = (
        (r"(\w+) (\w+)", "{0} => {2} {1}", "foo bar => bar foo"),
        (r"(?<word1>\w+) (?<word2>\w+)", "{word2} {word1}", "bar foo"),
    )

    for pattern, template, rendered in templates:
        self.assertEqual(regex.subf(pattern, template, subject), rendered)

    # subfn additionally reports the number of substitutions made.
    for pattern, template, rendered in templates:
        self.assertEqual(regex.subfn(pattern, template, subject),
          (rendered, 1))

    matched = regex.match(r"(\w+) (\w+)", "foo bar")
    self.assertEqual(matched.expandf("{0} => {2} {1}"),
      "foo bar => bar foo")
2932
+
2933
def test_fullmatch(self):
    """Check fullmatch() with and without pos/endpos limits.

    The same cases are run for the plain pattern and for its
    (?r)-flagged counterpart.
    """
    # (subject, keyword limits, whether the whole window must match)
    windows = (
        ("abc", {}, True),
        ("abcx", {}, False),
        ("abcx", {"endpos": 3}, True),
        ("xabc", {"pos": 1}, True),
        ("xabcy", {"pos": 1}, False),
        ("xabcy", {"pos": 1, "endpos": 4}, True),
    )
    for pattern in (r"abc", r"(?r)abc"):
        for subject, limits, should_match in windows:
            outcome = regex.fullmatch(pattern, subject, **limits)
            self.assertEqual(bool(outcome), should_match)
2954
+
2955
def test_issue_18468(self):
    """Check sub/split/findall/match on str, bytes and related types.

    Inputs include the StrSubclass/BytesSubclass helpers as well as
    bytearray and memoryview.  Results are compared with
    assertTypedEqual (defined elsewhere in this class; presumably it
    also compares result types -- confirm there).
    """
    # The expected split() output for patterns that can match the empty
    # string depends on this interpreter version check.
    modern_split = sys.version_info >= (3, 7, 0)

    self.assertTypedEqual(regex.sub('y', 'a', 'xyz'), 'xaz')
    self.assertTypedEqual(
      regex.sub('y', StrSubclass('a'), StrSubclass('xyz')), 'xaz')
    self.assertTypedEqual(regex.sub(b'y', b'a', b'xyz'), b'xaz')
    self.assertTypedEqual(
      regex.sub(b'y', BytesSubclass(b'a'), BytesSubclass(b'xyz')), b'xaz')
    self.assertTypedEqual(
      regex.sub(b'y', bytearray(b'a'), bytearray(b'xyz')), b'xaz')
    self.assertTypedEqual(
      regex.sub(b'y', memoryview(b'a'), memoryview(b'xyz')), b'xaz')

    # split() on text input.
    for subject in ":a:b::c", StrSubclass(":a:b::c"):
        split_checks = [(":", ['', 'a', 'b', '', 'c'])]
        if modern_split:
            split_checks += [
                (":*", ['', '', 'a', '', 'b', '', 'c', '']),
                ("(:*)", ['', ':', '', '', 'a', ':', '', '', 'b', '::',
                  '', '', 'c', '', '']),
            ]
        else:
            split_checks += [
                (":*", ['', 'a', 'b', 'c']),
                ("(:*)", ['', ':', 'a', ':', 'b', '::', 'c']),
            ]
        for separator, pieces in split_checks:
            self.assertTypedEqual(regex.split(separator, subject), pieces)

    # split() on bytes-like input.
    for subject in (b":a:b::c", BytesSubclass(b":a:b::c"),
      bytearray(b":a:b::c"), memoryview(b":a:b::c")):
        split_checks = [(b":", [b'', b'a', b'b', b'', b'c'])]
        if modern_split:
            split_checks += [
                (b":*", [b'', b'', b'a', b'', b'b', b'', b'c', b'']),
                (b"(:*)", [b'', b':', b'', b'', b'a', b':', b'', b'',
                  b'b', b'::', b'', b'', b'c', b'', b'']),
            ]
        else:
            split_checks += [
                (b":*", [b'', b'a', b'b', b'c']),
                (b"(:*)", [b'', b':', b'a', b':', b'b', b'::', b'c']),
            ]
        for separator, pieces in split_checks:
            self.assertTypedEqual(regex.split(separator, subject), pieces)

    # findall() on text input.
    for subject in "a:b::c:::d", StrSubclass("a:b::c:::d"):
        findall_checks = (
            (":+", [":", "::", ":::"]),
            ("(:+)", [":", "::", ":::"]),
            ("(:)(:*)", [(":", ""), (":", ":"), (":", "::")]),
        )
        for pattern, hits in findall_checks:
            self.assertTypedEqual(regex.findall(pattern, subject), hits)

    # findall() on bytes-like input.
    for subject in (b"a:b::c:::d", BytesSubclass(b"a:b::c:::d"),
      bytearray(b"a:b::c:::d"), memoryview(b"a:b::c:::d")):
        findall_checks = (
            (b":+", [b":", b"::", b":::"]),
            (b"(:+)", [b":", b"::", b":::"]),
            (b"(:)(:*)", [(b":", b""), (b":", b":"), (b":", b"::")]),
        )
        for pattern, hits in findall_checks:
            self.assertTypedEqual(regex.findall(pattern, subject), hits)

    # Match-object accessors on text input.
    for subject in 'a', StrSubclass('a'):
        self.assertEqual(regex.match('a', subject).groups(), ())
        self.assertEqual(regex.match('(a)', subject).groups(), ('a',))
        for indexes, wanted in (((0,), 'a'), ((1,), 'a'),
          ((1, 1), ('a', 'a'))):
            self.assertEqual(regex.match('(a)', subject).group(*indexes),
              wanted)

    # Match-object accessors on bytes-like input.
    for subject in (b'a', BytesSubclass(b'a'), bytearray(b'a'),
      memoryview(b'a')):
        self.assertEqual(regex.match(b'a', subject).groups(), ())
        self.assertEqual(regex.match(b'(a)', subject).groups(), (b'a',))
        for indexes, wanted in (((0,), b'a'), ((1,), b'a'),
          ((1, 1), (b'a', b'a'))):
            self.assertEqual(regex.match(b'(a)', subject).group(*indexes),
              wanted)
3030
+
3031
def test_partial(self):
    """Check regex.match(..., partial=True).

    The expectations cover three outcomes: a match whose .partial is
    True for truncated input, a match whose .partial is False for
    complete input, and None when the input cannot lead to a match.
    """
    def attempt(pattern, subject, **named_lists):
        # Every call in this test enables partial matching.
        return regex.match(pattern, subject, partial=True, **named_lists)

    def check(pattern, subject, is_partial, extent, **named_lists):
        # Verify the partial flag first, then the span, each on a fresh
        # match.
        self.assertEqual(attempt(pattern, subject, **named_lists).partial,
          is_partial)
        self.assertEqual(attempt(pattern, subject, **named_lists).span(),
          extent)

    check('ab', 'a', True, (0, 1))
    check(r'cats', 'cat', True, (0, 3))
    self.assertEqual(attempt(r'cats', 'catch'), None)
    check(r'abc\w{3}', 'abcdef', False, (0, 6))
    check(r'abc\w{3}', 'abcde', True, (0, 5))

    self.assertEqual(attempt(r'\d{4}$', '1234').partial, False)

    # Named lists.
    check(r'\L<words>', 'post', False, (0, 4), words=['post'])
    check(r'\L<words>', 'pos', True, (0, 3), words=['post'])

    # Named lists with the (?fi) flags, where the list entry contains
    # U+FB06.
    check(r'(?fi)\L<words>', 'POST', False, (0, 4), words=['po\uFB06'])
    check(r'(?fi)\L<words>', 'POS', True, (0, 3), words=['po\uFB06'])
    self.assertEqual(attempt(r'(?fi)\L<words>', 'po\uFB06',
      words=['POS']), None)

    # Span covered for progressively longer inputs of one pattern.
    for subject, extent in (('a', (0, 1)), ('ab', (0, 2)),
      ('ab4', (0, 3)), ('a4', (0, 2)), ('a4R', (0, 3))):
        self.assertEqual(attempt(r'[a-z]*4R$', subject).span(), extent)
    for subject in ('4a', 'a44'):
        self.assertEqual(attempt(r'[a-z]*4R$', subject), None)
3083
+
3084
+ def test_hg_bugs(self):
3085
+ # Hg issue 28: regex.compile("(?>b)") causes "TypeError: 'Character'
3086
+ # object is not subscriptable"
3087
+ self.assertEqual(bool(regex.compile("(?>b)", flags=regex.V1)), True)
3088
+
3089
+ # Hg issue 29: regex.compile("^((?>\w+)|(?>\s+))*$") causes
3090
+ # "TypeError: 'GreedyRepeat' object is not iterable"
3091
+ self.assertEqual(bool(regex.compile(r"^((?>\w+)|(?>\s+))*$",
3092
+ flags=regex.V1)), True)
3093
+
3094
+ # Hg issue 31: atomic and normal groups in recursive patterns
3095
+ self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)",
3096
+ "a(bcd(e)f)g(h)"), ['(bcd(e)f)', '(h)'])
3097
+ self.assertEqual(regex.findall(r"\((?:(?:[^()]+)|(?R))*\)",
3098
+ "a(bcd(e)f)g(h)"), ['(bcd(e)f)', '(h)'])
3099
+ self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)",
3100
+ "a(b(cd)e)f)g)h"), ['(b(cd)e)'])
3101
+ self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)",
3102
+ "a(bc(d(e)f)gh"), ['(d(e)f)'])
3103
+ self.assertEqual(regex.findall(r"(?r)\((?:(?>[^()]+)|(?R))*\)",
3104
+ "a(bc(d(e)f)gh"), ['(d(e)f)'])
3105
+ self.assertEqual([m.group() for m in
3106
+ regex.finditer(r"\((?:[^()]*+|(?0))*\)", "a(b(c(de)fg)h")],
3107
+ ['(c(de)fg)'])
3108
+
3109
+ # Hg issue 32: regex.search("a(bc)d", "abcd", regex.I|regex.V1) returns
3110
+ # None
3111
+ self.assertEqual(regex.search("a(bc)d", "abcd", regex.I |
3112
+ regex.V1).group(0), "abcd")
3113
+
3114
+ # Hg issue 33: regex.search("([\da-f:]+)$", "E", regex.I|regex.V1)
3115
+ # returns None
3116
+ self.assertEqual(regex.search(r"([\da-f:]+)$", "E", regex.I |
3117
+ regex.V1).group(0), "E")
3118
+ self.assertEqual(regex.search(r"([\da-f:]+)$", "e", regex.I |
3119
+ regex.V1).group(0), "e")
3120
+
3121
+ # Hg issue 34: regex.search("^(?=ab(de))(abd)(e)", "abde").groups()
3122
+ # returns (None, 'abd', 'e') instead of ('de', 'abd', 'e')
3123
+ self.assertEqual(regex.search("^(?=ab(de))(abd)(e)", "abde").groups(),
3124
+ ('de', 'abd', 'e'))
3125
+
3126
+ # Hg issue 35: regex.compile("\ ", regex.X) causes "_regex_core.error:
3127
+ # bad escape"
3128
+ self.assertEqual(bool(regex.match(r"\ ", " ", flags=regex.X)), True)
3129
+
3130
+ # Hg issue 36: regex.search("^(a|)\1{2}b", "b") returns None
3131
+ self.assertEqual(regex.search(r"^(a|)\1{2}b", "b").group(0, 1), ('b',
3132
+ ''))
3133
+
3134
+ # Hg issue 37: regex.search("^(a){0,0}", "abc").group(0,1) returns
3135
+ # ('a', 'a') instead of ('', None)
3136
+ self.assertEqual(regex.search("^(a){0,0}", "abc").group(0, 1), ('',
3137
+ None))
3138
+
3139
+ # Hg issue 38: regex.search("(?>.*/)b", "a/b") returns None
3140
+ self.assertEqual(regex.search("(?>.*/)b", "a/b").group(0), "a/b")
3141
+
3142
+ # Hg issue 39: regex.search("((?i)blah)\\s+\\1", "blah BLAH") doesn't
3143
+ # return None
3144
+ # Changed to positional flags in regex 2023.12.23.
3145
+ self.assertEqual(regex.search(r"((?i)blah)\s+\1", "blah BLAH"), None)
3146
+
3147
+ # Hg issue 40: regex.search("(\()?[^()]+(?(1)\)|)", "(abcd").group(0)
3148
+ # returns "bcd" instead of "abcd"
3149
+ self.assertEqual(regex.search(r"(\()?[^()]+(?(1)\)|)",
3150
+ "(abcd").group(0), "abcd")
3151
+
3152
+ # Hg issue 42: regex.search("(a*)*", "a", flags=regex.V1).span(1)
3153
+ # returns (0, 1) instead of (1, 1)
3154
+ self.assertEqual(regex.search("(a*)*", "a").span(1), (1, 1))
3155
+ self.assertEqual(regex.search("(a*)*", "aa").span(1), (2, 2))
3156
+ self.assertEqual(regex.search("(a*)*", "aaa").span(1), (3, 3))
3157
+
3158
+ # Hg issue 43: regex.compile("a(?#xxx)*") causes "_regex_core.error:
3159
+ # nothing to repeat"
3160
+ self.assertEqual(regex.search("a(?#xxx)*", "aaa").group(), "aaa")
3161
+
3162
+ # Hg issue 44: regex.compile("(?=abc){3}abc") causes
3163
+ # "_regex_core.error: nothing to repeat"
3164
+ self.assertEqual(regex.search("(?=abc){3}abc", "abcabcabc").span(), (0,
3165
+ 3))
3166
+
3167
+ # Hg issue 45: regex.compile("^(?:a(?:(?:))+)+") causes
3168
+ # "_regex_core.error: nothing to repeat"
3169
+ self.assertEqual(regex.search("^(?:a(?:(?:))+)+", "a").span(), (0, 1))
3170
+ self.assertEqual(regex.search("^(?:a(?:(?:))+)+", "aa").span(), (0, 2))
3171
+
3172
+ # Hg issue 46: regex.compile("a(?x: b c )d") causes
3173
+ # "_regex_core.error: missing )"
3174
+ self.assertEqual(regex.search("a(?x: b c )d", "abcd").group(0), "abcd")
3175
+
3176
+ # Hg issue 47: regex.compile("a#comment\n*", flags=regex.X) causes
3177
+ # "_regex_core.error: nothing to repeat"
3178
+ self.assertEqual(regex.search("a#comment\n*", "aaa",
3179
+ flags=regex.X).group(0), "aaa")
3180
+
3181
+ # Hg issue 48: regex.search("(a(?(1)\\1)){4}", "a"*10,
3182
+ # flags=regex.V1).group(0,1) returns ('aaaaa', 'a') instead of ('aaaaaaaaaa', 'aaaa')
3183
+ self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){1}",
3184
+ "aaaaaaaaaa").span(0, 1), ((0, 1), (0, 1)))
3185
+ self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){2}",
3186
+ "aaaaaaaaaa").span(0, 1), ((0, 3), (1, 3)))
3187
+ self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){3}",
3188
+ "aaaaaaaaaa").span(0, 1), ((0, 6), (3, 6)))
3189
+ self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){4}",
3190
+ "aaaaaaaaaa").span(0, 1), ((0, 10), (6, 10)))
3191
+
3192
+ # Hg issue 49: regex.search("(a)(?<=b(?1))", "baz", regex.V1) returns
3193
+ # None incorrectly
3194
+ self.assertEqual(regex.search("(?V1)(a)(?<=b(?1))", "baz").group(0),
3195
+ "a")
3196
+
3197
+ # Hg issue 50: not all keywords are found by named list with
3198
+ # overlapping keywords when full Unicode casefolding is required
3199
+ self.assertEqual(regex.findall(r'(?fi)\L<keywords>',
3200
+ 'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05',
3201
+ keywords=['post','pos']), ['POST', 'Post', 'post', 'po\u017Ft',
3202
+ 'po\uFB06', 'po\uFB05'])
3203
+ self.assertEqual(regex.findall(r'(?fi)pos|post',
3204
+ 'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05'), ['POS',
3205
+ 'Pos', 'pos', 'po\u017F', 'po\uFB06', 'po\uFB05'])
3206
+ self.assertEqual(regex.findall(r'(?fi)post|pos',
3207
+ 'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05'), ['POST',
3208
+ 'Post', 'post', 'po\u017Ft', 'po\uFB06', 'po\uFB05'])
3209
+ self.assertEqual(regex.findall(r'(?fi)post|another',
3210
+ 'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05'), ['POST',
3211
+ 'Post', 'post', 'po\u017Ft', 'po\uFB06', 'po\uFB05'])
3212
+
3213
+ # Hg issue 51: regex.search("((a)(?1)|(?2))", "a", flags=regex.V1)
3214
+ # returns None incorrectly
3215
+ self.assertEqual(regex.search("(?V1)((a)(?1)|(?2))", "a").group(0, 1,
3216
+ 2), ('a', 'a', None))
3217
+
3218
+ # Hg issue 52: regex.search("(\\1xx|){6}", "xx",
3219
+ # flags=regex.V1).span(0,1) returns incorrect value
3220
+ self.assertEqual(regex.search(r"(?V1)(\1xx|){6}", "xx").span(0, 1),
3221
+ ((0, 2), (2, 2)))
3222
+
3223
+ # Hg issue 53: regex.search("(a|)+", "a") causes MemoryError
3224
+ self.assertEqual(regex.search("(a|)+", "a").group(0, 1), ("a", ""))
3225
+
3226
+ # Hg issue 54: regex.search("(a|)*\\d", "a"*80) causes MemoryError
3227
+ self.assertEqual(regex.search(r"(a|)*\d", "a" * 80), None)
3228
+
3229
+ # Hg issue 55: regex.search("^(?:a?b?)*$", "ac") take a very long time.
3230
+ self.assertEqual(regex.search("^(?:a?b?)*$", "ac"), None)
3231
+
3232
+ # Hg issue 58: bad named character escape sequences like "\\N{1}"
3233
+ # treats as "N"
3234
+ self.assertRaisesRegex(regex.error, self.UNDEF_CHAR_NAME, lambda:
3235
+ regex.compile("\\N{1}"))
3236
+
3237
+ # Hg issue 59: regex.search("\\Z", "a\na\n") returns None incorrectly
3238
+ self.assertEqual(regex.search("\\Z", "a\na\n").span(0), (4, 4))
3239
+
3240
+ # Hg issue 60: regex.search("(q1|.)*(q2|.)*(x(a|bc)*y){2,}", "xayxay")
3241
+ # returns None incorrectly
3242
+ self.assertEqual(regex.search("(q1|.)*(q2|.)*(x(a|bc)*y){2,}",
3243
+ "xayxay").group(0), "xayxay")
3244
+
3245
+ # Hg issue 61: regex.search("[^a]", "A", regex.I).group(0) returns ''
3246
+ # incorrectly
3247
+ self.assertEqual(regex.search("(?i)[^a]", "A"), None)
3248
+
3249
+ # Hg issue 63: regex.search("[[:ascii:]]", "\N{KELVIN SIGN}",
3250
+ # flags=regex.I|regex.V1) doesn't return None
3251
+ self.assertEqual(regex.search("(?i)[[:ascii:]]", "\N{KELVIN SIGN}"),
3252
+ None)
3253
+
3254
+ # Hg issue 66: regex.search("((a|b(?1)c){3,5})", "baaaaca",
3255
+ # flags=regex.V1).groups() returns ('baaaac', 'baaaac') instead of ('aaaa', 'a')
3256
+ self.assertEqual(regex.search("((a|b(?1)c){3,5})", "baaaaca").group(0,
3257
+ 1, 2), ('aaaa', 'aaaa', 'a'))
3258
+
3259
+ # Hg issue 71: non-greedy quantifier in lookbehind
3260
+ self.assertEqual(regex.findall(r"(?<=:\S+ )\w+", ":9 abc :10 def"),
3261
+ ['abc', 'def'])
3262
+ self.assertEqual(regex.findall(r"(?<=:\S* )\w+", ":9 abc :10 def"),
3263
+ ['abc', 'def'])
3264
+ self.assertEqual(regex.findall(r"(?<=:\S+? )\w+", ":9 abc :10 def"),
3265
+ ['abc', 'def'])
3266
+ self.assertEqual(regex.findall(r"(?<=:\S*? )\w+", ":9 abc :10 def"),
3267
+ ['abc', 'def'])
3268
+
3269
+ # Hg issue 73: conditional patterns
3270
+ self.assertEqual(regex.search(r"(?:fe)?male", "female").group(),
3271
+ "female")
3272
+ self.assertEqual([m.group() for m in
3273
+ regex.finditer(r"(fe)?male: h(?(1)(er)|(is)) (\w+)",
3274
+ "female: her dog; male: his cat. asdsasda")], ['female: her dog',
3275
+ 'male: his cat'])
3276
+
3277
+ # Hg issue 78: "Captures" doesn't work for recursive calls
3278
+ self.assertEqual(regex.search(r'(?<rec>\((?:[^()]++|(?&rec))*\))',
3279
+ 'aaa(((1+0)+1)+1)bbb').captures('rec'), ['(1+0)', '((1+0)+1)',
3280
+ '(((1+0)+1)+1)'])
3281
+
3282
+ # Hg issue 80: Escape characters throws an exception
3283
+ self.assertRaisesRegex(regex.error, self.BAD_ESCAPE, lambda:
3284
+ regex.sub('x', '\\', 'x'), )
3285
+
3286
+ # Hg issue 82: error range does not work
3287
+ fz = "(CAGCCTCCCATTTCAGAATATACATCC){1<e<=2}"
3288
+ seq = "tcagacgagtgcgttgtaaaacgacggccagtCAGCCTCCCATTCAGAATATACATCCcgacggccagttaaaaacaatgccaaggaggtcatagctgtttcctgccagttaaaaacaatgccaaggaggtcatagctgtttcctgacgcactcgtctgagcgggctggcaagg"
3289
+ self.assertEqual(regex.search(fz, seq, regex.BESTMATCH)[0],
3290
+ "tCAGCCTCCCATTCAGAATATACATCC")
3291
+
3292
+ # Hg issue 83: slash handling in presence of a quantifier
3293
+ self.assertEqual(regex.findall(r"c..+/c", "cA/c\ncAb/c"), ['cAb/c'])
3294
+
3295
+ # Hg issue 85: Non-conformance to Unicode UAX#29 re: ZWJ / ZWNJ
3296
+ self.assertEqual(ascii(regex.sub(r"(\w+)", r"[\1]",
3297
+ '\u0905\u0928\u094d\u200d\u0928 \u0d28\u0d4d\u200d \u0915\u093f\u0928',
3298
+ regex.WORD)),
3299
+ ascii('[\u0905\u0928\u094d\u200d\u0928] [\u0d28\u0d4d\u200d] [\u0915\u093f\u0928]'))
3300
+
3301
+ # Hg issue 88: regex.match() hangs
3302
+ self.assertEqual(regex.match(r".*a.*ba.*aa", "ababba"), None)
3303
+
3304
+ # Hg issue 87: Allow duplicate names of groups
3305
+ self.assertEqual(regex.match(r'(?<x>a(?<x>b))', "ab").spans("x"), [(1,
3306
+ 2), (0, 2)])
3307
+
3308
+ # Hg issue 91: match.expand is extremely slow
3309
+ # Check that the replacement cache works.
3310
+ self.assertEqual(regex.sub(r'(-)', lambda m: m.expand(r'x'), 'a-b-c'),
3311
+ 'axbxc')
3312
+
3313
+ # Hg issue 94: Python crashes when executing regex updates
3314
+ # pattern.findall
3315
+ rx = regex.compile(r'\bt(est){i<2}', flags=regex.V1)
3316
+ self.assertEqual(rx.search("Some text"), None)
3317
+ self.assertEqual(rx.findall("Some text"), [])
3318
+
3319
+ # Hg issue 95: 'pos' for regex.error
3320
+ self.assertRaisesRegex(regex.error, self.MULTIPLE_REPEAT, lambda:
3321
+ regex.compile(r'.???'))
3322
+
3323
+ # Hg issue 97: behaviour of regex.escape's special_only is wrong
3324
+ #
3325
+ # Hg issue 244: Make `special_only=True` the default in
3326
+ # `regex.escape()`
3327
+ self.assertEqual(regex.escape('foo!?', special_only=False), 'foo\\!\\?')
3328
+ self.assertEqual(regex.escape('foo!?', special_only=True), 'foo!\\?')
3329
+ self.assertEqual(regex.escape('foo!?'), 'foo!\\?')
3330
+
3331
+ self.assertEqual(regex.escape(b'foo!?', special_only=False), b'foo\\!\\?')
3332
+ self.assertEqual(regex.escape(b'foo!?', special_only=True),
3333
+ b'foo!\\?')
3334
+ self.assertEqual(regex.escape(b'foo!?'), b'foo!\\?')
3335
+
3336
+ # Hg issue 100: strange results from regex.search
3337
+ self.assertEqual(regex.search('^([^z]*(?:WWWi|W))?$',
3338
+ 'WWWi').groups(), ('WWWi', ))
3339
+ self.assertEqual(regex.search('^([^z]*(?:WWWi|w))?$',
3340
+ 'WWWi').groups(), ('WWWi', ))
3341
+ self.assertEqual(regex.search('^([^z]*?(?:WWWi|W))?$',
3342
+ 'WWWi').groups(), ('WWWi', ))
3343
+
3344
+ # Hg issue 101: findall() broken (seems like memory corruption)
3345
+ pat = regex.compile(r'xxx', flags=regex.FULLCASE | regex.UNICODE)
3346
+ self.assertEqual([x.group() for x in pat.finditer('yxxx')], ['xxx'])
3347
+ self.assertEqual(pat.findall('yxxx'), ['xxx'])
3348
+
3349
+ raw = 'yxxx'
3350
+ self.assertEqual([x.group() for x in pat.finditer(raw)], ['xxx'])
3351
+ self.assertEqual(pat.findall(raw), ['xxx'])
3352
+
3353
+ pat = regex.compile(r'xxx', flags=regex.FULLCASE | regex.IGNORECASE |
3354
+ regex.UNICODE)
3355
+ self.assertEqual([x.group() for x in pat.finditer('yxxx')], ['xxx'])
3356
+ self.assertEqual(pat.findall('yxxx'), ['xxx'])
3357
+
3358
+ raw = 'yxxx'
3359
+ self.assertEqual([x.group() for x in pat.finditer(raw)], ['xxx'])
3360
+ self.assertEqual(pat.findall(raw), ['xxx'])
3361
+
3362
+ # Hg issue 106: * operator not working correctly with sub()
3363
+ if sys.version_info >= (3, 7, 0):
3364
+ self.assertEqual(regex.sub('(?V0).*', 'x', 'test'), 'xx')
3365
+ else:
3366
+ self.assertEqual(regex.sub('(?V0).*', 'x', 'test'), 'x')
3367
+ self.assertEqual(regex.sub('(?V1).*', 'x', 'test'), 'xx')
3368
+
3369
+ if sys.version_info >= (3, 7, 0):
3370
+ self.assertEqual(regex.sub('(?V0).*?', '|', 'test'), '|||||||||')
3371
+ else:
3372
+ self.assertEqual(regex.sub('(?V0).*?', '|', 'test'), '|t|e|s|t|')
3373
+ self.assertEqual(regex.sub('(?V1).*?', '|', 'test'), '|||||||||')
3374
+
3375
+ # Hg issue 112: re: OK, but regex: SystemError
3376
+ self.assertEqual(regex.sub(r'^(@)\n(?!.*?@)(.*)',
3377
+ r'\1\n==========\n\2', '@\n', flags=regex.DOTALL), '@\n==========\n')
3378
+
3379
+ # Hg issue 109: Edit distance of fuzzy match
3380
+ self.assertEqual(regex.match(r'(?:cats|cat){e<=1}',
3381
+ 'caz').fuzzy_counts, (1, 0, 0))
3382
+ self.assertEqual(regex.match(r'(?e)(?:cats|cat){e<=1}',
3383
+ 'caz').fuzzy_counts, (1, 0, 0))
3384
+ self.assertEqual(regex.match(r'(?b)(?:cats|cat){e<=1}',
3385
+ 'caz').fuzzy_counts, (1, 0, 0))
3386
+
3387
+ self.assertEqual(regex.match(r'(?:cat){e<=1}', 'caz').fuzzy_counts,
3388
+ (1, 0, 0))
3389
+ self.assertEqual(regex.match(r'(?e)(?:cat){e<=1}',
3390
+ 'caz').fuzzy_counts, (1, 0, 0))
3391
+ self.assertEqual(regex.match(r'(?b)(?:cat){e<=1}',
3392
+ 'caz').fuzzy_counts, (1, 0, 0))
3393
+
3394
+ self.assertEqual(regex.match(r'(?:cats){e<=2}', 'c ats').fuzzy_counts,
3395
+ (1, 1, 0))
3396
+ self.assertEqual(regex.match(r'(?e)(?:cats){e<=2}',
3397
+ 'c ats').fuzzy_counts, (0, 1, 0))
3398
+ self.assertEqual(regex.match(r'(?b)(?:cats){e<=2}',
3399
+ 'c ats').fuzzy_counts, (0, 1, 0))
3400
+
3401
+ self.assertEqual(regex.match(r'(?:cats){e<=2}',
3402
+ 'c a ts').fuzzy_counts, (0, 2, 0))
3403
+ self.assertEqual(regex.match(r'(?e)(?:cats){e<=2}',
3404
+ 'c a ts').fuzzy_counts, (0, 2, 0))
3405
+ self.assertEqual(regex.match(r'(?b)(?:cats){e<=2}',
3406
+ 'c a ts').fuzzy_counts, (0, 2, 0))
3407
+
3408
+ self.assertEqual(regex.match(r'(?:cats){e<=1}', 'c ats').fuzzy_counts,
3409
+ (0, 1, 0))
3410
+ self.assertEqual(regex.match(r'(?e)(?:cats){e<=1}',
3411
+ 'c ats').fuzzy_counts, (0, 1, 0))
3412
+ self.assertEqual(regex.match(r'(?b)(?:cats){e<=1}',
3413
+ 'c ats').fuzzy_counts, (0, 1, 0))
3414
+
3415
+ # Hg issue 115: Infinite loop when processing backreferences
3416
+ self.assertEqual(regex.findall(r'\bof ([a-z]+) of \1\b',
3417
+ 'To make use of one of these modules'), [])
3418
+
3419
+ # Hg issue 125: Reference to entire match (\g<0>) in
3420
+ # Pattern.sub() doesn't work as of 2014.09.22 release.
3421
+ self.assertEqual(regex.sub(r'x', r'\g<0>', 'x'), 'x')
3422
+
3423
+ # Unreported issue: no such builtin as 'ascii' in Python 2.
3424
+ self.assertEqual(bool(regex.match(r'a', 'a', regex.DEBUG)), True)
3425
+
3426
+ # Hg issue 131: nested sets behaviour
3427
+ self.assertEqual(regex.findall(r'(?V1)[[b-e]--cd]', 'abcdef'), ['b',
3428
+ 'e'])
3429
+ self.assertEqual(regex.findall(r'(?V1)[b-e--cd]', 'abcdef'), ['b',
3430
+ 'e'])
3431
+ self.assertEqual(regex.findall(r'(?V1)[[bcde]--cd]', 'abcdef'), ['b',
3432
+ 'e'])
3433
+ self.assertEqual(regex.findall(r'(?V1)[bcde--cd]', 'abcdef'), ['b',
3434
+ 'e'])
3435
+
3436
+ # Hg issue 132: index out of range on null property \p{}
3437
+ self.assertRaisesRegex(regex.error, '^unknown property at position 4$',
3438
+ lambda: regex.compile(r'\p{}'))
3439
+
3440
+ # Issue 23692.
3441
+ self.assertEqual(regex.match('(?:()|(?(1)()|z)){2}(?(2)a|z)',
3442
+ 'a').group(0, 1, 2), ('a', '', ''))
3443
+ self.assertEqual(regex.match('(?:()|(?(1)()|z)){0,2}(?(2)a|z)',
3444
+ 'a').group(0, 1, 2), ('a', '', ''))
3445
+
3446
+ # Hg issue 137: Posix character class :punct: does not seem to be
3447
+ # supported.
3448
+
3449
+ # Posix compatibility as recommended here:
3450
+ # http://www.unicode.org/reports/tr18/#Compatibility_Properties
3451
+
3452
+ # Posix in Unicode.
3453
+ chars = ''.join(chr(c) for c in range(0x10000))
3454
+
3455
+ self.assertEqual(ascii(''.join(regex.findall(r'''[[:alnum:]]+''',
3456
+ chars))), ascii(''.join(regex.findall(r'''[\p{Alpha}\p{PosixDigit}]+''',
3457
+ chars))))
3458
+ self.assertEqual(ascii(''.join(regex.findall(r'''[[:alpha:]]+''',
3459
+ chars))), ascii(''.join(regex.findall(r'''\p{Alpha}+''',
3460
+ chars))))
3461
+ self.assertEqual(ascii(''.join(regex.findall(r'''[[:ascii:]]+''',
3462
+ chars))), ascii(''.join(regex.findall(r'''[\p{InBasicLatin}]+''',
3463
+ chars))))
3464
+ self.assertEqual(ascii(''.join(regex.findall(r'''[[:blank:]]+''',
3465
+ chars))), ascii(''.join(regex.findall(r'''[\p{gc=Space_Separator}\t]+''',
3466
+ chars))))
3467
+ self.assertEqual(ascii(''.join(regex.findall(r'''[[:cntrl:]]+''',
3468
+ chars))), ascii(''.join(regex.findall(r'''\p{gc=Control}+''', chars))))
3469
+ self.assertEqual(ascii(''.join(regex.findall(r'''[[:digit:]]+''',
3470
+ chars))), ascii(''.join(regex.findall(r'''[0-9]+''', chars))))
3471
+ self.assertEqual(ascii(''.join(regex.findall(r'''[[:graph:]]+''',
3472
+ chars))), ascii(''.join(regex.findall(r'''[^\p{Space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]+''',
3473
+ chars))))
3474
+ self.assertEqual(ascii(''.join(regex.findall(r'''[[:lower:]]+''',
3475
+ chars))), ascii(''.join(regex.findall(r'''\p{Lower}+''',
3476
+ chars))))
3477
+ self.assertEqual(ascii(''.join(regex.findall(r'''[[:print:]]+''',
3478
+ chars))), ascii(''.join(regex.findall(r'''(?V1)[\p{Graph}\p{Blank}--\p{Cntrl}]+''', chars))))
3479
+ self.assertEqual(ascii(''.join(regex.findall(r'''[[:punct:]]+''',
3480
+ chars))),
3481
+ ascii(''.join(regex.findall(r'''(?V1)[\p{gc=Punctuation}\p{gc=Symbol}--\p{Alpha}]+''',
3482
+ chars))))
3483
+ self.assertEqual(ascii(''.join(regex.findall(r'''[[:space:]]+''',
3484
+ chars))), ascii(''.join(regex.findall(r'''\p{Whitespace}+''',
3485
+ chars))))
3486
+ self.assertEqual(ascii(''.join(regex.findall(r'''[[:upper:]]+''',
3487
+ chars))), ascii(''.join(regex.findall(r'''\p{Upper}+''',
3488
+ chars))))
3489
+ self.assertEqual(ascii(''.join(regex.findall(r'''[[:word:]]+''',
3490
+ chars))), ascii(''.join(regex.findall(r'''[\p{Alpha}\p{gc=Mark}\p{Digit}\p{gc=Connector_Punctuation}\p{Join_Control}]+''',
3491
+ chars))))
3492
+ self.assertEqual(ascii(''.join(regex.findall(r'''[[:xdigit:]]+''',
3493
+ chars))), ascii(''.join(regex.findall(r'''[0-9A-Fa-f]+''',
3494
+ chars))))
3495
+
3496
+ # Posix in ASCII.
3497
+ chars = bytes(range(0x100))
3498
+
3499
+ self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:alnum:]]+''',
3500
+ chars))), ascii(b''.join(regex.findall(br'''(?a)[\p{Alpha}\p{PosixDigit}]+''',
3501
+ chars))))
3502
+ self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:alpha:]]+''',
3503
+ chars))), ascii(b''.join(regex.findall(br'''(?a)\p{Alpha}+''', chars))))
3504
+ self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:ascii:]]+''',
3505
+ chars))), ascii(b''.join(regex.findall(br'''(?a)[\x00-\x7F]+''', chars))))
3506
+ self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:blank:]]+''',
3507
+ chars))), ascii(b''.join(regex.findall(br'''(?a)[\p{gc=Space_Separator}\t]+''',
3508
+ chars))))
3509
+ self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:cntrl:]]+''',
3510
+ chars))), ascii(b''.join(regex.findall(br'''(?a)\p{gc=Control}+''',
3511
+ chars))))
3512
+ self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:digit:]]+''',
3513
+ chars))), ascii(b''.join(regex.findall(br'''(?a)[0-9]+''', chars))))
3514
+ self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:graph:]]+''',
3515
+ chars))), ascii(b''.join(regex.findall(br'''(?a)[^\p{Space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]+''', chars))))
3516
+ self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:lower:]]+''',
3517
+ chars))), ascii(b''.join(regex.findall(br'''(?a)\p{Lower}+''', chars))))
3518
+ self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:print:]]+''',
3519
+ chars))), ascii(b''.join(regex.findall(br'''(?aV1)[\p{Graph}\p{Blank}--\p{Cntrl}]+''', chars))))
3520
+ self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:punct:]]+''',
3521
+ chars))), ascii(b''.join(regex.findall(br'''(?aV1)[\p{gc=Punctuation}\p{gc=Symbol}--\p{Alpha}]+''',
3522
+ chars))))
3523
+ self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:space:]]+''',
3524
+ chars))), ascii(b''.join(regex.findall(br'''(?a)\p{Whitespace}+''', chars))))
3525
+ self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:upper:]]+''',
3526
+ chars))), ascii(b''.join(regex.findall(br'''(?a)\p{Upper}+''', chars))))
3527
+ self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:word:]]+''',
3528
+ chars))), ascii(b''.join(regex.findall(br'''(?a)[\p{Alpha}\p{gc=Mark}\p{Digit}\p{gc=Connector_Punctuation}\p{Join_Control}]+''', chars))))
3529
+ self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:xdigit:]]+''',
3530
+ chars))), ascii(b''.join(regex.findall(br'''(?a)[0-9A-Fa-f]+''', chars))))
3531
+
3532
+ # Hg issue 138: grapheme anchored search not working properly.
3533
+ self.assertEqual(ascii(regex.search(r'\X$', 'ab\u2103').group()),
3534
+ ascii('\u2103'))
3535
+
3536
+ # Hg issue 139: Regular expression with multiple wildcards where first
3537
+ # should match empty string does not always work.
3538
+ self.assertEqual(regex.search("([^L]*)([^R]*R)", "LtR").groups(), ('',
3539
+ 'LtR'))
3540
+
3541
+ # Hg issue 140: Replace with REVERSE and groups has unexpected
3542
+ # behavior.
3543
+ self.assertEqual(regex.sub(r'(.)', r'x\1y', 'ab'), 'xayxby')
3544
+ self.assertEqual(regex.sub(r'(?r)(.)', r'x\1y', 'ab'), 'xayxby')
3545
+ self.assertEqual(regex.subf(r'(.)', 'x{1}y', 'ab'), 'xayxby')
3546
+ self.assertEqual(regex.subf(r'(?r)(.)', 'x{1}y', 'ab'), 'xayxby')
3547
+
3548
+ # Hg issue 141: Crash on a certain partial match.
3549
+ self.assertEqual(regex.fullmatch('(a)*abc', 'ab',
3550
+ partial=True).span(), (0, 2))
3551
+ self.assertEqual(regex.fullmatch('(a)*abc', 'ab',
3552
+ partial=True).partial, True)
3553
+
3554
+ # Hg issue 143: Partial matches have incorrect span if prefix is '.'
3555
+ # wildcard.
3556
+ self.assertEqual(regex.search('OXRG', 'OOGOX', partial=True).span(),
3557
+ (3, 5))
3558
+ self.assertEqual(regex.search('.XRG', 'OOGOX', partial=True).span(),
3559
+ (3, 5))
3560
+ self.assertEqual(regex.search('.{1,3}XRG', 'OOGOX',
3561
+ partial=True).span(), (1, 5))
3562
+
3563
+ # Hg issue 144: Latest version problem with matching 'R|R'.
3564
+ self.assertEqual(regex.match('R|R', 'R').span(), (0, 1))
3565
+
3566
+ # Hg issue 146: Forced-fail (?!) works improperly in conditional.
3567
+ self.assertEqual(regex.match(r'(.)(?(1)(?!))', 'xy'), None)
3568
+
3569
+ # Groups cleared after failure.
3570
+ self.assertEqual(regex.findall(r'(y)?(\d)(?(1)\b\B)', 'ax1y2z3b'),
3571
+ [('', '1'), ('', '2'), ('', '3')])
3572
+ self.assertEqual(regex.findall(r'(y)?+(\d)(?(1)\b\B)', 'ax1y2z3b'),
3573
+ [('', '1'), ('', '2'), ('', '3')])
3574
+
3575
+ # Hg issue 147: Fuzzy match can return match points beyond buffer end.
3576
+ self.assertEqual([m.span() for m in regex.finditer(r'(?i)(?:error){e}',
3577
+ 'regex failure')], [(0, 5), (5, 10), (10, 13), (13, 13)])
3578
+ self.assertEqual([m.span() for m in
3579
+ regex.finditer(r'(?fi)(?:error){e}', 'regex failure')], [(0, 5), (5,
3580
+ 10), (10, 13), (13, 13)])
3581
+
3582
+ # Hg issue 150: Have an option for POSIX-compatible longest match of
3583
+ # alternates.
3584
+ self.assertEqual(regex.search(r'(?p)\d+(\w(\d*)?|[eE]([+-]\d+))',
3585
+ '10b12')[0], '10b12')
3586
+ self.assertEqual(regex.search(r'(?p)\d+(\w(\d*)?|[eE]([+-]\d+))',
3587
+ '10E+12')[0], '10E+12')
3588
+
3589
+ self.assertEqual(regex.search(r'(?p)(\w|ae|oe|ue|ss)', 'ae')[0], 'ae')
3590
+ self.assertEqual(regex.search(r'(?p)one(self)?(selfsufficient)?',
3591
+ 'oneselfsufficient')[0], 'oneselfsufficient')
3592
+
3593
+ # Hg issue 151: Request: \K.
3594
+ self.assertEqual(regex.search(r'(ab\Kcd)', 'abcd').group(0, 1), ('cd',
3595
+ 'abcd'))
3596
+ self.assertEqual(regex.findall(r'\w\w\K\w\w', 'abcdefgh'), ['cd',
3597
+ 'gh'])
3598
+ self.assertEqual(regex.findall(r'(\w\w\K\w\w)', 'abcdefgh'), ['abcd',
3599
+ 'efgh'])
3600
+
3601
+ self.assertEqual(regex.search(r'(?r)(ab\Kcd)', 'abcd').group(0, 1),
3602
+ ('ab', 'abcd'))
3603
+ self.assertEqual(regex.findall(r'(?r)\w\w\K\w\w', 'abcdefgh'), ['ef',
3604
+ 'ab'])
3605
+ self.assertEqual(regex.findall(r'(?r)(\w\w\K\w\w)', 'abcdefgh'),
3606
+ ['efgh', 'abcd'])
3607
+
3608
+ # Hg issue 152: Request: (?(DEFINE)...).
3609
+ self.assertEqual(regex.search(r'(?(DEFINE)(?<quant>\d+)(?<item>\w+))(?&quant) (?&item)',
3610
+ '5 elephants')[0], '5 elephants')
3611
+
3612
+ self.assertEqual(regex.search(r'(?&routine)(?(DEFINE)(?<routine>.))', 'a').group('routine'), None)
3613
+ self.assertEqual(regex.search(r'(?&routine)(?(DEFINE)(?<routine>.))', 'a').captures('routine'), ['a'])
3614
+
3615
+ # Hg issue 153: Request: (*SKIP).
3616
+ self.assertEqual(regex.search(r'12(*FAIL)|3', '123')[0], '3')
3617
+ self.assertEqual(regex.search(r'(?r)12(*FAIL)|3', '123')[0], '3')
3618
+
3619
+ self.assertEqual(regex.search(r'\d+(*PRUNE)\d', '123'), None)
3620
+ self.assertEqual(regex.search(r'\d+(?=(*PRUNE))\d', '123')[0], '123')
3621
+ self.assertEqual(regex.search(r'\d+(*PRUNE)bcd|[3d]', '123bcd')[0],
3622
+ '123bcd')
3623
+ self.assertEqual(regex.search(r'\d+(*PRUNE)bcd|[3d]', '123zzd')[0],
3624
+ 'd')
3625
+ self.assertEqual(regex.search(r'\d+?(*PRUNE)bcd|[3d]', '123bcd')[0],
3626
+ '3bcd')
3627
+ self.assertEqual(regex.search(r'\d+?(*PRUNE)bcd|[3d]', '123zzd')[0],
3628
+ 'd')
3629
+ self.assertEqual(regex.search(r'\d++(?<=3(*PRUNE))zzd|[4d]$',
3630
+ '123zzd')[0], '123zzd')
3631
+ self.assertEqual(regex.search(r'\d++(?<=3(*PRUNE))zzd|[4d]$',
3632
+ '124zzd')[0], 'd')
3633
+ self.assertEqual(regex.search(r'\d++(?<=(*PRUNE)3)zzd|[4d]$',
3634
+ '124zzd')[0], 'd')
3635
+ self.assertEqual(regex.search(r'\d++(?<=2(*PRUNE)3)zzd|[3d]$',
3636
+ '124zzd')[0], 'd')
3637
+
3638
+ self.assertEqual(regex.search(r'(?r)\d(*PRUNE)\d+', '123'), None)
3639
+ self.assertEqual(regex.search(r'(?r)\d(?<=(*PRUNE))\d+', '123')[0],
3640
+ '123')
3641
+ self.assertEqual(regex.search(r'(?r)\d+(*PRUNE)bcd|[3d]',
3642
+ '123bcd')[0], '123bcd')
3643
+ self.assertEqual(regex.search(r'(?r)\d+(*PRUNE)bcd|[3d]',
3644
+ '123zzd')[0], 'd')
3645
+ self.assertEqual(regex.search(r'(?r)\d++(?<=3(*PRUNE))zzd|[4d]$',
3646
+ '123zzd')[0], '123zzd')
3647
+ self.assertEqual(regex.search(r'(?r)\d++(?<=3(*PRUNE))zzd|[4d]$',
3648
+ '124zzd')[0], 'd')
3649
+ self.assertEqual(regex.search(r'(?r)\d++(?<=(*PRUNE)3)zzd|[4d]$',
3650
+ '124zzd')[0], 'd')
3651
+ self.assertEqual(regex.search(r'(?r)\d++(?<=2(*PRUNE)3)zzd|[3d]$',
3652
+ '124zzd')[0], 'd')
3653
+
3654
+ self.assertEqual(regex.search(r'\d+(*SKIP)bcd|[3d]', '123bcd')[0],
3655
+ '123bcd')
3656
+ self.assertEqual(regex.search(r'\d+(*SKIP)bcd|[3d]', '123zzd')[0],
3657
+ 'd')
3658
+ self.assertEqual(regex.search(r'\d+?(*SKIP)bcd|[3d]', '123bcd')[0],
3659
+ '3bcd')
3660
+ self.assertEqual(regex.search(r'\d+?(*SKIP)bcd|[3d]', '123zzd')[0],
3661
+ 'd')
3662
+ self.assertEqual(regex.search(r'\d++(?<=3(*SKIP))zzd|[4d]$',
3663
+ '123zzd')[0], '123zzd')
3664
+ self.assertEqual(regex.search(r'\d++(?<=3(*SKIP))zzd|[4d]$',
3665
+ '124zzd')[0], 'd')
3666
+ self.assertEqual(regex.search(r'\d++(?<=(*SKIP)3)zzd|[4d]$',
3667
+ '124zzd')[0], 'd')
3668
+ self.assertEqual(regex.search(r'\d++(?<=2(*SKIP)3)zzd|[3d]$',
3669
+ '124zzd')[0], 'd')
3670
+
3671
+ self.assertEqual(regex.search(r'(?r)\d+(*SKIP)bcd|[3d]', '123bcd')[0],
3672
+ '123bcd')
3673
+ self.assertEqual(regex.search(r'(?r)\d+(*SKIP)bcd|[3d]', '123zzd')[0],
3674
+ 'd')
3675
+ self.assertEqual(regex.search(r'(?r)\d++(?<=3(*SKIP))zzd|[4d]$',
3676
+ '123zzd')[0], '123zzd')
3677
+ self.assertEqual(regex.search(r'(?r)\d++(?<=3(*SKIP))zzd|[4d]$',
3678
+ '124zzd')[0], 'd')
3679
+ self.assertEqual(regex.search(r'(?r)\d++(?<=(*SKIP)3)zzd|[4d]$',
3680
+ '124zzd')[0], 'd')
3681
+ self.assertEqual(regex.search(r'(?r)\d++(?<=2(*SKIP)3)zzd|[3d]$',
3682
+ '124zzd')[0], 'd')
3683
+
3684
+ # Hg issue 154: Segmentation fault 11 when working with an atomic group
3685
+ text = """June 30, December 31, 2013 2012
3686
+ some words follow:
3687
+ more words and numbers 1,234,567 9,876,542
3688
+ more words and numbers 1,234,567 9,876,542"""
3689
+ self.assertEqual(len(regex.findall(r'(?<!\d)(?>2014|2013 ?2012)', text)), 1)
3690
+
3691
+ # Hg issue 156: regression on atomic grouping
3692
+ self.assertEqual(regex.match('1(?>2)', '12').span(), (0, 2))
3693
+
3694
+ # Hg issue 157: regression: segfault on complex lookaround
3695
+ self.assertEqual(regex.match(r'(?V1w)(?=(?=[^A-Z]*+[A-Z])(?=[^a-z]*+[a-z]))(?=\D*+\d)(?=\p{Alphanumeric}*+\P{Alphanumeric})\A(?s:.){8,255}+\Z',
3696
+ 'AAaa11!!')[0], 'AAaa11!!')
3697
+
3698
+ # Hg issue 158: Group issue with (?(DEFINE)...)
3699
+ TEST_REGEX = regex.compile(r'''(?smx)
3700
+ (?(DEFINE)
3701
+ (?<subcat>
3702
+ ^,[^,]+,
3703
+ )
3704
+ )
3705
+
3706
+ # Group 2 is defined on this line
3707
+ ^,([^,]+),
3708
+
3709
+ (?:(?!(?&subcat)[\r\n]+(?&subcat)).)+
3710
+ ''')
3711
+
3712
+ TEST_DATA = '''
3713
+ ,Cat 1,
3714
+ ,Brand 1,
3715
+ some
3716
+ thing
3717
+ ,Brand 2,
3718
+ other
3719
+ things
3720
+ ,Cat 2,
3721
+ ,Brand,
3722
+ Some
3723
+ thing
3724
+ '''
3725
+
3726
+ self.assertEqual([m.span(1, 2) for m in
3727
+ TEST_REGEX.finditer(TEST_DATA)], [((-1, -1), (2, 7)), ((-1, -1), (54,
3728
+ 59))])
3729
+
3730
+ # Hg issue 161: Unexpected fuzzy match results
3731
+ self.assertEqual(regex.search('(abcdefgh){e}',
3732
+ '******abcdefghijklmnopqrtuvwxyz', regex.BESTMATCH).span(), (6, 14))
3733
+ self.assertEqual(regex.search('(abcdefghi){e}',
3734
+ '******abcdefghijklmnopqrtuvwxyz', regex.BESTMATCH).span(), (6, 15))
3735
+
3736
+ # Hg issue 163: allow lookarounds in conditionals.
3737
+ self.assertEqual(regex.match(r'(?:(?=\d)\d+\b|\w+)', '123abc').span(),
3738
+ (0, 6))
3739
+ self.assertEqual(regex.match(r'(?(?=\d)\d+\b|\w+)', '123abc'), None)
3740
+ self.assertEqual(regex.search(r'(?(?<=love\s)you|(?<=hate\s)her)',
3741
+ "I love you").span(), (7, 10))
3742
+ self.assertEqual(regex.findall(r'(?(?<=love\s)you|(?<=hate\s)her)',
3743
+ "I love you but I don't hate her either"), ['you', 'her'])
3744
+
3745
+ # Hg issue 180: bug of POSIX matching.
3746
+ self.assertEqual(regex.search(r'(?p)a*(.*?)', 'aaabbb').group(0, 1),
3747
+ ('aaabbb', 'bbb'))
3748
+ self.assertEqual(regex.search(r'(?p)a*(.*)', 'aaabbb').group(0, 1),
3749
+ ('aaabbb', 'bbb'))
3750
+ self.assertEqual(regex.sub(r'(?p)a*(.*?)', r'\1', 'aaabbb'), 'bbb')
3751
+ self.assertEqual(regex.sub(r'(?p)a*(.*)', r'\1', 'aaabbb'), 'bbb')
3752
+
3753
+ # Hg issue 192: Named lists reverse matching doesn't work with
3754
+ # IGNORECASE and V1
3755
+ self.assertEqual(regex.match(r'(?irV0)\L<kw>', '21', kw=['1']).span(),
3756
+ (1, 2))
3757
+ self.assertEqual(regex.match(r'(?irV1)\L<kw>', '21', kw=['1']).span(),
3758
+ (1, 2))
3759
+
3760
+ # Hg issue 193: Alternation and .REVERSE flag.
3761
+ self.assertEqual(regex.search('a|b', '111a222').span(), (3, 4))
3762
+ self.assertEqual(regex.search('(?r)a|b', '111a222').span(), (3, 4))
3763
+
3764
+ # Hg issue 194: .FULLCASE and Backreference
3765
+ self.assertEqual(regex.search(r'(?if)<(CLI)><\1>',
3766
+ '<cli><cli>').span(), (0, 10))
3767
+ self.assertEqual(regex.search(r'(?if)<(CLI)><\1>',
3768
+ '<cli><clI>').span(), (0, 10))
3769
+ self.assertEqual(regex.search(r'(?ifr)<\1><(CLI)>',
3770
+ '<cli><clI>').span(), (0, 10))
3771
+
3772
+ # Hg issue 195: Pickle (or otherwise serial) the compiled regex
3773
+ r = regex.compile(r'\L<options>', options=['foo', 'bar'])
3774
+ p = pickle.dumps(r)
3775
+ r = pickle.loads(p)
3776
+ self.assertEqual(r.match('foo').span(), (0, 3))
3777
+
3778
+ # Hg issue 196: Fuzzy matching on repeated regex not working as
3779
+ # expected
3780
+ self.assertEqual(regex.match('(x{6}){e<=1}', 'xxxxxx',
3781
+ flags=regex.BESTMATCH).span(), (0, 6))
3782
+ self.assertEqual(regex.match('(x{6}){e<=1}', 'xxxxx',
3783
+ flags=regex.BESTMATCH).span(), (0, 5))
3784
+ self.assertEqual(regex.match('(x{6}){e<=1}', 'x',
3785
+ flags=regex.BESTMATCH), None)
3786
+ self.assertEqual(regex.match('(?r)(x{6}){e<=1}', 'xxxxxx',
3787
+ flags=regex.BESTMATCH).span(), (0, 6))
3788
+ self.assertEqual(regex.match('(?r)(x{6}){e<=1}', 'xxxxx',
3789
+ flags=regex.BESTMATCH).span(), (0, 5))
3790
+ self.assertEqual(regex.match('(?r)(x{6}){e<=1}', 'x',
3791
+ flags=regex.BESTMATCH), None)
3792
+
3793
+ # Hg issue 197: ValueError in regex.compile
3794
+ self.assertRaises(regex.error, lambda:
3795
+ regex.compile(b'00000\\0\\00\\^\50\\00\\U05000000'))
3796
+
3797
+ # Hg issue 198: ValueError in regex.compile
3798
+ self.assertRaises(regex.error, lambda: regex.compile(b"{e<l"))
3799
+
3800
+ # Hg issue 199: Segfault in re.compile
3801
+ self.assertEqual(bool(regex.compile('((?0)){e}')), True)
3802
+
3803
+ # Hg issue 200: AttributeError in regex.compile with latest regex
3804
+ self.assertEqual(bool(regex.compile('\x00?(?0){e}')), True)
3805
+
3806
+ # Hg issue 201: ENHANCEMATCH crashes interpreter
3807
+ self.assertEqual(regex.findall(r'((brown)|(lazy)){1<=e<=3} ((dog)|(fox)){1<=e<=3}',
3808
+ 'The quick borwn fax jumped over the lzy hog', regex.ENHANCEMATCH),
3809
+ [('borwn', 'borwn', '', 'fax', '', 'fax'), ('lzy', '', 'lzy', 'hog',
3810
+ 'hog', '')])
3811
+
3812
+ # Hg issue 203: partial matching bug
3813
+ self.assertEqual(regex.search(r'\d\d\d-\d\d-\d\d\d\d',
3814
+ "My SSN is 999-89-76, but don't tell.", partial=True).span(), (36,
3815
+ 36))
3816
+
3817
+ # Hg issue 204: confusion of (?aif) flags
3818
+ upper_i = '\N{CYRILLIC CAPITAL LETTER SHORT I}'
3819
+ lower_i = '\N{CYRILLIC SMALL LETTER SHORT I}'
3820
+
3821
+ self.assertEqual(bool(regex.match(r'(?ui)' + upper_i,
3822
+ lower_i)), True)
3823
+ self.assertEqual(bool(regex.match(r'(?ui)' + lower_i,
3824
+ upper_i)), True)
3825
+
3826
+ self.assertEqual(bool(regex.match(r'(?ai)' + upper_i,
3827
+ lower_i)), False)
3828
+ self.assertEqual(bool(regex.match(r'(?ai)' + lower_i,
3829
+ upper_i)), False)
3830
+
3831
+ self.assertEqual(bool(regex.match(r'(?afi)' + upper_i,
3832
+ lower_i)), False)
3833
+ self.assertEqual(bool(regex.match(r'(?afi)' + lower_i,
3834
+ upper_i)), False)
3835
+
3836
+ # Hg issue 205: Named list and (?ri) flags
3837
+ self.assertEqual(bool(regex.search(r'(?i)\L<aa>', '22', aa=['121',
3838
+ '22'])), True)
3839
+ self.assertEqual(bool(regex.search(r'(?ri)\L<aa>', '22', aa=['121',
3840
+ '22'])), True)
3841
+ self.assertEqual(bool(regex.search(r'(?fi)\L<aa>', '22', aa=['121',
3842
+ '22'])), True)
3843
+ self.assertEqual(bool(regex.search(r'(?fri)\L<aa>', '22', aa=['121',
3844
+ '22'])), True)
3845
+
3846
+ # Hg issue 208: Named list, (?ri) flags, Backreference
3847
+ self.assertEqual(regex.search(r'(?r)\1dog..(?<=(\L<aa>))$', 'ccdogcc',
3848
+ aa=['bcb', 'cc']). span(), (0, 7))
3849
+ self.assertEqual(regex.search(r'(?ir)\1dog..(?<=(\L<aa>))$',
3850
+ 'ccdogcc', aa=['bcb', 'cc']). span(), (0, 7))
3851
+
3852
+ # Hg issue 210: Fuzzy matching and Backreference
3853
+ self.assertEqual(regex.search(r'(2)(?:\1{5}){e<=1}',
3854
+ '3222212').span(), (1, 7))
3855
+ self.assertEqual(regex.search(r'(\d)(?:\1{5}){e<=1}',
3856
+ '3222212').span(), (1, 7))
3857
+
3858
+ # Hg issue 211: Segmentation fault with recursive matches and atomic
3859
+ # groups
3860
+ self.assertEqual(regex.match(r'''\A(?P<whole>(?>\((?&whole)\)|[+\-]))\Z''',
3861
+ '((-))').span(), (0, 5))
3862
+ self.assertEqual(regex.match(r'''\A(?P<whole>(?>\((?&whole)\)|[+\-]))\Z''',
3863
+ '((-)+)'), None)
3864
+
3865
+ # Hg issue 212: Unexpected matching difference with .*? between re and
3866
+ # regex
3867
+ self.assertEqual(regex.match(r"x.*? (.).*\1(.*)\1",
3868
+ 'x |y| z|').span(), (0, 9))
3869
+ self.assertEqual(regex.match(r"\.sr (.*?) (.)(.*)\2(.*)\2(.*)",
3870
+ r'.sr h |<nw>|<span class="locked">|').span(), (0, 35))
3871
+
3872
+ # Hg issue 213: Segmentation Fault
3873
+ a = '"\\xF9\\x80\\xAEqdz\\x95L\\xA7\\x89[\\xFE \\x91)\\xF9]\\xDB\'\\x99\\x09=\\x00\\xFD\\x98\\x22\\xDD\\xF1\\xB6\\xC3 Z\\xB6gv\\xA5x\\x93P\\xE1r\\x14\\x8Cv\\x0C\\xC0w\\x15r\\xFFc%" '
3874
+ py_regex_pattern = r'''(?P<http_referer>((?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``)))) (?P<useragent>((?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``))))'''
3875
+ self.assertEqual(bool(regex.search(py_regex_pattern, a)), False)
3876
+
3877
+ # Hg issue 216: Invalid match when using negative lookbehind and pipe
3878
+ self.assertEqual(bool(regex.match('foo(?<=foo)', 'foo')), True)
3879
+ self.assertEqual(bool(regex.match('foo(?<!foo)', 'foo')), False)
3880
+ self.assertEqual(bool(regex.match('foo(?<=foo|x)', 'foo')), True)
3881
+ self.assertEqual(bool(regex.match('foo(?<!foo|x)', 'foo')), False)
3882
+
3883
+ # Hg issue 217: Core dump in conditional ahead match and matching \!
3884
+ # character
3885
+ self.assertEqual(bool(regex.match(r'(?(?=.*\!.*)(?P<true>.*\!\w*\:.*)|(?P<false>.*))',
3886
+ '!')), False)
3887
+
3888
+ # Hg issue 220: Misbehavior of group capture with OR operand
3889
+ self.assertEqual(regex.match(r'\w*(ea)\w*|\w*e(?!a)\w*',
3890
+ 'easier').groups(), ('ea', ))
3891
+
3892
+ # Hg issue 225: BESTMATCH in fuzzy match not working
3893
+ self.assertEqual(regex.search('(^1234$){i,d}', '12234',
3894
+ regex.BESTMATCH).span(), (0, 5))
3895
+ self.assertEqual(regex.search('(^1234$){i,d}', '12234',
3896
+ regex.BESTMATCH).fuzzy_counts, (0, 1, 0))
3897
+
3898
+ self.assertEqual(regex.search('(^1234$){s,i,d}', '12234',
3899
+ regex.BESTMATCH).span(), (0, 5))
3900
+ self.assertEqual(regex.search('(^1234$){s,i,d}', '12234',
3901
+ regex.BESTMATCH).fuzzy_counts, (0, 1, 0))
3902
+
3903
+ # Hg issue 226: Error matching at start of string
3904
+ self.assertEqual(regex.search('(^123$){s,i,d}', 'xxxxxxxx123',
3905
+ regex.BESTMATCH).span(), (0, 11))
3906
+ self.assertEqual(regex.search('(^123$){s,i,d}', 'xxxxxxxx123',
3907
+ regex.BESTMATCH).fuzzy_counts, (0, 8, 0))
3908
+
3909
+ # Hg issue 227: Incorrect behavior for ? operator with UNICODE +
3910
+ # IGNORECASE
3911
+ self.assertEqual(regex.search(r'a?yz', 'xxxxyz', flags=regex.FULLCASE |
3912
+ regex.IGNORECASE).span(), (4, 6))
3913
+
3914
+ # Hg issue 230: Is it a bug of (?(DEFINE)...)
3915
+ self.assertEqual(regex.findall(r'(?:(?![a-d]).)+', 'abcdefgh'),
3916
+ ['efgh'])
3917
+ self.assertEqual(regex.findall(r'''(?(DEFINE)(?P<mydef>(?:(?![a-d]).)))(?&mydef)+''',
3918
+ 'abcdefgh'), ['efgh'])
3919
+
3920
+ # Hg issue 238: Not fully re backward compatible
3921
+ self.assertEqual(regex.findall(r'((\w{1,3})(\.{2,10})){1,3}',
3922
+ '"Erm....yes. T..T...Thank you for that."'), [('Erm....', 'Erm',
3923
+ '....'), ('T...', 'T', '...')])
3924
+ self.assertEqual(regex.findall(r'((\w{1,3})(\.{2,10})){3}',
3925
+ '"Erm....yes. T..T...Thank you for that."'), [])
3926
+ self.assertEqual(regex.findall(r'((\w{1,3})(\.{2,10})){2}',
3927
+ '"Erm....yes. T..T...Thank you for that."'), [('T...', 'T', '...')])
3928
+ self.assertEqual(regex.findall(r'((\w{1,3})(\.{2,10})){1}',
3929
+ '"Erm....yes. T..T...Thank you for that."'), [('Erm....', 'Erm',
3930
+ '....'), ('T..', 'T', '..'), ('T...', 'T', '...')])
3931
+
3932
+ # Hg issue 247: Unexpected result with fuzzy matching and lookahead
3933
+ # expression
3934
+ self.assertEqual(regex.search(r'(?:ESTONIA(?!\w)){e<=1}',
3935
+ 'ESTONIAN WORKERS').group(), 'ESTONIAN')
3936
+ self.assertEqual(regex.search(r'(?:ESTONIA(?=\W)){e<=1}',
3937
+ 'ESTONIAN WORKERS').group(), 'ESTONIAN')
3938
+
3939
+ self.assertEqual(regex.search(r'(?:(?<!\w)ESTONIA){e<=1}',
3940
+ 'BLUB NESTONIA').group(), 'NESTONIA')
3941
+ self.assertEqual(regex.search(r'(?:(?<=\W)ESTONIA){e<=1}',
3942
+ 'BLUB NESTONIA').group(), 'NESTONIA')
3943
+
3944
+ self.assertEqual(regex.search(r'(?r)(?:ESTONIA(?!\w)){e<=1}',
3945
+ 'ESTONIAN WORKERS').group(), 'ESTONIAN')
3946
+ self.assertEqual(regex.search(r'(?r)(?:ESTONIA(?=\W)){e<=1}',
3947
+ 'ESTONIAN WORKERS').group(), 'ESTONIAN')
3948
+
3949
+ self.assertEqual(regex.search(r'(?r)(?:(?<!\w)ESTONIA){e<=1}',
3950
+ 'BLUB NESTONIA').group(), 'NESTONIA')
3951
+ self.assertEqual(regex.search(r'(?r)(?:(?<=\W)ESTONIA){e<=1}',
3952
+ 'BLUB NESTONIA').group(), 'NESTONIA')
3953
+
3954
+ # Hg issue 248: Unexpected result with fuzzy matching and more than one
3955
+ # non-greedy quantifier
3956
+ self.assertEqual(regex.search(r'(?:A.*B.*CDE){e<=2}',
3957
+ 'A B CYZ').group(), 'A B CYZ')
3958
+ self.assertEqual(regex.search(r'(?:A.*B.*?CDE){e<=2}',
3959
+ 'A B CYZ').group(), 'A B CYZ')
3960
+ self.assertEqual(regex.search(r'(?:A.*?B.*CDE){e<=2}',
3961
+ 'A B CYZ').group(), 'A B CYZ')
3962
+ self.assertEqual(regex.search(r'(?:A.*?B.*?CDE){e<=2}',
3963
+ 'A B CYZ').group(), 'A B CYZ')
3964
+
3965
+ # Hg issue 249: Add an option to regex.escape() to not escape spaces
3966
+ self.assertEqual(regex.escape(' ,0A[', special_only=False, literal_spaces=False), '\\ \\,0A\\[')
3967
+ self.assertEqual(regex.escape(' ,0A[', special_only=False, literal_spaces=True), ' \\,0A\\[')
3968
+ self.assertEqual(regex.escape(' ,0A[', special_only=True, literal_spaces=False), '\\ ,0A\\[')
3969
+ self.assertEqual(regex.escape(' ,0A[', special_only=True, literal_spaces=True), ' ,0A\\[')
3970
+
3971
+ self.assertEqual(regex.escape(' ,0A['), '\\ ,0A\\[')
3972
+
3973
+ # Hg issue 251: Segfault with a particular expression
3974
+ self.assertEqual(regex.search(r'(?(?=A)A|B)', 'A').span(), (0, 1))
3975
+ self.assertEqual(regex.search(r'(?(?=A)A|B)', 'B').span(), (0, 1))
3976
+ self.assertEqual(regex.search(r'(?(?=A)A|)', 'B').span(), (0, 0))
3977
+ self.assertEqual(regex.search(r'(?(?=X)X|)', '').span(), (0, 0))
3978
+ self.assertEqual(regex.search(r'(?(?=X))', '').span(), (0, 0))
3979
+
3980
+ # Hg issue 252: Empty capture strings when using DEFINE group reference
3981
+ # within look-behind expression
3982
+ self.assertEqual(regex.search(r'(?(DEFINE)(?<func>.))(?&func)',
3983
+ 'abc').groups(), (None, ))
3984
+ self.assertEqual(regex.search(r'(?(DEFINE)(?<func>.))(?&func)',
3985
+ 'abc').groupdict(), {'func': None})
3986
+ self.assertEqual(regex.search(r'(?(DEFINE)(?<func>.))(?&func)',
3987
+ 'abc').capturesdict(), {'func': ['a']})
3988
+
3989
+ self.assertEqual(regex.search(r'(?(DEFINE)(?<func>.))(?=(?&func))',
3990
+ 'abc').groups(), (None, ))
3991
+ self.assertEqual(regex.search(r'(?(DEFINE)(?<func>.))(?=(?&func))',
3992
+ 'abc').groupdict(), {'func': None})
3993
+ self.assertEqual(regex.search(r'(?(DEFINE)(?<func>.))(?=(?&func))',
3994
+ 'abc').capturesdict(), {'func': ['a']})
3995
+
3996
+ self.assertEqual(regex.search(r'(?(DEFINE)(?<func>.)).(?<=(?&func))',
3997
+ 'abc').groups(), (None, ))
3998
+ self.assertEqual(regex.search(r'(?(DEFINE)(?<func>.)).(?<=(?&func))',
3999
+ 'abc').groupdict(), {'func': None})
4000
+ self.assertEqual(regex.search(r'(?(DEFINE)(?<func>.)).(?<=(?&func))',
4001
+ 'abc').capturesdict(), {'func': ['a']})
4002
+
4003
+ # Hg issue 271: Comment logic different between Re and Regex
4004
+ self.assertEqual(bool(regex.match(r'ab(?#comment\))cd', 'abcd')), True)
4005
+
4006
+ # Hg issue 276: Partial Matches yield incorrect matches and bounds
4007
+ self.assertEqual(regex.search(r'[a-z]+ [a-z]*?:', 'foo bar',
4008
+ partial=True).span(), (0, 7))
4009
+ self.assertEqual(regex.search(r'(?r):[a-z]*? [a-z]+', 'foo bar',
4010
+ partial=True).span(), (0, 7))
4011
+
4012
+ # Hg issue 291: Include Script Extensions as a supported Unicode property
4013
+ self.assertEqual(bool(regex.match(r'(?u)\p{Script:Beng}',
4014
+ '\u09EF')), True)
4015
+ self.assertEqual(bool(regex.match(r'(?u)\p{Script:Bengali}',
4016
+ '\u09EF')), True)
4017
+ self.assertEqual(bool(regex.match(r'(?u)\p{Script_Extensions:Bengali}',
4018
+ '\u09EF')), True)
4019
+ self.assertEqual(bool(regex.match(r'(?u)\p{Script_Extensions:Beng}',
4020
+ '\u09EF')), True)
4021
+ self.assertEqual(bool(regex.match(r'(?u)\p{Script_Extensions:Cakm}',
4022
+ '\u09EF')), True)
4023
+ self.assertEqual(bool(regex.match(r'(?u)\p{Script_Extensions:Sylo}',
4024
+ '\u09EF')), True)
4025
+
4026
+ # Hg issue 293: scx (Script Extensions) property currently matches
4027
+ # incorrectly
4028
+ self.assertEqual(bool(regex.match(r'(?u)\p{scx:Latin}', 'P')), True)
4029
+ self.assertEqual(bool(regex.match(r'(?u)\p{scx:Ahom}', 'P')), False)
4030
+ self.assertEqual(bool(regex.match(r'(?u)\p{scx:Common}', '4')), True)
4031
+ self.assertEqual(bool(regex.match(r'(?u)\p{scx:Caucasian_Albanian}', '4')),
4032
+ False)
4033
+ self.assertEqual(bool(regex.match(r'(?u)\p{scx:Arabic}', '\u062A')), True)
4034
+ self.assertEqual(bool(regex.match(r'(?u)\p{scx:Balinese}', '\u062A')),
4035
+ False)
4036
+ self.assertEqual(bool(regex.match(r'(?u)\p{scx:Devanagari}', '\u091C')),
4037
+ True)
4038
+ self.assertEqual(bool(regex.match(r'(?u)\p{scx:Batak}', '\u091C')), False)
4039
+
4040
+ # Hg issue 296: Group references are not taken into account when group is reporting the last match
4041
+ self.assertEqual(regex.fullmatch('(?P<x>.)*(?&x)', 'abc').captures('x'),
4042
+ ['a', 'b', 'c'])
4043
+ self.assertEqual(regex.fullmatch('(?P<x>.)*(?&x)', 'abc').group('x'),
4044
+ 'b')
4045
+
4046
+ self.assertEqual(regex.fullmatch('(?P<x>.)(?P<x>.)(?P<x>.)',
4047
+ 'abc').captures('x'), ['a', 'b', 'c'])
4048
+ self.assertEqual(regex.fullmatch('(?P<x>.)(?P<x>.)(?P<x>.)',
4049
+ 'abc').group('x'), 'c')
4050
+
4051
+ # Hg issue 299: Partial gives misleading results with "open ended" regexp
4052
+ self.assertEqual(regex.match('(?:ab)*', 'ab', partial=True).partial,
4053
+ False)
4054
+ self.assertEqual(regex.match('(?:ab)*', 'abab', partial=True).partial,
4055
+ False)
4056
+ self.assertEqual(regex.match('(?:ab)*?', '', partial=True).partial,
4057
+ False)
4058
+ self.assertEqual(regex.match('(?:ab)*+', 'ab', partial=True).partial,
4059
+ False)
4060
+ self.assertEqual(regex.match('(?:ab)*+', 'abab', partial=True).partial,
4061
+ False)
4062
+ self.assertEqual(regex.match('(?:ab)+', 'ab', partial=True).partial,
4063
+ False)
4064
+ self.assertEqual(regex.match('(?:ab)+', 'abab', partial=True).partial,
4065
+ False)
4066
+ self.assertEqual(regex.match('(?:ab)+?', 'ab', partial=True).partial,
4067
+ False)
4068
+ self.assertEqual(regex.match('(?:ab)++', 'ab', partial=True).partial,
4069
+ False)
4070
+ self.assertEqual(regex.match('(?:ab)++', 'abab', partial=True).partial,
4071
+ False)
4072
+
4073
+ self.assertEqual(regex.match('(?r)(?:ab)*', 'ab', partial=True).partial,
4074
+ False)
4075
+ self.assertEqual(regex.match('(?r)(?:ab)*', 'abab', partial=True).partial,
4076
+ False)
4077
+ self.assertEqual(regex.match('(?r)(?:ab)*?', '', partial=True).partial,
4078
+ False)
4079
+ self.assertEqual(regex.match('(?r)(?:ab)*+', 'ab', partial=True).partial,
4080
+ False)
4081
+ self.assertEqual(regex.match('(?r)(?:ab)*+', 'abab', partial=True).partial,
4082
+ False)
4083
+ self.assertEqual(regex.match('(?r)(?:ab)+', 'ab', partial=True).partial,
4084
+ False)
4085
+ self.assertEqual(regex.match('(?r)(?:ab)+', 'abab', partial=True).partial,
4086
+ False)
4087
+ self.assertEqual(regex.match('(?r)(?:ab)+?', 'ab', partial=True).partial,
4088
+ False)
4089
+ self.assertEqual(regex.match('(?r)(?:ab)++', 'ab', partial=True).partial,
4090
+ False)
4091
+ self.assertEqual(regex.match('(?r)(?:ab)++', 'abab', partial=True).partial,
4092
+ False)
4093
+
4094
+ self.assertEqual(regex.match('a*', '', partial=True).partial, False)
4095
+ self.assertEqual(regex.match('a*?', '', partial=True).partial, False)
4096
+ self.assertEqual(regex.match('a*+', '', partial=True).partial, False)
4097
+ self.assertEqual(regex.match('a+', '', partial=True).partial, True)
4098
+ self.assertEqual(regex.match('a+?', '', partial=True).partial, True)
4099
+ self.assertEqual(regex.match('a++', '', partial=True).partial, True)
4100
+ self.assertEqual(regex.match('a+', 'a', partial=True).partial, False)
4101
+ self.assertEqual(regex.match('a+?', 'a', partial=True).partial, False)
4102
+ self.assertEqual(regex.match('a++', 'a', partial=True).partial, False)
4103
+
4104
+ self.assertEqual(regex.match('(?r)a*', '', partial=True).partial, False)
4105
+ self.assertEqual(regex.match('(?r)a*?', '', partial=True).partial, False)
4106
+ self.assertEqual(regex.match('(?r)a*+', '', partial=True).partial, False)
4107
+ self.assertEqual(regex.match('(?r)a+', '', partial=True).partial, True)
4108
+ self.assertEqual(regex.match('(?r)a+?', '', partial=True).partial, True)
4109
+ self.assertEqual(regex.match('(?r)a++', '', partial=True).partial, True)
4110
+ self.assertEqual(regex.match('(?r)a+', 'a', partial=True).partial, False)
4111
+ self.assertEqual(regex.match('(?r)a+?', 'a', partial=True).partial, False)
4112
+ self.assertEqual(regex.match('(?r)a++', 'a', partial=True).partial, False)
4113
+
4114
+ self.assertEqual(regex.match(r"(?:\s*\w+'*)+", 'whatever', partial=True).partial,
4115
+ False)
4116
+
4117
+ # Hg issue 300: segmentation fault
4118
+ pattern = ('(?P<termini5>GGCGTCACACTTTGCTATGCCATAGCAT[AG]TTTATCCATAAGA'
4119
+ 'TTAGCGGATCCTACCTGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCATAACAGAACATATTGA'
4120
+ 'CTATCCGGTATTACCCGGCATGACAGGAGTAAAA){e<=1}'
4121
+ '(?P<gene>[ACGT]{1059}){e<=2}'
4122
+ '(?P<spacer>TAATCGTCTTGTTTGATACACAAGGGTCGCATCTGCGGCCCTTTTGCTTTTTTAAG'
4123
+ 'TTGTAAGGATATGCCATTCTAGA){e<=0}'
4124
+ '(?P<barcode>[ACGT]{18}){e<=0}'
4125
+ '(?P<termini3>AGATCGG[CT]AGAGCGTCGTGTAGGGAAAGAGTGTGG){e<=1}')
4126
+
4127
+ text = ('GCACGGCGTCACACTTTGCTATGCCATAGCATATTTATCCATAAGATTAGCGGATCCTACC'
4128
+ 'TGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCATAACAGAACATATTGACTATCCGGTATTACC'
4129
+ 'CGGCATGACAGGAGTAAAAATGGCTATCGACGAAAACAAACAGAAAGCGTTGGCGGCAGCACTGGGC'
4130
+ 'CAGATTGAGAAACAATTTGGTAAAGGCTCCATCATGCGCCTGGGTGAAGACCGTTCCATGGATGTGG'
4131
+ 'AAACCATCTCTACCGGTTCGCTTTCACTGGATATCGCGCTTGGGGCAGGTGGTCTGCCGATGGGCCG'
4132
+ 'TATCGTCGAAATCTACGGACCGGAATCTTCCGGTAAAACCACGCTGACGCTGCAGGTGATCGCCGCA'
4133
+ 'GCGCAGCGTGAAGGTAAAACCTGTGCGTTTATCGATGCTGAACACGCGCTGGACCCAATCTACGCAC'
4134
+ 'GTAAACTGGGCGTCGATATCGACAACCTGCTGTGCTCCCAGCCGGACACCGGCGAGCAGGCACTGGA'
4135
+ 'AATCTGTGACGCCCTGGCGCGTTCTGGCGCAGTAGACGTTATCGTCGTTGACTCCGTGGCGGCACTG'
4136
+ 'ACGCCGAAAGCGGAAATCGAAGGCGAAATCGGCGACTCTCATATGGGCCTTGCGGCACGTATGATGA'
4137
+ 'GCCAGGCGATGCGTAAGCTGGCGGGTAACCTGAAGCAGTCCAACACGCTGCTGATCTTCATCAACCC'
4138
+ 'CATCCGTATGAAAATTGGTGTGATGTTCGGCAACCCGGAAACCACTTACCGGTGGTAACGCGCTGAA'
4139
+ 'ATTCTACGCCTCTGTTCGTCTCGACATCCGTTAAATCGGCGCGGTGAAAGAGGGCGAAAACGTGGTG'
4140
+ 'GGTAGCGAAACCCGCGTGAAAGTGGTGAAGAACAAAATCGCTGCGCCGTTTAAACAGGCTGAATTCC'
4141
+ 'AGATCCTCTACGGCGAAGGTATCAACTTCTACCCCGAACTGGTTGACCTGGGCGTAAAAGAGAAGCT'
4142
+ 'GATCGAGAAAGCAGGCGCGTGGTACAGCTACAAAGGTGAGAAGATCGGTCAGGGTAAAGCGAATGCG'
4143
+ 'ACTGCCTGGCTGAAATTTAACCCGGAAACCGCGAAAGAGATCGAGTGAAAAGTACGTGAGTTGCTGC'
4144
+ 'TGAGCAACCCGAACTCAACGCCGGATTTCTCTGTAGATGATAGCGAAGGCGTAGCAGAAACTAACGA'
4145
+ 'AGATTTTTAATCGTCTTGTTTGATACACAAGGGTCGCATCTGCGGCCCTTTTGCTTTTTTAAGTTGT'
4146
+ 'AAGGATATGCCATTCTAGACAGTTAACACACCAACAAAGATCGGTAGAGCGTCGTGTAGGGAAAGAG'
4147
+ 'TGTGGTACC')
4148
+
4149
+ m = regex.search(pattern, text, flags=regex.BESTMATCH)
4150
+ self.assertEqual(m.fuzzy_counts, (0, 1, 0))
4151
+ self.assertEqual(m.fuzzy_changes, ([], [1206], []))
4152
+
4153
+ # Hg issue 306: Fuzzy match parameters not respecting quantifier scope
4154
+ self.assertEqual(regex.search(r'(?e)(dogf(((oo){e<1})|((00){e<1}))d){e<2}',
4155
+ 'dogfood').fuzzy_counts, (0, 0, 0))
4156
+ self.assertEqual(regex.search(r'(?e)(dogf(((oo){e<1})|((00){e<1}))d){e<2}',
4157
+ 'dogfoot').fuzzy_counts, (1, 0, 0))
4158
+
4159
+ # Hg issue 312: \X not matching graphemes with zero-width-joins
4160
+ self.assertEqual(regex.findall(r'\X',
4161
+ '\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466'),
4162
+ ['\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466'])
4163
+
4164
+ # Hg issue 320: Abnormal performance
4165
+ self.assertEqual(bool(regex.search(r'(?=a)a', 'a')), True)
4166
+ self.assertEqual(bool(regex.search(r'(?!b)a', 'a')), True)
4167
+
4168
+ # Hg issue 327: .fullmatch() causes MemoryError
4169
+ self.assertEqual(regex.fullmatch(r'((\d)*?)*?', '123').span(), (0, 3))
4170
+
4171
+ # Hg issue 329: Wrong group matches when question mark quantifier is used within a look behind
4172
+ self.assertEqual(regex.search(r'''(?(DEFINE)(?<mydef>(?<wrong>THIS_SHOULD_NOT_MATCHx?)|(?<right>right))).*(?<=(?&mydef).*)''',
4173
+ 'x right').capturesdict(), {'mydef': ['right'], 'wrong': [], 'right':
4174
+ ['right']})
4175
+
4176
+ # Hg issue 338: specifying allowed characters when fuzzy-matching
4177
+ self.assertEqual(bool(regex.match(r'(?:cat){e<=1:[u]}', 'cut')), True)
4178
+ self.assertEqual(bool(regex.match(r'(?:cat){e<=1:u}', 'cut')), True)
4179
+
4180
+ # Hg issue 353: fuzzy changes negative indexes
4181
+ self.assertEqual(regex.search(r'(?be)(AGTGTTCCCCGCGCCAGCGGGGATAAACCG){s<=5,i<=5,d<=5,s+i+d<=10}',
4182
+ 'TTCCCCGCGCCAGCGGGGATAAACCG').fuzzy_changes, ([], [], [0, 1, 3, 5]))
4183
+
4184
+ # Git issue 364: Contradictory values in fuzzy_counts and fuzzy_changes
4185
+ self.assertEqual(regex.match(r'(?:bc){e}', 'c').fuzzy_counts, (1, 0,
4186
+ 1))
4187
+ self.assertEqual(regex.match(r'(?:bc){e}', 'c').fuzzy_changes, ([0],
4188
+ [], [1]))
4189
+ self.assertEqual(regex.match(r'(?e)(?:bc){e}', 'c').fuzzy_counts, (0,
4190
+ 0, 1))
4191
+ self.assertEqual(regex.match(r'(?e)(?:bc){e}', 'c').fuzzy_changes,
4192
+ ([], [], [0]))
4193
+ self.assertEqual(regex.match(r'(?b)(?:bc){e}', 'c').fuzzy_counts, (0,
4194
+ 0, 1))
4195
+ self.assertEqual(regex.match(r'(?b)(?:bc){e}', 'c').fuzzy_changes,
4196
+ ([], [], [0]))
4197
+
4198
+ # Git issue 370: Confusions about Fuzzy matching behavior
4199
+ self.assertEqual(regex.match('(?e)(?:^(\\$ )?\\d{1,3}(,\\d{3})*(\\.\\d{2})$){e}',
4200
+ '$ 10,112.111.12').fuzzy_counts, (6, 0, 5))
4201
+ self.assertEqual(regex.match('(?e)(?:^(\\$ )?\\d{1,3}(,\\d{3})*(\\.\\d{2})$){s<=1}',
4202
+ '$ 10,112.111.12').fuzzy_counts, (1, 0, 0))
4203
+ self.assertEqual(regex.match('(?e)(?:^(\\$ )?\\d{1,3}(,\\d{3})*(\\.\\d{2})$){s<=1,i<=1,d<=1}',
4204
+ '$ 10,112.111.12').fuzzy_counts, (1, 0, 0))
4205
+ self.assertEqual(regex.match('(?e)(?:^(\\$ )?\\d{1,3}(,\\d{3})*(\\.\\d{2})$){s<=3}',
4206
+ '$ 10,1a2.111.12').fuzzy_counts, (2, 0, 0))
4207
+ self.assertEqual(regex.match('(?e)(?:^(\\$ )?\\d{1,3}(,\\d{3})*(\\.\\d{2})$){s<=2}',
4208
+ '$ 10,1a2.111.12').fuzzy_counts, (2, 0, 0))
4209
+
4210
+ self.assertEqual(regex.fullmatch(r'(?e)(?:0?,0(?:,0)?){s<=1,d<=1}',
4211
+ ',0;0').fuzzy_counts, (1, 0, 0))
4212
+ self.assertEqual(regex.fullmatch(r'(?e)(?:0??,0(?:,0)?){s<=1,d<=1}',
4213
+ ',0;0').fuzzy_counts, (1, 0, 0))
4214
+
4215
+ # Git issue 371: Specifying character set when fuzzy-matching allows characters not in the set
4216
+ self.assertEqual(regex.search(r"\b(?e)(?:\d{6,20}){i<=5:[\-\\\/]}\b",
4217
+ "cat dog starting at 00:01132.000. hello world"), None)
4218
+
4219
+ # Git issue 385: Comments in expressions
4220
+ self.assertEqual(bool(regex.compile('(?#)')), True)
4221
+ self.assertEqual(bool(regex.compile('(?x)(?#)')), True)
4222
+
4223
+ # Git issue 394: Unexpected behaviour in fuzzy matching with limited character set with IGNORECASE flag
4224
+ self.assertEqual(regex.findall(r'(\d+){i<=2:[ab]}', '123X4Y5'),
4225
+ ['123', '4', '5'])
4226
+ self.assertEqual(regex.findall(r'(?i)(\d+){i<=2:[ab]}', '123X4Y5'),
4227
+ ['123', '4', '5'])
4228
+
4229
+ # Git issue 403: Fuzzy matching with wrong distance (unnecessary substitutions)
4230
+ self.assertEqual(regex.match(r'^(test){e<=5}$', 'terstin',
4231
+ flags=regex.B).fuzzy_counts, (0, 3, 0))
4232
+
4233
+ # Git issue 408: regex fails with a quantified backreference but succeeds with repeated backref
4234
+ self.assertEqual(bool(regex.match(r"(?:(x*)\1\1\1)*x$", "x" * 5)), True)
4235
+ self.assertEqual(bool(regex.match(r"(?:(x*)\1{3})*x$", "x" * 5)), True)
4236
+
4237
+ # Git issue 415: Fuzzy character restrictions don't apply to insertions at "right edge"
4238
+ self.assertEqual(regex.match(r't(?:es){s<=1:\d}t', 'te5t').group(),
4239
+ 'te5t')
4240
+ self.assertEqual(regex.match(r't(?:es){s<=1:\d}t', 'tezt'), None)
4241
+ self.assertEqual(regex.match(r't(?:es){i<=1:\d}t', 'tes5t').group(),
4242
+ 'tes5t')
4243
+ self.assertEqual(regex.match(r't(?:es){i<=1:\d}t', 'teszt'), None)
4244
+ self.assertEqual(regex.match(r't(?:es){i<=1:\d}t',
4245
+ 'tes5t').fuzzy_changes, ([], [3], []))
4246
+ self.assertEqual(regex.match(r't(es){i<=1,0<e<=1}t', 'tes5t').group(),
4247
+ 'tes5t')
4248
+ self.assertEqual(regex.match(r't(?:es){i<=1,0<e<=1:\d}t',
4249
+ 'tes5t').fuzzy_changes, ([], [3], []))
4250
+
4251
+ # Git issue 421: Fatal Python error: Segmentation fault
4252
+ self.assertEqual(regex.compile(r"(\d+ week|\d+ days)").split("7 days"), ['', '7 days', ''])
4253
+ self.assertEqual(regex.compile(r"(\d+ week|\d+ days)").split("10 days"), ['', '10 days', ''])
4254
+
4255
+ self.assertEqual(regex.compile(r"[ ]* Name[ ]*\* ").search(" Name *"), None)
4256
+
4257
+ self.assertEqual(regex.compile('a|\\.*pb\\.py').search('.geojs'), None)
4258
+
4259
+ p = regex.compile('(?<=(?:\\A|\\W|_))(\\d+ decades? ago|\\d+ minutes ago|\\d+ seconds ago|in \\d+ decades?|\\d+ months ago|in \\d+ minutes|\\d+ minute ago|in \\d+ seconds|\\d+ second ago|\\d+ years ago|in \\d+ months|\\d+ month ago|\\d+ weeks ago|\\d+ hours ago|in \\d+ minute|in \\d+ second|in \\d+ years|\\d+ year ago|in \\d+ month|in \\d+ weeks|\\d+ week ago|\\d+ days ago|in \\d+ hours|\\d+ hour ago|in \\d+ year|in \\d+ week|in \\d+ days|\\d+ day ago|in \\d+ hour|\\d+ min ago|\\d+ sec ago|\\d+ yr ago|\\d+ mo ago|\\d+ wk ago|in \\d+ day|\\d+ hr ago|in \\d+ min|in \\d+ sec|in \\d+ yr|in \\d+ mo|in \\d+ wk|in \\d+ hr)(?=(?:\\Z|\\W|_))', flags=regex.I | regex.V0)
4260
+ self.assertEqual(p.search('1 month ago').group(), '1 month ago')
4261
+ self.assertEqual(p.search('9 hours 1 minute ago').group(), '1 minute ago')
4262
+ self.assertEqual(p.search('10 months 1 hour ago').group(), '1 hour ago')
4263
+ self.assertEqual(p.search('1 month 10 hours ago').group(), '10 hours ago')
4264
+
4265
+ # Git issue 427: Possible bug with BESTMATCH
4266
+ sequence = 'TTCAGACGTGTGCTCTTCCGATCTCAATACCGACTCCTCACTGTGTGTCT'
4267
+ pattern = r'(?P<insert>.*)(?P<anchor>CTTCC){e<=1}(?P<umi>([ACGT]){4,6})(?P<sid>CAATACCGACTCCTCACTGTGT){e<=2}(?P<end>([ACGT]){0,6}$)'
4268
+
4269
+ m = regex.match(pattern, sequence, flags=regex.BESTMATCH)
4270
+ self.assertEqual(m.span(), (0, 50))
4271
+ self.assertEqual(m.groupdict(), {'insert': 'TTCAGACGTGTGCT', 'anchor': 'CTTCC', 'umi': 'GATCT', 'sid': 'CAATACCGACTCCTCACTGTGT', 'end': 'GTCT'})
4272
+
4273
+ m = regex.match(pattern, sequence, flags=regex.ENHANCEMATCH)
4274
+ self.assertEqual(m.span(), (0, 50))
4275
+ self.assertEqual(m.groupdict(), {'insert': 'TTCAGACGTGTGCT', 'anchor': 'CTTCC', 'umi': 'GATCT', 'sid': 'CAATACCGACTCCTCACTGTGT', 'end': 'GTCT'})
4276
+
4277
+ # Git issue 433: Disagreement between fuzzy_counts and fuzzy_changes
4278
+ pattern = r'(?P<insert>.*)(?P<anchor>AACACTGG){e<=1}(?P<umi>([AT][CG]){5}){e<=2}(?P<sid>GTAACCGAAG){e<=2}(?P<end>([ACGT]){0,6}$)'
4279
+
4280
+ sequence = 'GGAAAACACTGGTCTCAGTCTCGTAACCGAAGTGGTCG'
4281
+ m = regex.match(pattern, sequence, flags=regex.BESTMATCH)
4282
+ self.assertEqual(m.fuzzy_counts, (0, 0, 0))
4283
+ self.assertEqual(m.fuzzy_changes, ([], [], []))
4284
+
4285
+ sequence = 'GGAAAACACTGGTCTCAGTCTCGTCCCCGAAGTGGTCG'
4286
+ m = regex.match(pattern, sequence, flags=regex.BESTMATCH)
4287
+ self.assertEqual(m.fuzzy_counts, (2, 0, 0))
4288
+ self.assertEqual(m.fuzzy_changes, ([24, 25], [], []))
4289
+
4290
+ # Git issue 439: Unmatched groups: sub vs subf
4291
+ self.assertEqual(regex.sub(r'(test1)|(test2)', r'matched: \1\2', 'test1'), 'matched: test1')
4292
+ self.assertEqual(regex.subf(r'(test1)|(test2)', r'matched: {1}{2}', 'test1'), 'matched: test1')
4293
+ self.assertEqual(regex.search(r'(test1)|(test2)', 'matched: test1').expand(r'matched: \1\2'), 'matched: test1'),
4294
+ self.assertEqual(regex.search(r'(test1)|(test2)', 'matched: test1').expandf(r'matched: {1}{2}'), 'matched: test1')
4295
+
4296
+ # Git issue 442: Fuzzy regex matching doesn't seem to test insertions correctly
4297
+ self.assertEqual(regex.search(r"(?:\bha\b){i:[ ]}", "having"), None)
4298
+ self.assertEqual(regex.search(r"(?:\bha\b){i:[ ]}", "having", flags=regex.I), None)
4299
+
4300
+ # Git issue 467: Scoped inline flags 'a', 'u' and 'L' affect global flags
4301
+ self.assertEqual(regex.match(r'(?a:\w)\w', 'd\N{CYRILLIC SMALL LETTER ZHE}').span(), (0, 2))
4302
+ self.assertEqual(regex.match(r'(?a:\w)(?u:\w)', 'd\N{CYRILLIC SMALL LETTER ZHE}').span(), (0, 2))
4303
+
4304
+ # Git issue 473: Emoji classified as letter
4305
+ self.assertEqual(regex.match(r'^\p{LC}+$', '\N{SMILING CAT FACE WITH OPEN MOUTH}'), None)
4306
+ self.assertEqual(regex.match(r'^\p{So}+$', '\N{SMILING CAT FACE WITH OPEN MOUTH}').span(), (0, 1))
4307
+
4308
+ # Git issue 474: regex has no equivalent to `re.Match.groups()` for captures
4309
+ self.assertEqual(regex.match(r'(.)+', 'abc').allcaptures(), (['abc'], ['a', 'b', 'c']))
4310
+ self.assertEqual(regex.match(r'(.)+', 'abc').allspans(), ([(0, 3)], [(0, 1), (1, 2), (2, 3)]))
4311
+
4312
+ # Git issue 477: \v for vertical spacing
4313
+ self.assertEqual(bool(regex.fullmatch(r'\p{HorizSpace}+', '\t \xA0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u202F\u205F\u3000')), True)
4314
+ self.assertEqual(bool(regex.fullmatch(r'\p{VertSpace}+', '\n\v\f\r\x85\u2028\u2029')), True)
4315
+
4316
+ # Git issue 479: Segmentation fault when using conditional pattern
4317
+ self.assertEqual(regex.match(r'(?(?<=A)|(?(?![^B])C|D))', 'A'), None)
4318
+ self.assertEqual(regex.search(r'(?(?<=A)|(?(?![^B])C|D))', 'A').span(), (1, 1))
4319
+
4320
+ # Git issue 494: Backtracking failure matching regex ^a?(a?)b?c\1$ against string abca
4321
+ self.assertEqual(regex.search(r"^a?(a?)b?c\1$", "abca").span(), (0, 4))
4322
+
4323
+ # Git issue 498: Conditional negative lookahead inside positive lookahead fails to match
4324
+ self.assertEqual(regex.match(r'(?(?=a).|..)', 'ab').span(), (0, 1))
4325
+ self.assertEqual(regex.match(r'(?(?=b).|..)', 'ab').span(), (0, 2))
4326
+ self.assertEqual(regex.match(r'(?(?!a).|..)', 'ab').span(), (0, 2))
4327
+ self.assertEqual(regex.match(r'(?(?!b).|..)', 'ab').span(), (0, 1))
4328
+
4329
+ # Git issue 525: segfault when fuzzy matching empty list
4330
+ self.assertEqual(regex.match(r"(\L<foo>){e<=5}", "blah", foo=[]).span(), (0, 0))
4331
+
4332
+ # Git issue 527: `VERBOSE`/`X` flag breaks `\N` escapes
4333
+ self.assertEqual(regex.compile(r'\N{LATIN SMALL LETTER A}').match('a').span(), (0, 1))
4334
+ self.assertEqual(regex.compile(r'\N{LATIN SMALL LETTER A}', flags=regex.X).match('a').span(), (0, 1))
4335
+
4336
+ # Git issue 539: Bug: Partial matching fails on a simple example
4337
+ self.assertEqual(regex.match(r"[^/]*b/ccc", "b/ccc", partial=True).span(), (0, 5))
4338
+ self.assertEqual(regex.match(r"[^/]*b/ccc", "b/ccb", partial=True), None)
4339
+ self.assertEqual(regex.match(r"[^/]*b/ccc", "b/cc", partial=True).span(), (0, 4))
4340
+ self.assertEqual(regex.match(r"[^/]*b/xyz", "b/xy", partial=True).span(), (0, 4))
4341
+ self.assertEqual(regex.match(r"[^/]*b/xyz", "b/yz", partial=True), None)
4342
+
4343
+ self.assertEqual(regex.match(r"(?i)[^/]*b/ccc", "b/ccc", partial=True).span(), (0, 5))
4344
+ self.assertEqual(regex.match(r"(?i)[^/]*b/ccc", "b/ccb", partial=True), None)
4345
+ self.assertEqual(regex.match(r"(?i)[^/]*b/ccc", "b/cc", partial=True).span(), (0, 4))
4346
+ self.assertEqual(regex.match(r"(?i)[^/]*b/xyz", "b/xy", partial=True).span(), (0, 4))
4347
+ self.assertEqual(regex.match(r"(?i)[^/]*b/xyz", "b/yz", partial=True), None)
4348
+
4349
+ # Git issue 546: Partial match not working in some instances with non-greedy capture
4350
+ self.assertEqual(bool(regex.match(r'<thinking>.*?</thinking>', '<', partial=True)), True)
4351
+ self.assertEqual(bool(regex.match(r'<thinking>.*?</thinking>', '<thinking', partial=True)), True)
4352
+ self.assertEqual(bool(regex.match(r'<thinking>.*?</thinking>', '<thinking>', partial=True)), True)
4353
+ self.assertEqual(bool(regex.match(r'<thinking>.*?</thinking>', '<thinking>x', partial=True)), True)
4354
+ self.assertEqual(bool(regex.match(r'<thinking>.*?</thinking>', '<thinking>xyz abc', partial=True)), True)
4355
+ self.assertEqual(bool(regex.match(r'<thinking>.*?</thinking>', '<thinking>xyz abc foo', partial=True)), True)
4356
+ self.assertEqual(bool(regex.match(r'<thinking>.*?</thinking>', '<thinking>xyz abc foo ', partial=True)), True)
4357
+ self.assertEqual(bool(regex.match(r'<thinking>.*?</thinking>', '<thinking>xyz abc foo bar', partial=True)), True)
4358
+
4359
+ # Git issue 551:
4360
+ self.assertEqual(bool(regex.match(r'(?V1)[[\s\S]]', 'a')), True)
4361
+ self.assertEqual(bool(regex.match(r'(?V1)[[\s\S]-a]', 'a')), True)
4362
+ self.assertEqual(bool(regex.match(r'(?V1)[[\s\S]--a]', 'a')), False)
4363
+ self.assertEqual(bool(regex.match(r'(?V1)[[a-z]--b]', 'a')), True)
4364
+ self.assertEqual(bool(regex.match(r'(?V1)[[\s\S]--b]', 'a')), True)
4365
+ self.assertEqual(bool(regex.match(r'(?V1)[a-[\s\S]]', 'a')), True)
4366
+ self.assertEqual(bool(regex.match(r'(?V1)[a--[\s\S]]', 'a')), False)
4367
+
4368
+ self.assertEqual(regex.search(r'(?ifu)(H\N{LATIN SMALL LETTER O WITH DIAERESIS}gskolan?)[\\s\\S]*p',
4369
+ 'Yrkesh\N{LATIN SMALL LETTER O WITH DIAERESIS}gskola . Studie\N{LATIN SMALL LETTER A WITH DIAERESIS}mnen . Studie\N{LATIN SMALL LETTER A WITH DIAERESIS}mnen . Studie\N{LATIN SMALL LETTER A WITH DIAERESIS}mnen . Studie\N{LATIN SMALL LETTER A WITH DIAERESIS}mnen . Studie\N{LATIN SMALL LETTER A WITH DIAERESIS}mnen . Studie\N{LATIN SMALL LETTER A WITH DIAERESIS}mnen . Studie\N{LATIN SMALL LETTER A WITH DIAERESIS}mnen'),
4370
+ None)
4371
+
4372
+ # Git issue 572: Inline ASCII modifier doesn't seem to affect anything
4373
+ self.assertEqual(bool(regex.match(r'\d', '\uFF19')), True)
4374
+ self.assertEqual(bool(regex.match(r'(?a:\d)', '\uFF19')), False)
4375
+
4376
+ # Git issue 575: Issues with ASCII/Unicode modifiers
4377
+ self.assertEqual(regex.findall('\\d', '9\uFF19'), ['9', '\uff19'])
4378
+ self.assertEqual(regex.findall('(?u:\\d)', '9\uFF19'), ['9', '\uff19'])
4379
+ self.assertEqual(regex.findall('(?a:\\d)', '9\uFF19'), ['9'])
4380
+
4381
+ self.assertEqual(regex.findall('\\d', '9\uFF19', flags=regex.U), ['9', '\uff19'])
4382
+ self.assertEqual(regex.findall('(?u:\\d)', '9\uFF19', flags=regex.U), ['9', '\uff19'])
4383
+ self.assertEqual(regex.findall('(?a:\\d)', '9\uFF19', flags=regex.U), ['9'])
4384
+
4385
+ self.assertEqual(regex.findall('\\d', '9\uFF19', flags=regex.A), ['9'])
4386
+ self.assertEqual(regex.findall('(?u:\\d)', '9\uFF19', flags=regex.A), ['9', '\uff19'])
4387
+ self.assertEqual(regex.findall('(?a:\\d)', '9\uFF19', flags=regex.A), ['9'])
4388
+
4389
+ self.assertEqual(len(regex.findall(r'\p{L}', ''.join(chr(c) for c in range(0x100)), flags=0)), 117)
4390
+ self.assertEqual(len(regex.findall(r'\p{L}', ''.join(chr(c) for c in range(0x100)), flags=regex.A)), 52)
4391
+ self.assertEqual(len(regex.findall(r'\p{L}', ''.join(chr(c) for c in range(0x100)), flags=regex.U)), 117)
4392
+
4393
+ self.assertEqual(len(regex.findall(r'(?a:\p{L})', ''.join(chr(c) for c in range(0x100)), flags=0)), 52)
4394
+ self.assertEqual(len(regex.findall(r'(?a:\p{L})', ''.join(chr(c) for c in range(0x100)), flags=regex.A)), 52)
4395
+ self.assertEqual(len(regex.findall(r'(?a:\p{L})', ''.join(chr(c) for c in range(0x100)), flags=regex.U)), 52)
4396
+
4397
+ self.assertEqual(len(regex.findall(r'(?u:\p{L})', ''.join(chr(c) for c in range(0x100)), flags=0)), 117)
4398
+ self.assertEqual(len(regex.findall(r'(?u:\p{L})', ''.join(chr(c) for c in range(0x100)), flags=regex.A)), 117)
4399
+ self.assertEqual(len(regex.findall(r'(?u:\p{L})', ''.join(chr(c) for c in range(0x100)), flags=regex.U)), 117)
4400
+
4401
+ # Git issue 580: Regression in v2025.7.31: \P{L} no longer matches in simple patterns
4402
+ self.assertEqual(bool(regex.match(r"\A\P{L}?\p{L}", "hello,")), True)
4403
+ self.assertEqual(bool(regex.fullmatch(r"\A\P{L}*(?P<w>\p{L}+)\P{L}*\Z", "hello,")), True)
4404
+
4405
+ # Git issue 584: AttributeError: 'AnyAll' object has no attribute 'positive'
4406
+ self.assertEqual(bool(regex.compile('(\\s|\\S)')), True)
4407
+
4408
+ # Git PR 585: Fix AttributeError: 'AnyAll' object has no attribute '_key'
4409
+ self.assertEqual(bool(regex.compile('(?:[\\S\\s]|[A-D][M-Z])')), True)
4410
+
4411
def test_fuzzy_ext(self):
    """Exercise fuzzy matching whose errors are limited to a character set.

    Every pattern carries the ``{e<=1:[a-z]}`` constraint.  The cases below
    pin which subjects are accepted (the odd character is a lowercase
    letter) and which are rejected (the odd character is ``-``), for plain
    literals, back-references and full-case-folded patterns, each in a
    variant without and with the ``r`` inline flag.
    """
    def fullmatches(pattern, text):
        # Collapse the match object (or None) into a plain bool.
        return bool(regex.fullmatch(pattern, text))

    # Single-character literal.
    plain = r'(?:a){e<=1:[a-z]}'
    flipped = r'(?r)(?:a){e<=1:[a-z]}'
    self.assertEqual(fullmatches(flipped, 'e'), True)
    self.assertEqual(fullmatches(plain, 'e'), True)
    self.assertEqual(fullmatches(plain, '-'), False)
    self.assertEqual(fullmatches(flipped, '-'), False)

    # Same literal, subject one character longer than the pattern.
    self.assertEqual(fullmatches(plain, 'ae'), True)
    self.assertEqual(fullmatches(flipped, 'ae'), True)
    self.assertEqual(fullmatches(plain, 'a-'), False)
    self.assertEqual(fullmatches(flipped, 'a-'), False)

    # Two-character literal.
    plain = r'(?:ab){e<=1:[a-z]}'
    flipped = r'(?r)(?:ab){e<=1:[a-z]}'
    self.assertEqual(fullmatches(plain, 'ae'), True)
    self.assertEqual(fullmatches(flipped, 'ae'), True)
    self.assertEqual(fullmatches(plain, 'a-'), False)
    self.assertEqual(fullmatches(flipped, 'a-'), False)

    # Fuzzy back-reference; the flipped pattern and subjects are mirrored.
    plain = r'(a)\1{e<=1:[a-z]}'
    flipped = r'(?r)\1{e<=1:[a-z]}(a)'
    self.assertEqual(fullmatches(plain, 'ae'), True)
    self.assertEqual(fullmatches(flipped, 'ea'), True)
    self.assertEqual(fullmatches(plain, 'a-'), False)
    self.assertEqual(fullmatches(flipped, '-a'), False)

    # Sharp s in the pattern under the 'f', 'i' and 'u' inline flags,
    # matched against two-character subjects.
    plain = r'(?fiu)(?:\N{LATIN SMALL LETTER SHARP S}){e<=1:[a-z]}'
    flipped = r'(?firu)(?:\N{LATIN SMALL LETTER SHARP S}){e<=1:[a-z]}'
    self.assertEqual(fullmatches(plain, 'ts'), True)
    self.assertEqual(fullmatches(plain, 'st'), True)
    self.assertEqual(fullmatches(flipped, 'st'), True)
    self.assertEqual(fullmatches(flipped, 'ts'), True)
    self.assertEqual(fullmatches(plain, '-s'), False)
    self.assertEqual(fullmatches(plain, 's-'), False)
    self.assertEqual(fullmatches(flipped, 's-'), False)
    self.assertEqual(fullmatches(flipped, '-s'), False)

    # Captured sharp s followed by a fuzzy back-reference.  The rejecting
    # 'r'-flag cases keep the group *before* the back-reference, exactly
    # as the original assertions do.
    plain = r'(?fiu)(\N{LATIN SMALL LETTER SHARP S})\1{e<=1:[a-z]}'
    flipped = r'(?firu)\1{e<=1:[a-z]}(\N{LATIN SMALL LETTER SHARP S})'
    flipped_group_first = (
        r'(?firu)(\N{LATIN SMALL LETTER SHARP S})\1{e<=1:[a-z]}')
    self.assertEqual(fullmatches(plain, 'ssst'), True)
    self.assertEqual(fullmatches(plain, 'ssts'), True)
    self.assertEqual(fullmatches(flipped, 'stss'), True)
    self.assertEqual(fullmatches(flipped, 'tsss'), True)
    self.assertEqual(fullmatches(plain, 'ss-s'), False)
    self.assertEqual(fullmatches(plain, 'sss-'), False)
    self.assertEqual(fullmatches(flipped_group_first, '-s'), False)
    self.assertEqual(fullmatches(flipped_group_first, 's-'), False)

    # The other way round: 'ss' in the pattern, sharp s in the subject.
    sharp_s = '\N{LATIN SMALL LETTER SHARP S}'
    plain = r'(?fiu)(ss)\1{e<=1:[a-z]}'
    flipped = r'(?firu)\1{e<=1:[a-z]}(ss)'
    flipped_group_first = r'(?firu)(ss)\1{e<=1:[a-z]}'
    self.assertEqual(fullmatches(plain, sharp_s + 'ts'), True)
    self.assertEqual(fullmatches(plain, sharp_s + 'st'), True)
    self.assertEqual(fullmatches(flipped, 'st' + sharp_s), True)
    self.assertEqual(fullmatches(flipped, 'ts' + sharp_s), True)
    self.assertEqual(fullmatches(plain, sharp_s + '-s'), False)
    self.assertEqual(fullmatches(plain, sharp_s + 's-'), False)
    self.assertEqual(fullmatches(flipped_group_first, 's-' + sharp_s), False)
    self.assertEqual(fullmatches(flipped_group_first, '-s' + sharp_s), False)
4498
+
4499
def test_subscripted_captures(self):
    """Check subscripted group references in format-style templates.

    The same three templates are rendered through ``Match.expandf`` and
    through ``regex.subf``.  An unsubscripted reference to the repeated
    group is expected to give its last capture, while ``[i]`` selects an
    individual capture (negative indices count from the end).
    """
    pattern = r'(?P<x>.)+'
    subject = 'abc'

    # Group 0 was captured once, so every subscript is the whole match.
    whole_match = '{0} {0[0]} {0[-1]}'
    whole_rendered = 'abc abc abc'

    # The repeated group, addressed by number and by name.
    by_number = '{1} {1[0]} {1[1]} {1[2]} {1[-1]} {1[-2]} {1[-3]}'
    by_name = '{x} {x[0]} {x[1]} {x[2]} {x[-1]} {x[-2]} {x[-3]}'
    captures_rendered = 'c a b c c b a'

    # Expansion from a match object.
    self.assertEqual(regex.match(pattern, subject).expandf(whole_match),
                     whole_rendered)
    self.assertEqual(regex.match(pattern, subject).expandf(by_number),
                     captures_rendered)
    self.assertEqual(regex.match(pattern, subject).expandf(by_name),
                     captures_rendered)

    # The same templates used as substitution formats.
    self.assertEqual(regex.subf(pattern, whole_match, subject),
                     whole_rendered)
    self.assertEqual(regex.subf(pattern, by_number, subject),
                     captures_rendered)
    self.assertEqual(regex.subf(pattern, by_name, subject),
                     captures_rendered)
4517
+
4518
def test_more_zerowidth(self):
    """Check how zero-width matches interleave with non-empty ones.

    ``split``, ``sub``, ``findall`` and ``finditer`` are run with a pattern
    that can match either an empty word boundary or a run of colons, and a
    multiline pattern is checked for the empty and one-character matches it
    yields on consecutive blank lines.

    The former ``sys.version_info >= (3, 7, 0)`` guard has been dropped:
    it is always true on the interpreters this package is built for, so
    the assertions now simply run unconditionally.
    """
    pattern = r'\b|:+'
    subject = 'a::bc'

    self.assertEqual(regex.split(pattern, subject),
                     ['', 'a', '', '', 'bc', ''])
    self.assertEqual(regex.sub(pattern, '-', subject), '-a---bc-')
    self.assertEqual(regex.findall(pattern, subject),
                     ['', '', '::', '', ''])
    self.assertEqual([m.span() for m in regex.finditer(pattern, subject)],
                     [(0, 0), (1, 1), (1, 3), (3, 3), (5, 5)])

    # A lazy whitespace run anchored to line start/end in multiline mode.
    self.assertEqual(
        [m.span() for m in regex.finditer(r'(?m)^\s*?$', 'foo\n\n\nbar')],
        [(4, 4), (4, 5), (5, 5)])
4529
+
4530
def test_line_ending(self):
    r"""Check which line-break sequences the ``\R`` escape finds.

    The str case expects CR LF to be returned as one item and the
    characters U+0085, U+2028 and U+2029 to be found as well; the bytes
    case expects the 0x85 byte not to be reported.
    """
    str_subject = '\r\n\n\x0B\f\r\x85\u2028\u2029'
    str_breaks = ['\r\n', '\n', '\x0B', '\f', '\r', '\x85', '\u2028',
                  '\u2029']
    self.assertEqual(regex.findall(r'\R', str_subject), str_breaks)

    bytes_subject = b'\r\n\n\x0B\f\r\x85'
    bytes_breaks = [b'\r\n', b'\n', b'\x0B', b'\f', b'\r']
    self.assertEqual(regex.findall(br'\R', bytes_subject), bytes_breaks)
4535
+
4536
def test_main():
    """Hand control to the unittest command-line runner, verbosity 2."""
    verbosity_level = 2
    unittest.main(verbosity=verbosity_level)
4538
+
4539
# Run the suite only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_main()