regextest 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +25 -0
- data/README.md +88 -0
- data/Rakefile +55 -0
- data/bin/console +14 -0
- data/bin/regextest +4 -0
- data/bin/setup +7 -0
- data/contrib/Onigmo/RE.txt +522 -0
- data/contrib/Onigmo/UnicodeProps.txt +728 -0
- data/contrib/Onigmo/testpy.py +1319 -0
- data/contrib/unicode/Blocks.txt +298 -0
- data/contrib/unicode/CaseFolding.txt +1414 -0
- data/contrib/unicode/DerivedAge.txt +1538 -0
- data/contrib/unicode/DerivedCoreProperties.txt +11029 -0
- data/contrib/unicode/PropList.txt +1525 -0
- data/contrib/unicode/PropertyAliases.txt +193 -0
- data/contrib/unicode/PropertyValueAliases.txt +1420 -0
- data/contrib/unicode/README.txt +25 -0
- data/contrib/unicode/Scripts.txt +2539 -0
- data/contrib/unicode/UnicodeData.txt +29215 -0
- data/lib/pre-case-folding.rb +101 -0
- data/lib/pre-posix-char-class.rb +150 -0
- data/lib/pre-unicode.rb +116 -0
- data/lib/regextest.rb +268 -0
- data/lib/regextest/back.rb +58 -0
- data/lib/regextest/back/element.rb +151 -0
- data/lib/regextest/back/main.rb +356 -0
- data/lib/regextest/back/result.rb +498 -0
- data/lib/regextest/back/test-case.rb +268 -0
- data/lib/regextest/back/work-thread.rb +119 -0
- data/lib/regextest/common.rb +63 -0
- data/lib/regextest/front.rb +60 -0
- data/lib/regextest/front/anchor.rb +45 -0
- data/lib/regextest/front/back-refer.rb +120 -0
- data/lib/regextest/front/bracket-parser.rb +400 -0
- data/lib/regextest/front/bracket-parser.y +117 -0
- data/lib/regextest/front/bracket-scanner.rb +124 -0
- data/lib/regextest/front/bracket.rb +64 -0
- data/lib/regextest/front/builtin-functions.rb +31 -0
- data/lib/regextest/front/case-folding.rb +18 -0
- data/lib/regextest/front/char-class.rb +243 -0
- data/lib/regextest/front/empty.rb +43 -0
- data/lib/regextest/front/letter.rb +327 -0
- data/lib/regextest/front/manage-parentheses.rb +74 -0
- data/lib/regextest/front/parenthesis.rb +153 -0
- data/lib/regextest/front/parser.rb +1366 -0
- data/lib/regextest/front/parser.y +271 -0
- data/lib/regextest/front/range.rb +60 -0
- data/lib/regextest/front/repeat.rb +90 -0
- data/lib/regextest/front/repeatable.rb +77 -0
- data/lib/regextest/front/scanner.rb +187 -0
- data/lib/regextest/front/selectable.rb +65 -0
- data/lib/regextest/front/sequence.rb +73 -0
- data/lib/regextest/front/unicode.rb +1272 -0
- data/lib/regextest/regex-option.rb +144 -0
- data/lib/regextest/regexp.rb +44 -0
- data/lib/regextest/version.rb +5 -0
- data/lib/tst-reg-test.rb +159 -0
- data/regextest.gemspec +26 -0
- metadata +162 -0
@@ -0,0 +1,1319 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
from __future__ import print_function, unicode_literals
|
5
|
+
from ctypes import *
|
6
|
+
import onig
|
7
|
+
import sys
|
8
|
+
import io
|
9
|
+
import locale
|
10
|
+
|
11
|
+
# Running tallies updated by xx(): unexpected errors, passes and failures.
nerror = nsucc = nfail = 0

# Encoding the tests run under by default; set_encoding() rebinds both names.
onig_encoding = onig.ONIG_ENCODING_EUC_JP
encoding = onig_encoding[0].name.decode()

# Private copy of the default syntax with the ASCII_RANGE option bit cleared.
_syntax_default = onig.OnigSyntaxType()
onig.onig_copy_syntax(byref(_syntax_default), onig.ONIG_SYNTAX_DEFAULT)
_syntax_default.options = (_syntax_default.options
                           & ~onig.ONIG_OPTION_ASCII_RANGE)
# By-reference handle used as the default ``syn`` argument of xx().
syntax_default = byref(_syntax_default)
|
24
|
+
|
25
|
+
|
26
|
+
class strptr(object):
    """A helper class to get a pointer to a string.

    Holds a reference to a ``bytes`` object and hands out integer
    addresses into its buffer.  The addresses are plain integers, so
    the caller must keep this object alive while they are in use.
    """

    def __init__(self, s):
        """Wrap the bytes object *s*.

        Raises:
            TypeError: if *s* is not a ``bytes`` instance.
        """
        if not isinstance(s, bytes):
            raise TypeError("strptr requires bytes, got %s"
                            % type(s).__name__)
        self._str = s
        try:
            self._ptr = cast(self._str, c_void_p)   # CPython 2.x/3.x
        except TypeError:
            self._ptr = c_void_p(self._str)         # PyPy 1.x

    def getptr(self, offset=0):
        """Return the address of the byte at *offset* as an integer.

        Arguments:
        offset -- byte offset from the start of the string; the special
                  value -1 means the end of the string (one past the
                  last byte)

        Raises:
            IndexError: if *offset* lies outside ``0..len(s)`` and is
                not the -1 sentinel.
        """
        size = len(self._str)
        if offset == -1:    # -1 means the end of the string
            offset = size
        elif offset < 0 or offset > size:
            # Negative offsets other than the sentinel would yield an
            # address in front of the buffer, so reject them as well.
            raise IndexError("offset %d out of range 0..%d" % (offset, size))
        return self._ptr.value + offset
|
43
|
+
|
44
|
+
def cc_to_cb(s, enc, cc):
    """Translate a character count into a byte count.

    Arguments:
    s -- unicode string
    enc -- name of the encoding in which the bytes are counted
    cc -- number of leading characters of s to measure

    Returns the length in bytes of the first cc characters of s after
    encoding them with enc.  Raises IndexError when cc is larger than
    the number of characters in s.
    """
    # UTF-32 spends exactly four bytes on every code point, so slicing
    # the UTF-32 form always cuts on a character boundary.
    bytes_per_char = 4
    utf32 = s.encode('UTF-32LE')
    prefix_size = cc * bytes_per_char
    if not prefix_size <= len(utf32):
        raise IndexError
    prefix = utf32[:prefix_size].decode('UTF-32LE')
    return len(prefix.encode(enc))
|
57
|
+
|
58
|
+
def print_result(result, pattern, file=None):
    """Write one ``"<result>: <pattern>"`` report line.

    Arguments:
    result -- short status label printed before the colon
    pattern -- text describing the test case
    file -- stream to write to; None means the current sys.stdout

    If the stream cannot encode pattern, the text of the
    UnicodeEncodeError is written in parentheses instead, on the same
    stream, so the report line is always completed where it was begun.
    """
    if file is None:
        # Looked up at call time because set_output_encoding() may have
        # rebound sys.stdout since this module was imported.
        file = sys.stdout
    print(result + ": ", end='', file=file)
    try:
        print(pattern, file=file)
    except UnicodeEncodeError as e:
        # Finish the line on the stream that already received the label
        # instead of falling back to stdout.
        print('(' + str(e) + ')', file=file)
|
66
|
+
|
67
|
+
def xx(pattern, target, s_from, s_to, mem, not_match,
       syn=syntax_default, opt=onig.ONIG_OPTION_DEFAULT,
       err=onig.ONIG_NORMAL):
    """Compile pattern, search target with it and record the outcome.

    Arguments:
    pattern -- regular expression, text or bytes; text is encoded with
               the current test encoding
    target -- string to search, text or bytes; text is encoded with the
              current test encoding
    s_from -- expected start of the checked region; a character count
              when target is text (converted to bytes here), otherwise
              used as given
    s_to -- expected end of the checked region, same unit as s_from
    mem -- index into the match region's beg/end arrays to compare
    not_match -- True when the pattern is expected not to match
    syn -- syntax passed to onig_new
    opt -- options passed to onig_new
    err -- return code expected from onig_new / onig_search; anything
           other than ONIG_NORMAL means an error is the expected result

    One line is printed per call through print_result() and exactly one
    of the module counters nsucc, nfail or nerror is incremented.
    Unexpected library errors are reported on sys.stderr.
    """
    global nerror
    global nsucc
    global nfail

    reg = onig.OnigRegex()
    einfo = onig.OnigErrorInfo()
    msg = create_string_buffer(onig.ONIG_MAX_ERROR_MESSAGE_LEN)

    pattern2 = pattern
    if not isinstance(pattern, bytes):
        pattern2 = pattern.encode(encoding)
    patternp = strptr(pattern2)

    target2 = target
    if not isinstance(target, bytes):
        # Expected positions are given in characters; the region filled
        # in by onig_search is compared against byte counts below.
        s_from = cc_to_cb(target, encoding, s_from)
        s_to = cc_to_cb(target, encoding, s_to)
        target2 = target.encode(encoding)
    targetp = strptr(target2)

    # cut very long outputs (only affects what is printed)
    limit = 100
    if len(target) > limit:
        target = target[:limit] + "..."
    if len(pattern) > limit:
        pattern = pattern[:limit] + "..."

    r = onig.onig_new(byref(reg), patternp.getptr(), patternp.getptr(-1),
                      opt, onig_encoding, syn, byref(einfo))
    if r != 0:
        # reg is not freed on this path (unchanged from the original).
        # NOTE(review): presumably onig_new leaves nothing allocated
        # when it fails -- confirm against the Oniguruma API docs.
        onig.onig_error_code_to_str(msg, r, byref(einfo))
        if r == err:
            nsucc += 1
            print_result("OK(E)", "%s (/%s/ '%s')" %
                         (msg.value.decode(), pattern, target))
        else:
            nerror += 1
            print_result("ERROR", "%s (/%s/ '%s')" %
                         (msg.value.decode(), pattern, target),
                         file=sys.stderr)
        return

    try:
        if err != onig.ONIG_NORMAL:
            # An error was expected but compilation succeeded.
            nfail += 1
            print_result("FAIL(E)", "/%s/ '%s'" % (pattern, target))
            return

        region = onig.onig_region_new()
        try:
            r = onig.onig_search(reg, targetp.getptr(), targetp.getptr(-1),
                                 targetp.getptr(), targetp.getptr(-1),
                                 region, onig.ONIG_OPTION_NONE)
            if r < onig.ONIG_MISMATCH:
                # Values below ONIG_MISMATCH are treated as error codes.
                onig.onig_error_code_to_str(msg, r)
                if r == err:
                    nsucc += 1
                    print_result("OK(E)", "%s (/%s/ '%s')" %
                                 (msg.value.decode(), pattern, target))
                else:
                    nerror += 1
                    print_result("ERROR", "%s (/%s/ '%s')" %
                                 (msg.value.decode(), pattern, target),
                                 file=sys.stderr)
                return

            if r == onig.ONIG_MISMATCH:
                if not_match:
                    nsucc += 1
                    print_result("OK(N)", "/%s/ '%s'" % (pattern, target))
                else:
                    nfail += 1
                    print_result("FAIL", "/%s/ '%s'" % (pattern, target))
            else:
                if not_match:
                    nfail += 1
                    print_result("FAIL(N)", "/%s/ '%s'" % (pattern, target))
                else:
                    start = region[0].beg[mem]
                    end = region[0].end[mem]
                    if (start == s_from) and (end == s_to):
                        nsucc += 1
                        print_result("OK", "/%s/ '%s'" % (pattern, target))
                    else:
                        nfail += 1
                        print_result("FAIL", "/%s/ '%s' %d-%d : %d-%d" %
                                     (pattern, target,
                                      s_from, s_to, start, end))
        finally:
            onig.onig_region_free(region, 1)
    finally:
        # Released on every path after a successful compile, including
        # the search-error return, which previously leaked the regex.
        onig.onig_free(reg)
|
157
|
+
|
158
|
+
def x2(pattern, target, s_from, s_to, **kwargs):
    """Expect pattern to match target with region entry 0 at s_from..s_to.

    Region entry 0 is, by Oniguruma convention, the overall match.
    Extra keyword arguments are forwarded to xx() unchanged.
    """
    xx(pattern, target, s_from, s_to, mem=0, not_match=False, **kwargs)
|
160
|
+
|
161
|
+
def x3(pattern, target, s_from, s_to, mem, **kwargs):
    """Expect pattern to match target with region entry mem at s_from..s_to.

    Extra keyword arguments are forwarded to xx() unchanged.
    """
    xx(pattern, target, s_from, s_to, mem=mem, not_match=False, **kwargs)
|
163
|
+
|
164
|
+
def n(pattern, target, **kwargs):
    """Expect pattern NOT to match target.

    The position arguments of xx() are all passed as 0.  Extra keyword
    arguments are forwarded to xx() unchanged.
    """
    xx(pattern, target, s_from=0, s_to=0, mem=0, not_match=True, **kwargs)
|
166
|
+
|
167
|
+
|
168
|
+
def is_unicode_encoding(enc):
    """Return True if enc is one of the UTF-8/UTF-16/UTF-32 encodings."""
    unicode_encodings = (
        onig.ONIG_ENCODING_UTF32_LE,
        onig.ONIG_ENCODING_UTF32_BE,
        onig.ONIG_ENCODING_UTF16_LE,
        onig.ONIG_ENCODING_UTF16_BE,
        onig.ONIG_ENCODING_UTF8,
    )
    return enc in unicode_encodings
|
174
|
+
|
175
|
+
|
176
|
+
def set_encoding(enc):
    """Select the encoding used for the test patterns and targets.

    Arguments:
    enc -- either an onig.OnigEncoding object or one of the names
           "EUC-JP", "SJIS", "UTF-8", "UTF-16LE", "UTF-16BE",
           "UTF-32LE", "UTF-32BE"

    Rebinds the module globals onig_encoding and encoding.  Raises
    KeyError for an unknown name, in which case neither global changes.
    """
    global onig_encoding
    global encoding

    if not isinstance(enc, onig.OnigEncoding):
        by_name = {
            "EUC-JP": onig.ONIG_ENCODING_EUC_JP,
            "SJIS": onig.ONIG_ENCODING_SJIS,
            "UTF-8": onig.ONIG_ENCODING_UTF8,
            "UTF-16LE": onig.ONIG_ENCODING_UTF16_LE,
            "UTF-16BE": onig.ONIG_ENCODING_UTF16_BE,
            "UTF-32LE": onig.ONIG_ENCODING_UTF32_LE,
            "UTF-32BE": onig.ONIG_ENCODING_UTF32_BE,
        }
        enc = by_name[enc]

    onig_encoding = enc
    # The codec name used on the Python side is read from the encoding.
    encoding = onig_encoding[0].name.decode()
|
192
|
+
|
193
|
+
|
194
|
+
def set_output_encoding(enc=None):
    """Rebind sys.stdout and sys.stderr to text writers that use enc.

    Arguments:
    enc -- output encoding name; None selects
           locale.getpreferredencoding()
    """
    if enc is None:
        enc = locale.getpreferredencoding()

    def get_text_writer(fo, **kwargs):
        """Open a text writer on fo's file descriptor.

        Unless overridden through kwargs, unencodable characters are
        written as backslash escapes and the descriptor is left open
        when the writer is closed.
        """
        options = dict(kwargs)
        options.setdefault('errors', 'backslashreplace')   # use \uXXXX style
        options.setdefault('closefd', False)
        writer = io.open(fo.fileno(), mode='w', **options)

        # work around for Python 2.x
        raw_write = writer.write    # save the original write() function
        preferred = locale.getpreferredencoding()

        def write_text(s):
            # convert byte strings to unicode before writing
            if isinstance(s, bytes):
                return raw_write(s.decode(preferred))
            return raw_write(s)

        writer.write = write_text
        return writer

    sys.stdout = get_text_writer(sys.stdout, encoding=enc)
    sys.stderr = get_text_writer(sys.stderr, encoding=enc)
|
213
|
+
|
214
|
+
|
215
|
+
def main():
|
216
|
+
# set encoding of the test target
|
217
|
+
if len(sys.argv) > 1:
|
218
|
+
try:
|
219
|
+
set_encoding(sys.argv[1])
|
220
|
+
except KeyError:
|
221
|
+
print("test target encoding error")
|
222
|
+
print("Usage: python testpy.py [test target encoding] [output encoding]")
|
223
|
+
sys.exit()
|
224
|
+
|
225
|
+
# set encoding of stdout/stderr
|
226
|
+
outenc = None
|
227
|
+
if len(sys.argv) > 2:
|
228
|
+
outenc = sys.argv[2]
|
229
|
+
set_output_encoding(outenc)
|
230
|
+
|
231
|
+
# Copied from onig-5.9.2/testc.c
|
232
|
+
# '?\?' which is used to avoid trigraph is replaced by '??'.
|
233
|
+
# Match positions are specified by unit of character instead of byte.
|
234
|
+
|
235
|
+
x2("", "", 0, 0);
|
236
|
+
x2("^", "", 0, 0);
|
237
|
+
x2("$", "", 0, 0);
|
238
|
+
x2("\\G", "", 0, 0);
|
239
|
+
x2("\\A", "", 0, 0);
|
240
|
+
x2("\\Z", "", 0, 0);
|
241
|
+
x2("\\z", "", 0, 0);
|
242
|
+
x2("^$", "", 0, 0);
|
243
|
+
x2("\\ca", "\001", 0, 1);
|
244
|
+
x2("\\C-b", "\002", 0, 1);
|
245
|
+
x2("\\c\\\\", "\034", 0, 1);
|
246
|
+
x2("q[\\c\\\\]", "q\034", 0, 2);
|
247
|
+
x2("", "a", 0, 0);
|
248
|
+
x2("a", "a", 0, 1);
|
249
|
+
if onig_encoding == onig.ONIG_ENCODING_UTF16_LE:
|
250
|
+
x2("\\x61\\x00", "a", 0, 1);
|
251
|
+
elif onig_encoding == onig.ONIG_ENCODING_UTF16_BE:
|
252
|
+
x2("\\x00\\x61", "a", 0, 1);
|
253
|
+
elif onig_encoding == onig.ONIG_ENCODING_UTF32_LE:
|
254
|
+
x2("\\x61\\x00\\x00\\x00", "a", 0, 1);
|
255
|
+
elif onig_encoding == onig.ONIG_ENCODING_UTF32_BE:
|
256
|
+
x2("\\x00\\x00\\x00\\x61", "a", 0, 1);
|
257
|
+
else:
|
258
|
+
x2("\\x61", "a", 0, 1);
|
259
|
+
x2("aa", "aa", 0, 2);
|
260
|
+
x2("aaa", "aaa", 0, 3);
|
261
|
+
x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35);
|
262
|
+
x2("ab", "ab", 0, 2);
|
263
|
+
x2("b", "ab", 1, 2);
|
264
|
+
x2("bc", "abc", 1, 3);
|
265
|
+
x2("(?i:#RET#)", "#INS##RET#", 5, 10);
|
266
|
+
if onig_encoding == onig.ONIG_ENCODING_UTF16_LE:
|
267
|
+
x2("\\17\\00", "\017", 0, 1);
|
268
|
+
x2("\\x1f\\x00", "\x1f", 0, 1);
|
269
|
+
elif onig_encoding == onig.ONIG_ENCODING_UTF16_BE:
|
270
|
+
x2("\\00\\17", "\017", 0, 1);
|
271
|
+
x2("\\x00\\x1f", "\x1f", 0, 1);
|
272
|
+
elif onig_encoding == onig.ONIG_ENCODING_UTF32_LE:
|
273
|
+
x2("\\17\\00\\00\\00", "\017", 0, 1);
|
274
|
+
x2("\\x1f\\x00\\x00\\x00", "\x1f", 0, 1);
|
275
|
+
elif onig_encoding == onig.ONIG_ENCODING_UTF32_BE:
|
276
|
+
x2("\\00\\00\\00\\17", "\017", 0, 1);
|
277
|
+
x2("\\x00\\x00\\x00\\x1f", "\x1f", 0, 1);
|
278
|
+
else:
|
279
|
+
x2("\\17", "\017", 0, 1);
|
280
|
+
x2("\\x1f", "\x1f", 0, 1);
|
281
|
+
x2("a(?#....\\\\JJJJ)b", "ab", 0, 2);
|
282
|
+
x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7);
|
283
|
+
x2(".", "a", 0, 1);
|
284
|
+
n(".", "");
|
285
|
+
x2("..", "ab", 0, 2);
|
286
|
+
x2("\\w", "e", 0, 1);
|
287
|
+
n("\\W", "e");
|
288
|
+
x2("\\s", " ", 0, 1);
|
289
|
+
x2("\\S", "b", 0, 1);
|
290
|
+
x2("\\d", "4", 0, 1);
|
291
|
+
n("\\D", "4");
|
292
|
+
x2("\\b", "z ", 0, 0);
|
293
|
+
x2("\\b", " z", 1, 1);
|
294
|
+
x2("\\B", "zz ", 1, 1);
|
295
|
+
x2("\\B", "z ", 2, 2);
|
296
|
+
x2("\\B", " z", 0, 0);
|
297
|
+
x2("[ab]", "b", 0, 1);
|
298
|
+
n("[ab]", "c");
|
299
|
+
x2("[a-z]", "t", 0, 1);
|
300
|
+
n("[^a]", "a");
|
301
|
+
x2("[^a]", "\n", 0, 1);
|
302
|
+
x2("[]]", "]", 0, 1);
|
303
|
+
n("[^]]", "]");
|
304
|
+
x2("[\\^]+", "0^^1", 1, 3);
|
305
|
+
x2("[b-]", "b", 0, 1);
|
306
|
+
x2("[b-]", "-", 0, 1);
|
307
|
+
x2("[\\w]", "z", 0, 1);
|
308
|
+
n("[\\w]", " ");
|
309
|
+
x2("[\\W]", "b$", 1, 2);
|
310
|
+
x2("[\\d]", "5", 0, 1);
|
311
|
+
n("[\\d]", "e");
|
312
|
+
x2("[\\D]", "t", 0, 1);
|
313
|
+
n("[\\D]", "3");
|
314
|
+
x2("[\\s]", " ", 0, 1);
|
315
|
+
n("[\\s]", "a");
|
316
|
+
x2("[\\S]", "b", 0, 1);
|
317
|
+
n("[\\S]", " ");
|
318
|
+
x2("[\\w\\d]", "2", 0, 1);
|
319
|
+
n("[\\w\\d]", " ");
|
320
|
+
x2("[[:upper:]]", "B", 0, 1);
|
321
|
+
x2("[*[:xdigit:]+]", "+", 0, 1);
|
322
|
+
x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7);
|
323
|
+
x2("[*[:xdigit:]+]", "-@^+", 3, 4);
|
324
|
+
n("[[:upper]]", "A");
|
325
|
+
x2("[[:upper]]", ":", 0, 1);
|
326
|
+
if onig_encoding == onig.ONIG_ENCODING_UTF16_LE:
|
327
|
+
x2("[\\044\\000-\\047\\000]", "\046", 0, 1);
|
328
|
+
x2("[\\x5a\\x00-\\x5c\\x00]", "\x5b", 0, 1);
|
329
|
+
x2("[\\x6A\\x00-\\x6D\\x00]", "\x6c", 0, 1);
|
330
|
+
n("[\\x6A\\x00-\\x6D\\x00]", "\x6E");
|
331
|
+
elif onig_encoding == onig.ONIG_ENCODING_UTF16_BE:
|
332
|
+
x2("[\\000\\044-\\000\\047]", "\046", 0, 1);
|
333
|
+
x2("[\\x00\\x5a-\\x00\\x5c]", "\x5b", 0, 1);
|
334
|
+
x2("[\\x00\\x6A-\\x00\\x6D]", "\x6c", 0, 1);
|
335
|
+
n("[\\x00\\x6A-\\x00\\x6D]", "\x6E");
|
336
|
+
elif onig_encoding == onig.ONIG_ENCODING_UTF32_LE:
|
337
|
+
x2("[\\044\\000\\000\\000-\\047\\000\\000\\000]", "\046", 0, 1);
|
338
|
+
x2("[\\x5a\\x00\\x00\\x00-\\x5c\\x00\\x00\\x00]", "\x5b", 0, 1);
|
339
|
+
x2("[\\x6A\\x00\\x00\\x00-\\x6D\\x00\\x00\\x00]", "\x6c", 0, 1);
|
340
|
+
n("[\\x6A\\x00\\x00\\x00-\\x6D\\x00\\x00\\x00]", "\x6E");
|
341
|
+
elif onig_encoding == onig.ONIG_ENCODING_UTF32_BE:
|
342
|
+
x2("[\\000\\000\\000\\044-\\000\\000\\000\\047]", "\046", 0, 1);
|
343
|
+
x2("[\\x00\\x00\\x00\\x5a-\\x00\\x00\\x00\\x5c]", "\x5b", 0, 1);
|
344
|
+
x2("[\\x00\\x00\\x00\\x6A-\\x00\\x00\\x00\\x6D]", "\x6c", 0, 1);
|
345
|
+
n("[\\x00\\x00\\x00\\x6A-\\x00\\x00\\x00\\x6D]", "\x6E");
|
346
|
+
else:
|
347
|
+
x2("[\\044-\\047]", "\046", 0, 1);
|
348
|
+
x2("[\\x5a-\\x5c]", "\x5b", 0, 1);
|
349
|
+
x2("[\\x6A-\\x6D]", "\x6c", 0, 1);
|
350
|
+
n("[\\x6A-\\x6D]", "\x6E");
|
351
|
+
n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply");
|
352
|
+
x2("[\\[]", "[", 0, 1);
|
353
|
+
x2("[\\]]", "]", 0, 1);
|
354
|
+
x2("[&]", "&", 0, 1);
|
355
|
+
x2("[[ab]]", "b", 0, 1);
|
356
|
+
x2("[[ab]c]", "c", 0, 1);
|
357
|
+
n("[[^a]]", "a");
|
358
|
+
n("[^[a]]", "a");
|
359
|
+
x2("[[ab]&&bc]", "b", 0, 1);
|
360
|
+
n("[[ab]&&bc]", "a");
|
361
|
+
n("[[ab]&&bc]", "c");
|
362
|
+
x2("[a-z&&b-y&&c-x]", "w", 0, 1);
|
363
|
+
n("[^a-z&&b-y&&c-x]", "w");
|
364
|
+
x2("[[^a&&a]&&a-z]", "b", 0, 1);
|
365
|
+
n("[[^a&&a]&&a-z]", "a");
|
366
|
+
x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1);
|
367
|
+
n("[[^a-z&&bcdef]&&[^c-g]]", "c");
|
368
|
+
x2("[^[^abc]&&[^cde]]", "c", 0, 1);
|
369
|
+
x2("[^[^abc]&&[^cde]]", "e", 0, 1);
|
370
|
+
n("[^[^abc]&&[^cde]]", "f");
|
371
|
+
x2("[a-&&-a]", "-", 0, 1);
|
372
|
+
n("[a\\-&&\\-a]", "&");
|
373
|
+
n("\\wabc", " abc");
|
374
|
+
x2("a\\Wbc", "a bc", 0, 4);
|
375
|
+
x2("a.b.c", "aabbc", 0, 5);
|
376
|
+
x2(".\\wb\\W..c", "abb bcc", 0, 7);
|
377
|
+
x2("\\s\\wzzz", " zzzz", 0, 5);
|
378
|
+
x2("aa.b", "aabb", 0, 4);
|
379
|
+
n(".a", "ab");
|
380
|
+
x2(".a", "aa", 0, 2);
|
381
|
+
x2("^a", "a", 0, 1);
|
382
|
+
x2("^a$", "a", 0, 1);
|
383
|
+
x2("^\\w$", "a", 0, 1);
|
384
|
+
n("^\\w$", " ");
|
385
|
+
x2("^\\wab$", "zab", 0, 3);
|
386
|
+
x2("^\\wabcdef$", "zabcdef", 0, 7);
|
387
|
+
x2("^\\w...def$", "zabcdef", 0, 7);
|
388
|
+
x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8);
|
389
|
+
x2("\\A\\Z", "", 0, 0);
|
390
|
+
x2("\\Axyz", "xyz", 0, 3);
|
391
|
+
x2("xyz\\Z", "xyz", 0, 3);
|
392
|
+
x2("xyz\\z", "xyz", 0, 3);
|
393
|
+
x2("a\\Z", "a", 0, 1);
|
394
|
+
x2("\\Gaz", "az", 0, 2);
|
395
|
+
n("\\Gz", "bza");
|
396
|
+
n("az\\G", "az");
|
397
|
+
n("az\\A", "az");
|
398
|
+
n("a\\Az", "az");
|
399
|
+
x2("\\^\\$", "^$", 0, 2);
|
400
|
+
x2("^x?y", "xy", 0, 2);
|
401
|
+
x2("^(x?y)", "xy", 0, 2);
|
402
|
+
x2("\\w", "_", 0, 1);
|
403
|
+
n("\\W", "_");
|
404
|
+
x2("(?=z)z", "z", 0, 1);
|
405
|
+
n("(?=z).", "a");
|
406
|
+
x2("(?!z)a", "a", 0, 1);
|
407
|
+
n("(?!z)a", "z");
|
408
|
+
x2("(?i:a)", "a", 0, 1);
|
409
|
+
x2("(?i:a)", "A", 0, 1);
|
410
|
+
x2("(?i:A)", "a", 0, 1);
|
411
|
+
n("(?i:A)", "b");
|
412
|
+
x2("(?i:[A-Z])", "a", 0, 1);
|
413
|
+
x2("(?i:[f-m])", "H", 0, 1);
|
414
|
+
x2("(?i:[f-m])", "h", 0, 1);
|
415
|
+
n("(?i:[f-m])", "e");
|
416
|
+
x2("(?i:[A-c])", "D", 0, 1);
|
417
|
+
n("(?i:[^a-z])", "A");
|
418
|
+
n("(?i:[^a-z])", "a");
|
419
|
+
x2("(?i:[!-k])", "Z", 0, 1);
|
420
|
+
x2("(?i:[!-k])", "7", 0, 1);
|
421
|
+
x2("(?i:[T-}])", "b", 0, 1);
|
422
|
+
x2("(?i:[T-}])", "{", 0, 1);
|
423
|
+
x2("(?i:\\?a)", "?A", 0, 2);
|
424
|
+
x2("(?i:\\*A)", "*a", 0, 2);
|
425
|
+
n(".", "\n");
|
426
|
+
x2("(?m:.)", "\n", 0, 1);
|
427
|
+
x2("(?m:a.)", "a\n", 0, 2);
|
428
|
+
x2("(?m:.b)", "a\nb", 1, 3);
|
429
|
+
x2(".*abc", "dddabdd\nddabc", 8, 13);
|
430
|
+
x2("(?m:.*abc)", "dddabddabc", 0, 10);
|
431
|
+
n("(?i)(?-i)a", "A");
|
432
|
+
n("(?i)(?-i:a)", "A");
|
433
|
+
x2("a?", "", 0, 0);
|
434
|
+
x2("a?", "b", 0, 0);
|
435
|
+
x2("a?", "a", 0, 1);
|
436
|
+
x2("a*", "", 0, 0);
|
437
|
+
x2("a*", "a", 0, 1);
|
438
|
+
x2("a*", "aaa", 0, 3);
|
439
|
+
x2("a*", "baaaa", 0, 0);
|
440
|
+
n("a+", "");
|
441
|
+
x2("a+", "a", 0, 1);
|
442
|
+
x2("a+", "aaaa", 0, 4);
|
443
|
+
x2("a+", "aabbb", 0, 2);
|
444
|
+
x2("a+", "baaaa", 1, 5);
|
445
|
+
x2(".?", "", 0, 0);
|
446
|
+
x2(".?", "f", 0, 1);
|
447
|
+
x2(".?", "\n", 0, 0);
|
448
|
+
x2(".*", "", 0, 0);
|
449
|
+
x2(".*", "abcde", 0, 5);
|
450
|
+
x2(".+", "z", 0, 1);
|
451
|
+
x2(".+", "zdswer\n", 0, 6);
|
452
|
+
x2("(.*)a\\1f", "babfbac", 0, 4);
|
453
|
+
x2("(.*)a\\1f", "bacbabf", 3, 7);
|
454
|
+
x2("((.*)a\\2f)", "bacbabf", 3, 7);
|
455
|
+
x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23);
|
456
|
+
x2("a|b", "a", 0, 1);
|
457
|
+
x2("a|b", "b", 0, 1);
|
458
|
+
x2("|a", "a", 0, 0);
|
459
|
+
x2("(|a)", "a", 0, 0);
|
460
|
+
x2("ab|bc", "ab", 0, 2);
|
461
|
+
x2("ab|bc", "bc", 0, 2);
|
462
|
+
x2("z(?:ab|bc)", "zbc", 0, 3);
|
463
|
+
x2("a(?:ab|bc)c", "aabc", 0, 4);
|
464
|
+
x2("ab|(?:ac|az)", "az", 0, 2);
|
465
|
+
x2("a|b|c", "dc", 1, 2);
|
466
|
+
x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2);
|
467
|
+
n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn");
|
468
|
+
x2("a|^z", "ba", 1, 2);
|
469
|
+
x2("a|^z", "za", 0, 1);
|
470
|
+
x2("a|\\Gz", "bza", 2, 3);
|
471
|
+
x2("a|\\Gz", "za", 0, 1);
|
472
|
+
x2("a|\\Az", "bza", 2, 3);
|
473
|
+
x2("a|\\Az", "za", 0, 1);
|
474
|
+
x2("a|b\\Z", "ba", 1, 2);
|
475
|
+
x2("a|b\\Z", "b", 0, 1);
|
476
|
+
x2("a|b\\z", "ba", 1, 2);
|
477
|
+
x2("a|b\\z", "b", 0, 1);
|
478
|
+
x2("\\w|\\s", " ", 0, 1);
|
479
|
+
n("\\w|\\w", " ");
|
480
|
+
x2("\\w|%", "%", 0, 1);
|
481
|
+
x2("\\w|[&$]", "&", 0, 1);
|
482
|
+
x2("[b-d]|[^e-z]", "a", 0, 1);
|
483
|
+
x2("(?:a|[c-f])|bz", "dz", 0, 1);
|
484
|
+
x2("(?:a|[c-f])|bz", "bz", 0, 2);
|
485
|
+
x2("abc|(?=zz)..f", "zzf", 0, 3);
|
486
|
+
x2("abc|(?!zz)..f", "abf", 0, 3);
|
487
|
+
x2("(?=za)..a|(?=zz)..a", "zza", 0, 3);
|
488
|
+
n("(?>a|abd)c", "abdc");
|
489
|
+
x2("(?>abd|a)c", "abdc", 0, 4);
|
490
|
+
x2("a?|b", "a", 0, 1);
|
491
|
+
x2("a?|b", "b", 0, 0);
|
492
|
+
x2("a?|b", "", 0, 0);
|
493
|
+
x2("a*|b", "aa", 0, 2);
|
494
|
+
x2("a*|b*", "ba", 0, 0);
|
495
|
+
x2("a*|b*", "ab", 0, 1);
|
496
|
+
x2("a+|b*", "", 0, 0);
|
497
|
+
x2("a+|b*", "bbb", 0, 3);
|
498
|
+
x2("a+|b*", "abbb", 0, 1);
|
499
|
+
n("a+|b+", "");
|
500
|
+
x2("(a|b)?", "b", 0, 1);
|
501
|
+
x2("(a|b)*", "ba", 0, 2);
|
502
|
+
x2("(a|b)+", "bab", 0, 3);
|
503
|
+
x2("(ab|ca)+", "caabbc", 0, 4);
|
504
|
+
x2("(ab|ca)+", "aabca", 1, 5);
|
505
|
+
x2("(ab|ca)+", "abzca", 0, 2);
|
506
|
+
x2("(a|bab)+", "ababa", 0, 5);
|
507
|
+
x2("(a|bab)+", "ba", 1, 2);
|
508
|
+
x2("(a|bab)+", "baaaba", 1, 4);
|
509
|
+
x2("(?:a|b)(?:a|b)", "ab", 0, 2);
|
510
|
+
x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3);
|
511
|
+
x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6);
|
512
|
+
x2("(?:a+|b+){2}", "aaabbb", 0, 6);
|
513
|
+
x2("h{0,}", "hhhh", 0, 4);
|
514
|
+
x2("(?:a+|b+){1,2}", "aaabbb", 0, 6);
|
515
|
+
n("ax{2}*a", "0axxxa1");
|
516
|
+
n("a.{0,2}a", "0aXXXa0");
|
517
|
+
n("a.{0,2}?a", "0aXXXa0");
|
518
|
+
n("a.{0,2}?a", "0aXXXXa0");
|
519
|
+
x2("^a{2,}?a$", "aaa", 0, 3);
|
520
|
+
x2("^[a-z]{2,}?$", "aaa", 0, 3);
|
521
|
+
x2("(?:a+|\\Ab*)cc", "cc", 0, 2);
|
522
|
+
n("(?:a+|\\Ab*)cc", "abcc");
|
523
|
+
x2("(?:^a+|b+)*c", "aabbbabc", 6, 8);
|
524
|
+
x2("(?:^a+|b+)*c", "aabbbbc", 0, 7);
|
525
|
+
x2("a|(?i)c", "C", 0, 1);
|
526
|
+
x2("(?i)c|a", "C", 0, 1);
|
527
|
+
x2("(?i)c|a", "A", 0, 1);
|
528
|
+
x2("(?i:c)|a", "C", 0, 1);
|
529
|
+
n("(?i:c)|a", "A");
|
530
|
+
x2("[abc]?", "abc", 0, 1);
|
531
|
+
x2("[abc]*", "abc", 0, 3);
|
532
|
+
x2("[^abc]*", "abc", 0, 0);
|
533
|
+
n("[^abc]+", "abc");
|
534
|
+
x2("a??", "aaa", 0, 0);
|
535
|
+
x2("ba??b", "bab", 0, 3);
|
536
|
+
x2("a*?", "aaa", 0, 0);
|
537
|
+
x2("ba*?", "baa", 0, 1);
|
538
|
+
x2("ba*?b", "baab", 0, 4);
|
539
|
+
x2("a+?", "aaa", 0, 1);
|
540
|
+
x2("ba+?", "baa", 0, 2);
|
541
|
+
x2("ba+?b", "baab", 0, 4);
|
542
|
+
x2("(?:a?)??", "a", 0, 0);
|
543
|
+
x2("(?:a??)?", "a", 0, 0);
|
544
|
+
x2("(?:a?)+?", "aaa", 0, 1);
|
545
|
+
x2("(?:a+)??", "aaa", 0, 0);
|
546
|
+
x2("(?:a+)??b", "aaab", 0, 4);
|
547
|
+
x2("(?:ab)?{2}", "", 0, 0);
|
548
|
+
x2("(?:ab)?{2}", "ababa", 0, 4);
|
549
|
+
x2("(?:ab)*{0}", "ababa", 0, 0);
|
550
|
+
x2("(?:ab){3,}", "abababab", 0, 8);
|
551
|
+
n("(?:ab){3,}", "abab");
|
552
|
+
x2("(?:ab){2,4}", "ababab", 0, 6);
|
553
|
+
x2("(?:ab){2,4}", "ababababab", 0, 8);
|
554
|
+
x2("(?:ab){2,4}?", "ababababab", 0, 4);
|
555
|
+
x2("(?:ab){,}", "ab{,}", 0, 5);
|
556
|
+
x2("(?:abc)+?{2}", "abcabcabc", 0, 6);
|
557
|
+
x2("(?:X*)(?i:xa)", "XXXa", 0, 4);
|
558
|
+
x2("(d+)([^abc]z)", "dddz", 0, 4);
|
559
|
+
x2("([^abc]*)([^abc]z)", "dddz", 0, 4);
|
560
|
+
x2("(\\w+)(\\wz)", "dddz", 0, 4);
|
561
|
+
x3("(a)", "a", 0, 1, 1);
|
562
|
+
x3("(ab)", "ab", 0, 2, 1);
|
563
|
+
x2("((ab))", "ab", 0, 2);
|
564
|
+
x3("((ab))", "ab", 0, 2, 1);
|
565
|
+
x3("((ab))", "ab", 0, 2, 2);
|
566
|
+
x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20);
|
567
|
+
x3("(ab)(cd)", "abcd", 0, 2, 1);
|
568
|
+
x3("(ab)(cd)", "abcd", 2, 4, 2);
|
569
|
+
x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3);
|
570
|
+
x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4);
|
571
|
+
x2("(^a)", "a", 0, 1);
|
572
|
+
x3("(a)|(a)", "ba", 1, 2, 1);
|
573
|
+
x3("(^a)|(a)", "ba", 1, 2, 2);
|
574
|
+
x3("(a?)", "aaa", 0, 1, 1);
|
575
|
+
x3("(a*)", "aaa", 0, 3, 1);
|
576
|
+
x3("(a*)", "", 0, 0, 1);
|
577
|
+
x3("(a+)", "aaaaaaa", 0, 7, 1);
|
578
|
+
x3("(a+|b*)", "bbbaa", 0, 3, 1);
|
579
|
+
x3("(a+|b?)", "bbbaa", 0, 1, 1);
|
580
|
+
x3("(abc)?", "abc", 0, 3, 1);
|
581
|
+
x3("(abc)*", "abc", 0, 3, 1);
|
582
|
+
x3("(abc)+", "abc", 0, 3, 1);
|
583
|
+
x3("(xyz|abc)+", "abc", 0, 3, 1);
|
584
|
+
x3("([xyz][abc]|abc)+", "abc", 0, 3, 1);
|
585
|
+
x3("((?i:abc))", "AbC", 0, 3, 1);
|
586
|
+
x2("(abc)(?i:\\1)", "abcABC", 0, 6);
|
587
|
+
x3("((?m:a.c))", "a\nc", 0, 3, 1);
|
588
|
+
x3("((?=az)a)", "azb", 0, 1, 1);
|
589
|
+
x3("abc|(.abd)", "zabd", 0, 4, 1);
|
590
|
+
x2("(?:abc)|(ABC)", "abc", 0, 3);
|
591
|
+
x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1);
|
592
|
+
x3("a*(.)", "aaaaz", 4, 5, 1);
|
593
|
+
x3("a*?(.)", "aaaaz", 0, 1, 1);
|
594
|
+
x3("a*?(c)", "aaaac", 4, 5, 1);
|
595
|
+
x3("[bcd]a*(.)", "caaaaz", 5, 6, 1);
|
596
|
+
x3("(\\Abb)cc", "bbcc", 0, 2, 1);
|
597
|
+
n("(\\Abb)cc", "zbbcc");
|
598
|
+
x3("(^bb)cc", "bbcc", 0, 2, 1);
|
599
|
+
n("(^bb)cc", "zbbcc");
|
600
|
+
x3("cc(bb$)", "ccbb", 2, 4, 1);
|
601
|
+
n("cc(bb$)", "ccbbb");
|
602
|
+
n("(\\1)", "");
|
603
|
+
n("\\1(a)", "aa");
|
604
|
+
n("(a(b)\\1)\\2+", "ababb");
|
605
|
+
n("(?:(?:\\1|z)(a))+$", "zaa");
|
606
|
+
x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4);
|
607
|
+
x2("(a)(?=\\1)", "aa", 0, 1);
|
608
|
+
n("(a)$|\\1", "az");
|
609
|
+
x2("(a)\\1", "aa", 0, 2);
|
610
|
+
n("(a)\\1", "ab");
|
611
|
+
x2("(a?)\\1", "aa", 0, 2);
|
612
|
+
x2("(a??)\\1", "aa", 0, 0);
|
613
|
+
x2("(a*)\\1", "aaaaa", 0, 4);
|
614
|
+
x3("(a*)\\1", "aaaaa", 0, 2, 1);
|
615
|
+
x2("a(b*)\\1", "abbbb", 0, 5);
|
616
|
+
x2("a(b*)\\1", "ab", 0, 1);
|
617
|
+
x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10);
|
618
|
+
x2("(a*)(b*)\\2", "aaabbbb", 0, 7);
|
619
|
+
x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8);
|
620
|
+
x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7);
|
621
|
+
x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6);
|
622
|
+
x2("([a-d])\\1", "cc", 0, 2);
|
623
|
+
x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6);
|
624
|
+
n("(\\w\\d\\s)\\1", "f5 f5");
|
625
|
+
x2("(who|[a-c]{3})\\1", "whowho", 0, 6);
|
626
|
+
x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9);
|
627
|
+
x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6);
|
628
|
+
x2("(^a)\\1", "aa", 0, 2);
|
629
|
+
n("(^a)\\1", "baa");
|
630
|
+
n("(a$)\\1", "aa");
|
631
|
+
n("(ab\\Z)\\1", "ab");
|
632
|
+
x2("(a*\\Z)\\1", "a", 1, 1);
|
633
|
+
x2(".(a*\\Z)\\1", "ba", 1, 2);
|
634
|
+
x3("(.(abc)\\2)", "zabcabc", 0, 7, 1);
|
635
|
+
x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1);
|
636
|
+
x2("((?i:az))\\1", "AzAz", 0, 4);
|
637
|
+
n("((?i:az))\\1", "Azaz");
|
638
|
+
x2("(?<=a)b", "ab", 1, 2);
|
639
|
+
n("(?<=a)b", "bb");
|
640
|
+
x2("(?<=a|b)b", "bb", 1, 2);
|
641
|
+
x2("(?<=a|bc)b", "bcb", 2, 3);
|
642
|
+
x2("(?<=a|bc)b", "ab", 1, 2);
|
643
|
+
x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2);
|
644
|
+
x2("(a)\\g<1>", "aa", 0, 2);
|
645
|
+
x2("(?<!a)b", "cb", 1, 2);
|
646
|
+
n("(?<!a)b", "ab");
|
647
|
+
x2("(?<!a|bc)b", "bbb", 0, 1);
|
648
|
+
n("(?<!a|bc)z", "bcz");
|
649
|
+
x2("(?<name1>a)", "a", 0, 1);
|
650
|
+
x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4);
|
651
|
+
x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8);
|
652
|
+
x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3);
|
653
|
+
x2("(?<n>|a\\g<n>)+", "", 0, 0);
|
654
|
+
x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6);
|
655
|
+
x3("\\g<n>(?<n>.){0}", "X", 0, 1, 1);
|
656
|
+
x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3);
|
657
|
+
x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4);
|
658
|
+
x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8);
|
659
|
+
x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", " fg xaaaaaaaafg x", 2, 18);
|
660
|
+
x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1);
|
661
|
+
x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3);
|
662
|
+
x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2);
|
663
|
+
x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0);
|
664
|
+
x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9);
|
665
|
+
n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg");
|
666
|
+
x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10);
|
667
|
+
x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14);
|
668
|
+
x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16);
|
669
|
+
x2("(?<foo>a|\\(\\g<foo>\\))", "a", 0, 1);
|
670
|
+
x2("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 0, 13);
|
671
|
+
x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1);
|
672
|
+
x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9);
|
673
|
+
x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3);
|
674
|
+
x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1);
|
675
|
+
x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7);
|
676
|
+
x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4);
|
677
|
+
x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5);
|
678
|
+
x2("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10);
|
679
|
+
x2("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5);
|
680
|
+
x2("()*\\1", "", 0, 0);
|
681
|
+
x2("(?:()|())*\\1\\2", "", 0, 0);
|
682
|
+
x3("(?:\\1a|())*", "a", 0, 0, 1);
|
683
|
+
x2("x((.)*)*x", "0x1x2x3", 1, 6);
|
684
|
+
x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9);
|
685
|
+
x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0);
|
686
|
+
x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1);
|
687
|
+
if onig_encoding == onig.ONIG_ENCODING_UTF16_LE:
|
688
|
+
x2("\\xFA\\x8F", "\u8ffa", 0, 1);
|
689
|
+
elif onig_encoding == onig.ONIG_ENCODING_UTF16_BE:
|
690
|
+
x2("\\x8F\\xFA", "\u8ffa", 0, 1);
|
691
|
+
elif onig_encoding == onig.ONIG_ENCODING_UTF32_LE:
|
692
|
+
x2("\\xFA\\x8F\\x00\\x00", "\u8ffa", 0, 1);
|
693
|
+
elif onig_encoding == onig.ONIG_ENCODING_UTF32_BE:
|
694
|
+
x2("\\x00\\x00\\x8F\\xFA", "\u8ffa", 0, 1);
|
695
|
+
elif onig_encoding == onig.ONIG_ENCODING_UTF8:
|
696
|
+
x2("\\xE8\\xBF\\xBA", "\u8ffa", 0, 1);
|
697
|
+
elif onig_encoding == onig.ONIG_ENCODING_SJIS:
|
698
|
+
x2("\\xE7\\x92", "\u8ffa", 0, 1);
|
699
|
+
elif onig_encoding == onig.ONIG_ENCODING_EUC_JP:
|
700
|
+
x2("\\xED\\xF2", "\u8ffa", 0, 1); # "迺"
|
701
|
+
x2("", "あ", 0, 0);
|
702
|
+
x2("あ", "あ", 0, 1);
|
703
|
+
n("い", "あ");
|
704
|
+
x2("うう", "うう", 0, 2);
|
705
|
+
x2("あいう", "あいう", 0, 3);
|
706
|
+
x2("こここここここここここここここここここここここここここここここここここ", "こここここここここここここここここここここここここここここここここここ", 0, 35);
|
707
|
+
x2("あ", "いあ", 1, 2);
|
708
|
+
x2("いう", "あいう", 1, 3);
|
709
|
+
# x2(b"\\xca\\xb8", b"\xca\xb8", 0, 2); # "文"
|
710
|
+
x2(".", "あ", 0, 1);
|
711
|
+
x2("..", "かき", 0, 2);
|
712
|
+
x2("\\w", "お", 0, 1);
|
713
|
+
n("\\W", "あ");
|
714
|
+
x2("[\\W]", "う$", 1, 2);
|
715
|
+
x2("\\S", "そ", 0, 1);
|
716
|
+
x2("\\S", "漢", 0, 1);
|
717
|
+
x2("\\b", "気 ", 0, 0);
|
718
|
+
x2("\\b", " ほ", 1, 1);
|
719
|
+
x2("\\B", "せそ ", 1, 1);
|
720
|
+
x2("\\B", "う ", 2, 2);
|
721
|
+
x2("\\B", " い", 0, 0);
|
722
|
+
x2("[たち]", "ち", 0, 1);
|
723
|
+
n("[なに]", "ぬ");
|
724
|
+
x2("[う-お]", "え", 0, 1);
|
725
|
+
n("[^け]", "け");
|
726
|
+
x2("[\\w]", "ね", 0, 1);
|
727
|
+
n("[\\d]", "ふ");
|
728
|
+
x2("[\\D]", "は", 0, 1);
|
729
|
+
n("[\\s]", "く");
|
730
|
+
x2("[\\S]", "へ", 0, 1);
|
731
|
+
x2("[\\w\\d]", "よ", 0, 1);
|
732
|
+
x2("[\\w\\d]", " よ", 3, 4);
|
733
|
+
n("\\w鬼車", " 鬼車");
|
734
|
+
x2("鬼\\W車", "鬼 車", 0, 3);
|
735
|
+
x2("あ.い.う", "ああいいう", 0, 5);
|
736
|
+
x2(".\\wう\\W..ぞ", "えうう うぞぞ", 0, 7);
|
737
|
+
x2("\\s\\wこここ", " ここここ", 0, 5);
|
738
|
+
x2("ああ.け", "ああけけ", 0, 4);
|
739
|
+
n(".い", "いえ");
|
740
|
+
x2(".お", "おお", 0, 2);
|
741
|
+
x2("^あ", "あ", 0, 1);
|
742
|
+
x2("^む$", "む", 0, 1);
|
743
|
+
x2("^\\w$", "に", 0, 1);
|
744
|
+
x2("^\\wかきくけこ$", "zかきくけこ", 0, 6);
|
745
|
+
x2("^\\w...うえお$", "zあいううえお", 0, 7);
|
746
|
+
x2("\\w\\w\\s\\Wおおお\\d", "aお おおお4", 0, 8);
|
747
|
+
x2("\\Aたちつ", "たちつ", 0, 3);
|
748
|
+
x2("むめも\\Z", "むめも", 0, 3);
|
749
|
+
x2("かきく\\z", "かきく", 0, 3);
|
750
|
+
x2("かきく\\Z", "かきく\n", 0, 3);
|
751
|
+
x2("\\Gぽぴ", "ぽぴ", 0, 2);
|
752
|
+
n("\\Gえ", "うえお");
|
753
|
+
n("とて\\G", "とて");
|
754
|
+
n("まみ\\A", "まみ");
|
755
|
+
n("ま\\Aみ", "まみ");
|
756
|
+
x2("(?=せ)せ", "せ", 0, 1);
|
757
|
+
n("(?=う).", "い");
|
758
|
+
x2("(?!う)か", "か", 0, 1);
|
759
|
+
n("(?!と)あ", "と");
|
760
|
+
x2("(?i:あ)", "あ", 0, 1);
|
761
|
+
x2("(?i:ぶべ)", "ぶべ", 0, 2);
|
762
|
+
n("(?i:い)", "う");
|
763
|
+
x2("(?m:よ.)", "よ\n", 0, 2);
|
764
|
+
x2("(?m:.め)", "ま\nめ", 1, 3);
|
765
|
+
x2("あ?", "", 0, 0);
|
766
|
+
x2("変?", "化", 0, 0);
|
767
|
+
x2("変?", "変", 0, 1);
|
768
|
+
x2("量*", "", 0, 0);
|
769
|
+
x2("量*", "量", 0, 1);
|
770
|
+
x2("子*", "子子子", 0, 3);
|
771
|
+
x2("馬*", "鹿馬馬馬馬", 0, 0);
|
772
|
+
n("山+", "");
|
773
|
+
x2("河+", "河", 0, 1);
|
774
|
+
x2("時+", "時時時時", 0, 4);
|
775
|
+
x2("え+", "ええううう", 0, 2);
|
776
|
+
x2("う+", "おうううう", 1, 5);
|
777
|
+
x2(".?", "た", 0, 1);
|
778
|
+
x2(".*", "ぱぴぷぺ", 0, 4);
|
779
|
+
x2(".+", "ろ", 0, 1);
|
780
|
+
x2(".+", "いうえか\n", 0, 4);
|
781
|
+
x2("あ|い", "あ", 0, 1);
|
782
|
+
x2("あ|い", "い", 0, 1);
|
783
|
+
x2("あい|いう", "あい", 0, 2);
|
784
|
+
x2("あい|いう", "いう", 0, 2);
|
785
|
+
x2("を(?:かき|きく)", "をかき", 0, 3);
|
786
|
+
x2("を(?:かき|きく)け", "をきくけ", 0, 4);
|
787
|
+
x2("あい|(?:あう|あを)", "あを", 0, 2);
|
788
|
+
x2("あ|い|う", "えう", 1, 2);
|
789
|
+
x2("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "しすせ", 0, 3);
|
790
|
+
n("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "すせ");
|
791
|
+
x2("あ|^わ", "ぶあ", 1, 2);
|
792
|
+
x2("あ|^を", "をあ", 0, 1);
|
793
|
+
x2("鬼|\\G車", "け車鬼", 2, 3);
|
794
|
+
x2("鬼|\\G車", "車鬼", 0, 1);
|
795
|
+
x2("鬼|\\A車", "b車鬼", 2, 3);
|
796
|
+
x2("鬼|\\A車", "車", 0, 1);
|
797
|
+
x2("鬼|車\\Z", "車鬼", 1, 2);
|
798
|
+
x2("鬼|車\\Z", "車", 0, 1);
|
799
|
+
x2("鬼|車\\Z", "車\n", 0, 1);
|
800
|
+
x2("鬼|車\\z", "車鬼", 1, 2);
|
801
|
+
x2("鬼|車\\z", "車", 0, 1);
|
802
|
+
x2("\\w|\\s", "お", 0, 1);
|
803
|
+
x2("\\w|%", "%お", 0, 1);
|
804
|
+
x2("\\w|[&$]", "う&", 0, 1);
|
805
|
+
x2("[い-け]", "う", 0, 1);
|
806
|
+
x2("[い-け]|[^か-こ]", "あ", 0, 1);
|
807
|
+
x2("[い-け]|[^か-こ]", "か", 0, 1);
|
808
|
+
x2("[^あ]", "\n", 0, 1);
|
809
|
+
x2("(?:あ|[う-き])|いを", "うを", 0, 1);
|
810
|
+
x2("(?:あ|[う-き])|いを", "いを", 0, 2);
|
811
|
+
x2("あいう|(?=けけ)..ほ", "けけほ", 0, 3);
|
812
|
+
x2("あいう|(?!けけ)..ほ", "あいほ", 0, 3);
|
813
|
+
x2("(?=をあ)..あ|(?=をを)..あ", "ををあ", 0, 3);
|
814
|
+
x2("(?<=あ|いう)い", "いうい", 2, 3);
|
815
|
+
n("(?>あ|あいえ)う", "あいえう");
|
816
|
+
x2("(?>あいえ|あ)う", "あいえう", 0, 4);
|
817
|
+
x2("あ?|い", "あ", 0, 1);
|
818
|
+
x2("あ?|い", "い", 0, 0);
|
819
|
+
x2("あ?|い", "", 0, 0);
|
820
|
+
x2("あ*|い", "ああ", 0, 2);
|
821
|
+
x2("あ*|い*", "いあ", 0, 0);
|
822
|
+
x2("あ*|い*", "あい", 0, 1);
|
823
|
+
x2("[aあ]*|い*", "aあいいい", 0, 2);
|
824
|
+
x2("あ+|い*", "", 0, 0);
|
825
|
+
x2("あ+|い*", "いいい", 0, 3);
|
826
|
+
x2("あ+|い*", "あいいい", 0, 1);
|
827
|
+
x2("あ+|い*", "aあいいい", 0, 0);
|
828
|
+
n("あ+|い+", "");
|
829
|
+
x2("(あ|い)?", "い", 0, 1);
|
830
|
+
x2("(あ|い)*", "いあ", 0, 2);
|
831
|
+
x2("(あ|い)+", "いあい", 0, 3);
|
832
|
+
x2("(あい|うあ)+", "うああいうえ", 0, 4);
|
833
|
+
x2("(あい|うえ)+", "うああいうえ", 2, 6);
|
834
|
+
x2("(あい|うあ)+", "ああいうあ", 1, 5);
|
835
|
+
x2("(あい|うあ)+", "あいをうあ", 0, 2);
|
836
|
+
x2("(あい|うあ)+", "$$zzzzあいをうあ", 6, 8);
|
837
|
+
x2("(あ|いあい)+", "あいあいあ", 0, 5);
|
838
|
+
x2("(あ|いあい)+", "いあ", 1, 2);
|
839
|
+
x2("(あ|いあい)+", "いあああいあ", 1, 4);
|
840
|
+
x2("(?:あ|い)(?:あ|い)", "あい", 0, 2);
|
841
|
+
x2("(?:あ*|い*)(?:あ*|い*)", "あああいいい", 0, 3);
|
842
|
+
x2("(?:あ*|い*)(?:あ+|い+)", "あああいいい", 0, 6);
|
843
|
+
x2("(?:あ+|い+){2}", "あああいいい", 0, 6);
|
844
|
+
x2("(?:あ+|い+){1,2}", "あああいいい", 0, 6);
|
845
|
+
x2("(?:あ+|\\Aい*)うう", "うう", 0, 2);
|
846
|
+
n("(?:あ+|\\Aい*)うう", "あいうう");
|
847
|
+
x2("(?:^あ+|い+)*う", "ああいいいあいう", 6, 8);
|
848
|
+
x2("(?:^あ+|い+)*う", "ああいいいいう", 0, 7);
|
849
|
+
x2("う{0,}", "うううう", 0, 4);
|
850
|
+
x2("あ|(?i)c", "C", 0, 1);
|
851
|
+
x2("(?i)c|あ", "C", 0, 1);
|
852
|
+
x2("(?i:あ)|a", "a", 0, 1);
|
853
|
+
n("(?i:あ)|a", "A");
|
854
|
+
x2("[あいう]?", "あいう", 0, 1);
|
855
|
+
x2("[あいう]*", "あいう", 0, 3);
|
856
|
+
x2("[^あいう]*", "あいう", 0, 0);
|
857
|
+
n("[^あいう]+", "あいう");
|
858
|
+
x2("あ??", "あああ", 0, 0);
|
859
|
+
x2("いあ??い", "いあい", 0, 3);
|
860
|
+
x2("あ*?", "あああ", 0, 0);
|
861
|
+
x2("いあ*?", "いああ", 0, 1);
|
862
|
+
x2("いあ*?い", "いああい", 0, 4);
|
863
|
+
x2("あ+?", "あああ", 0, 1);
|
864
|
+
x2("いあ+?", "いああ", 0, 2);
|
865
|
+
x2("いあ+?い", "いああい", 0, 4);
|
866
|
+
x2("(?:天?)??", "天", 0, 0);
|
867
|
+
x2("(?:天??)?", "天", 0, 0);
|
868
|
+
x2("(?:夢?)+?", "夢夢夢", 0, 1);
|
869
|
+
x2("(?:風+)??", "風風風", 0, 0);
|
870
|
+
x2("(?:雪+)??霜", "雪雪雪霜", 0, 4);
|
871
|
+
x2("(?:あい)?{2}", "", 0, 0);
|
872
|
+
x2("(?:鬼車)?{2}", "鬼車鬼車鬼", 0, 4);
|
873
|
+
x2("(?:鬼車)*{0}", "鬼車鬼車鬼", 0, 0);
|
874
|
+
x2("(?:鬼車){3,}", "鬼車鬼車鬼車鬼車", 0, 8);
|
875
|
+
n("(?:鬼車){3,}", "鬼車鬼車");
|
876
|
+
x2("(?:鬼車){2,4}", "鬼車鬼車鬼車", 0, 6);
|
877
|
+
x2("(?:鬼車){2,4}", "鬼車鬼車鬼車鬼車鬼車", 0, 8);
|
878
|
+
x2("(?:鬼車){2,4}?", "鬼車鬼車鬼車鬼車鬼車", 0, 4);
|
879
|
+
x2("(?:鬼車){,}", "鬼車{,}", 0, 5);
|
880
|
+
x2("(?:かきく)+?{2}", "かきくかきくかきく", 0, 6);
|
881
|
+
x3("(火)", "火", 0, 1, 1);
|
882
|
+
x3("(火水)", "火水", 0, 2, 1);
|
883
|
+
x2("((時間))", "時間", 0, 2);
|
884
|
+
x3("((風水))", "風水", 0, 2, 1);
|
885
|
+
x3("((昨日))", "昨日", 0, 2, 2);
|
886
|
+
x3("((((((((((((((((((((量子))))))))))))))))))))", "量子", 0, 2, 20);
|
887
|
+
x3("(あい)(うえ)", "あいうえ", 0, 2, 1);
|
888
|
+
x3("(あい)(うえ)", "あいうえ", 2, 4, 2);
|
889
|
+
x3("()(あ)いう(えおか)きくけこ", "あいうえおかきくけこ", 3, 6, 3);
|
890
|
+
x3("(()(あ)いう(えおか)きくけこ)", "あいうえおかきくけこ", 3, 6, 4);
|
891
|
+
x3(".*(フォ)ン・マ(ン()シュタ)イン", "フォン・マンシュタイン", 5, 9, 2);
|
892
|
+
x2("(^あ)", "あ", 0, 1);
|
893
|
+
x3("(あ)|(あ)", "いあ", 1, 2, 1);
|
894
|
+
x3("(^あ)|(あ)", "いあ", 1, 2, 2);
|
895
|
+
x3("(あ?)", "あああ", 0, 1, 1);
|
896
|
+
x3("(ま*)", "ままま", 0, 3, 1);
|
897
|
+
x3("(と*)", "", 0, 0, 1);
|
898
|
+
x3("(る+)", "るるるるるるる", 0, 7, 1);
|
899
|
+
x3("(ふ+|へ*)", "ふふふへへ", 0, 3, 1);
|
900
|
+
x3("(あ+|い?)", "いいいああ", 0, 1, 1);
|
901
|
+
x3("(あいう)?", "あいう", 0, 3, 1);
|
902
|
+
x3("(あいう)*", "あいう", 0, 3, 1);
|
903
|
+
x3("(あいう)+", "あいう", 0, 3, 1);
|
904
|
+
x3("(さしす|あいう)+", "あいう", 0, 3, 1);
|
905
|
+
x3("([なにぬ][かきく]|かきく)+", "かきく", 0, 3, 1);
|
906
|
+
x3("((?i:あいう))", "あいう", 0, 3, 1);
|
907
|
+
x3("((?m:あ.う))", "あ\nう", 0, 3, 1);
|
908
|
+
x3("((?=あん)あ)", "あんい", 0, 1, 1);
|
909
|
+
x3("あいう|(.あいえ)", "んあいえ", 0, 4, 1);
|
910
|
+
x3("あ*(.)", "ああああん", 4, 5, 1);
|
911
|
+
x3("あ*?(.)", "ああああん", 0, 1, 1);
|
912
|
+
x3("あ*?(ん)", "ああああん", 4, 5, 1);
|
913
|
+
x3("[いうえ]あ*(.)", "えああああん", 5, 6, 1);
|
914
|
+
x3("(\\Aいい)うう", "いいうう", 0, 2, 1);
|
915
|
+
n("(\\Aいい)うう", "んいいうう");
|
916
|
+
x3("(^いい)うう", "いいうう", 0, 2, 1);
|
917
|
+
n("(^いい)うう", "んいいうう");
|
918
|
+
x3("ろろ(るる$)", "ろろるる", 2, 4, 1);
|
919
|
+
n("ろろ(るる$)", "ろろるるる");
|
920
|
+
x2("(無)\\1", "無無", 0, 2);
|
921
|
+
n("(無)\\1", "無武");
|
922
|
+
x2("(空?)\\1", "空空", 0, 2);
|
923
|
+
x2("(空??)\\1", "空空", 0, 0);
|
924
|
+
x2("(空*)\\1", "空空空空空", 0, 4);
|
925
|
+
x3("(空*)\\1", "空空空空空", 0, 2, 1);
|
926
|
+
x2("あ(い*)\\1", "あいいいい", 0, 5);
|
927
|
+
x2("あ(い*)\\1", "あい", 0, 1);
|
928
|
+
x2("(あ*)(い*)\\1\\2", "あああいいあああいい", 0, 10);
|
929
|
+
x2("(あ*)(い*)\\2", "あああいいいい", 0, 7);
|
930
|
+
x3("(あ*)(い*)\\2", "あああいいいい", 3, 5, 2);
|
931
|
+
x2("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 8);
|
932
|
+
x3("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 3, 7);
|
933
|
+
x2("(は)(ひ)(ふ)\\2\\1\\3", "はひふひはふ", 0, 6);
|
934
|
+
x2("([き-け])\\1", "くく", 0, 2);
|
935
|
+
x2("(\\w\\d\\s)\\1", "あ5 あ5 ", 0, 6);
|
936
|
+
n("(\\w\\d\\s)\\1", "あ5 あ5");
|
937
|
+
x2("(誰?|[あ-う]{3})\\1", "誰?誰?", 0, 4);
|
938
|
+
x2("...(誰?|[あ-う]{3})\\1", "あaあ誰?誰?", 0, 7);
|
939
|
+
x2("(誰?|[あ-う]{3})\\1", "ういうういう", 0, 6);
|
940
|
+
x2("(^こ)\\1", "ここ", 0, 2);
|
941
|
+
n("(^む)\\1", "めむむ");
|
942
|
+
n("(あ$)\\1", "ああ");
|
943
|
+
n("(あい\\Z)\\1", "あい");
|
944
|
+
x2("(あ*\\Z)\\1", "あ", 1, 1);
|
945
|
+
x2(".(あ*\\Z)\\1", "いあ", 1, 2);
|
946
|
+
x3("(.(やいゆ)\\2)", "zやいゆやいゆ", 0, 7, 1);
|
947
|
+
x3("(.(..\\d.)\\2)", "あ12341234", 0, 9, 1);
|
948
|
+
x2("((?i:あvず))\\1", "あvずあvず", 0, 6);
|
949
|
+
x2("(?<愚か>変|\\(\\g<愚か>\\))", "((((((変))))))", 0, 13);
|
950
|
+
x2("\\A(?:\\g<阿_1>|\\g<云_2>|\\z終了 (?<阿_1>観|自\\g<云_2>自)(?<云_2>在|菩薩\\g<阿_1>菩薩))$", "菩薩自菩薩自在自菩薩自菩薩", 0, 13);
|
951
|
+
x2("[[ひふ]]", "ふ", 0, 1);
|
952
|
+
x2("[[いおう]か]", "か", 0, 1);
|
953
|
+
n("[[^あ]]", "あ");
|
954
|
+
n("[^[あ]]", "あ");
|
955
|
+
x2("[^[^あ]]", "あ", 0, 1);
|
956
|
+
x2("[[かきく]&&きく]", "く", 0, 1);
|
957
|
+
n("[[かきく]&&きく]", "か");
|
958
|
+
n("[[かきく]&&きく]", "け");
|
959
|
+
x2("[あ-ん&&い-を&&う-ゑ]", "ゑ", 0, 1);
|
960
|
+
n("[^あ-ん&&い-を&&う-ゑ]", "ゑ");
|
961
|
+
x2("[[^あ&&あ]&&あ-ん]", "い", 0, 1);
|
962
|
+
n("[[^あ&&あ]&&あ-ん]", "あ");
|
963
|
+
x2("[[^あ-ん&&いうえお]&&[^う-か]]", "き", 0, 1);
|
964
|
+
n("[[^あ-ん&&いうえお]&&[^う-か]]", "い");
|
965
|
+
x2("[^[^あいう]&&[^うえお]]", "う", 0, 1);
|
966
|
+
x2("[^[^あいう]&&[^うえお]]", "え", 0, 1);
|
967
|
+
n("[^[^あいう]&&[^うえお]]", "か");
|
968
|
+
x2("[あ-&&-あ]", "-", 0, 1);
|
969
|
+
x2("[^[^a-zあいう]&&[^bcdefgうえお]q-w]", "え", 0, 1);
|
970
|
+
x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "f", 0, 1);
|
971
|
+
x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "g", 0, 1);
|
972
|
+
n("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "2");
|
973
|
+
x2("a<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 20);
|
974
|
+
x2(".<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 20);
|
975
|
+
|
976
|
+
|
977
|
+
# additional test patterns
|
978
|
+
if is_unicode_encoding(onig_encoding):
|
979
|
+
x2("\\x{3042}\\x{3044}", "あい", 0, 2)
|
980
|
+
elif onig_encoding == onig.ONIG_ENCODING_SJIS:
|
981
|
+
x2("\\x{82a0}\\x{82A2}", "あい", 0, 2)
|
982
|
+
elif onig_encoding == onig.ONIG_ENCODING_EUC_JP:
|
983
|
+
x2("\\x{a4a2}\\x{A4A4}", "あい", 0, 2)
|
984
|
+
x2("\\p{Hiragana}\\p{Katakana}", "あイ", 0, 2)
|
985
|
+
x2("(?m)^A.B$", "X\nA\nB\nZ", 2, 5)
|
986
|
+
n("(?<!(?<=a)b|c)d", "abd")
|
987
|
+
n("(?<!(?<=a)b|c)d", "cd")
|
988
|
+
x2("(?<!(?<=a)b|c)d", "bd", 1, 2)
|
989
|
+
x2("(a){2}z", "aaz", 0, 3)
|
990
|
+
x2("(?<=a).*b", "aab", 1, 3)
|
991
|
+
x2("(?!a).*b", "ab", 1, 2)
|
992
|
+
x2("(?<=(?<!A)B)C", "BBC", 2, 3)
|
993
|
+
n("(?<=(?<!A)B)C", "ABC")
|
994
|
+
n("(?i)(?<!aa|b)c", "Aac")
|
995
|
+
n("(?i)(?<!b|aa)c", "Aac")
|
996
|
+
x2("(?<=\\babc)d", " abcd", 4, 5)
|
997
|
+
x2("(?<=\\Babc)d", "aabcd", 4, 5)
|
998
|
+
x2("a\\b?a", "aa", 0, 2)
|
999
|
+
x2("[^x]*x", "aaax", 0, 4)
|
1000
|
+
x2("(?i)[\\x{0}-B]+", "\x00\x01\x02\x1f\x20@AaBbC", 0, 10)
|
1001
|
+
x2("(?i)a{2}", "AA", 0, 2)
|
1002
|
+
if is_unicode_encoding(onig_encoding):
|
1003
|
+
# The longest property name
|
1004
|
+
x2("\\p{Other_Default_Ignorable_Code_Point}+", "\u034F\uFFF8\U000E0FFF", 0, 3)
|
1005
|
+
# The longest block name
|
1006
|
+
x2("\\p{In_Unified_Canadian_Aboriginal_Syllabics_Extended}+", "\u18B0\u18FF", 0, 2)
|
1007
|
+
# Unicode case fold
|
1008
|
+
x2("(?i)\u1ffc", "\u2126\u1fbe", 0, 2)
|
1009
|
+
x2("(?i)\u1ffc", "\u1ff3", 0, 1)
|
1010
|
+
x2("(?i)\u0390", "\u03b9\u0308\u0301", 0, 3)
|
1011
|
+
x2("(?i)\u03b9\u0308\u0301", "\u0390", 0, 1)
|
1012
|
+
x2("(?i)ff", "\ufb00", 0, 1)
|
1013
|
+
x2("(?i)\ufb01", "fi", 0, 2)
|
1014
|
+
x2("(?i)\u0149\u0149", "\u0149\u0149", 0, 2)
|
1015
|
+
# Other Unicode tests
|
1016
|
+
x2("\\x{25771}", "\U00025771", 0, 1)
|
1017
|
+
x2("[0-9-a]+", " 0123456789-a ", 1, 13) # same as [0-9\-a]
|
1018
|
+
x2("[0-9-\\s]+", " 0123456789-a ", 0, 12) # same as [0-9\-\s]
|
1019
|
+
x2("(?i:a) B", "a B", 0, 3);
|
1020
|
+
x2("(?i:a )B", "a B", 0, 3);
|
1021
|
+
x2("B (?i:a)", "B a", 0, 3);
|
1022
|
+
x2("B(?i: a)", "B a", 0, 3);
|
1023
|
+
if is_unicode_encoding(onig_encoding):
|
1024
|
+
x2("(?a)[\\p{Space}\\d]", "\u00a0", 0, 1)
|
1025
|
+
x2("(?a)[\\d\\p{Space}]", "\u00a0", 0, 1)
|
1026
|
+
n("(?a)[^\\p{Space}\\d]", "\u00a0")
|
1027
|
+
n("(?a)[^\\d\\p{Space}]", "\u00a0")
|
1028
|
+
x2("(?d)[[:space:]\\d]", "\u00a0", 0, 1)
|
1029
|
+
n("(?d)[^\\d[:space:]]", "\u00a0")
|
1030
|
+
n("x.*?\\Z$", "x\ny")
|
1031
|
+
n("x.*?\\Z$", "x\r\ny")
|
1032
|
+
x2("x.*?\\Z$", "x\n", 0, 1)
|
1033
|
+
x2("x.*?\\Z$", "x\r\n", 0, 2) # \Z will match between \r and \n, if
|
1034
|
+
# ONIG_OPTION_NEWLINE_CRLF isn't specified.
|
1035
|
+
x2("(?<=fo).*", "foo", 2, 3)
|
1036
|
+
x2("(?m)(?<=fo).*", "foo", 2, 3)
|
1037
|
+
x2("(?m)(?<=fo).+", "foo", 2, 3)
|
1038
|
+
x2("\\n?\\z", "hello", 5, 5)
|
1039
|
+
x2("\\z", "hello", 5, 5)
|
1040
|
+
x2("\\n?\\z", "こんにちは", 5, 5)
|
1041
|
+
x2("\\z", "こんにちは", 5, 5)
|
1042
|
+
x2("()" * 32767, "", 0, 0) # Issue #24
|
1043
|
+
x2("\\h+ \\H+", " 0123456789aBcDeF gh", 1, 20)
|
1044
|
+
x2("[\\h]+ [\\H]+", " 0123456789aBcDeF gh", 1, 20)
|
1045
|
+
x2("\\A(|.|(?:(.)\\g<1>\\k<2+0>))\\z", "reer", 0, 4)
|
1046
|
+
x2("\\A(?<a>|.|(?:(?<b>.)\\g<a>\\k<b+0>))\\z", "reer", 0, 4)
|
1047
|
+
x2(''' # Extended pattern
|
1048
|
+
(?<element> \g<stag> \g<content>* \g<etag> ){0}
|
1049
|
+
(?<stag> < \g<name> \s* > ){0}
|
1050
|
+
(?<name> [a-zA-Z_:]+ ){0}
|
1051
|
+
(?<content> [^<&]+ (\g<element> | [^<&]+)* ){0}
|
1052
|
+
(?<etag> </ \k<name+1> >){0}
|
1053
|
+
\g<element>''',
|
1054
|
+
"<foo>f<bar>bbb</bar>f</foo>", 0, 27, opt=onig.ONIG_OPTION_EXTEND)
|
1055
|
+
x2("\\p{Print}+", "\n a", 1, 3)
|
1056
|
+
x2("\\p{Graph}+", "\n a", 2, 3)
|
1057
|
+
n("a(?!b)", "ab");
|
1058
|
+
x2("(?:(.)\\1)*", "a" * 300, 0, 300)
|
1059
|
+
x2("\\cA\\C-B\\a[\\b]\\t\\n\\v\\f\\r\\e\\c?", "\x01\x02\x07\x08\x09\x0a\x0b\x0c\x0d\x1b\x7f", 0, 11)
|
1060
|
+
x2("(?<=(?:[a-z]|\\w){3})x", "ab1x", 3, 4) # repeat inside look-behind
|
1061
|
+
x2("(?<n>(a|b\\g<n>c){3,5}?)", "baaaaca", 1, 4)
|
1062
|
+
x2("\\p{WoRd}", "a", 0, 1) # property name is not case sensitive
|
1063
|
+
n("[[:WoRd:]]", "a", err=onig.ONIGERR_INVALID_POSIX_BRACKET_TYPE) # POSIX bracket name is case sensitive
|
1064
|
+
|
1065
|
+
# ONIG_OPTION_FIND_LONGEST option
|
1066
|
+
x2("foo|foobar", "foobar", 0, 3)
|
1067
|
+
x2("foo|foobar", "foobar", 0, 6, opt=onig.ONIG_OPTION_FIND_LONGEST)
|
1068
|
+
|
1069
|
+
# character classes (tests for character class optimization)
|
1070
|
+
x2("[@][a]", "@a", 0, 2);
|
1071
|
+
x2(".*[a][b][c][d][e]", "abcde", 0, 5);
|
1072
|
+
x2("(?i)[A\\x{41}]", "a", 0, 1);
|
1073
|
+
x2("[abA]", "a", 0, 1);
|
1074
|
+
x2("[[ab]&&[ac]]+", "aaa", 0, 3);
|
1075
|
+
x2("[[あい]&&[あう]]+", "あああ", 0, 3);
|
1076
|
+
|
1077
|
+
# possessive quantifiers
|
1078
|
+
n("a?+a", "a")
|
1079
|
+
n("a*+a", "aaaa")
|
1080
|
+
n("a++a", "aaaa")
|
1081
|
+
x2("a{2,3}+a", "aaa", 0, 3) # Not a possessive quantifier in Ruby,
|
1082
|
+
# same as "(?:a{2,3})+a"
|
1083
|
+
n("a{2,3}+a", "aaa", syn=onig.ONIG_SYNTAX_PERL)
|
1084
|
+
|
1085
|
+
# automatic possessification
|
1086
|
+
x2("\\w+\\W", "abc#", 0, 4)
|
1087
|
+
x2("[a-c]+\\W", "abc#", 0, 4)
|
1088
|
+
x2("[a-c#]+\\W", "abc#", 0, 4)
|
1089
|
+
x2("[^a-c]+\\W", "def#", 0, 4)
|
1090
|
+
x2("(?a)[^a-c]+\\W", "def#", 0, 4)
|
1091
|
+
x2("a+\\w", "aaaa", 0, 4)
|
1092
|
+
x2("#+\\w", "###a", 0, 4)
|
1093
|
+
x2("(?a)a+\\w", "aaaa", 0, 4)
|
1094
|
+
x2("(?a)あ+\\w", "あああa", 0, 4)
|
1095
|
+
x2("[a-c]+[d-f]", "abcd", 0, 4)
|
1096
|
+
x2("[^d-f]+[d-f]", "abcd", 0, 4)
|
1097
|
+
x2("[a-cあ]+[d-f]", "abcd", 0, 4)
|
1098
|
+
|
1099
|
+
# linebreak
|
1100
|
+
x2("\\R", "\n", 0, 1)
|
1101
|
+
x2("\\R", "\r", 0, 1)
|
1102
|
+
x2("\\R{3}", "\r\r\n\n", 0, 4)
|
1103
|
+
|
1104
|
+
if (is_unicode_encoding(onig_encoding)):
|
1105
|
+
x2("\\R", "\u0085", 0, 1)
|
1106
|
+
x2("\\R", "\u2028", 0, 1)
|
1107
|
+
x2("\\R", "\u2029", 0, 1)
|
1108
|
+
|
1109
|
+
# extended grapheme cluster
|
1110
|
+
x2("\\X{5}", "あいab\n", 0, 5)
|
1111
|
+
if is_unicode_encoding(onig_encoding):
|
1112
|
+
x2("\\X", "\u306F\u309A\n", 0, 2)
|
1113
|
+
|
1114
|
+
# keep
|
1115
|
+
x2("ab\\Kcd", "abcd", 2, 4)
|
1116
|
+
x2("ab\\Kc(\\Kd|z)", "abcd", 3, 4)
|
1117
|
+
x2("ab\\Kc(\\Kz|d)", "abcd", 2, 4)
|
1118
|
+
x2("(a\\K)*", "aaab", 3, 3)
|
1119
|
+
x3("(a\\K)*", "aaab", 2, 3, 1)
|
1120
|
+
# x2("a\\K?a", "aa", 0, 2) # error: differs from Perl
|
1121
|
+
x2("ab(?=c\Kd)", "abcd", 2, 2) # This behaviour is currently not well defined. (see: perlre)
|
1122
|
+
x2("(?<=a\\Kb|aa)cd", "abcd", 1, 4) # This behaviour is currently not well defined. (see: perlre)
|
1123
|
+
x2("(?<=ab|a\\Ka)cd", "abcd", 2, 4) # This behaviour is currently not well defined. (see: perlre)
|
1124
|
+
|
1125
|
+
# named group and subroutine call
|
1126
|
+
x2("(?<name_2>ab)(?&name_2)", "abab", 0, 4, syn=onig.ONIG_SYNTAX_PERL);
|
1127
|
+
x2("(?<name_2>ab)(?1)", "abab", 0, 4, syn=onig.ONIG_SYNTAX_PERL);
|
1128
|
+
x2("(?'n'|\\((?&n)\\))+$", "()(())", 0, 6, syn=onig.ONIG_SYNTAX_PERL);
|
1129
|
+
x2("(a|x(?-1)x)", "xax", 0, 3, syn=onig.ONIG_SYNTAX_PERL);
|
1130
|
+
x2("(a|(x(?-2)x))", "xax", 0, 3, syn=onig.ONIG_SYNTAX_PERL);
|
1131
|
+
x2("a|x(?0)x", "xax", 0, 3, syn=onig.ONIG_SYNTAX_PERL);
|
1132
|
+
x2("a|x(?R)x", "xax", 0, 3, syn=onig.ONIG_SYNTAX_PERL);
|
1133
|
+
x2("(a|x\g<0>x)", "xax", 0, 3);
|
1134
|
+
x2("(a|x\g'0'x)", "xax", 0, 3);
|
1135
|
+
x2("(?-i:(?+1))(?i:(a)){0}", "A", 0, 1, syn=onig.ONIG_SYNTAX_PERL);
|
1136
|
+
x2("(?-i:\g<+1>)(?i:(a)){0}", "A", 0, 1);
|
1137
|
+
x2("(?-i:\g'+1')(?i:(a)){0}", "A", 0, 1);
|
1138
|
+
n("(.(?=\\g<1>))", "", err=onig.ONIGERR_NEVER_ENDING_RECURSION)
|
1139
|
+
n("(a)(?<n>b)\\g<1>\\g<n>", "abab", err=onig.ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED)
|
1140
|
+
x2("(a)(?<n>b)(?1)(?&n)", "abab", 0, 4, syn=onig.ONIG_SYNTAX_PERL)
|
1141
|
+
|
1142
|
+
# character set modifiers
|
1143
|
+
x2("(?u)\\w+", "あa#", 0, 2);
|
1144
|
+
x2("(?a)\\w+", "あa#", 1, 2);
|
1145
|
+
x2("(?u)\\W+", "あa#", 2, 3);
|
1146
|
+
x2("(?a)\\W+", "あa#", 0, 1);
|
1147
|
+
|
1148
|
+
x2("(?a)\\b", "あa", 1, 1);
|
1149
|
+
x2("(?a)\\w\\b", "aあ", 0, 1);
|
1150
|
+
x2("(?a)\\B", "a ああ ", 2, 2);
|
1151
|
+
|
1152
|
+
x2("(?u)\\B", "あ ", 2, 2);
|
1153
|
+
x2("(?a)\\B", "あ ", 0, 0);
|
1154
|
+
x2("(?a)\\B", "aあ ", 2, 2);
|
1155
|
+
|
1156
|
+
x2("(?a)a\\b", " a", 1, 2)
|
1157
|
+
x2("(?u)a\\b", " a", 1, 2)
|
1158
|
+
n("(?a)a\\B", " a")
|
1159
|
+
n("(?a)あ\\b", " あ")
|
1160
|
+
x2("(?u)あ\\b", " あ", 1, 2)
|
1161
|
+
x2("(?a)あ\\B", " あ", 1, 2)
|
1162
|
+
n("(?u)あ\\B", " あ")
|
1163
|
+
|
1164
|
+
x2("(?a)\\p{Alpha}\\P{Alpha}", "a。", 0, 2);
|
1165
|
+
x2("(?u)\\p{Alpha}\\P{Alpha}", "a。", 0, 2);
|
1166
|
+
x2("(?a)[[:word:]]+", "aあ", 0, 1);
|
1167
|
+
x2("(?a)[[:^word:]]+", "aあ", 1, 2);
|
1168
|
+
x2("(?u)[[:word:]]+", "aあ", 0, 2);
|
1169
|
+
n("(?u)[[:^word:]]+", "aあ");
|
1170
|
+
|
1171
|
+
x2("(?iu)\\p{lower}\\p{upper}", "Ab", 0, 2);
|
1172
|
+
x2("(?ia)\\p{lower}\\p{upper}", "Ab", 0, 2);
|
1173
|
+
x2("(?iu)[[:lower:]][[:upper:]]", "Ab", 0, 2);
|
1174
|
+
x2("(?ia)[[:lower:]][[:upper:]]", "Ab", 0, 2);
|
1175
|
+
|
1176
|
+
if is_unicode_encoding(onig_encoding):
|
1177
|
+
n("(?ia)\\w+", "\u212a\u017f"); # KELVIN SIGN, LATIN SMALL LETTER LONG S
|
1178
|
+
n("(?ia)[\\w]+", "\u212a\u017f");
|
1179
|
+
n("(?ia)[^\\W]+", "\u212a\u017f");
|
1180
|
+
x2("(?ia)[^\\W]+", "ks", 0, 2);
|
1181
|
+
n("(?iu)\\p{ASCII}", "\u212a");
|
1182
|
+
n("(?iu)\\P{ASCII}", "s");
|
1183
|
+
n("(?iu)[\\p{ASCII}]", "\u212a");
|
1184
|
+
n("(?iu)[\\P{ASCII}]", "s");
|
1185
|
+
n("(?ia)\\p{ASCII}", "\u212a");
|
1186
|
+
n("(?ia)\\P{ASCII}", "s");
|
1187
|
+
n("(?ia)[\\p{ASCII}]", "\u212a");
|
1188
|
+
n("(?ia)[\\P{ASCII}]", "s");
|
1189
|
+
x2("(?iu)[s]+", "Ss\u017f ", 0, 3);
|
1190
|
+
x2("(?ia)[s]+", "Ss\u017f ", 0, 3);
|
1191
|
+
x2("(?iu)[^s]+", "Ss\u017f ", 3, 4);
|
1192
|
+
x2("(?ia)[^s]+", "Ss\u017f ", 3, 4);
|
1193
|
+
x2("(?iu)[[:lower:]]", "\u017f", 0, 1);
|
1194
|
+
n("(?ia)[[:lower:]]", "\u017f");
|
1195
|
+
x2("(?u)[[:upper:]]", "\u212a", 0, 1);
|
1196
|
+
n("(?a)[[:upper:]]", "\u212a");
|
1197
|
+
|
1198
|
+
# \< and \>
|
1199
|
+
x2("\\<abc\\>", " abc ", 1, 4, syn=onig.ONIG_SYNTAX_GREP)
|
1200
|
+
n("\\<abc\\>", "zabc ", syn=onig.ONIG_SYNTAX_GREP)
|
1201
|
+
n("\\<abc\\>", " abcd", syn=onig.ONIG_SYNTAX_GREP)
|
1202
|
+
n("\\<abc\\>", "あabcい", syn=onig.ONIG_SYNTAX_GREP)
|
1203
|
+
x2("\\<abc\\>", "あabcい", 1, 4, syn=onig.ONIG_SYNTAX_GREP, opt=onig.ONIG_OPTION_ASCII_RANGE)
|
1204
|
+
n("\\<abc\\>", "zabcい", syn=onig.ONIG_SYNTAX_GREP, opt=onig.ONIG_OPTION_ASCII_RANGE)
|
1205
|
+
n("\\<abc\\>", "あabcd", syn=onig.ONIG_SYNTAX_GREP, opt=onig.ONIG_OPTION_ASCII_RANGE)
|
1206
|
+
|
1207
|
+
# \g{} backref
|
1208
|
+
x2("((?<name1>\\d)|(?<name2>\\w))(\\g{name1}|\\g{name2})", "ff", 0, 2, syn=onig.ONIG_SYNTAX_PERL);
|
1209
|
+
x2("(?:(?<x>)|(?<x>efg))\\g{x}", "", 0, 0, syn=onig.ONIG_SYNTAX_PERL);
|
1210
|
+
x2("(?:(?<x>abc)|(?<x>efg))\\g{x}", "abcefgefg", 3, 9, syn=onig.ONIG_SYNTAX_PERL);
|
1211
|
+
n("(?:(?<x>abc)|(?<x>efg))\\g{x}", "abcefg", syn=onig.ONIG_SYNTAX_PERL);
|
1212
|
+
x2("((.*)a\\g{2}f)", "bacbabf", 3, 7, syn=onig.ONIG_SYNTAX_PERL);
|
1213
|
+
x2("(.*)a\\g{1}f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23, syn=onig.ONIG_SYNTAX_PERL);
|
1214
|
+
x2("((.*)a\\g{-1}f)", "bacbabf", 3, 7, syn=onig.ONIG_SYNTAX_PERL);
|
1215
|
+
x2("(.*)a\\g{-1}f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23, syn=onig.ONIG_SYNTAX_PERL);
|
1216
|
+
x2("(あ*)(い*)\\g{-2}\\g{-1}", "あああいいあああいい", 0, 10, syn=onig.ONIG_SYNTAX_PERL);
|
1217
|
+
|
1218
|
+
# Python/PCRE compatible named group
|
1219
|
+
x2("(?P<name_2>ab)(?P>name_2)", "abab", 0, 4, syn=onig.ONIG_SYNTAX_PERL);
|
1220
|
+
x2("(?P<n>|\\((?P>n)\\))+$", "()(())", 0, 6, syn=onig.ONIG_SYNTAX_PERL);
|
1221
|
+
x2("((?P<name1>\\d)|(?P<name2>\\w))((?P=name1)|(?P=name2))", "ff", 0, 2, syn=onig.ONIG_SYNTAX_PERL);
|
1222
|
+
|
1223
|
+
# Fullwidth Alphabet
|
1224
|
+
n("abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
|
1225
|
+
x2("(?i)abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz", 0, 26);
|
1226
|
+
x2("(?i)abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 0, 26);
|
1227
|
+
x2("(?i)ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz", 0, 26);
|
1228
|
+
x2("(?i)ABCDEFGHIJKLMNOPQRSTUVWXYZ", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 0, 26);
|
1229
|
+
|
1230
|
+
# Greek
|
1231
|
+
n("αβγδεζηθικλμνξοπρστυφχψω", "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ");
|
1232
|
+
x2("(?i)αβγδεζηθικλμνξοπρστυφχψω", "αβγδεζηθικλμνξοπρστυφχψω", 0, 24);
|
1233
|
+
x2("(?i)αβγδεζηθικλμνξοπρστυφχψω", "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", 0, 24);
|
1234
|
+
x2("(?i)ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", "αβγδεζηθικλμνξοπρστυφχψω", 0, 24);
|
1235
|
+
x2("(?i)ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", 0, 24);
|
1236
|
+
|
1237
|
+
# Cyrillic
|
1238
|
+
n("абвгдеёжзийклмнопрстуфхцчшщъыьэюя", "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ");
|
1239
|
+
x2("(?i)абвгдеёжзийклмнопрстуфхцчшщъыьэюя", "абвгдеёжзийклмнопрстуфхцчшщъыьэюя", 0, 33);
|
1240
|
+
x2("(?i)абвгдеёжзийклмнопрстуфхцчшщъыьэюя", "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", 0, 33);
|
1241
|
+
x2("(?i)АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", "абвгдеёжзийклмнопрстуфхцчшщъыьэюя", 0, 33);
|
1242
|
+
x2("(?i)АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", 0, 33);
|
1243
|
+
|
1244
|
+
# multiple name definition
|
1245
|
+
x2("(?<a>a)(?<a>b)\\k<a>", "aba", 0, 3)
|
1246
|
+
x2("(?<a>a)(?<a>b)\\k<a>", "abb", 0, 3)
|
1247
|
+
x2("(?<a>a)(?<a>b)\\g{a}", "aba", 0, 3, syn=onig.ONIG_SYNTAX_PERL)
|
1248
|
+
# n("(?<a>a)(?<a>b)\\g{a}", "abb", syn=onig.ONIG_SYNTAX_PERL)
|
1249
|
+
n("(?<a>a)(?<a>b)\\g<a>", "aba", err=onig.ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL)
|
1250
|
+
x2("(?<a>[ac])(?<a>b)(?&a)", "abc", 0, 3, syn=onig.ONIG_SYNTAX_PERL)
|
1251
|
+
n("(?<a>[ac])(?<a>b)(?&a)", "abb", syn=onig.ONIG_SYNTAX_PERL)
|
1252
|
+
x2("(?:(?<x>abc)|(?<x>efg))(?i:\\k<x>)", "abcefgEFG", 3, 9)
|
1253
|
+
x2("(?<x>a)(?<x>b)(?i:\\k<x>)+", "abAB", 0, 4)
|
1254
|
+
|
1255
|
+
# branch reset
|
1256
|
+
# x3("(?|(c)|(?:(b)|(a)))", "a", 0, 1, 2)
|
1257
|
+
# x3("(?|(c)|(?|(b)|(a)))", "a", 0, 1, 1)
|
1258
|
+
|
1259
|
+
# conditional expression
|
1260
|
+
x2("(?:(a)|(b))(?(1)cd)e", "acde", 0, 4)
|
1261
|
+
n("(?:(a)|(b))(?(1)cd)e", "ae")
|
1262
|
+
x2("(?:(a)|(b))(?(2)cd)e", "ae", 0, 2)
|
1263
|
+
n("(?:(a)|(b))(?(2)cd)e", "acde")
|
1264
|
+
x2("(?:(a)|(b))(?(1)c|d)", "ac", 0, 2)
|
1265
|
+
x2("(?:(a)|(b))(?(1)c|d)", "bd", 0, 2)
|
1266
|
+
n("(?:(a)|(b))(?(1)c|d)", "ad")
|
1267
|
+
n("(?:(a)|(b))(?(1)c|d)", "bc")
|
1268
|
+
x2("(?:(a)|(b))(?:(?(1)cd)e|fg)", "acde", 0, 4)
|
1269
|
+
x2("(?:(a)|(b))(?:(?(1)cd|x)e|fg)", "bxe", 0, 3)
|
1270
|
+
n("(?:(a)|(b))(?:(?(2)cd|x)e|fg)", "bxe")
|
1271
|
+
x2("(?:(?<x>a)|(?<y>b))(?:(?(<x>)cd|x)e|fg)", "bxe", 0, 3)
|
1272
|
+
n("(?:(?<x>a)|(?<y>b))(?:(?(<y>)cd|x)e|fg)", "bxe")
|
1273
|
+
x2("((?<=a))?(?(1)b|c)", "abc", 1, 2)
|
1274
|
+
x2("((?<=a))?(?(1)b|c)", "bc", 1, 2)
|
1275
|
+
x2("((?<x>x)|(?<y>y))(?(<x>)y|x)", "xy", 0, 2)
|
1276
|
+
x2("((?<x>x)|(?<y>y))(?(<x>)y|x)", "yx", 0, 2)
|
1277
|
+
n("((?<x>x)|(?<y>y))(?(<x>)y|x)", "xx")
|
1278
|
+
n("((?<x>x)|(?<y>y))(?(<x>)y|x)", "yy")
|
1279
|
+
n("(a)?(?<n>b)?(?(1)a)(?(<n>)b)", "aa", err=onig.ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED)
|
1280
|
+
x2("(a)?(?<n>b)?(?(1)a)(?(<n>)b)", "aa", 0, 2, syn=onig.ONIG_SYNTAX_PERL)
|
1281
|
+
|
1282
|
+
# Implicit-anchor optimization
|
1283
|
+
x2("(?m:.*abc)", "dddabdd\nddabc", 0, 13) # optimized /(?m:.*abc)/ ==> /\A(?m:.*abc)/
|
1284
|
+
x2("(?m:.+abc)", "dddabdd\nddabc", 0, 13) # optimized
|
1285
|
+
x2("(?-m:.*abc)", "dddabdd\nddabc", 8, 13) # optimized /(?-m:.*abc)/ ==> /(?:^|\A)(?m:.*abc)/
|
1286
|
+
x2("(?-m:.+abc)", "dddabdd\nddabc", 8, 13) # optimized
|
1287
|
+
x2("(?-m:.*abc)", "dddabdd\nabc", 8, 11) # optimized
|
1288
|
+
n("(?-m:.+abc)", "dddabdd\nabc") # optimized
|
1289
|
+
x2("(?m:.*\\Z)", "dddabdd\nddabc", 0, 13) # optimized /(?m:.*\Z)/ ==> /\A(?m:.*\Z)/
|
1290
|
+
x2("(?-m:.*\\Z)", "dddabdd\nddabc", 8, 13) # optimized /(?-m:.*\Z)/ ==> /(?:^|\A)(?m:.*\Z)/
|
1291
|
+
x2("(.*)X\\1", "1234X2345", 1, 8) # not optimized
|
1292
|
+
|
1293
|
+
# Allow options in look-behind
|
1294
|
+
x2("(?<=(?i)ab)cd", "ABcd", 2, 4)
|
1295
|
+
x2("(?<=(?i:ab))cd", "ABcd", 2, 4)
|
1296
|
+
n("(?<=(?i)ab)cd", "ABCD")
|
1297
|
+
n("(?<=(?i:ab))cd", "ABCD")
|
1298
|
+
x2("(?<!(?i)ab)cd", "aacd", 2, 4)
|
1299
|
+
x2("(?<!(?i:ab))cd", "aacd", 2, 4)
|
1300
|
+
n("(?<!(?i)ab)cd", "ABcd")
|
1301
|
+
n("(?<!(?i:ab))cd", "ABcd")
|
1302
|
+
|
1303
|
+
# Perl syntax
|
1304
|
+
x2("\\Q()\\\\E", "()\\", 0, 3, syn=onig.ONIG_SYNTAX_PERL)
|
1305
|
+
|
1306
|
+
print("\nEncoding:", encoding)
|
1307
|
+
print("RESULT SUCC: %d, FAIL: %d, ERROR: %d (by Onigmo %s)" % (
|
1308
|
+
nsucc, nfail, nerror, onig.onig_version()))
|
1309
|
+
|
1310
|
+
onig.onig_end()
|
1311
|
+
|
1312
|
+
if (nfail == 0 and nerror == 0):
|
1313
|
+
exit(0)
|
1314
|
+
else:
|
1315
|
+
exit(-1)
|
1316
|
+
|
1317
|
+
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
|
1318
|
+
main()
|
1319
|
+
|