regextest 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +3 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +25 -0
  7. data/README.md +88 -0
  8. data/Rakefile +55 -0
  9. data/bin/console +14 -0
  10. data/bin/regextest +4 -0
  11. data/bin/setup +7 -0
  12. data/contrib/Onigmo/RE.txt +522 -0
  13. data/contrib/Onigmo/UnicodeProps.txt +728 -0
  14. data/contrib/Onigmo/testpy.py +1319 -0
  15. data/contrib/unicode/Blocks.txt +298 -0
  16. data/contrib/unicode/CaseFolding.txt +1414 -0
  17. data/contrib/unicode/DerivedAge.txt +1538 -0
  18. data/contrib/unicode/DerivedCoreProperties.txt +11029 -0
  19. data/contrib/unicode/PropList.txt +1525 -0
  20. data/contrib/unicode/PropertyAliases.txt +193 -0
  21. data/contrib/unicode/PropertyValueAliases.txt +1420 -0
  22. data/contrib/unicode/README.txt +25 -0
  23. data/contrib/unicode/Scripts.txt +2539 -0
  24. data/contrib/unicode/UnicodeData.txt +29215 -0
  25. data/lib/pre-case-folding.rb +101 -0
  26. data/lib/pre-posix-char-class.rb +150 -0
  27. data/lib/pre-unicode.rb +116 -0
  28. data/lib/regextest.rb +268 -0
  29. data/lib/regextest/back.rb +58 -0
  30. data/lib/regextest/back/element.rb +151 -0
  31. data/lib/regextest/back/main.rb +356 -0
  32. data/lib/regextest/back/result.rb +498 -0
  33. data/lib/regextest/back/test-case.rb +268 -0
  34. data/lib/regextest/back/work-thread.rb +119 -0
  35. data/lib/regextest/common.rb +63 -0
  36. data/lib/regextest/front.rb +60 -0
  37. data/lib/regextest/front/anchor.rb +45 -0
  38. data/lib/regextest/front/back-refer.rb +120 -0
  39. data/lib/regextest/front/bracket-parser.rb +400 -0
  40. data/lib/regextest/front/bracket-parser.y +117 -0
  41. data/lib/regextest/front/bracket-scanner.rb +124 -0
  42. data/lib/regextest/front/bracket.rb +64 -0
  43. data/lib/regextest/front/builtin-functions.rb +31 -0
  44. data/lib/regextest/front/case-folding.rb +18 -0
  45. data/lib/regextest/front/char-class.rb +243 -0
  46. data/lib/regextest/front/empty.rb +43 -0
  47. data/lib/regextest/front/letter.rb +327 -0
  48. data/lib/regextest/front/manage-parentheses.rb +74 -0
  49. data/lib/regextest/front/parenthesis.rb +153 -0
  50. data/lib/regextest/front/parser.rb +1366 -0
  51. data/lib/regextest/front/parser.y +271 -0
  52. data/lib/regextest/front/range.rb +60 -0
  53. data/lib/regextest/front/repeat.rb +90 -0
  54. data/lib/regextest/front/repeatable.rb +77 -0
  55. data/lib/regextest/front/scanner.rb +187 -0
  56. data/lib/regextest/front/selectable.rb +65 -0
  57. data/lib/regextest/front/sequence.rb +73 -0
  58. data/lib/regextest/front/unicode.rb +1272 -0
  59. data/lib/regextest/regex-option.rb +144 -0
  60. data/lib/regextest/regexp.rb +44 -0
  61. data/lib/regextest/version.rb +5 -0
  62. data/lib/tst-reg-test.rb +159 -0
  63. data/regextest.gemspec +26 -0
  64. metadata +162 -0
@@ -0,0 +1,1319 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ from __future__ import print_function, unicode_literals
5
+ from ctypes import *
6
+ import onig
7
+ import sys
8
+ import io
9
+ import locale
10
+
11
# Running tallies updated by xx(): unexpected errors, passes and failures.
nerror = nsucc = nfail = 0

# default encoding
onig_encoding = onig.ONIG_ENCODING_EUC_JP
encoding = onig_encoding[0].name.decode()

# special syntactic settings: a private copy of the default syntax with
# the ASCII_RANGE option bit cleared
_syntax_default = onig.OnigSyntaxType()
onig.onig_copy_syntax(byref(_syntax_default), onig.ONIG_SYNTAX_DEFAULT)
_syntax_default.options = (_syntax_default.options
                           & ~onig.ONIG_OPTION_ASCII_RANGE)
syntax_default = byref(_syntax_default)
24
+
25
+
26
class strptr:
    """A helper class to get a raw pointer into a byte string.

    The wrapped bytes object is kept as an attribute so the buffer stays
    alive for as long as the helper does.
    """

    def __init__(self, s):
        """Wrap the bytes object s.

        Raises TypeError when s is not a bytes instance.
        """
        if not isinstance(s, bytes):
            raise TypeError("strptr requires a bytes object")
        self._str = s
        try:
            self._ptr = cast(self._str, c_void_p)   # CPython 2.x/3.x
        except TypeError:
            self._ptr = c_void_p(self._str)         # PyPy 1.x

    def getptr(self, offset=0):
        """Return the address of the byte at offset as an integer.

        An offset of -1 means the end of the string (one past the last
        byte).  Any other offset outside 0..len(s) raises IndexError so
        that callers can never obtain an address outside the buffer.
        """
        size = len(self._str)
        if offset == -1:    # -1 means the end of the string
            offset = size
        elif offset < 0 or offset > size:
            raise IndexError("offset out of range")
        return self._ptr.value + offset
43
+
44
def cc_to_cb(s, enc, cc):
    """convert char count to byte count

    arguments:
      s -- unicode string
      enc -- encoding name
      cc -- char count

    Returns the number of bytes the first cc characters of s occupy
    when encoded with enc.  Raises IndexError when cc is negative or
    larger than the number of characters in s.
    """
    if cc < 0:
        # a negative count would otherwise slice from the end of the string
        raise IndexError("char count must not be negative")
    # Count characters via UTF-32 (4 bytes per code point) instead of
    # indexing s directly.
    utf32 = s.encode('UTF-32LE')
    prefix_len = cc * 4
    if prefix_len > len(utf32):
        raise IndexError("char count exceeds string length")
    return len(utf32[:prefix_len].decode('UTF-32LE').encode(enc))
57
+
58
def print_result(result, pattern, file=None):
    """Write one "<result>: <pattern>" report line.

    arguments:
      result -- short status label such as "OK" or "FAIL"
      pattern -- text describing the test case
      file -- output stream; sys.stdout when omitted

    If the stream cannot encode pattern, the text of the encoding error
    is written in parentheses in its place.
    """
    if file is None:
        file = sys.stdout
    print(result + ": ", end='', file=file)
    try:
        print(pattern, file=file)
    except UnicodeEncodeError as e:
        # Finish the line on the same stream the label went to, so a
        # report aimed at stderr does not spill onto stdout.
        print('(' + str(e) + ')', file=file)
66
+
67
def xx(pattern, target, s_from, s_to, mem, not_match,
        syn=syntax_default, opt=onig.ONIG_OPTION_DEFAULT,
        err=onig.ONIG_NORMAL):
    """Run one regex test case, print its outcome and update the counters.

    arguments:
      pattern -- regex pattern (unicode string or bytes)
      target -- string to search (unicode string or bytes)
      s_from -- expected start of the checked region entry; a char count
                for unicode targets, used as-is for bytes targets
      s_to -- expected end of the checked region entry (same unit)
      mem -- index into the match region's beg/end arrays to check
      not_match -- True when the pattern is expected not to match
      syn -- syntax passed to onig_new
      opt -- compile options passed to onig_new
      err -- expected error code (onig.ONIG_NORMAL when none is expected)

    Results are reported through print_result(); nsucc, nfail and nerror
    are incremented accordingly.
    """
    global nerror
    global nsucc
    global nfail

    reg = onig.OnigRegex()
    einfo = onig.OnigErrorInfo()
    msg = create_string_buffer(onig.ONIG_MAX_ERROR_MESSAGE_LEN)

    pattern2 = pattern
    if not isinstance(pattern, bytes):
        pattern2 = pattern.encode(encoding)
    patternp = strptr(pattern2)

    target2 = target
    if not isinstance(target, bytes):
        # expected positions are given in characters; the region holds bytes
        s_from = cc_to_cb(target, encoding, s_from)
        s_to = cc_to_cb(target, encoding, s_to)
        target2 = target.encode(encoding)
    targetp = strptr(target2)

    # cut very long outputs; the ellipsis must have the same type as the
    # value it is appended to, otherwise bytes input raises TypeError
    limit = 100
    if len(target) > limit:
        target = target[:limit] + (
            b"..." if isinstance(target, bytes) else "...")
    if len(pattern) > limit:
        pattern = pattern[:limit] + (
            b"..." if isinstance(pattern, bytes) else "...")

    r = onig.onig_new(byref(reg), patternp.getptr(), patternp.getptr(-1),
                      opt, onig_encoding, syn, byref(einfo))
    if r != 0:
        # Compilation failed.  NOTE(review): reg is not freed on this
        # path, matching the original; presumably onig_new leaves nothing
        # to release on failure -- confirm against the Onigmo API.
        onig.onig_error_code_to_str(msg, r, byref(einfo))
        detail = "%s (/%s/ '%s')" % (msg.value.decode(), pattern, target)
        if r == err:
            nsucc += 1
            print_result("OK(E)", detail)
        else:
            nerror += 1
            print_result("ERROR", detail, file=sys.stderr)
        return

    try:
        if err != onig.ONIG_NORMAL:
            # an error was expected but the pattern compiled
            nfail += 1
            print_result("FAIL(E)", "/%s/ '%s'" % (pattern, target))
            return

        region = onig.onig_region_new()
        try:
            r = onig.onig_search(reg, targetp.getptr(), targetp.getptr(-1),
                                 targetp.getptr(), targetp.getptr(-1),
                                 region, onig.ONIG_OPTION_NONE)
            if r < onig.ONIG_MISMATCH:
                # search reported an error code
                onig.onig_error_code_to_str(msg, r)
                detail = "%s (/%s/ '%s')" % (
                    msg.value.decode(), pattern, target)
                if r == err:
                    nsucc += 1
                    print_result("OK(E)", detail)
                else:
                    nerror += 1
                    print_result("ERROR", detail, file=sys.stderr)
                return

            if r == onig.ONIG_MISMATCH:
                if not_match:
                    nsucc += 1
                    print_result("OK(N)", "/%s/ '%s'" % (pattern, target))
                else:
                    nfail += 1
                    print_result("FAIL", "/%s/ '%s'" % (pattern, target))
            elif not_match:
                nfail += 1
                print_result("FAIL(N)", "/%s/ '%s'" % (pattern, target))
            else:
                start = region[0].beg[mem]
                end = region[0].end[mem]
                if (start == s_from) and (end == s_to):
                    nsucc += 1
                    print_result("OK", "/%s/ '%s'" % (pattern, target))
                else:
                    nfail += 1
                    print_result("FAIL", "/%s/ '%s' %d-%d : %d-%d" % (
                        pattern, target, s_from, s_to, start, end))
        finally:
            onig.onig_region_free(region, 1)
    finally:
        # Always release the compiled regex, including on the search-error
        # path, which previously returned without freeing it.
        onig.onig_free(reg)
157
+
158
def x2(pattern, target, s_from, s_to, **kwargs):
    """Expect pattern to match target with region entry 0 at s_from..s_to.

    Extra keyword arguments are forwarded to xx().
    """
    xx(pattern, target, s_from, s_to, mem=0, not_match=False, **kwargs)
160
+
161
def x3(pattern, target, s_from, s_to, mem, **kwargs):
    """Expect pattern to match target with region entry mem at s_from..s_to.

    Extra keyword arguments are forwarded to xx().
    """
    xx(pattern, target, s_from, s_to, mem=mem, not_match=False, **kwargs)
163
+
164
def n(pattern, target, **kwargs):
    """Expect pattern NOT to match target.

    Extra keyword arguments are forwarded to xx().
    """
    xx(pattern, target, s_from=0, s_to=0, mem=0, not_match=True, **kwargs)
166
+
167
+
168
def is_unicode_encoding(enc):
    """Return True when enc is one of onig's UTF-8/UTF-16/UTF-32 encodings."""
    unicode_encodings = (
        onig.ONIG_ENCODING_UTF32_LE,
        onig.ONIG_ENCODING_UTF32_BE,
        onig.ONIG_ENCODING_UTF16_LE,
        onig.ONIG_ENCODING_UTF16_BE,
        onig.ONIG_ENCODING_UTF8,
    )
    return enc in unicode_encodings
174
+
175
+
176
def set_encoding(enc):
    """Select the encoding of the test target.

    enc is either an onig.OnigEncoding object or one of the names
    "EUC-JP", "SJIS", "UTF-8", "UTF-16LE", "UTF-16BE", "UTF-32LE",
    "UTF-32BE".  An unknown name raises KeyError.  Updates the module
    globals onig_encoding and encoding.
    """
    global onig_encoding
    global encoding

    if not isinstance(enc, onig.OnigEncoding):
        by_name = {
            "EUC-JP": onig.ONIG_ENCODING_EUC_JP,
            "SJIS": onig.ONIG_ENCODING_SJIS,
            "UTF-8": onig.ONIG_ENCODING_UTF8,
            "UTF-16LE": onig.ONIG_ENCODING_UTF16_LE,
            "UTF-16BE": onig.ONIG_ENCODING_UTF16_BE,
            "UTF-32LE": onig.ONIG_ENCODING_UTF32_LE,
            "UTF-32BE": onig.ONIG_ENCODING_UTF32_BE,
        }
        enc = by_name[enc]

    onig_encoding = enc
    encoding = enc[0].name.decode()
192
+
193
+
194
def set_output_encoding(enc=None):
    """Rebind sys.stdout and sys.stderr to text writers using encoding enc.

    When enc is None the locale's preferred encoding is used.
    """
    def get_text_writer(fo, **kwargs):
        # Open a new text writer on fo's file descriptor without taking
        # ownership of the descriptor.
        options = dict(kwargs)
        options.setdefault('errors', 'backslashreplace')  # use \uXXXX style
        options.setdefault('closefd', False)
        writer = io.open(fo.fileno(), mode='w', **options)

        # work around for Python 2.x: accept byte strings as well by
        # decoding them with the locale encoding before writing
        raw_write = writer.write    # save the original write() function
        locale_enc = locale.getpreferredencoding()

        def write_text(s):
            if isinstance(s, bytes):
                return raw_write(s.decode(locale_enc))  # convert to unistr
            return raw_write(s)

        writer.write = write_text
        return writer

    target_enc = locale.getpreferredencoding() if enc is None else enc
    sys.stdout = get_text_writer(sys.stdout, encoding=target_enc)
    sys.stderr = get_text_writer(sys.stderr, encoding=target_enc)
213
+
214
+
215
+ def main():
216
+ # set encoding of the test target
217
+ if len(sys.argv) > 1:
218
+ try:
219
+ set_encoding(sys.argv[1])
220
+ except KeyError:
221
+ print("test target encoding error")
222
+ print("Usage: python testpy.py [test target encoding] [output encoding]")
223
+ sys.exit()
224
+
225
+ # set encoding of stdout/stderr
226
+ outenc = None
227
+ if len(sys.argv) > 2:
228
+ outenc = sys.argv[2]
229
+ set_output_encoding(outenc)
230
+
231
+ # Copied from onig-5.9.2/testc.c
232
+ # '?\?' which is used to avoid trigraph is replaced by '??'.
233
+ # Match positions are specified by unit of character instead of byte.
234
+
235
+ x2("", "", 0, 0);
236
+ x2("^", "", 0, 0);
237
+ x2("$", "", 0, 0);
238
+ x2("\\G", "", 0, 0);
239
+ x2("\\A", "", 0, 0);
240
+ x2("\\Z", "", 0, 0);
241
+ x2("\\z", "", 0, 0);
242
+ x2("^$", "", 0, 0);
243
+ x2("\\ca", "\001", 0, 1);
244
+ x2("\\C-b", "\002", 0, 1);
245
+ x2("\\c\\\\", "\034", 0, 1);
246
+ x2("q[\\c\\\\]", "q\034", 0, 2);
247
+ x2("", "a", 0, 0);
248
+ x2("a", "a", 0, 1);
249
+ if onig_encoding == onig.ONIG_ENCODING_UTF16_LE:
250
+ x2("\\x61\\x00", "a", 0, 1);
251
+ elif onig_encoding == onig.ONIG_ENCODING_UTF16_BE:
252
+ x2("\\x00\\x61", "a", 0, 1);
253
+ elif onig_encoding == onig.ONIG_ENCODING_UTF32_LE:
254
+ x2("\\x61\\x00\\x00\\x00", "a", 0, 1);
255
+ elif onig_encoding == onig.ONIG_ENCODING_UTF32_BE:
256
+ x2("\\x00\\x00\\x00\\x61", "a", 0, 1);
257
+ else:
258
+ x2("\\x61", "a", 0, 1);
259
+ x2("aa", "aa", 0, 2);
260
+ x2("aaa", "aaa", 0, 3);
261
+ x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35);
262
+ x2("ab", "ab", 0, 2);
263
+ x2("b", "ab", 1, 2);
264
+ x2("bc", "abc", 1, 3);
265
+ x2("(?i:#RET#)", "#INS##RET#", 5, 10);
266
+ if onig_encoding == onig.ONIG_ENCODING_UTF16_LE:
267
+ x2("\\17\\00", "\017", 0, 1);
268
+ x2("\\x1f\\x00", "\x1f", 0, 1);
269
+ elif onig_encoding == onig.ONIG_ENCODING_UTF16_BE:
270
+ x2("\\00\\17", "\017", 0, 1);
271
+ x2("\\x00\\x1f", "\x1f", 0, 1);
272
+ elif onig_encoding == onig.ONIG_ENCODING_UTF32_LE:
273
+ x2("\\17\\00\\00\\00", "\017", 0, 1);
274
+ x2("\\x1f\\x00\\x00\\x00", "\x1f", 0, 1);
275
+ elif onig_encoding == onig.ONIG_ENCODING_UTF32_BE:
276
+ x2("\\00\\00\\00\\17", "\017", 0, 1);
277
+ x2("\\x00\\x00\\x00\\x1f", "\x1f", 0, 1);
278
+ else:
279
+ x2("\\17", "\017", 0, 1);
280
+ x2("\\x1f", "\x1f", 0, 1);
281
+ x2("a(?#....\\\\JJJJ)b", "ab", 0, 2);
282
+ x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7);
283
+ x2(".", "a", 0, 1);
284
+ n(".", "");
285
+ x2("..", "ab", 0, 2);
286
+ x2("\\w", "e", 0, 1);
287
+ n("\\W", "e");
288
+ x2("\\s", " ", 0, 1);
289
+ x2("\\S", "b", 0, 1);
290
+ x2("\\d", "4", 0, 1);
291
+ n("\\D", "4");
292
+ x2("\\b", "z ", 0, 0);
293
+ x2("\\b", " z", 1, 1);
294
+ x2("\\B", "zz ", 1, 1);
295
+ x2("\\B", "z ", 2, 2);
296
+ x2("\\B", " z", 0, 0);
297
+ x2("[ab]", "b", 0, 1);
298
+ n("[ab]", "c");
299
+ x2("[a-z]", "t", 0, 1);
300
+ n("[^a]", "a");
301
+ x2("[^a]", "\n", 0, 1);
302
+ x2("[]]", "]", 0, 1);
303
+ n("[^]]", "]");
304
+ x2("[\\^]+", "0^^1", 1, 3);
305
+ x2("[b-]", "b", 0, 1);
306
+ x2("[b-]", "-", 0, 1);
307
+ x2("[\\w]", "z", 0, 1);
308
+ n("[\\w]", " ");
309
+ x2("[\\W]", "b$", 1, 2);
310
+ x2("[\\d]", "5", 0, 1);
311
+ n("[\\d]", "e");
312
+ x2("[\\D]", "t", 0, 1);
313
+ n("[\\D]", "3");
314
+ x2("[\\s]", " ", 0, 1);
315
+ n("[\\s]", "a");
316
+ x2("[\\S]", "b", 0, 1);
317
+ n("[\\S]", " ");
318
+ x2("[\\w\\d]", "2", 0, 1);
319
+ n("[\\w\\d]", " ");
320
+ x2("[[:upper:]]", "B", 0, 1);
321
+ x2("[*[:xdigit:]+]", "+", 0, 1);
322
+ x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7);
323
+ x2("[*[:xdigit:]+]", "-@^+", 3, 4);
324
+ n("[[:upper]]", "A");
325
+ x2("[[:upper]]", ":", 0, 1);
326
+ if onig_encoding == onig.ONIG_ENCODING_UTF16_LE:
327
+ x2("[\\044\\000-\\047\\000]", "\046", 0, 1);
328
+ x2("[\\x5a\\x00-\\x5c\\x00]", "\x5b", 0, 1);
329
+ x2("[\\x6A\\x00-\\x6D\\x00]", "\x6c", 0, 1);
330
+ n("[\\x6A\\x00-\\x6D\\x00]", "\x6E");
331
+ elif onig_encoding == onig.ONIG_ENCODING_UTF16_BE:
332
+ x2("[\\000\\044-\\000\\047]", "\046", 0, 1);
333
+ x2("[\\x00\\x5a-\\x00\\x5c]", "\x5b", 0, 1);
334
+ x2("[\\x00\\x6A-\\x00\\x6D]", "\x6c", 0, 1);
335
+ n("[\\x00\\x6A-\\x00\\x6D]", "\x6E");
336
+ elif onig_encoding == onig.ONIG_ENCODING_UTF32_LE:
337
+ x2("[\\044\\000\\000\\000-\\047\\000\\000\\000]", "\046", 0, 1);
338
+ x2("[\\x5a\\x00\\x00\\x00-\\x5c\\x00\\x00\\x00]", "\x5b", 0, 1);
339
+ x2("[\\x6A\\x00\\x00\\x00-\\x6D\\x00\\x00\\x00]", "\x6c", 0, 1);
340
+ n("[\\x6A\\x00\\x00\\x00-\\x6D\\x00\\x00\\x00]", "\x6E");
341
+ elif onig_encoding == onig.ONIG_ENCODING_UTF32_BE:
342
+ x2("[\\000\\000\\000\\044-\\000\\000\\000\\047]", "\046", 0, 1);
343
+ x2("[\\x00\\x00\\x00\\x5a-\\x00\\x00\\x00\\x5c]", "\x5b", 0, 1);
344
+ x2("[\\x00\\x00\\x00\\x6A-\\x00\\x00\\x00\\x6D]", "\x6c", 0, 1);
345
+ n("[\\x00\\x00\\x00\\x6A-\\x00\\x00\\x00\\x6D]", "\x6E");
346
+ else:
347
+ x2("[\\044-\\047]", "\046", 0, 1);
348
+ x2("[\\x5a-\\x5c]", "\x5b", 0, 1);
349
+ x2("[\\x6A-\\x6D]", "\x6c", 0, 1);
350
+ n("[\\x6A-\\x6D]", "\x6E");
351
+ n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply");
352
+ x2("[\\[]", "[", 0, 1);
353
+ x2("[\\]]", "]", 0, 1);
354
+ x2("[&]", "&", 0, 1);
355
+ x2("[[ab]]", "b", 0, 1);
356
+ x2("[[ab]c]", "c", 0, 1);
357
+ n("[[^a]]", "a");
358
+ n("[^[a]]", "a");
359
+ x2("[[ab]&&bc]", "b", 0, 1);
360
+ n("[[ab]&&bc]", "a");
361
+ n("[[ab]&&bc]", "c");
362
+ x2("[a-z&&b-y&&c-x]", "w", 0, 1);
363
+ n("[^a-z&&b-y&&c-x]", "w");
364
+ x2("[[^a&&a]&&a-z]", "b", 0, 1);
365
+ n("[[^a&&a]&&a-z]", "a");
366
+ x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1);
367
+ n("[[^a-z&&bcdef]&&[^c-g]]", "c");
368
+ x2("[^[^abc]&&[^cde]]", "c", 0, 1);
369
+ x2("[^[^abc]&&[^cde]]", "e", 0, 1);
370
+ n("[^[^abc]&&[^cde]]", "f");
371
+ x2("[a-&&-a]", "-", 0, 1);
372
+ n("[a\\-&&\\-a]", "&");
373
+ n("\\wabc", " abc");
374
+ x2("a\\Wbc", "a bc", 0, 4);
375
+ x2("a.b.c", "aabbc", 0, 5);
376
+ x2(".\\wb\\W..c", "abb bcc", 0, 7);
377
+ x2("\\s\\wzzz", " zzzz", 0, 5);
378
+ x2("aa.b", "aabb", 0, 4);
379
+ n(".a", "ab");
380
+ x2(".a", "aa", 0, 2);
381
+ x2("^a", "a", 0, 1);
382
+ x2("^a$", "a", 0, 1);
383
+ x2("^\\w$", "a", 0, 1);
384
+ n("^\\w$", " ");
385
+ x2("^\\wab$", "zab", 0, 3);
386
+ x2("^\\wabcdef$", "zabcdef", 0, 7);
387
+ x2("^\\w...def$", "zabcdef", 0, 7);
388
+ x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8);
389
+ x2("\\A\\Z", "", 0, 0);
390
+ x2("\\Axyz", "xyz", 0, 3);
391
+ x2("xyz\\Z", "xyz", 0, 3);
392
+ x2("xyz\\z", "xyz", 0, 3);
393
+ x2("a\\Z", "a", 0, 1);
394
+ x2("\\Gaz", "az", 0, 2);
395
+ n("\\Gz", "bza");
396
+ n("az\\G", "az");
397
+ n("az\\A", "az");
398
+ n("a\\Az", "az");
399
+ x2("\\^\\$", "^$", 0, 2);
400
+ x2("^x?y", "xy", 0, 2);
401
+ x2("^(x?y)", "xy", 0, 2);
402
+ x2("\\w", "_", 0, 1);
403
+ n("\\W", "_");
404
+ x2("(?=z)z", "z", 0, 1);
405
+ n("(?=z).", "a");
406
+ x2("(?!z)a", "a", 0, 1);
407
+ n("(?!z)a", "z");
408
+ x2("(?i:a)", "a", 0, 1);
409
+ x2("(?i:a)", "A", 0, 1);
410
+ x2("(?i:A)", "a", 0, 1);
411
+ n("(?i:A)", "b");
412
+ x2("(?i:[A-Z])", "a", 0, 1);
413
+ x2("(?i:[f-m])", "H", 0, 1);
414
+ x2("(?i:[f-m])", "h", 0, 1);
415
+ n("(?i:[f-m])", "e");
416
+ x2("(?i:[A-c])", "D", 0, 1);
417
+ n("(?i:[^a-z])", "A");
418
+ n("(?i:[^a-z])", "a");
419
+ x2("(?i:[!-k])", "Z", 0, 1);
420
+ x2("(?i:[!-k])", "7", 0, 1);
421
+ x2("(?i:[T-}])", "b", 0, 1);
422
+ x2("(?i:[T-}])", "{", 0, 1);
423
+ x2("(?i:\\?a)", "?A", 0, 2);
424
+ x2("(?i:\\*A)", "*a", 0, 2);
425
+ n(".", "\n");
426
+ x2("(?m:.)", "\n", 0, 1);
427
+ x2("(?m:a.)", "a\n", 0, 2);
428
+ x2("(?m:.b)", "a\nb", 1, 3);
429
+ x2(".*abc", "dddabdd\nddabc", 8, 13);
430
+ x2("(?m:.*abc)", "dddabddabc", 0, 10);
431
+ n("(?i)(?-i)a", "A");
432
+ n("(?i)(?-i:a)", "A");
433
+ x2("a?", "", 0, 0);
434
+ x2("a?", "b", 0, 0);
435
+ x2("a?", "a", 0, 1);
436
+ x2("a*", "", 0, 0);
437
+ x2("a*", "a", 0, 1);
438
+ x2("a*", "aaa", 0, 3);
439
+ x2("a*", "baaaa", 0, 0);
440
+ n("a+", "");
441
+ x2("a+", "a", 0, 1);
442
+ x2("a+", "aaaa", 0, 4);
443
+ x2("a+", "aabbb", 0, 2);
444
+ x2("a+", "baaaa", 1, 5);
445
+ x2(".?", "", 0, 0);
446
+ x2(".?", "f", 0, 1);
447
+ x2(".?", "\n", 0, 0);
448
+ x2(".*", "", 0, 0);
449
+ x2(".*", "abcde", 0, 5);
450
+ x2(".+", "z", 0, 1);
451
+ x2(".+", "zdswer\n", 0, 6);
452
+ x2("(.*)a\\1f", "babfbac", 0, 4);
453
+ x2("(.*)a\\1f", "bacbabf", 3, 7);
454
+ x2("((.*)a\\2f)", "bacbabf", 3, 7);
455
+ x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23);
456
+ x2("a|b", "a", 0, 1);
457
+ x2("a|b", "b", 0, 1);
458
+ x2("|a", "a", 0, 0);
459
+ x2("(|a)", "a", 0, 0);
460
+ x2("ab|bc", "ab", 0, 2);
461
+ x2("ab|bc", "bc", 0, 2);
462
+ x2("z(?:ab|bc)", "zbc", 0, 3);
463
+ x2("a(?:ab|bc)c", "aabc", 0, 4);
464
+ x2("ab|(?:ac|az)", "az", 0, 2);
465
+ x2("a|b|c", "dc", 1, 2);
466
+ x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2);
467
+ n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn");
468
+ x2("a|^z", "ba", 1, 2);
469
+ x2("a|^z", "za", 0, 1);
470
+ x2("a|\\Gz", "bza", 2, 3);
471
+ x2("a|\\Gz", "za", 0, 1);
472
+ x2("a|\\Az", "bza", 2, 3);
473
+ x2("a|\\Az", "za", 0, 1);
474
+ x2("a|b\\Z", "ba", 1, 2);
475
+ x2("a|b\\Z", "b", 0, 1);
476
+ x2("a|b\\z", "ba", 1, 2);
477
+ x2("a|b\\z", "b", 0, 1);
478
+ x2("\\w|\\s", " ", 0, 1);
479
+ n("\\w|\\w", " ");
480
+ x2("\\w|%", "%", 0, 1);
481
+ x2("\\w|[&$]", "&", 0, 1);
482
+ x2("[b-d]|[^e-z]", "a", 0, 1);
483
+ x2("(?:a|[c-f])|bz", "dz", 0, 1);
484
+ x2("(?:a|[c-f])|bz", "bz", 0, 2);
485
+ x2("abc|(?=zz)..f", "zzf", 0, 3);
486
+ x2("abc|(?!zz)..f", "abf", 0, 3);
487
+ x2("(?=za)..a|(?=zz)..a", "zza", 0, 3);
488
+ n("(?>a|abd)c", "abdc");
489
+ x2("(?>abd|a)c", "abdc", 0, 4);
490
+ x2("a?|b", "a", 0, 1);
491
+ x2("a?|b", "b", 0, 0);
492
+ x2("a?|b", "", 0, 0);
493
+ x2("a*|b", "aa", 0, 2);
494
+ x2("a*|b*", "ba", 0, 0);
495
+ x2("a*|b*", "ab", 0, 1);
496
+ x2("a+|b*", "", 0, 0);
497
+ x2("a+|b*", "bbb", 0, 3);
498
+ x2("a+|b*", "abbb", 0, 1);
499
+ n("a+|b+", "");
500
+ x2("(a|b)?", "b", 0, 1);
501
+ x2("(a|b)*", "ba", 0, 2);
502
+ x2("(a|b)+", "bab", 0, 3);
503
+ x2("(ab|ca)+", "caabbc", 0, 4);
504
+ x2("(ab|ca)+", "aabca", 1, 5);
505
+ x2("(ab|ca)+", "abzca", 0, 2);
506
+ x2("(a|bab)+", "ababa", 0, 5);
507
+ x2("(a|bab)+", "ba", 1, 2);
508
+ x2("(a|bab)+", "baaaba", 1, 4);
509
+ x2("(?:a|b)(?:a|b)", "ab", 0, 2);
510
+ x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3);
511
+ x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6);
512
+ x2("(?:a+|b+){2}", "aaabbb", 0, 6);
513
+ x2("h{0,}", "hhhh", 0, 4);
514
+ x2("(?:a+|b+){1,2}", "aaabbb", 0, 6);
515
+ n("ax{2}*a", "0axxxa1");
516
+ n("a.{0,2}a", "0aXXXa0");
517
+ n("a.{0,2}?a", "0aXXXa0");
518
+ n("a.{0,2}?a", "0aXXXXa0");
519
+ x2("^a{2,}?a$", "aaa", 0, 3);
520
+ x2("^[a-z]{2,}?$", "aaa", 0, 3);
521
+ x2("(?:a+|\\Ab*)cc", "cc", 0, 2);
522
+ n("(?:a+|\\Ab*)cc", "abcc");
523
+ x2("(?:^a+|b+)*c", "aabbbabc", 6, 8);
524
+ x2("(?:^a+|b+)*c", "aabbbbc", 0, 7);
525
+ x2("a|(?i)c", "C", 0, 1);
526
+ x2("(?i)c|a", "C", 0, 1);
527
+ x2("(?i)c|a", "A", 0, 1);
528
+ x2("(?i:c)|a", "C", 0, 1);
529
+ n("(?i:c)|a", "A");
530
+ x2("[abc]?", "abc", 0, 1);
531
+ x2("[abc]*", "abc", 0, 3);
532
+ x2("[^abc]*", "abc", 0, 0);
533
+ n("[^abc]+", "abc");
534
+ x2("a??", "aaa", 0, 0);
535
+ x2("ba??b", "bab", 0, 3);
536
+ x2("a*?", "aaa", 0, 0);
537
+ x2("ba*?", "baa", 0, 1);
538
+ x2("ba*?b", "baab", 0, 4);
539
+ x2("a+?", "aaa", 0, 1);
540
+ x2("ba+?", "baa", 0, 2);
541
+ x2("ba+?b", "baab", 0, 4);
542
+ x2("(?:a?)??", "a", 0, 0);
543
+ x2("(?:a??)?", "a", 0, 0);
544
+ x2("(?:a?)+?", "aaa", 0, 1);
545
+ x2("(?:a+)??", "aaa", 0, 0);
546
+ x2("(?:a+)??b", "aaab", 0, 4);
547
+ x2("(?:ab)?{2}", "", 0, 0);
548
+ x2("(?:ab)?{2}", "ababa", 0, 4);
549
+ x2("(?:ab)*{0}", "ababa", 0, 0);
550
+ x2("(?:ab){3,}", "abababab", 0, 8);
551
+ n("(?:ab){3,}", "abab");
552
+ x2("(?:ab){2,4}", "ababab", 0, 6);
553
+ x2("(?:ab){2,4}", "ababababab", 0, 8);
554
+ x2("(?:ab){2,4}?", "ababababab", 0, 4);
555
+ x2("(?:ab){,}", "ab{,}", 0, 5);
556
+ x2("(?:abc)+?{2}", "abcabcabc", 0, 6);
557
+ x2("(?:X*)(?i:xa)", "XXXa", 0, 4);
558
+ x2("(d+)([^abc]z)", "dddz", 0, 4);
559
+ x2("([^abc]*)([^abc]z)", "dddz", 0, 4);
560
+ x2("(\\w+)(\\wz)", "dddz", 0, 4);
561
+ x3("(a)", "a", 0, 1, 1);
562
+ x3("(ab)", "ab", 0, 2, 1);
563
+ x2("((ab))", "ab", 0, 2);
564
+ x3("((ab))", "ab", 0, 2, 1);
565
+ x3("((ab))", "ab", 0, 2, 2);
566
+ x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20);
567
+ x3("(ab)(cd)", "abcd", 0, 2, 1);
568
+ x3("(ab)(cd)", "abcd", 2, 4, 2);
569
+ x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3);
570
+ x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4);
571
+ x2("(^a)", "a", 0, 1);
572
+ x3("(a)|(a)", "ba", 1, 2, 1);
573
+ x3("(^a)|(a)", "ba", 1, 2, 2);
574
+ x3("(a?)", "aaa", 0, 1, 1);
575
+ x3("(a*)", "aaa", 0, 3, 1);
576
+ x3("(a*)", "", 0, 0, 1);
577
+ x3("(a+)", "aaaaaaa", 0, 7, 1);
578
+ x3("(a+|b*)", "bbbaa", 0, 3, 1);
579
+ x3("(a+|b?)", "bbbaa", 0, 1, 1);
580
+ x3("(abc)?", "abc", 0, 3, 1);
581
+ x3("(abc)*", "abc", 0, 3, 1);
582
+ x3("(abc)+", "abc", 0, 3, 1);
583
+ x3("(xyz|abc)+", "abc", 0, 3, 1);
584
+ x3("([xyz][abc]|abc)+", "abc", 0, 3, 1);
585
+ x3("((?i:abc))", "AbC", 0, 3, 1);
586
+ x2("(abc)(?i:\\1)", "abcABC", 0, 6);
587
+ x3("((?m:a.c))", "a\nc", 0, 3, 1);
588
+ x3("((?=az)a)", "azb", 0, 1, 1);
589
+ x3("abc|(.abd)", "zabd", 0, 4, 1);
590
+ x2("(?:abc)|(ABC)", "abc", 0, 3);
591
+ x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1);
592
+ x3("a*(.)", "aaaaz", 4, 5, 1);
593
+ x3("a*?(.)", "aaaaz", 0, 1, 1);
594
+ x3("a*?(c)", "aaaac", 4, 5, 1);
595
+ x3("[bcd]a*(.)", "caaaaz", 5, 6, 1);
596
+ x3("(\\Abb)cc", "bbcc", 0, 2, 1);
597
+ n("(\\Abb)cc", "zbbcc");
598
+ x3("(^bb)cc", "bbcc", 0, 2, 1);
599
+ n("(^bb)cc", "zbbcc");
600
+ x3("cc(bb$)", "ccbb", 2, 4, 1);
601
+ n("cc(bb$)", "ccbbb");
602
+ n("(\\1)", "");
603
+ n("\\1(a)", "aa");
604
+ n("(a(b)\\1)\\2+", "ababb");
605
+ n("(?:(?:\\1|z)(a))+$", "zaa");
606
+ x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4);
607
+ x2("(a)(?=\\1)", "aa", 0, 1);
608
+ n("(a)$|\\1", "az");
609
+ x2("(a)\\1", "aa", 0, 2);
610
+ n("(a)\\1", "ab");
611
+ x2("(a?)\\1", "aa", 0, 2);
612
+ x2("(a??)\\1", "aa", 0, 0);
613
+ x2("(a*)\\1", "aaaaa", 0, 4);
614
+ x3("(a*)\\1", "aaaaa", 0, 2, 1);
615
+ x2("a(b*)\\1", "abbbb", 0, 5);
616
+ x2("a(b*)\\1", "ab", 0, 1);
617
+ x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10);
618
+ x2("(a*)(b*)\\2", "aaabbbb", 0, 7);
619
+ x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8);
620
+ x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7);
621
+ x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6);
622
+ x2("([a-d])\\1", "cc", 0, 2);
623
+ x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6);
624
+ n("(\\w\\d\\s)\\1", "f5 f5");
625
+ x2("(who|[a-c]{3})\\1", "whowho", 0, 6);
626
+ x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9);
627
+ x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6);
628
+ x2("(^a)\\1", "aa", 0, 2);
629
+ n("(^a)\\1", "baa");
630
+ n("(a$)\\1", "aa");
631
+ n("(ab\\Z)\\1", "ab");
632
+ x2("(a*\\Z)\\1", "a", 1, 1);
633
+ x2(".(a*\\Z)\\1", "ba", 1, 2);
634
+ x3("(.(abc)\\2)", "zabcabc", 0, 7, 1);
635
+ x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1);
636
+ x2("((?i:az))\\1", "AzAz", 0, 4);
637
+ n("((?i:az))\\1", "Azaz");
638
+ x2("(?<=a)b", "ab", 1, 2);
639
+ n("(?<=a)b", "bb");
640
+ x2("(?<=a|b)b", "bb", 1, 2);
641
+ x2("(?<=a|bc)b", "bcb", 2, 3);
642
+ x2("(?<=a|bc)b", "ab", 1, 2);
643
+ x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2);
644
+ x2("(a)\\g<1>", "aa", 0, 2);
645
+ x2("(?<!a)b", "cb", 1, 2);
646
+ n("(?<!a)b", "ab");
647
+ x2("(?<!a|bc)b", "bbb", 0, 1);
648
+ n("(?<!a|bc)z", "bcz");
649
+ x2("(?<name1>a)", "a", 0, 1);
650
+ x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4);
651
+ x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8);
652
+ x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3);
653
+ x2("(?<n>|a\\g<n>)+", "", 0, 0);
654
+ x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6);
655
+ x3("\\g<n>(?<n>.){0}", "X", 0, 1, 1);
656
+ x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3);
657
+ x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4);
658
+ x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8);
659
+ x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", " fg xaaaaaaaafg x", 2, 18);
660
+ x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1);
661
+ x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3);
662
+ x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2);
663
+ x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0);
664
+ x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9);
665
+ n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg");
666
+ x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10);
667
+ x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14);
668
+ x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16);
669
+ x2("(?<foo>a|\\(\\g<foo>\\))", "a", 0, 1);
670
+ x2("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 0, 13);
671
+ x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1);
672
+ x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9);
673
+ x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3);
674
+ x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1);
675
+ x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7);
676
+ x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4);
677
+ x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5);
678
+ x2("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10);
679
+ x2("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5);
680
+ x2("()*\\1", "", 0, 0);
681
+ x2("(?:()|())*\\1\\2", "", 0, 0);
682
+ x3("(?:\\1a|())*", "a", 0, 0, 1);
683
+ x2("x((.)*)*x", "0x1x2x3", 1, 6);
684
+ x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9);
685
+ x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0);
686
+ x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1);
687
+ if onig_encoding == onig.ONIG_ENCODING_UTF16_LE:
688
+ x2("\\xFA\\x8F", "\u8ffa", 0, 1);
689
+ elif onig_encoding == onig.ONIG_ENCODING_UTF16_BE:
690
+ x2("\\x8F\\xFA", "\u8ffa", 0, 1);
691
+ elif onig_encoding == onig.ONIG_ENCODING_UTF32_LE:
692
+ x2("\\xFA\\x8F\\x00\\x00", "\u8ffa", 0, 1);
693
+ elif onig_encoding == onig.ONIG_ENCODING_UTF32_BE:
694
+ x2("\\x00\\x00\\x8F\\xFA", "\u8ffa", 0, 1);
695
+ elif onig_encoding == onig.ONIG_ENCODING_UTF8:
696
+ x2("\\xE8\\xBF\\xBA", "\u8ffa", 0, 1);
697
+ elif onig_encoding == onig.ONIG_ENCODING_SJIS:
698
+ x2("\\xE7\\x92", "\u8ffa", 0, 1);
699
+ elif onig_encoding == onig.ONIG_ENCODING_EUC_JP:
700
+ x2("\\xED\\xF2", "\u8ffa", 0, 1); # "迺"
701
+ x2("", "あ", 0, 0);
702
+ x2("あ", "あ", 0, 1);
703
+ n("い", "あ");
704
+ x2("うう", "うう", 0, 2);
705
+ x2("あいう", "あいう", 0, 3);
706
+ x2("こここここここここここここここここここここここここここここここここここ", "こここここここここここここここここここここここここここここここここここ", 0, 35);
707
+ x2("あ", "いあ", 1, 2);
708
+ x2("いう", "あいう", 1, 3);
709
+ # x2(b"\\xca\\xb8", b"\xca\xb8", 0, 2); # "文"
710
+ x2(".", "あ", 0, 1);
711
+ x2("..", "かき", 0, 2);
712
+ x2("\\w", "お", 0, 1);
713
+ n("\\W", "あ");
714
+ x2("[\\W]", "う$", 1, 2);
715
+ x2("\\S", "そ", 0, 1);
716
+ x2("\\S", "漢", 0, 1);
717
+ x2("\\b", "気 ", 0, 0);
718
+ x2("\\b", " ほ", 1, 1);
719
+ x2("\\B", "せそ ", 1, 1);
720
+ x2("\\B", "う ", 2, 2);
721
+ x2("\\B", " い", 0, 0);
722
+ x2("[たち]", "ち", 0, 1);
723
+ n("[なに]", "ぬ");
724
+ x2("[う-お]", "え", 0, 1);
725
+ n("[^け]", "け");
726
+ x2("[\\w]", "ね", 0, 1);
727
+ n("[\\d]", "ふ");
728
+ x2("[\\D]", "は", 0, 1);
729
+ n("[\\s]", "く");
730
+ x2("[\\S]", "へ", 0, 1);
731
+ x2("[\\w\\d]", "よ", 0, 1);
732
+ x2("[\\w\\d]", " よ", 3, 4);
733
+ n("\\w鬼車", " 鬼車");
734
+ x2("鬼\\W車", "鬼 車", 0, 3);
735
+ x2("あ.い.う", "ああいいう", 0, 5);
736
+ x2(".\\wう\\W..ぞ", "えうう うぞぞ", 0, 7);
737
+ x2("\\s\\wこここ", " ここここ", 0, 5);
738
+ x2("ああ.け", "ああけけ", 0, 4);
739
+ n(".い", "いえ");
740
+ x2(".お", "おお", 0, 2);
741
+ x2("^あ", "あ", 0, 1);
742
+ x2("^む$", "む", 0, 1);
743
+ x2("^\\w$", "に", 0, 1);
744
+ x2("^\\wかきくけこ$", "zかきくけこ", 0, 6);
745
+ x2("^\\w...うえお$", "zあいううえお", 0, 7);
746
+ x2("\\w\\w\\s\\Wおおお\\d", "aお おおお4", 0, 8);
747
+ x2("\\Aたちつ", "たちつ", 0, 3);
748
+ x2("むめも\\Z", "むめも", 0, 3);
749
+ x2("かきく\\z", "かきく", 0, 3);
750
+ x2("かきく\\Z", "かきく\n", 0, 3);
751
+ x2("\\Gぽぴ", "ぽぴ", 0, 2);
752
+ n("\\Gえ", "うえお");
753
+ n("とて\\G", "とて");
754
+ n("まみ\\A", "まみ");
755
+ n("ま\\Aみ", "まみ");
756
+ x2("(?=せ)せ", "せ", 0, 1);
757
+ n("(?=う).", "い");
758
+ x2("(?!う)か", "か", 0, 1);
759
+ n("(?!と)あ", "と");
760
+ x2("(?i:あ)", "あ", 0, 1);
761
+ x2("(?i:ぶべ)", "ぶべ", 0, 2);
762
+ n("(?i:い)", "う");
763
+ x2("(?m:よ.)", "よ\n", 0, 2);
764
+ x2("(?m:.め)", "ま\nめ", 1, 3);
765
+ x2("あ?", "", 0, 0);
766
+ x2("変?", "化", 0, 0);
767
+ x2("変?", "変", 0, 1);
768
+ x2("量*", "", 0, 0);
769
+ x2("量*", "量", 0, 1);
770
+ x2("子*", "子子子", 0, 3);
771
+ x2("馬*", "鹿馬馬馬馬", 0, 0);
772
+ n("山+", "");
773
+ x2("河+", "河", 0, 1);
774
+ x2("時+", "時時時時", 0, 4);
775
+ x2("え+", "ええううう", 0, 2);
776
+ x2("う+", "おうううう", 1, 5);
777
+ x2(".?", "た", 0, 1);
778
+ x2(".*", "ぱぴぷぺ", 0, 4);
779
+ x2(".+", "ろ", 0, 1);
780
+ x2(".+", "いうえか\n", 0, 4);
781
+ x2("あ|い", "あ", 0, 1);
782
+ x2("あ|い", "い", 0, 1);
783
+ x2("あい|いう", "あい", 0, 2);
784
+ x2("あい|いう", "いう", 0, 2);
785
+ x2("を(?:かき|きく)", "をかき", 0, 3);
786
+ x2("を(?:かき|きく)け", "をきくけ", 0, 4);
787
+ x2("あい|(?:あう|あを)", "あを", 0, 2);
788
+ x2("あ|い|う", "えう", 1, 2);
789
+ x2("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "しすせ", 0, 3);
790
+ n("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "すせ");
791
+ x2("あ|^わ", "ぶあ", 1, 2);
792
+ x2("あ|^を", "をあ", 0, 1);
793
+ x2("鬼|\\G車", "け車鬼", 2, 3);
794
+ x2("鬼|\\G車", "車鬼", 0, 1);
795
+ x2("鬼|\\A車", "b車鬼", 2, 3);
796
+ x2("鬼|\\A車", "車", 0, 1);
797
+ x2("鬼|車\\Z", "車鬼", 1, 2);
798
+ x2("鬼|車\\Z", "車", 0, 1);
799
+ x2("鬼|車\\Z", "車\n", 0, 1);
800
+ x2("鬼|車\\z", "車鬼", 1, 2);
801
+ x2("鬼|車\\z", "車", 0, 1);
802
+ x2("\\w|\\s", "お", 0, 1);
803
+ x2("\\w|%", "%お", 0, 1);
804
+ x2("\\w|[&$]", "う&", 0, 1);
805
+ x2("[い-け]", "う", 0, 1);
806
+ x2("[い-け]|[^か-こ]", "あ", 0, 1);
807
+ x2("[い-け]|[^か-こ]", "か", 0, 1);
808
+ x2("[^あ]", "\n", 0, 1);
809
+ x2("(?:あ|[う-き])|いを", "うを", 0, 1);
810
+ x2("(?:あ|[う-き])|いを", "いを", 0, 2);
811
+ x2("あいう|(?=けけ)..ほ", "けけほ", 0, 3);
812
+ x2("あいう|(?!けけ)..ほ", "あいほ", 0, 3);
813
+ x2("(?=をあ)..あ|(?=をを)..あ", "ををあ", 0, 3);
814
+ x2("(?<=あ|いう)い", "いうい", 2, 3);
815
+ n("(?>あ|あいえ)う", "あいえう");
816
+ x2("(?>あいえ|あ)う", "あいえう", 0, 4);
817
+ x2("あ?|い", "あ", 0, 1);
818
+ x2("あ?|い", "い", 0, 0);
819
+ x2("あ?|い", "", 0, 0);
820
+ x2("あ*|い", "ああ", 0, 2);
821
+ x2("あ*|い*", "いあ", 0, 0);
822
+ x2("あ*|い*", "あい", 0, 1);
823
+ x2("[aあ]*|い*", "aあいいい", 0, 2);
824
+ x2("あ+|い*", "", 0, 0);
825
+ x2("あ+|い*", "いいい", 0, 3);
826
+ x2("あ+|い*", "あいいい", 0, 1);
827
+ x2("あ+|い*", "aあいいい", 0, 0);
828
+ n("あ+|い+", "");
829
+ x2("(あ|い)?", "い", 0, 1);
830
+ x2("(あ|い)*", "いあ", 0, 2);
831
+ x2("(あ|い)+", "いあい", 0, 3);
832
+ x2("(あい|うあ)+", "うああいうえ", 0, 4);
833
+ x2("(あい|うえ)+", "うああいうえ", 2, 6);
834
+ x2("(あい|うあ)+", "ああいうあ", 1, 5);
835
+ x2("(あい|うあ)+", "あいをうあ", 0, 2);
836
+ x2("(あい|うあ)+", "$$zzzzあいをうあ", 6, 8);
837
+ x2("(あ|いあい)+", "あいあいあ", 0, 5);
838
+ x2("(あ|いあい)+", "いあ", 1, 2);
839
+ x2("(あ|いあい)+", "いあああいあ", 1, 4);
840
+ x2("(?:あ|い)(?:あ|い)", "あい", 0, 2);
841
+ x2("(?:あ*|い*)(?:あ*|い*)", "あああいいい", 0, 3);
842
+ x2("(?:あ*|い*)(?:あ+|い+)", "あああいいい", 0, 6);
843
+ x2("(?:あ+|い+){2}", "あああいいい", 0, 6);
844
+ x2("(?:あ+|い+){1,2}", "あああいいい", 0, 6);
845
+ x2("(?:あ+|\\Aい*)うう", "うう", 0, 2);
846
+ n("(?:あ+|\\Aい*)うう", "あいうう");
847
+ x2("(?:^あ+|い+)*う", "ああいいいあいう", 6, 8);
848
+ x2("(?:^あ+|い+)*う", "ああいいいいう", 0, 7);
849
+ x2("う{0,}", "うううう", 0, 4);
850
+ x2("あ|(?i)c", "C", 0, 1);
851
+ x2("(?i)c|あ", "C", 0, 1);
852
+ x2("(?i:あ)|a", "a", 0, 1);
853
+ n("(?i:あ)|a", "A");
854
+ x2("[あいう]?", "あいう", 0, 1);
855
+ x2("[あいう]*", "あいう", 0, 3);
856
+ x2("[^あいう]*", "あいう", 0, 0);
857
+ n("[^あいう]+", "あいう");
858
+ x2("あ??", "あああ", 0, 0);
859
+ x2("いあ??い", "いあい", 0, 3);
860
+ x2("あ*?", "あああ", 0, 0);
861
+ x2("いあ*?", "いああ", 0, 1);
862
+ x2("いあ*?い", "いああい", 0, 4);
863
+ x2("あ+?", "あああ", 0, 1);
864
+ x2("いあ+?", "いああ", 0, 2);
865
+ x2("いあ+?い", "いああい", 0, 4);
866
+ x2("(?:天?)??", "天", 0, 0);
867
+ x2("(?:天??)?", "天", 0, 0);
868
+ x2("(?:夢?)+?", "夢夢夢", 0, 1);
869
+ x2("(?:風+)??", "風風風", 0, 0);
870
+ x2("(?:雪+)??霜", "雪雪雪霜", 0, 4);
871
+ x2("(?:あい)?{2}", "", 0, 0);
872
+ x2("(?:鬼車)?{2}", "鬼車鬼車鬼", 0, 4);
873
+ x2("(?:鬼車)*{0}", "鬼車鬼車鬼", 0, 0);
874
+ x2("(?:鬼車){3,}", "鬼車鬼車鬼車鬼車", 0, 8);
875
+ n("(?:鬼車){3,}", "鬼車鬼車");
876
+ x2("(?:鬼車){2,4}", "鬼車鬼車鬼車", 0, 6);
877
+ x2("(?:鬼車){2,4}", "鬼車鬼車鬼車鬼車鬼車", 0, 8);
878
+ x2("(?:鬼車){2,4}?", "鬼車鬼車鬼車鬼車鬼車", 0, 4);
879
+ x2("(?:鬼車){,}", "鬼車{,}", 0, 5);
880
+ x2("(?:かきく)+?{2}", "かきくかきくかきく", 0, 6);
881
+ x3("(火)", "火", 0, 1, 1);
882
+ x3("(火水)", "火水", 0, 2, 1);
883
+ x2("((時間))", "時間", 0, 2);
884
+ x3("((風水))", "風水", 0, 2, 1);
885
+ x3("((昨日))", "昨日", 0, 2, 2);
886
+ x3("((((((((((((((((((((量子))))))))))))))))))))", "量子", 0, 2, 20);
887
+ x3("(あい)(うえ)", "あいうえ", 0, 2, 1);
888
+ x3("(あい)(うえ)", "あいうえ", 2, 4, 2);
889
+ x3("()(あ)いう(えおか)きくけこ", "あいうえおかきくけこ", 3, 6, 3);
890
+ x3("(()(あ)いう(えおか)きくけこ)", "あいうえおかきくけこ", 3, 6, 4);
891
+ x3(".*(フォ)ン・マ(ン()シュタ)イン", "フォン・マンシュタイン", 5, 9, 2);
892
+ x2("(^あ)", "あ", 0, 1);
893
+ x3("(あ)|(あ)", "いあ", 1, 2, 1);
894
+ x3("(^あ)|(あ)", "いあ", 1, 2, 2);
895
+ x3("(あ?)", "あああ", 0, 1, 1);
896
+ x3("(ま*)", "ままま", 0, 3, 1);
897
+ x3("(と*)", "", 0, 0, 1);
898
+ x3("(る+)", "るるるるるるる", 0, 7, 1);
899
+ x3("(ふ+|へ*)", "ふふふへへ", 0, 3, 1);
900
+ x3("(あ+|い?)", "いいいああ", 0, 1, 1);
901
+ x3("(あいう)?", "あいう", 0, 3, 1);
902
+ x3("(あいう)*", "あいう", 0, 3, 1);
903
+ x3("(あいう)+", "あいう", 0, 3, 1);
904
+ x3("(さしす|あいう)+", "あいう", 0, 3, 1);
905
+ x3("([なにぬ][かきく]|かきく)+", "かきく", 0, 3, 1);
906
+ x3("((?i:あいう))", "あいう", 0, 3, 1);
907
+ x3("((?m:あ.う))", "あ\nう", 0, 3, 1);
908
+ x3("((?=あん)あ)", "あんい", 0, 1, 1);
909
+ x3("あいう|(.あいえ)", "んあいえ", 0, 4, 1);
910
+ x3("あ*(.)", "ああああん", 4, 5, 1);
911
+ x3("あ*?(.)", "ああああん", 0, 1, 1);
912
+ x3("あ*?(ん)", "ああああん", 4, 5, 1);
913
+ x3("[いうえ]あ*(.)", "えああああん", 5, 6, 1);
914
+ x3("(\\Aいい)うう", "いいうう", 0, 2, 1);
915
+ n("(\\Aいい)うう", "んいいうう");
916
+ x3("(^いい)うう", "いいうう", 0, 2, 1);
917
+ n("(^いい)うう", "んいいうう");
918
+ x3("ろろ(るる$)", "ろろるる", 2, 4, 1);
919
+ n("ろろ(るる$)", "ろろるるる");
920
+ x2("(無)\\1", "無無", 0, 2);
921
+ n("(無)\\1", "無武");
922
+ x2("(空?)\\1", "空空", 0, 2);
923
+ x2("(空??)\\1", "空空", 0, 0);
924
+ x2("(空*)\\1", "空空空空空", 0, 4);
925
+ x3("(空*)\\1", "空空空空空", 0, 2, 1);
926
+ x2("あ(い*)\\1", "あいいいい", 0, 5);
927
+ x2("あ(い*)\\1", "あい", 0, 1);
928
+ x2("(あ*)(い*)\\1\\2", "あああいいあああいい", 0, 10);
929
+ x2("(あ*)(い*)\\2", "あああいいいい", 0, 7);
930
+ x3("(あ*)(い*)\\2", "あああいいいい", 3, 5, 2);
931
+ x2("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 8);
932
+ x3("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 3, 7);
933
+ x2("(は)(ひ)(ふ)\\2\\1\\3", "はひふひはふ", 0, 6);
934
+ x2("([き-け])\\1", "くく", 0, 2);
935
+ x2("(\\w\\d\\s)\\1", "あ5 あ5 ", 0, 6);
936
+ n("(\\w\\d\\s)\\1", "あ5 あ5");
937
+ x2("(誰?|[あ-う]{3})\\1", "誰?誰?", 0, 4);
938
+ x2("...(誰?|[あ-う]{3})\\1", "あaあ誰?誰?", 0, 7);
939
+ x2("(誰?|[あ-う]{3})\\1", "ういうういう", 0, 6);
940
+ x2("(^こ)\\1", "ここ", 0, 2);
941
+ n("(^む)\\1", "めむむ");
942
+ n("(あ$)\\1", "ああ");
943
+ n("(あい\\Z)\\1", "あい");
944
+ x2("(あ*\\Z)\\1", "あ", 1, 1);
945
+ x2(".(あ*\\Z)\\1", "いあ", 1, 2);
946
+ x3("(.(やいゆ)\\2)", "zやいゆやいゆ", 0, 7, 1);
947
+ x3("(.(..\\d.)\\2)", "あ12341234", 0, 9, 1);
948
+ x2("((?i:あvず))\\1", "あvずあvず", 0, 6);
949
+ x2("(?<愚か>変|\\(\\g<愚か>\\))", "((((((変))))))", 0, 13);
950
+ x2("\\A(?:\\g<阿_1>|\\g<云_2>|\\z終了 (?<阿_1>観|自\\g<云_2>自)(?<云_2>在|菩薩\\g<阿_1>菩薩))$", "菩薩自菩薩自在自菩薩自菩薩", 0, 13);
951
+ x2("[[ひふ]]", "ふ", 0, 1);
952
+ x2("[[いおう]か]", "か", 0, 1);
953
+ n("[[^あ]]", "あ");
954
+ n("[^[あ]]", "あ");
955
+ x2("[^[^あ]]", "あ", 0, 1);
956
+ x2("[[かきく]&&きく]", "く", 0, 1);
957
+ n("[[かきく]&&きく]", "か");
958
+ n("[[かきく]&&きく]", "け");
959
+ x2("[あ-ん&&い-を&&う-ゑ]", "ゑ", 0, 1);
960
+ n("[^あ-ん&&い-を&&う-ゑ]", "ゑ");
961
+ x2("[[^あ&&あ]&&あ-ん]", "い", 0, 1);
962
+ n("[[^あ&&あ]&&あ-ん]", "あ");
963
+ x2("[[^あ-ん&&いうえお]&&[^う-か]]", "き", 0, 1);
964
+ n("[[^あ-ん&&いうえお]&&[^う-か]]", "い");
965
+ x2("[^[^あいう]&&[^うえお]]", "う", 0, 1);
966
+ x2("[^[^あいう]&&[^うえお]]", "え", 0, 1);
967
+ n("[^[^あいう]&&[^うえお]]", "か");
968
+ x2("[あ-&&-あ]", "-", 0, 1);
969
+ x2("[^[^a-zあいう]&&[^bcdefgうえお]q-w]", "え", 0, 1);
970
+ x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "f", 0, 1);
971
+ x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "g", 0, 1);
972
+ n("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "2");
973
+ x2("a<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 20);
974
+ x2(".<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 20);
975
+
976
+
977
+ # additional test patterns
978
+ if is_unicode_encoding(onig_encoding):
979
+ x2("\\x{3042}\\x{3044}", "あい", 0, 2)
980
+ elif onig_encoding == onig.ONIG_ENCODING_SJIS:
981
+ x2("\\x{82a0}\\x{82A2}", "あい", 0, 2)
982
+ elif onig_encoding == onig.ONIG_ENCODING_EUC_JP:
983
+ x2("\\x{a4a2}\\x{A4A4}", "あい", 0, 2)
984
+ x2("\\p{Hiragana}\\p{Katakana}", "あイ", 0, 2)
985
+ x2("(?m)^A.B$", "X\nA\nB\nZ", 2, 5)
986
+ n("(?<!(?<=a)b|c)d", "abd")
987
+ n("(?<!(?<=a)b|c)d", "cd")
988
+ x2("(?<!(?<=a)b|c)d", "bd", 1, 2)
989
+ x2("(a){2}z", "aaz", 0, 3)
990
+ x2("(?<=a).*b", "aab", 1, 3)
991
+ x2("(?!a).*b", "ab", 1, 2)
992
+ x2("(?<=(?<!A)B)C", "BBC", 2, 3)
993
+ n("(?<=(?<!A)B)C", "ABC")
994
+ n("(?i)(?<!aa|b)c", "Aac")
995
+ n("(?i)(?<!b|aa)c", "Aac")
996
+ x2("(?<=\\babc)d", " abcd", 4, 5)
997
+ x2("(?<=\\Babc)d", "aabcd", 4, 5)
998
+ x2("a\\b?a", "aa", 0, 2)
999
+ x2("[^x]*x", "aaax", 0, 4)
1000
+ x2("(?i)[\\x{0}-B]+", "\x00\x01\x02\x1f\x20@AaBbC", 0, 10)
1001
+ x2("(?i)a{2}", "AA", 0, 2)
1002
+ if is_unicode_encoding(onig_encoding):
1003
+ # The longest property name
1004
+ x2("\\p{Other_Default_Ignorable_Code_Point}+", "\u034F\uFFF8\U000E0FFF", 0, 3)
1005
+ # The longest block name
1006
+ x2("\\p{In_Unified_Canadian_Aboriginal_Syllabics_Extended}+", "\u18B0\u18FF", 0, 2)
1007
+ # Unicode case fold
1008
+ x2("(?i)\u1ffc", "\u2126\u1fbe", 0, 2)
1009
+ x2("(?i)\u1ffc", "\u1ff3", 0, 1)
1010
+ x2("(?i)\u0390", "\u03b9\u0308\u0301", 0, 3)
1011
+ x2("(?i)\u03b9\u0308\u0301", "\u0390", 0, 1)
1012
+ x2("(?i)ff", "\ufb00", 0, 1)
1013
+ x2("(?i)\ufb01", "fi", 0, 2)
1014
+ x2("(?i)\u0149\u0149", "\u0149\u0149", 0, 2)
1015
+ # Other Unicode tests
1016
+ x2("\\x{25771}", "\U00025771", 0, 1)
1017
+ x2("[0-9-a]+", " 0123456789-a ", 1, 13) # same as [0-9\-a]
1018
+ x2("[0-9-\\s]+", " 0123456789-a ", 0, 12) # same as [0-9\-\s]
1019
+ x2("(?i:a) B", "a B", 0, 3);
1020
+ x2("(?i:a )B", "a B", 0, 3);
1021
+ x2("B (?i:a)", "B a", 0, 3);
1022
+ x2("B(?i: a)", "B a", 0, 3);
1023
+ if is_unicode_encoding(onig_encoding):
1024
+ x2("(?a)[\\p{Space}\\d]", "\u00a0", 0, 1)
1025
+ x2("(?a)[\\d\\p{Space}]", "\u00a0", 0, 1)
1026
+ n("(?a)[^\\p{Space}\\d]", "\u00a0")
1027
+ n("(?a)[^\\d\\p{Space}]", "\u00a0")
1028
+ x2("(?d)[[:space:]\\d]", "\u00a0", 0, 1)
1029
+ n("(?d)[^\\d[:space:]]", "\u00a0")
1030
+ n("x.*?\\Z$", "x\ny")
1031
+ n("x.*?\\Z$", "x\r\ny")
1032
+ x2("x.*?\\Z$", "x\n", 0, 1)
1033
+ x2("x.*?\\Z$", "x\r\n", 0, 2) # \Z will match between \r and \n, if
1034
+ # ONIG_OPTION_NEWLINE_CRLF isn't specified.
1035
+ x2("(?<=fo).*", "foo", 2, 3)
1036
+ x2("(?m)(?<=fo).*", "foo", 2, 3)
1037
+ x2("(?m)(?<=fo).+", "foo", 2, 3)
1038
+ x2("\\n?\\z", "hello", 5, 5)
1039
+ x2("\\z", "hello", 5, 5)
1040
+ x2("\\n?\\z", "こんにちは", 5, 5)
1041
+ x2("\\z", "こんにちは", 5, 5)
1042
+ x2("()" * 32767, "", 0, 0) # Issue #24
1043
+ x2("\\h+ \\H+", " 0123456789aBcDeF gh", 1, 20)
1044
+ x2("[\\h]+ [\\H]+", " 0123456789aBcDeF gh", 1, 20)
1045
+ x2("\\A(|.|(?:(.)\\g<1>\\k<2+0>))\\z", "reer", 0, 4)
1046
+ x2("\\A(?<a>|.|(?:(?<b>.)\\g<a>\\k<b+0>))\\z", "reer", 0, 4)
1047
+ x2(''' # Extended pattern
1048
+ (?<element> \g<stag> \g<content>* \g<etag> ){0}
1049
+ (?<stag> < \g<name> \s* > ){0}
1050
+ (?<name> [a-zA-Z_:]+ ){0}
1051
+ (?<content> [^<&]+ (\g<element> | [^<&]+)* ){0}
1052
+ (?<etag> </ \k<name+1> >){0}
1053
+ \g<element>''',
1054
+ "<foo>f<bar>bbb</bar>f</foo>", 0, 27, opt=onig.ONIG_OPTION_EXTEND)
1055
+ x2("\\p{Print}+", "\n a", 1, 3)
1056
+ x2("\\p{Graph}+", "\n a", 2, 3)
1057
+ n("a(?!b)", "ab");
1058
+ x2("(?:(.)\\1)*", "a" * 300, 0, 300)
1059
+ x2("\\cA\\C-B\\a[\\b]\\t\\n\\v\\f\\r\\e\\c?", "\x01\x02\x07\x08\x09\x0a\x0b\x0c\x0d\x1b\x7f", 0, 11)
1060
+ x2("(?<=(?:[a-z]|\\w){3})x", "ab1x", 3, 4) # repeat inside look-behind
1061
+ x2("(?<n>(a|b\\g<n>c){3,5}?)", "baaaaca", 1, 4)
1062
+ x2("\\p{WoRd}", "a", 0, 1) # property name is not case sensitive
1063
+ n("[[:WoRd:]]", "a", err=onig.ONIGERR_INVALID_POSIX_BRACKET_TYPE) # POSIX bracket name is case sensitive
1064
+
1065
+ # ONIG_OPTION_FIND_LONGEST option
1066
+ x2("foo|foobar", "foobar", 0, 3)
1067
+ x2("foo|foobar", "foobar", 0, 6, opt=onig.ONIG_OPTION_FIND_LONGEST)
1068
+
1069
+ # character classes (tests for character class optimization)
1070
+ x2("[@][a]", "@a", 0, 2);
1071
+ x2(".*[a][b][c][d][e]", "abcde", 0, 5);
1072
+ x2("(?i)[A\\x{41}]", "a", 0, 1);
1073
+ x2("[abA]", "a", 0, 1);
1074
+ x2("[[ab]&&[ac]]+", "aaa", 0, 3);
1075
+ x2("[[あい]&&[あう]]+", "あああ", 0, 3);
1076
+
1077
+ # possessive quantifiers
1078
+ n("a?+a", "a")
1079
+ n("a*+a", "aaaa")
1080
+ n("a++a", "aaaa")
1081
+ x2("a{2,3}+a", "aaa", 0, 3) # Not a possessive quantifier in Ruby,
1082
+ # same as "(?:a{2,3})+a"
1083
+ n("a{2,3}+a", "aaa", syn=onig.ONIG_SYNTAX_PERL)
1084
+
1085
+ # automatic possessification
1086
+ x2("\\w+\\W", "abc#", 0, 4)
1087
+ x2("[a-c]+\\W", "abc#", 0, 4)
1088
+ x2("[a-c#]+\\W", "abc#", 0, 4)
1089
+ x2("[^a-c]+\\W", "def#", 0, 4)
1090
+ x2("(?a)[^a-c]+\\W", "def#", 0, 4)
1091
+ x2("a+\\w", "aaaa", 0, 4)
1092
+ x2("#+\\w", "###a", 0, 4)
1093
+ x2("(?a)a+\\w", "aaaa", 0, 4)
1094
+ x2("(?a)あ+\\w", "あああa", 0, 4)
1095
+ x2("[a-c]+[d-f]", "abcd", 0, 4)
1096
+ x2("[^d-f]+[d-f]", "abcd", 0, 4)
1097
+ x2("[a-cあ]+[d-f]", "abcd", 0, 4)
1098
+
1099
+ # linebreak
1100
+ x2("\\R", "\n", 0, 1)
1101
+ x2("\\R", "\r", 0, 1)
1102
+ x2("\\R{3}", "\r\r\n\n", 0, 4)
1103
+
1104
+ if (is_unicode_encoding(onig_encoding)):
1105
+ x2("\\R", "\u0085", 0, 1)
1106
+ x2("\\R", "\u2028", 0, 1)
1107
+ x2("\\R", "\u2029", 0, 1)
1108
+
1109
+ # extended grapheme cluster
1110
+ x2("\\X{5}", "あいab\n", 0, 5)
1111
+ if is_unicode_encoding(onig_encoding):
1112
+ x2("\\X", "\u306F\u309A\n", 0, 2)
1113
+
1114
+ # keep
1115
+ x2("ab\\Kcd", "abcd", 2, 4)
1116
+ x2("ab\\Kc(\\Kd|z)", "abcd", 3, 4)
1117
+ x2("ab\\Kc(\\Kz|d)", "abcd", 2, 4)
1118
+ x2("(a\\K)*", "aaab", 3, 3)
1119
+ x3("(a\\K)*", "aaab", 2, 3, 1)
1120
+ # x2("a\\K?a", "aa", 0, 2) # error: differs from Perl
1121
+ x2("ab(?=c\Kd)", "abcd", 2, 2) # This behaviour is currently not well defined. (see: perlre)
1122
+ x2("(?<=a\\Kb|aa)cd", "abcd", 1, 4) # This behaviour is currently not well defined. (see: perlre)
1123
+ x2("(?<=ab|a\\Ka)cd", "abcd", 2, 4) # This behaviour is currently not well defined. (see: perlre)
1124
+
1125
+ # named group and subroutine call
1126
+ x2("(?<name_2>ab)(?&name_2)", "abab", 0, 4, syn=onig.ONIG_SYNTAX_PERL);
1127
+ x2("(?<name_2>ab)(?1)", "abab", 0, 4, syn=onig.ONIG_SYNTAX_PERL);
1128
+ x2("(?'n'|\\((?&n)\\))+$", "()(())", 0, 6, syn=onig.ONIG_SYNTAX_PERL);
1129
+ x2("(a|x(?-1)x)", "xax", 0, 3, syn=onig.ONIG_SYNTAX_PERL);
1130
+ x2("(a|(x(?-2)x))", "xax", 0, 3, syn=onig.ONIG_SYNTAX_PERL);
1131
+ x2("a|x(?0)x", "xax", 0, 3, syn=onig.ONIG_SYNTAX_PERL);
1132
+ x2("a|x(?R)x", "xax", 0, 3, syn=onig.ONIG_SYNTAX_PERL);
1133
+ x2("(a|x\g<0>x)", "xax", 0, 3);
1134
+ x2("(a|x\g'0'x)", "xax", 0, 3);
1135
+ x2("(?-i:(?+1))(?i:(a)){0}", "A", 0, 1, syn=onig.ONIG_SYNTAX_PERL);
1136
+ x2("(?-i:\g<+1>)(?i:(a)){0}", "A", 0, 1);
1137
+ x2("(?-i:\g'+1')(?i:(a)){0}", "A", 0, 1);
1138
+ n("(.(?=\\g<1>))", "", err=onig.ONIGERR_NEVER_ENDING_RECURSION)
1139
+ n("(a)(?<n>b)\\g<1>\\g<n>", "abab", err=onig.ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED)
1140
+ x2("(a)(?<n>b)(?1)(?&n)", "abab", 0, 4, syn=onig.ONIG_SYNTAX_PERL)
1141
+
1142
+ # character set modifiers
1143
+ x2("(?u)\\w+", "あa#", 0, 2);
1144
+ x2("(?a)\\w+", "あa#", 1, 2);
1145
+ x2("(?u)\\W+", "あa#", 2, 3);
1146
+ x2("(?a)\\W+", "あa#", 0, 1);
1147
+
1148
+ x2("(?a)\\b", "あa", 1, 1);
1149
+ x2("(?a)\\w\\b", "aあ", 0, 1);
1150
+ x2("(?a)\\B", "a ああ ", 2, 2);
1151
+
1152
+ x2("(?u)\\B", "あ ", 2, 2);
1153
+ x2("(?a)\\B", "あ ", 0, 0);
1154
+ x2("(?a)\\B", "aあ ", 2, 2);
1155
+
1156
+ x2("(?a)a\\b", " a", 1, 2)
1157
+ x2("(?u)a\\b", " a", 1, 2)
1158
+ n("(?a)a\\B", " a")
1159
+ n("(?a)あ\\b", " あ")
1160
+ x2("(?u)あ\\b", " あ", 1, 2)
1161
+ x2("(?a)あ\\B", " あ", 1, 2)
1162
+ n("(?u)あ\\B", " あ")
1163
+
1164
+ x2("(?a)\\p{Alpha}\\P{Alpha}", "a。", 0, 2);
1165
+ x2("(?u)\\p{Alpha}\\P{Alpha}", "a。", 0, 2);
1166
+ x2("(?a)[[:word:]]+", "aあ", 0, 1);
1167
+ x2("(?a)[[:^word:]]+", "aあ", 1, 2);
1168
+ x2("(?u)[[:word:]]+", "aあ", 0, 2);
1169
+ n("(?u)[[:^word:]]+", "aあ");
1170
+
1171
+ x2("(?iu)\\p{lower}\\p{upper}", "Ab", 0, 2);
1172
+ x2("(?ia)\\p{lower}\\p{upper}", "Ab", 0, 2);
1173
+ x2("(?iu)[[:lower:]][[:upper:]]", "Ab", 0, 2);
1174
+ x2("(?ia)[[:lower:]][[:upper:]]", "Ab", 0, 2);
1175
+
1176
+ if is_unicode_encoding(onig_encoding):
1177
+ n("(?ia)\\w+", "\u212a\u017f"); # KELVIN SIGN, LATIN SMALL LETTER LONG S
1178
+ n("(?ia)[\\w]+", "\u212a\u017f");
1179
+ n("(?ia)[^\\W]+", "\u212a\u017f");
1180
+ x2("(?ia)[^\\W]+", "ks", 0, 2);
1181
+ n("(?iu)\\p{ASCII}", "\u212a");
1182
+ n("(?iu)\\P{ASCII}", "s");
1183
+ n("(?iu)[\\p{ASCII}]", "\u212a");
1184
+ n("(?iu)[\\P{ASCII}]", "s");
1185
+ n("(?ia)\\p{ASCII}", "\u212a");
1186
+ n("(?ia)\\P{ASCII}", "s");
1187
+ n("(?ia)[\\p{ASCII}]", "\u212a");
1188
+ n("(?ia)[\\P{ASCII}]", "s");
1189
+ x2("(?iu)[s]+", "Ss\u017f ", 0, 3);
1190
+ x2("(?ia)[s]+", "Ss\u017f ", 0, 3);
1191
+ x2("(?iu)[^s]+", "Ss\u017f ", 3, 4);
1192
+ x2("(?ia)[^s]+", "Ss\u017f ", 3, 4);
1193
+ x2("(?iu)[[:lower:]]", "\u017f", 0, 1);
1194
+ n("(?ia)[[:lower:]]", "\u017f");
1195
+ x2("(?u)[[:upper:]]", "\u212a", 0, 1);
1196
+ n("(?a)[[:upper:]]", "\u212a");
1197
+
1198
+ # \< and \>
1199
+ x2("\\<abc\\>", " abc ", 1, 4, syn=onig.ONIG_SYNTAX_GREP)
1200
+ n("\\<abc\\>", "zabc ", syn=onig.ONIG_SYNTAX_GREP)
1201
+ n("\\<abc\\>", " abcd", syn=onig.ONIG_SYNTAX_GREP)
1202
+ n("\\<abc\\>", "あabcい", syn=onig.ONIG_SYNTAX_GREP)
1203
+ x2("\\<abc\\>", "あabcい", 1, 4, syn=onig.ONIG_SYNTAX_GREP, opt=onig.ONIG_OPTION_ASCII_RANGE)
1204
+ n("\\<abc\\>", "zabcい", syn=onig.ONIG_SYNTAX_GREP, opt=onig.ONIG_OPTION_ASCII_RANGE)
1205
+ n("\\<abc\\>", "あabcd", syn=onig.ONIG_SYNTAX_GREP, opt=onig.ONIG_OPTION_ASCII_RANGE)
1206
+
1207
+ # \g{} backref
1208
+ x2("((?<name1>\\d)|(?<name2>\\w))(\\g{name1}|\\g{name2})", "ff", 0, 2, syn=onig.ONIG_SYNTAX_PERL);
1209
+ x2("(?:(?<x>)|(?<x>efg))\\g{x}", "", 0, 0, syn=onig.ONIG_SYNTAX_PERL);
1210
+ x2("(?:(?<x>abc)|(?<x>efg))\\g{x}", "abcefgefg", 3, 9, syn=onig.ONIG_SYNTAX_PERL);
1211
+ n("(?:(?<x>abc)|(?<x>efg))\\g{x}", "abcefg", syn=onig.ONIG_SYNTAX_PERL);
1212
+ x2("((.*)a\\g{2}f)", "bacbabf", 3, 7, syn=onig.ONIG_SYNTAX_PERL);
1213
+ x2("(.*)a\\g{1}f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23, syn=onig.ONIG_SYNTAX_PERL);
1214
+ x2("((.*)a\\g{-1}f)", "bacbabf", 3, 7, syn=onig.ONIG_SYNTAX_PERL);
1215
+ x2("(.*)a\\g{-1}f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23, syn=onig.ONIG_SYNTAX_PERL);
1216
+ x2("(あ*)(い*)\\g{-2}\\g{-1}", "あああいいあああいい", 0, 10, syn=onig.ONIG_SYNTAX_PERL);
1217
+
1218
+ # Python/PCRE compatible named group
1219
+ x2("(?P<name_2>ab)(?P>name_2)", "abab", 0, 4, syn=onig.ONIG_SYNTAX_PERL);
1220
+ x2("(?P<n>|\\((?P>n)\\))+$", "()(())", 0, 6, syn=onig.ONIG_SYNTAX_PERL);
1221
+ x2("((?P<name1>\\d)|(?P<name2>\\w))((?P=name1)|(?P=name2))", "ff", 0, 2, syn=onig.ONIG_SYNTAX_PERL);
1222
+
1223
+ # Fullwidth Alphabet
1224
+ n("abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
1225
+ x2("(?i)abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz", 0, 26);
1226
+ x2("(?i)abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 0, 26);
1227
+ x2("(?i)ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz", 0, 26);
1228
+ x2("(?i)ABCDEFGHIJKLMNOPQRSTUVWXYZ", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 0, 26);
1229
+
1230
+ # Greek
1231
+ n("αβγδεζηθικλμνξοπρστυφχψω", "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ");
1232
+ x2("(?i)αβγδεζηθικλμνξοπρστυφχψω", "αβγδεζηθικλμνξοπρστυφχψω", 0, 24);
1233
+ x2("(?i)αβγδεζηθικλμνξοπρστυφχψω", "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", 0, 24);
1234
+ x2("(?i)ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", "αβγδεζηθικλμνξοπρστυφχψω", 0, 24);
1235
+ x2("(?i)ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", 0, 24);
1236
+
1237
+ # Cyrillic
1238
+ n("абвгдеёжзийклмнопрстуфхцчшщъыьэюя", "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ");
1239
+ x2("(?i)абвгдеёжзийклмнопрстуфхцчшщъыьэюя", "абвгдеёжзийклмнопрстуфхцчшщъыьэюя", 0, 33);
1240
+ x2("(?i)абвгдеёжзийклмнопрстуфхцчшщъыьэюя", "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", 0, 33);
1241
+ x2("(?i)АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", "абвгдеёжзийклмнопрстуфхцчшщъыьэюя", 0, 33);
1242
+ x2("(?i)АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", 0, 33);
1243
+
1244
+ # multiple name definition
1245
+ x2("(?<a>a)(?<a>b)\\k<a>", "aba", 0, 3)
1246
+ x2("(?<a>a)(?<a>b)\\k<a>", "abb", 0, 3)
1247
+ x2("(?<a>a)(?<a>b)\\g{a}", "aba", 0, 3, syn=onig.ONIG_SYNTAX_PERL)
1248
+ # n("(?<a>a)(?<a>b)\\g{a}", "abb", syn=onig.ONIG_SYNTAX_PERL)
1249
+ n("(?<a>a)(?<a>b)\\g<a>", "aba", err=onig.ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL)
1250
+ x2("(?<a>[ac])(?<a>b)(?&a)", "abc", 0, 3, syn=onig.ONIG_SYNTAX_PERL)
1251
+ n("(?<a>[ac])(?<a>b)(?&a)", "abb", syn=onig.ONIG_SYNTAX_PERL)
1252
+ x2("(?:(?<x>abc)|(?<x>efg))(?i:\\k<x>)", "abcefgEFG", 3, 9)
1253
+ x2("(?<x>a)(?<x>b)(?i:\\k<x>)+", "abAB", 0, 4)
1254
+
1255
+ # branch reset
1256
+ # x3("(?|(c)|(?:(b)|(a)))", "a", 0, 1, 2)
1257
+ # x3("(?|(c)|(?|(b)|(a)))", "a", 0, 1, 1)
1258
+
1259
+ # conditional expression
1260
+ x2("(?:(a)|(b))(?(1)cd)e", "acde", 0, 4)
1261
+ n("(?:(a)|(b))(?(1)cd)e", "ae")
1262
+ x2("(?:(a)|(b))(?(2)cd)e", "ae", 0, 2)
1263
+ n("(?:(a)|(b))(?(2)cd)e", "acde")
1264
+ x2("(?:(a)|(b))(?(1)c|d)", "ac", 0, 2)
1265
+ x2("(?:(a)|(b))(?(1)c|d)", "bd", 0, 2)
1266
+ n("(?:(a)|(b))(?(1)c|d)", "ad")
1267
+ n("(?:(a)|(b))(?(1)c|d)", "bc")
1268
+ x2("(?:(a)|(b))(?:(?(1)cd)e|fg)", "acde", 0, 4)
1269
+ x2("(?:(a)|(b))(?:(?(1)cd|x)e|fg)", "bxe", 0, 3)
1270
+ n("(?:(a)|(b))(?:(?(2)cd|x)e|fg)", "bxe")
1271
+ x2("(?:(?<x>a)|(?<y>b))(?:(?(<x>)cd|x)e|fg)", "bxe", 0, 3)
1272
+ n("(?:(?<x>a)|(?<y>b))(?:(?(<y>)cd|x)e|fg)", "bxe")
1273
+ x2("((?<=a))?(?(1)b|c)", "abc", 1, 2)
1274
+ x2("((?<=a))?(?(1)b|c)", "bc", 1, 2)
1275
+ x2("((?<x>x)|(?<y>y))(?(<x>)y|x)", "xy", 0, 2)
1276
+ x2("((?<x>x)|(?<y>y))(?(<x>)y|x)", "yx", 0, 2)
1277
+ n("((?<x>x)|(?<y>y))(?(<x>)y|x)", "xx")
1278
+ n("((?<x>x)|(?<y>y))(?(<x>)y|x)", "yy")
1279
+ n("(a)?(?<n>b)?(?(1)a)(?(<n>)b)", "aa", err=onig.ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED)
1280
+ x2("(a)?(?<n>b)?(?(1)a)(?(<n>)b)", "aa", 0, 2, syn=onig.ONIG_SYNTAX_PERL)
1281
+
1282
+ # Implicit-anchor optimization
1283
+ x2("(?m:.*abc)", "dddabdd\nddabc", 0, 13) # optimized /(?m:.*abc)/ ==> /\A(?m:.*abc)/
1284
+ x2("(?m:.+abc)", "dddabdd\nddabc", 0, 13) # optimized
1285
+ x2("(?-m:.*abc)", "dddabdd\nddabc", 8, 13) # optimized /(?-m:.*abc)/ ==> /(?:^|\A)(?m:.*abc)/
1286
+ x2("(?-m:.+abc)", "dddabdd\nddabc", 8, 13) # optimized
1287
+ x2("(?-m:.*abc)", "dddabdd\nabc", 8, 11) # optimized
1288
+ n("(?-m:.+abc)", "dddabdd\nabc") # optimized
1289
+ x2("(?m:.*\\Z)", "dddabdd\nddabc", 0, 13) # optimized /(?m:.*\Z)/ ==> /\A(?m:.*\Z)/
1290
+ x2("(?-m:.*\\Z)", "dddabdd\nddabc", 8, 13) # optimized /(?-m:.*\Z)/ ==> /(?:^|\A)(?m:.*\Z)/
1291
+ x2("(.*)X\\1", "1234X2345", 1, 8) # not optimized
1292
+
1293
+ # Allow options in look-behind
1294
+ x2("(?<=(?i)ab)cd", "ABcd", 2, 4)
1295
+ x2("(?<=(?i:ab))cd", "ABcd", 2, 4)
1296
+ n("(?<=(?i)ab)cd", "ABCD")
1297
+ n("(?<=(?i:ab))cd", "ABCD")
1298
+ x2("(?<!(?i)ab)cd", "aacd", 2, 4)
1299
+ x2("(?<!(?i:ab))cd", "aacd", 2, 4)
1300
+ n("(?<!(?i)ab)cd", "ABcd")
1301
+ n("(?<!(?i:ab))cd", "ABcd")
1302
+
1303
+ # Perl syntax
1304
+ x2("\\Q()\\\\E", "()\\", 0, 3, syn=onig.ONIG_SYNTAX_PERL)
1305
+
1306
+ print("\nEncoding:", encoding)
1307
+ print("RESULT SUCC: %d, FAIL: %d, ERROR: %d (by Onigmo %s)" % (
1308
+ nsucc, nfail, nerror, onig.onig_version()))
1309
+
1310
+ onig.onig_end()
1311
+
1312
+ if (nfail == 0 and nerror == 0):
1313
+ exit(0)
1314
+ else:
1315
+ exit(-1)
1316
+
1317
# Script entry point: run the regex test suite only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
1319
+