regextest 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +3 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +25 -0
  7. data/README.md +88 -0
  8. data/Rakefile +55 -0
  9. data/bin/console +14 -0
  10. data/bin/regextest +4 -0
  11. data/bin/setup +7 -0
  12. data/contrib/Onigmo/RE.txt +522 -0
  13. data/contrib/Onigmo/UnicodeProps.txt +728 -0
  14. data/contrib/Onigmo/testpy.py +1319 -0
  15. data/contrib/unicode/Blocks.txt +298 -0
  16. data/contrib/unicode/CaseFolding.txt +1414 -0
  17. data/contrib/unicode/DerivedAge.txt +1538 -0
  18. data/contrib/unicode/DerivedCoreProperties.txt +11029 -0
  19. data/contrib/unicode/PropList.txt +1525 -0
  20. data/contrib/unicode/PropertyAliases.txt +193 -0
  21. data/contrib/unicode/PropertyValueAliases.txt +1420 -0
  22. data/contrib/unicode/README.txt +25 -0
  23. data/contrib/unicode/Scripts.txt +2539 -0
  24. data/contrib/unicode/UnicodeData.txt +29215 -0
  25. data/lib/pre-case-folding.rb +101 -0
  26. data/lib/pre-posix-char-class.rb +150 -0
  27. data/lib/pre-unicode.rb +116 -0
  28. data/lib/regextest.rb +268 -0
  29. data/lib/regextest/back.rb +58 -0
  30. data/lib/regextest/back/element.rb +151 -0
  31. data/lib/regextest/back/main.rb +356 -0
  32. data/lib/regextest/back/result.rb +498 -0
  33. data/lib/regextest/back/test-case.rb +268 -0
  34. data/lib/regextest/back/work-thread.rb +119 -0
  35. data/lib/regextest/common.rb +63 -0
  36. data/lib/regextest/front.rb +60 -0
  37. data/lib/regextest/front/anchor.rb +45 -0
  38. data/lib/regextest/front/back-refer.rb +120 -0
  39. data/lib/regextest/front/bracket-parser.rb +400 -0
  40. data/lib/regextest/front/bracket-parser.y +117 -0
  41. data/lib/regextest/front/bracket-scanner.rb +124 -0
  42. data/lib/regextest/front/bracket.rb +64 -0
  43. data/lib/regextest/front/builtin-functions.rb +31 -0
  44. data/lib/regextest/front/case-folding.rb +18 -0
  45. data/lib/regextest/front/char-class.rb +243 -0
  46. data/lib/regextest/front/empty.rb +43 -0
  47. data/lib/regextest/front/letter.rb +327 -0
  48. data/lib/regextest/front/manage-parentheses.rb +74 -0
  49. data/lib/regextest/front/parenthesis.rb +153 -0
  50. data/lib/regextest/front/parser.rb +1366 -0
  51. data/lib/regextest/front/parser.y +271 -0
  52. data/lib/regextest/front/range.rb +60 -0
  53. data/lib/regextest/front/repeat.rb +90 -0
  54. data/lib/regextest/front/repeatable.rb +77 -0
  55. data/lib/regextest/front/scanner.rb +187 -0
  56. data/lib/regextest/front/selectable.rb +65 -0
  57. data/lib/regextest/front/sequence.rb +73 -0
  58. data/lib/regextest/front/unicode.rb +1272 -0
  59. data/lib/regextest/regex-option.rb +144 -0
  60. data/lib/regextest/regexp.rb +44 -0
  61. data/lib/regextest/version.rb +5 -0
  62. data/lib/tst-reg-test.rb +159 -0
  63. data/regextest.gemspec +26 -0
  64. metadata +162 -0
@@ -0,0 +1,498 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ require 'regextest/common'
6
+ require 'regextest/back/element'
7
+
8
+ class Regextest::Back::Result
9
+ include Regextest::Common
10
+
11
# Creates an empty result: no elements, no pending look-around entries,
# no anchors or reluctant-repeat records, offsets at zero and no fixed
# match strings yet.
def initialize
  @results = []
  @look_aheads, @look_behinds = [], []
  @positional_anchors, @reluctant_repeat = {}, {}
  @start_offset = @end_offset = 0
  @pre_match = @match = @post_match = nil
end
23
+
24
+ attr_reader :results, :positional_anchors, :end_offset,
25
+ :pre_match, :match, :post_match
26
+
27
+ # get pre-match string
28
+
29
# Appends +elem+ to the element list and moves the end offset one step
# forward. Returns the new end offset.
def push_body(elem)
  @results << elem
  @end_offset += 1
end
34
+
35
# Index access, delegated unchanged to the underlying element array.
def [](offset)
  @results.slice(offset)
end
39
+
40
# Number of elements currently held.
def size
  @results.length
end
44
+
45
# Records a look-ahead for later merging, remembering the current end
# offset as the position where it applies.
def add_look_ahead(command, sub_results)
  entry = { offset: @end_offset, cmd: command, results: sub_results }
  @look_aheads << entry
end
49
+
50
# Records a look-behind for later merging, remembering the current end
# offset as the position where it applies.
def add_look_behind(command, sub_results)
  entry = { offset: @end_offset, cmd: command, results: sub_results }
  @look_behinds << entry
end
54
+
55
# Remembers that anchor +cmd+ occurs at the current end offset. Offsets
# are collected per anchor command; duplicates are kept.
def add_anchor(cmd)
  (@positional_anchors[cmd] ||= []) << @end_offset
end
60
+
61
# Tracks where a reluctant repeat begins and ends. The repeat is keyed by
# elem.param[:id]; the stored value is [begin_offset] and, once the end
# command arrives, [begin_offset, end_offset].
#
# Raises RuntimeError for an end command without a matching begin, or for
# any other command.
def add_reluctant_repeat(elem)
  id = elem.param[:id]
  case elem.command
  when :CMD_ANC_RELUCTANT_BEGIN then @reluctant_repeat[id] = [@end_offset]
  when :CMD_ANC_RELUCTANT_END
    opened = @reluctant_repeat[id]
    raise "internal error, invalid reluctant_repeat_end command" unless opened
    opened.push @end_offset
  else
    raise "internal error, invalid reluctant_repeat command"
  end
end
77
+
78
# Merges the recorded look-aheads first and, only if that succeeded, the
# look-behinds. Returns the (falsy) look-ahead result on failure,
# otherwise whatever the look-behind merge returns.
def merge
  ahead = merge_look_ahead
  return ahead unless ahead
  merge_look_behind
end
83
+
84
# Applies every recorded look-ahead to the main results. For each entry
# the sub-result's anchors are merged first, then the elements are merged
# according to the entry's command.
#
# Returns true when all entries merged, nil as soon as one fails.
# Raises RuntimeError for an unknown command.
def merge_look_ahead
  @look_aheads.each do |entry|
    offset, sub_results, command = entry.values_at(:offset, :results, :cmd)

    merge_anchors(offset, sub_results)
    merged =
      case command
      when :CMD_LOOK_AHEAD     then merge_look_ahead_elems(offset, sub_results)
      when :CMD_NOT_LOOK_AHEAD then merge_not_look_ahead_elems(offset, sub_results)
      else raise "invalid command at merge_look_ahead: #{command}"
      end
    return nil unless merged
  end
  true
end
107
+
108
# Overlays the elements of a positive look-ahead onto the main results,
# starting at +offset+. Positions that already exist are intersected with
# the corresponding sub-element; positions past the current end are
# filled by appending the sub-element itself.
#
# Returns true on success, nil when an intersection reports failure.
def merge_look_ahead_elems(offset, sub_results)
  sub_results.end_offset.times do |k|
    candidate = sub_results[k]
    position = offset + k

    if position >= @results.size
      @results << candidate
    elsif !@results[position].intersect(candidate)
      return nil
    end
  end
  true
end
126
+
127
# Applies a negative look-ahead starting at +offset+: the main results
# must differ from +sub_results+ in at least one position.
#
# Candidate positions are tried in the order returned by TstShuffle. For
# the chosen position the sub-element is excluded from the existing main
# element, or, when the position lies past the current end, the
# sub-element's +reverse+ is appended. Every other position past the end
# is padded with Regextest::Back::Element.any_char.
#
# +sub_results+ may be a Regextest::Back::Result (its end_offset is used
# as the length) or any other indexable collection (its size is used).
#
# Returns true when some position could be made to differ, else false.
def merge_not_look_ahead_elems(offset, sub_results)
  length =
    if Regextest::Back::Result === sub_results
      sub_results.end_offset
    else
      sub_results.size
    end
  # Inclusive index of the last position covered by the look-ahead. The
  # earlier code subtracted one twice, so the final sub-element was never
  # visited and a one-element look-ahead could never succeed.
  last_offset = offset + length - 1
  try_order = TstShuffle(sub_results.size.times.to_a)

  found = false
  # exclude, at least, one element
  try_order.each do |j|
    results_work = @results.dup
    cur_offset = offset + j

    offset.upto(last_offset) do |i|
      sub_elem = sub_results[i - offset]

      if i < results_work.size
        # existing position: only the chosen one is narrowed
        found = true if i == cur_offset && results_work[i].exclude(sub_elem)
      elsif i == cur_offset && (reversed = sub_elem.reverse)
        results_work.push reversed
        found = true
      else
        results_work.push(Regextest::Back::Element.any_char)
      end
    end

    if found
      @results = results_work
      break
    end
  end
  found
end
177
+
178
# Applies every recorded look-behind to the main results. For each entry
# the sub-result's anchors are merged first, then the elements are merged
# according to the entry's command.
#
# Returns true when all entries merged, nil as soon as one fails.
# Raises RuntimeError for an unknown command.
def merge_look_behind
  @look_behinds.each do |entry|
    offset, sub_results, command = entry.values_at(:offset, :results, :cmd)

    merge_anchors(offset, sub_results)
    merged =
      case command
      when :CMD_LOOK_BEHIND     then merge_look_behind_elems(offset, sub_results)
      when :CMD_NOT_LOOK_BEHIND then merge_not_look_behind_elems(offset, sub_results)
      else raise "invalid command at merge_look_behind: #{command}"
      end
    return nil unless merged
  end
  true
end
201
+
202
# Overlays the elements of a positive look-behind that ends at +offset+.
#
# When the look-behind is longer than +offset+, the surplus leading
# sub-elements are prepended to the results; all recorded offsets are
# shifted first via unshift_params. The remaining sub-elements are
# intersected with the main elements they line up with.
#
# Returns true on success, false when unshift_params refuses the shift,
# and nil when an intersection reports failure.
def merge_look_behind_elems(offset, sub_results)
  # number of sub-elements that reach back before the first main element
  unshift_length = sub_results.end_offset - offset
  return false if unshift_length > 0 && !unshift_params(unshift_length)

  pre_part = []
  sub_results.end_offset.times do |i|
    sub_elem = sub_results[i]
    if i < unshift_length
      pre_part.push sub_elem
    elsif !@results[i - unshift_length].intersect(sub_elem)
      # index is relative to the results before pre_part is prepended
      return nil
    end
  end
  @results = pre_part + @results
  true
end
228
+
229
# Applies a negative look-behind that ends at +offset+: the text before
# +offset+ must differ from +sub_results+ in at least one position.
#
# When the look-behind is longer than +offset+, the surplus leading
# positions are prepended (after shifting recorded offsets through
# unshift_params). Candidate positions are tried in the order returned by
# TstShuffle. A chosen prepended position receives the sub-element's
# +reverse+; a chosen existing position has the sub-element excluded from
# it. Other prepended positions are Regextest::Back::Element.any_char.
#
# Returns true when some position could be made to differ, otherwise
# false (also when unshift_params refuses the shift).
def merge_not_look_behind_elems(offset, sub_results)
  # number of sub-elements that reach back before the first main element
  unshift_length = sub_results.end_offset - offset
  return false if unshift_length > 0 && !unshift_params(unshift_length)

  found = false
  # exclude, at least, one element
  TstShuffle(sub_results.size.times.to_a).each do |j|
    # Built front to back so the prepended elements keep the order of the
    # look-behind (repeated unshift would reverse them).
    prefix = []

    sub_results.end_offset.times do |i|
      sub_elem = sub_results[i]

      if i < unshift_length
        # Fall back to any_char when the element has no reverse, so that
        # nil never ends up in the results.
        reversed = (i == j) ? sub_elem.reverse : nil
        if reversed
          prefix.push reversed
          found = true
        else
          prefix.push Regextest::Back::Element.any_char
        end
      elsif i == j
        # index is relative to the results before the prefix is prepended
        found = true if @results[i - unshift_length].exclude(sub_elem)
      end
    end

    if found
      @results = prefix + @results
      break
    end
  end
  found
end
275
+
276
# Folds the anchors of +sub_results+ into this result. Each sub-offset is
# moved by +offset+ and union-merged (no duplicates added) into the list
# kept for the same anchor command.
def merge_anchors(offset, sub_results)
  sub_results.positional_anchors.each do |cmd, sub_offsets|
    known = (@positional_anchors[cmd] ||= [])
    shifted = sub_offsets.map { |position| position + offset }
    @positional_anchors[cmd] = known | shifted
  end
end
283
+
284
# Shifts every recorded offset by +unshift_length+; used when elements
# are about to be prepended to the results.
#
# Nothing may be prepended when a :CMD_ANC_STRING_BEGIN anchor is
# recorded. That case is checked before anything is touched, so a refusal
# leaves all state unchanged. Anchor offset arrays are updated in place
# because callers may be iterating over them.
#
# Returns true when the shift was applied, false when it was refused.
def unshift_params(unshift_length)
  return false if @positional_anchors.key?(:CMD_ANC_STRING_BEGIN)

  @look_aheads.each { |entry| entry[:offset] += unshift_length }
  @look_behinds.each { |entry| entry[:offset] += unshift_length }
  @positional_anchors.each_value do |offsets|
    offsets.map! { |offset| offset + unshift_length }
  end
  @start_offset += unshift_length
  @end_offset += unshift_length
  true
end
296
+
297
# Narrows the candidates by anchors first and, only if that succeeded,
# by reluctant repeats. Returns the falsy anchor result on failure,
# otherwise whatever the reluctant-repeat step returns.
def narrow_down
  by_anchors = narrow_down_by_anchors
  return by_anchors unless by_anchors
  narrow_down_by_reluctant_repeat
end
302
+
303
# Narrows the candidates for every recorded reluctant repeat. The
# elements following the repeat are treated like a negative look-ahead
# (via merge_not_look_ahead_elems) at each start position from the
# repeat's first offset up to its end offset minus the size of that
# following part.
#
# Returns false as soon as one of those merges fails, true otherwise.
def narrow_down_by_reluctant_repeat
  @reluctant_repeat.each do |_repeat_id, (first, last)|
    tail = @results[last..-1]
    next unless tail.size > 0

    # reluctant repeat is equivalent to not_look_ahead!
    first.upto(last - tail.size) do |offset|
      return false unless merge_not_look_ahead_elems(offset, tail)
    end
  end
  true
end
321
+
322
# Checks every recorded anchor against the current results and narrows
# the neighbouring elements where the anchor requires it.
#
# Returns true when all anchors can be satisfied, false otherwise.
# Raises RuntimeError for an anchor command that is not handled here.
def narrow_down_by_anchors
  @positional_anchors.each do |cmd, offsets|
    case cmd
    when :CMD_ANC_STRING_BEGIN, :CMD_ANC_MATCH_START
      # only valid at the very first position
      return false if offsets.max > 0
    when :CMD_ANC_STRING_END
      # NOTE(review): this accepts an anchor one element before the end;
      # kept as in the original code, confirm that this is intended.
      return false if offsets.min < (@results.size - 1)
    when :CMD_ANC_STRING_END2
      min_offset = offsets.min
      last_index = @results.size - 1
      return false if min_offset < last_index
      if min_offset == last_index
        # exactly one element follows: it has to be a new line
        return false unless @results[min_offset].new_line?
        @results[min_offset].set_new_line
      end
    when :CMD_ANC_LINE_BEGIN
      offsets.each do |offset|
        next if offset == 0
        # the element before the anchor has to be a new line
        return false unless @results[offset - 1].new_line?
        @results[offset - 1].set_new_line
      end
    when :CMD_ANC_LINE_END
      offsets.each do |offset|
        next if offset == @results.size
        # the element at the anchor has to be a new line
        return false unless @results[offset].new_line?
        @results[offset].set_new_line
      end
    when :CMD_ANC_WORD_BOUND
      return false unless narrow_down_word_anchors(offsets, :bound_process)
    when :CMD_ANC_WORD_UNBOUND
      return false unless narrow_down_word_anchors(offsets, :unbound_process)
    when :CMD_ANC_LOOK_BEHIND2
      @start_offset = offsets.max
    else
      raise "command (#{cmd}) not implemented"
    end
  end
  true
end

# Shared handling of word-boundary style anchors. +process+ names the
# method (:bound_process or :unbound_process) applied to the pair of
# elements on either side of each anchor offset. Missing neighbours are
# created with Regextest::Back::Element.any_char.
#
# Returns false when a pair cannot be satisfied or an element cannot be
# prepended, true otherwise.
def narrow_down_word_anchors(offsets, process)
  # De-duplicate in place, then walk by index: unshift_params rewrites
  # this very array when an element is prepended, and later iterations
  # have to see the shifted values.
  offsets.uniq!
  offsets.size.times do |i|
    offset = offsets[i]
    if offset > 0 && offset < @results.size
      return false unless send(process, @results[offset - 1], @results[offset])
    elsif @results.size == 0
      2.times { @results.push(Regextest::Back::Element.any_char) }
      send(process, @results[0], @results[1])
    elsif offset == @results.size
      @results.push(Regextest::Back::Element.any_char)
      return false unless send(process, @results[-2], @results[-1])
    elsif offset == 0
      return false unless unshift_params(1)
      @results.unshift(Regextest::Back::Element.any_char)
      return false unless send(process, @results[0], @results[1])
    end
  end
  true
end
private :narrow_down_word_anchors
423
+
424
# bound process (\b): makes elem1 and elem2 lie on opposite sides of a
# word boundary. If one element already reports being word-only or
# non-word-only, the other one is narrowed to the opposite kind (elem1 is
# consulted first). If neither does, TstRand decides which side becomes
# the word side.
#
# Returns false when either element reports empty? afterwards, else true.
def bound_process(elem1, elem2)
  case
  when elem1.word_elements?     then elem2.set_non_word_elements
  when elem1.non_word_elements? then elem2.set_word_elements
  when elem2.word_elements?     then elem1.set_non_word_elements
  when elem2.non_word_elements? then elem1.set_word_elements
  else
    word_first = (TstRand(2) == 0)
    word_first ? elem1.set_word_elements : elem1.set_non_word_elements
    word_first ? elem2.set_non_word_elements : elem2.set_word_elements
  end
  !(elem1.empty? || elem2.empty?)
end
448
+
449
# unbound process (\B): makes elem1 and elem2 lie on the same side of a
# word boundary. If one element already reports being word-only or
# non-word-only, the other one is narrowed to the same kind (elem1 is
# consulted first). If neither does, TstRand decides the kind for both.
#
# Returns false when either element reports empty? afterwards, else true.
def unbound_process(elem1, elem2)
  case
  when elem1.word_elements?     then elem2.set_word_elements
  when elem1.non_word_elements? then elem2.set_non_word_elements
  when elem2.word_elements?     then elem1.set_word_elements
  when elem2.non_word_elements? then elem1.set_non_word_elements
  else
    both_word = (TstRand(2) == 0)
    both_word ? elem1.set_word_elements : elem1.set_non_word_elements
    both_word ? elem2.set_word_elements : elem2.set_non_word_elements
  end
  !(elem1.empty? || elem2.empty?)
end
473
+
474
# Turns the results into concrete strings: the part before the start
# offset, the part between start and end offset, and the part after the
# end offset. The three parts are stored in @pre_match, @match and
# @post_match; their concatenation is returned.
def fix
  parts = [
    @pre_match  = fix_part(0, @start_offset - 1),
    @match      = fix_part(@start_offset, @end_offset - 1),
    @post_match = fix_part(@end_offset, @results.size - 1)
  ]
  parts.reduce(:+)
end
482
+
483
# Concatenates the random_fix string of every element from +start_offset+
# through +end_offset+ (both inclusive). Yields "" when the end lies
# before the start.
def fix_part(start_offset, end_offset)
  (start_offset..end_offset).reduce("") do |text, index|
    text + @results[index].random_fix
  end
end
491
+
492
+ end
493
+
494
+ # Test suite (execute when this file is specified in command line)
495
+ if __FILE__ == $0
496
+ end
497
+
498
+