regextest 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +3 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +25 -0
  7. data/README.md +88 -0
  8. data/Rakefile +55 -0
  9. data/bin/console +14 -0
  10. data/bin/regextest +4 -0
  11. data/bin/setup +7 -0
  12. data/contrib/Onigmo/RE.txt +522 -0
  13. data/contrib/Onigmo/UnicodeProps.txt +728 -0
  14. data/contrib/Onigmo/testpy.py +1319 -0
  15. data/contrib/unicode/Blocks.txt +298 -0
  16. data/contrib/unicode/CaseFolding.txt +1414 -0
  17. data/contrib/unicode/DerivedAge.txt +1538 -0
  18. data/contrib/unicode/DerivedCoreProperties.txt +11029 -0
  19. data/contrib/unicode/PropList.txt +1525 -0
  20. data/contrib/unicode/PropertyAliases.txt +193 -0
  21. data/contrib/unicode/PropertyValueAliases.txt +1420 -0
  22. data/contrib/unicode/README.txt +25 -0
  23. data/contrib/unicode/Scripts.txt +2539 -0
  24. data/contrib/unicode/UnicodeData.txt +29215 -0
  25. data/lib/pre-case-folding.rb +101 -0
  26. data/lib/pre-posix-char-class.rb +150 -0
  27. data/lib/pre-unicode.rb +116 -0
  28. data/lib/regextest.rb +268 -0
  29. data/lib/regextest/back.rb +58 -0
  30. data/lib/regextest/back/element.rb +151 -0
  31. data/lib/regextest/back/main.rb +356 -0
  32. data/lib/regextest/back/result.rb +498 -0
  33. data/lib/regextest/back/test-case.rb +268 -0
  34. data/lib/regextest/back/work-thread.rb +119 -0
  35. data/lib/regextest/common.rb +63 -0
  36. data/lib/regextest/front.rb +60 -0
  37. data/lib/regextest/front/anchor.rb +45 -0
  38. data/lib/regextest/front/back-refer.rb +120 -0
  39. data/lib/regextest/front/bracket-parser.rb +400 -0
  40. data/lib/regextest/front/bracket-parser.y +117 -0
  41. data/lib/regextest/front/bracket-scanner.rb +124 -0
  42. data/lib/regextest/front/bracket.rb +64 -0
  43. data/lib/regextest/front/builtin-functions.rb +31 -0
  44. data/lib/regextest/front/case-folding.rb +18 -0
  45. data/lib/regextest/front/char-class.rb +243 -0
  46. data/lib/regextest/front/empty.rb +43 -0
  47. data/lib/regextest/front/letter.rb +327 -0
  48. data/lib/regextest/front/manage-parentheses.rb +74 -0
  49. data/lib/regextest/front/parenthesis.rb +153 -0
  50. data/lib/regextest/front/parser.rb +1366 -0
  51. data/lib/regextest/front/parser.y +271 -0
  52. data/lib/regextest/front/range.rb +60 -0
  53. data/lib/regextest/front/repeat.rb +90 -0
  54. data/lib/regextest/front/repeatable.rb +77 -0
  55. data/lib/regextest/front/scanner.rb +187 -0
  56. data/lib/regextest/front/selectable.rb +65 -0
  57. data/lib/regextest/front/sequence.rb +73 -0
  58. data/lib/regextest/front/unicode.rb +1272 -0
  59. data/lib/regextest/regex-option.rb +144 -0
  60. data/lib/regextest/regexp.rb +44 -0
  61. data/lib/regextest/version.rb +5 -0
  62. data/lib/tst-reg-test.rb +159 -0
  63. data/regextest.gemspec +26 -0
  64. metadata +162 -0
@@ -0,0 +1,498 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ require 'regextest/common'
6
+ require 'regextest/back/element'
7
+
8
+ class Regextest::Back::Result
9
+ include Regextest::Common
10
+
11
# Sets up an empty result: no elements, no pending look-ahead/look-behind
# records, no anchors or reluctant-repeat marks, zero offsets and no
# fixed pre-match / match / post-match strings yet.
def initialize
  @results, @look_aheads, @look_behinds = [], [], []
  @positional_anchors = {}
  @reluctant_repeat = {}
  @start_offset = @end_offset = 0
  @pre_match = @match = @post_match = nil
end
23
+
24
+ attr_reader :results, :positional_anchors, :end_offset,
25
+ :pre_match, :match, :post_match
26
+
27
+ # get pre-match string
28
+
29
# Appends one element to the result list and advances the end offset by
# one. Returns the new end offset.
def push_body(elem)
  @results << elem
  @end_offset = @end_offset + 1
end
34
+
35
# Looks up the result list with the given index argument and returns
# whatever the list yields for it.
def [](offset)
  @results.slice(offset)
end
39
+
40
# Number of elements currently held in the result list.
def size
  @results.length
end
44
+
45
# Records a look-ahead to be merged later: remembers the command, its
# sub-results and the end offset at the moment of the call.
def add_look_ahead(command, sub_results)
  entry = { offset: @end_offset, cmd: command, results: sub_results }
  @look_aheads << entry
end
49
+
50
# Records a look-behind to be merged later: remembers the command, its
# sub-results and the end offset at the moment of the call.
def add_look_behind(command, sub_results)
  entry = { offset: @end_offset, cmd: command, results: sub_results }
  @look_behinds << entry
end
54
+
55
# Registers the current end offset as a position of the given anchor
# command. Returns the list of offsets recorded for that command.
def add_anchor(cmd)
  (@positional_anchors[cmd] ||= []) << @end_offset
end
60
+
61
# Records where a reluctant repeat begins and ends.
#
# elem.param[:id] identifies the repeat. A BEGIN command stores the
# current end offset as the first entry for that id; an END command
# appends the current end offset to it.
# Raises RuntimeError when END arrives without a recorded BEGIN, or when
# the command is neither BEGIN nor END.
def add_reluctant_repeat(elem)
  id = elem.param[:id]
  command = elem.command

  if command == :CMD_ANC_RELUCTANT_BEGIN
    @reluctant_repeat[id] = [@end_offset]
  elsif command == :CMD_ANC_RELUCTANT_END
    marks = @reluctant_repeat[id]
    raise "internal error, invalid reluctant_repeat_end command" unless marks
    marks.push @end_offset
  else
    raise "internal error, invalid reluctant_repeat command"
  end
end
77
+
78
# Merges the recorded look-aheads first and, only when that succeeds,
# the recorded look-behinds. Returns the falsy look-ahead result on
# failure, otherwise the result of the look-behind merge.
def merge
  ahead = merge_look_ahead
  return ahead unless ahead

  merge_look_behind
end
83
+
84
# Merges every recorded look-ahead into the results, in recording order.
# For each record the sub-result anchors are merged first, then the
# elements are merged according to the command (positive or negative).
# Returns true when all merges succeed, nil as soon as one fails.
# Raises RuntimeError for a command that is not a look-ahead.
def merge_look_ahead
  @look_aheads.each do |entry|
    offset, command, sub_results = entry.values_at(:offset, :cmd, :results)

    merge_anchors(offset, sub_results)
    merged =
      case command
      when :CMD_LOOK_AHEAD
        merge_look_ahead_elems(offset, sub_results)
      when :CMD_NOT_LOOK_AHEAD
        merge_not_look_ahead_elems(offset, sub_results)
      else
        raise "invalid command at merge_look_ahead: #{command}"
      end
    return nil unless merged
  end
  true
end
107
+
108
# Merges a positive look-ahead that starts at +offset+.
# Each sub-element is intersected with the result element at the same
# position; sub-elements that lie past the end of the result list are
# appended as they are.
# Returns true on success, nil as soon as an intersection reports failure.
def merge_look_ahead_elems(offset, sub_results)
  sub_results.end_offset.times do |k|
    sub_elem = sub_results[k]
    index = offset + k

    if index >= @results.size
      # past the current end: the look-ahead element becomes part of the results
      @results.push(sub_elem)
    elsif !@results[index].intersect(sub_elem)
      return nil
    end
  end
  true
end
126
+
127
# Applies a negative look-ahead that starts at +offset+.
#
# sub_results is either a Regextest::Back::Result (its end_offset gives
# the length) or any other collection answering #size and #[].
# The positions of the sub-results are tried in the order returned by
# TstShuffle. For the position under trial:
#   * inside the current results, the sub-element is excluded from the
#     existing element;
#   * past the end, the sub-element's +reverse+ is appended (any_char
#     when +reverse+ is falsy).
# Other positions past the end are padded with any_char.
# The first trial that alters a position is adopted as @results.
# Returns true when such a trial exists, false otherwise.
#
# Fix: the scanned range now includes the last sub-element. Previously the
# upper bound was decremented twice, so the last position could never be
# the one altered and a one-element look-ahead always failed.
#
# NOTE(review): results_work is a shallow copy, so +exclude+ is invoked on
# element objects that @results also references.
def merge_not_look_ahead_elems(offset, sub_results)
  sub_length =
    if Regextest::Back::Result === sub_results
      sub_results.end_offset
    else
      sub_results.size
    end
  last_offset = offset + sub_length - 1
  try_order = TstShuffle(sub_results.size.times.to_a)

  found = false
  # exclude, at least, one element
  try_order.each do |j|
    results_work = @results.dup
    cur_offset = offset + j

    offset.upto(last_offset) do |i|
      sub_elem = sub_results[i - offset]

      if i < results_work.size # it is NOT @end_offset
        next unless i == cur_offset

        found = true if results_work[i].exclude(sub_elem)
      elsif i == cur_offset && (reverse_work = sub_elem.reverse)
        results_work.push reverse_work
        found = true
      else
        results_work.push(Regextest::Back::Element.any_char)
      end
    end

    if found
      @results = results_work
      break
    end
  end
  found
end
177
+
178
# Merges every recorded look-behind into the results, in recording order.
# For each record the sub-result anchors are merged first, then the
# elements are merged according to the command (positive or negative).
# Returns true when all merges succeed, nil as soon as one fails.
# Raises RuntimeError for a command that is not a look-behind.
def merge_look_behind
  @look_behinds.each do |entry|
    offset, command, sub_results = entry.values_at(:offset, :cmd, :results)

    merge_anchors(offset, sub_results)
    merged =
      case command
      when :CMD_LOOK_BEHIND
        merge_look_behind_elems(offset, sub_results)
      when :CMD_NOT_LOOK_BEHIND
        merge_not_look_behind_elems(offset, sub_results)
      else
        raise "invalid command at merge_look_behind: #{command}"
      end
    return nil unless merged
  end
  true
end
201
+
202
# Merges a positive look-behind that ends at +offset+.
#
# When the sub-results are longer than +offset+, the surplus leading
# sub-elements are placed in front of the results (after unshift_params
# has shifted the recorded offsets); the remaining sub-elements are
# intersected with the result elements they line up with.
# Returns true on success, false when unshift_params refuses the shift,
# nil when an intersection reports failure.
def merge_look_behind_elems(offset, sub_results)
  unshift_length = sub_results.end_offset - offset
  return false if unshift_length > 0 && !unshift_params(unshift_length)

  pre_part = []
  sub_results.end_offset.times do |i|
    sub_elem = sub_results[i]
    if i < unshift_length
      # lies before the current first element: collected for prepending
      pre_part << sub_elem
      next
    end
    return nil unless @results[i - unshift_length].intersect(sub_elem)
  end

  @results = pre_part + @results
  true
end
228
+
229
# Applies a negative look-behind that ends at +offset+.
#
# When the sub-results are longer than +offset+, the surplus leading
# positions are added in front of the results (after unshift_params has
# shifted the recorded offsets). The positions of the sub-results are
# tried in the order returned by TstShuffle. For the position under trial:
#   * in the new front part, the sub-element's +reverse+ is used
#     (any_char when +reverse+ is falsy);
#   * otherwise the sub-element is excluded from the aligned result
#     element.
# Other positions in the front part are filled with any_char.
# The first trial that alters a position is adopted as @results.
# Returns true when such a trial exists, false otherwise (also false when
# unshift_params refuses the shift).
#
# Fixes:
#   * The front part is now built in sub-result order. Repeated +unshift+
#     used to leave it reversed, so with two or more prepended positions
#     the altered element ended up at the wrong position.
#   * A falsy +reverse+ is no longer stored in the results or counted as a
#     success, matching merge_not_look_ahead_elems.
#
# NOTE(review): results_work is a shallow copy, so +exclude+ is invoked on
# element objects that @results also references.
def merge_not_look_behind_elems(offset, sub_results)
  unshift_length = sub_results.end_offset - offset
  return false if unshift_length > 0 && !unshift_params(unshift_length)

  try_order = TstShuffle(sub_results.size.times.to_a)
  found = false
  # exclude, at least, one element
  try_order.each do |j|
    results_work = @results.dup
    pre_part = []

    sub_results.end_offset.times do |i|
      sub_elem = sub_results[i]

      if i < unshift_length
        if i == j && (reverse_work = sub_elem.reverse)
          pre_part.push reverse_work
          found = true
        else
          pre_part.push(Regextest::Back::Element.any_char)
        end
      elsif i == j
        # i - unshift_length is the index of the aligned element in the
        # not-yet-extended work list, for both signs of unshift_length
        found = true if results_work[i - unshift_length].exclude(sub_elem)
      end
    end

    if found
      @results = pre_part + results_work
      break
    end
  end
  found
end
275
+
276
# Copies the anchors recorded in sub_results into this result, shifting
# every anchor position by +offset+. Positions already present for a
# command are not duplicated.
def merge_anchors(offset, sub_results)
  sub_results.positional_anchors.each do |cmd, sub_offsets|
    shifted = sub_offsets.map { |pos| pos + offset }
    @positional_anchors[cmd] = (@positional_anchors[cmd] || []) | shifted
  end
end
283
+
284
# Shifts every recorded position by +unshift_length+ so that elements can
# be inserted in front of the results: look-ahead / look-behind offsets,
# anchor positions, and the start and end offsets.
#
# Returns false, without changing anything, when a :CMD_ANC_STRING_BEGIN
# anchor is recorded; returns true otherwise.
#
# Fix: the :CMD_ANC_STRING_BEGIN check now runs before any mutation.
# Previously the look-ahead / look-behind offsets (and possibly some anchor
# positions) were already shifted when false was returned, leaving the
# object half-shifted.
def unshift_params(unshift_length)
  return false if @positional_anchors.key?(:CMD_ANC_STRING_BEGIN)

  @look_aheads.each { |elem| elem[:offset] += unshift_length }
  @look_behinds.each { |elem| elem[:offset] += unshift_length }
  @positional_anchors.each_value do |offsets|
    offsets.map! { |offset| offset + unshift_length }
  end
  @start_offset += unshift_length
  @end_offset += unshift_length
  true
end
296
+
297
# Narrows the candidates down by anchors first and, only when that
# succeeds, by reluctant repeats. Returns the falsy anchor result on
# failure, otherwise the result of the reluctant-repeat step.
def narrow_down
  anchors_ok = narrow_down_by_anchors
  return anchors_ok unless anchors_ok

  narrow_down_by_reluctant_repeat
end
302
+
303
# Narrows the candidates down for every recorded reluctant repeat.
#
# For a repeat recorded as [begin, end], the elements from +end+ to the
# end of the results (the part that follows the repeat) are applied as a
# negative look-ahead at every start position from +begin+ up to
# +end+ minus the length of that following part.
# Returns false as soon as one of these merges fails, true otherwise.
def narrow_down_by_reluctant_repeat
  @reluctant_repeat.each_value do |offsets|
    repeat_begin = offsets[0]
    repeat_end = offsets[1]
    succeed_part = @results[repeat_end..-1]
    next if succeed_part.empty?

    # reluctant repeat is equivalent to not_look_ahead!
    last_start = repeat_end - succeed_part.size
    repeat_begin.upto(last_start) do |offset|
      return false unless merge_not_look_ahead_elems(offset, succeed_part)
    end
  end
  true
end
321
+
322
# Narrows the candidates down according to the recorded anchors.
#
# Per anchor command:
#   :CMD_ANC_STRING_BEGIN, :CMD_ANC_MATCH_START
#       fail when any position is greater than 0.
#   :CMD_ANC_STRING_END
#       fail when the smallest position is before the last element.
#       NOTE(review): a position one before the end is accepted here;
#       confirm this is intended.
#   :CMD_ANC_STRING_END2
#       as above, but a position exactly on the last element is accepted
#       only when that element can be a new line (it is then forced to be).
#   :CMD_ANC_LINE_BEGIN / :CMD_ANC_LINE_END
#       the element before / at each position must be able to be a new
#       line (forced), unless the position is the very start / very end.
#   :CMD_ANC_WORD_BOUND / :CMD_ANC_WORD_UNBOUND
#       the two elements around each position are constrained with
#       bound_process / unbound_process, see narrow_down_by_word_anchor.
#   :CMD_ANC_LOOK_BEHIND2
#       the largest position becomes the start offset.
#
# Returns true when every anchor can be satisfied, false otherwise.
# Raises RuntimeError for any other command.
#
# Fix (word anchors): positions are de-duplicated before being walked.
# Previously only the number of iterations was taken from the unique
# list while the values came from the raw list, so a duplicate made a
# later position get skipped.
def narrow_down_by_anchors
  @positional_anchors.each do |cmd, offsets|
    case cmd
    when :CMD_ANC_STRING_BEGIN, :CMD_ANC_MATCH_START
      return false if offsets.max > 0
    when :CMD_ANC_STRING_END
      return false if offsets.min < (@results.size - 1)
    when :CMD_ANC_STRING_END2
      min_offset = offsets.min
      last_index = @results.size - 1
      return false if min_offset < last_index

      if min_offset == last_index
        return false unless @results[min_offset].new_line?

        @results[min_offset].set_new_line
      end
    when :CMD_ANC_LINE_BEGIN
      offsets.each do |offset|
        next if offset == 0
        return false unless @results[offset - 1].new_line?

        @results[offset - 1].set_new_line
      end
    when :CMD_ANC_LINE_END
      offsets.each do |offset|
        next if offset == @results.size
        return false unless @results[offset].new_line?

        @results[offset].set_new_line
      end
    when :CMD_ANC_WORD_BOUND
      return false unless narrow_down_by_word_anchor(offsets) { |left, right| bound_process(left, right) }
    when :CMD_ANC_WORD_UNBOUND
      return false unless narrow_down_by_word_anchor(offsets) { |left, right| unbound_process(left, right) }
    when :CMD_ANC_LOOK_BEHIND2
      @start_offset = offsets.max
    else
      raise "command (#{cmd}) not implemented"
    end
  end
  true
end

# Shared walk for the two word anchors. Yields the pair of elements
# around every distinct position in +offsets+, adding an any_char element
# at the front or the end when the position has no neighbour there.
# Returns false when the block or unshift_params reports failure.
def narrow_down_by_word_anchor(offsets)
  offsets.uniq!
  offsets.size.times do |i|
    # read by index on purpose: unshift_params shifts this very array
    offset = offsets[i]

    if offset > 0 && offset < @results.size
      return false unless yield(@results[offset - 1], @results[offset])
    elsif @results.size == 0
      2.times { @results.push(Regextest::Back::Element.any_char) }
      yield(@results[0], @results[1])
    elsif offset == @results.size
      @results.push(Regextest::Back::Element.any_char)
      return false unless yield(@results[-2], @results[-1])
    elsif offset == 0
      return false unless unshift_params(1)

      @results.unshift(Regextest::Back::Element.any_char)
      return false unless yield(@results[0], @results[1])
    end
  end
  true
end
private :narrow_down_by_word_anchor
423
+
424
# Constrains two neighbouring elements for a \b anchor: one of them is
# restricted to word elements, the other to non-word elements.
# The side that is already purely word / non-word (elem1 is checked
# first) decides the other one; when neither is decided, TstRand(2)
# picks the assignment.
# Returns false when either element reports empty afterwards, else true.
def bound_process(elem1, elem2)
  if elem1.word_elements?
    elem2.set_non_word_elements
  elsif elem1.non_word_elements?
    elem2.set_word_elements
  elsif elem2.word_elements?
    elem1.set_non_word_elements
  elsif elem2.non_word_elements?
    elem1.set_word_elements
  else
    first_setter, second_setter =
      if TstRand(2) == 0
        [:set_word_elements, :set_non_word_elements]
      else
        [:set_non_word_elements, :set_word_elements]
      end
    elem1.public_send(first_setter)
    elem2.public_send(second_setter)
  end

  !(elem1.empty? || elem2.empty?)
end
448
+
449
# Constrains two neighbouring elements for a \B anchor: both are
# restricted to the same kind, word elements or non-word elements.
# The side that is already purely word / non-word (elem1 is checked
# first) decides the other one; when neither is decided, TstRand(2)
# picks the kind for both.
# Returns false when either element reports empty afterwards, else true.
def unbound_process(elem1, elem2)
  if elem1.word_elements?
    elem2.set_word_elements
  elsif elem1.non_word_elements?
    elem2.set_non_word_elements
  elsif elem2.word_elements?
    elem1.set_word_elements
  elsif elem2.non_word_elements?
    elem1.set_non_word_elements
  else
    setter = TstRand(2) == 0 ? :set_word_elements : :set_non_word_elements
    elem1.public_send(setter)
    elem2.public_send(setter)
  end

  !(elem1.empty? || elem2.empty?)
end
473
+
474
# Fixes the results into concrete strings and stores them as the
# pre-match (elements before the start offset), the match (start offset
# up to the end offset) and the post-match (everything from the end
# offset on). Returns the three parts concatenated.
def fix
  last_index = @results.size - 1

  @pre_match = fix_part(0, @start_offset - 1)
  @match = fix_part(@start_offset, @end_offset - 1)
  @post_match = fix_part(@end_offset, last_index)

  [@pre_match, @match, @post_match].reduce(:+)
end
482
+
483
# Concatenates the +random_fix+ strings of the elements from
# start_offset to end_offset (both inclusive). Returns an empty string
# when end_offset is smaller than start_offset.
def fix_part(start_offset, end_offset)
  (start_offset..end_offset).reduce("") do |text, i|
    text + @results[i].random_fix
  end
end
491
+
492
+ end
493
+
494
+ # Test suite (execute when this file is specified in command line)
495
+ if __FILE__ == $0
496
+ end
497
+
498
+