regextest 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +25 -0
- data/README.md +88 -0
- data/Rakefile +55 -0
- data/bin/console +14 -0
- data/bin/regextest +4 -0
- data/bin/setup +7 -0
- data/contrib/Onigmo/RE.txt +522 -0
- data/contrib/Onigmo/UnicodeProps.txt +728 -0
- data/contrib/Onigmo/testpy.py +1319 -0
- data/contrib/unicode/Blocks.txt +298 -0
- data/contrib/unicode/CaseFolding.txt +1414 -0
- data/contrib/unicode/DerivedAge.txt +1538 -0
- data/contrib/unicode/DerivedCoreProperties.txt +11029 -0
- data/contrib/unicode/PropList.txt +1525 -0
- data/contrib/unicode/PropertyAliases.txt +193 -0
- data/contrib/unicode/PropertyValueAliases.txt +1420 -0
- data/contrib/unicode/README.txt +25 -0
- data/contrib/unicode/Scripts.txt +2539 -0
- data/contrib/unicode/UnicodeData.txt +29215 -0
- data/lib/pre-case-folding.rb +101 -0
- data/lib/pre-posix-char-class.rb +150 -0
- data/lib/pre-unicode.rb +116 -0
- data/lib/regextest.rb +268 -0
- data/lib/regextest/back.rb +58 -0
- data/lib/regextest/back/element.rb +151 -0
- data/lib/regextest/back/main.rb +356 -0
- data/lib/regextest/back/result.rb +498 -0
- data/lib/regextest/back/test-case.rb +268 -0
- data/lib/regextest/back/work-thread.rb +119 -0
- data/lib/regextest/common.rb +63 -0
- data/lib/regextest/front.rb +60 -0
- data/lib/regextest/front/anchor.rb +45 -0
- data/lib/regextest/front/back-refer.rb +120 -0
- data/lib/regextest/front/bracket-parser.rb +400 -0
- data/lib/regextest/front/bracket-parser.y +117 -0
- data/lib/regextest/front/bracket-scanner.rb +124 -0
- data/lib/regextest/front/bracket.rb +64 -0
- data/lib/regextest/front/builtin-functions.rb +31 -0
- data/lib/regextest/front/case-folding.rb +18 -0
- data/lib/regextest/front/char-class.rb +243 -0
- data/lib/regextest/front/empty.rb +43 -0
- data/lib/regextest/front/letter.rb +327 -0
- data/lib/regextest/front/manage-parentheses.rb +74 -0
- data/lib/regextest/front/parenthesis.rb +153 -0
- data/lib/regextest/front/parser.rb +1366 -0
- data/lib/regextest/front/parser.y +271 -0
- data/lib/regextest/front/range.rb +60 -0
- data/lib/regextest/front/repeat.rb +90 -0
- data/lib/regextest/front/repeatable.rb +77 -0
- data/lib/regextest/front/scanner.rb +187 -0
- data/lib/regextest/front/selectable.rb +65 -0
- data/lib/regextest/front/sequence.rb +73 -0
- data/lib/regextest/front/unicode.rb +1272 -0
- data/lib/regextest/regex-option.rb +144 -0
- data/lib/regextest/regexp.rb +44 -0
- data/lib/regextest/version.rb +5 -0
- data/lib/tst-reg-test.rb +159 -0
- data/regextest.gemspec +26 -0
- metadata +162 -0
@@ -0,0 +1,498 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
require 'regextest/common'
|
6
|
+
require 'regextest/back/element'
|
7
|
+
|
8
|
+
class Regextest::Back::Result
|
9
|
+
include Regextest::Common
|
10
|
+
|
11
|
+
# Creates an empty result.
#
# Instance state:
# * @results            - ordered list of elements (appended by #push_body)
# * @look_aheads        - entries recorded by #add_look_ahead
# * @look_behinds       - entries recorded by #add_look_behind
# * @positional_anchors - anchor command => array of offsets
# * @reluctant_repeat   - repeat id => array of offsets
# * @start_offset / @end_offset - bounds used by #fix to split the result
# * @pre_match / @match / @post_match - strings produced by #fix
def initialize
  @results = []
  @look_aheads = []
  @look_behinds = []
  @positional_anchors = {}
  @reluctant_repeat = {}
  @start_offset = 0
  @end_offset = 0
  @pre_match = nil
  @match = nil
  @post_match = nil
end
|
23
|
+
|
24
|
+
attr_reader :results, :positional_anchors, :end_offset,
|
25
|
+
:pre_match, :match, :post_match
|
26
|
+
|
27
|
+
# get pre-match string
|
28
|
+
|
29
|
+
# Appends +elem+ to the results and advances the end offset by one.
#
# @param elem [Object] element to append
# @return [Integer] the new end offset
def push_body(elem)
  @results.push(elem)
  @end_offset += 1
end
|
34
|
+
|
35
|
+
# Returns the element stored at +offset+ (nil when out of range).
#
# @param offset [Integer] index into the results
# @return [Object, nil]
def [](offset)
  @results[offset]
end
|
39
|
+
|
40
|
+
# Number of elements currently held in the results.
#
# @return [Integer]
def size
  @results.size
end
|
44
|
+
|
45
|
+
# Records a look-ahead so it can be merged later (see #merge_look_ahead).
# The entry remembers the end offset at the time of the call.
#
# @param command [Symbol] look-ahead command
# @param sub_results [Object] results generated for the look-ahead body
# @return [Array<Hash>] the list of recorded look-aheads
def add_look_ahead(command, sub_results)
  entry = { offset: @end_offset, cmd: command, results: sub_results }
  @look_aheads.push(entry)
end
|
49
|
+
|
50
|
+
# Records a look-behind so it can be merged later (see #merge_look_behind).
# The entry remembers the end offset at the time of the call.
#
# @param command [Symbol] look-behind command
# @param sub_results [Object] results generated for the look-behind body
# @return [Array<Hash>] the list of recorded look-behinds
def add_look_behind(command, sub_results)
  entry = { offset: @end_offset, cmd: command, results: sub_results }
  @look_behinds.push(entry)
end
|
54
|
+
|
55
|
+
# Records that anchor +cmd+ occurs at the current end offset.
#
# @param cmd [Symbol] anchor command
# @return [Array<Integer>] all offsets recorded so far for +cmd+
def add_anchor(cmd)
  (@positional_anchors[cmd] ||= []).push(@end_offset)
end
|
60
|
+
|
61
|
+
# Records the begin/end offsets of a reluctant repeat.
#
# A BEGIN command starts a fresh offset list for the repeat id; an END
# command appends the current end offset to that list.
#
# @param elem [#command, #param] command element; +param[:id]+ is the repeat id
# @raise [RuntimeError] on an END without a preceding BEGIN, or on any
#   other command
def add_reluctant_repeat(elem)
  repeat_id = elem.param[:id]
  case elem.command
  when :CMD_ANC_RELUCTANT_BEGIN
    @reluctant_repeat[repeat_id] = [@end_offset]
  when :CMD_ANC_RELUCTANT_END
    offsets = @reluctant_repeat[repeat_id]
    raise "internal error, invalid reluctant_repeat_end command" unless offsets

    offsets.push(@end_offset)
  else
    raise "internal error, invalid reluctant_repeat command"
  end
end
|
77
|
+
|
78
|
+
# Merges all recorded look-aheads, then all recorded look-behinds.
# Look-behinds are only merged when the look-ahead merge succeeded.
#
# @return [Object] truthy when both merges succeeded, falsy otherwise
def merge
  merge_look_ahead && merge_look_behind
end
|
83
|
+
|
84
|
+
# Merges every recorded look-ahead into the results.
#
# For each entry the anchors of the sub-results are merged first, then the
# elements are merged according to the entry's command.
#
# @return [true, nil] true when every entry merged, nil as soon as one fails
# @raise [RuntimeError] when an entry carries an unknown command
def merge_look_ahead
  @look_aheads.each do |entry|
    offset = entry[:offset]
    sub_results = entry[:results]
    command = entry[:cmd]

    merge_anchors(offset, sub_results)
    merged =
      case command
      when :CMD_LOOK_AHEAD
        merge_look_ahead_elems(offset, sub_results)
      when :CMD_NOT_LOOK_AHEAD
        merge_not_look_ahead_elems(offset, sub_results)
      else
        raise "invalid command at merge_look_ahead: #{command}"
      end
    return nil unless merged
  end
  true
end
|
107
|
+
|
108
|
+
# Merges the elements of a positive look-ahead into the results.
#
# Sub element k is aligned with position +offset + k+. Where a result
# element already exists it is intersected with the sub element; positions
# past the current end of the results receive the sub element itself.
#
# @param offset [Integer] position in the results where the look-ahead starts
# @param sub_results [#end_offset, #[]] results of the look-ahead body
# @return [true, nil] nil as soon as an intersection reports failure
def merge_look_ahead_elems(offset, sub_results)
  sub_results.end_offset.times do |sub_index|
    sub_elem = sub_results[sub_index]
    position = offset + sub_index

    if position < @results.size # overlaps an existing element
      return nil unless @results[position].intersect(sub_elem)
    else
      @results.push(sub_elem)
    end
  end
  true
end
|
126
|
+
|
127
|
+
# Merges the elements of a negative look-ahead into the results.
#
# It is enough that ONE position of the look-ahead body cannot match, so
# the positions are tried in shuffled order. For the chosen position the
# sub element is excluded from the existing result element, or, past the
# end of the results, the sub element's +reverse+ is appended. All other
# positions past the end are filled with any-char elements.
#
# The previous implementation subtracted 1 from the terminal offset twice,
# so the last sub element was never visited and a one-element negative
# look-ahead could never succeed. Every sub element is visited now.
#
# NOTE(review): the working copy is a shallow +dup+, so an +exclude+ that
# mutates its receiver and then fails stays visible in @results - confirm
# against Element#exclude.
#
# @param offset [Integer] position in the results where the look-ahead starts
# @param sub_results [Regextest::Back::Result, Array] look-ahead body
# @return [Boolean] true when one position could be excluded
def merge_not_look_ahead_elems(offset, sub_results)
  length =
    if sub_results.is_a?(Regextest::Back::Result)
      sub_results.end_offset
    else
      sub_results.size
    end

  found = false
  # exclude, at least, one element
  TstShuffle(sub_results.size.times.to_a).each do |chosen|
    results_work = @results.dup

    length.times do |sub_index|
      position = offset + sub_index
      sub_elem = sub_results[sub_index]

      if position < results_work.size # overlaps an existing element
        found = true if sub_index == chosen && results_work[position].exclude(sub_elem)
      elsif sub_index == chosen && (reversed = sub_elem.reverse)
        results_work.push(reversed)
        found = true
      else
        results_work.push(Regextest::Back::Element.any_char)
      end
    end

    next unless found

    @results = results_work
    break
  end
  found
end
|
177
|
+
|
178
|
+
# Merges every recorded look-behind into the results.
#
# For each entry the anchors of the sub-results are merged first, then the
# elements are merged according to the entry's command.
#
# @return [true, nil] true when every entry merged, nil as soon as one fails
# @raise [RuntimeError] when an entry carries an unknown command
def merge_look_behind
  @look_behinds.each do |entry|
    offset = entry[:offset]
    sub_results = entry[:results]
    command = entry[:cmd]

    merge_anchors(offset, sub_results)
    merged =
      case command
      when :CMD_LOOK_BEHIND
        merge_look_behind_elems(offset, sub_results)
      when :CMD_NOT_LOOK_BEHIND
        merge_not_look_behind_elems(offset, sub_results)
      else
        raise "invalid command at merge_look_behind: #{command}"
      end
    return nil unless merged
  end
  true
end
|
201
|
+
|
202
|
+
# Merges the elements of a positive look-behind into the results.
#
# The look-behind body ends at +offset+. When the body is longer than the
# results in front of +offset+, the missing leading elements are prepended
# (after shifting all recorded offsets via #unshift_params). The remaining
# sub elements are intersected with the result elements they overlap.
#
# @param offset [Integer] position in the results where the look-behind ends
# @param sub_results [#end_offset, #[]] results of the look-behind body
# @return [true, false, nil] true on success, false when the results may
#   not be shifted, nil when an intersection reports failure
def merge_look_behind_elems(offset, sub_results)
  unshift_length = sub_results.end_offset - offset
  return false if unshift_length > 0 && !unshift_params(unshift_length)

  # intersect elems
  pre_part = []
  sub_results.end_offset.times do |sub_index|
    sub_elem = sub_results[sub_index]
    if sub_index < unshift_length
      pre_part.push(sub_elem)
    else
      # a non-positive unshift_length moves the index right, to where the body starts
      return nil unless @results[sub_index - unshift_length].intersect(sub_elem)
    end
  end
  @results = pre_part + @results
  true
end
|
228
|
+
|
229
|
+
# Merges the elements of a negative look-behind into the results.
#
# The look-behind body ends at +offset+. It is enough that ONE position of
# the body cannot match, so positions are tried in shuffled order. For the
# chosen position the sub element is excluded from the overlapping result
# element, or, when it lies before the start of the results, its +reverse+
# is prepended. All other missing leading positions become any-char
# elements.
#
# Fixes over the previous implementation:
# * leading elements are collected and prepended in order; repeated
#   +unshift+ calls used to leave them mirrored
# * a falsy +reverse+ is no longer stored as nil and counted as success;
#   it is handled like in #merge_not_look_ahead_elems (any-char, next try)
#
# @param offset [Integer] position in the results where the look-behind ends
# @param sub_results [#end_offset, #size, #[]] look-behind body
# @return [Boolean] true when one position could be excluded; false also
#   when the results may not be shifted
def merge_not_look_behind_elems(offset, sub_results)
  unshift_length = sub_results.end_offset - offset
  return false if unshift_length > 0 && !unshift_params(unshift_length)

  found = false
  # exclude, at least, one element
  TstShuffle(sub_results.size.times.to_a).each do |chosen|
    results_work = @results.dup
    pre_part = []

    sub_results.end_offset.times do |sub_index|
      sub_elem = sub_results[sub_index]

      if sub_index < unshift_length
        reversed = sub_elem.reverse if sub_index == chosen
        if reversed
          pre_part.push(reversed)
          found = true
        else
          pre_part.push(Regextest::Back::Element.any_char)
        end
      elsif sub_index == chosen
        # index into the not-yet-prepended copy
        found = true if results_work[sub_index - unshift_length].exclude(sub_elem)
      end
    end

    next unless found

    @results = pre_part + results_work
    break
  end
  found
end
|
275
|
+
|
276
|
+
# Copies the anchors of +sub_results+ into this result, translating each
# sub offset by +offset+. Offsets already recorded are not duplicated.
#
# @param offset [Integer] position of the sub-results inside this result
# @param sub_results [#positional_anchors] source of the anchors
# @return [Hash] the anchors of +sub_results+
def merge_anchors(offset, sub_results)
  sub_results.positional_anchors.each do |cmd, sub_offsets|
    shifted = sub_offsets.map { |sub_offset| sub_offset + offset }
    @positional_anchors[cmd] = (@positional_anchors[cmd] || []) | shifted
  end
end
|
283
|
+
|
284
|
+
# Shifts every recorded offset by +unshift_length+, to be called when that
# many elements are prepended to the results.
#
# Shifting is refused when a :CMD_ANC_STRING_BEGIN anchor is recorded. The
# refusal is decided BEFORE anything is modified, so a false return leaves
# the state untouched (previously look-ahead/look-behind offsets and some
# anchors were already shifted when false was returned).
#
# The offsets in @reluctant_repeat are shifted too: they index @results
# (see #narrow_down_by_reluctant_repeat) and used to go stale here.
#
# @param unshift_length [Integer] number of elements being prepended
# @return [Boolean] false when shifting is refused, true otherwise
def unshift_params(unshift_length)
  return false if @positional_anchors.key?(:CMD_ANC_STRING_BEGIN)

  @look_aheads.each { |entry| entry[:offset] += unshift_length }
  @look_behinds.each { |entry| entry[:offset] += unshift_length }
  # map! keeps the array objects, so callers holding one see the new offsets
  @positional_anchors.each_value do |offsets|
    offsets.map! { |offset| offset + unshift_length }
  end
  @reluctant_repeat.each_value do |offsets|
    offsets.map! { |offset| offset + unshift_length }
  end
  @start_offset += unshift_length
  @end_offset += unshift_length
  true
end
|
296
|
+
|
297
|
+
# Narrows the candidates down, first by positional anchors and then by
# reluctant repeats. The second step only runs when the first succeeded.
#
# @return [Object] truthy when both steps succeeded, falsy otherwise
def narrow_down
  narrow_down_by_anchors && narrow_down_by_reluctant_repeat
end
|
302
|
+
|
303
|
+
# Narrows the candidates down for every recorded reluctant repeat.
#
# A reluctant repeat is treated like a negative look-ahead: the elements
# following the repeat ("succeeding part") must not match at any earlier
# start position inside the repeated part, so they are merged as a
# not-look-ahead at each such position.
#
# @return [Boolean] false as soon as one merge fails, true otherwise
def narrow_down_by_reluctant_repeat
  @reluctant_repeat.each_value do |offsets|
    repeat_begin = offsets[0]
    repeat_end = offsets[1]
    succeed_part = @results[repeat_end..-1]
    next if succeed_part.empty?

    # reluctant repeat is equivalent to not_look_ahead!
    (repeat_begin..(repeat_end - succeed_part.size)).each do |offset|
      return false unless merge_not_look_ahead_elems(offset, succeed_part)
    end
  end
  true
end
|
321
|
+
|
322
|
+
# Narrows the candidates down by the recorded positional anchors.
#
# Each anchor command is checked against the current results; elements
# next to line anchors are restricted to new-line, elements around word
# anchors are restricted via #bound_process / #unbound_process, and
# :CMD_ANC_LOOK_BEHIND2 moves the start offset of the match.
#
# NOTE(review): :CMD_ANC_STRING_END accepts an anchor at size - 1 (one
# element before the end) - looks like a possible off-by-one; left
# unchanged, confirm against the parser's meaning of that command.
#
# @return [Boolean] false as soon as an anchor cannot be satisfied
# @raise [RuntimeError] for an anchor command that is not handled here
def narrow_down_by_anchors
  @positional_anchors.each do |cmd, offsets|
    case cmd
    when :CMD_ANC_STRING_BEGIN, :CMD_ANC_MATCH_START
      return false if offsets.max > 0
    when :CMD_ANC_STRING_END
      return false if offsets.min < @results.size - 1
    when :CMD_ANC_STRING_END2
      min_offset = offsets.min
      return false if min_offset < @results.size - 1

      if min_offset == @results.size - 1
        # only a new-line may follow the anchor
        return false unless @results[min_offset].new_line?

        @results[min_offset].set_new_line
      end
    when :CMD_ANC_LINE_BEGIN
      offsets.each do |offset|
        next if offset == 0
        return false unless @results[offset - 1].new_line?

        @results[offset - 1].set_new_line
      end
    when :CMD_ANC_LINE_END
      offsets.each do |offset|
        next if offset == @results.size
        return false unless @results[offset].new_line?

        @results[offset].set_new_line
      end
    when :CMD_ANC_WORD_BOUND
      bound = narrow_down_by_word_anchor(offsets) { |left, right| bound_process(left, right) }
      return false unless bound
    when :CMD_ANC_WORD_UNBOUND
      unbound = narrow_down_by_word_anchor(offsets) { |left, right| unbound_process(left, right) }
      return false unless unbound
    when :CMD_ANC_LOOK_BEHIND2
      @start_offset = offsets.max
    else
      raise "command (#{cmd}) not implemented"
    end
  end
  true
end

# Applies a word-anchor constraint (the block) to the pair of elements
# around each offset, adding any-char elements where a neighbour is missing.
#
# The offsets are deduplicated in place and then read by index on every
# iteration, because #unshift_params may shift them while looping. The
# previous code took the count of unique offsets but indexed the
# non-unique array, so duplicates ran twice and later offsets were skipped.
#
# @param offsets [Array<Integer>] anchor offsets (modified in place)
# @yield [left, right] the elements before and after the anchor
# @return [Boolean] false as soon as a constraint cannot be met
def narrow_down_by_word_anchor(offsets)
  offsets.uniq!
  offsets.size.times do |index|
    offset = offsets[index]
    if offset > 0 && offset < @results.size
      return false unless yield(@results[offset - 1], @results[offset])
    elsif @results.empty?
      2.times { @results.push(Regextest::Back::Element.any_char) }
      yield(@results[0], @results[1]) # outcome deliberately not checked, as before
    elsif offset == @results.size
      @results.push(Regextest::Back::Element.any_char)
      return false unless yield(@results[-2], @results[-1])
    elsif offset == 0
      return false unless unshift_params(1)

      @results.unshift(Regextest::Back::Element.any_char)
      return false unless yield(@results[0], @results[1])
    end
  end
  true
end
private :narrow_down_by_word_anchor
|
423
|
+
|
424
|
+
# Applies a word-boundary (\b) constraint to two adjacent elements: one
# side must be a word element and the other a non-word element.
#
# The first element whose kind is already decided determines the other
# one (elem1 is examined before elem2). When neither is decided, the
# assignment is picked at random.
#
# @param elem1 [Object] element before the boundary
# @param elem2 [Object] element after the boundary
# @return [Boolean] false when either element is empty afterwards
def bound_process(elem1, elem2)
  if elem1.word_elements?
    elem2.set_non_word_elements
  elsif elem1.non_word_elements?
    elem2.set_word_elements
  elsif elem2.word_elements?
    elem1.set_non_word_elements
  elsif elem2.non_word_elements?
    elem1.set_word_elements
  elsif TstRand(2) == 0
    elem1.set_word_elements
    elem2.set_non_word_elements
  else
    elem1.set_non_word_elements
    elem2.set_word_elements
  end
  !(elem1.empty? || elem2.empty?)
end
|
448
|
+
|
449
|
+
# Applies a non-word-boundary (\B) constraint to two adjacent elements:
# both must be word elements or both must be non-word elements.
#
# The first element whose kind is already decided determines the other
# one (elem1 is examined before elem2). When neither is decided, the
# common kind is picked at random.
#
# @param elem1 [Object] element before the position
# @param elem2 [Object] element after the position
# @return [Boolean] false when either element is empty afterwards
def unbound_process(elem1, elem2)
  if elem1.word_elements?
    elem2.set_word_elements
  elsif elem1.non_word_elements?
    elem2.set_non_word_elements
  elsif elem2.word_elements?
    elem1.set_word_elements
  elsif elem2.non_word_elements?
    elem1.set_non_word_elements
  elsif TstRand(2) == 0
    elem1.set_word_elements
    elem2.set_word_elements
  else
    elem1.set_non_word_elements
    elem2.set_non_word_elements
  end
  !(elem1.empty? || elem2.empty?)
end
|
473
|
+
|
474
|
+
# Fixes the results into concrete strings.
#
# The results are split at the start and end offsets into pre-match,
# match and post-match; each part is stored in its instance variable.
#
# @return [String] pre-match, match and post-match concatenated
def fix
  last_index = @results.size - 1
  @pre_match = fix_part(0, @start_offset - 1)
  @match = fix_part(@start_offset, @end_offset - 1)
  @post_match = fix_part(@end_offset, last_index)

  @pre_match + @match + @post_match
end
|
482
|
+
|
483
|
+
# Fixes the elements from +start_offset+ to +end_offset+ (both inclusive)
# and concatenates their +random_fix+ strings. An end offset below the
# start offset yields an empty string.
#
# @param start_offset [Integer] index of the first element
# @param end_offset [Integer] index of the last element
# @return [String]
def fix_part(start_offset, end_offset)
  fixed = ''.dup # mutable buffer; appended in place instead of rebuilt by +=
  start_offset.upto(end_offset) do |index|
    fixed << @results[index].random_fix
  end
  fixed
end
|
491
|
+
|
492
|
+
end
|
493
|
+
|
494
|
+
# Test suite (executed only when this file is given on the command line).
# The body is currently empty.
if __FILE__ == $PROGRAM_NAME
end
|
497
|
+
|
498
|
+
|