regextest 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +25 -0
- data/README.md +88 -0
- data/Rakefile +55 -0
- data/bin/console +14 -0
- data/bin/regextest +4 -0
- data/bin/setup +7 -0
- data/contrib/Onigmo/RE.txt +522 -0
- data/contrib/Onigmo/UnicodeProps.txt +728 -0
- data/contrib/Onigmo/testpy.py +1319 -0
- data/contrib/unicode/Blocks.txt +298 -0
- data/contrib/unicode/CaseFolding.txt +1414 -0
- data/contrib/unicode/DerivedAge.txt +1538 -0
- data/contrib/unicode/DerivedCoreProperties.txt +11029 -0
- data/contrib/unicode/PropList.txt +1525 -0
- data/contrib/unicode/PropertyAliases.txt +193 -0
- data/contrib/unicode/PropertyValueAliases.txt +1420 -0
- data/contrib/unicode/README.txt +25 -0
- data/contrib/unicode/Scripts.txt +2539 -0
- data/contrib/unicode/UnicodeData.txt +29215 -0
- data/lib/pre-case-folding.rb +101 -0
- data/lib/pre-posix-char-class.rb +150 -0
- data/lib/pre-unicode.rb +116 -0
- data/lib/regextest.rb +268 -0
- data/lib/regextest/back.rb +58 -0
- data/lib/regextest/back/element.rb +151 -0
- data/lib/regextest/back/main.rb +356 -0
- data/lib/regextest/back/result.rb +498 -0
- data/lib/regextest/back/test-case.rb +268 -0
- data/lib/regextest/back/work-thread.rb +119 -0
- data/lib/regextest/common.rb +63 -0
- data/lib/regextest/front.rb +60 -0
- data/lib/regextest/front/anchor.rb +45 -0
- data/lib/regextest/front/back-refer.rb +120 -0
- data/lib/regextest/front/bracket-parser.rb +400 -0
- data/lib/regextest/front/bracket-parser.y +117 -0
- data/lib/regextest/front/bracket-scanner.rb +124 -0
- data/lib/regextest/front/bracket.rb +64 -0
- data/lib/regextest/front/builtin-functions.rb +31 -0
- data/lib/regextest/front/case-folding.rb +18 -0
- data/lib/regextest/front/char-class.rb +243 -0
- data/lib/regextest/front/empty.rb +43 -0
- data/lib/regextest/front/letter.rb +327 -0
- data/lib/regextest/front/manage-parentheses.rb +74 -0
- data/lib/regextest/front/parenthesis.rb +153 -0
- data/lib/regextest/front/parser.rb +1366 -0
- data/lib/regextest/front/parser.y +271 -0
- data/lib/regextest/front/range.rb +60 -0
- data/lib/regextest/front/repeat.rb +90 -0
- data/lib/regextest/front/repeatable.rb +77 -0
- data/lib/regextest/front/scanner.rb +187 -0
- data/lib/regextest/front/selectable.rb +65 -0
- data/lib/regextest/front/sequence.rb +73 -0
- data/lib/regextest/front/unicode.rb +1272 -0
- data/lib/regextest/regex-option.rb +144 -0
- data/lib/regextest/regexp.rb +44 -0
- data/lib/regextest/version.rb +5 -0
- data/lib/tst-reg-test.rb +159 -0
- data/regextest.gemspec +26 -0
- metadata +162 -0
@@ -0,0 +1,498 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
require 'regextest/common'
|
6
|
+
require 'regextest/back/element'
|
7
|
+
|
8
|
+
class Regextest::Back::Result
|
9
|
+
include Regextest::Common
|
10
|
+
|
11
|
+
# Builds an empty result: no generated elements, no pending look-around
# sub-results, no recorded anchors, and a zero-width match window at 0.
def initialize
  # generated elements, in output order
  @results = []

  # look-around sub-results waiting to be merged into @results
  @look_aheads, @look_behinds = [], []

  # anchor command => list of offsets; repeat id => [begin, end] offsets
  @positional_anchors = {}
  @reluctant_repeat = {}

  # match window inside @results (start inclusive, end exclusive)
  @start_offset = @end_offset = 0

  # fixed strings, filled in by #fix
  @pre_match = @match = @post_match = nil
end
|
23
|
+
|
24
|
+
# Read-only views of the internal state.
attr_reader :results             # generated elements
attr_reader :positional_anchors  # anchor command => offsets
attr_reader :end_offset          # end of the match window
attr_reader :pre_match           # set by #fix
attr_reader :match               # set by #fix
attr_reader :post_match          # set by #fix
|
26
|
+
|
27
|
+
# get pre-match string
|
28
|
+
|
29
|
+
# Adds elem
|
30
|
+
# Appends one element to the generated body and moves the end of the
# match window one position forward.
#
# Returns the new end offset.
def push_body(elem)
  @results << elem
  @end_offset += 1
end
|
34
|
+
|
35
|
+
# Offset of an elem
|
36
|
+
# Element lookup; delegates to the underlying results array, so anything
# Array#[] accepts as a single argument is accepted here too.
def [](offset)
  @results.slice(offset)
end
|
39
|
+
|
40
|
+
# size of results
|
41
|
+
# Number of elements currently held in the results array.
def size
  @results.length
end
|
44
|
+
|
45
|
+
# Adds results of look_ahead
|
46
|
+
# Records a look-ahead sub-result together with the command that produced
# it and the current end offset, for later processing by #merge_look_ahead.
def add_look_ahead(command, sub_results)
  pending = { offset: @end_offset, cmd: command, results: sub_results }
  @look_aheads << pending
end
|
49
|
+
|
50
|
+
# Adds results of look_behind
|
51
|
+
# Records a look-behind sub-result together with the command that produced
# it and the current end offset, for later processing by #merge_look_behind.
def add_look_behind(command, sub_results)
  pending = { offset: @end_offset, cmd: command, results: sub_results }
  @look_behinds << pending
end
|
54
|
+
|
55
|
+
# Adds offset of anchor
|
56
|
+
# Remembers that anchor +cmd+ applies at the current end offset.
# Offsets are appended as-is; duplicates are not filtered here.
def add_anchor(cmd)
  (@positional_anchors[cmd] ||= []) << @end_offset
end
|
60
|
+
|
61
|
+
# Adds reluctant repeat information
|
62
|
+
# Tracks where a reluctant repeat begins and ends.
#
# The repeat is identified by elem.param[:id]. A BEGIN command starts a
# fresh [begin_offset] entry; the matching END command appends the end
# offset to it.
#
# Raises RuntimeError for an END without a prior BEGIN, or for any other
# command.
def add_reluctant_repeat(elem)
  id = elem.param[:id]
  case elem.command
  when :CMD_ANC_RELUCTANT_BEGIN
    @reluctant_repeat[id] = [@end_offset]
  when :CMD_ANC_RELUCTANT_END
    opened = @reluctant_repeat[id]
    raise "internal error, invalid reluctant_repeat_end command" unless opened
    opened.push @end_offset
  else
    raise "internal error, invalid reluctant_repeat command"
  end
end
|
77
|
+
|
78
|
+
# Merge results of look aheads / behinds
|
79
|
+
# Merges all pending look-aheads, then all pending look-behinds.
# Look-behinds are not attempted when the look-ahead merge fails; in that
# case the (falsy) look-ahead result is returned.
def merge
  ahead = merge_look_ahead
  return ahead unless ahead

  merge_look_behind
end
|
83
|
+
|
84
|
+
# Merge results of look aheads
|
85
|
+
# Folds every recorded look-ahead into the results.
#
# For each entry the sub-result's anchors are merged first, then the
# elements are merged by the positive or negative merger depending on the
# recorded command.
#
# Returns true when every entry merged, nil as soon as one fails.
# Raises RuntimeError for an unknown command.
def merge_look_ahead
  @look_aheads.each do |entry|
    offset, sub_results, command = entry.values_at(:offset, :results, :cmd)

    merge_anchors(offset, sub_results)
    merged =
      case command
      when :CMD_LOOK_AHEAD
        merge_look_ahead_elems(offset, sub_results)
      when :CMD_NOT_LOOK_AHEAD
        merge_not_look_ahead_elems(offset, sub_results)
      else
        raise "invalid command at merge_look_ahead: #{command}"
      end
    return nil unless merged
  end
  true
end
|
107
|
+
|
108
|
+
# Merge each elements of look aheads
|
109
|
+
# Overlays a positive look-ahead onto the results, starting at +offset+.
#
# Positions that already hold an element are intersected with the
# corresponding look-ahead element; positions past the current end of the
# results are filled with the look-ahead element itself.
#
# Returns true on success, nil as soon as an intersection reports failure.
def merge_look_ahead_elems(offset, sub_results)
  sub_results.end_offset.times do |k|
    position = offset + k
    sub_elem = sub_results[k]

    if position >= @results.size
      # beyond the existing body: the look-ahead element extends it
      @results.push(sub_elem)
    elsif !@results[position].intersect(sub_elem)
      return nil
    end
  end
  true
end
|
126
|
+
|
127
|
+
# Merge each elements of not-look-aheads
|
128
|
+
# Applies a negative look-ahead starting at +offset+.
#
# The look-ahead is defeated by making at least one position differ from
# it. Candidate positions are tried in the order returned by TstShuffle.
# For the chosen position the existing element has the look-ahead element
# excluded from it, or, when the position lies past the end of the
# results, the reversed look-ahead element is appended. Other positions
# past the end are padded with Element.any_char.
#
# +sub_results+ is either a Result (its end_offset gives the length) or a
# plain array of elements (its size gives the length).
#
# Every position offset .. offset+length-1 is visited. The previous code
# subtracted one from the upper bound twice, so the last position was
# never reachable and a one-element look-ahead could never succeed.
#
# Returns true when some position could be excluded (and @results is
# updated), false otherwise.
#
# NOTE(review): results_work is a shallow copy, so an exclude call may
# affect an element also referenced from @results even when the attempt is
# abandoned; confirm against Element#exclude.
def merge_not_look_ahead_elems(offset, sub_results)
  sub_length =
    if Regextest::Back::Result === sub_results
      sub_results.end_offset
    else
      sub_results.size
    end
  last_offset = offset + sub_length - 1
  try_order = TstShuffle(sub_results.size.times.to_a)

  found = false
  # exclude, at least, one element
  try_order.each do |j|
    results_work = @results.dup
    cur_offset = offset + j

    offset.step(last_offset) do |i|
      sub_elem = sub_results[i - offset]

      if i < results_work.size
        # existing element: only the chosen position is narrowed
        next unless i == cur_offset
        found = true if results_work[i].exclude(sub_elem)
      elsif i == cur_offset && (reversed = sub_elem.reverse)
        results_work.push reversed
        found = true
      else
        results_work.push(Regextest::Back::Element.any_char)
      end
    end

    if found
      @results = results_work
      break
    end
  end
  found
end
|
177
|
+
|
178
|
+
# Merge results of look behind
|
179
|
+
# Folds every recorded look-behind into the results.
#
# For each entry the sub-result's anchors are merged first, then the
# elements are merged by the positive or negative merger depending on the
# recorded command.
#
# Returns true when every entry merged, nil as soon as one fails.
# Raises RuntimeError for an unknown command.
def merge_look_behind
  @look_behinds.each do |entry|
    offset, sub_results, command = entry.values_at(:offset, :results, :cmd)

    merge_anchors(offset, sub_results)
    merged =
      case command
      when :CMD_LOOK_BEHIND
        merge_look_behind_elems(offset, sub_results)
      when :CMD_NOT_LOOK_BEHIND
        merge_not_look_behind_elems(offset, sub_results)
      else
        raise "invalid command at merge_look_behind: #{command}"
      end
    return nil unless merged
  end
  true
end
|
201
|
+
|
202
|
+
# Merge each elements of look behinds
|
203
|
+
# Overlays a positive look-behind so that it ends at +offset+.
#
# When the look-behind is longer than +offset+, the surplus leading
# elements are prepended to the results (after all stored offsets have
# been shifted through #unshift_params). The remaining elements are
# intersected with the existing elements they line up with.
#
# Returns true on success, false when the shift is refused, nil when an
# intersection reports failure.
def merge_look_behind_elems(offset, sub_results)
  unshift_length = sub_results.end_offset - offset
  return false if unshift_length > 0 && !unshift_params(unshift_length)

  pre_part = []
  sub_results.end_offset.times do |i|
    sub_elem = sub_results[i]
    if i < unshift_length
      pre_part.push sub_elem
    elsif !@results[i - unshift_length].intersect(sub_elem)
      # i - unshift_length is the index, in the not-yet-extended results,
      # of the element facing sub_results[i]
      return nil
    end
  end
  @results = pre_part + @results
  true
end
|
228
|
+
|
229
|
+
# Merge each elements of not look behinds
|
230
|
+
# Applies a negative look-behind that ends at +offset+.
#
# The look-behind is defeated by making at least one position differ from
# it. Candidate positions are tried in the order returned by TstShuffle.
# When the look-behind is longer than +offset+, the surplus leading
# positions are created in front of the results: the chosen position
# receives the reversed look-behind element, the others Element.any_char.
# A chosen position that already exists has the look-behind element
# excluded from it.
#
# Prepended elements are inserted at their own index so they keep the same
# left-to-right order as the look-behind. A position whose element cannot
# be reversed is padded with any_char and does not count as a successful
# exclusion, matching the negative look-ahead handling.
#
# Returns true when some position could be excluded (and @results is
# updated), false otherwise, including when #unshift_params refuses the
# shift.
def merge_not_look_behind_elems(offset, sub_results)
  unshift_length = sub_results.end_offset - offset
  return false if unshift_length > 0 && !unshift_params(unshift_length)

  # index in the working copy of the element facing sub_results[0],
  # once the prepended elements (if any) are in place
  results_offset = unshift_length > 0 ? 0 : offset - sub_results.end_offset
  try_order = TstShuffle(sub_results.size.times.to_a)

  found = false
  # exclude, at least, one element
  try_order.each do |j|
    results_work = @results.dup

    sub_results.end_offset.times do |i|
      sub_elem = sub_results[i]

      if i < unshift_length
        reversed = (i == j) ? sub_elem.reverse : nil
        if reversed
          results_work.insert(i, reversed)
          found = true
        else
          results_work.insert(i, Regextest::Back::Element.any_char)
        end
      elsif i == j && results_work[results_offset + i].exclude(sub_elem)
        found = true
      end
    end

    if found
      @results = results_work
      break
    end
  end
  found
end
|
275
|
+
|
276
|
+
# Merge anchors
|
277
|
+
# Copies the anchors of a sub-result into this result, translating each
# anchor offset by +offset+. Offsets are combined with a set union, so an
# offset already recorded for the same anchor is not added twice.
def merge_anchors(offset, sub_results)
  sub_results.positional_anchors.each do |cmd, positions|
    shifted = positions.map { |position| position + offset }
    @positional_anchors[cmd] = (@positional_anchors[cmd] || []) | shifted
  end
end
|
283
|
+
|
284
|
+
# unshift parameters
|
285
|
+
# Shifts every stored offset by +unshift_length+, to be used when that many
# elements are about to be inserted in front of the results.
#
# Affects the offsets of pending look-aheads and look-behinds, all anchor
# offsets (updated in place, so callers holding one of the offset arrays
# see the new values), and the match window.
#
# The shift is refused, and nothing is modified, when a
# :CMD_ANC_STRING_BEGIN anchor is recorded. The check is made up front so
# that a refusal never leaves the offsets partially shifted.
#
# Returns true when the shift was applied, false when it was refused.
def unshift_params(unshift_length)
  return false if @positional_anchors.key?(:CMD_ANC_STRING_BEGIN)

  (@look_aheads + @look_behinds).each do |pending|
    pending[:offset] += unshift_length
  end
  @positional_anchors.each_value do |offsets|
    offsets.map! { |offset| offset + unshift_length }
  end
  @start_offset += unshift_length
  @end_offset += unshift_length
  true
end
|
296
|
+
|
297
|
+
# narrow down candidate by anchors
|
298
|
+
# Narrows the candidates first by anchors, then by reluctant repeats.
# The second step is skipped when the first one fails; in that case the
# (falsy) result of the anchor step is returned.
def narrow_down
  by_anchors = narrow_down_by_anchors
  return by_anchors unless by_anchors

  narrow_down_by_reluctant_repeat
end
|
302
|
+
|
303
|
+
# narrow down candidate by reluctant repeat
|
304
|
+
# Narrows candidates so that reluctant repeats behave reluctantly.
#
# For each recorded repeat, the elements following the repeat are treated
# as a negative look-ahead and applied at every start offset from the
# beginning of the repeat up to (repeat end - length of the following
# part). Repeats with nothing after them are skipped.
#
# Returns true when every exclusion succeeded, false as soon as one fails.
def narrow_down_by_reluctant_repeat
  @reluctant_repeat.each do |_repeat_id, offsets|
    repeat_begin, repeat_end = offsets
    succeed_part = @results[repeat_end..-1]
    next unless succeed_part.size > 0

    # reluctant repeat is equivalent to not_look_ahead!
    repeat_begin.upto(repeat_end - succeed_part.size) do |offset|
      return false unless merge_not_look_ahead_elems(offset, succeed_part)
    end
  end
  true
end
|
321
|
+
|
322
|
+
# narrow down candidate by anchors
|
323
|
+
# Checks every recorded anchor against the generated elements and narrows
# the elements where an anchor requires it.
#
# Per anchor command:
# * STRING_BEGIN / MATCH_START: fails if any offset is greater than 0.
# * STRING_END: fails if the smallest offset is below (size - 1).
# * STRING_END2: fails if the smallest offset is below (size - 1); when it
#   equals (size - 1) the last element must be able to be a newline and is
#   fixed to one.
# * LINE_BEGIN: for each non-zero offset the preceding element must be able
#   to be a newline and is fixed to one.
# * LINE_END: for each offset other than the end of the results, the
#   element at that offset must be able to be a newline and is fixed to one.
# * WORD_BOUND / WORD_UNBOUND: see #narrow_down_by_word_anchor.
# * LOOK_BEHIND2: moves the start of the match window to the largest offset.
#
# Returns true when all anchors can be satisfied, false otherwise.
# Raises RuntimeError for an unknown anchor command.
#
# NOTE(review): STRING_END accepts an offset of (size - 1), i.e. one
# element after the anchor, without any newline check. This looks copied
# from the STRING_END2 bound; confirm whether (size) was intended.
def narrow_down_by_anchors
  @positional_anchors.each do |cmd, offsets|
    case cmd
    when :CMD_ANC_STRING_BEGIN, :CMD_ANC_MATCH_START
      return false if offsets.max > 0
    when :CMD_ANC_STRING_END
      return false if offsets.min < (@results.size - 1)
    when :CMD_ANC_STRING_END2
      min_offset = offsets.min
      last_index = @results.size - 1
      return false if min_offset < last_index
      if min_offset == last_index
        return false unless @results[min_offset].new_line?
        @results[min_offset].set_new_line
      end
    when :CMD_ANC_LINE_BEGIN
      offsets.each do |offset|
        next if offset == 0
        return false unless @results[offset - 1].new_line?
        @results[offset - 1].set_new_line
      end
    when :CMD_ANC_LINE_END
      offsets.each do |offset|
        next if offset == @results.size
        return false unless @results[offset].new_line?
        @results[offset].set_new_line
      end
    when :CMD_ANC_WORD_BOUND
      return false unless narrow_down_by_word_anchor(offsets, :bound_process)
    when :CMD_ANC_WORD_UNBOUND
      return false unless narrow_down_by_word_anchor(offsets, :unbound_process)
    when :CMD_ANC_LOOK_BEHIND2
      @start_offset = offsets.max
    else
      raise "command (#{cmd}) not implemented"
    end
  end
  true
end

# Internal helper for the word-boundary (\b) and non-boundary (\B) anchors.
#
# +process+ names the method (:bound_process or :unbound_process) applied
# to the pair of elements on either side of each anchor offset. When an
# anchor sits at the very start or end of the results, an Element.any_char
# neighbour is added on that side first; adding one at the start shifts all
# stored offsets through #unshift_params.
#
# The offsets array is de-duplicated in place and then read by index on
# every iteration. Reading by index matters: #unshift_params rewrites the
# array in place, so later entries must be read after the shift. (The
# previous code sized the loop by the de-duplicated count but indexed the
# original array, so duplicates caused later offsets to be skipped.)
#
# Returns true on success, false when a pair cannot be satisfied or the
# shift is refused.
def narrow_down_by_word_anchor(offsets, process)
  offsets.uniq!
  offsets.size.times do |i|
    offset = offsets[i]

    if offset > 0 && offset < @results.size
      return false unless send(process, @results[offset - 1], @results[offset])
    elsif @results.empty?
      2.times { @results.push(Regextest::Back::Element.any_char) }
      # outcome deliberately not checked here, as before
      send(process, @results[0], @results[1])
    elsif offset == @results.size
      @results.push(Regextest::Back::Element.any_char)
      return false unless send(process, @results[-2], @results[-1])
    elsif offset == 0
      return false unless unshift_params(1)
      @results.unshift(Regextest::Back::Element.any_char)
      return false unless send(process, @results[0], @results[1])
    end
  end
  true
end
private :narrow_down_by_word_anchor
|
423
|
+
|
424
|
+
# bound process (\b)
|
425
|
+
# Word boundary (\b) between two adjacent elements: exactly one side must
# be a word character.
#
# If one element already reports itself as word / non-word, the other is
# narrowed to the opposite class (elem1 is consulted first). If neither
# does, TstRand(2) decides which side becomes the word side.
#
# Returns false when either element reports empty? afterwards, true
# otherwise.
def bound_process(elem1, elem2)
  case
  when elem1.word_elements?
    elem2.set_non_word_elements
  when elem1.non_word_elements?
    elem2.set_word_elements
  when elem2.word_elements?
    elem1.set_non_word_elements
  when elem2.non_word_elements?
    elem1.set_word_elements
  else
    # neither side is committed yet: pick an orientation
    if TstRand(2) == 0
      elem1.set_word_elements
      elem2.set_non_word_elements
    else
      elem1.set_non_word_elements
      elem2.set_word_elements
    end
  end

  !(elem1.empty? || elem2.empty?)
end
|
448
|
+
|
449
|
+
# unbound process (\B)
|
450
|
+
# Non-boundary (\B) between two adjacent elements: both sides must belong
# to the same class, word or non-word.
#
# If one element already reports itself as word / non-word, the other is
# narrowed to the same class (elem1 is consulted first). If neither does,
# TstRand(2) decides which class both receive.
#
# Returns false when either element reports empty? afterwards, true
# otherwise.
def unbound_process(elem1, elem2)
  case
  when elem1.word_elements?
    elem2.set_word_elements
  when elem1.non_word_elements?
    elem2.set_non_word_elements
  when elem2.word_elements?
    elem1.set_word_elements
  when elem2.non_word_elements?
    elem1.set_non_word_elements
  else
    # neither side is committed yet: pick a common class
    if TstRand(2) == 0
      elem1.set_word_elements
      elem2.set_word_elements
    else
      elem1.set_non_word_elements
      elem2.set_non_word_elements
    end
  end

  !(elem1.empty? || elem2.empty?)
end
|
473
|
+
|
474
|
+
# Fixes results
|
475
|
+
# Turns the elements into concrete strings.
#
# The results are split at the match window into the part before the
# match, the match itself and the part after it; each part is fixed in
# that order and stored in @pre_match, @match and @post_match.
#
# Returns the three parts concatenated.
def fix
  spans = [
    [0,             @start_offset - 1],
    [@start_offset, @end_offset - 1],
    [@end_offset,   @results.size - 1]
  ]
  @pre_match, @match, @post_match = spans.map { |from, to| fix_part(from, to) }

  @pre_match + @match + @post_match
end
|
482
|
+
|
483
|
+
# Fixes part of results
|
484
|
+
# Builds the string for the elements from +start_offset+ to +end_offset+
# (both inclusive) by concatenating each element's random_fix, in order.
# Yields an empty string when end_offset is below start_offset.
def fix_part(start_offset, end_offset)
  (start_offset..end_offset).inject("") do |text, index|
    text + @results[index].random_fix
  end
end
|
491
|
+
|
492
|
+
end
|
493
|
+
|
494
|
+
# Test suite (execute when this file is specified in command line)
|
495
|
+
# Hook for a test suite that would run only when this file is executed
# directly; the body is currently empty.
if $PROGRAM_NAME == __FILE__
end
|
497
|
+
|
498
|
+
|