regextest 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +25 -0
- data/README.md +88 -0
- data/Rakefile +55 -0
- data/bin/console +14 -0
- data/bin/regextest +4 -0
- data/bin/setup +7 -0
- data/contrib/Onigmo/RE.txt +522 -0
- data/contrib/Onigmo/UnicodeProps.txt +728 -0
- data/contrib/Onigmo/testpy.py +1319 -0
- data/contrib/unicode/Blocks.txt +298 -0
- data/contrib/unicode/CaseFolding.txt +1414 -0
- data/contrib/unicode/DerivedAge.txt +1538 -0
- data/contrib/unicode/DerivedCoreProperties.txt +11029 -0
- data/contrib/unicode/PropList.txt +1525 -0
- data/contrib/unicode/PropertyAliases.txt +193 -0
- data/contrib/unicode/PropertyValueAliases.txt +1420 -0
- data/contrib/unicode/README.txt +25 -0
- data/contrib/unicode/Scripts.txt +2539 -0
- data/contrib/unicode/UnicodeData.txt +29215 -0
- data/lib/pre-case-folding.rb +101 -0
- data/lib/pre-posix-char-class.rb +150 -0
- data/lib/pre-unicode.rb +116 -0
- data/lib/regextest.rb +268 -0
- data/lib/regextest/back.rb +58 -0
- data/lib/regextest/back/element.rb +151 -0
- data/lib/regextest/back/main.rb +356 -0
- data/lib/regextest/back/result.rb +498 -0
- data/lib/regextest/back/test-case.rb +268 -0
- data/lib/regextest/back/work-thread.rb +119 -0
- data/lib/regextest/common.rb +63 -0
- data/lib/regextest/front.rb +60 -0
- data/lib/regextest/front/anchor.rb +45 -0
- data/lib/regextest/front/back-refer.rb +120 -0
- data/lib/regextest/front/bracket-parser.rb +400 -0
- data/lib/regextest/front/bracket-parser.y +117 -0
- data/lib/regextest/front/bracket-scanner.rb +124 -0
- data/lib/regextest/front/bracket.rb +64 -0
- data/lib/regextest/front/builtin-functions.rb +31 -0
- data/lib/regextest/front/case-folding.rb +18 -0
- data/lib/regextest/front/char-class.rb +243 -0
- data/lib/regextest/front/empty.rb +43 -0
- data/lib/regextest/front/letter.rb +327 -0
- data/lib/regextest/front/manage-parentheses.rb +74 -0
- data/lib/regextest/front/parenthesis.rb +153 -0
- data/lib/regextest/front/parser.rb +1366 -0
- data/lib/regextest/front/parser.y +271 -0
- data/lib/regextest/front/range.rb +60 -0
- data/lib/regextest/front/repeat.rb +90 -0
- data/lib/regextest/front/repeatable.rb +77 -0
- data/lib/regextest/front/scanner.rb +187 -0
- data/lib/regextest/front/selectable.rb +65 -0
- data/lib/regextest/front/sequence.rb +73 -0
- data/lib/regextest/front/unicode.rb +1272 -0
- data/lib/regextest/regex-option.rb +144 -0
- data/lib/regextest/regexp.rb +44 -0
- data/lib/regextest/version.rb +5 -0
- data/lib/tst-reg-test.rb +159 -0
- data/regextest.gemspec +26 -0
- metadata +162 -0
@@ -0,0 +1,58 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
class Regextest::Back; end
|
6
|
+
require 'regextest/common'
|
7
|
+
require 'regextest/back/main'
|
8
|
+
|
9
|
+
# Backend class of regextest. Generate matched string
|
10
|
+
class Regextest::Back
|
11
|
+
include Regextest::Common
|
12
|
+
|
13
|
+
# Constructor.
# json_obj:: tree describing the regular expression; stored as-is and later
#            handed to Regextest::Back::Main by #generate.
def initialize(json_obj)
  @json_obj = json_obj
  # source text of the regexp, read from the shared class variable
  # @@parse_options (not defined in this file)
  @reg_source = @@parse_options[:reg_source]

  # Currently disabled (kept for reference):
  #   make a hash to manage names and corresponding objects
  #     @name_hash = make_name_hash(@json_obj, {})
  #   get test cases
  #     @test_info = Regextest::Back::TestCase.new(@json_obj, @name_hash)

  # nesting limit passed on to the generator
  @max_nest = TstConstRecursionMax
end
|
27
|
+
|
28
|
+
# Public entry point: builds a Regextest::Back::Main from the stored tree and
# nesting limit, and returns whatever its #generate returns.
def generate
  Regextest::Back::Main.new(@json_obj, @max_nest).generate
end
|
33
|
+
|
34
|
+
# Builds a lookup table from node ids (and, for parentheses, their
# refer-names) to the corresponding nodes of the tree.
#
# target::    node to register; anything that is not a Hash (for example the
#             String held in "value" of a leaf node) is ignored
# name_hash:: Hash that is filled in place and also returned
#
# Raises RuntimeError when two nodes carry the same id.
def make_name_hash(target, name_hash)
  # leaf values carry no id and must not be registered (nor indexed with
  # String#[], which would search for substrings instead of keys)
  return name_hash unless Hash === target

  # register id (only when the node actually has one)
  node_id = target["id"]
  if node_id
    raise "Internal error: found duplicate id #{node_id}" if name_hash[node_id]
    name_hash[node_id] = target
  end

  # parentheses are additionally reachable through their refer-name
  name_hash[target["refer_name"]] = target if target["type"] == "LEX_PAREN"

  # recursively register the children ("value" is a node or a list of nodes)
  children = target["value"]
  children = [children] unless Array === children
  children.each { |child| make_name_hash(child, name_hash) }

  name_hash
end
|
51
|
+
|
52
|
+
end
|
53
|
+
|
54
|
+
# Test suite (execute when this file is specified in command line)
|
55
|
+
if __FILE__ == $0
|
56
|
+
|
57
|
+
end
|
58
|
+
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
require "pp"
|
6
|
+
require 'regextest/common'
|
7
|
+
|
8
|
+
class Regextest::Back::Element
|
9
|
+
include Regextest::Common
|
10
|
+
# param:: Hash describing the element; :cmd holds the command symbol and,
#         for :CMD_SELECT, :data holds the array of candidates.
def initialize(param)
  @param = param
  @command = param[:cmd]
  # only selectable elements own a candidate list
  return unless @command == :CMD_SELECT
  @candidates = param[:data]
end
|
15
|
+
|
16
|
+
attr_reader :param, :command, :candidates
|
17
|
+
|
18
|
+
# Fixes the element to a single candidate and returns that candidate.
# With more than one candidate the index comes from TstRand(size)
# (helper not defined in this file); otherwise index 0 is used.
# Raises RuntimeError for every command other than :CMD_SELECT.
def random_fix
  raise "invalid command at random_fix: #{@command}" unless @command == :CMD_SELECT

  index = @candidates.size > 1 ? TstRand(@candidates.size) : 0
  chosen = @candidates[index]
  @candidates = [chosen] # fixed!
  chosen
end
|
29
|
+
|
30
|
+
# Number of candidates.
# Raises RuntimeError when the element has no candidate list.
def size
  raise "internal error: candidates not found at size-method" unless @candidates
  @candidates.size
end
|
38
|
+
|
39
|
+
# Candidate at position num (delegates to Array#[]).
# Raises RuntimeError when the element has no candidate list.
def [](num)
  raise "internal error: candidates not found at at-method" unless @candidates
  @candidates[num]
end
|
47
|
+
|
48
|
+
# Narrows the candidates down to those also offered by other_obj.
# Returns the new candidate list, or nil (leaving the candidates untouched)
# when the two sets have nothing in common.
# Raises RuntimeError unless other_obj is a :CMD_SELECT element.
def intersect(other_obj)
  raise "invalid command at intersect" unless other_obj.command == :CMD_SELECT

  common = @candidates & other_obj.candidates
  return nil if common.empty?
  @candidates = common
end
|
58
|
+
|
59
|
+
# Removes the candidates offered by other_obj from this element.
# Returns the new candidate list, or nil (leaving the candidates untouched)
# when nothing would remain.
# Raises RuntimeError unless other_obj is a :CMD_SELECT element.
def exclude(other_obj)
  raise "invalid command at exclude" unless other_obj.command == :CMD_SELECT

  remaining = @candidates - other_obj.candidates
  return nil if remaining.empty?
  @candidates = remaining
end
|
69
|
+
|
70
|
+
# Appends the candidates of other_obj to this element and returns the new
# list. Duplicates are kept on purpose (plain concatenation instead of a
# set union, "to be faster"); a new array is built, so the previous
# candidate array is not modified.
# Raises RuntimeError unless other_obj is a :CMD_SELECT element.
def union(other_obj)
  raise "invalid command at union" unless other_obj.command == :CMD_SELECT
  @candidates = @candidates + other_obj.candidates
end
|
76
|
+
|
77
|
+
# Compact representation for pretty printing.
# A :CMD_SELECT element shows its candidates (falling back to the :data it
# was created with); look-around and anchor elements show their parameter
# hash. Any other command raises RuntimeError.
def inspect
  case @command
  when :CMD_SELECT
    (@candidates || @param[:data]).inspect
  when :CMD_LOOK_BEHIND, :CMD_LOOK_AHEAD, :CMD_NOT_LOOK_BEHIND, :CMD_NOT_LOOK_AHEAD,
       :CMD_ANC_LINE_BEGIN, :CMD_ANC_LINE_END, :CMD_ANC_WORD_BOUND, :CMD_ANC_WORD_UNBOUND,
       :CMD_ANC_STRING_BEGIN, :CMD_ANC_STRING_END, :CMD_ANC_STRING_END2, :CMD_ANC_MATCH_START,
       :CMD_ANC_LOOK_BEHIND2, :CMD_ANC_RELUCTANT_BEGIN, :CMD_ANC_RELUCTANT_END
    @param.inspect
  else
    raise "inner error, invalid command #{@command}"
  end
end
|
96
|
+
|
97
|
+
# Tells whether "\n" is among the candidates.
# Returns the index of "\n" (truthy, including 0) or nil.
# Elements without a candidate list (anchors, look-arounds) can never be a
# new line, so they answer nil instead of failing on the missing list.
def new_line?
  @candidates && @candidates.index("\n")
end
|
101
|
+
|
102
|
+
# Replaces the candidates by the single candidate "\n" and returns the new
# candidate list.
def set_new_line
  newline_only = ["\n"]
  @candidates = newline_only
end
|
106
|
+
|
107
|
+
# Tells whether every candidate consists of word characters only.
# Returns a MatchData (truthy) or nil; an empty candidate list yields nil.
# The joined candidates are anchored with \A and \z: with the line anchors
# ^ and $ a set such as ["a", "\n"] would be accepted, because "a" alone
# already forms a complete line.
def word_elements?
  @candidates.join("").match(/\A\p{Word}+\z/)
end
|
111
|
+
|
112
|
+
# Tells whether every candidate consists of non-word characters only.
# Returns a MatchData (truthy) or nil; an empty candidate list yields nil.
# The joined candidates are anchored with \A and \z: with the line anchors
# ^ and $ a set such as ["-", "\n", "a"] would be accepted, because "-"
# alone already forms a complete line.
def non_word_elements?
  @candidates.join("").match(/\A\p{^Word}+\z/)
end
|
116
|
+
|
117
|
+
# Keeps only the candidates that are a single word character (\w).
# The candidate array is filtered in place; like Array#select! the method
# returns nil when nothing had to be removed.
def set_word_elements
  @candidates.select! { |candidate| candidate =~ /^\w$/ }
end
|
121
|
+
|
122
|
+
# Keeps only the candidates that are a single non-word character (\W).
# The candidate array is filtered in place; like Array#select! the method
# returns nil when nothing had to be removed.
def set_non_word_elements
  @candidates.select! { |candidate| candidate =~ /^\W$/ }
end
|
126
|
+
|
127
|
+
# True when no candidate is left.
def empty?
  @candidates.empty?
end
|
131
|
+
|
132
|
+
# Factory method: a :CMD_SELECT element whose candidates are the characters
# from " " up to "\x7e".
# BUG: must consider other character set!
def self.any_char
  printable = " ".upto("\x7e").to_a
  Regextest::Back::Element.new({cmd: :CMD_SELECT, data: printable})
end
|
137
|
+
|
138
|
+
# Complements the candidates in place: afterwards the element holds every
# character from " " up to "\x7e" that was NOT a candidate before.
# Returns self.
def reverse
  printable = " ".upto("\x7e").to_a
  @candidates = printable - @candidates
  self
end
|
143
|
+
|
144
|
+
|
145
|
+
end
|
146
|
+
|
147
|
+
# Test suite (execute when this file is specified in command line)
|
148
|
+
if __FILE__ == $0
|
149
|
+
end
|
150
|
+
|
151
|
+
|
@@ -0,0 +1,356 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
require 'regextest/common'
|
6
|
+
require 'regextest/back/element'
|
7
|
+
require 'regextest/back/result'
|
8
|
+
|
9
|
+
# Main class of back-end. Construct candidates array and narrow down
|
10
|
+
class Regextest::Back::Main
|
11
|
+
include Regextest::Common
|
12
|
+
# json_obj:: tree describing the regular expression
# max_nest:: nesting limit checked by generate_candidates_named_generate
def initialize(json_obj, max_nest)
  @json_obj, @max_nest = json_obj, max_nest
  @parens_hash = {}   # hash to keep string generated by parentheses
  @nest = 0           # current nest of back-reference
  # flag for preventing from increase of nest;
  # if true, \g<foo> is restrained if possible
  @quit_mode = false
end
|
20
|
+
|
21
|
+
# Runs the whole back-end pipeline and returns the fixed result object, or
# nil when one of the stages fails.
def generate
  # seek parentheses because there are references defined ahead
  seek_parens(@json_obj)

  # pre-result of the matched string (contains candidates of letters)
  pre_result = generate_candidates({json: @json_obj})
  return nil unless pre_result
  TstLog("pre_result1:\n#{pre_result.inspect}")

  # narrow down the candidates
  result = narrow_down_candidates(pre_result)
  TstLog("pre_result2:\n#{result.inspect}")
  return nil unless result && result.narrow_down

  # fixes result
  result.fix
  result
end
|
40
|
+
|
41
|
+
# Walks the tree and records every LEX_PAREN node in @parens_hash under its
# refer-name (as {:target => node}), so that references to parentheses
# defined later in the pattern can be resolved.
def seek_parens(target)
  @parens_hash[target["refer_name"]] = {:target => target} if target["type"] == "LEX_PAREN"

  # "value" is either a single child or a list of children
  children = target["value"]
  case children
  when nil, false
    nil
  when Array
    children.each { |child| seek_parens(child) }
  else
    seek_parens(children)
  end
end
|
54
|
+
|
55
|
+
# Generates the pre-result (candidates of letters) for one node by
# dispatching on the node type.
#
# param:: Hash; :json holds the node, further keys are passed on unchanged
#         to the type-specific generators.
# Returns what the selected generator returns, an anchor element, or [] for
# nodes that contribute nothing. Raises RuntimeError for unknown node types.
def generate_candidates(param)
  target = param[:json]
  type = target["type"]
  # puts "MATCH type:#{target["type"]}"

  case type
  when "LEX_SEQ"            then generate_candidates_seq(param)   # sequence of letters or parentheses
  when "LEX_SELECT"         then generate_candidates_select(param)
  when "LEX_PAREN"          then generate_candidates_paren(param)
  when "LEX_CHAR_CLASS"     then generate_candidates_char_class(param)
  when "LEX_REPEAT"         then generate_candidates_repeat(param)
  when "LEX_RANGE"          then generate_candidates_range(param)
  when "LEX_BACK_REFER", "LEX_NAMED_REFER"
    generate_candidates_back_refer(param)
  when "LEX_NAMED_GENERATE" then generate_candidates_named_generate(param)
  when "LEX_CHAR"           then generate_candidates_char(param)
  when "LEX_BRACKET", "LEX_SIMPLIFIED_CLASS", "LEX_ANY_LETTER", "LEX_POSIX_CHAR_CLASS", "LEX_UNICODE_CLASS"
    # pure wrappers: continue with the wrapped node
    generate_candidates({json: target["value"]})
  when "LEX_ANC_LINE_BEGIN", "LEX_ANC_LINE_END", "LEX_ANC_WORD_BOUND", "LEX_ANC_WORD_UNBOUND",
       "LEX_ANC_STRING_BEGIN", "LEX_ANC_STRING_END", "LEX_ANC_STRING_END2", "LEX_ANC_MATCH_START",
       "LEX_ANC_LOOK_BEHIND2"
    # every anchor node LEX_ANC_x becomes an element with command :CMD_ANC_x
    Regextest::Back::Element.new({cmd: type.sub("LEX_", "CMD_").to_sym})
  when "LEX_OPTION_PAREN", "LEX_EMPTY"
    # options are processed at front-end; empty nodes generate nothing
    []
  else
    raise "#{target["type"]} not implemented (from generate_candidates routine)"
  end
end
|
109
|
+
|
110
|
+
# Sequence of letters or parentheses: generates every member in order and
# collects the results in one flat list.
# Returns nil as soon as a single member could not be generated.
def generate_candidates_seq(param)
  members = param[:json]["value"]

  collected = members.each_with_object([]) do |member, pieces|
    generated = generate_candidates({json: member})
    if Array === generated
      # a member that produced a list contributes its (one level
      # flattened) entries; the flattening happens in place
      generated.flatten!(1)
      pieces.concat(generated)
    else
      pieces << generated
    end
  end

  # nil if one element failed
  collected.include?(nil) ? nil : collected
end
|
132
|
+
|
133
|
+
# Selection of sequences, such as (aa|b|c).
#
# param[:forced_select]:: index of the alternative that must be taken
#                         (used by conditional parentheses)
# param[:atomic]::        when set, the alternatives are tried in their
#                         written order instead of a shuffled one
# Returns the result of the first alternative that could be generated, or
# nil when none could.
def generate_candidates_select(param)
  alternatives = param[:json]["value"]
  forced = param[:forced_select]

  if forced
    # index is specified by condition
    chosen = alternatives[forced]
    # regexp such as /^(?:b|(a))(?(1)1)$/ match "b"!
    return chosen ? generate_candidates({json: chosen}) : []
  end

  order = (0...alternatives.size).to_a
  # shuffle if not atomic group (this procedure is not sufficient...)
  order = TstShuffle(order) if !param[:atomic] && order.size > 1

  # success if there is at least one result
  outcome = nil
  order.each do |index|
    outcome = generate_candidates({json: alternatives[index]})
    break if outcome
  end
  outcome
end
|
158
|
+
|
159
|
+
# Parenthesis. The behaviour depends on the prefix of the parenthesis:
#   "<=", "=", "<!", "!"  look-behind / look-ahead (and negations): the
#                         content is generated and wrapped in an element
#   ">"                   atomic group: content generated with atomic: true
#   ""                    simple parenthesis
#   anything else         treated as conditional parenthesis when a
#                         condition name is given
# For all but the look-around forms the generated content is remembered in
# @parens_hash[refer_name][:generated] at the current nest level.
def generate_candidates_paren(param)
  target = param[:json]
  paren_prefix = target["prefix"]
  # pp target["prefix"]

  look_around = {
    "<=" => :CMD_LOOK_BEHIND,
    "="  => :CMD_LOOK_AHEAD,
    "<!" => :CMD_NOT_LOOK_BEHIND,
    "!"  => :CMD_NOT_LOOK_AHEAD,
  }
  if look_around.key?(paren_prefix)
    inner = generate_candidates({json: target["value"]})
    return Regextest::Back::Element.new({cmd: look_around[paren_prefix], result: inner})
  end

  # stores the generated content for later references and passes it through
  remember = lambda do |generated|
    @parens_hash[target["refer_name"]][:generated] ||= []
    @parens_hash[target["refer_name"]][:generated][@nest] = generated
    generated
  end

  case paren_prefix
  when ">" # atomic group
    return remember.call(generate_candidates({json: target["value"], atomic: true}))
  when ""  # simple parenthesis
    return remember.call(generate_candidates({json: target["value"]}))
  end

  # when condition is specified: take alternative 0 if the referenced
  # parenthesis has generated something, alternative 1 otherwise
  select_num = nil
  condition_name = target["condition_name"]
  if condition_name && condition_name.length > 0
    select_num = @parens_hash[condition_name][:generated] ? 0 : 1
  end

  # alternative 1 requested but the content offers no alternatives
  return nil if select_num == 1 && target["value"]["type"] != "LEX_SELECT"

  remember.call(generate_candidates({json: target["value"], forced_select: select_num}))
end
|
210
|
+
|
211
|
+
# Char class: joins the candidates of all members into one :CMD_SELECT
# element. Members that could not be generated are skipped.
# Returns nil when no candidate is left at all.
def generate_candidates_char_class(param)
  merged = Regextest::Back::Element.new({cmd: :CMD_SELECT, data: []})

  param[:json]["value"].each do |member|
    part = generate_candidates({json: member})
    merged.union(part) if part
  end

  merged.size > 0 ? merged : nil
end
|
227
|
+
|
228
|
+
# Repeat: generates the repeated node a number of times.
#
# The count is "min_repeat" in quit mode or when "max_repeat" does not
# exceed it; otherwise min_repeat + TstRand(max_repeat - min_repeat + 1)
# (TstRand is not defined in this file). For a reluctant repeat the
# generated pieces are framed by :CMD_ANC_RELUCTANT_BEGIN / _END elements
# carrying the id of the node.
#
# Returns the list of generated pieces, or nil when one repetition could
# not be generated (this also holds for reluctant repeats; no end marker is
# appended to a failed result).
def generate_candidates_repeat(param)
  target = param[:json]
  min_repeat = target["min_repeat"]
  max_repeat = target["max_repeat"]
  reluctant = target["repeat_option"].index("reluctant")

  repeat = if !@quit_mode && max_repeat > min_repeat
             min_repeat + TstRand(max_repeat - min_repeat + 1)
           else
             min_repeat
           end
  # puts "repeat=#{repeat} quit=#{@quit_mode} nest=#{@nest}"

  result = []
  if reluctant
    result.push Regextest::Back::Element.new({cmd: :CMD_ANC_RELUCTANT_BEGIN, id: target["id"]})
  end

  repeat.times do
    elem = generate_candidates({json: target["value"]})
    # one failed repetition makes the whole repeat fail
    return nil unless elem
    result.push elem

    # only a list of elements can start with a begin anchor; a single
    # element is never inspected (it may not even own candidates)
    next unless Array === elem
    elem.flatten! # flatten considering duplicated repeat
    head, tail = elem.first, elem.last
    next unless head.respond_to?(:command) && tail.respond_to?(:command)

    # quit to repeat if the first element is begin anchor
    break if head.command == :CMD_ANC_STRING_BEGIN
    if head.command == :CMD_ANC_LINE_BEGIN
      # another line may only follow when this piece can end with "\n";
      # a trailing anchor or look-around has no candidates and cannot
      ends_with_new_line = tail.command == :CMD_SELECT && tail.new_line?
      break unless ends_with_new_line
    end
  end

  if reluctant
    result.push Regextest::Back::Element.new({cmd: :CMD_ANC_RELUCTANT_END, id: target["id"]})
  end
  result
end
|
263
|
+
|
264
|
+
# Range: a :CMD_SELECT element offering every character whose code point
# lies between "begin" and "end" (both inclusive) of the node.
def generate_candidates_range(param)
  target = param[:json]
  letters = (target["begin"]..target["end"]).map { |codepoint| [codepoint].pack("U*") }
  Regextest::Back::Element.new({cmd: :CMD_SELECT, data: letters})
end
|
272
|
+
|
273
|
+
# Back reference: returns what the referenced parenthesis has generated.
# With an empty "relative_num" the last recorded entry is used, otherwise
# the entry at nest level @nest + relative_num.
# Returns nil when the parenthesis has not generated anything yet.
def generate_candidates_back_refer(param)
  target = param[:json]
  generated = @parens_hash[target["refer_name"]][:generated]
  return nil unless generated

  relative = target["relative_num"]
  level = (relative == "") ? -1 : @nest + relative.to_i
  generated[level]
end
|
284
|
+
|
285
|
+
# Named generate (call of a parenthesis, or of the whole expression when
# the refer-name is "$$_0").
# Once the nest level has reached @max_nest the quit mode is switched on;
# it stays on, and every further call returns nil without generating.
# Otherwise the callee is generated one nest level deeper.
def generate_candidates_named_generate(param)
  target = param[:json]

  @quit_mode = true if @nest >= @max_nest
  return nil if @quit_mode

  @nest += 1
  refer_name = target["refer_name"]
  # "$$_0" recursively calls the whole expression
  callee = (refer_name == "$$_0") ? @json_obj : @parens_hash[refer_name][:target]
  result = generate_candidates({json: callee})
  @nest -= 1
  result
end
|
302
|
+
|
303
|
+
# Char: a String value becomes a :CMD_SELECT element with that string as
# its only candidate; any other value is treated as a node and generated.
def generate_candidates_char(param)
  value = param[:json]["value"]
  return generate_candidates({json: value}) unless String === value

  Regextest::Back::Element.new({cmd: :CMD_SELECT, data: [value]})
end
|
314
|
+
|
315
|
+
# Narrows down candidates considering anchors.
#
# candidate_array:: list of elements produced by generate_candidates; nil
#                   (a generation that failed, e.g. the content of a
#                   look-around) is reported as failure instead of crashing
# Every element is handed to a fresh Regextest::Back::Result according to
# its command; the contents of look-arounds are narrowed down recursively.
# Returns the Result, or nil when the input is nil, a look-around could not
# be narrowed down, or Result#merge reports failure.
# Raises RuntimeError for an unknown command.
def narrow_down_candidates(candidate_array)
  return nil unless candidate_array

  results = Regextest::Back::Result.new
  candidate_array.each do |elem|
    command = elem.command
    case command
    when :CMD_SELECT
      results.push_body elem
    when :CMD_LOOK_AHEAD, :CMD_NOT_LOOK_AHEAD
      sub_results = narrow_down_candidates(elem.param[:result])
      return nil unless sub_results
      results.add_look_ahead(command, sub_results)
    when :CMD_LOOK_BEHIND, :CMD_NOT_LOOK_BEHIND
      sub_results = narrow_down_candidates(elem.param[:result])
      return nil unless sub_results
      results.add_look_behind(command, sub_results)
    when :CMD_ANC_LINE_BEGIN, :CMD_ANC_LINE_END, :CMD_ANC_WORD_BOUND, :CMD_ANC_WORD_UNBOUND,
         :CMD_ANC_STRING_BEGIN, :CMD_ANC_STRING_END, :CMD_ANC_STRING_END2, :CMD_ANC_MATCH_START,
         :CMD_ANC_LOOK_BEHIND2
      results.add_anchor(command)
    when :CMD_ANC_RELUCTANT_BEGIN, :CMD_ANC_RELUCTANT_END
      results.add_reluctant_repeat(elem)
    else
      raise "inner error, invalid command at checking anchors: #{command}"
    end
  end

  return nil unless results.merge
  results
end
|
350
|
+
end
|
351
|
+
|
352
|
+
# Test suite (execute when this file is specified in command line)
|
353
|
+
if __FILE__ == $0
|
354
|
+
|
355
|
+
end
|
356
|
+
|