regextest 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/pre/unicode.rb +5 -0
- data/lib/regextest.rb +7 -3
- data/lib/regextest/back/element.rb +36 -13
- data/lib/regextest/back/main.rb +116 -65
- data/lib/regextest/back/result.rb +19 -8
- data/lib/regextest/common.rb +4 -1
- data/lib/regextest/front.rb +5 -0
- data/lib/regextest/front/back-refer.rb +2 -2
- data/lib/regextest/front/bracket-parser.rb +121 -103
- data/lib/regextest/front/bracket-parser.y +4 -1
- data/lib/regextest/front/bracket-scanner.rb +4 -3
- data/lib/regextest/front/char-class.rb +13 -5
- data/lib/regextest/front/letter.rb +39 -6
- data/lib/regextest/front/parenthesis.rb +2 -2
- data/lib/regextest/front/parser.rb +564 -545
- data/lib/regextest/front/parser.y +8 -3
- data/lib/regextest/front/range.rb +19 -1
- data/lib/regextest/front/scanner.rb +16 -13
- data/lib/regextest/front/special-letter.rb +63 -0
- data/lib/regextest/regex-option.rb +27 -0
- data/lib/regextest/unicode.rb +5 -0
- data/lib/regextest/version.rb +1 -1
- data/lib/tst-reg-test.rb +66 -21
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dee3ca4abbe70edabbe42f690c1a3c586fe689e0
|
4
|
+
data.tar.gz: 7bfe0081c0432cb7e9f9da3e4918042a36770bc1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4b2bf4c54259660e4f4509ee944676388530e6203266820b22a4cc32bb82d1adb849e8c535b8d4044057517fa8169ad9c39ec2ff23391ff43672042a492515f4
|
7
|
+
data.tar.gz: 8f9faee644cd13ade02be3cdcc1ab51eb8e4f14985402eb6ea732bfb988ac2eb363c82fa2da595bd39c2f3e2bbc514ead114a33603275000fd717167eae77fc3
|
data/lib/pre/unicode.rb
CHANGED
@@ -89,6 +89,11 @@ class RegextestPreUnicode
|
|
89
89
|
# Generate hash of properties
|
90
90
|
def self.property(class_name)
|
91
91
|
case class_name.downcase
|
92
|
+
# Regextest-defined char classes (names start with an underscore)
|
93
|
+
when "_asciiprint"
|
94
|
+
([[32, 126]])
|
95
|
+
|
96
|
+
# Unicode.org defined char classes
|
92
97
|
#{ranges_source}
|
93
98
|
else
|
94
99
|
warn "Class name (#\{class_name\}) not found. Ignored."
|
data/lib/regextest.rb
CHANGED
@@ -72,8 +72,11 @@ class Regextest
|
|
72
72
|
# @raise [RuntimeError] if something goes wrong...
|
73
73
|
# @raise [Regextest::RegextestTimeout] if a timeout is detected during verification. The option 'verification: false' may be a workaround.
|
74
74
|
def generate
|
75
|
-
|
76
|
-
|
75
|
+
start_time = Time.now
|
76
|
+
0.step(TstFixnumMax) do | retry_count |
|
77
|
+
duration = Time.now - start_time
|
78
|
+
break if retry_count >= TstConstRetryMax && duration >= TstConstRetryMaxSecond
|
79
|
+
|
77
80
|
# generate string
|
78
81
|
reset_random_called
|
79
82
|
@result = @back_end.generate(retry_count)
|
@@ -114,6 +117,7 @@ class Regextest
|
|
114
117
|
def to_json
|
115
118
|
@front_end.get_json_string
|
116
119
|
end
|
120
|
+
|
117
121
|
#---------------#
|
118
122
|
private
|
119
123
|
|
@@ -143,7 +147,7 @@ class Regextest
|
|
143
147
|
when Regexp
|
144
148
|
@reg_exp = param
|
145
149
|
@@parse_options[:reg_options].set(@reg_exp.options) # inner regex options have priority
|
146
|
-
@reg_string =
|
150
|
+
@reg_string = @@parse_options[:reg_options].prefix_reg + @reg_exp.source
|
147
151
|
else
|
148
152
|
raise "Error: string or regular expression required"
|
149
153
|
end
|
@@ -8,10 +8,14 @@ require 'regextest/common'
|
|
8
8
|
class Regextest::Back::Element
|
9
9
|
include Regextest::Common
|
10
10
|
def initialize(param)
|
11
|
-
# puts "Element param:#{param[:cmd]} data:#{param[:
|
12
|
-
@command = param[:cmd]
|
11
|
+
# puts "Element param:#{param[:cmd]} data:#{param[:ranges].size}"
|
13
12
|
@param = param
|
14
|
-
@
|
13
|
+
@command = param[:cmd]
|
14
|
+
@charset = param[:charset]
|
15
|
+
if @command == :CMD_SELECT
|
16
|
+
@candidates = param[:ranges].inject([]){|result, range| result += range.to_a}
|
17
|
+
end
|
18
|
+
# @candidates = param[:data] if @command == :CMD_SELECT
|
15
19
|
end
|
16
20
|
|
17
21
|
attr_reader :param, :command, :candidates
|
@@ -33,7 +37,8 @@ class Regextest::Back::Element
|
|
33
37
|
if(@candidates)
|
34
38
|
@candidates.size
|
35
39
|
else
|
36
|
-
raise "internal error: candidates not found at size-method"
|
40
|
+
# raise "internal error: candidates not found at size-method"
|
41
|
+
0
|
37
42
|
end
|
38
43
|
end
|
39
44
|
|
@@ -82,13 +87,14 @@ class Regextest::Back::Element
|
|
82
87
|
if(@candidates)
|
83
88
|
@candidates.inspect
|
84
89
|
else
|
85
|
-
@param[:
|
90
|
+
@param[:ranges].inspect
|
86
91
|
end
|
87
92
|
when :CMD_LOOK_BEHIND, :CMD_LOOK_AHEAD, :CMD_NOT_LOOK_BEHIND, :CMD_NOT_LOOK_AHEAD
|
88
93
|
@param.inspect
|
89
94
|
when :CMD_ANC_LINE_BEGIN, :CMD_ANC_LINE_END, :CMD_ANC_WORD_BOUND, :CMD_ANC_WORD_UNBOUND,
|
90
95
|
:CMD_ANC_STRING_BEGIN, :CMD_ANC_STRING_END, :CMD_ANC_STRING_END2, :CMD_ANC_MATCH_START,
|
91
|
-
:CMD_ANC_LOOK_BEHIND2, :CMD_ANC_RELUCTANT_BEGIN, :CMD_ANC_RELUCTANT_END
|
96
|
+
:CMD_ANC_LOOK_BEHIND2, :CMD_ANC_RELUCTANT_BEGIN, :CMD_ANC_RELUCTANT_END,
|
97
|
+
:CMD_ANC_POSSESSIVE_BEGIN, :CMD_ANC_POSSESSIVE_END
|
92
98
|
@param.inspect
|
93
99
|
else
|
94
100
|
raise "inner error, invalid command #{@command}"
|
@@ -107,24 +113,41 @@ class Regextest::Back::Element
|
|
107
113
|
|
108
114
|
# Is word-elements only?
|
109
115
|
def word_elements?
|
110
|
-
letters = @candidates.map{|elem| [elem].pack("U*")}
|
111
|
-
|
116
|
+
letters = @candidates.map{|elem| [elem].pack("U*")}.join("")
|
117
|
+
if @charset == "u" || @charset == "d"
|
118
|
+
letters.match(/^\p{Word}+$/)
|
119
|
+
else
|
120
|
+
letters.match(/^\w+$/)
|
121
|
+
end
|
112
122
|
end
|
113
123
|
|
114
124
|
# is non-word-elements only?
|
115
125
|
def non_word_elements?
|
116
|
-
letters = @candidates.map{|elem| [elem].pack("U*")}
|
117
|
-
|
126
|
+
letters = @candidates.map{|elem| [elem].pack("U*")}.join("")
|
127
|
+
if @charset == "u" || @charset == "d"
|
128
|
+
letters.match(/^\p{^Word}+$/)
|
129
|
+
else
|
130
|
+
letters.match(/^\W+$/)
|
131
|
+
end
|
118
132
|
end
|
119
133
|
|
120
134
|
# set word-elements
|
121
135
|
def set_word_elements
|
122
|
-
@
|
136
|
+
if @charset == "u" || @charset == "d"
|
137
|
+
@candidates.select!{|elem| [elem].pack("U*").match(/^\p{Word}$/)}
|
138
|
+
else
|
139
|
+
@candidates.select!{|elem| [elem].pack("U*").match(/^\w$/)}
|
140
|
+
end
|
123
141
|
end
|
124
142
|
|
125
143
|
# set non_word-elements
|
126
144
|
def set_non_word_elements
|
127
|
-
@
|
145
|
+
if @charset == "u" || @charset == "d"
|
146
|
+
@candidates.select!{|elem| [elem].pack("U*").match(/^\p{^Word}$/)}
|
147
|
+
#@candidates.select!{|elem| [elem].pack("U*").match(/^[[:^word:]]$/)}
|
148
|
+
else
|
149
|
+
@candidates.select!{|elem| [elem].pack("U*").match(/^[[:^word:]]$/)}
|
150
|
+
end
|
128
151
|
end
|
129
152
|
|
130
153
|
# checks empty
|
@@ -135,7 +158,7 @@ class Regextest::Back::Element
|
|
135
158
|
# factory method to generate any char element
|
136
159
|
def self.any_char
|
137
160
|
# BUG: must consider other character set!
|
138
|
-
Regextest::Back::Element.new({cmd: :CMD_SELECT,
|
161
|
+
Regextest::Back::Element.new({cmd: :CMD_SELECT, ranges: [0x20..0x7e]})
|
139
162
|
end
|
140
163
|
|
141
164
|
# factory method to generate any char element
|
data/lib/regextest/back/main.rb
CHANGED
@@ -12,6 +12,7 @@ class Regextest::Back::Main
|
|
12
12
|
def initialize(json_obj, max_nest, retry_count = 0)
|
13
13
|
@json_obj = json_obj
|
14
14
|
@max_nest = max_nest
|
15
|
+
@past_max_nest = 0 # max nest of the past
|
15
16
|
@retry_count = retry_count
|
16
17
|
@parens_hash = {} # hash to keep string generated by parentheses
|
17
18
|
@nest = 0 # current nest of back-reference
|
@@ -24,15 +25,15 @@ class Regextest::Back::Main
|
|
24
25
|
seek_parens(@json_obj)
|
25
26
|
|
26
27
|
# generate pre-result of matched string (pre-result contains candidates of letters)
|
27
|
-
|
28
|
+
param = {}
|
29
|
+
pre_result = generate_candidates(@json_obj, param)
|
28
30
|
return nil unless pre_result
|
29
|
-
TstLog("pre_result1:\n" + pre_result.inspect)
|
30
|
-
|
31
|
+
TstLog("pre_result1:\n" + pre_result.map{|elem| elem.inspect}.join("\n"))
|
32
|
+
|
31
33
|
# narrow down the candidates
|
32
34
|
result = narrow_down_candidates(pre_result)
|
33
35
|
TstLog("pre_result2:\n" + result.inspect)
|
34
36
|
return nil if !result || !result.narrow_down
|
35
|
-
|
36
37
|
# fixes result
|
37
38
|
result.fix
|
38
39
|
|
@@ -54,32 +55,31 @@ class Regextest::Back::Main
|
|
54
55
|
end
|
55
56
|
|
56
57
|
# generate pre-result of matched string (pre-result contains candidates of letters)
|
57
|
-
def generate_candidates(param)
|
58
|
-
target = param[:json]
|
58
|
+
def generate_candidates(target, param)
|
59
59
|
# puts "MATCH type:#{target["type"]}"
|
60
60
|
|
61
61
|
result = nil
|
62
62
|
case target["type"]
|
63
63
|
when "LEX_SEQ" # sequence of letters or parentheses
|
64
|
-
result = generate_candidates_seq(param)
|
64
|
+
result = generate_candidates_seq(target, param)
|
65
65
|
when "LEX_SELECT"
|
66
|
-
result = generate_candidates_select(param)
|
66
|
+
result = generate_candidates_select(target, param)
|
67
67
|
when "LEX_PAREN"
|
68
|
-
result = generate_candidates_paren(param)
|
68
|
+
result = generate_candidates_paren(target, param)
|
69
69
|
when "LEX_CHAR_CLASS"
|
70
|
-
result = generate_candidates_char_class(param)
|
71
|
-
when "LEX_BRACKET", "LEX_SIMPLIFIED_CLASS", "LEX_ANY_LETTER", "LEX_POSIX_CHAR_CLASS", "LEX_UNICODE_CLASS"
|
72
|
-
result = generate_candidates(
|
70
|
+
result = generate_candidates_char_class(target, param)
|
71
|
+
when "LEX_BRACKET", "LEX_SIMPLIFIED_CLASS", "LEX_ANY_LETTER", "LEX_POSIX_CHAR_CLASS", "LEX_UNICODE_CLASS", "LEX_UNICODE_CLASS_BRACKET"
|
72
|
+
result = generate_candidates(target["value"], param)
|
73
73
|
when "LEX_REPEAT"
|
74
|
-
result = generate_candidates_repeat(param)
|
74
|
+
result = generate_candidates_repeat(target, param)
|
75
75
|
when "LEX_RANGE"
|
76
|
-
result = generate_candidates_range(param)
|
76
|
+
result = generate_candidates_range(target, param)
|
77
77
|
when "LEX_BACK_REFER", "LEX_NAMED_REFER"
|
78
|
-
result = generate_candidates_back_refer(param)
|
78
|
+
result = generate_candidates_back_refer(target, param)
|
79
79
|
when "LEX_NAMED_GENERATE"
|
80
|
-
result = generate_candidates_named_generate(param)
|
80
|
+
result = generate_candidates_named_generate(target, param)
|
81
81
|
when "LEX_CHAR"
|
82
|
-
result = generate_candidates_char(param)
|
82
|
+
result = generate_candidates_char(target, param)
|
83
83
|
when "LEX_ANC_LINE_BEGIN"
|
84
84
|
result = Regextest::Back::Element.new({cmd: :CMD_ANC_LINE_BEGIN})
|
85
85
|
when "LEX_ANC_LINE_END"
|
@@ -109,11 +109,10 @@ class Regextest::Back::Main
|
|
109
109
|
end
|
110
110
|
|
111
111
|
# sequence of letters or parentheses
|
112
|
-
def generate_candidates_seq(param)
|
113
|
-
target = param[:json]
|
112
|
+
def generate_candidates_seq(target, param)
|
114
113
|
results = []
|
115
114
|
target["value"].each do |elem|
|
116
|
-
generated_string = generate_candidates(
|
115
|
+
generated_string = generate_candidates(elem, param)
|
117
116
|
if(Array === generated_string)
|
118
117
|
generated_string.flatten!(1)
|
119
118
|
results += generated_string
|
@@ -132,12 +131,13 @@ class Regextest::Back::Main
|
|
132
131
|
end
|
133
132
|
|
134
133
|
# selection of sequence. such as (aa|b|c)
|
135
|
-
def generate_candidates_select(param)
|
136
|
-
target = param[:json]
|
134
|
+
def generate_candidates_select(target, param)
|
137
135
|
if param[:forced_select]
|
138
|
-
# index is specified by condition
|
139
|
-
|
140
|
-
|
136
|
+
# index is specified by condition
|
137
|
+
offset = param[:forced_select]
|
138
|
+
param.delete :forced_select
|
139
|
+
if target["value"][offset]
|
140
|
+
result = generate_candidates(target["value"][offset], param)
|
141
141
|
else
|
142
142
|
# regexp such as /^(?:b|(a))(?(1)1)$/ match "b"!
|
143
143
|
result = []
|
@@ -145,43 +145,72 @@ class Regextest::Back::Main
|
|
145
145
|
else
|
146
146
|
# success if there is at least one result
|
147
147
|
offsets = (0 ... target["value"].size).to_a
|
148
|
-
|
149
|
-
|
150
|
-
|
148
|
+
|
149
|
+
# shuffle if element size more than 1
|
150
|
+
offsets = TstShuffle(offsets) if offsets.size > 1
|
151
|
+
|
151
152
|
result = nil
|
152
|
-
|
153
|
-
|
154
|
-
|
153
|
+
if param[:atomic]
|
154
|
+
param.delete :atomic
|
155
|
+
# if atomic, ensure that none of the preceding alternatives match
|
156
|
+
offsets.each do | offset |
|
157
|
+
result = []
|
158
|
+
(0...offset).each do | prev |
|
159
|
+
la_result = generate_candidates(target["value"][prev], param)
|
160
|
+
result.push Regextest::Back::Element.new({cmd: :CMD_NOT_LOOK_AHEAD, result: la_result})
|
161
|
+
end
|
162
|
+
result.push generate_candidates(target["value"][offset], param)
|
163
|
+
break if(!result.find{|elem| !elem })
|
164
|
+
end
|
165
|
+
elsif negative_type = param[:negative]
|
166
|
+
# if negative, ensure that none of the alternatives match
|
167
|
+
result = []
|
168
|
+
offsets.each do | offset |
|
169
|
+
la_result = generate_candidates(target["value"][offset], param)
|
170
|
+
la_result.each do | elem |
|
171
|
+
if elem.command == :CMD_NOT_LOOK_AHEAD
|
172
|
+
result.push elem
|
173
|
+
else
|
174
|
+
result.push Regextest::Back::Element.new({cmd: negative_type, result: la_result})
|
175
|
+
end
|
176
|
+
end
|
177
|
+
end
|
178
|
+
param.delete :negative
|
179
|
+
else
|
180
|
+
offsets.each do | offset |
|
181
|
+
result = generate_candidates(target["value"][offset], param)
|
182
|
+
break if(result)
|
183
|
+
end
|
155
184
|
end
|
156
185
|
end
|
157
186
|
result
|
158
187
|
end
|
159
188
|
|
160
189
|
# parenthesis
|
161
|
-
def generate_candidates_paren(param)
|
162
|
-
target = param[:json]
|
190
|
+
def generate_candidates_paren(target, param)
|
163
191
|
# analyze options of the parenthesis
|
164
192
|
paren_prefix = target["prefix"]
|
165
193
|
# pp target["prefix"]
|
166
194
|
if(paren_prefix == "<=")
|
167
|
-
lb_result = generate_candidates(
|
195
|
+
lb_result = generate_candidates(target["value"], param)
|
168
196
|
result = Regextest::Back::Element.new({cmd: :CMD_LOOK_BEHIND, result: lb_result})
|
169
197
|
elsif(paren_prefix == "=")
|
170
|
-
la_result = generate_candidates(
|
198
|
+
la_result = generate_candidates(target["value"], param)
|
171
199
|
result = Regextest::Back::Element.new({cmd: :CMD_LOOK_AHEAD, result: la_result})
|
172
200
|
elsif(paren_prefix == "<!")
|
173
|
-
|
174
|
-
result =
|
201
|
+
param[:negative] = :CMD_NOT_LOOK_BEHIND
|
202
|
+
result = generate_candidates(target["value"], param)
|
175
203
|
elsif(paren_prefix == "!")
|
176
|
-
|
177
|
-
result =
|
204
|
+
param[:negative] = :CMD_NOT_LOOK_AHEAD
|
205
|
+
result = generate_candidates(target["value"], param)
|
178
206
|
elsif(paren_prefix == ">") # atomic group
|
179
|
-
|
207
|
+
param[:atomic] = true
|
208
|
+
generate_string = generate_candidates(target["value"], param)
|
180
209
|
@parens_hash[target["refer_name"]][:generated] ||= []
|
181
210
|
@parens_hash[target["refer_name"]][:generated][@nest] = generate_string
|
182
211
|
result = generate_string
|
183
212
|
elsif(paren_prefix == "") # simple parenthesis
|
184
|
-
generate_string = generate_candidates(
|
213
|
+
generate_string = generate_candidates(target["value"], param)
|
185
214
|
@parens_hash[target["refer_name"]][:generated] ||= []
|
186
215
|
@parens_hash[target["refer_name"]][:generated][@nest] = generate_string
|
187
216
|
result = generate_string
|
@@ -199,7 +228,8 @@ class Regextest::Back::Main
|
|
199
228
|
if(select_num == 1 && target["value"]["type"] != "LEX_SELECT")
|
200
229
|
result = nil
|
201
230
|
else
|
202
|
-
|
231
|
+
param[:forced_select] = select_num
|
232
|
+
generate_string = generate_candidates(target["value"], param)
|
203
233
|
|
204
234
|
@parens_hash[target["refer_name"]][:generated] ||= []
|
205
235
|
@parens_hash[target["refer_name"]][:generated][@nest] = generate_string
|
@@ -210,11 +240,11 @@ class Regextest::Back::Main
|
|
210
240
|
end
|
211
241
|
|
212
242
|
# char class
|
213
|
-
def generate_candidates_char_class(param)
|
214
|
-
|
215
|
-
results = Regextest::Back::Element.new({cmd: :CMD_SELECT,
|
243
|
+
def generate_candidates_char_class(target, param)
|
244
|
+
charset = target["charset"]
|
245
|
+
results = Regextest::Back::Element.new({cmd: :CMD_SELECT, ranges: [], charset: charset})
|
216
246
|
target["value"].each do | elem |
|
217
|
-
if sub_results = generate_candidates(
|
247
|
+
if sub_results = generate_candidates(elem, param)
|
218
248
|
results.union sub_results
|
219
249
|
end
|
220
250
|
end
|
@@ -227,12 +257,12 @@ class Regextest::Back::Main
|
|
227
257
|
end
|
228
258
|
|
229
259
|
# repeat
|
230
|
-
def generate_candidates_repeat(param)
|
231
|
-
target = param[:json]
|
260
|
+
def generate_candidates_repeat(target, param)
|
232
261
|
max_repeat = target["max_repeat"]
|
233
262
|
min_repeat = target["min_repeat"]
|
234
263
|
|
235
|
-
if
|
264
|
+
# reduce repeat count if retry and there are one or more \g<foo> calls
|
265
|
+
if @retry_count > 0 && @past_max_nest > 0
|
236
266
|
@retry_count.times{ max_repeat = (max_repeat + 1)/2 }
|
237
267
|
end
|
238
268
|
|
@@ -243,13 +273,15 @@ class Regextest::Back::Main
|
|
243
273
|
else
|
244
274
|
repeat = min_repeat
|
245
275
|
end
|
276
|
+
|
246
277
|
result = []
|
247
278
|
if target["repeat_option"].index("reluctant")
|
248
279
|
result.push Regextest::Back::Element.new({cmd: :CMD_ANC_RELUCTANT_BEGIN, id: target["id"]})
|
249
280
|
end
|
281
|
+
|
250
282
|
# puts "repeat=#{repeat} quit=#{@quit_mode} nest=#{@nest}"
|
251
283
|
repeat.times do
|
252
|
-
if( elem = generate_candidates(
|
284
|
+
if( elem = generate_candidates(target["value"], param))
|
253
285
|
result.push elem
|
254
286
|
else
|
255
287
|
result = nil
|
@@ -263,23 +295,28 @@ class Regextest::Back::Main
|
|
263
295
|
break if elem[0].command == :CMD_ANC_STRING_BEGIN
|
264
296
|
end
|
265
297
|
end
|
298
|
+
|
266
299
|
if target["repeat_option"].index("reluctant")
|
267
300
|
result.push Regextest::Back::Element.new({cmd: :CMD_ANC_RELUCTANT_END, id: target["id"]})
|
268
301
|
end
|
302
|
+
|
303
|
+
if target["repeat_option"].index("possessive")
|
304
|
+
la_result = [ generate_candidates(target["value"], param) ].flatten
|
305
|
+
result.push Regextest::Back::Element.new({cmd: :CMD_NOT_LOOK_AHEAD, result: la_result})
|
306
|
+
end
|
269
307
|
result
|
270
308
|
end
|
271
309
|
|
272
310
|
# range
|
273
|
-
def generate_candidates_range(param)
|
274
|
-
|
311
|
+
def generate_candidates_range(target, param)
|
312
|
+
charset = target["charset"]
|
275
313
|
letter = []
|
276
|
-
codepoints = (target["begin"]..target["end"])
|
277
|
-
result = Regextest::Back::Element.new({cmd: :CMD_SELECT,
|
314
|
+
codepoints = (target["begin"]..target["end"])
|
315
|
+
result = Regextest::Back::Element.new({cmd: :CMD_SELECT, ranges: [codepoints], charset: charset})
|
278
316
|
end
|
279
317
|
|
280
318
|
# back_refer
|
281
|
-
def generate_candidates_back_refer(param)
|
282
|
-
target = param[:json]
|
319
|
+
def generate_candidates_back_refer(target, param)
|
283
320
|
if @parens_hash[target["refer_name"]][:generated]
|
284
321
|
relative_num = -1 # default value
|
285
322
|
if target["relative_num"] != ""
|
@@ -289,6 +326,13 @@ class Regextest::Back::Main
|
|
289
326
|
end
|
290
327
|
# puts "relative: #{relative_num}, nest=#{@nest}, :#{target}"
|
291
328
|
result = @parens_hash[target["refer_name"]][:generated][relative_num]
|
329
|
+
|
330
|
+
# Somehow /(^a)\1/ must match with "aa"
|
331
|
+
if result.size > 0 &&
|
332
|
+
(result[0].command == :CMD_ANC_LINE_BEGIN ||
|
333
|
+
result[0].command == :CMD_ANC_STRING_BEGIN)
|
334
|
+
result = result[1..-1] # ignore first anchor
|
335
|
+
end
|
292
336
|
else
|
293
337
|
result = nil
|
294
338
|
end
|
@@ -296,17 +340,17 @@ class Regextest::Back::Main
|
|
296
340
|
end
|
297
341
|
|
298
342
|
# named generate
|
299
|
-
def generate_candidates_named_generate(param)
|
300
|
-
target = param[:json]
|
343
|
+
def generate_candidates_named_generate(target, param)
|
301
344
|
@quit_mode = true if(@nest >= @max_nest)
|
302
345
|
if(@quit_mode)
|
303
346
|
result = nil
|
304
347
|
else
|
305
348
|
@nest += 1
|
349
|
+
@past_max_nest = @nest if @nest > @past_max_nest
|
306
350
|
if target["refer_name"] == "$$_0" # recursively call whole expression
|
307
|
-
result = generate_candidates(
|
351
|
+
result = generate_candidates(@json_obj, param)
|
308
352
|
else
|
309
|
-
result = generate_candidates(
|
353
|
+
result = generate_candidates(@parens_hash[target["refer_name"]][:target], param)
|
310
354
|
end
|
311
355
|
@nest -= 1
|
312
356
|
end
|
@@ -314,14 +358,20 @@ class Regextest::Back::Main
|
|
314
358
|
end
|
315
359
|
|
316
360
|
# char
|
317
|
-
def generate_candidates_char(param)
|
318
|
-
|
361
|
+
def generate_candidates_char(target, param)
|
362
|
+
charset = target["charset"]
|
319
363
|
case target["value"]
|
320
364
|
when String
|
321
365
|
codepoint = target["value"].unpack("U*")[0]
|
322
|
-
result = Regextest::Back::Element.new(
|
366
|
+
result = Regextest::Back::Element.new(
|
367
|
+
{
|
368
|
+
cmd: :CMD_SELECT,
|
369
|
+
ranges: [codepoint..codepoint],
|
370
|
+
charset: charset
|
371
|
+
}
|
372
|
+
)
|
323
373
|
else
|
324
|
-
result = generate_candidates(
|
374
|
+
result = generate_candidates(target["value"], param)
|
325
375
|
end
|
326
376
|
result
|
327
377
|
end
|
@@ -351,7 +401,8 @@ class Regextest::Back::Main
|
|
351
401
|
:CMD_ANC_STRING_BEGIN, :CMD_ANC_STRING_END, :CMD_ANC_STRING_END2, :CMD_ANC_MATCH_START,
|
352
402
|
:CMD_ANC_LOOK_BEHIND2
|
353
403
|
results.add_anchor(command)
|
354
|
-
when :CMD_ANC_RELUCTANT_BEGIN, :CMD_ANC_RELUCTANT_END
|
404
|
+
when :CMD_ANC_RELUCTANT_BEGIN, :CMD_ANC_RELUCTANT_END,
|
405
|
+
:CMD_ANC_POSSESSIVE_BEGIN, :CMD_ANC_POSSESSIVE_END
|
355
406
|
results.add_reluctant_repeat(elem)
|
356
407
|
else
|
357
408
|
raise "inner error, invalid command at checking anchors: #{command}"
|