regextest 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/pre/unicode.rb +5 -0
- data/lib/regextest.rb +7 -3
- data/lib/regextest/back/element.rb +36 -13
- data/lib/regextest/back/main.rb +116 -65
- data/lib/regextest/back/result.rb +19 -8
- data/lib/regextest/common.rb +4 -1
- data/lib/regextest/front.rb +5 -0
- data/lib/regextest/front/back-refer.rb +2 -2
- data/lib/regextest/front/bracket-parser.rb +121 -103
- data/lib/regextest/front/bracket-parser.y +4 -1
- data/lib/regextest/front/bracket-scanner.rb +4 -3
- data/lib/regextest/front/char-class.rb +13 -5
- data/lib/regextest/front/letter.rb +39 -6
- data/lib/regextest/front/parenthesis.rb +2 -2
- data/lib/regextest/front/parser.rb +564 -545
- data/lib/regextest/front/parser.y +8 -3
- data/lib/regextest/front/range.rb +19 -1
- data/lib/regextest/front/scanner.rb +16 -13
- data/lib/regextest/front/special-letter.rb +63 -0
- data/lib/regextest/regex-option.rb +27 -0
- data/lib/regextest/unicode.rb +5 -0
- data/lib/regextest/version.rb +1 -1
- data/lib/tst-reg-test.rb +66 -21
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dee3ca4abbe70edabbe42f690c1a3c586fe689e0
|
4
|
+
data.tar.gz: 7bfe0081c0432cb7e9f9da3e4918042a36770bc1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4b2bf4c54259660e4f4509ee944676388530e6203266820b22a4cc32bb82d1adb849e8c535b8d4044057517fa8169ad9c39ec2ff23391ff43672042a492515f4
|
7
|
+
data.tar.gz: 8f9faee644cd13ade02be3cdcc1ab51eb8e4f14985402eb6ea732bfb988ac2eb363c82fa2da595bd39c2f3e2bbc514ead114a33603275000fd717167eae77fc3
|
data/lib/pre/unicode.rb
CHANGED
@@ -89,6 +89,11 @@ class RegextestPreUnicode
|
|
89
89
|
# Generate hash of properties
|
90
90
|
def self.property(class_name)
|
91
91
|
case class_name.downcase
|
92
|
+
# Regextest defined char classes (from underscore)
|
93
|
+
when "_asciiprint"
|
94
|
+
([[32, 126]])
|
95
|
+
|
96
|
+
# Unicode.org defined char classes
|
92
97
|
#{ranges_source}
|
93
98
|
else
|
94
99
|
warn "Class name (#\{class_name\}) not found. Ignored."
|
data/lib/regextest.rb
CHANGED
@@ -72,8 +72,11 @@ class Regextest
|
|
72
72
|
# @raise [RuntimeError] if something wrong...
|
73
73
|
# @raise [Regextest::RegextestTimeout] if detected timeout while verification. Option 'verification: false' may be workaround.
|
74
74
|
def generate
|
75
|
-
|
76
|
-
|
75
|
+
start_time = Time.now
|
76
|
+
0.step(TstFixnumMax) do | retry_count |
|
77
|
+
duration = Time.now - start_time
|
78
|
+
break if retry_count >= TstConstRetryMax && duration >= TstConstRetryMaxSecond
|
79
|
+
|
77
80
|
# generate string
|
78
81
|
reset_random_called
|
79
82
|
@result = @back_end.generate(retry_count)
|
@@ -114,6 +117,7 @@ class Regextest
|
|
114
117
|
def to_json
|
115
118
|
@front_end.get_json_string
|
116
119
|
end
|
120
|
+
|
117
121
|
#---------------#
|
118
122
|
private
|
119
123
|
|
@@ -143,7 +147,7 @@ class Regextest
|
|
143
147
|
when Regexp
|
144
148
|
@reg_exp = param
|
145
149
|
@@parse_options[:reg_options].set(@reg_exp.options) # inner regex options have priorty
|
146
|
-
@reg_string =
|
150
|
+
@reg_string = @@parse_options[:reg_options].prefix_reg + @reg_exp.source
|
147
151
|
else
|
148
152
|
raise "Error: string or regular expression required"
|
149
153
|
end
|
@@ -8,10 +8,14 @@ require 'regextest/common'
|
|
8
8
|
class Regextest::Back::Element
|
9
9
|
include Regextest::Common
|
10
10
|
def initialize(param)
|
11
|
-
# puts "Element param:#{param[:cmd]} data:#{param[:
|
12
|
-
@command = param[:cmd]
|
11
|
+
# puts "Element param:#{param[:cmd]} data:#{param[:ranges].size}"
|
13
12
|
@param = param
|
14
|
-
@
|
13
|
+
@command = param[:cmd]
|
14
|
+
@charset = param[:charset]
|
15
|
+
if @command == :CMD_SELECT
|
16
|
+
@candidates = param[:ranges].inject([]){|result, range| result += range.to_a}
|
17
|
+
end
|
18
|
+
# @candidates = param[:data] if @command == :CMD_SELECT
|
15
19
|
end
|
16
20
|
|
17
21
|
attr_reader :param, :command, :candidates
|
@@ -33,7 +37,8 @@ class Regextest::Back::Element
|
|
33
37
|
if(@candidates)
|
34
38
|
@candidates.size
|
35
39
|
else
|
36
|
-
raise "internal error: candidates not found at size-method"
|
40
|
+
# raise "internal error: candidates not found at size-method"
|
41
|
+
0
|
37
42
|
end
|
38
43
|
end
|
39
44
|
|
@@ -82,13 +87,14 @@ class Regextest::Back::Element
|
|
82
87
|
if(@candidates)
|
83
88
|
@candidates.inspect
|
84
89
|
else
|
85
|
-
@param[:
|
90
|
+
@param[:ranges].inspect
|
86
91
|
end
|
87
92
|
when :CMD_LOOK_BEHIND, :CMD_LOOK_AHEAD, :CMD_NOT_LOOK_BEHIND, :CMD_NOT_LOOK_AHEAD
|
88
93
|
@param.inspect
|
89
94
|
when :CMD_ANC_LINE_BEGIN, :CMD_ANC_LINE_END, :CMD_ANC_WORD_BOUND, :CMD_ANC_WORD_UNBOUND,
|
90
95
|
:CMD_ANC_STRING_BEGIN, :CMD_ANC_STRING_END, :CMD_ANC_STRING_END2, :CMD_ANC_MATCH_START,
|
91
|
-
:CMD_ANC_LOOK_BEHIND2, :CMD_ANC_RELUCTANT_BEGIN, :CMD_ANC_RELUCTANT_END
|
96
|
+
:CMD_ANC_LOOK_BEHIND2, :CMD_ANC_RELUCTANT_BEGIN, :CMD_ANC_RELUCTANT_END,
|
97
|
+
:CMD_ANC_POSSESSIVE_BEGIN, :CMD_ANC_POSSESSIVE_END
|
92
98
|
@param.inspect
|
93
99
|
else
|
94
100
|
raise "inner error, invalid command #{@command}"
|
@@ -107,24 +113,41 @@ class Regextest::Back::Element
|
|
107
113
|
|
108
114
|
# Is word-elements only?
|
109
115
|
def word_elements?
|
110
|
-
letters = @candidates.map{|elem| [elem].pack("U*")}
|
111
|
-
|
116
|
+
letters = @candidates.map{|elem| [elem].pack("U*")}.join("")
|
117
|
+
if @charset == "u" || @charset == "d"
|
118
|
+
letters.match(/^\p{Word}+$/)
|
119
|
+
else
|
120
|
+
letters.match(/^\w+$/)
|
121
|
+
end
|
112
122
|
end
|
113
123
|
|
114
124
|
# is non-word-elements only?
|
115
125
|
def non_word_elements?
|
116
|
-
letters = @candidates.map{|elem| [elem].pack("U*")}
|
117
|
-
|
126
|
+
letters = @candidates.map{|elem| [elem].pack("U*")}.join("")
|
127
|
+
if @charset == "u" || @charset == "d"
|
128
|
+
letters.match(/^\p{^Word}+$/)
|
129
|
+
else
|
130
|
+
letters.match(/^\W+$/)
|
131
|
+
end
|
118
132
|
end
|
119
133
|
|
120
134
|
# set word-elements
|
121
135
|
def set_word_elements
|
122
|
-
@
|
136
|
+
if @charset == "u" || @charset == "d"
|
137
|
+
@candidates.select!{|elem| [elem].pack("U*").match(/^\p{Word}$/)}
|
138
|
+
else
|
139
|
+
@candidates.select!{|elem| [elem].pack("U*").match(/^\w$/)}
|
140
|
+
end
|
123
141
|
end
|
124
142
|
|
125
143
|
# set non_word-elements
|
126
144
|
def set_non_word_elements
|
127
|
-
@
|
145
|
+
if @charset == "u" || @charset == "d"
|
146
|
+
@candidates.select!{|elem| [elem].pack("U*").match(/^\p{^Word}$/)}
|
147
|
+
#@candidates.select!{|elem| [elem].pack("U*").match(/^[[:^word:]]$/)}
|
148
|
+
else
|
149
|
+
@candidates.select!{|elem| [elem].pack("U*").match(/^[[:^word:]]$/)}
|
150
|
+
end
|
128
151
|
end
|
129
152
|
|
130
153
|
# checks empty
|
@@ -135,7 +158,7 @@ class Regextest::Back::Element
|
|
135
158
|
# factory method to generate any char element
|
136
159
|
def self.any_char
|
137
160
|
# BUG: must consider other character set!
|
138
|
-
Regextest::Back::Element.new({cmd: :CMD_SELECT,
|
161
|
+
Regextest::Back::Element.new({cmd: :CMD_SELECT, ranges: [0x20..0x7e]})
|
139
162
|
end
|
140
163
|
|
141
164
|
# factory method to generate any char element
|
data/lib/regextest/back/main.rb
CHANGED
@@ -12,6 +12,7 @@ class Regextest::Back::Main
|
|
12
12
|
def initialize(json_obj, max_nest, retry_count = 0)
|
13
13
|
@json_obj = json_obj
|
14
14
|
@max_nest = max_nest
|
15
|
+
@past_max_nest = 0 # max nest of the past
|
15
16
|
@retry_count = retry_count
|
16
17
|
@parens_hash = {} # hash to keep string generated by parentheses
|
17
18
|
@nest = 0 # current nest of back-reference
|
@@ -24,15 +25,15 @@ class Regextest::Back::Main
|
|
24
25
|
seek_parens(@json_obj)
|
25
26
|
|
26
27
|
# generate pre-result of matched string (pre-result contains candidates of letters)
|
27
|
-
|
28
|
+
param = {}
|
29
|
+
pre_result = generate_candidates(@json_obj, param)
|
28
30
|
return nil unless pre_result
|
29
|
-
TstLog("pre_result1:\n" + pre_result.inspect)
|
30
|
-
|
31
|
+
TstLog("pre_result1:\n" + pre_result.map{|elem| elem.inspect}.join("\n"))
|
32
|
+
|
31
33
|
# narrow down the candidates
|
32
34
|
result = narrow_down_candidates(pre_result)
|
33
35
|
TstLog("pre_result2:\n" + result.inspect)
|
34
36
|
return nil if !result || !result.narrow_down
|
35
|
-
|
36
37
|
# fixes result
|
37
38
|
result.fix
|
38
39
|
|
@@ -54,32 +55,31 @@ class Regextest::Back::Main
|
|
54
55
|
end
|
55
56
|
|
56
57
|
# generate pre-result of matched string (pre-result contains candidates of letters)
|
57
|
-
def generate_candidates(param)
|
58
|
-
target = param[:json]
|
58
|
+
def generate_candidates(target, param)
|
59
59
|
# puts "MATCH type:#{target["type"]}"
|
60
60
|
|
61
61
|
result = nil
|
62
62
|
case target["type"]
|
63
63
|
when "LEX_SEQ" # sequence of letters or parentheses
|
64
|
-
result = generate_candidates_seq(param)
|
64
|
+
result = generate_candidates_seq(target, param)
|
65
65
|
when "LEX_SELECT"
|
66
|
-
result = generate_candidates_select(param)
|
66
|
+
result = generate_candidates_select(target, param)
|
67
67
|
when "LEX_PAREN"
|
68
|
-
result = generate_candidates_paren(param)
|
68
|
+
result = generate_candidates_paren(target, param)
|
69
69
|
when "LEX_CHAR_CLASS"
|
70
|
-
result = generate_candidates_char_class(param)
|
71
|
-
when "LEX_BRACKET", "LEX_SIMPLIFIED_CLASS", "LEX_ANY_LETTER", "LEX_POSIX_CHAR_CLASS", "LEX_UNICODE_CLASS"
|
72
|
-
result = generate_candidates(
|
70
|
+
result = generate_candidates_char_class(target, param)
|
71
|
+
when "LEX_BRACKET", "LEX_SIMPLIFIED_CLASS", "LEX_ANY_LETTER", "LEX_POSIX_CHAR_CLASS", "LEX_UNICODE_CLASS", "LEX_UNICODE_CLASS_BRACKET"
|
72
|
+
result = generate_candidates(target["value"], param)
|
73
73
|
when "LEX_REPEAT"
|
74
|
-
result = generate_candidates_repeat(param)
|
74
|
+
result = generate_candidates_repeat(target, param)
|
75
75
|
when "LEX_RANGE"
|
76
|
-
result = generate_candidates_range(param)
|
76
|
+
result = generate_candidates_range(target, param)
|
77
77
|
when "LEX_BACK_REFER", "LEX_NAMED_REFER"
|
78
|
-
result = generate_candidates_back_refer(param)
|
78
|
+
result = generate_candidates_back_refer(target, param)
|
79
79
|
when "LEX_NAMED_GENERATE"
|
80
|
-
result = generate_candidates_named_generate(param)
|
80
|
+
result = generate_candidates_named_generate(target, param)
|
81
81
|
when "LEX_CHAR"
|
82
|
-
result = generate_candidates_char(param)
|
82
|
+
result = generate_candidates_char(target, param)
|
83
83
|
when "LEX_ANC_LINE_BEGIN"
|
84
84
|
result = Regextest::Back::Element.new({cmd: :CMD_ANC_LINE_BEGIN})
|
85
85
|
when "LEX_ANC_LINE_END"
|
@@ -109,11 +109,10 @@ class Regextest::Back::Main
|
|
109
109
|
end
|
110
110
|
|
111
111
|
# sequence of letters or parentheses
|
112
|
-
def generate_candidates_seq(param)
|
113
|
-
target = param[:json]
|
112
|
+
def generate_candidates_seq(target, param)
|
114
113
|
results = []
|
115
114
|
target["value"].each do |elem|
|
116
|
-
generated_string = generate_candidates(
|
115
|
+
generated_string = generate_candidates(elem, param)
|
117
116
|
if(Array === generated_string)
|
118
117
|
generated_string.flatten!(1)
|
119
118
|
results += generated_string
|
@@ -132,12 +131,13 @@ class Regextest::Back::Main
|
|
132
131
|
end
|
133
132
|
|
134
133
|
# selection of sequence. such as (aa|b|c)
|
135
|
-
def generate_candidates_select(param)
|
136
|
-
target = param[:json]
|
134
|
+
def generate_candidates_select(target, param)
|
137
135
|
if param[:forced_select]
|
138
|
-
# index is specified by condition
|
139
|
-
|
140
|
-
|
136
|
+
# index is specified by condition
|
137
|
+
offset = param[:forced_select]
|
138
|
+
param.delete :forced_select
|
139
|
+
if target["value"][offset]
|
140
|
+
result = generate_candidates(target["value"][offset], param)
|
141
141
|
else
|
142
142
|
# regexp such as /^(?:b|(a))(?(1)1)$/ match "b"!
|
143
143
|
result = []
|
@@ -145,43 +145,72 @@ class Regextest::Back::Main
|
|
145
145
|
else
|
146
146
|
# success if there is at least one result
|
147
147
|
offsets = (0 ... target["value"].size).to_a
|
148
|
-
|
149
|
-
|
150
|
-
|
148
|
+
|
149
|
+
# shuffle if element size more than 1
|
150
|
+
offsets = TstShuffle(offsets) if offsets.size > 1
|
151
|
+
|
151
152
|
result = nil
|
152
|
-
|
153
|
-
|
154
|
-
|
153
|
+
if param[:atomic]
|
154
|
+
param.delete :atomic
|
155
|
+
# if atomic, assure proceeding results not appeared
|
156
|
+
offsets.each do | offset |
|
157
|
+
result = []
|
158
|
+
(0...offset).each do | prev |
|
159
|
+
la_result = generate_candidates(target["value"][prev], param)
|
160
|
+
result.push Regextest::Back::Element.new({cmd: :CMD_NOT_LOOK_AHEAD, result: la_result})
|
161
|
+
end
|
162
|
+
result.push generate_candidates(target["value"][offset], param)
|
163
|
+
break if(!result.find{|elem| !elem })
|
164
|
+
end
|
165
|
+
elsif negative_type = param[:negative]
|
166
|
+
# if negative, assure all results not appeared
|
167
|
+
result = []
|
168
|
+
offsets.each do | offset |
|
169
|
+
la_result = generate_candidates(target["value"][offset], param)
|
170
|
+
la_result.each do | elem |
|
171
|
+
if elem.command == :CMD_NOT_LOOK_AHEAD
|
172
|
+
result.push elem
|
173
|
+
else
|
174
|
+
result.push Regextest::Back::Element.new({cmd: negative_type, result: la_result})
|
175
|
+
end
|
176
|
+
end
|
177
|
+
end
|
178
|
+
param.delete :negative
|
179
|
+
else
|
180
|
+
offsets.each do | offset |
|
181
|
+
result = generate_candidates(target["value"][offset], param)
|
182
|
+
break if(result)
|
183
|
+
end
|
155
184
|
end
|
156
185
|
end
|
157
186
|
result
|
158
187
|
end
|
159
188
|
|
160
189
|
# parenthesis
|
161
|
-
def generate_candidates_paren(param)
|
162
|
-
target = param[:json]
|
190
|
+
def generate_candidates_paren(target, param)
|
163
191
|
# analyze options of the parenthesis
|
164
192
|
paren_prefix = target["prefix"]
|
165
193
|
# pp target["prefix"]
|
166
194
|
if(paren_prefix == "<=")
|
167
|
-
lb_result = generate_candidates(
|
195
|
+
lb_result = generate_candidates(target["value"], param)
|
168
196
|
result = Regextest::Back::Element.new({cmd: :CMD_LOOK_BEHIND, result: lb_result})
|
169
197
|
elsif(paren_prefix == "=")
|
170
|
-
la_result = generate_candidates(
|
198
|
+
la_result = generate_candidates(target["value"], param)
|
171
199
|
result = Regextest::Back::Element.new({cmd: :CMD_LOOK_AHEAD, result: la_result})
|
172
200
|
elsif(paren_prefix == "<!")
|
173
|
-
|
174
|
-
result =
|
201
|
+
param[:negative] = :CMD_NOT_LOOK_BEHIND
|
202
|
+
result = generate_candidates(target["value"], param)
|
175
203
|
elsif(paren_prefix == "!")
|
176
|
-
|
177
|
-
result =
|
204
|
+
param[:negative] = :CMD_NOT_LOOK_AHEAD
|
205
|
+
result = generate_candidates(target["value"], param)
|
178
206
|
elsif(paren_prefix == ">") # atomic group
|
179
|
-
|
207
|
+
param[:atomic] = true
|
208
|
+
generate_string = generate_candidates(target["value"], param)
|
180
209
|
@parens_hash[target["refer_name"]][:generated] ||= []
|
181
210
|
@parens_hash[target["refer_name"]][:generated][@nest] = generate_string
|
182
211
|
result = generate_string
|
183
212
|
elsif(paren_prefix == "") # simple parenthesis
|
184
|
-
generate_string = generate_candidates(
|
213
|
+
generate_string = generate_candidates(target["value"], param)
|
185
214
|
@parens_hash[target["refer_name"]][:generated] ||= []
|
186
215
|
@parens_hash[target["refer_name"]][:generated][@nest] = generate_string
|
187
216
|
result = generate_string
|
@@ -199,7 +228,8 @@ class Regextest::Back::Main
|
|
199
228
|
if(select_num == 1 && target["value"]["type"] != "LEX_SELECT")
|
200
229
|
result = nil
|
201
230
|
else
|
202
|
-
|
231
|
+
param[:forced_select] = select_num
|
232
|
+
generate_string = generate_candidates(target["value"], param)
|
203
233
|
|
204
234
|
@parens_hash[target["refer_name"]][:generated] ||= []
|
205
235
|
@parens_hash[target["refer_name"]][:generated][@nest] = generate_string
|
@@ -210,11 +240,11 @@ class Regextest::Back::Main
|
|
210
240
|
end
|
211
241
|
|
212
242
|
# char class
|
213
|
-
def generate_candidates_char_class(param)
|
214
|
-
|
215
|
-
results = Regextest::Back::Element.new({cmd: :CMD_SELECT,
|
243
|
+
def generate_candidates_char_class(target, param)
|
244
|
+
charset = target["charset"]
|
245
|
+
results = Regextest::Back::Element.new({cmd: :CMD_SELECT, ranges: [], charset: charset})
|
216
246
|
target["value"].each do | elem |
|
217
|
-
if sub_results = generate_candidates(
|
247
|
+
if sub_results = generate_candidates(elem, param)
|
218
248
|
results.union sub_results
|
219
249
|
end
|
220
250
|
end
|
@@ -227,12 +257,12 @@ class Regextest::Back::Main
|
|
227
257
|
end
|
228
258
|
|
229
259
|
# repeat
|
230
|
-
def generate_candidates_repeat(param)
|
231
|
-
target = param[:json]
|
260
|
+
def generate_candidates_repeat(target, param)
|
232
261
|
max_repeat = target["max_repeat"]
|
233
262
|
min_repeat = target["min_repeat"]
|
234
263
|
|
235
|
-
if
|
264
|
+
# reduce repeat count if retry and there are one or more \g<foo> calls
|
265
|
+
if @retry_count > 0 && @past_max_nest > 0
|
236
266
|
@retry_count.times{ max_repeat = (max_repeat + 1)/2 }
|
237
267
|
end
|
238
268
|
|
@@ -243,13 +273,15 @@ class Regextest::Back::Main
|
|
243
273
|
else
|
244
274
|
repeat = min_repeat
|
245
275
|
end
|
276
|
+
|
246
277
|
result = []
|
247
278
|
if target["repeat_option"].index("reluctant")
|
248
279
|
result.push Regextest::Back::Element.new({cmd: :CMD_ANC_RELUCTANT_BEGIN, id: target["id"]})
|
249
280
|
end
|
281
|
+
|
250
282
|
# puts "repeat=#{repeat} quit=#{@quit_mode} nest=#{@nest}"
|
251
283
|
repeat.times do
|
252
|
-
if( elem = generate_candidates(
|
284
|
+
if( elem = generate_candidates(target["value"], param))
|
253
285
|
result.push elem
|
254
286
|
else
|
255
287
|
result = nil
|
@@ -263,23 +295,28 @@ class Regextest::Back::Main
|
|
263
295
|
break if elem[0].command == :CMD_ANC_STRING_BEGIN
|
264
296
|
end
|
265
297
|
end
|
298
|
+
|
266
299
|
if target["repeat_option"].index("reluctant")
|
267
300
|
result.push Regextest::Back::Element.new({cmd: :CMD_ANC_RELUCTANT_END, id: target["id"]})
|
268
301
|
end
|
302
|
+
|
303
|
+
if target["repeat_option"].index("possessive")
|
304
|
+
la_result = [ generate_candidates(target["value"], param) ].flatten
|
305
|
+
result.push Regextest::Back::Element.new({cmd: :CMD_NOT_LOOK_AHEAD, result: la_result})
|
306
|
+
end
|
269
307
|
result
|
270
308
|
end
|
271
309
|
|
272
310
|
# range
|
273
|
-
def generate_candidates_range(param)
|
274
|
-
|
311
|
+
def generate_candidates_range(target, param)
|
312
|
+
charset = target["charset"]
|
275
313
|
letter = []
|
276
|
-
codepoints = (target["begin"]..target["end"])
|
277
|
-
result = Regextest::Back::Element.new({cmd: :CMD_SELECT,
|
314
|
+
codepoints = (target["begin"]..target["end"])
|
315
|
+
result = Regextest::Back::Element.new({cmd: :CMD_SELECT, ranges: [codepoints], charset: charset})
|
278
316
|
end
|
279
317
|
|
280
318
|
# back_refer
|
281
|
-
def generate_candidates_back_refer(param)
|
282
|
-
target = param[:json]
|
319
|
+
def generate_candidates_back_refer(target, param)
|
283
320
|
if @parens_hash[target["refer_name"]][:generated]
|
284
321
|
relative_num = -1 # default value
|
285
322
|
if target["relative_num"] != ""
|
@@ -289,6 +326,13 @@ class Regextest::Back::Main
|
|
289
326
|
end
|
290
327
|
# puts "relative: #{relative_num}, nest=#{@nest}, :#{target}"
|
291
328
|
result = @parens_hash[target["refer_name"]][:generated][relative_num]
|
329
|
+
|
330
|
+
# Somehow /(^a)\1/ must match with "aa"
|
331
|
+
if result.size > 0 &&
|
332
|
+
(result[0].command == :CMD_ANC_LINE_BEGIN ||
|
333
|
+
result[0].command == :CMD_ANC_STRING_BEGIN)
|
334
|
+
result = result[1..-1] # ignore first anchor
|
335
|
+
end
|
292
336
|
else
|
293
337
|
result = nil
|
294
338
|
end
|
@@ -296,17 +340,17 @@ class Regextest::Back::Main
|
|
296
340
|
end
|
297
341
|
|
298
342
|
# named generate
|
299
|
-
def generate_candidates_named_generate(param)
|
300
|
-
target = param[:json]
|
343
|
+
def generate_candidates_named_generate(target, param)
|
301
344
|
@quit_mode = true if(@nest >= @max_nest)
|
302
345
|
if(@quit_mode)
|
303
346
|
result = nil
|
304
347
|
else
|
305
348
|
@nest += 1
|
349
|
+
@past_max_nest = @nest if @nest > @past_max_nest
|
306
350
|
if target["refer_name"] == "$$_0" # recursively call whole expression
|
307
|
-
result = generate_candidates(
|
351
|
+
result = generate_candidates(@json_obj, param)
|
308
352
|
else
|
309
|
-
result = generate_candidates(
|
353
|
+
result = generate_candidates(@parens_hash[target["refer_name"]][:target], param)
|
310
354
|
end
|
311
355
|
@nest -= 1
|
312
356
|
end
|
@@ -314,14 +358,20 @@ class Regextest::Back::Main
|
|
314
358
|
end
|
315
359
|
|
316
360
|
# char
|
317
|
-
def generate_candidates_char(param)
|
318
|
-
|
361
|
+
def generate_candidates_char(target, param)
|
362
|
+
charset = target["charset"]
|
319
363
|
case target["value"]
|
320
364
|
when String
|
321
365
|
codepoint = target["value"].unpack("U*")[0]
|
322
|
-
result = Regextest::Back::Element.new(
|
366
|
+
result = Regextest::Back::Element.new(
|
367
|
+
{
|
368
|
+
cmd: :CMD_SELECT,
|
369
|
+
ranges: [codepoint..codepoint],
|
370
|
+
charset: charset
|
371
|
+
}
|
372
|
+
)
|
323
373
|
else
|
324
|
-
result = generate_candidates(
|
374
|
+
result = generate_candidates(target["value"], param)
|
325
375
|
end
|
326
376
|
result
|
327
377
|
end
|
@@ -351,7 +401,8 @@ class Regextest::Back::Main
|
|
351
401
|
:CMD_ANC_STRING_BEGIN, :CMD_ANC_STRING_END, :CMD_ANC_STRING_END2, :CMD_ANC_MATCH_START,
|
352
402
|
:CMD_ANC_LOOK_BEHIND2
|
353
403
|
results.add_anchor(command)
|
354
|
-
when :CMD_ANC_RELUCTANT_BEGIN, :CMD_ANC_RELUCTANT_END
|
404
|
+
when :CMD_ANC_RELUCTANT_BEGIN, :CMD_ANC_RELUCTANT_END,
|
405
|
+
:CMD_ANC_POSSESSIVE_BEGIN, :CMD_ANC_POSSESSIVE_END
|
355
406
|
results.add_reluctant_repeat(elem)
|
356
407
|
else
|
357
408
|
raise "inner error, invalid command at checking anchors: #{command}"
|