regextest 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +25 -0
- data/README.md +88 -0
- data/Rakefile +55 -0
- data/bin/console +14 -0
- data/bin/regextest +4 -0
- data/bin/setup +7 -0
- data/contrib/Onigmo/RE.txt +522 -0
- data/contrib/Onigmo/UnicodeProps.txt +728 -0
- data/contrib/Onigmo/testpy.py +1319 -0
- data/contrib/unicode/Blocks.txt +298 -0
- data/contrib/unicode/CaseFolding.txt +1414 -0
- data/contrib/unicode/DerivedAge.txt +1538 -0
- data/contrib/unicode/DerivedCoreProperties.txt +11029 -0
- data/contrib/unicode/PropList.txt +1525 -0
- data/contrib/unicode/PropertyAliases.txt +193 -0
- data/contrib/unicode/PropertyValueAliases.txt +1420 -0
- data/contrib/unicode/README.txt +25 -0
- data/contrib/unicode/Scripts.txt +2539 -0
- data/contrib/unicode/UnicodeData.txt +29215 -0
- data/lib/pre-case-folding.rb +101 -0
- data/lib/pre-posix-char-class.rb +150 -0
- data/lib/pre-unicode.rb +116 -0
- data/lib/regextest.rb +268 -0
- data/lib/regextest/back.rb +58 -0
- data/lib/regextest/back/element.rb +151 -0
- data/lib/regextest/back/main.rb +356 -0
- data/lib/regextest/back/result.rb +498 -0
- data/lib/regextest/back/test-case.rb +268 -0
- data/lib/regextest/back/work-thread.rb +119 -0
- data/lib/regextest/common.rb +63 -0
- data/lib/regextest/front.rb +60 -0
- data/lib/regextest/front/anchor.rb +45 -0
- data/lib/regextest/front/back-refer.rb +120 -0
- data/lib/regextest/front/bracket-parser.rb +400 -0
- data/lib/regextest/front/bracket-parser.y +117 -0
- data/lib/regextest/front/bracket-scanner.rb +124 -0
- data/lib/regextest/front/bracket.rb +64 -0
- data/lib/regextest/front/builtin-functions.rb +31 -0
- data/lib/regextest/front/case-folding.rb +18 -0
- data/lib/regextest/front/char-class.rb +243 -0
- data/lib/regextest/front/empty.rb +43 -0
- data/lib/regextest/front/letter.rb +327 -0
- data/lib/regextest/front/manage-parentheses.rb +74 -0
- data/lib/regextest/front/parenthesis.rb +153 -0
- data/lib/regextest/front/parser.rb +1366 -0
- data/lib/regextest/front/parser.y +271 -0
- data/lib/regextest/front/range.rb +60 -0
- data/lib/regextest/front/repeat.rb +90 -0
- data/lib/regextest/front/repeatable.rb +77 -0
- data/lib/regextest/front/scanner.rb +187 -0
- data/lib/regextest/front/selectable.rb +65 -0
- data/lib/regextest/front/sequence.rb +73 -0
- data/lib/regextest/front/unicode.rb +1272 -0
- data/lib/regextest/regex-option.rb +144 -0
- data/lib/regextest/regexp.rb +44 -0
- data/lib/regextest/version.rb +5 -0
- data/lib/tst-reg-test.rb +159 -0
- data/regextest.gemspec +26 -0
- metadata +162 -0
@@ -0,0 +1,58 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
class Regextest::Back; end
|
6
|
+
require 'regextest/common'
|
7
|
+
require 'regextest/back/main'
|
8
|
+
|
9
|
+
# Backend class of regextest. Generate matched string
|
10
|
+
class Regextest::Back
|
11
|
+
include Regextest::Common
|
12
|
+
|
13
|
+
# Constructor
|
14
|
+
# Sets up the back-end for one parsed regular expression.
#
# json_obj:: parse tree produced by the front-end; kept as-is in @json_obj.
def initialize(json_obj)
  @json_obj = json_obj
  # presumably the source text of the regexp, as stored in the shared parse options
  @reg_source = @@parse_options[:reg_source]

  # Disabled at present: name hash and test-case extraction.
  # @name_hash = make_name_hash(@json_obj, {})
  # @test_info = Regextest::Back::TestCase.new(@json_obj, @name_hash)

  # Recursion limit handed to the generator
  # (the original note says the default is 8 -- confirm where the constant is defined).
  @max_nest = TstConstRecursionMax
end
|
27
|
+
|
28
|
+
# A public method that generates string to match the regexp
|
29
|
+
# Public entry point: builds a Regextest::Back::Main from the stored parse
# tree and recursion limit, and returns whatever its #generate returns.
def generate
  Regextest::Back::Main.new(@json_obj, @max_nest).generate
end
|
33
|
+
|
34
|
+
# make a hash to manage names and corresponding objects
|
35
|
+
# Recursively indexes parse-tree nodes by name.
#
# target::    a parse-tree node (Hash). Anything else (String leaf, nil, ...)
#             is ignored.
# name_hash:: accumulator Hash; mutated in place and returned.
#
# Every node that has an "id" is stored under that id. A "LEX_PAREN" node is
# additionally stored under its "refer_name". Children are taken from
# "value", which may be a single node or an Array of nodes.
#
# Raises RuntimeError when the same id is met twice.
def make_name_hash(target, name_hash)
  # Leaves are not nodes. Without this guard a String leaf was indexed with
  # String#[] (substring lookup), so a leaf containing "value" recursed forever.
  return name_hash unless target.is_a?(Hash)

  id = target["id"]
  if id
    raise "Internal error: found duplicate id #{id}" if name_hash[id]
    name_hash[id] = target # nodes without an id are no longer stored under nil
  end
  name_hash[target["refer_name"]] = target if target["type"] == "LEX_PAREN"

  # recursively register names
  children = target["value"]
  if children.is_a?(Array)
    children.each { |child| make_name_hash(child, name_hash) }
  elsif children
    make_name_hash(children, name_hash)
  end
  name_hash
end
|
51
|
+
|
52
|
+
end
|
53
|
+
|
54
|
+
# Test suite (execute when this file is specified in command line)
|
55
|
+
if __FILE__ == $0
|
56
|
+
|
57
|
+
end
|
58
|
+
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
require "pp"
|
6
|
+
require 'regextest/common'
|
7
|
+
|
8
|
+
class Regextest::Back::Element
|
9
|
+
include Regextest::Common
|
10
|
+
# Creates an element from a parameter hash.
#
# param:: Hash; :cmd names the command. For :CMD_SELECT the value of :data
#         becomes the candidate list (the same object, not a copy).
def initialize(param)
  @param = param
  @command = param[:cmd]
  if @command == :CMD_SELECT
    @candidates = param[:data]
  end
end
|
15
|
+
|
16
|
+
attr_reader :param, :command, :candidates
|
17
|
+
|
18
|
+
# random fix
|
19
|
+
# Picks one candidate and narrows the candidate list to just that value.
# With more than one candidate the index comes from TstRand(size);
# otherwise index 0 is used. Returns the chosen candidate.
#
# Raises RuntimeError unless the command is :CMD_SELECT.
def random_fix
  raise "invalid command at random_fix: #{@command}" unless @command == :CMD_SELECT

  index = @candidates.size > 1 ? TstRand(@candidates.size) : 0
  chosen = @candidates[index]
  @candidates = [chosen] # fixed!
  chosen
end
|
29
|
+
|
30
|
+
# size of candidates
|
31
|
+
# Number of candidates.
# Raises RuntimeError when the element has no candidate list.
def size
  raise "internal error: candidates not found at size-method" unless @candidates

  @candidates.size
end
|
38
|
+
|
39
|
+
# [] of candidates
|
40
|
+
# Candidate at position num (plain Array indexing on the candidate list).
# Raises RuntimeError when the element has no candidate list.
def [](num)
  raise "internal error: candidates not found at at-method" unless @candidates

  @candidates[num]
end
|
47
|
+
|
48
|
+
# narrow down candidates
|
49
|
+
# Narrows the candidates to those also present in other_obj.
#
# Returns the new candidate list, or nil (leaving the candidates untouched)
# when the intersection is empty.
# Raises RuntimeError unless other_obj is a :CMD_SELECT element.
def intersect(other_obj)
  raise "invalid command at intersect" unless other_obj.command == :CMD_SELECT

  common = @candidates & other_obj.candidates
  return nil unless common.size > 0

  @candidates = common
end
|
58
|
+
|
59
|
+
# exclude
|
60
|
+
# Removes other_obj's candidates from this element's candidates.
#
# Returns the new candidate list, or nil (leaving the candidates untouched)
# when nothing would remain.
# Raises RuntimeError unless other_obj is a :CMD_SELECT element.
def exclude(other_obj)
  raise "invalid command at exclude" unless other_obj.command == :CMD_SELECT

  remaining = @candidates - other_obj.candidates
  return nil unless remaining.size > 0

  @candidates = remaining
end
|
69
|
+
|
70
|
+
# join candidates
|
71
|
+
# Appends other_obj's candidates to this element's candidates.
# Duplicates are kept on purpose (plain concatenation instead of a set
# union, "to be faster"). A new Array is assigned; the previous candidate
# Array is not modified. Returns the new candidate list.
#
# Raises RuntimeError unless other_obj is a :CMD_SELECT element.
def union(other_obj)
  raise "invalid command at union" unless other_obj.command == :CMD_SELECT

  @candidates = @candidates + other_obj.candidates
end
|
76
|
+
|
77
|
+
# for simple pretty print
|
78
|
+
def inspect
|
79
|
+
case @command
|
80
|
+
when :CMD_SELECT
|
81
|
+
if(@candidates)
|
82
|
+
@candidates.inspect
|
83
|
+
else
|
84
|
+
@param[:data].inspect
|
85
|
+
end
|
86
|
+
when :CMD_LOOK_BEHIND, :CMD_LOOK_AHEAD, :CMD_NOT_LOOK_BEHIND, :CMD_NOT_LOOK_AHEAD
|
87
|
+
@param.inspect
|
88
|
+
when :CMD_ANC_LINE_BEGIN, :CMD_ANC_LINE_END, :CMD_ANC_WORD_BOUND, :CMD_ANC_WORD_UNBOUND,
|
89
|
+
:CMD_ANC_STRING_BEGIN, :CMD_ANC_STRING_END, :CMD_ANC_STRING_END2, :CMD_ANC_MATCH_START,
|
90
|
+
:CMD_ANC_LOOK_BEHIND2, :CMD_ANC_RELUCTANT_BEGIN, :CMD_ANC_RELUCTANT_END
|
91
|
+
@param.inspect
|
92
|
+
else
|
93
|
+
raise "inner error, invalid command #{@command}"
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
# Includes new line or not
|
98
|
+
# Tells whether "\n" is among the candidates.
# Returns the position of the first "\n" (truthy, possibly 0) or nil.
def new_line?
  @candidates.find_index("\n")
end
|
101
|
+
|
102
|
+
# Sets new line
|
103
|
+
# Replaces the candidate list with the single candidate "\n".
# Returns the new candidate list.
def set_new_line
  newline_only = ["\n"]
  @candidates = newline_only
end
|
106
|
+
|
107
|
+
# Is word-elements only?
|
108
|
+
# Tells whether every candidate consists of word characters only.
#
# The candidates are joined and matched against \p{Word}+ anchored to the
# whole string. The previous line anchors (^ and $) matched a single line,
# so a set such as ["a", "\n"] was wrongly reported as word-only.
#
# Returns a MatchData (truthy) or nil; an empty candidate list gives nil.
def word_elements?
  @candidates.join("").match(/\A\p{Word}+\z/)
end
|
111
|
+
|
112
|
+
# is non-word-elements only?
|
113
|
+
# Tells whether every candidate consists of non-word characters only.
#
# The candidates are joined and matched against \p{^Word}+ anchored to the
# whole string. The previous line anchors (^ and $) matched a single line,
# so a set such as ["!", "\n", "a"] was wrongly reported as non-word-only.
#
# Returns a MatchData (truthy) or nil; an empty candidate list gives nil.
def non_word_elements?
  @candidates.join("").match(/\A\p{^Word}+\z/)
end
|
116
|
+
|
117
|
+
# set word-elements
|
118
|
+
# Keeps only the candidates matching /^\w$/, modifying the list in place.
# Returns the candidate list, or nil when nothing had to be removed.
def set_word_elements
  @candidates.reject! { |letter| !letter.match(/^\w$/) }
end
|
121
|
+
|
122
|
+
# set non_word-elements
|
123
|
+
# Keeps only the candidates matching /^\W$/, modifying the list in place.
# Returns the candidate list, or nil when nothing had to be removed.
def set_non_word_elements
  @candidates.reject! { |letter| !letter.match(/^\W$/) }
end
|
126
|
+
|
127
|
+
# checks empty
|
128
|
+
# True when no candidate is left.
def empty?
  @candidates.empty?
end
|
131
|
+
|
132
|
+
# factory method to generate any char element
|
133
|
+
# Factory: a :CMD_SELECT element whose candidates are the characters
# " " through "\x7e".
# BUG (noted by the original author): other character sets are not considered.
def self.any_char
  printable = (" ".."\x7e").to_a
  Regextest::Back::Element.new({cmd: :CMD_SELECT, data: printable})
end
|
137
|
+
|
138
|
+
# replace the candidates with their complement within " " .. "\x7e" (returns self)
|
139
|
+
# Replaces the candidates with their complement within the characters
# " " through "\x7e". Returns self so that calls can be chained.
def reverse
  universe = (" ".."\x7e").to_a
  @candidates = universe - @candidates
  self
end
|
143
|
+
|
144
|
+
|
145
|
+
end
|
146
|
+
|
147
|
+
# Test suite (execute when this file is specified in command line)
|
148
|
+
if __FILE__ == $0
|
149
|
+
end
|
150
|
+
|
151
|
+
|
@@ -0,0 +1,356 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
require 'regextest/common'
|
6
|
+
require 'regextest/back/element'
|
7
|
+
require 'regextest/back/result'
|
8
|
+
|
9
|
+
# Main class of back-end. Construct candidates array and narrow down
|
10
|
+
class Regextest::Back::Main
|
11
|
+
include Regextest::Common
|
12
|
+
# json_obj:: parse tree to generate from
# max_nest:: upper bound for @nest; once reached, \g<...> calls stop expanding
def initialize(json_obj, max_nest)
  @json_obj = json_obj
  @max_nest = max_nest

  # strings generated by parentheses, keyed by reference name
  @parens_hash = {}
  # current nesting depth of \g<...> calls
  @nest = 0
  # once true, nesting is no longer increased:
  # \g<foo> is restrained and repeats use their minimum count
  @quit_mode = false
end
|
20
|
+
|
21
|
+
# Runs the whole back-end pipeline and returns the fixed result object,
# or nil when generation fails at any stage.
def generate
  # parentheses are collected first because references may point ahead
  seek_parens(@json_obj)

  # pre-result: candidates of letters for every position
  pre_result = generate_candidates({json: @json_obj})
  return nil unless pre_result

  TstLog("pre_result1:\n#{pre_result.inspect}")

  # narrow down the candidates
  result = narrow_down_candidates(pre_result)
  TstLog("pre_result2:\n#{result.inspect}")
  return nil unless result && result.narrow_down

  # fixes result
  result.fix
  result
end
|
40
|
+
|
41
|
+
# seek parentheses
|
42
|
+
# Walks the parse tree and records every "LEX_PAREN" node in @parens_hash
# under its "refer_name" (as {:target => node}).
#
# target:: a parse-tree node (Hash); children are read from "value", which
#          may be one node or an Array of nodes. Anything that is not a Hash
#          (String leaf, nil, ...) is skipped.
def seek_parens(target)
  # Leaves are not nodes. Without this guard a String leaf was indexed with
  # String#[] (substring lookup), so a leaf containing "value" recursed forever.
  return unless target.is_a?(Hash)

  if target["type"] == "LEX_PAREN"
    @parens_hash[target["refer_name"]] = {:target => target}
  end

  children = target["value"]
  if children.is_a?(Array)
    children.each { |child| seek_parens(child) }
  elsif children
    seek_parens(children)
  end
end
|
54
|
+
|
55
|
+
# generate pre-result of matched string (pre-result contains candidates of letters)
|
56
|
+
# Dispatches on the node type and returns the pre-result for that node:
# an Element, an Array of elements, [] for nodes that generate nothing,
# or nil when the sub-generator reports failure.
#
# param:: Hash; :json is the parse-tree node. Other keys are passed on to
#         the sub-generators that receive param itself.
#
# Raises RuntimeError for an unknown node type.
def generate_candidates(param)
  target = param[:json]
  kind = target["type"]

  case kind
  when "LEX_SEQ" # sequence of letters or parentheses
    generate_candidates_seq(param)
  when "LEX_SELECT"
    generate_candidates_select(param)
  when "LEX_PAREN"
    generate_candidates_paren(param)
  when "LEX_CHAR_CLASS"
    generate_candidates_char_class(param)
  when "LEX_BRACKET", "LEX_SIMPLIFIED_CLASS", "LEX_ANY_LETTER", "LEX_POSIX_CHAR_CLASS", "LEX_UNICODE_CLASS"
    # pure wrappers: generate from the wrapped node
    generate_candidates({json: target["value"]})
  when "LEX_REPEAT"
    generate_candidates_repeat(param)
  when "LEX_RANGE"
    generate_candidates_range(param)
  when "LEX_BACK_REFER", "LEX_NAMED_REFER"
    generate_candidates_back_refer(param)
  when "LEX_NAMED_GENERATE"
    generate_candidates_named_generate(param)
  when "LEX_CHAR"
    generate_candidates_char(param)
  when "LEX_ANC_LINE_BEGIN", "LEX_ANC_LINE_END", "LEX_ANC_WORD_BOUND", "LEX_ANC_WORD_UNBOUND",
       "LEX_ANC_STRING_BEGIN", "LEX_ANC_STRING_END", "LEX_ANC_STRING_END2", "LEX_ANC_MATCH_START",
       "LEX_ANC_LOOK_BEHIND2"
    # every anchor maps to the command of the same name: LEX_ANC_X -> :CMD_ANC_X
    Regextest::Back::Element.new({cmd: kind.sub("LEX_", "CMD_").to_sym})
  when "LEX_OPTION_PAREN", "LEX_EMPTY"
    # options are processed at front-end; an empty node generates nothing
    []
  else
    raise "#{target["type"]} not implemented (from generate_candidates routine)"
  end
end
|
109
|
+
|
110
|
+
# sequence of letters or parentheses
|
111
|
+
# Sequence of letters or parentheses: generates every member in order and
# concatenates the pre-results into one flat list.
#
# An Array pre-result is flattened by one level (in place) before being
# appended; anything else is appended as a single item.
# Returns nil when any member failed (a nil ended up in the list).
def generate_candidates_seq(param)
  collected = []
  param[:json]["value"].each do |member|
    piece = generate_candidates({json: member})
    if Array === piece
      piece.flatten!(1)
      collected.concat(piece)
    else
      collected.push(piece)
    end
  end

  # nil if one element failed
  collected.include?(nil) ? nil : collected
end
|
132
|
+
|
133
|
+
# selection of sequence. such as (aa|b|c)
|
134
|
+
# Selection of sequences, such as (aa|b|c).
#
# param[:forced_select]:: when set, only that alternative is generated;
#                         a missing alternative yields [] because a regexp
#                         such as /^(?:b|(a))(?(1)1)$/ matches "b".
# param[:atomic]::        when set, alternatives are tried in written order;
#                         otherwise the order is shuffled with TstShuffle
#                         (the original author notes this is not sufficient).
#
# Returns the first non-nil pre-result, or nil when every alternative fails.
def generate_candidates_select(param)
  alternatives = param[:json]["value"]
  forced = param[:forced_select]

  if forced
    # index is specified by condition
    picked = alternatives[forced]
    return picked ? generate_candidates({json: picked}) : []
  end

  # success if there is at least one result
  order = (0...alternatives.size).to_a
  order = TstShuffle(order) if !param[:atomic] && order.size > 1

  outcome = nil
  order.each do |position|
    outcome = generate_candidates({json: alternatives[position]})
    break if outcome
  end
  outcome
end
|
158
|
+
|
159
|
+
# parenthesis
|
160
|
+
# Parenthesis. The behaviour depends on the node's "prefix":
#
# "<=", "=", "<!", "!":: look-behind / look-ahead (and their negations):
#                        the inner pre-result is wrapped in an Element and
#                        nothing is recorded in @parens_hash.
# ">"::                  atomic group: generated with atomic: true.
# ""::                   simple parenthesis.
# anything else::        conditional: when "condition_name" is non-empty,
#                        alternative 0 is forced if the referenced group has
#                        generated something, alternative 1 otherwise. A
#                        forced alternative 1 on a body that is not a
#                        "LEX_SELECT" yields nil without recording.
#
# For the non-look-around cases the pre-result is stored in
# @parens_hash[refer_name][:generated][@nest] and returned.
def generate_candidates_paren(param)
  target = param[:json]
  prefix = target["prefix"]
  body = target["value"]

  look_around = {
    "<=" => :CMD_LOOK_BEHIND,
    "=" => :CMD_LOOK_AHEAD,
    "<!" => :CMD_NOT_LOOK_BEHIND,
    "!" => :CMD_NOT_LOOK_AHEAD
  }
  if look_around.key?(prefix)
    inner = generate_candidates({json: body})
    return Regextest::Back::Element.new({cmd: look_around[prefix], result: inner})
  end

  request =
    case prefix
    when ">" # atomic group
      {json: body, atomic: true}
    when "" # simple parenthesis
      {json: body}
    else
      # when condition is specified
      branch = nil
      condition = target["condition_name"]
      if condition && condition.length > 0
        branch = @parens_hash[condition][:generated] ? 0 : 1
      end
      return nil if branch == 1 && body["type"] != "LEX_SELECT"

      {json: body, forced_select: branch}
    end

  generated = generate_candidates(request)
  slot = @parens_hash[target["refer_name"]]
  slot[:generated] ||= []
  slot[:generated][@nest] = generated
  generated
end
|
210
|
+
|
211
|
+
# char class
|
212
|
+
# Character class: merges the candidates of every member into one
# :CMD_SELECT element (members that fail to generate are skipped).
# Returns the merged element, or nil when it ends up without candidates.
def generate_candidates_char_class(param)
  merged = Regextest::Back::Element.new({cmd: :CMD_SELECT, data: []})

  param[:json]["value"].each do |member|
    piece = generate_candidates({json: member})
    merged.union(piece) if piece
  end

  merged.size > 0 ? merged : nil
end
|
227
|
+
|
228
|
+
# repeat
|
229
|
+
# Repeat: generates the repeated node a number of times and returns the
# list of pre-results, or nil when one repetition fails.
#
# The count is "min_repeat" in quit mode or when max_repeat <= min_repeat;
# otherwise min_repeat + TstRand(max_repeat - min_repeat + 1).
#
# When "repeat_option" contains "reluctant" the list is bracketed by
# :CMD_ANC_RELUCTANT_BEGIN / :CMD_ANC_RELUCTANT_END marker elements that
# carry the node id.
#
# Fix: a failed repetition used to set the result to nil and then, for a
# reluctant repeat, call push on that nil (NoMethodError). The failure is
# now returned as nil straight away.
def generate_candidates_repeat(param)
  target = param[:json]
  min = target["min_repeat"]
  max = target["max_repeat"]
  repeat = (!@quit_mode && max > min) ? min + TstRand(max - min + 1) : min
  reluctant = target["repeat_option"].index("reluctant")

  result = []
  if reluctant
    result.push Regextest::Back::Element.new({cmd: :CMD_ANC_RELUCTANT_BEGIN, id: target["id"]})
  end

  repeat.times do
    elem = generate_candidates({json: target["value"]})
    return nil unless elem # one failed repetition fails the whole repeat

    result.push elem

    # quit to repeat if the first element is begin anchor
    elem.flatten! if Array === elem # flatten considering duplicated repeat
    if elem.size > 0 && elem[0].respond_to?(:command) && elem[-1].respond_to?(:command)
      break if elem[0].command == :CMD_ANC_LINE_BEGIN && !elem[-1].new_line?
      break if elem[0].command == :CMD_ANC_STRING_BEGIN
    end
  end

  if reluctant
    result.push Regextest::Back::Element.new({cmd: :CMD_ANC_RELUCTANT_END, id: target["id"]})
  end
  result
end
|
263
|
+
|
264
|
+
# range
|
265
|
+
# Range: a :CMD_SELECT element holding one single-character string for
# every code point from "begin" to "end" (inclusive).
def generate_candidates_range(param)
  target = param[:json]
  letters = (target["begin"]..target["end"]).map { |codepoint| [codepoint].pack("U*") }
  Regextest::Back::Element.new({cmd: :CMD_SELECT, data: letters})
end
|
272
|
+
|
273
|
+
# back_refer
|
274
|
+
# Back reference: returns what the referenced parenthesis generated.
#
# With an empty "relative_num" the last recorded entry is used; otherwise
# the entry at @nest + relative_num. Returns nil when the referenced
# parenthesis has not generated anything yet.
def generate_candidates_back_refer(param)
  target = param[:json]
  history = @parens_hash[target["refer_name"]][:generated]
  return nil unless history

  offset = target["relative_num"]
  position = offset == "" ? -1 : @nest + offset.to_i
  history[position]
end
|
284
|
+
|
285
|
+
# named generate
|
286
|
+
# Named generate (\g<...>): re-generates the referenced parenthesis, or the
# whole expression for the name "$$_0", one nesting level deeper.
#
# Once @nest has reached @max_nest, quit mode is switched on (and stays on);
# in quit mode nothing is generated and nil is returned.
def generate_candidates_named_generate(param)
  name = param[:json]["refer_name"]

  @quit_mode = true if @nest >= @max_nest
  return nil if @quit_mode

  @nest += 1
  # "$$_0" recursively calls the whole expression
  callee = name == "$$_0" ? @json_obj : @parens_hash[name][:target]
  outcome = generate_candidates({json: callee})
  @nest -= 1
  outcome
end
|
302
|
+
|
303
|
+
# char
|
304
|
+
# Char: a String value becomes a :CMD_SELECT element with that string as
# its only candidate; any other value is treated as a nested node and
# generated recursively.
def generate_candidates_char(param)
  value = param[:json]["value"]
  return generate_candidates({json: value}) unless String === value

  Regextest::Back::Element.new({cmd: :CMD_SELECT, data: [value]})
end
|
314
|
+
|
315
|
+
# narrow down candidates considering anchors
|
316
|
+
# Narrows down candidates considering anchors.
#
# candidate_array:: list of elements produced by generate_candidates, or
#                   nil when that generation failed.
#
# Feeds every element into a fresh Regextest::Back::Result according to
# its command (body letters, look-ahead / look-behind sub-results, anchors,
# reluctant-repeat markers) and finally merges the result.
#
# Returns the Result, or nil when the input is nil, a look-around
# sub-result cannot be built, or the merge fails.
# Raises RuntimeError for an unknown command.
#
# Fix: a look-around element whose inner generation failed carries
# result: nil; the recursive call used to crash on nil.each instead of
# reporting the failure as nil.
def narrow_down_candidates(candidate_array)
  return nil unless candidate_array

  results = Regextest::Back::Result.new
  candidate_array.each do |elem|
    command = elem.command
    case command
    when :CMD_SELECT
      results.push_body elem
    when :CMD_LOOK_AHEAD, :CMD_NOT_LOOK_AHEAD
      sub_results = narrow_down_candidates(elem.param[:result])
      return nil unless sub_results

      results.add_look_ahead(command, sub_results)
    when :CMD_LOOK_BEHIND, :CMD_NOT_LOOK_BEHIND
      sub_results = narrow_down_candidates(elem.param[:result])
      return nil unless sub_results

      results.add_look_behind(command, sub_results)
    when :CMD_ANC_LINE_BEGIN, :CMD_ANC_LINE_END, :CMD_ANC_WORD_BOUND, :CMD_ANC_WORD_UNBOUND,
         :CMD_ANC_STRING_BEGIN, :CMD_ANC_STRING_END, :CMD_ANC_STRING_END2, :CMD_ANC_MATCH_START,
         :CMD_ANC_LOOK_BEHIND2
      results.add_anchor(command)
    when :CMD_ANC_RELUCTANT_BEGIN, :CMD_ANC_RELUCTANT_END
      results.add_reluctant_repeat(elem)
    else
      raise "inner error, invalid command at checking anchors: #{command}"
    end
  end

  return nil unless results.merge

  results
end
|
350
|
+
end
|
351
|
+
|
352
|
+
# Test suite (execute when this file is specified in command line)
|
353
|
+
if __FILE__ == $0
|
354
|
+
|
355
|
+
end
|
356
|
+
|