regextest 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +25 -0
- data/README.md +88 -0
- data/Rakefile +55 -0
- data/bin/console +14 -0
- data/bin/regextest +4 -0
- data/bin/setup +7 -0
- data/contrib/Onigmo/RE.txt +522 -0
- data/contrib/Onigmo/UnicodeProps.txt +728 -0
- data/contrib/Onigmo/testpy.py +1319 -0
- data/contrib/unicode/Blocks.txt +298 -0
- data/contrib/unicode/CaseFolding.txt +1414 -0
- data/contrib/unicode/DerivedAge.txt +1538 -0
- data/contrib/unicode/DerivedCoreProperties.txt +11029 -0
- data/contrib/unicode/PropList.txt +1525 -0
- data/contrib/unicode/PropertyAliases.txt +193 -0
- data/contrib/unicode/PropertyValueAliases.txt +1420 -0
- data/contrib/unicode/README.txt +25 -0
- data/contrib/unicode/Scripts.txt +2539 -0
- data/contrib/unicode/UnicodeData.txt +29215 -0
- data/lib/pre-case-folding.rb +101 -0
- data/lib/pre-posix-char-class.rb +150 -0
- data/lib/pre-unicode.rb +116 -0
- data/lib/regextest.rb +268 -0
- data/lib/regextest/back.rb +58 -0
- data/lib/regextest/back/element.rb +151 -0
- data/lib/regextest/back/main.rb +356 -0
- data/lib/regextest/back/result.rb +498 -0
- data/lib/regextest/back/test-case.rb +268 -0
- data/lib/regextest/back/work-thread.rb +119 -0
- data/lib/regextest/common.rb +63 -0
- data/lib/regextest/front.rb +60 -0
- data/lib/regextest/front/anchor.rb +45 -0
- data/lib/regextest/front/back-refer.rb +120 -0
- data/lib/regextest/front/bracket-parser.rb +400 -0
- data/lib/regextest/front/bracket-parser.y +117 -0
- data/lib/regextest/front/bracket-scanner.rb +124 -0
- data/lib/regextest/front/bracket.rb +64 -0
- data/lib/regextest/front/builtin-functions.rb +31 -0
- data/lib/regextest/front/case-folding.rb +18 -0
- data/lib/regextest/front/char-class.rb +243 -0
- data/lib/regextest/front/empty.rb +43 -0
- data/lib/regextest/front/letter.rb +327 -0
- data/lib/regextest/front/manage-parentheses.rb +74 -0
- data/lib/regextest/front/parenthesis.rb +153 -0
- data/lib/regextest/front/parser.rb +1366 -0
- data/lib/regextest/front/parser.y +271 -0
- data/lib/regextest/front/range.rb +60 -0
- data/lib/regextest/front/repeat.rb +90 -0
- data/lib/regextest/front/repeatable.rb +77 -0
- data/lib/regextest/front/scanner.rb +187 -0
- data/lib/regextest/front/selectable.rb +65 -0
- data/lib/regextest/front/sequence.rb +73 -0
- data/lib/regextest/front/unicode.rb +1272 -0
- data/lib/regextest/regex-option.rb +144 -0
- data/lib/regextest/regexp.rb +44 -0
- data/lib/regextest/version.rb +5 -0
- data/lib/tst-reg-test.rb +159 -0
- data/regextest.gemspec +26 -0
- metadata +162 -0
@@ -0,0 +1,43 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
require 'regextest/common'
|
6
|
+
|
7
|
+
# Empty part
|
8
|
+
module Regextest::Front::Empty
|
9
|
+
class TEmpty
|
10
|
+
include Regextest::Common
|
11
|
+
@@id = 0 # a class variable for generating unique name of element
|
12
|
+
|
13
|
+
# Constructor.
# Logs the creation of the empty element and records that it has no position
# in the pattern: offset is -1 and length is 0.
def initialize
  TstLog("Empty: ")
  @offset, @length = -1, 0
end
|
19
|
+
|
20
|
+
attr_reader :offset, :length
|
21
|
+
|
22
|
+
# Accept the option set for interface compatibility with the other front-end
# elements. The options are only logged; an empty element keeps no state
# derived from them.
#
# options:: hash whose :reg_options entry is written to the log
# Returns self.
def set_options(options)
  TstLog("Empty set_options: #{options[:reg_options].inspect}")
  self
end
|
28
|
+
|
29
|
+
# Serialise this element as a JSON object string.
# Every call bumps the class-wide counter, so the emitted "id" ("E<n>") is
# different on each call.
def json
  @@id += 1
  fields = [
    %("type": "LEX_EMPTY"),
    %("id": "E#{@@id}"),
    %("value": ""),
    %("offset": #{@offset}),
    %("length": #{@length})
  ]
  "{#{fields.join(', ')}}"
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
# Test suite (execute when this file is specified in command line)
|
41
|
+
if __FILE__ == $0
|
42
|
+
end
|
43
|
+
|
@@ -0,0 +1,327 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
require 'regextest/common'
|
6
|
+
require 'regextest/front/char-class' # character class element
|
7
|
+
require 'regextest/front/range' # range of character point
|
8
|
+
require 'regextest/regex-option'
|
9
|
+
require 'regextest/front/unicode'
|
10
|
+
|
11
|
+
# A letter
|
12
|
+
module Regextest::Front::Letter
|
13
|
+
class TLetter
|
14
|
+
include Regextest::Common
|
15
|
+
include Regextest::Front::CharClass
|
16
|
+
include Regextest::Front::Range
|
17
|
+
@@id = 0 # a class variable for generating unique name of element
|
18
|
+
@@unicode_ranges = {}
|
19
|
+
|
20
|
+
# Constructor.
#
# type:: lexer token type; kept in @data_type and later passed to set_attr
# val::  indexable token data: val[0] is the text, val[1] the offset and
#        val[2] the length; missing entries fall back to "", -1 and 0
#
# The underlying object (@obj) is not built here; it is created by set_attr,
# which set_options invokes once the options are known.
def initialize(type, val)
  TstLog("TLetter: type:#{type}, value:#{val}")
  @options   = nil
  @obj       = nil
  @data_type = type
  @value, @offset, @length = (val[0] || ""), (val[1] || -1), (val[2] || 0)
end
|
30
|
+
|
31
|
+
attr_reader :offset, :length, :value
|
32
|
+
|
33
|
+
# Build the object (@obj) that represents this letter, according to the
# lexer token type.
#
# type:: lexer token type (symbol)
# val::  token text
#
# Literal-like tokens are normalised to @data_type = :LEX_CHAR; class-like
# tokens are delegated to the generate_* helpers.
# Raises RuntimeError for :LEX_AND_AND (never expected here) and for any
# unknown token type.
def set_attr(type, val)
  case type
  when :LEX_CHAR, :LEX_SPACE
    @data_type = :LEX_CHAR
    @obj = CharClass.new([TRange.new(val)])
  when :LEX_SIMPLE_ESCAPE
    @data_type = :LEX_CHAR
    # drop the leading backslash and keep the escaped character itself
    @obj = CharClass.new([TRange.new(val[1..1])])
  when :LEX_CODE_LITERAL, :LEX_ESCAPED_LETTER, :LEX_UNICODE,
       :LEX_CONTROL_LETTER, :LEX_META_LETTER, :LEX_OCTET
    @data_type = :LEX_CHAR
    # SECURITY NOTE(review): the token text is evaluated as a Ruby
    # double-quoted string to decode the escape. This is only safe if the
    # scanner guarantees val cannot contain a double quote or "#{"; confirm
    # against the scanner before feeding untrusted patterns through here.
    @obj = CharClass.new([TRange.new(eval('"' + val + '"'))])
  when :LEX_BRACKET
    @obj = Regextest::Front::Bracket.new(val)
  when :LEX_SIMPLIFIED_CLASS
    @obj = generate_simplified_class(val)
  when :LEX_POSIX_CHAR_CLASS
    @obj = generate_char_class(val)
  when :LEX_UNICODE_CLASS
    @obj = generate_unicode_char(val)
  when :LEX_ANY_LETTER
    @obj = generate_any_char(val)
  when :LEX_SPECIAL_LETTER
    @obj = generate_special_char(val)
  when :LEX_AND_AND
    # (the assignment that used to follow this raise was unreachable)
    raise "Internal error: unexpected LEX_AND_AND"
  else
    raise "Error: internal error, type:#{type} not implemented"
  end
end
|
64
|
+
|
65
|
+
# Build the character class used for the "any letter" token.
# The base set depends on the options: the Unicode character set constant in
# Unicode mode, otherwise printable ASCII (0x20..0x7e). A newline is added
# when the multiline option is on.
#
# val:: token text (not used)
# Returns the CharClass.
def generate_any_char(val)
  base_ranges =
    if @options[:reg_options].is_unicode?
      TstConstUnicodeCharSet
    else
      [TRange.new("\x20", "\x7e")]
    end
  any_class = CharClass.new(base_ranges)
  any_class.add_ranges([TRange.new("\n")]) if @options[:reg_options].is_multiline?
  any_class
end
|
78
|
+
|
79
|
+
# Build the character class for the special escapes \R and \X.
# Always sets @data_type to :LEX_CHAR.
#
# val:: token text, "\\R" or "\\X"
# Returns the CharClass; raises RuntimeError for any other token text.
#
# Known limitations carried over from the original implementation:
# * \R only yields single characters; the two-character sequence
#   "\x0a\x0d" mentioned in the original BUG note is never produced.
# * \X in Unicode mode is approximated by one character outside class "M"
#   instead of the full (?>\P{M}\p{M}*) sequence.
def generate_special_char(val)
  @data_type = :LEX_CHAR
  case val
  when "\\R"
    unicode = @options[:reg_options].is_unicode?
    breaks = [TRange.new("\x0a", "\x0d")]
    breaks += [TRange.new("\u{85}"), TRange.new("\u{2028}", "\u{2029}")] if unicode
    CharClass.new(breaks)
  when "\\X"
    if @options[:reg_options].is_unicode?
      non_mark = CharClass.new("M")
      non_mark.set_reverse(@options)
      non_mark
    else
      CharClass.new([TRange.new("\x20", "\x7e"), TRange.new("\n")])
    end
  else
    raise "Error: internal error, invalid special char: #{val}"
  end
end
|
112
|
+
|
113
|
+
# Build the character class for a shorthand escape (\w, \d, \s, \h and their
# negations, plus the simple control escapes \n \r \t \f \a \e \v).
#
# val:: token text including the backslash, e.g. "\\w"
#
# Returns a CharClass. For the anchors \b \z \A \B \G \Z a warning is printed
# and nil is returned.
# NOTE(review): a nil result will fail later if the caller invokes methods on
# it; confirm that the scanner never routes anchors here.
#
# Raises RuntimeError for the unsupported prefixes \c \x \C \M and for any
# other token text.
#
# The negated classes (\W \D \H \S) use ASCII-only ranges regardless of the
# Unicode option.
def generate_simplified_class(val)
  # Decoded values of the simple control escapes. A fixed table is used
  # instead of eval, so no pattern text is ever executed as Ruby.
  control_chars = {
    "\\n" => "\n", "\\r" => "\r", "\\t" => "\t", "\\f" => "\f",
    "\\a" => "\a", "\\e" => "\e", "\\v" => "\v"
  }

  case val
  when "\\w"
    if @options[:reg_options].is_unicode?
      CharClass.new("Letter|Mark|Number|Connector_Punctuation")
    else
      CharClass.new(
        [TRange.new('a', 'z'), TRange.new('A', 'Z'),
         TRange.new('0', '9'), TRange.new('_')]
      )
    end
  when "\\W"
    CharClass.new(
      [TRange.new("\x20", "\x2f"), TRange.new("\x3a", "\x40"),
       TRange.new("\x5b", "\x5e"), TRange.new("\x60"),
       TRange.new("\x7b", "\x7e")]
    )
  when "\\d"
    if @options[:reg_options].is_unicode?
      CharClass.new("Decimal_Number")
    else
      CharClass.new([TRange.new('0', '9')])
    end
  when "\\D"
    CharClass.new([TRange.new("\x20", "\x2f"), TRange.new("\x3a", "\x7e")])
  when "\\h"
    CharClass.new([TRange.new('0', '9'), TRange.new('a', 'f'), TRange.new('A', 'F')])
  when "\\H"
    CharClass.new(
      [TRange.new("\x20", "\x2f"), TRange.new("\x3a", "\x40"),
       TRange.new("\x47", "\x60"), TRange.new("\x67", "\x7e")]
    )
  when "\\s"
    ascii_ranges = [TRange.new(' '), TRange.new("\x09", "\x0d")]
    if @options[:reg_options].is_unicode?
      spaces = CharClass.new("Line_Separator|Paragraph_Separator|Space_Separator")
      spaces.add_ranges(ascii_ranges + [TRange.new("\u{85}")])
      spaces
    else
      CharClass.new(ascii_ranges)
    end
  when "\\S"
    CharClass.new([TRange.new("\x21", "\x7e")])
  when *control_chars.keys
    # previously referenced an undefined local (`string`) and raised NameError
    CharClass.new([TRange.new(control_chars[val])])
  when "\\b", "\\z", "\\A", "\\B", "\\G", "\\Z"
    warn "Ignored unsupported escape char #{val}."
    nil
  when "\\c", "\\x", "\\C", "\\M"
    # previously interpolated the undefined local `string`
    raise "Error: Unsupported escape char #{val}"
  else
    raise "Error: Invalid simplified class #{val}"
  end
end
|
179
|
+
|
180
|
+
# Build the character class for a Unicode property escape
# (\p{...}, \p{^...}, \P{...} or \P{^...}).
#
# val:: token text, e.g. "\\p{Alpha}"
# Returns a fresh CharClass for the lower-cased property name, reversed when
# the escape is negated. Raises RuntimeError if val does not have the
# expected \p{...} shape.
#
# A new CharClass is created on every call. The previous implementation kept
# one instance per property name in a class-level cache and called
# set_reverse directly on that shared instance, so a negated escape (\P{X})
# altered what a later \p{X} received; it also returned set_reverse's result
# rather than the class object in the negated case.
# NOTE(review): if constructing a CharClass from a property name proves
# expensive, reintroduce caching of immutable range data rather than of the
# mutable CharClass object.
def generate_unicode_char(val)
  md = val.match(/(p|P)\{(\^?)(\w+)\}/)
  raise "Internal error, inconsistent Unicode class #{val}" unless md

  class_name = md[3].downcase
  # \P{^...} is equivalent to \p{...}: the two negation markers cancel out
  negated = (md[1] == "P") ^ (md[2] == "^")

  char_class = CharClass.new(class_name)
  char_class.set_reverse(@options) if negated
  char_class
end
|
209
|
+
|
210
|
+
# Placeholder with no implementation: the argument is ignored and nil is
# returned.
def classname_to_ranges(arrays)
  nil
end
|
212
|
+
|
213
|
+
# Build the character class for a POSIX bracket expression such as
# [:alpha:] or [:^digit:].
#
# val:: token text of the form "[:name:]" or "[:^name:]"
# Returns the CharClass, reversed when the "^" marker is present.
# Raises RuntimeError when val does not have that form, or (non-Unicode mode
# only) when the class name is not one of the names listed below. In Unicode
# mode the name is handed to CharClass unchecked.
def generate_char_class(val)
  md = val.match(/^\[\:(\^)?(\w+)\:\]$/)
  raise "internal error, invalid POSIX class name(#{val})" unless md

  negated = !md[1].nil?
  class_name = md[2]

  char_class =
    if @options[:reg_options].is_unicode?
      CharClass.new(class_name)
    else
      # ASCII definition of each class as a list of [first, last] or [single]
      ascii_classes = {
        'alnum'  => [['a', 'z'], ['A', 'Z'], ['0', '9']],
        'alpha'  => [['a', 'z'], ['A', 'Z']],
        'cntrl'  => [["\x00", "\x1f"], ["\x7f"]],
        'lower'  => [['a', 'z']],
        'print'  => [["\x20", "\x7e"]],
        'space'  => [[' '], ["\n"], ["\r"], ["\t"], ["\f"], ["\v"]],
        'digit'  => [['0', '9']],
        'upper'  => [['A', 'Z']],
        'blank'  => [[' '], ["\t"]],
        'graph'  => [["\x21", "\x7e"]],
        'punct'  => [["\x21", "\x23"], ["\x25", "\x2a"], ["\x2c", "\x2f"],
                     ["\x3a", "\x3b"], ["\x3f", "\x40"], ["\x5b", "\x5d"],
                     ["\x5f"], ["\x7b"], ["\x7d"]],
        'xdigit' => [['a', 'f'], ['A', 'F'], ['0', '9']],
        'word'   => [['a', 'z'], ['A', 'Z'], ['0', '9'], ['_']]
      }
      bounds_list = ascii_classes[class_name]
      raise "Error: Invalid character class #{val}" unless bounds_list
      CharClass.new(bounds_list.map { |bounds| TRange.new(*bounds) })
    end

  char_class.set_reverse(@options) if negated
  char_class
end
|
297
|
+
|
298
|
+
# Enumerate the codepoints of this letter by delegating to the underlying
# object built by set_attr; its result is returned unchanged.
def enumerate
  @obj.enumerate
end
|
302
|
+
|
303
|
+
# Bind the option set to this letter.
# The options are stored first because set_attr's helpers read @options; the
# underlying object is then (re)built from the stored token type and text,
# and the options are forwarded to it.
#
# options:: hash with at least a :reg_options entry
# Returns self.
def set_options(options)
  TstLog("Letter set_options: #{options[:reg_options].inspect}")
  @options = options
  set_attr(@data_type, @value)
  @obj.set_options(options)
  self
end
|
311
|
+
|
312
|
+
# Serialise this letter as a JSON object string. The "value" member is the
# JSON produced by the underlying object. Every call bumps the class-wide
# counter, so the emitted "id" ("L<n>") is different on each call.
def json
  @@id += 1
  fields = [
    %("type": "#{@data_type}"),
    %("id": "L#{@@id}"),
    %("value": #{@obj.json}),
    %("offset": #{@offset}),
    %("length": #{@length})
  ]
  "{#{fields.join(', ')}}"
end
|
320
|
+
end
|
321
|
+
end
|
322
|
+
|
323
|
+
|
324
|
+
# Test suite (execute when this file is specified in command line)
|
325
|
+
if __FILE__ == $0
|
326
|
+
end
|
327
|
+
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
# A class for managing parentheses
|
6
|
+
class Regextest::Front::ManageParentheses
|
7
|
+
# Start with no registered parentheses:
# @paren_hash maps a name (group name or generated "$$_<n>") to a parenthesis,
# @paren_array lists the capturing parentheses.
def initialize
  @paren_hash, @paren_array = {}, []
end
|
11
|
+
|
12
|
+
# Register a parenthesis.
#
# paren:: object responding to #prefix (text after "(" / "(?") and #name
# Returns paren.
#
# The parenthesis is appended to the list of capturing groups unless its
# prefix marks it as non-capturing:
# * ends with ":"                    -- (?:...), (?i:...), ...
# * consists only of option letters  -- (?imx), (?a), (?i-m), ...
# * starts with =, ! or >, or contains <= / <!  -- look-around / atomic
# A named parenthesis is additionally registered under its name.
#
# The option-letter test accepts the same letters as Paren#set_options
# (i m x d a u). Previously only i, m and x were recognised here, so an
# option group such as (?a) was counted as a capturing group and shifted the
# numbering of every following group.
#
# NOTE(review): a conditional prefix such as "(1)" is still treated as
# capturing here; confirm whether conditionals should be numbered.
def add(paren)
  prefix = paren.prefix
  capturing =
    prefix.length == 0 ||
    (prefix[-1] != ':' &&
     prefix !~ /^[imxdau]*(?:\-[imx]*)?$/ &&
     prefix !~ /^[\=\!\>]|\<[\=\!]/)
  @paren_array.push(paren) if capturing

  # a named group (?<foo>...) can also be looked up by its name
  @paren_hash[paren.name] = paren if paren.name
  paren
end
|
30
|
+
|
31
|
+
# Number the capturing parentheses.
# Group numbers follow the position of the opening parenthesis in the
# pattern (offset order), not the order in which the groups were registered.
#
# The list is sorted in place: get_paren's relative lookup scans
# @paren_array for the first group whose offset lies past the reference, and
# that scan is only correct on an offset-ordered list. Previously a sorted
# copy was numbered while @paren_array kept its registration order.
#
# Each group gets the generated name "$$_<n>" (n counted from 1), both as a
# key in @paren_hash and through paren.set_refer_name.
# Returns the sorted list.
def sort
  @paren_array.sort_by!(&:offset)
  @paren_array.each_with_index do |paren, i|
    refer_name = "$$_#{i + 1}" # parenthesis numbers start from 1
    @paren_hash[refer_name] = paren
    paren.set_refer_name(refer_name)
  end
end
|
41
|
+
|
42
|
+
# Look up a registered parenthesis.
#
# get_id:: Integer group number, group name, or (with offset) a relative
#          number such as "-1" / "+1"
# offset:: position of the reference in the pattern; when given, get_id is
#          interpreted relative to that position
#
# Without offset: an Integer is looked up as "$$_<n>", anything else is used
# as the key directly.
# With offset: the base is the 1-based index of the first entry of
# @paren_array whose offset is greater than the reference (or size + 1 when
# there is none). A negative number is added to the base as-is; any other
# number is added after subtracting 1.
#
# Returns the parenthesis, or nil when nothing is registered under the key.
def get_paren(get_id, offset = nil)
  unless offset
    key = get_id.is_a?(Integer) ? "$$_#{get_id}" : get_id
    return @paren_hash[key]
  end

  following = @paren_array.index { |paren| paren.offset > offset }
  base = following ? following + 1 : @paren_array.size + 1

  delta = get_id.to_i
  target_id = delta < 0 ? base + delta : base + delta - 1
  @paren_hash["$$_#{target_id}"]
end
|
69
|
+
end
|
70
|
+
|
71
|
+
# Test suite (execute when this file is specified in command line)
|
72
|
+
if __FILE__ == $0
|
73
|
+
end
|
74
|
+
|
@@ -0,0 +1,153 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
require 'regextest/common'
|
6
|
+
require 'regextest/front/empty' # parser class for empty part ("", (|) etc.)
|
7
|
+
|
8
|
+
# Class for parsing parenthesis
|
9
|
+
module Regextest::Front::Parenthesis
|
10
|
+
|
11
|
+
class Paren
|
12
|
+
include Regextest::Common
|
13
|
+
include Regextest::Front::Empty
|
14
|
+
@@id = 0 # a class variable for generating unique name of element
|
15
|
+
|
16
|
+
# Constructor.
#
# paren_start:: token data of the opening part: [text, offset, length]
# element::     the parsed content, or nil for a group without content
# paren_end::   token data of the closing part, or nil
#
# With paren_end, the length spans from the start of the opening token to
# the end of the closing token; otherwise it is the opening token's length.
def initialize(paren_start, element = nil, paren_end = nil)
  @options = @@parse_options
  @paren_type, @offset = paren_start[0], paren_start[1]
  @length = paren_end ? (paren_end[1] - paren_start[1]) + paren_end[2] : paren_start[2]

  # Strip the leading "(" and optional "?". The trailing ")" is stripped as
  # well, except when the remaining prefix itself begins with "(".
  @prefix = @paren_type.sub(/^\(\??/, "")
  @prefix.sub!(/\)$/, "") unless @prefix.index("(") == 0

  @name = get_name(@prefix)
  @condition = nil # set at generating json
  @refer_name = nil

  if element
    TstLog("Parenthesis: name:#{@name}, offset:#{@offset}, element:#{element}")
    @element, @type_name = element, "LEX_PAREN"
  else
    TstLog("Parenthesis: name:#{@name}, offset:#{@offset}, element: \"\"")
    @element, @type_name = TEmpty.new, "LEX_OPTION_PAREN" # (?x-i) etc.
  end

  @generated_string = []
  @nest = 0
end
|
48
|
+
|
49
|
+
attr_reader :prefix, :name, :refer_name, :offset, :length
|
50
|
+
|
51
|
+
# Extract the group name from a prefix of the form <name> or 'name'.
#
# prefix:: parenthesis prefix text
# Returns the name, or nil when the prefix is not a name.
def get_name(prefix)
  md = prefix.match(/^[<'](\w+)[>']$/)
  md && md[1]
end
|
59
|
+
|
60
|
+
# Resolve the parenthesis a conditional group refers to.
#
# prefix:: parenthesis prefix; a condition looks like "(1)", "(<name>)" or
#          "('name')"
# Returns the referenced parenthesis as found through @options[:parens], or
# nil when the prefix is not a condition.
# Raises RuntimeError when the referenced number or name is not registered,
# or when the content of a conditional group is a selectable element with
# more than two candidates.
def get_condition(prefix)
  parens = @options[:parens]
  target = nil

  if (md = prefix.match(/^\((\d+)\)$/))
    target = parens.get_paren(md[1].to_i)
    raise "condition number #{prefix} is invalid" unless target
  elsif (md = prefix.match(/^\(<(\w+)>\)|\('(\w+)'\)$/))
    match_string = md[1] || md[2]
    target = parens.get_paren(match_string)
    raise "condition name (#{match_string}) is not found" unless target
  end

  # a conditional group offers at most a "yes" and a "no" alternative
  if target &&
     Regextest::Front::Selectable::Selectable === @element &&
     @element.candidates.size > 2
    raise "invalid condition. 1 or 2 selectable elements"
  end

  target
end
|
89
|
+
|
90
|
+
# Store the unique name under which back references address this
# parenthesis.
#
# name:: the reference name
# Returns the stored name.
def set_refer_name(name)
  @refer_name = name
end
|
94
|
+
|
95
|
+
# Return the most recently recorded generated string of this parenthesis.
#
# relative_num:: accepted but not used
# Returns the last entry of @generated_string; when nothing has been
# recorded yet, prints a warning and returns nil.
def get_value(relative_num = 0)
  return @generated_string[-1] unless @generated_string.empty?

  warn "Error: refer uninitialized parenthesis"
  nil
end
|
105
|
+
|
106
|
+
# Apply this parenthesis' option letters and pass the resulting options on
# to the contained element.
#
# options:: hash with a :reg_options entry; the hash itself is not modified,
#           the element receives a copy carrying the effective options
# Returns self.
#
# When the prefix consists of option letters:
# * with a trailing ":" (scoped form, e.g. "i:") the regex options are
#   duplicated first, so the change is visible only to the element;
# * without ":" (e.g. "i") the caller's regex-options object is modified
#   directly.
# Any other prefix leaves the regex options as they are.
def set_options(options)
  reg_options = options[:reg_options]
  TstLog("Parenthesis set_options before: #{reg_options.inspect}, prefix: #{@prefix}")

  cur_options = reg_options
  if (md = @prefix.match(/^([imxdau]*(?:\-[imx]*)?)(:)?$/))
    cur_options = reg_options.dup if md[2]
    cur_options.modify(md[1])
    TstLog("Parenthesis set_options after: #{cur_options.inspect}, new_regopt: #{md[1]}")
  end

  element_options = options.dup
  element_options[:reg_options] = cur_options
  @element.set_options(element_options)
  self
end
|
130
|
+
|
131
|
+
# Serialise this parenthesis as a JSON object string.
# The condition is resolved here (not at construction time) via
# get_condition; "condition_name" is the refer name of the resolved
# parenthesis, or empty when there is none. Offset and length are emitted as
# quoted strings. Every call bumps the class-wide counter, so the emitted
# "id" ("p<n>") is different on each call; the id is taken before the
# contained element is serialised.
def json
  @@id += 1
  @condition = get_condition(@prefix)
  condition_name = @condition ? @condition.refer_name : nil

  head = [
    %("type": "#{@type_name}"),
    %("name": "#{@name}"),
    %("offset": "#{@offset}"),
    %("length": "#{@length}"),
    %("prefix": "#{@prefix}"),
    %("refer_name": "#{@refer_name}"),
    %("condition_name": "#{condition_name}"),
    %("id": "p#{@@id}")
  ].join(", ")

  # the original layout has two spaces in front of "value"; kept as is
  "{#{head},  \"value\": #{@element.json}}"
end
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
# Test suite (execute when this file is specified in command line)
|
151
|
+
if __FILE__ == $0
|
152
|
+
end
|
153
|
+
|