regextest 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +25 -0
- data/README.md +88 -0
- data/Rakefile +55 -0
- data/bin/console +14 -0
- data/bin/regextest +4 -0
- data/bin/setup +7 -0
- data/contrib/Onigmo/RE.txt +522 -0
- data/contrib/Onigmo/UnicodeProps.txt +728 -0
- data/contrib/Onigmo/testpy.py +1319 -0
- data/contrib/unicode/Blocks.txt +298 -0
- data/contrib/unicode/CaseFolding.txt +1414 -0
- data/contrib/unicode/DerivedAge.txt +1538 -0
- data/contrib/unicode/DerivedCoreProperties.txt +11029 -0
- data/contrib/unicode/PropList.txt +1525 -0
- data/contrib/unicode/PropertyAliases.txt +193 -0
- data/contrib/unicode/PropertyValueAliases.txt +1420 -0
- data/contrib/unicode/README.txt +25 -0
- data/contrib/unicode/Scripts.txt +2539 -0
- data/contrib/unicode/UnicodeData.txt +29215 -0
- data/lib/pre-case-folding.rb +101 -0
- data/lib/pre-posix-char-class.rb +150 -0
- data/lib/pre-unicode.rb +116 -0
- data/lib/regextest.rb +268 -0
- data/lib/regextest/back.rb +58 -0
- data/lib/regextest/back/element.rb +151 -0
- data/lib/regextest/back/main.rb +356 -0
- data/lib/regextest/back/result.rb +498 -0
- data/lib/regextest/back/test-case.rb +268 -0
- data/lib/regextest/back/work-thread.rb +119 -0
- data/lib/regextest/common.rb +63 -0
- data/lib/regextest/front.rb +60 -0
- data/lib/regextest/front/anchor.rb +45 -0
- data/lib/regextest/front/back-refer.rb +120 -0
- data/lib/regextest/front/bracket-parser.rb +400 -0
- data/lib/regextest/front/bracket-parser.y +117 -0
- data/lib/regextest/front/bracket-scanner.rb +124 -0
- data/lib/regextest/front/bracket.rb +64 -0
- data/lib/regextest/front/builtin-functions.rb +31 -0
- data/lib/regextest/front/case-folding.rb +18 -0
- data/lib/regextest/front/char-class.rb +243 -0
- data/lib/regextest/front/empty.rb +43 -0
- data/lib/regextest/front/letter.rb +327 -0
- data/lib/regextest/front/manage-parentheses.rb +74 -0
- data/lib/regextest/front/parenthesis.rb +153 -0
- data/lib/regextest/front/parser.rb +1366 -0
- data/lib/regextest/front/parser.y +271 -0
- data/lib/regextest/front/range.rb +60 -0
- data/lib/regextest/front/repeat.rb +90 -0
- data/lib/regextest/front/repeatable.rb +77 -0
- data/lib/regextest/front/scanner.rb +187 -0
- data/lib/regextest/front/selectable.rb +65 -0
- data/lib/regextest/front/sequence.rb +73 -0
- data/lib/regextest/front/unicode.rb +1272 -0
- data/lib/regextest/regex-option.rb +144 -0
- data/lib/regextest/regexp.rb +44 -0
- data/lib/regextest/version.rb +5 -0
- data/lib/tst-reg-test.rb +159 -0
- data/regextest.gemspec +26 -0
- metadata +162 -0
@@ -0,0 +1,271 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
# Racc grammar for the regular-expression front end.
#
# The rules come in two parallel families:
#   * basic mode    : reg_sel    / reg_seq    / reg_rep    / reg_elm    / reg_let
#   * extended mode : reg_sel_ex / reg_seq_ex / reg_rep_ex / reg_elm_ex / reg_let_ex
# In the extended family LEX_SPACE and LEX_NEW_LINE are turned into TEmpty and
# LEX_SHARP starts a comment (reg_comment_ex); in the basic family the same
# tokens are ordinary letters.
#
# "options no_result_var" makes the value of each action block the semantic
# value of its rule, which is why the actions below are bare expressions.
class RegextestFrontParser
options no_result_var
rule
  # regular expression (start symbol)
  reg_exp: reg_sel

  # selectable elements: alternatives separated by LEX_OR.
  # An alternative that is missing on either side of LEX_OR is represented
  # by TEmpty.
  reg_sel:
           {TEmpty.new}
         | reg_seq
           {Selectable.new(val[0])}
         | reg_sel LEX_OR reg_seq
           {val[0].add(val[2])}
         | reg_sel LEX_OR
           {val[0].add(TEmpty.new)}
         | LEX_OR reg_sel
           {Selectable.new(TEmpty.new).add(val[1])}

  # sequence of elements (basic mode).
  # LEX_OPTION_PAREN_1 is followed by an extended-mode sequence,
  # LEX_OPTION_PAREN_2 by a basic-mode sequence; in both cases the option
  # token itself is wrapped in a Paren and kept in the sequence.
  reg_seq: reg_rep
           {Sequence.new(val[0])}
         | reg_seq reg_rep
           {val[0].add(val[1])}
         | LEX_OPTION_PAREN_1 reg_seq_ex            # ((?x)a b c). transit to extended mode
           {Sequence.new(Paren.new(val[0])).concatinate(val[1])}
         | reg_seq LEX_OPTION_PAREN_1 reg_seq_ex    # (a (?x)b c). transit to extended mode
           {val[0].add(Paren.new(val[1])).concatinate(val[2])}
         | LEX_OPTION_PAREN_2 reg_seq               # ((?-x)a b c). stay basic mode
           {Sequence.new(Paren.new(val[0])).concatinate(val[1])}
         | reg_seq LEX_OPTION_PAREN_2 reg_seq       # (a (?-x)b c). stay basic mode
           {val[0].add(Paren.new(val[1])).concatinate(val[2])}
         | LEX_OPTION_PAREN_1                       # ((?x)). transit to extended mode, nothing follows
           {Sequence.new(Paren.new(val[0]))}
         | LEX_OPTION_PAREN_2                       # ((?-x)). stay basic mode, nothing follows
           {Sequence.new(Paren.new(val[0]))}

  # repeatable elements: an element followed by any number of quantifiers.
  # Each LEX_QUANTIFIER is attached to the same Repeatable via set_quant.
  reg_rep: reg_elm
           {Repeatable.new(val[0])}
         | reg_rep LEX_QUANTIFIER
           {val[0].set_quant(val[1])}

  # element (a letter or selectable element in parentheses).
  # Every parenthesis is handed to @options[:parens].add, and the value that
  # call returns becomes the node.
  # LEX_PAREN_START_EX1 switches the contents to the extended rules,
  # LEX_PAREN_START_EX2 switches them to the basic rules.
  reg_elm: reg_let
           {val[0]}
         | LEX_PAREN_START reg_sel LEX_PAREN_END
           {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
         | LEX_PAREN_START_EX1 reg_sel_ex LEX_PAREN_END
           {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
         | LEX_PAREN_START_EX2 reg_sel LEX_PAREN_END
           {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}

  # letter (basic mode): one token becomes one TLetter / BackRefer / Anchor
  # node, except LEX_BRACKET, which is delegated to the bracket parser.
  reg_let: LEX_CHAR             {TLetter.new(:LEX_CHAR, val[0])}
         | LEX_OCTET            {TLetter.new(:LEX_OCTET, val[0])}
         | LEX_BACK_REFER       {BackRefer.new(:LEX_BACK_REFER, val[0])}
         | LEX_CODE_LITERAL     {TLetter.new(:LEX_CODE_LITERAL, val[0])}
         | LEX_NAMED_REFER      {BackRefer.new(:LEX_NAMED_REFER, val[0])}
         | LEX_NAMED_GENERATE   {BackRefer.new(:LEX_NAMED_GENERATE, val[0])}
         | LEX_CONTROL_LETTER   {TLetter.new(:LEX_CONTROL_LETTER, val[0])}
         # NOTE(review): LEX_META_LETTER is built with the :LEX_CONTROL_LETTER
         # type -- confirm this is intentional and not a copy/paste slip.
         | LEX_META_LETTER      {TLetter.new(:LEX_CONTROL_LETTER, val[0])}
         | LEX_ESCAPED_LETTER   {TLetter.new(:LEX_ESCAPED_LETTER, val[0])}
         | LEX_UNICODE          {TLetter.new(:LEX_UNICODE, val[0])}
         | LEX_SIMPLIFIED_CLASS {TLetter.new(:LEX_SIMPLIFIED_CLASS, val[0])}
         | LEX_UNICODE_CLASS    {TLetter.new(:LEX_UNICODE_CLASS, val[0])}
         | LEX_BRACKET          {@bracket_parser.parse(val[0], @options)}  # using another parser
         | LEX_ANC_LINE_BEGIN   {Anchor.new(:LEX_ANC_LINE_BEGIN, val[0])}
         | LEX_ANC_LINE_END     {Anchor.new(:LEX_ANC_LINE_END, val[0])}
         | LEX_ANC_WORD_BOUND   {Anchor.new(:LEX_ANC_WORD_BOUND, val[0])}
         | LEX_ANC_WORD_UNBOUND {Anchor.new(:LEX_ANC_WORD_UNBOUND, val[0])}
         | LEX_ANC_STRING_BEGIN {Anchor.new(:LEX_ANC_STRING_BEGIN, val[0])}
         | LEX_ANC_STRING_END   {Anchor.new(:LEX_ANC_STRING_END, val[0])}
         | LEX_ANC_STRING_END2  {Anchor.new(:LEX_ANC_STRING_END2, val[0])}
         | LEX_ANC_LOOK_BEHIND2 {Anchor.new(:LEX_ANC_LOOK_BEHIND2, val[0])}
         | LEX_ANC_MATCH_START  {Anchor.new(:LEX_ANC_MATCH_START, val[0])}
         | LEX_SPECIAL_LETTER   {TLetter.new(:LEX_SPECIAL_LETTER, val[0])}
         | LEX_MINUS            {TLetter.new(:LEX_CHAR, val[0])}     # no special meaning at basic mode
         | LEX_AND_AND          {TLetter.new(:LEX_AND_AND, val[0])}
         | LEX_SPACE            {TLetter.new(:LEX_SPACE, val[0])}
         | LEX_SIMPLE_ESCAPE    {TLetter.new(:LEX_SIMPLE_ESCAPE, val[0])}
         | LEX_SHARP            {TLetter.new(:LEX_CHAR, val[0])}     # no special meaning at basic mode
         | LEX_NEW_LINE         {TLetter.new(:LEX_CHAR, val[0])}     # no special meaning at basic mode
         | LEX_ANY_LETTER       {TLetter.new(:LEX_ANY_LETTER, val[0])}

  # EXTENDED MODE
  # selectable elements (same shape as reg_sel, built from extended sequences)
  reg_sel_ex:
           {TEmpty.new}
         | reg_seq_ex
           {Selectable.new(val[0])}
         | reg_sel_ex LEX_OR reg_seq_ex
           {val[0].add(val[2])}
         | reg_sel_ex LEX_OR
           {val[0].add(TEmpty.new)}
         | LEX_OR reg_sel_ex
           {Selectable.new(TEmpty.new).add(val[1])}

  # sequence of elements (extended mode)
  reg_seq_ex: reg_rep_ex
           {Sequence.new(val[0])}
         | reg_seq_ex reg_rep_ex
           {val[0].add(val[1])}
         | LEX_OPTION_PAREN_1 reg_seq_ex             # ((?x)a b c). stay extended mode
           {Sequence.new(Paren.new(val[0])).concatinate(val[1])}
         | reg_seq_ex LEX_OPTION_PAREN_1 reg_seq_ex  # (a (?x)b c). stay extended mode
           {val[0].add(Paren.new(val[1])).concatinate(val[2])}
         | LEX_OPTION_PAREN_2 reg_seq                # ((?-x)a b c). transit to basic mode
           {Sequence.new(Paren.new(val[0])).concatinate(val[1])}
         | reg_seq_ex LEX_OPTION_PAREN_2 reg_seq     # (a (?-x)b c). transit to basic mode
           {val[0].add(Paren.new(val[1])).concatinate(val[2])}
         | LEX_OPTION_PAREN_1                        # ((?x)). stay extended mode, nothing follows
           {Sequence.new(Paren.new(val[0]))}
         | LEX_OPTION_PAREN_2                        # ((?-x)). transit to basic mode, nothing follows
           {Sequence.new(Paren.new(val[0]))}

  # repeatable elements (extended mode)
  reg_rep_ex: reg_elm_ex
           {Repeatable.new(val[0])}
         | reg_rep_ex LEX_QUANTIFIER
           {val[0].set_quant(val[1])}

  # element (a letter or selectable element in parentheses).
  # A plain LEX_PAREN_START keeps its contents in extended mode here.
  reg_elm_ex: reg_let_ex
           {val[0]}
         | LEX_PAREN_START reg_sel_ex LEX_PAREN_END
           {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
         | LEX_PAREN_START_EX1 reg_sel_ex LEX_PAREN_END
           {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
         | LEX_PAREN_START_EX2 reg_sel LEX_PAREN_END
           {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}

  # letter (extended mode). Differs from reg_let only for LEX_NEW_LINE,
  # LEX_SPACE (both become TEmpty) and LEX_SHARP (starts a comment).
  reg_let_ex: LEX_CHAR          {TLetter.new(:LEX_CHAR, val[0])}
         | LEX_OCTET            {TLetter.new(:LEX_OCTET, val[0])}
         | LEX_BACK_REFER       {BackRefer.new(:LEX_BACK_REFER, val[0])}
         | LEX_CODE_LITERAL     {TLetter.new(:LEX_CODE_LITERAL, val[0])}
         | LEX_NAMED_REFER      {BackRefer.new(:LEX_NAMED_REFER, val[0])}
         | LEX_NAMED_GENERATE   {BackRefer.new(:LEX_NAMED_GENERATE, val[0])}
         | LEX_CONTROL_LETTER   {TLetter.new(:LEX_CONTROL_LETTER, val[0])}
         # NOTE(review): same :LEX_CONTROL_LETTER type as in reg_let -- confirm.
         | LEX_META_LETTER      {TLetter.new(:LEX_CONTROL_LETTER, val[0])}
         | LEX_ESCAPED_LETTER   {TLetter.new(:LEX_ESCAPED_LETTER, val[0])}
         | LEX_UNICODE          {TLetter.new(:LEX_UNICODE, val[0])}
         | LEX_SIMPLIFIED_CLASS {TLetter.new(:LEX_SIMPLIFIED_CLASS, val[0])}
         | LEX_UNICODE_CLASS    {TLetter.new(:LEX_UNICODE_CLASS, val[0])}
         | LEX_BRACKET          {@bracket_parser.parse(val[0], @options)}  # using another parser
         | LEX_ANC_LINE_BEGIN   {Anchor.new(:LEX_ANC_LINE_BEGIN, val[0])}
         | LEX_ANC_LINE_END     {Anchor.new(:LEX_ANC_LINE_END, val[0])}
         | LEX_ANC_WORD_BOUND   {Anchor.new(:LEX_ANC_WORD_BOUND, val[0])}
         | LEX_ANC_WORD_UNBOUND {Anchor.new(:LEX_ANC_WORD_UNBOUND, val[0])}
         | LEX_ANC_STRING_BEGIN {Anchor.new(:LEX_ANC_STRING_BEGIN, val[0])}
         | LEX_ANC_STRING_END   {Anchor.new(:LEX_ANC_STRING_END, val[0])}
         | LEX_ANC_STRING_END2  {Anchor.new(:LEX_ANC_STRING_END2, val[0])}
         | LEX_ANC_LOOK_BEHIND2 {Anchor.new(:LEX_ANC_LOOK_BEHIND2, val[0])}
         | LEX_ANC_MATCH_START  {Anchor.new(:LEX_ANC_MATCH_START, val[0])}
         | LEX_SPECIAL_LETTER   {TLetter.new(:LEX_SPECIAL_LETTER, val[0])}
         | LEX_MINUS            {TLetter.new(:LEX_CHAR, val[0])}
         | LEX_AND_AND          {TLetter.new(:LEX_AND_AND, val[0])}
         | LEX_NEW_LINE         {TEmpty.new}                         # ignore new line at extended mode
         | LEX_SPACE            {TEmpty.new}                         # ignore spaces at extended mode
         | LEX_SIMPLE_ESCAPE    {TLetter.new(:LEX_SIMPLE_ESCAPE, val[0])}
         | LEX_ANY_LETTER       {TLetter.new(:LEX_ANY_LETTER, val[0])}
         | LEX_SHARP reg_comment_ex {TEmpty.new}

  # comment of extended mode: every token up to and including the next
  # LEX_NEW_LINE is consumed; the rule has no actions and reg_let_ex replaces
  # the whole comment with TEmpty.
  # NOTE(review): as written, a comment that is not terminated by LEX_NEW_LINE
  # has no matching production, and LEX_OPTION_PAREN_1 / LEX_OPTION_PAREN_2 are
  # not listed as comment content -- confirm the scanner never produces them here.
  reg_comment_ex: LEX_NEW_LINE                     # end of the comment
         | LEX_CHAR             reg_comment_ex
         | LEX_OCTET            reg_comment_ex
         | LEX_BACK_REFER       reg_comment_ex
         | LEX_CODE_LITERAL     reg_comment_ex
         | LEX_NAMED_REFER      reg_comment_ex
         | LEX_NAMED_GENERATE   reg_comment_ex
         | LEX_CONTROL_LETTER   reg_comment_ex
         | LEX_META_LETTER      reg_comment_ex
         | LEX_ESCAPED_LETTER   reg_comment_ex
         | LEX_UNICODE          reg_comment_ex
         | LEX_SIMPLIFIED_CLASS reg_comment_ex
         | LEX_UNICODE_CLASS    reg_comment_ex
         | LEX_BRACKET          reg_comment_ex
         | LEX_ANC_LINE_BEGIN   reg_comment_ex
         | LEX_ANC_LINE_END     reg_comment_ex
         | LEX_ANC_WORD_BOUND   reg_comment_ex
         | LEX_ANC_WORD_UNBOUND reg_comment_ex
         | LEX_ANC_STRING_BEGIN reg_comment_ex
         | LEX_ANC_STRING_END   reg_comment_ex
         | LEX_ANC_STRING_END2  reg_comment_ex
         | LEX_ANC_LOOK_BEHIND2 reg_comment_ex
         | LEX_ANC_MATCH_START  reg_comment_ex
         | LEX_SPECIAL_LETTER   reg_comment_ex
         | LEX_MINUS            reg_comment_ex
         | LEX_AND_AND          reg_comment_ex
         | LEX_SPACE            reg_comment_ex
         | LEX_SIMPLE_ESCAPE    reg_comment_ex
         | LEX_ANY_LETTER       reg_comment_ex
         | LEX_SHARP            reg_comment_ex
         | LEX_PAREN_START      reg_comment_ex
         | LEX_PAREN_START_EX1  reg_comment_ex
         | LEX_PAREN_START_EX2  reg_comment_ex
         | LEX_PAREN_END        reg_comment_ex
         | LEX_QUANTIFIER       reg_comment_ex
         | LEX_OR               reg_comment_ex

end
|
207
|
+
|
208
|
+
---- header
|
209
|
+
# parser classes
|
210
|
+
require 'regextest/front/empty' # parser class for empty part ("", (|) etc.)
|
211
|
+
require 'regextest/front/letter' # parser class for a letter
|
212
|
+
require 'regextest/front/range' # parser class for a range of letters
|
213
|
+
require 'regextest/front/selectable' # parser class for a selectable element
|
214
|
+
require 'regextest/front/parenthesis' # parser class for a parenthesis
|
215
|
+
require 'regextest/front/repeatable' # parser class for a repeatable elements
|
216
|
+
require 'regextest/front/sequence' # parser class for a sequence of elements
|
217
|
+
require 'regextest/front/bracket' # parser class for a character class (bracket)
|
218
|
+
require 'regextest/front/anchor' # parser class for a anchor
|
219
|
+
require 'regextest/front/back-refer' # parser class for a back reference
|
220
|
+
require 'regextest/front/bracket-parser' # bracket parser
|
221
|
+
|
222
|
+
---- inner
|
223
|
+
# modules for sharing procedures with bracket parser
|
224
|
+
include Regextest::Front::Empty
|
225
|
+
include Regextest::Front::Letter
|
226
|
+
include Regextest::Front::Range
|
227
|
+
include Regextest::Front::Selectable
|
228
|
+
include Regextest::Front::Parenthesis
|
229
|
+
include Regextest::Front::Repeatable
|
230
|
+
include Regextest::Front::Sequence
|
231
|
+
include Regextest::Front::Bracket
|
232
|
+
include Regextest::Front::Anchor
|
233
|
+
include Regextest::Front::BackRefer
|
234
|
+
|
235
|
+
# execute to parse
|
236
|
+
def parse(lex_words, options)
|
237
|
+
@options = options
|
238
|
+
|
239
|
+
# scanned lexical words
|
240
|
+
@q = lex_words
|
241
|
+
|
242
|
+
# bracket parser (class name is strange because of racc's restriction)
|
243
|
+
@bracket_parser = RegextestFrontBracketParser.new
|
244
|
+
|
245
|
+
# delete comments (since it is complecated to handle comments)
|
246
|
+
@q = @q.delete_if{|token| token[0] == :LEX_COMMENT}
|
247
|
+
|
248
|
+
# execute to parse
|
249
|
+
begin
|
250
|
+
parse_result = do_parse
|
251
|
+
rescue Racc::ParseError => ex
|
252
|
+
raise ex.message
|
253
|
+
end
|
254
|
+
|
255
|
+
parse_result
|
256
|
+
end
|
257
|
+
|
258
|
+
# parse next token
|
259
|
+
  # Token source for the parser: removes and returns the token at the front
  # of @q (the queue prepared by #parse). Returns nil once @q is empty.
  # NOTE(review): presumably Racc's do_parse treats that nil as end of
  # input -- confirm against the Racc runtime documentation.
  def next_token
    @q.shift
  end
|
262
|
+
|
263
|
+
# error handling routine. commented out because of a readability problem
|
264
|
+
#def on_error(t, val, vstack)
|
265
|
+
# if val
|
266
|
+
# raise "Parse error. offset=#{val[1]}, letter=#{val[0]}, stack=#{vstack}"
|
267
|
+
# else
|
268
|
+
# raise "Parse error. t=#{t}, val=#{val}, vstack=#{vstack}"
|
269
|
+
# end
|
270
|
+
#end
|
271
|
+
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
require 'regextest/common'
|
6
|
+
require 'regextest/front/case-folding' # case folding hash
|
7
|
+
|
8
|
+
# Consecutive codepoints
|
9
|
+
module Regextest::Front::Range
  # An inclusive run of codepoints, stored as two integers (@begin..@end).
  class TRange
    include Regextest::Common

    @@id = 0   # a class variable for generating unique name of element

    # Both are fixed at -1; this class does not use them itself.
    attr_reader :offset, :length

    # letter_begin:: first letter of the range (String, Integer or TLetter)
    # letter_end::   last letter of the range; when omitted the range holds
    #                the single codepoint of letter_begin
    def initialize(letter_begin, letter_end = nil)
      TstLog("TRange: #{letter_begin}-#{letter_end}")
      @begin  = parse_letter(letter_begin)
      @end    = letter_end ? parse_letter(letter_end) : @begin
      @offset = -1
      @length = -1
    end

    # Converts a letter to its codepoint.
    # * String  -> codepoint of its first character
    # * Integer -> returned unchanged
    # * TLetter -> its value is evaluated as a double-quoted Ruby string
    #              literal and the first codepoint of the result is taken
    # Anything else raises RuntimeError.
    def parse_letter(letter)
      return letter.unpack("U*").first if letter.is_a?(String)
      return letter if letter.is_a?(Integer)

      unless letter.is_a?(Regextest::Front::Letter::TLetter)
        raise "Internal error. invalid letter class #{letter}"
      end

      # NOTE(review): eval runs text that originates from the pattern being
      # parsed -- confirm patterns are never taken from untrusted input.
      evaluated = eval('"' + letter.value + '"')
      evaluated.unpack("U*").first
    end

    # Every codepoint from @begin through @end, as an array.
    def enumerate
      [*(@begin..@end)]
    end

    # JSON text for this range (codepoints of Unicode); each call takes a
    # fresh id from the class-wide counter.
    def json
      @@id += 1
      "{\"type\": \"LEX_RANGE\", \"id\": \"G#{@@id}\", \"begin\": #{@begin}, \"end\": #{@end}}"
    end
  end
end
|
56
|
+
|
57
|
+
# Test suite (execute when this file is specified in command line)
|
58
|
+
if __FILE__ == $0
|
59
|
+
end
|
60
|
+
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
# Quantifier class
|
6
|
+
module Regextest::Front::Repeat
  # One parsed quantifier: minimum count, maximum count and an option bit set.
  class Repeat
    include Regextest::Common

    # Constants for the class (bit flags stored in #option)
    TstOptGreedy     = 1
    TstOptReluctant  = 2
    TstOptPossessive = 4

    attr_reader :max_value, :min_value, :option

    # param:: quantifier notation such as "*", "+?", "{2,3}"; when nil or
    #         false the defaults (exactly once, no option bits) are kept.
    def initialize(param)
      @min_value = 1
      @max_value = 1
      @option = 0
      set_values(param) if param
    end

    # Derives minimum, maximum and option from a quantifier notation.
    #
    # Accepted forms: ? * + {n} {n,m} {,m} {n,}, each optionally followed by
    # "?" (reluctant) or "+" (possessive); without a suffix the greedy bit is
    # set. Open-ended forms use TstConstRepeatMax as the maximum
    # (presumably provided by Regextest::Common -- it is not defined here).
    #
    # Raises RuntimeError for any other notation.
    #
    # NOTE(review): Onigmo's Ruby syntax is documented as not treating a
    # suffix after a brace quantifier as reluctant/possessive in every case;
    # confirm which brace forms the scanner actually passes in.
    def set_values(param)
      case param
      when '?', '??', '?+'
        apply_quantifier(0, 1, param[1..-1])
      when '*', '*?', '*+'
        apply_quantifier(0, TstConstRepeatMax, param[1..-1])
      when '+', '+?', '++'
        apply_quantifier(1, TstConstRepeatMax, param[1..-1])
      when /^\{(\d+)\}([\?\+]?)$/          # {3}, etc.
        exact = $1.to_i
        apply_quantifier(exact, exact, $2)
      when /^\{(\d+),(\d+)\}([\?\+]?)$/    # {2,3}, etc.
        # the suffix is the THIRD group here; $2 is the maximum count
        apply_quantifier($1.to_i, $2.to_i, $3)
      when /^\{,(\d+)\}([\?\+]?)$/         # {,3}, etc.
        apply_quantifier(0, $1.to_i, $2)
      when /^\{(\d+),\}([\?\+]?)$/         # {3,}, etc.
        lower = $1.to_i
        upper = TstConstRepeatMax
        # keep the range non-empty when the minimum exceeds the default cap
        upper = lower + TstConstRepeatMax if upper < lower
        apply_quantifier(lower, upper, $2)
      else
        raise "Error: repeat notation #{param} invalid"
      end
    end

    # a+?, etc.
    def is_reluctant?
      ((@option & TstOptReluctant) != 0)
    end

    # a++. etc.
    def is_possessive?
      ((@option & TstOptPossessive) != 0)
    end

    private

    # Stores the bounds and adds the option bit selected by the suffix.
    # suffix is "" (greedy), "?" (reluctant) or "+" (possessive). The optional
    # regexp group captures "" rather than nil when no suffix is present, so
    # greedy is chosen by exclusion instead of by a nil test.
    def apply_quantifier(min, max, suffix)
      @min_value = min
      @max_value = max
      @option |=
        case suffix
        when "?" then TstOptReluctant
        when "+" then TstOptPossessive
        else TstOptGreedy
        end
    end
  end
end
|
87
|
+
|
88
|
+
# Test suite (execute when this file is specified in command line)
|
89
|
+
if __FILE__ == $0
|
90
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
require 'regextest/common'
|
6
|
+
require 'regextest/front/repeat'
|
7
|
+
|
8
|
+
# An element (a letter or a parenthesis) with quantifier
|
9
|
+
module Regextest::Front::Repeatable
  # An element together with the quantifiers that were applied to it.
  class Repeatable
    include Regextest::Common
    include Regextest::Front::Repeat

    @@id = 0   # a class variable for generating unique name of element

    attr_reader :offset, :length

    # value:: the wrapped element; its offset and length are copied as the
    #         starting extent of this node
    def initialize(value)
      TstLog("Repeatable: #{value}")
      @value  = value
      @offset = value.offset
      @length = value.length
      @quant  = []
    end

    # Attaches one more quantifier.
    # quant_value:: quantifier token; element 0 is the notation and element 2
    #               is added to this node's length
    # Returns self so the grammar action can use the call as the rule value.
    def set_quant(quant_value)
      notation = quant_value[0]
      @length += quant_value[2]
      TstLog("Repeatable quant: #{quant_value}")
      @quant << Repeat.new(notation)
      self
    end

    # Passes the options on to the wrapped element; returns self.
    def set_options(options)
      TstLog("Repeatable set_options: #{options[:reg_options].inspect}")
      @value.set_options(options)
      self
    end

    # JSON text for this node. Without quantifiers it is just the wrapped
    # element's JSON; otherwise one LEX_REPEAT object is opened per
    # quantifier, the element's JSON is placed in the innermost "value",
    # and the objects are then closed in the order the quantifiers were added.
    def json
      # opening halves (ids are taken before the wrapped element is rendered)
      openers = @quant.map do
        @@id += 1
        "{\"type\": \"LEX_REPEAT\", " +
        " \"id\": \"m#{@@id}\", " +
        " \"value\": "
      end

      json_string = openers.join + @value.json
      return json_string if @quant.empty?

      # closing halves carrying the repeat attributes
      closers = @quant.map do | current |
        repeat_option = []
        repeat_option << "reluctant"  if current.is_reluctant?
        repeat_option << "possessive" if current.is_possessive?

        " \"offset\": #{@offset}, " +
        " \"length\": #{@length}, " +
        " \"min_repeat\": #{current.min_value}, " +
        " \"max_repeat\": #{current.max_value}, " +
        " \"repeat_option\": #{repeat_option} " +
        "}"
      end

      json_string + ", " + closers.join(", ")
    end
  end
end
|
74
|
+
|
75
|
+
# Test suite (execute when this file is specified in command line)
|
76
|
+
if __FILE__ == $0
|
77
|
+
end
|