regextest 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +25 -0
- data/README.md +88 -0
- data/Rakefile +55 -0
- data/bin/console +14 -0
- data/bin/regextest +4 -0
- data/bin/setup +7 -0
- data/contrib/Onigmo/RE.txt +522 -0
- data/contrib/Onigmo/UnicodeProps.txt +728 -0
- data/contrib/Onigmo/testpy.py +1319 -0
- data/contrib/unicode/Blocks.txt +298 -0
- data/contrib/unicode/CaseFolding.txt +1414 -0
- data/contrib/unicode/DerivedAge.txt +1538 -0
- data/contrib/unicode/DerivedCoreProperties.txt +11029 -0
- data/contrib/unicode/PropList.txt +1525 -0
- data/contrib/unicode/PropertyAliases.txt +193 -0
- data/contrib/unicode/PropertyValueAliases.txt +1420 -0
- data/contrib/unicode/README.txt +25 -0
- data/contrib/unicode/Scripts.txt +2539 -0
- data/contrib/unicode/UnicodeData.txt +29215 -0
- data/lib/pre-case-folding.rb +101 -0
- data/lib/pre-posix-char-class.rb +150 -0
- data/lib/pre-unicode.rb +116 -0
- data/lib/regextest.rb +268 -0
- data/lib/regextest/back.rb +58 -0
- data/lib/regextest/back/element.rb +151 -0
- data/lib/regextest/back/main.rb +356 -0
- data/lib/regextest/back/result.rb +498 -0
- data/lib/regextest/back/test-case.rb +268 -0
- data/lib/regextest/back/work-thread.rb +119 -0
- data/lib/regextest/common.rb +63 -0
- data/lib/regextest/front.rb +60 -0
- data/lib/regextest/front/anchor.rb +45 -0
- data/lib/regextest/front/back-refer.rb +120 -0
- data/lib/regextest/front/bracket-parser.rb +400 -0
- data/lib/regextest/front/bracket-parser.y +117 -0
- data/lib/regextest/front/bracket-scanner.rb +124 -0
- data/lib/regextest/front/bracket.rb +64 -0
- data/lib/regextest/front/builtin-functions.rb +31 -0
- data/lib/regextest/front/case-folding.rb +18 -0
- data/lib/regextest/front/char-class.rb +243 -0
- data/lib/regextest/front/empty.rb +43 -0
- data/lib/regextest/front/letter.rb +327 -0
- data/lib/regextest/front/manage-parentheses.rb +74 -0
- data/lib/regextest/front/parenthesis.rb +153 -0
- data/lib/regextest/front/parser.rb +1366 -0
- data/lib/regextest/front/parser.y +271 -0
- data/lib/regextest/front/range.rb +60 -0
- data/lib/regextest/front/repeat.rb +90 -0
- data/lib/regextest/front/repeatable.rb +77 -0
- data/lib/regextest/front/scanner.rb +187 -0
- data/lib/regextest/front/selectable.rb +65 -0
- data/lib/regextest/front/sequence.rb +73 -0
- data/lib/regextest/front/unicode.rb +1272 -0
- data/lib/regextest/regex-option.rb +144 -0
- data/lib/regextest/regexp.rb +44 -0
- data/lib/regextest/version.rb +5 -0
- data/lib/tst-reg-test.rb +159 -0
- data/regextest.gemspec +26 -0
- metadata +162 -0
@@ -0,0 +1,271 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
class RegextestFrontParser
|
6
|
+
options no_result_var
|
7
|
+
rule
|
8
|
+
# regular expression: a whole pattern is a single selectable expression.
# This is the first rule of the grammar and it starts in basic
# (non-extended) mode, i.e. it uses reg_sel rather than reg_sel_ex.
|
9
|
+
reg_exp: reg_sel
|
10
|
+
|
11
|
+
# selectable elements (basic mode): alternatives separated by LEX_OR.
# A non-empty alternative is collected into a Selectable; a missing
# branch ("a|" or "|a") is represented by a TEmpty entry.
# NOTE(review): the empty production yields a bare TEmpty, not a
# Selectable -- confirm that consumers of reg_sel accept both.
|
12
|
+
reg_sel:
|
13
|
+
{TEmpty.new}
|
14
|
+
| reg_seq
|
15
|
+
{Selectable.new(val[0])}
|
16
|
+
| reg_sel LEX_OR reg_seq
|
17
|
+
{val[0].add(val[2])}
|
18
|
+
| reg_sel LEX_OR
|
19
|
+
{val[0].add(TEmpty.new)}
|
20
|
+
| LEX_OR reg_sel
|
21
|
+
{Selectable.new(TEmpty.new).add(val[1])}
|
22
|
+
|
23
|
+
# sequence of elements (basic mode).
# An inline option token is wrapped in a Paren and the remainder of the
# sequence is appended with concatinate. After LEX_OPTION_PAREN_1 the
# remainder is parsed with the extended-mode rule (reg_seq_ex); after
# LEX_OPTION_PAREN_2 it is parsed with the basic-mode rule (reg_seq).
|
24
|
+
reg_seq: reg_rep
|
25
|
+
{Sequence.new(val[0])}
|
26
|
+
| reg_seq reg_rep
|
27
|
+
{val[0].add(val[1])}
|
28
|
+
| LEX_OPTION_PAREN_1 reg_seq_ex # ((?x)a b c). transit extended mode
|
29
|
+
{Sequence.new(Paren.new(val[0])).concatinate(val[1])}
|
30
|
+
| reg_seq LEX_OPTION_PAREN_1 reg_seq_ex # (a (?x)b c). transit extended mode
|
31
|
+
{val[0].add(Paren.new(val[1])).concatinate(val[2])}
|
32
|
+
| LEX_OPTION_PAREN_2 reg_seq # ((?-x)a b c). stay basic mode
|
33
|
+
{Sequence.new(Paren.new(val[0])).concatinate(val[1])}
|
34
|
+
| reg_seq LEX_OPTION_PAREN_2 reg_seq # (a (?-x)b c). stay basic mode
|
35
|
+
{val[0].add(Paren.new(val[1])).concatinate(val[2])}
|
36
|
+
| LEX_OPTION_PAREN_1 # ((?x)). transit extended mode
|
37
|
+
{Sequence.new(Paren.new(val[0]))}
|
38
|
+
| LEX_OPTION_PAREN_2 # ((?-x)). stay basic mode
|
39
|
+
{Sequence.new(Paren.new(val[0]))}
|
40
|
+
|
41
|
+
# repeatable elements (basic mode): an element wrapped in a Repeatable,
# optionally followed by any number of LEX_QUANTIFIER tokens; each
# quantifier is attached to the same Repeatable via set_quant.
|
42
|
+
reg_rep: reg_elm
|
43
|
+
{Repeatable.new(val[0])}
|
44
|
+
| reg_rep LEX_QUANTIFIER
|
45
|
+
{val[0].set_quant(val[1])}
|
46
|
+
|
47
|
+
# element (a letter or selectable element in parentheses), basic mode.
# Each parenthesized form builds a Paren from (open token, body, close
# token) and passes it to @options[:parens].add; the value returned by
# add becomes the value of the rule.
# LEX_PAREN_START_EX1 parses its body with the extended-mode rule
# (reg_sel_ex); LEX_PAREN_START and LEX_PAREN_START_EX2 use reg_sel.
|
48
|
+
reg_elm: reg_let
|
49
|
+
{val[0]}
|
50
|
+
| LEX_PAREN_START reg_sel LEX_PAREN_END
|
51
|
+
{@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
|
52
|
+
| LEX_PAREN_START_EX1 reg_sel_ex LEX_PAREN_END
|
53
|
+
{@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
|
54
|
+
| LEX_PAREN_START_EX2 reg_sel LEX_PAREN_END
|
55
|
+
{@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
|
56
|
+
|
57
|
+
# letter (basic mode): every leaf token is wrapped in a TLetter,
# BackRefer or Anchor tagged with a token-type symbol; LEX_BRACKET is
# delegated to the separate bracket parser.
# LEX_MINUS, LEX_SHARP and LEX_NEW_LINE are tagged :LEX_CHAR here.
# NOTE(review): LEX_META_LETTER is tagged :LEX_CONTROL_LETTER rather
# than :LEX_META_LETTER -- confirm this is intentional.
|
58
|
+
reg_let: LEX_CHAR {TLetter.new(:LEX_CHAR, val[0])}
|
59
|
+
| LEX_OCTET {TLetter.new(:LEX_OCTET, val[0])}
|
60
|
+
| LEX_BACK_REFER {BackRefer.new(:LEX_BACK_REFER, val[0])}
|
61
|
+
| LEX_CODE_LITERAL {TLetter.new(:LEX_CODE_LITERAL, val[0])}
|
62
|
+
| LEX_NAMED_REFER {BackRefer.new(:LEX_NAMED_REFER, val[0])}
|
63
|
+
| LEX_NAMED_GENERATE {BackRefer.new(:LEX_NAMED_GENERATE, val[0])}
|
64
|
+
| LEX_CONTROL_LETTER {TLetter.new(:LEX_CONTROL_LETTER, val[0])}
|
65
|
+
| LEX_META_LETTER {TLetter.new(:LEX_CONTROL_LETTER, val[0])}
|
66
|
+
| LEX_ESCAPED_LETTER {TLetter.new(:LEX_ESCAPED_LETTER, val[0])}
|
67
|
+
| LEX_UNICODE {TLetter.new(:LEX_UNICODE, val[0])}
|
68
|
+
| LEX_SIMPLIFIED_CLASS {TLetter.new(:LEX_SIMPLIFIED_CLASS, val[0])}
|
69
|
+
| LEX_UNICODE_CLASS {TLetter.new(:LEX_UNICODE_CLASS, val[0])}
|
70
|
+
| LEX_BRACKET {@bracket_parser.parse(val[0], @options)} # using another parser
|
71
|
+
| LEX_ANC_LINE_BEGIN {Anchor.new(:LEX_ANC_LINE_BEGIN, val[0])}
|
72
|
+
| LEX_ANC_LINE_END {Anchor.new(:LEX_ANC_LINE_END, val[0])}
|
73
|
+
| LEX_ANC_WORD_BOUND {Anchor.new(:LEX_ANC_WORD_BOUND, val[0])}
|
74
|
+
| LEX_ANC_WORD_UNBOUND {Anchor.new(:LEX_ANC_WORD_UNBOUND, val[0])}
|
75
|
+
| LEX_ANC_STRING_BEGIN {Anchor.new(:LEX_ANC_STRING_BEGIN, val[0])}
|
76
|
+
| LEX_ANC_STRING_END {Anchor.new(:LEX_ANC_STRING_END, val[0])}
|
77
|
+
| LEX_ANC_STRING_END2 {Anchor.new(:LEX_ANC_STRING_END2, val[0])}
|
78
|
+
| LEX_ANC_LOOK_BEHIND2 {Anchor.new(:LEX_ANC_LOOK_BEHIND2, val[0])}
|
79
|
+
| LEX_ANC_MATCH_START {Anchor.new(:LEX_ANC_MATCH_START, val[0])}
|
80
|
+
| LEX_SPECIAL_LETTER {TLetter.new(:LEX_SPECIAL_LETTER, val[0])}
|
81
|
+
| LEX_MINUS {TLetter.new(:LEX_CHAR, val[0])} # no special meaning at basic mode
|
82
|
+
| LEX_AND_AND {TLetter.new(:LEX_AND_AND, val[0])}
|
83
|
+
| LEX_SPACE {TLetter.new(:LEX_SPACE, val[0])}
|
84
|
+
| LEX_SIMPLE_ESCAPE {TLetter.new(:LEX_SIMPLE_ESCAPE, val[0])}
|
85
|
+
| LEX_SHARP {TLetter.new(:LEX_CHAR, val[0])} # no special meaning at basic mode
|
86
|
+
| LEX_NEW_LINE {TLetter.new(:LEX_CHAR, val[0])} # no special meaning at basic mode
|
87
|
+
| LEX_ANY_LETTER {TLetter.new(:LEX_ANY_LETTER, val[0])}
|
88
|
+
|
89
|
+
# EXTENDED MODE
# The *_ex rules below mirror the basic-mode rules but are built from
# reg_let_ex, where spaces, new lines and '#' comments produce TEmpty.
|
90
|
+
# selectable elements (extended mode): same shape as reg_sel, with
# reg_seq_ex as the alternative body
|
91
|
+
reg_sel_ex:
|
92
|
+
{TEmpty.new}
|
93
|
+
| reg_seq_ex
|
94
|
+
{Selectable.new(val[0])}
|
95
|
+
| reg_sel_ex LEX_OR reg_seq_ex
|
96
|
+
{val[0].add(val[2])}
|
97
|
+
| reg_sel_ex LEX_OR
|
98
|
+
{val[0].add(TEmpty.new)}
|
99
|
+
| LEX_OR reg_sel_ex
|
100
|
+
{Selectable.new(TEmpty.new).add(val[1])}
|
101
|
+
|
102
|
+
# sequence of elements (extended mode).
# An inline option token is wrapped in a Paren and the remainder of the
# sequence is appended with concatinate. After LEX_OPTION_PAREN_1 the
# remainder stays in extended mode (reg_seq_ex); after
# LEX_OPTION_PAREN_2 it is parsed with the basic-mode rule (reg_seq).
|
103
|
+
reg_seq_ex: reg_rep_ex
|
104
|
+
{Sequence.new(val[0])}
|
105
|
+
| reg_seq_ex reg_rep_ex
|
106
|
+
{val[0].add(val[1])}
|
107
|
+
| LEX_OPTION_PAREN_1 reg_seq_ex # ((?x)a b c). stay extended mode
|
108
|
+
{Sequence.new(Paren.new(val[0])).concatinate(val[1])}
|
109
|
+
| reg_seq_ex LEX_OPTION_PAREN_1 reg_seq_ex # (a (?x)b c). stay extended mode
|
110
|
+
{val[0].add(Paren.new(val[1])).concatinate(val[2])}
|
111
|
+
| LEX_OPTION_PAREN_2 reg_seq # ((?-x)a b c). transit to basic mode
|
112
|
+
{Sequence.new(Paren.new(val[0])).concatinate(val[1])}
|
113
|
+
| reg_seq_ex LEX_OPTION_PAREN_2 reg_seq # (a (?-x)b c). transit to basic mode
|
114
|
+
{val[0].add(Paren.new(val[1])).concatinate(val[2])}
|
115
|
+
| LEX_OPTION_PAREN_1 # ((?x)). stay extended mode
|
116
|
+
{Sequence.new(Paren.new(val[0]))}
|
117
|
+
| LEX_OPTION_PAREN_2 # ((?-x)). transit to basic mode
|
118
|
+
{Sequence.new(Paren.new(val[0]))}
|
119
|
+
|
120
|
+
# repeatable elements (extended mode): an extended-mode element wrapped
# in a Repeatable, optionally followed by any number of LEX_QUANTIFIER
# tokens; each quantifier is attached via set_quant.
|
121
|
+
reg_rep_ex: reg_elm_ex
|
122
|
+
{Repeatable.new(val[0])}
|
123
|
+
| reg_rep_ex LEX_QUANTIFIER
|
124
|
+
{val[0].set_quant(val[1])}
|
125
|
+
|
126
|
+
# element (a letter or selectable element in parentheses), extended mode.
# Each parenthesized form builds a Paren from (open token, body, close
# token) and passes it to @options[:parens].add; the value returned by
# add becomes the value of the rule.
# LEX_PAREN_START and LEX_PAREN_START_EX1 parse their body in extended
# mode (reg_sel_ex); LEX_PAREN_START_EX2 switches to basic mode (reg_sel).
|
127
|
+
reg_elm_ex: reg_let_ex
|
128
|
+
{val[0]}
|
129
|
+
| LEX_PAREN_START reg_sel_ex LEX_PAREN_END
|
130
|
+
{@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
|
131
|
+
| LEX_PAREN_START_EX1 reg_sel_ex LEX_PAREN_END
|
132
|
+
{@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
|
133
|
+
| LEX_PAREN_START_EX2 reg_sel LEX_PAREN_END
|
134
|
+
{@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
|
135
|
+
|
136
|
+
# letter (extended mode): same leaf tokens as reg_let, except that
# LEX_NEW_LINE and LEX_SPACE produce TEmpty, and LEX_SHARP starts a
# comment that is consumed by reg_comment_ex and also produces TEmpty.
# NOTE(review): LEX_META_LETTER is tagged :LEX_CONTROL_LETTER rather
# than :LEX_META_LETTER -- confirm this is intentional.
|
137
|
+
reg_let_ex: LEX_CHAR {TLetter.new(:LEX_CHAR, val[0])}
|
138
|
+
| LEX_OCTET {TLetter.new(:LEX_OCTET, val[0])}
|
139
|
+
| LEX_BACK_REFER {BackRefer.new(:LEX_BACK_REFER, val[0])}
|
140
|
+
| LEX_CODE_LITERAL {TLetter.new(:LEX_CODE_LITERAL, val[0])}
|
141
|
+
| LEX_NAMED_REFER {BackRefer.new(:LEX_NAMED_REFER, val[0])}
|
142
|
+
| LEX_NAMED_GENERATE {BackRefer.new(:LEX_NAMED_GENERATE, val[0])}
|
143
|
+
| LEX_CONTROL_LETTER {TLetter.new(:LEX_CONTROL_LETTER, val[0])}
|
144
|
+
| LEX_META_LETTER {TLetter.new(:LEX_CONTROL_LETTER, val[0])}
|
145
|
+
| LEX_ESCAPED_LETTER {TLetter.new(:LEX_ESCAPED_LETTER, val[0])}
|
146
|
+
| LEX_UNICODE {TLetter.new(:LEX_UNICODE, val[0])}
|
147
|
+
| LEX_SIMPLIFIED_CLASS {TLetter.new(:LEX_SIMPLIFIED_CLASS, val[0])}
|
148
|
+
| LEX_UNICODE_CLASS {TLetter.new(:LEX_UNICODE_CLASS, val[0])}
|
149
|
+
| LEX_BRACKET {@bracket_parser.parse(val[0], @options)} # using another parser
|
150
|
+
| LEX_ANC_LINE_BEGIN {Anchor.new(:LEX_ANC_LINE_BEGIN, val[0])}
|
151
|
+
| LEX_ANC_LINE_END {Anchor.new(:LEX_ANC_LINE_END, val[0])}
|
152
|
+
| LEX_ANC_WORD_BOUND {Anchor.new(:LEX_ANC_WORD_BOUND, val[0])}
|
153
|
+
| LEX_ANC_WORD_UNBOUND {Anchor.new(:LEX_ANC_WORD_UNBOUND, val[0])}
|
154
|
+
| LEX_ANC_STRING_BEGIN {Anchor.new(:LEX_ANC_STRING_BEGIN, val[0])}
|
155
|
+
| LEX_ANC_STRING_END {Anchor.new(:LEX_ANC_STRING_END, val[0])}
|
156
|
+
| LEX_ANC_STRING_END2 {Anchor.new(:LEX_ANC_STRING_END2, val[0])}
|
157
|
+
| LEX_ANC_LOOK_BEHIND2 {Anchor.new(:LEX_ANC_LOOK_BEHIND2, val[0])}
|
158
|
+
| LEX_ANC_MATCH_START {Anchor.new(:LEX_ANC_MATCH_START, val[0])}
|
159
|
+
| LEX_SPECIAL_LETTER {TLetter.new(:LEX_SPECIAL_LETTER, val[0])}
|
160
|
+
| LEX_MINUS {TLetter.new(:LEX_CHAR, val[0])}
|
161
|
+
| LEX_AND_AND {TLetter.new(:LEX_AND_AND, val[0])}
|
162
|
+
| LEX_NEW_LINE {TEmpty.new} # ignore new line at extended mode
|
163
|
+
| LEX_SPACE {TEmpty.new} # ignore spaces at extended mode
|
164
|
+
| LEX_SIMPLE_ESCAPE {TLetter.new(:LEX_SIMPLE_ESCAPE, val[0])}
|
165
|
+
| LEX_ANY_LETTER {TLetter.new(:LEX_ANY_LETTER, val[0])}
|
166
|
+
| LEX_SHARP reg_comment_ex {TEmpty.new}
|
167
|
+
|
168
|
+
# comment of extended mode: the tokens following LEX_SHARP. The rule is
# right-recursive: it accepts any of the listed tokens repeatedly and
# ends at the first LEX_NEW_LINE. No action is attached; the caller
# (reg_let_ex) replaces the whole comment with TEmpty.
# NOTE(review): every alternative ends in LEX_NEW_LINE, so a comment
# that runs to the end of the pattern without a new line has no
# matching production here -- confirm the scanner guarantees one.
# NOTE(review): LEX_OPTION_PAREN_1 / LEX_OPTION_PAREN_2 are not listed,
# so those tokens are not accepted inside a comment -- confirm.
|
169
|
+
reg_comment_ex: LEX_NEW_LINE # end of the comment
|
170
|
+
| LEX_CHAR reg_comment_ex
|
171
|
+
| LEX_OCTET reg_comment_ex
|
172
|
+
| LEX_BACK_REFER reg_comment_ex
|
173
|
+
| LEX_CODE_LITERAL reg_comment_ex
|
174
|
+
| LEX_NAMED_REFER reg_comment_ex
|
175
|
+
| LEX_NAMED_GENERATE reg_comment_ex
|
176
|
+
| LEX_CONTROL_LETTER reg_comment_ex
|
177
|
+
| LEX_META_LETTER reg_comment_ex
|
178
|
+
| LEX_ESCAPED_LETTER reg_comment_ex
|
179
|
+
| LEX_UNICODE reg_comment_ex
|
180
|
+
| LEX_SIMPLIFIED_CLASS reg_comment_ex
|
181
|
+
| LEX_UNICODE_CLASS reg_comment_ex
|
182
|
+
| LEX_BRACKET reg_comment_ex
|
183
|
+
| LEX_ANC_LINE_BEGIN reg_comment_ex
|
184
|
+
| LEX_ANC_LINE_END reg_comment_ex
|
185
|
+
| LEX_ANC_WORD_BOUND reg_comment_ex
|
186
|
+
| LEX_ANC_WORD_UNBOUND reg_comment_ex
|
187
|
+
| LEX_ANC_STRING_BEGIN reg_comment_ex
|
188
|
+
| LEX_ANC_STRING_END reg_comment_ex
|
189
|
+
| LEX_ANC_STRING_END2 reg_comment_ex
|
190
|
+
| LEX_ANC_LOOK_BEHIND2 reg_comment_ex
|
191
|
+
| LEX_ANC_MATCH_START reg_comment_ex
|
192
|
+
| LEX_SPECIAL_LETTER reg_comment_ex
|
193
|
+
| LEX_MINUS reg_comment_ex
|
194
|
+
| LEX_AND_AND reg_comment_ex
|
195
|
+
| LEX_SPACE reg_comment_ex
|
196
|
+
| LEX_SIMPLE_ESCAPE reg_comment_ex
|
197
|
+
| LEX_ANY_LETTER reg_comment_ex
|
198
|
+
| LEX_SHARP reg_comment_ex
|
199
|
+
| LEX_PAREN_START reg_comment_ex
|
200
|
+
| LEX_PAREN_START_EX1 reg_comment_ex
|
201
|
+
| LEX_PAREN_START_EX2 reg_comment_ex
|
202
|
+
| LEX_PAREN_END reg_comment_ex
|
203
|
+
| LEX_QUANTIFIER reg_comment_ex
|
204
|
+
| LEX_OR reg_comment_ex
|
205
|
+
|
206
|
+
end
|
207
|
+
|
208
|
+
---- header
|
209
|
+
# parser classes
|
210
|
+
require 'regextest/front/empty' # parser class for empty part ("", (|) etc.)
|
211
|
+
require 'regextest/front/letter' # parser class for a letter
|
212
|
+
require 'regextest/front/range' # parser class for a range of letters
|
213
|
+
require 'regextest/front/selectable' # parser class for a selectable element
|
214
|
+
require 'regextest/front/parenthesis' # parser class for a parenthesis
|
215
|
+
require 'regextest/front/repeatable' # parser class for a repeatable elements
|
216
|
+
require 'regextest/front/sequence' # parser class for a sequence of elements
|
217
|
+
require 'regextest/front/bracket' # parser class for a character class (bracket)
|
218
|
+
require 'regextest/front/anchor' # parser class for a anchor
|
219
|
+
require 'regextest/front/back-refer' # parser class for a back reference
|
220
|
+
require 'regextest/front/bracket-parser' # bracket parser
|
221
|
+
|
222
|
+
---- inner
|
223
|
+
# modules for sharing procedures with bracket parser
|
224
|
+
include Regextest::Front::Empty
|
225
|
+
include Regextest::Front::Letter
|
226
|
+
include Regextest::Front::Range
|
227
|
+
include Regextest::Front::Selectable
|
228
|
+
include Regextest::Front::Parenthesis
|
229
|
+
include Regextest::Front::Repeatable
|
230
|
+
include Regextest::Front::Sequence
|
231
|
+
include Regextest::Front::Bracket
|
232
|
+
include Regextest::Front::Anchor
|
233
|
+
include Regextest::Front::BackRefer
|
234
|
+
|
235
|
+
# execute to parse
|
236
|
+
# Parse a list of scanned lexical words into the front-end syntax tree.
#
# lex_words:: array of tokens as produced by the scanner; the first
#             entry of each token is its type symbol
# options::   option hash shared with the rule actions and handed to the
#             bracket parser (the actions use options[:parens])
#
# Returns whatever do_parse returns for the start rule.
# Raises RuntimeError carrying the racc message when the token stream
# does not match the grammar.
def parse(lex_words, options)
  @options = options

  # bracket parser (class name is strange because of racc's restriction)
  @bracket_parser = RegextestFrontBracketParser.new

  # Comment tokens are dropped up front, since handling them inside the
  # grammar is complicated. reject builds a private queue, so neither this
  # filter nor the shift calls in next_token consume the caller's array.
  @q = lex_words.reject { |token| token[0] == :LEX_COMMENT }

  begin
    do_parse
  rescue Racc::ParseError => ex
    # callers receive a plain RuntimeError with the racc message
    raise ex.message
  end
end
|
257
|
+
|
258
|
+
# parse next token
|
259
|
+
# Token source for the racc driver: hands out the token at the head of
# the queue and removes it. Returns nil once the queue is exhausted.
def next_token
  head = @q.shift
  head
end
|
262
|
+
|
263
|
+
# error handling routine. commented out because of readability problem
|
264
|
+
#def on_error(t, val, vstack)
|
265
|
+
# if val
|
266
|
+
# raise "Parse error. offset=#{val[1]}, letter=#{val[0]}, stack=#{vstack}"
|
267
|
+
# else
|
268
|
+
# raise "Parse error. t=#{t}, val=#{val}, vstack=#{vstack}"
|
269
|
+
# end
|
270
|
+
#end
|
271
|
+
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
require 'regextest/common'
|
6
|
+
require 'regextest/front/case-folding' # case folding hash
|
7
|
+
|
8
|
+
# Consecutive codepoints
|
9
|
+
module Regextest::Front::Range
|
10
|
+
class TRange
|
11
|
+
include Regextest::Common
|
12
|
+
@@id = 0 # a class variable for generating unique name of element
|
13
|
+
|
14
|
+
# Constructor
|
15
|
+
# Build a range of codepoints.
# letter_begin:: first letter of the range (anything parse_letter accepts)
# letter_end::   last letter of the range; when omitted the range covers
#                the single codepoint of letter_begin
def initialize(letter_begin, letter_end = nil)
  TstLog("TRange: #{letter_begin}-#{letter_end}")
  @begin = parse_letter(letter_begin)
  @end = letter_end ? parse_letter(letter_end) : @begin

  # offset and length are not used in this class
  @offset = -1
  @length = -1
end
|
27
|
+
|
28
|
+
attr_reader :offset, :length
|
29
|
+
|
30
|
+
# parse letter
|
31
|
+
# Convert one range endpoint into a Unicode codepoint.
# * String  - codepoint of its first character (nil for an empty string)
# * Integer - returned unchanged
# * TLetter - its value is evaluated as a double-quoted Ruby string
#             literal, then treated like a String
# Any other class raises a RuntimeError.
# NOTE(review): the TLetter branch runs eval on text taken from the
# letter object; if that text can come from an untrusted pattern this
# executes arbitrary Ruby -- confirm the scanner restricts it.
def parse_letter(letter)
  return letter.unpack("U*").first if letter.is_a?(String)
  return letter if letter.is_a?(Integer)

  unless letter.is_a?(Regextest::Front::Letter::TLetter)
    raise "Internal error. invalid letter class #{letter}"
  end

  eval('"' + letter.value + '"').unpack("U*").first
end
|
43
|
+
|
44
|
+
# enumerate
|
45
|
+
# List every codepoint of the range, from @begin to @end inclusive
# (empty when @begin is greater than @end).
def enumerate
  [*(@begin..@end)]
end
|
48
|
+
|
49
|
+
# transform to json format (using codepoints of Unicode)
|
50
|
+
# Serialize the range as a JSON object string of type LEX_RANGE, using
# Unicode codepoints for "begin" and "end". Each call takes the next
# value of the class-wide counter to form the id ("G1", "G2", ...).
def json
  @@id += 1
  format('{"type": "LEX_RANGE", "id": "G%s", "begin": %s, "end": %s}', @@id, @begin, @end)
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
# Test suite (execute when this file is specified in command line)
|
58
|
+
if __FILE__ == $0
|
59
|
+
end
|
60
|
+
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
# Quantifier class
|
6
|
+
module Regextest::Front::Repeat
|
7
|
+
class Repeat
|
8
|
+
include Regextest::Common
|
9
|
+
|
10
|
+
# Constants for the class
|
11
|
+
TstOptGreedy = 1
|
12
|
+
TstOptReluctant = 2
|
13
|
+
TstOptPossessive = 4
|
14
|
+
|
15
|
+
# Constructor
|
16
|
+
# Build a quantifier description.
# param:: quantifier notation such as "*", "+?" or "{2,3}"; when nil or
#         false the defaults (exactly one repetition, no option bits)
#         are kept
def initialize(param)
  @min_value, @max_value, @option = 1, 1, 0
  return unless param

  set_values(param)
end
|
22
|
+
attr_reader :max_value, :min_value, :option
|
23
|
+
|
24
|
+
# get minimum, maximum, and option
|
25
|
+
# Decode a quantifier notation into minimum, maximum and option bits.
#
# param:: one of "?", "*", "+", "{n}", "{n,m}", "{,m}", "{n,}",
#         optionally followed by "?" (reluctant) or "+" (possessive)
#
# Sets @min_value and @max_value and ORs exactly one of TstOptGreedy,
# TstOptReluctant or TstOptPossessive into @option.
# Raises RuntimeError for any other notation.
def set_values(param)
  # every branch sets the bounds and evaluates to the suffix that
  # follows the quantifier body ("", "?" or "+")
  suffix =
    case param
    when '?', '??', '?+'
      @min_value = 0
      @max_value = 1
      param[1..-1]
    when '*', '*?', '*+'
      @min_value = 0
      @max_value = TstConstRepeatMax
      param[1..-1]
    when '+', '+?', '++'
      @min_value = 1
      @max_value = TstConstRepeatMax
      param[1..-1]
    when /^\{(\d+)\}([\?\+]?)$/      # {3}, etc.
      @min_value = $1.to_i
      @max_value = $1.to_i
      $2
    when /^\{(\d+),(\d+)\}([\?\+]?)$/ # {2,3}, etc.
      @min_value = $1.to_i
      @max_value = $2.to_i
      $3 # the suffix is the third group here; $2 holds the maximum
    when /^\{,(\d+)\}([\?\+]?)$/      # {,3}, etc.
      @min_value = 0
      @max_value = $1.to_i
      $2
    when /^\{(\d+),\}([\?\+]?)$/      # {3,}, etc.
      @min_value = $1.to_i
      @max_value = TstConstRepeatMax
      # keep the open-ended maximum above a large explicit minimum
      @max_value = @min_value + TstConstRepeatMax if @max_value < @min_value
      $2
    else
      raise "Error: repeat notation #{param} invalid"
    end

  # An optional group that matched nothing captures "" (not nil), so the
  # greedy case is "no suffix at all".
  @option |=
    case suffix
    when '?' then TstOptReluctant
    when '+' then TstOptPossessive
    else TstOptGreedy
    end
end
|
74
|
+
|
75
|
+
# a+?, etc.
|
76
|
+
# True when the reluctant bit is set in @option (a+?, etc.).
def is_reluctant?
  !(@option & TstOptReluctant).zero?
end
|
79
|
+
|
80
|
+
# a++. etc.
|
81
|
+
# True when the possessive bit is set in @option (a++, etc.).
def is_possessive?
  !(@option & TstOptPossessive).zero?
end
|
84
|
+
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
# Test suite (execute when this file is specified in command line)
|
89
|
+
if __FILE__ == $0
|
90
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2016 Mikio Ikoma
|
4
|
+
|
5
|
+
require 'regextest/common'
|
6
|
+
require 'regextest/front/repeat'
|
7
|
+
|
8
|
+
# An element (a letter or a parenthesis) with quantifier
|
9
|
+
module Regextest::Front::Repeatable
|
10
|
+
class Repeatable
|
11
|
+
include Regextest::Common
|
12
|
+
include Regextest::Front::Repeat
|
13
|
+
@@id = 0 # a class variable for generating unique name of element
|
14
|
+
|
15
|
+
# Constructor
|
16
|
+
# Wrap an element so that quantifiers can be attached to it.
# value:: the wrapped element; it must respond to offset and length,
#         which are copied as the starting offset/length of this node
def initialize(value)
  TstLog("Repeatable: #{value}")
  @value = value
  @offset, @length = value.offset, value.length
  @quant = [] # quantifiers in the order they were attached
end
|
23
|
+
|
24
|
+
attr_reader :offset, :length
|
25
|
+
|
26
|
+
# add quantifier
|
27
|
+
# Attach one quantifier token to this element.
# quant_value:: the quantifier token; index 0 is the notation handed to
#               Repeat.new and index 2 is added to @length
#               (presumably the token's length in the pattern -- TODO confirm)
# Returns self so the parser action can use it as the rule value.
def set_quant(quant_value)
  notation = quant_value[0]
  @length += quant_value[2]
  TstLog("Repeatable quant: #{quant_value}")
  @quant << Repeat.new(notation)
  self
end
|
34
|
+
|
35
|
+
# set options
|
36
|
+
# Forward the option hash to the wrapped element.
# options:: option hash; options[:reg_options] is only used for logging
# Returns self.
def set_options(options)
  reg_options = options[:reg_options]
  TstLog("Repeatable set_options: #{reg_options.inspect}")
  tap { @value.set_options(options) }
end
|
41
|
+
|
42
|
+
# transform to json format
|
43
|
+
# Serialize the element as JSON text.
# Without quantifiers this is just the wrapped element's json. With
# quantifiers, one LEX_REPEAT object is opened per quantifier (each
# taking the next value of the class-wide id counter), the wrapped
# element's json follows as the innermost "value", and the objects are
# then closed in attachment order, each with offset, length, repeat
# bounds and option names.
def json
  # openers are produced before @value.json is evaluated, as ids are
  # taken from a counter shared by all Repeatable instances
  openers = @quant.map do |_quantifier|
    @@id += 1
    "{\"type\": \"LEX_REPEAT\", " \
    " \"id\": \"m#{@@id}\", " \
    " \"value\": "
  end

  serialized = openers.join + @value.json
  return serialized if @quant.empty?

  closers = @quant.map do |quantifier|
    flags = []
    flags << "reluctant" if quantifier.is_reluctant?
    flags << "possessive" if quantifier.is_possessive?

    " \"offset\": #{@offset}, " \
    " \"length\": #{@length}, " \
    " \"min_repeat\": #{quantifier.min_value}, " \
    " \"max_repeat\": #{quantifier.max_value}, " \
    " \"repeat_option\": #{flags} " \
    "}"
  end

  serialized + ", " + closers.join(", ")
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# Test suite (execute when this file is specified in command line)
|
76
|
+
if __FILE__ == $0
|
77
|
+
end
|