regextest 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +3 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +25 -0
  7. data/README.md +88 -0
  8. data/Rakefile +55 -0
  9. data/bin/console +14 -0
  10. data/bin/regextest +4 -0
  11. data/bin/setup +7 -0
  12. data/contrib/Onigmo/RE.txt +522 -0
  13. data/contrib/Onigmo/UnicodeProps.txt +728 -0
  14. data/contrib/Onigmo/testpy.py +1319 -0
  15. data/contrib/unicode/Blocks.txt +298 -0
  16. data/contrib/unicode/CaseFolding.txt +1414 -0
  17. data/contrib/unicode/DerivedAge.txt +1538 -0
  18. data/contrib/unicode/DerivedCoreProperties.txt +11029 -0
  19. data/contrib/unicode/PropList.txt +1525 -0
  20. data/contrib/unicode/PropertyAliases.txt +193 -0
  21. data/contrib/unicode/PropertyValueAliases.txt +1420 -0
  22. data/contrib/unicode/README.txt +25 -0
  23. data/contrib/unicode/Scripts.txt +2539 -0
  24. data/contrib/unicode/UnicodeData.txt +29215 -0
  25. data/lib/pre-case-folding.rb +101 -0
  26. data/lib/pre-posix-char-class.rb +150 -0
  27. data/lib/pre-unicode.rb +116 -0
  28. data/lib/regextest.rb +268 -0
  29. data/lib/regextest/back.rb +58 -0
  30. data/lib/regextest/back/element.rb +151 -0
  31. data/lib/regextest/back/main.rb +356 -0
  32. data/lib/regextest/back/result.rb +498 -0
  33. data/lib/regextest/back/test-case.rb +268 -0
  34. data/lib/regextest/back/work-thread.rb +119 -0
  35. data/lib/regextest/common.rb +63 -0
  36. data/lib/regextest/front.rb +60 -0
  37. data/lib/regextest/front/anchor.rb +45 -0
  38. data/lib/regextest/front/back-refer.rb +120 -0
  39. data/lib/regextest/front/bracket-parser.rb +400 -0
  40. data/lib/regextest/front/bracket-parser.y +117 -0
  41. data/lib/regextest/front/bracket-scanner.rb +124 -0
  42. data/lib/regextest/front/bracket.rb +64 -0
  43. data/lib/regextest/front/builtin-functions.rb +31 -0
  44. data/lib/regextest/front/case-folding.rb +18 -0
  45. data/lib/regextest/front/char-class.rb +243 -0
  46. data/lib/regextest/front/empty.rb +43 -0
  47. data/lib/regextest/front/letter.rb +327 -0
  48. data/lib/regextest/front/manage-parentheses.rb +74 -0
  49. data/lib/regextest/front/parenthesis.rb +153 -0
  50. data/lib/regextest/front/parser.rb +1366 -0
  51. data/lib/regextest/front/parser.y +271 -0
  52. data/lib/regextest/front/range.rb +60 -0
  53. data/lib/regextest/front/repeat.rb +90 -0
  54. data/lib/regextest/front/repeatable.rb +77 -0
  55. data/lib/regextest/front/scanner.rb +187 -0
  56. data/lib/regextest/front/selectable.rb +65 -0
  57. data/lib/regextest/front/sequence.rb +73 -0
  58. data/lib/regextest/front/unicode.rb +1272 -0
  59. data/lib/regextest/regex-option.rb +144 -0
  60. data/lib/regextest/regexp.rb +44 -0
  61. data/lib/regextest/version.rb +5 -0
  62. data/lib/tst-reg-test.rb +159 -0
  63. data/regextest.gemspec +26 -0
  64. metadata +162 -0
@@ -0,0 +1,271 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ class RegextestFrontParser
6
+ options no_result_var
7
+ rule
8
+ # regular expression
9
# Top-level rule (first rule of the grammar): a whole regular expression is
# one alternation, parsed by reg_sel.
+ reg_exp: reg_sel
10
+
11
+ # selectable elements
12
# Alternation in basic mode. Alternatives are separated by LEX_OR.
# A missing alternative (empty input, or nothing after a LEX_OR) is
# represented by TEmpty. The first sequence is wrapped in a Selectable and
# later alternatives are appended to it with add.
+ reg_sel:
13
+ {TEmpty.new}
14
+ | reg_seq
15
+ {Selectable.new(val[0])}
16
+ | reg_sel LEX_OR reg_seq
17
+ {val[0].add(val[2])}
18
+ | reg_sel LEX_OR
19
+ {val[0].add(TEmpty.new)}
20
+ | LEX_OR reg_sel
21
+ {Selectable.new(TEmpty.new).add(val[1])}
22
+
23
+ # sequence of elements
24
# Sequence of repeatable elements in basic mode.
# An option group token (LEX_OPTION_PAREN_1 / LEX_OPTION_PAREN_2) is wrapped in
# a Paren and decides which grammar parses the rest of the sequence:
# LEX_OPTION_PAREN_1 continues with the extended-mode rule reg_seq_ex,
# LEX_OPTION_PAREN_2 continues with the basic-mode rule reg_seq.
+ reg_seq: reg_rep
25
+ {Sequence.new(val[0])}
26
+ | reg_seq reg_rep
27
+ {val[0].add(val[1])}
28
+ | LEX_OPTION_PAREN_1 reg_seq_ex # ((?x)a b c). switch to extended mode
29
+ {Sequence.new(Paren.new(val[0])).concatinate(val[1])}
30
+ | reg_seq LEX_OPTION_PAREN_1 reg_seq_ex # (a (?x)b c). switch to extended mode
31
+ {val[0].add(Paren.new(val[1])).concatinate(val[2])}
32
+ | LEX_OPTION_PAREN_2 reg_seq # ((?-x)a b c). stay in basic mode
33
+ {Sequence.new(Paren.new(val[0])).concatinate(val[1])}
34
+ | reg_seq LEX_OPTION_PAREN_2 reg_seq # (a (?-x)b c). stay in basic mode
35
+ {val[0].add(Paren.new(val[1])).concatinate(val[2])}
36
+ | LEX_OPTION_PAREN_1 # ((?x)). switch to extended mode (nothing follows)
37
+ {Sequence.new(Paren.new(val[0]))}
38
+ | LEX_OPTION_PAREN_2 # ((?-x)). stay in basic mode (nothing follows)
39
+ {Sequence.new(Paren.new(val[0]))}
40
+
41
+ # repeatable elements
42
# Repeatable element in basic mode: every element is wrapped in a Repeatable,
# and each LEX_QUANTIFIER that follows is attached to it with set_quant
# (the rule is left-recursive, so several quantifiers may follow one element).
+ reg_rep: reg_elm
43
+ {Repeatable.new(val[0])}
44
+ | reg_rep LEX_QUANTIFIER
45
+ {val[0].set_quant(val[1])}
46
+
47
+ # element (a letter or selectable element in parentheses)
48
# Element in basic mode: a single letter, or an alternation in parentheses.
# Every parenthesis is built as Paren(open token, content, close token) and
# registered through @options[:parens].add.
# LEX_PAREN_START_EX1 parses its content with the extended-mode rule reg_sel_ex;
# LEX_PAREN_START and LEX_PAREN_START_EX2 parse it with the basic-mode rule reg_sel.
+ reg_elm: reg_let
49
+ {val[0]}
50
+ | LEX_PAREN_START reg_sel LEX_PAREN_END
51
+ {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
52
+ | LEX_PAREN_START_EX1 reg_sel_ex LEX_PAREN_END
53
+ {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
54
+ | LEX_PAREN_START_EX2 reg_sel LEX_PAREN_END
55
+ {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
56
+
57
+ # letter
58
# Single-token elements in basic mode.
# Most tokens become a TLetter tagged with a token type; anchor tokens become
# Anchor, back-reference tokens become BackRefer, and LEX_BRACKET is handed to
# the separate bracket parser.
# LEX_MINUS, LEX_SHARP and LEX_NEW_LINE are tagged :LEX_CHAR (plain characters).
# NOTE(review): LEX_META_LETTER is tagged :LEX_CONTROL_LETTER rather than with
# its own name - confirm this is intended and not a copy-paste slip.
+ reg_let: LEX_CHAR {TLetter.new(:LEX_CHAR, val[0])}
59
+ | LEX_OCTET {TLetter.new(:LEX_OCTET, val[0])}
60
+ | LEX_BACK_REFER {BackRefer.new(:LEX_BACK_REFER, val[0])}
61
+ | LEX_CODE_LITERAL {TLetter.new(:LEX_CODE_LITERAL, val[0])}
62
+ | LEX_NAMED_REFER {BackRefer.new(:LEX_NAMED_REFER, val[0])}
63
+ | LEX_NAMED_GENERATE {BackRefer.new(:LEX_NAMED_GENERATE, val[0])}
64
+ | LEX_CONTROL_LETTER {TLetter.new(:LEX_CONTROL_LETTER, val[0])}
65
+ | LEX_META_LETTER {TLetter.new(:LEX_CONTROL_LETTER, val[0])}
66
+ | LEX_ESCAPED_LETTER {TLetter.new(:LEX_ESCAPED_LETTER, val[0])}
67
+ | LEX_UNICODE {TLetter.new(:LEX_UNICODE, val[0])}
68
+ | LEX_SIMPLIFIED_CLASS {TLetter.new(:LEX_SIMPLIFIED_CLASS, val[0])}
69
+ | LEX_UNICODE_CLASS {TLetter.new(:LEX_UNICODE_CLASS, val[0])}
70
+ | LEX_BRACKET {@bracket_parser.parse(val[0], @options)} # using another parser
71
+ | LEX_ANC_LINE_BEGIN {Anchor.new(:LEX_ANC_LINE_BEGIN, val[0])}
72
+ | LEX_ANC_LINE_END {Anchor.new(:LEX_ANC_LINE_END, val[0])}
73
+ | LEX_ANC_WORD_BOUND {Anchor.new(:LEX_ANC_WORD_BOUND, val[0])}
74
+ | LEX_ANC_WORD_UNBOUND {Anchor.new(:LEX_ANC_WORD_UNBOUND, val[0])}
75
+ | LEX_ANC_STRING_BEGIN {Anchor.new(:LEX_ANC_STRING_BEGIN, val[0])}
76
+ | LEX_ANC_STRING_END {Anchor.new(:LEX_ANC_STRING_END, val[0])}
77
+ | LEX_ANC_STRING_END2 {Anchor.new(:LEX_ANC_STRING_END2, val[0])}
78
+ | LEX_ANC_LOOK_BEHIND2 {Anchor.new(:LEX_ANC_LOOK_BEHIND2, val[0])}
79
+ | LEX_ANC_MATCH_START {Anchor.new(:LEX_ANC_MATCH_START, val[0])}
80
+ | LEX_SPECIAL_LETTER {TLetter.new(:LEX_SPECIAL_LETTER, val[0])}
81
+ | LEX_MINUS {TLetter.new(:LEX_CHAR, val[0])} # no special meaning in basic mode
82
+ | LEX_AND_AND {TLetter.new(:LEX_AND_AND, val[0])}
83
+ | LEX_SPACE {TLetter.new(:LEX_SPACE, val[0])}
84
+ | LEX_SIMPLE_ESCAPE {TLetter.new(:LEX_SIMPLE_ESCAPE, val[0])}
85
+ | LEX_SHARP {TLetter.new(:LEX_CHAR, val[0])} # no special meaning in basic mode
86
+ | LEX_NEW_LINE {TLetter.new(:LEX_CHAR, val[0])} # no special meaning in basic mode
87
+ | LEX_ANY_LETTER {TLetter.new(:LEX_ANY_LETTER, val[0])}
88
+
89
+ # EXTENDED MODE
90
+ # selectable elements
91
# Alternation in extended mode. Same shape as the basic-mode reg_sel rule,
# but every alternative is parsed with the extended-mode sequence rule reg_seq_ex.
# A missing alternative is represented by TEmpty.
+ reg_sel_ex:
92
+ {TEmpty.new}
93
+ | reg_seq_ex
94
+ {Selectable.new(val[0])}
95
+ | reg_sel_ex LEX_OR reg_seq_ex
96
+ {val[0].add(val[2])}
97
+ | reg_sel_ex LEX_OR
98
+ {val[0].add(TEmpty.new)}
99
+ | LEX_OR reg_sel_ex
100
+ {Selectable.new(TEmpty.new).add(val[1])}
101
+
102
+ # sequence of elements
103
# Sequence of repeatable elements in extended mode.
# An option group token is wrapped in a Paren and decides which grammar parses
# the rest of the sequence: LEX_OPTION_PAREN_1 continues with the extended-mode
# rule reg_seq_ex, LEX_OPTION_PAREN_2 continues with the basic-mode rule reg_seq.
+ reg_seq_ex: reg_rep_ex
104
+ {Sequence.new(val[0])}
105
+ | reg_seq_ex reg_rep_ex
106
+ {val[0].add(val[1])}
107
+ | LEX_OPTION_PAREN_1 reg_seq_ex # ((?x)a b c). stay in extended mode
108
+ {Sequence.new(Paren.new(val[0])).concatinate(val[1])}
109
+ | reg_seq_ex LEX_OPTION_PAREN_1 reg_seq_ex # (a (?x)b c). stay in extended mode
110
+ {val[0].add(Paren.new(val[1])).concatinate(val[2])}
111
+ | LEX_OPTION_PAREN_2 reg_seq # ((?-x)a b c). switch to basic mode
112
+ {Sequence.new(Paren.new(val[0])).concatinate(val[1])}
113
+ | reg_seq_ex LEX_OPTION_PAREN_2 reg_seq # (a (?-x)b c). switch to basic mode
114
+ {val[0].add(Paren.new(val[1])).concatinate(val[2])}
115
+ | LEX_OPTION_PAREN_1 # ((?x)). stay in extended mode (nothing follows)
116
+ {Sequence.new(Paren.new(val[0]))}
117
+ | LEX_OPTION_PAREN_2 # ((?-x)). switch to basic mode (nothing follows)
118
+ {Sequence.new(Paren.new(val[0]))}
119
+
120
+ # repeatable elements
121
# Repeatable element in extended mode: every element is wrapped in a
# Repeatable, and each following LEX_QUANTIFIER is attached with set_quant.
+ reg_rep_ex: reg_elm_ex
122
+ {Repeatable.new(val[0])}
123
+ | reg_rep_ex LEX_QUANTIFIER
124
+ {val[0].set_quant(val[1])}
125
+
126
+ # element (a letter or selectable element in parentheses)
127
# Element in extended mode: a single letter, or an alternation in parentheses.
# Every parenthesis is built as Paren(open token, content, close token) and
# registered through @options[:parens].add.
# LEX_PAREN_START and LEX_PAREN_START_EX1 parse their content with the
# extended-mode rule reg_sel_ex; LEX_PAREN_START_EX2 parses it with the
# basic-mode rule reg_sel.
+ reg_elm_ex: reg_let_ex
128
+ {val[0]}
129
+ | LEX_PAREN_START reg_sel_ex LEX_PAREN_END
130
+ {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
131
+ | LEX_PAREN_START_EX1 reg_sel_ex LEX_PAREN_END
132
+ {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
133
+ | LEX_PAREN_START_EX2 reg_sel LEX_PAREN_END
134
+ {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
135
+
136
+ # letter
137
# Single-token elements in extended mode.
# Differs from the basic-mode reg_let rule in three places: LEX_NEW_LINE and
# LEX_SPACE produce TEmpty (they are ignored), and LEX_SHARP starts a comment
# (parsed by reg_comment_ex) that also produces TEmpty.
# NOTE(review): LEX_META_LETTER is tagged :LEX_CONTROL_LETTER rather than with
# its own name - confirm this is intended and not a copy-paste slip.
+ reg_let_ex: LEX_CHAR {TLetter.new(:LEX_CHAR, val[0])}
138
+ | LEX_OCTET {TLetter.new(:LEX_OCTET, val[0])}
139
+ | LEX_BACK_REFER {BackRefer.new(:LEX_BACK_REFER, val[0])}
140
+ | LEX_CODE_LITERAL {TLetter.new(:LEX_CODE_LITERAL, val[0])}
141
+ | LEX_NAMED_REFER {BackRefer.new(:LEX_NAMED_REFER, val[0])}
142
+ | LEX_NAMED_GENERATE {BackRefer.new(:LEX_NAMED_GENERATE, val[0])}
143
+ | LEX_CONTROL_LETTER {TLetter.new(:LEX_CONTROL_LETTER, val[0])}
144
+ | LEX_META_LETTER {TLetter.new(:LEX_CONTROL_LETTER, val[0])}
145
+ | LEX_ESCAPED_LETTER {TLetter.new(:LEX_ESCAPED_LETTER, val[0])}
146
+ | LEX_UNICODE {TLetter.new(:LEX_UNICODE, val[0])}
147
+ | LEX_SIMPLIFIED_CLASS {TLetter.new(:LEX_SIMPLIFIED_CLASS, val[0])}
148
+ | LEX_UNICODE_CLASS {TLetter.new(:LEX_UNICODE_CLASS, val[0])}
149
+ | LEX_BRACKET {@bracket_parser.parse(val[0], @options)} # using another parser
150
+ | LEX_ANC_LINE_BEGIN {Anchor.new(:LEX_ANC_LINE_BEGIN, val[0])}
151
+ | LEX_ANC_LINE_END {Anchor.new(:LEX_ANC_LINE_END, val[0])}
152
+ | LEX_ANC_WORD_BOUND {Anchor.new(:LEX_ANC_WORD_BOUND, val[0])}
153
+ | LEX_ANC_WORD_UNBOUND {Anchor.new(:LEX_ANC_WORD_UNBOUND, val[0])}
154
+ | LEX_ANC_STRING_BEGIN {Anchor.new(:LEX_ANC_STRING_BEGIN, val[0])}
155
+ | LEX_ANC_STRING_END {Anchor.new(:LEX_ANC_STRING_END, val[0])}
156
+ | LEX_ANC_STRING_END2 {Anchor.new(:LEX_ANC_STRING_END2, val[0])}
157
+ | LEX_ANC_LOOK_BEHIND2 {Anchor.new(:LEX_ANC_LOOK_BEHIND2, val[0])}
158
+ | LEX_ANC_MATCH_START {Anchor.new(:LEX_ANC_MATCH_START, val[0])}
159
+ | LEX_SPECIAL_LETTER {TLetter.new(:LEX_SPECIAL_LETTER, val[0])}
160
+ | LEX_MINUS {TLetter.new(:LEX_CHAR, val[0])}
161
+ | LEX_AND_AND {TLetter.new(:LEX_AND_AND, val[0])}
162
+ | LEX_NEW_LINE {TEmpty.new} # new lines are ignored in extended mode
163
+ | LEX_SPACE {TEmpty.new} # spaces are ignored in extended mode
164
+ | LEX_SIMPLE_ESCAPE {TLetter.new(:LEX_SIMPLE_ESCAPE, val[0])}
165
+ | LEX_ANY_LETTER {TLetter.new(:LEX_ANY_LETTER, val[0])}
166
+ | LEX_SHARP reg_comment_ex {TEmpty.new}
167
+
168
+ # comment of extended mode
169
# Body of a '#' comment in extended mode (entered from reg_let_ex after LEX_SHARP).
# The rule is right-recursive: it consumes any of the listed tokens until a
# LEX_NEW_LINE ends the comment. None of the alternatives has an action.
# NOTE(review): there is no alternative for a comment that reaches the end of
# input without a LEX_NEW_LINE, and LEX_OPTION_PAREN_1 / LEX_OPTION_PAREN_2 are
# not listed - presumably the scanner handles those cases; confirm.
+ reg_comment_ex: LEX_NEW_LINE # end of the comment
170
+ | LEX_CHAR reg_comment_ex
171
+ | LEX_OCTET reg_comment_ex
172
+ | LEX_BACK_REFER reg_comment_ex
173
+ | LEX_CODE_LITERAL reg_comment_ex
174
+ | LEX_NAMED_REFER reg_comment_ex
175
+ | LEX_NAMED_GENERATE reg_comment_ex
176
+ | LEX_CONTROL_LETTER reg_comment_ex
177
+ | LEX_META_LETTER reg_comment_ex
178
+ | LEX_ESCAPED_LETTER reg_comment_ex
179
+ | LEX_UNICODE reg_comment_ex
180
+ | LEX_SIMPLIFIED_CLASS reg_comment_ex
181
+ | LEX_UNICODE_CLASS reg_comment_ex
182
+ | LEX_BRACKET reg_comment_ex
183
+ | LEX_ANC_LINE_BEGIN reg_comment_ex
184
+ | LEX_ANC_LINE_END reg_comment_ex
185
+ | LEX_ANC_WORD_BOUND reg_comment_ex
186
+ | LEX_ANC_WORD_UNBOUND reg_comment_ex
187
+ | LEX_ANC_STRING_BEGIN reg_comment_ex
188
+ | LEX_ANC_STRING_END reg_comment_ex
189
+ | LEX_ANC_STRING_END2 reg_comment_ex
190
+ | LEX_ANC_LOOK_BEHIND2 reg_comment_ex
191
+ | LEX_ANC_MATCH_START reg_comment_ex
192
+ | LEX_SPECIAL_LETTER reg_comment_ex
193
+ | LEX_MINUS reg_comment_ex
194
+ | LEX_AND_AND reg_comment_ex
195
+ | LEX_SPACE reg_comment_ex
196
+ | LEX_SIMPLE_ESCAPE reg_comment_ex
197
+ | LEX_ANY_LETTER reg_comment_ex
198
+ | LEX_SHARP reg_comment_ex
199
+ | LEX_PAREN_START reg_comment_ex
200
+ | LEX_PAREN_START_EX1 reg_comment_ex
201
+ | LEX_PAREN_START_EX2 reg_comment_ex
202
+ | LEX_PAREN_END reg_comment_ex
203
+ | LEX_QUANTIFIER reg_comment_ex
204
+ | LEX_OR reg_comment_ex
205
+
206
+ end
207
+
208
+ ---- header
209
+ # parser classes
210
+ require 'regextest/front/empty' # parser class for empty part ("", (|) etc.)
211
+ require 'regextest/front/letter' # parser class for a letter
212
+ require 'regextest/front/range' # parser class for a range of letters
213
+ require 'regextest/front/selectable' # parser class for a selectable element
214
+ require 'regextest/front/parenthesis' # parser class for a parenthesis
215
+ require 'regextest/front/repeatable' # parser class for a repeatable elements
216
+ require 'regextest/front/sequence' # parser class for a sequence of elements
217
+ require 'regextest/front/bracket' # parser class for a character class (bracket)
218
+ require 'regextest/front/anchor' # parser class for a anchor
219
+ require 'regextest/front/back-refer' # parser class for a back reference
220
+ require 'regextest/front/bracket-parser' # bracket parser
221
+
222
+ ---- inner
223
+ # modules for sharing procedures with bracket parser
224
+ include Regextest::Front::Empty
225
+ include Regextest::Front::Letter
226
+ include Regextest::Front::Range
227
+ include Regextest::Front::Selectable
228
+ include Regextest::Front::Parenthesis
229
+ include Regextest::Front::Repeatable
230
+ include Regextest::Front::Sequence
231
+ include Regextest::Front::Bracket
232
+ include Regextest::Front::Anchor
233
+ include Regextest::Front::BackRefer
234
+
235
+ # execute to parse
236
# Parse the scanned tokens and return whatever the grammar actions build.
#
# lex_words - token array from the scanner. Tokens whose first element is
#             :LEX_COMMENT are removed from this array in place, and the
#             array is then consumed by next_token.
# options   - option hash, kept in @options for the grammar actions.
#
# Raises RuntimeError carrying the message of a Racc::ParseError.
def parse(lex_words, options)
  @options = options

  # bracket parser (class name is flat because of racc's restriction)
  @bracket_parser = RegextestFrontBracketParser.new

  # comments are dropped up front, since handling them in the grammar is complicated
  @q = lex_words
  @q = @q.delete_if { |token| token[0] == :LEX_COMMENT }

  begin
    do_parse
  rescue Racc::ParseError => parse_error
    raise parse_error.message
  end
end
257
+
258
+ # parse next token
259
# Token source used while parsing: removes and returns the token at the
# front of @q (nil once @q is empty).
def next_token
  @q.shift
end
262
+
263
+ # error handling routine. commented out because of readability problem
264
+ #def on_error(t, val, vstack)
265
+ # if val
266
+ # raise "Parse error. offset=#{val[1]}, letter=#{val[0]}, stack=#{vstack}"
267
+ # else
268
+ # raise "Parse error. t=#{t}, val=#{val}, vstack=#{vstack}"
269
+ # end
270
+ #end
271
+
@@ -0,0 +1,60 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ require 'regextest/common'
6
+ require 'regextest/front/case-folding' # case folding hash
7
+
8
+ # Consecutive codepoints
9
module Regextest::Front::Range
  # A closed range of Unicode codepoints, from @begin to @end.
  class TRange
    include Regextest::Common
    @@id = 0 # class-wide counter used to give every emitted element a unique id

    attr_reader :offset, :length

    # letter_begin - first letter of the range (String, Integer or TLetter)
    # letter_end   - last letter of the range; when omitted the range ends
    #                at letter_begin
    def initialize(letter_begin, letter_end = nil)
      TstLog("TRange: #{letter_begin}-#{letter_end}")
      @begin = parse_letter(letter_begin)
      @end = letter_end ? parse_letter(letter_end) : @begin

      # not used in this class
      @offset = -1
      @length = -1
    end

    # Convert a letter to a codepoint.
    #
    # letter - a String (its first codepoint is used), an Integer (used as is)
    #          or a TLetter (its value is evaluated as a double-quoted string
    #          literal and the first codepoint of the result is used)
    #
    # Raises RuntimeError for any other class.
    def parse_letter(letter)
      case letter
      when String
        letter.unpack("U*").first
      when Integer
        letter
      when Regextest::Front::Letter::TLetter
        # NOTE(review): eval runs text that comes from the regular expression
        # being parsed; a value containing a double quote or "#{...}" is
        # executed as Ruby code. Left unchanged here, but worth replacing with
        # an explicit escape decoder.
        decoded = eval('"' + letter.value + '"')
        decoded.unpack("U*").first
      else
        raise "Internal error. invalid letter class #{letter}"
      end
    end

    # All codepoints of the range as an array.
    def enumerate
      [*@begin..@end]
    end

    # JSON text for this range (codepoints as numbers); each call takes a new id.
    def json
      @@id += 1
      "{\"type\": \"LEX_RANGE\", \"id\": \"G#{@@id}\", \"begin\": #{@begin}, \"end\": #{@end}}"
    end
  end
end
56
+
57
+ # Test suite (execute when this file is specified in command line)
58
+ if __FILE__ == $0
59
+ end
60
+
@@ -0,0 +1,90 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ # Quantifier class
6
+ module Regextest::Front::Repeat
7
+ class Repeat
8
+ include Regextest::Common
9
+
10
+ # Constants for the class
11
+ TstOptGreedy = 1
12
+ TstOptReluctant = 2
13
+ TstOptPossessive = 4
14
+
15
+ # Constructor
16
# Constructor.
#
# param - quantifier text handed to set_values; when nil or false the
#         defaults stay in place (exactly one repetition, no option flag)
def initialize(param)
  @option = 0
  @min_value = @max_value = 1
  set_values(param) if param
end
22
+ attr_reader :max_value, :min_value, :option
23
+
24
+ # get minimum, maximum, and option
25
# Derive the repeat counts and the option flag from a quantifier string.
#
# param - quantifier text: "?", "*" or "+", optionally followed by "?" or "+",
#         or a brace form "{n}", "{n,m}", "{,m}" or "{n,}" with the same
#         optional suffix
#
# Sets @min_value and @max_value and ORs one of TstOptGreedy, TstOptReluctant
# or TstOptPossessive into @option. Open-ended forms use TstConstRepeatMax as
# the maximum.
#
# Raises RuntimeError when param matches none of the supported notations.
def set_values(param)
  case param
  when '?', '??', '?+'
    @min_value = 0
    @max_value = 1
    add_suffix_option(param[1..-1])
  when '*', '*?', '*+'
    @min_value = 0
    @max_value = TstConstRepeatMax
    add_suffix_option(param[1..-1])
  when '+', '+?', '++'
    @min_value = 1
    @max_value = TstConstRepeatMax
    add_suffix_option(param[1..-1])
  when /^\{(\d+)\}([\?\+]?)$/ # {3}, etc.
    @min_value = $1.to_i
    @max_value = $1.to_i
    add_suffix_option($2)
  when /^\{(\d+),(\d+)\}([\?\+]?)$/ # {2,3}, etc.
    @min_value = $1.to_i
    @max_value = $2.to_i
    # the suffix is the THIRD group here; the second group is the maximum
    add_suffix_option($3)
  when /^\{,(\d+)\}([\?\+]?)$/ # {,3}, etc.
    @min_value = 0
    @max_value = $1.to_i
    add_suffix_option($2)
  when /^\{(\d+),\}([\?\+]?)$/ # {3,}, etc.
    @min_value = $1.to_i
    @max_value = TstConstRepeatMax
    # keep max >= min when the requested minimum exceeds the default ceiling
    @max_value = @min_value + TstConstRepeatMax if @max_value < @min_value
    add_suffix_option($2)
  else
    raise "Error: repeat notation #{param} invalid"
  end
end

# Translate a quantifier suffix into an option flag and OR it into @option:
# "?" is reluctant, "+" is possessive, anything else (the empty suffix) is greedy.
# The optional capture group yields "" (which is truthy) when no suffix is
# present, so the suffix is compared by value rather than tested for nil.
def add_suffix_option(suffix)
  case suffix
  when "?" then @option |= TstOptReluctant
  when "+" then @option |= TstOptPossessive
  else @option |= TstOptGreedy
  end
end
private :add_suffix_option
74
+
75
+ # a+?, etc.
76
# True when the reluctant flag (a+?, a*?, etc.) is set in @option.
def is_reluctant?
  !(@option & TstOptReluctant).zero?
end
79
+
80
+ # a++. etc.
81
# True when the possessive flag (a++, a*+, etc.) is set in @option.
def is_possessive?
  !(@option & TstOptPossessive).zero?
end
84
+
85
+ end
86
+ end
87
+
88
+ # Test suite (execute when this file is specified in command line)
89
+ if __FILE__ == $0
90
+ end
@@ -0,0 +1,77 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ require 'regextest/common'
6
+ require 'regextest/front/repeat'
7
+
8
+ # An element (a letter or a parenthesis) with quantifier
9
module Regextest::Front::Repeatable
  # Wraps one element together with the quantifiers attached to it.
  class Repeatable
    include Regextest::Common
    include Regextest::Front::Repeat
    @@id = 0 # class-wide counter used to give every emitted element a unique id

    attr_reader :offset, :length

    # value - the wrapped element; its offset and length become the initial
    #         offset and length of this object
    def initialize(value)
      TstLog("Repeatable: #{value}")
      @value = value
      @offset = value.offset
      @length = value.length
      @quant = []
    end

    # Attach one quantifier.
    #
    # quant_value - quantifier token; element 0 is the quantifier text and
    #               element 2 is added to @length
    #
    # Returns self.
    def set_quant(quant_value)
      quantifier_text = quant_value[0]
      @length += quant_value[2]
      TstLog("Repeatable quant: #{quant_value}")
      @quant << Repeat.new(quantifier_text)
      self
    end

    # Pass the options on to the wrapped element. Returns self.
    def set_options(options)
      TstLog("Repeatable set_options: #{options[:reg_options].inspect}")
      @value.set_options(options)
      self
    end

    # JSON text for this element.
    # Without quantifiers this is just the JSON of the wrapped element.
    # Otherwise one LEX_REPEAT object is opened per quantifier (each taking a
    # new id), the wrapped element's JSON follows as the innermost "value",
    # and one closing part per quantifier is appended after it.
    def json
      # opening parts first, so ids are taken before the wrapped element is rendered
      opening = @quant.inject("") do |text, _repeat|
        @@id += 1
        text +
          "{\"type\": \"LEX_REPEAT\", " \
          " \"id\": \"m#{@@id}\", " \
          " \"value\": "
      end

      json_string = opening + @value.json
      return json_string if @quant.empty?

      closing = @quant.map do |repeat|
        flags = []
        flags << "reluctant" if repeat.is_reluctant?
        flags << "possessive" if repeat.is_possessive?

        " \"offset\": #{@offset}, " \
        " \"length\": #{@length}, " \
        " \"min_repeat\": #{repeat.min_value}, " \
        " \"max_repeat\": #{repeat.max_value}, " \
        " \"repeat_option\": #{flags} " \
        "}"
      end

      json_string + ", " + closing.join(", ")
    end
  end
end
74
+
75
+ # Test suite (execute when this file is specified in command line)
76
+ if __FILE__ == $0
77
+ end