regextest 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +3 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +25 -0
  7. data/README.md +88 -0
  8. data/Rakefile +55 -0
  9. data/bin/console +14 -0
  10. data/bin/regextest +4 -0
  11. data/bin/setup +7 -0
  12. data/contrib/Onigmo/RE.txt +522 -0
  13. data/contrib/Onigmo/UnicodeProps.txt +728 -0
  14. data/contrib/Onigmo/testpy.py +1319 -0
  15. data/contrib/unicode/Blocks.txt +298 -0
  16. data/contrib/unicode/CaseFolding.txt +1414 -0
  17. data/contrib/unicode/DerivedAge.txt +1538 -0
  18. data/contrib/unicode/DerivedCoreProperties.txt +11029 -0
  19. data/contrib/unicode/PropList.txt +1525 -0
  20. data/contrib/unicode/PropertyAliases.txt +193 -0
  21. data/contrib/unicode/PropertyValueAliases.txt +1420 -0
  22. data/contrib/unicode/README.txt +25 -0
  23. data/contrib/unicode/Scripts.txt +2539 -0
  24. data/contrib/unicode/UnicodeData.txt +29215 -0
  25. data/lib/pre-case-folding.rb +101 -0
  26. data/lib/pre-posix-char-class.rb +150 -0
  27. data/lib/pre-unicode.rb +116 -0
  28. data/lib/regextest.rb +268 -0
  29. data/lib/regextest/back.rb +58 -0
  30. data/lib/regextest/back/element.rb +151 -0
  31. data/lib/regextest/back/main.rb +356 -0
  32. data/lib/regextest/back/result.rb +498 -0
  33. data/lib/regextest/back/test-case.rb +268 -0
  34. data/lib/regextest/back/work-thread.rb +119 -0
  35. data/lib/regextest/common.rb +63 -0
  36. data/lib/regextest/front.rb +60 -0
  37. data/lib/regextest/front/anchor.rb +45 -0
  38. data/lib/regextest/front/back-refer.rb +120 -0
  39. data/lib/regextest/front/bracket-parser.rb +400 -0
  40. data/lib/regextest/front/bracket-parser.y +117 -0
  41. data/lib/regextest/front/bracket-scanner.rb +124 -0
  42. data/lib/regextest/front/bracket.rb +64 -0
  43. data/lib/regextest/front/builtin-functions.rb +31 -0
  44. data/lib/regextest/front/case-folding.rb +18 -0
  45. data/lib/regextest/front/char-class.rb +243 -0
  46. data/lib/regextest/front/empty.rb +43 -0
  47. data/lib/regextest/front/letter.rb +327 -0
  48. data/lib/regextest/front/manage-parentheses.rb +74 -0
  49. data/lib/regextest/front/parenthesis.rb +153 -0
  50. data/lib/regextest/front/parser.rb +1366 -0
  51. data/lib/regextest/front/parser.y +271 -0
  52. data/lib/regextest/front/range.rb +60 -0
  53. data/lib/regextest/front/repeat.rb +90 -0
  54. data/lib/regextest/front/repeatable.rb +77 -0
  55. data/lib/regextest/front/scanner.rb +187 -0
  56. data/lib/regextest/front/selectable.rb +65 -0
  57. data/lib/regextest/front/sequence.rb +73 -0
  58. data/lib/regextest/front/unicode.rb +1272 -0
  59. data/lib/regextest/regex-option.rb +144 -0
  60. data/lib/regextest/regexp.rb +44 -0
  61. data/lib/regextest/version.rb +5 -0
  62. data/lib/tst-reg-test.rb +159 -0
  63. data/regextest.gemspec +26 -0
  64. metadata +162 -0
@@ -0,0 +1,271 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ class RegextestFrontParser
6
+ options no_result_var
7
+ rule
8
+ # regular expression
9
+ reg_exp: reg_sel
10
+
11
+ # selectable elements
12
+ reg_sel:
13
+ {TEmpty.new}
14
+ | reg_seq
15
+ {Selectable.new(val[0])}
16
+ | reg_sel LEX_OR reg_seq
17
+ {val[0].add(val[2])}
18
+ | reg_sel LEX_OR
19
+ {val[0].add(TEmpty.new)}
20
+ | LEX_OR reg_sel
21
+ {Selectable.new(TEmpty.new).add(val[1])}
22
+
23
+ # sequence of elements
24
+ reg_seq: reg_rep
25
+ {Sequence.new(val[0])}
26
+ | reg_seq reg_rep
27
+ {val[0].add(val[1])}
28
+ | LEX_OPTION_PAREN_1 reg_seq_ex # ((?x)a b c). transit to extended mode
29
+ {Sequence.new(Paren.new(val[0])).concatinate(val[1])}
30
+ | reg_seq LEX_OPTION_PAREN_1 reg_seq_ex # (a (?x)b c). transit to extended mode
31
+ {val[0].add(Paren.new(val[1])).concatinate(val[2])}
32
+ | LEX_OPTION_PAREN_2 reg_seq # ((?-x)a b c). stay basic mode
33
+ {Sequence.new(Paren.new(val[0])).concatinate(val[1])}
34
+ | reg_seq LEX_OPTION_PAREN_2 reg_seq # (a (?-x)b c). stay basic mode
35
+ {val[0].add(Paren.new(val[1])).concatinate(val[2])}
36
+ | LEX_OPTION_PAREN_1 # ((?x)). option paren with nothing after it (would transit to extended mode)
37
+ {Sequence.new(Paren.new(val[0]))}
38
+ | LEX_OPTION_PAREN_2 # ((?-x)). option paren with nothing after it (stay basic mode)
39
+ {Sequence.new(Paren.new(val[0]))}
40
+
41
+ # repeatable elements
42
+ reg_rep: reg_elm
43
+ {Repeatable.new(val[0])}
44
+ | reg_rep LEX_QUANTIFIER
45
+ {val[0].set_quant(val[1])}
46
+
47
+ # element (a letter or selectable element in parentheses)
48
+ reg_elm: reg_let
49
+ {val[0]}
50
+ | LEX_PAREN_START reg_sel LEX_PAREN_END
51
+ {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
52
+ | LEX_PAREN_START_EX1 reg_sel_ex LEX_PAREN_END
53
+ {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
54
+ | LEX_PAREN_START_EX2 reg_sel LEX_PAREN_END
55
+ {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
56
+
57
+ # letter
58
+ reg_let: LEX_CHAR {TLetter.new(:LEX_CHAR, val[0])}
59
+ | LEX_OCTET {TLetter.new(:LEX_OCTET, val[0])}
60
+ | LEX_BACK_REFER {BackRefer.new(:LEX_BACK_REFER, val[0])}
61
+ | LEX_CODE_LITERAL {TLetter.new(:LEX_CODE_LITERAL, val[0])}
62
+ | LEX_NAMED_REFER {BackRefer.new(:LEX_NAMED_REFER, val[0])}
63
+ | LEX_NAMED_GENERATE {BackRefer.new(:LEX_NAMED_GENERATE, val[0])}
64
+ | LEX_CONTROL_LETTER {TLetter.new(:LEX_CONTROL_LETTER, val[0])}
65
+ | LEX_META_LETTER {TLetter.new(:LEX_CONTROL_LETTER, val[0])}
66
+ | LEX_ESCAPED_LETTER {TLetter.new(:LEX_ESCAPED_LETTER, val[0])}
67
+ | LEX_UNICODE {TLetter.new(:LEX_UNICODE, val[0])}
68
+ | LEX_SIMPLIFIED_CLASS {TLetter.new(:LEX_SIMPLIFIED_CLASS, val[0])}
69
+ | LEX_UNICODE_CLASS {TLetter.new(:LEX_UNICODE_CLASS, val[0])}
70
+ | LEX_BRACKET {@bracket_parser.parse(val[0], @options)} # using another parser
71
+ | LEX_ANC_LINE_BEGIN {Anchor.new(:LEX_ANC_LINE_BEGIN, val[0])}
72
+ | LEX_ANC_LINE_END {Anchor.new(:LEX_ANC_LINE_END, val[0])}
73
+ | LEX_ANC_WORD_BOUND {Anchor.new(:LEX_ANC_WORD_BOUND, val[0])}
74
+ | LEX_ANC_WORD_UNBOUND {Anchor.new(:LEX_ANC_WORD_UNBOUND, val[0])}
75
+ | LEX_ANC_STRING_BEGIN {Anchor.new(:LEX_ANC_STRING_BEGIN, val[0])}
76
+ | LEX_ANC_STRING_END {Anchor.new(:LEX_ANC_STRING_END, val[0])}
77
+ | LEX_ANC_STRING_END2 {Anchor.new(:LEX_ANC_STRING_END2, val[0])}
78
+ | LEX_ANC_LOOK_BEHIND2 {Anchor.new(:LEX_ANC_LOOK_BEHIND2, val[0])}
79
+ | LEX_ANC_MATCH_START {Anchor.new(:LEX_ANC_MATCH_START, val[0])}
80
+ | LEX_SPECIAL_LETTER {TLetter.new(:LEX_SPECIAL_LETTER, val[0])}
81
+ | LEX_MINUS {TLetter.new(:LEX_CHAR, val[0])} # no special meaning at basic mode
82
+ | LEX_AND_AND {TLetter.new(:LEX_AND_AND, val[0])}
83
+ | LEX_SPACE {TLetter.new(:LEX_SPACE, val[0])}
84
+ | LEX_SIMPLE_ESCAPE {TLetter.new(:LEX_SIMPLE_ESCAPE, val[0])}
85
+ | LEX_SHARP {TLetter.new(:LEX_CHAR, val[0])} # no special meaning at basic mode
86
+ | LEX_NEW_LINE {TLetter.new(:LEX_CHAR, val[0])} # no special meaning at basic mode
87
+ | LEX_ANY_LETTER {TLetter.new(:LEX_ANY_LETTER, val[0])}
88
+
89
+ # EXTENDED MODE
90
+ # selectable elements
91
+ reg_sel_ex:
92
+ {TEmpty.new}
93
+ | reg_seq_ex
94
+ {Selectable.new(val[0])}
95
+ | reg_sel_ex LEX_OR reg_seq_ex
96
+ {val[0].add(val[2])}
97
+ | reg_sel_ex LEX_OR
98
+ {val[0].add(TEmpty.new)}
99
+ | LEX_OR reg_sel_ex
100
+ {Selectable.new(TEmpty.new).add(val[1])}
101
+
102
+ # sequence of elements
103
+ reg_seq_ex: reg_rep_ex
104
+ {Sequence.new(val[0])}
105
+ | reg_seq_ex reg_rep_ex
106
+ {val[0].add(val[1])}
107
+ | LEX_OPTION_PAREN_1 reg_seq_ex # ((?x)a b c). stay extended mode
108
+ {Sequence.new(Paren.new(val[0])).concatinate(val[1])}
109
+ | reg_seq_ex LEX_OPTION_PAREN_1 reg_seq_ex # (a (?x)b c). stay extended mode
110
+ {val[0].add(Paren.new(val[1])).concatinate(val[2])}
111
+ | LEX_OPTION_PAREN_2 reg_seq # ((?-x)a b c). transit to basic mode
112
+ {Sequence.new(Paren.new(val[0])).concatinate(val[1])}
113
+ | reg_seq_ex LEX_OPTION_PAREN_2 reg_seq # (a (?-x)b c). transit to basic mode
114
+ {val[0].add(Paren.new(val[1])).concatinate(val[2])}
115
+ | LEX_OPTION_PAREN_1 # ((?x)). option paren with nothing after it (stay extended mode)
116
+ {Sequence.new(Paren.new(val[0]))}
117
+ | LEX_OPTION_PAREN_2 # ((?-x)). option paren with nothing after it (would transit to basic mode)
118
+ {Sequence.new(Paren.new(val[0]))}
119
+
120
+ # repeatable elements
121
+ reg_rep_ex: reg_elm_ex
122
+ {Repeatable.new(val[0])}
123
+ | reg_rep_ex LEX_QUANTIFIER
124
+ {val[0].set_quant(val[1])}
125
+
126
+ # element (a letter or selectable element in parentheses)
127
+ reg_elm_ex: reg_let_ex
128
+ {val[0]}
129
+ | LEX_PAREN_START reg_sel_ex LEX_PAREN_END
130
+ {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
131
+ | LEX_PAREN_START_EX1 reg_sel_ex LEX_PAREN_END
132
+ {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
133
+ | LEX_PAREN_START_EX2 reg_sel LEX_PAREN_END
134
+ {@options[:parens].add(Paren.new(val[0], val[1], val[2]))}
135
+
136
+ # letter
137
+ reg_let_ex: LEX_CHAR {TLetter.new(:LEX_CHAR, val[0])}
138
+ | LEX_OCTET {TLetter.new(:LEX_OCTET, val[0])}
139
+ | LEX_BACK_REFER {BackRefer.new(:LEX_BACK_REFER, val[0])}
140
+ | LEX_CODE_LITERAL {TLetter.new(:LEX_CODE_LITERAL, val[0])}
141
+ | LEX_NAMED_REFER {BackRefer.new(:LEX_NAMED_REFER, val[0])}
142
+ | LEX_NAMED_GENERATE {BackRefer.new(:LEX_NAMED_GENERATE, val[0])}
143
+ | LEX_CONTROL_LETTER {TLetter.new(:LEX_CONTROL_LETTER, val[0])}
144
+ | LEX_META_LETTER {TLetter.new(:LEX_CONTROL_LETTER, val[0])}
145
+ | LEX_ESCAPED_LETTER {TLetter.new(:LEX_ESCAPED_LETTER, val[0])}
146
+ | LEX_UNICODE {TLetter.new(:LEX_UNICODE, val[0])}
147
+ | LEX_SIMPLIFIED_CLASS {TLetter.new(:LEX_SIMPLIFIED_CLASS, val[0])}
148
+ | LEX_UNICODE_CLASS {TLetter.new(:LEX_UNICODE_CLASS, val[0])}
149
+ | LEX_BRACKET {@bracket_parser.parse(val[0], @options)} # using another parser
150
+ | LEX_ANC_LINE_BEGIN {Anchor.new(:LEX_ANC_LINE_BEGIN, val[0])}
151
+ | LEX_ANC_LINE_END {Anchor.new(:LEX_ANC_LINE_END, val[0])}
152
+ | LEX_ANC_WORD_BOUND {Anchor.new(:LEX_ANC_WORD_BOUND, val[0])}
153
+ | LEX_ANC_WORD_UNBOUND {Anchor.new(:LEX_ANC_WORD_UNBOUND, val[0])}
154
+ | LEX_ANC_STRING_BEGIN {Anchor.new(:LEX_ANC_STRING_BEGIN, val[0])}
155
+ | LEX_ANC_STRING_END {Anchor.new(:LEX_ANC_STRING_END, val[0])}
156
+ | LEX_ANC_STRING_END2 {Anchor.new(:LEX_ANC_STRING_END2, val[0])}
157
+ | LEX_ANC_LOOK_BEHIND2 {Anchor.new(:LEX_ANC_LOOK_BEHIND2, val[0])}
158
+ | LEX_ANC_MATCH_START {Anchor.new(:LEX_ANC_MATCH_START, val[0])}
159
+ | LEX_SPECIAL_LETTER {TLetter.new(:LEX_SPECIAL_LETTER, val[0])}
160
+ | LEX_MINUS {TLetter.new(:LEX_CHAR, val[0])}
161
+ | LEX_AND_AND {TLetter.new(:LEX_AND_AND, val[0])}
162
+ | LEX_NEW_LINE {TEmpty.new} # ignore new line at extended mode
163
+ | LEX_SPACE {TEmpty.new} # ignore spaces at extended mode
164
+ | LEX_SIMPLE_ESCAPE {TLetter.new(:LEX_SIMPLE_ESCAPE, val[0])}
165
+ | LEX_ANY_LETTER {TLetter.new(:LEX_ANY_LETTER, val[0])}
166
+ | LEX_SHARP reg_comment_ex {TEmpty.new}
167
+
168
+ # comment of extended mode
169
+ reg_comment_ex: LEX_NEW_LINE # end of the comment
170
+ | LEX_CHAR reg_comment_ex
171
+ | LEX_OCTET reg_comment_ex
172
+ | LEX_BACK_REFER reg_comment_ex
173
+ | LEX_CODE_LITERAL reg_comment_ex
174
+ | LEX_NAMED_REFER reg_comment_ex
175
+ | LEX_NAMED_GENERATE reg_comment_ex
176
+ | LEX_CONTROL_LETTER reg_comment_ex
177
+ | LEX_META_LETTER reg_comment_ex
178
+ | LEX_ESCAPED_LETTER reg_comment_ex
179
+ | LEX_UNICODE reg_comment_ex
180
+ | LEX_SIMPLIFIED_CLASS reg_comment_ex
181
+ | LEX_UNICODE_CLASS reg_comment_ex
182
+ | LEX_BRACKET reg_comment_ex
183
+ | LEX_ANC_LINE_BEGIN reg_comment_ex
184
+ | LEX_ANC_LINE_END reg_comment_ex
185
+ | LEX_ANC_WORD_BOUND reg_comment_ex
186
+ | LEX_ANC_WORD_UNBOUND reg_comment_ex
187
+ | LEX_ANC_STRING_BEGIN reg_comment_ex
188
+ | LEX_ANC_STRING_END reg_comment_ex
189
+ | LEX_ANC_STRING_END2 reg_comment_ex
190
+ | LEX_ANC_LOOK_BEHIND2 reg_comment_ex
191
+ | LEX_ANC_MATCH_START reg_comment_ex
192
+ | LEX_SPECIAL_LETTER reg_comment_ex
193
+ | LEX_MINUS reg_comment_ex
194
+ | LEX_AND_AND reg_comment_ex
195
+ | LEX_SPACE reg_comment_ex
196
+ | LEX_SIMPLE_ESCAPE reg_comment_ex
197
+ | LEX_ANY_LETTER reg_comment_ex
198
+ | LEX_SHARP reg_comment_ex
199
+ | LEX_PAREN_START reg_comment_ex
200
+ | LEX_PAREN_START_EX1 reg_comment_ex
201
+ | LEX_PAREN_START_EX2 reg_comment_ex
202
+ | LEX_PAREN_END reg_comment_ex
203
+ | LEX_QUANTIFIER reg_comment_ex
204
+ | LEX_OR reg_comment_ex
205
+
206
+ end
207
+
208
+ ---- header
209
+ # parser classes
210
+ require 'regextest/front/empty' # parser class for empty part ("", (|) etc.)
211
+ require 'regextest/front/letter' # parser class for a letter
212
+ require 'regextest/front/range' # parser class for a range of letters
213
+ require 'regextest/front/selectable' # parser class for a selectable element
214
+ require 'regextest/front/parenthesis' # parser class for a parenthesis
215
+ require 'regextest/front/repeatable' # parser class for a repeatable elements
216
+ require 'regextest/front/sequence' # parser class for a sequence of elements
217
+ require 'regextest/front/bracket' # parser class for a character class (bracket)
218
+ require 'regextest/front/anchor' # parser class for a anchor
219
+ require 'regextest/front/back-refer' # parser class for a back reference
220
+ require 'regextest/front/bracket-parser' # bracket parser
221
+
222
+ ---- inner
223
+ # modules for sharing procedures with bracket parser
224
+ include Regextest::Front::Empty
225
+ include Regextest::Front::Letter
226
+ include Regextest::Front::Range
227
+ include Regextest::Front::Selectable
228
+ include Regextest::Front::Parenthesis
229
+ include Regextest::Front::Repeatable
230
+ include Regextest::Front::Sequence
231
+ include Regextest::Front::Bracket
232
+ include Regextest::Front::Anchor
233
+ include Regextest::Front::BackRefer
234
+
235
+ # execute to parse
236
# Parse a list of scanned tokens with the racc-generated grammar.
#
# @param lex_words [Array<Array>] scanned tokens; element 0 of each token is
#   its type symbol (e.g. :LEX_CHAR). The array itself is not modified.
# @param options [Hash] parser options; kept in @options, where the grammar
#   actions read @options[:parens] and hand the hash to the bracket parser
# @return [Object] whatever do_parse returns for the start rule
# @raise [RuntimeError] with the message of the underlying Racc::ParseError
def parse(lex_words, options)
  @options = options

  # Token queue consumed by next_token. Comment tokens are dropped here
  # (handling them in the grammar is complicated). A filtered copy is used
  # because next_token shifts tokens off @q, which would otherwise empty
  # the caller's array as a side effect.
  @q = lex_words.reject { |token| token[0] == :LEX_COMMENT }

  # bracket parser (class name is strange because of racc's restriction)
  @bracket_parser = RegextestFrontBracketParser.new

  begin
    do_parse
  rescue Racc::ParseError => ex
    # re-raised as a plain RuntimeError carrying only the message
    raise ex.message
  end
end
257
+
258
+ # return the next scanned token, removing it from the front of @q (Array#shift gives nil once @q is empty)
259
+ def next_token
260
+ @q.shift
261
+ end
262
+
263
+ # error handling routine. commented out because of readability problem
264
+ #def on_error(t, val, vstack)
265
+ # if val
266
+ # raise "Parse error. offset=#{val[1]}, letter=#{val[0]}, stack=#{vstack}"
267
+ # else
268
+ # raise "Parse error. t=#{t}, val=#{val}, vstack=#{vstack}"
269
+ # end
270
+ #end
271
+
@@ -0,0 +1,60 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ require 'regextest/common'
6
+ require 'regextest/front/case-folding' # case folding hash
7
+
8
+ # Consecutive codepoints
9
module Regextest::Front::Range
  # A run of consecutive Unicode codepoints, e.g. the "a-z" part of a bracket.
  class TRange
    include Regextest::Common

    # counter shared by all instances; used to build unique element ids
    @@id = 0

    # offset/length are placeholders (always -1) in this class
    attr_reader :offset, :length

    # @param letter_begin [String, Integer, Regextest::Front::Letter::TLetter]
    #   first letter of the range
    # @param letter_end [String, Integer, Regextest::Front::Letter::TLetter, nil]
    #   last letter of the range; when omitted the range holds one codepoint
    def initialize(letter_begin, letter_end = nil)
      TstLog("TRange: #{letter_begin}-#{letter_end}")
      @begin = parse_letter(letter_begin)
      @end = letter_end ? parse_letter(letter_end) : @begin

      @offset = -1
      @length = -1
    end

    # Convert one range boundary into a codepoint.
    #
    # @param letter [String, Integer, Regextest::Front::Letter::TLetter]
    # @return [Integer, nil] codepoint of the first character (nil for an
    #   empty string); an Integer is returned unchanged
    # @raise [RuntimeError] when letter is of any other class
    def parse_letter(letter)
      if letter.is_a?(String)
        letter.unpack("U*").first
      elsif letter.is_a?(Integer)
        letter
      elsif letter.is_a?(Regextest::Front::Letter::TLetter)
        # The letter text is evaluated as a double-quoted Ruby string literal,
        # which resolves escape notation in it.
        # NOTE(review): eval runs on text that originates from the regular
        # expression under test; a value containing '"' or '#{' would break
        # out of the literal -- confirm the scanner can never emit that.
        quoted = '"' + letter.value + '"'
        eval(quoted).unpack("U*").first
      else
        raise "Internal error. invalid letter class #{letter}"
      end
    end

    # @return [Array<Integer>] every codepoint from begin to end, inclusive
    def enumerate
      [*(@begin..@end)]
    end

    # Render the range as a JSON object string (Unicode codepoints).
    # Each call consumes a new "G<n>" id.
    #
    # @return [String]
    def json
      @@id += 1
      [
        "{\"type\": \"LEX_RANGE\", ",
        "\"id\": \"G#{@@id}\", ",
        "\"begin\": #{@begin}, ",
        "\"end\": #{@end}}"
      ].join
    end
  end
end
56
+
57
+ # Test suite (execute when this file is specified in command line)
58
+ if __FILE__ == $0
59
+ end
60
+
@@ -0,0 +1,90 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ # Quantifier class
6
module Regextest::Front::Repeat
  # One quantifier (?, *, +, {n}, {n,m}, {,m}, {n,}) with an optional
  # trailing "?" (reluctant) or "+" (possessive) marker.
  class Repeat
    include Regextest::Common

    # Bit flags accumulated in @option
    TstOptGreedy = 1
    TstOptReluctant = 2
    TstOptPossessive = 4

    attr_reader :max_value, :min_value, :option

    # @param param [String, nil] quantifier notation such as "*?" or "{2,3}";
    #   nil/false leaves the default of exactly one repetition with no flags
    # @raise [RuntimeError] when the notation is not recognized
    def initialize(param)
      @min_value = 1
      @max_value = 1
      @option = 0
      set_values(param) if param
    end

    # Derive minimum, maximum and option flags from a quantifier notation.
    # Open-ended forms use TstConstRepeatMax as their upper bound.
    #
    # @param param [String] quantifier notation
    # @raise [RuntimeError] when the notation is not recognized
    def set_values(param)
      case param
      when '?', '??', '?+'
        apply_quantifier(0, 1, param[1..-1])
      when '*', '*?', '*+'
        apply_quantifier(0, TstConstRepeatMax, param[1..-1])
      when '+', '+?', '++'
        apply_quantifier(1, TstConstRepeatMax, param[1..-1])
      when /^\{(\d+)\}([\?\+]?)$/ # {3}, etc.
        apply_quantifier($1.to_i, $1.to_i, $2)
      when /^\{(\d+),(\d+)\}([\?\+]?)$/ # {2,3}, etc.
        # two numeric captures here, so the suffix is the third group
        apply_quantifier($1.to_i, $2.to_i, $3)
      when /^\{,(\d+)\}([\?\+]?)$/ # {,3}, etc.
        apply_quantifier(0, $1.to_i, $2)
      when /^\{(\d+),\}([\?\+]?)$/ # {3,}, etc.
        lower = $1.to_i
        upper = TstConstRepeatMax
        # keep the range non-empty when the lower bound exceeds the default cap
        upper = lower + TstConstRepeatMax if upper < lower
        apply_quantifier(lower, upper, $2)
      else
        raise "Error: repeat notation #{param} invalid"
      end
    end

    # a+?, etc.
    # @return [Boolean] true when the reluctant flag is set
    def is_reluctant?
      (@option & TstOptReluctant) != 0
    end

    # a++, etc.
    # @return [Boolean] true when the possessive flag is set
    def is_possessive?
      (@option & TstOptPossessive) != 0
    end

    private

    # Store the bounds and add the flag selected by the suffix.
    # The suffix group is optional in the patterns above, so an absent marker
    # arrives as "" (never nil) and means greedy.
    # NOTE(review): Onigmo's Ruby syntax reads "{n,m}+" as a repeated group
    # rather than a possessive quantifier -- confirm which notations the
    # scanner actually passes in.
    def apply_quantifier(min, max, suffix)
      @min_value = min
      @max_value = max
      @option |=
        case suffix
        when '?' then TstOptReluctant
        when '+' then TstOptPossessive
        else TstOptGreedy
        end
    end
  end
end
87
+
88
+ # Test suite (execute when this file is specified in command line)
89
+ if __FILE__ == $0
90
+ end
@@ -0,0 +1,77 @@
1
+ # encoding: utf-8
2
+
3
+ # Copyright (C) 2016 Mikio Ikoma
4
+
5
+ require 'regextest/common'
6
+ require 'regextest/front/repeat'
7
+
8
+ # An element (a letter or a parenthesis) with quantifier
9
module Regextest::Front::Repeatable
  # A parsed element (a letter or a parenthesis) together with the
  # quantifiers attached to it.
  class Repeatable
    include Regextest::Common
    include Regextest::Front::Repeat

    # counter shared by all instances; used to build unique element ids
    @@id = 0

    attr_reader :offset, :length

    # @param value [Object] wrapped element; it must respond to offset, length,
    #   set_options and json
    def initialize(value)
      TstLog("Repeatable: #{value}")
      @value = value
      @offset = value.offset
      @length = value.length
      @quant = []
    end

    # Attach one more quantifier to the element.
    #
    # @param quant_value [Array] quantifier token; index 0 is the notation
    #   handed to Repeat.new and index 2 is added to this element's length
    # @return [Repeatable] self, so calls can be chained
    def set_quant(quant_value)
      notation = quant_value[0]
      @length += quant_value[2]
      TstLog("Repeatable quant: #{quant_value}")
      @quant << Repeat.new(notation)
      self
    end

    # Forward the options to the wrapped element.
    #
    # @param options [Hash] options; :reg_options is only read for logging here
    # @return [Repeatable] self
    def set_options(options)
      TstLog("Repeatable set_options: #{options[:reg_options].inspect}")
      @value.set_options(options)
      self
    end

    # Render as a JSON string. Each quantifier contributes one LEX_REPEAT
    # object: all opening parts come first, then the wrapped element's JSON,
    # then one closing part per quantifier. Without quantifiers the result is
    # just the wrapped element's JSON.
    #
    # @return [String]
    def json
      # ids are taken before the wrapped element is rendered
      prefix = ""
      @quant.each do
        @@id += 1
        prefix << "{\"type\": \"LEX_REPEAT\", " \
                  " \"id\": \"m#{@@id}\", " \
                  " \"value\": "
      end

      document = prefix + @value.json
      return document if @quant.empty?

      closings = @quant.map do |repeat|
        flags = []
        flags << "reluctant" if repeat.is_reluctant?
        flags << "possessive" if repeat.is_possessive?

        [
          " \"offset\": #{@offset}, ",
          " \"length\": #{@length}, ",
          " \"min_repeat\": #{repeat.min_value}, ",
          " \"max_repeat\": #{repeat.max_value}, ",
          " \"repeat_option\": #{flags} ",
          "}"
        ].join
      end

      document + ", " + closings.join(", ")
    end
  end
end
74
+
75
+ # Test suite (execute when this file is specified in command line)
76
+ if __FILE__ == $0
77
+ end