regexp-examples 1.1.3 → 1.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,24 @@
1
+ require_relative 'parser_helpers/parse_group_helper'
2
+ require_relative 'parser_helpers/parse_after_backslash_group_helper'
3
+ require_relative 'parser_helpers/parse_multi_group_helper'
4
+ require_relative 'parser_helpers/parse_repeater_helper'
5
+ require_relative 'parser_helpers/charset_negation_helper'
6
+
7
+ # :nodoc:
1
8
  module RegexpExamples
2
9
  IllegalSyntaxError = Class.new(StandardError)
10
+ # A Regexp parser, used to build a structured collection of objects that represents
11
+ # the regular expression.
12
+ # This object can then be used to generate strings that match the regular expression.
3
13
  class Parser
14
+ include ParseGroupHelper
15
+ include ParseAfterBackslashGroupHelper
16
+ include ParseMultiGroupHelper
17
+ include ParseRepeaterHelper
18
+ include CharsetNegationHelper
19
+
4
20
  attr_reader :regexp_string
21
+
5
22
  def initialize(regexp_string, regexp_options)
6
23
  @regexp_string = regexp_string
7
24
  @ignorecase = !(regexp_options & Regexp::IGNORECASE).zero?
@@ -27,310 +44,39 @@ module RegexpExamples
27
44
  def parse_group(repeaters)
28
45
  case next_char
29
46
  when '('
30
- group = parse_multi_group
47
+ parse_multi_group
31
48
  when '['
32
- group = parse_char_group
49
+ parse_char_group
33
50
  when '.'
34
- group = parse_dot_group
51
+ parse_dot_group
35
52
  when '|'
36
- group = parse_or_group(repeaters)
53
+ parse_or_group(repeaters)
37
54
  when '\\'
38
- group = parse_after_backslash_group
55
+ parse_after_backslash_group
39
56
  when '^'
40
- group = parse_caret
57
+ parse_caret
41
58
  when '$'
42
- group = parse_dollar
59
+ parse_dollar
43
60
  when /[#\s]/
44
- group = parse_extended_whitespace
61
+ parse_extended_whitespace
45
62
  else
46
- group = parse_single_char_group(next_char)
63
+ parse_single_char_group(next_char)
47
64
  end
48
- group
49
65
  end
50
66
 
51
67
  def parse_repeater(group)
52
68
  case next_char
53
69
  when '*'
54
- repeater = parse_star_repeater(group)
70
+ parse_star_repeater(group)
55
71
  when '+'
56
- repeater = parse_plus_repeater(group)
72
+ parse_plus_repeater(group)
57
73
  when '?'
58
- repeater = parse_question_mark_repeater(group)
74
+ parse_question_mark_repeater(group)
59
75
  when '{'
60
- repeater = parse_range_repeater(group)
61
- else
62
- repeater = parse_one_time_repeater(group)
63
- end
64
- repeater
65
- end
66
-
67
- def parse_caret
68
- if @current_position == 0
69
- return PlaceHolderGroup.new # Ignore the "illegal" character
70
- else
71
- raise_anchors_exception!
72
- end
73
- end
74
-
75
- def parse_dollar
76
- if @current_position == (regexp_string.length - 1)
77
- return PlaceHolderGroup.new # Ignore the "illegal" character
78
- else
79
- raise_anchors_exception!
80
- end
81
- end
82
-
83
- def parse_extended_whitespace
84
- if @extended
85
- skip_whitespace
86
- group = PlaceHolderGroup.new # Ignore the whitespace/comment
87
- else
88
- group = parse_single_char_group(next_char)
89
- end
90
- group
91
- end
92
-
93
- def skip_whitespace
94
- whitespace_chars = rest_of_string.match(/#.*|\s+/)[0]
95
- @current_position += whitespace_chars.length - 1
96
- end
97
-
98
- def parse_after_backslash_group
99
- @current_position += 1
100
- case
101
- when rest_of_string =~ /\A(\d{1,3})/
102
- @current_position += (Regexp.last_match(1).length - 1) # In case of 10+ backrefs!
103
- group = parse_backreference_group(Regexp.last_match(1))
104
- when rest_of_string =~ /\Ak['<]([\w-]+)['>]/ # Named capture group
105
- @current_position += (Regexp.last_match(1).length + 2)
106
- group_id = if Regexp.last_match(1).to_i < 0
107
- # RELATIVE group number, e.g. /(a)(b)(c)(d) \k<-2>/
108
- @num_groups + Regexp.last_match(1).to_i + 1
109
- else
110
- Regexp.last_match(1)
111
- end
112
- group = parse_backreference_group(group_id)
113
- when BackslashCharMap.keys.include?(next_char)
114
- group = CharGroup.new(
115
- BackslashCharMap[next_char].dup,
116
- @ignorecase
117
- )
118
- when rest_of_string =~ /\A(c|C-)(.)/ # Control character
119
- @current_position += Regexp.last_match(1).length
120
- group = parse_single_char_group(parse_control_character(Regexp.last_match(2)))
121
- when rest_of_string =~ /\Ax(\h{1,2})/ # Escape sequence
122
- @current_position += Regexp.last_match(1).length
123
- group = parse_single_char_group(parse_unicode_sequence(Regexp.last_match(1)))
124
- when rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/ # Unicode sequence
125
- @current_position += Regexp.last_match(1).length
126
- sequence = Regexp.last_match(1).match(/\h{1,4}/)[0] # Strip off "{" and "}"
127
- group = parse_single_char_group(parse_unicode_sequence(sequence))
128
- when rest_of_string =~ /\A(p)\{(\^?)([^}]+)\}/i # Named properties
129
- @current_position += (Regexp.last_match(2).length + # 0 or 1, of '^' is present
130
- Regexp.last_match(3).length + # Length of the property name
131
- 2) # Length of opening and closing brackets (always 2)
132
- # Beware of double negatives! E.g. /\P{^Space}/
133
- is_negative = (Regexp.last_match(1) == 'P') ^ (Regexp.last_match(2) == '^')
134
- group = CharGroup.new(
135
- if is_negative
136
- CharSets::Any.dup - NamedPropertyCharMap[Regexp.last_match(3).downcase]
137
- else
138
- NamedPropertyCharMap[Regexp.last_match(3).downcase]
139
- end,
140
- @ignorecase
141
- )
142
- when next_char == 'K' # Keep (special lookbehind that CAN be supported safely!)
143
- group = PlaceHolderGroup.new
144
- when next_char == 'R' # Linebreak
145
- group = CharGroup.new(
146
- ["\r\n", "\n", "\v", "\f", "\r"],
147
- @ignorecase
148
- ) # Using "\r\n" as one character is little bit hacky...
149
- when next_char == 'g' # Subexpression call
150
- fail IllegalSyntaxError,
151
- 'Subexpression calls (\\g) cannot be supported, as they are not regular'
152
- when next_char =~ /[bB]/ # Anchors
153
- raise_anchors_exception!
154
- when next_char =~ /[AG]/ # Start of string
155
- if @current_position == 1
156
- group = PlaceHolderGroup.new
157
- else
158
- raise_anchors_exception!
159
- end
160
- when next_char =~ /[zZ]/ # End of string
161
- if @current_position == (regexp_string.length - 1)
162
- # TODO: /\Z/ should be treated as /\n?/
163
- group = PlaceHolderGroup.new
164
- else
165
- raise_anchors_exception!
166
- end
167
- else
168
- group = parse_single_char_group(next_char)
169
- end
170
- group
171
- end
172
-
173
- def parse_multi_group
174
- @current_position += 1
175
- @num_groups += 1
176
- remember_old_regexp_options do
177
- group_id = nil # init
178
- rest_of_string.match(
179
- /
180
- \A
181
- (\?)? # Is it a "special" group, i.e. starts with a "?"?
182
- (
183
- : # Non capture group
184
- |! # Neglookahead
185
- |= # Lookahead
186
- |\# # Comment group
187
- |< # Lookbehind or named capture
188
- (
189
- ! # Neglookbehind
190
- |= # Lookbehind
191
- |[^>]+ # Named capture
192
- )
193
- |[mix]*-?[mix]* # Option toggle
194
- )?
195
- /x
196
- ) do |match|
197
- case
198
- when match[1].nil? # e.g. /(normal)/
199
- group_id = @num_groups.to_s
200
- when match[2] == ':' # e.g. /(?:nocapture)/
201
- @current_position += 2
202
- when match[2] == '#' # e.g. /(?#comment)/
203
- comment_group = rest_of_string.match(/.*?[^\\](?:\\{2})*\)/)[0]
204
- @current_position += comment_group.length
205
- when match[2] =~ /\A(?=[mix-]+)([mix]*)-?([mix]*)/ # e.g. /(?i-mx)/
206
- regexp_options_toggle(Regexp.last_match(1), Regexp.last_match(2))
207
- @num_groups -= 1 # Toggle "groups" should not increase backref group count
208
- @current_position += $&.length + 1
209
- if next_char == ':' # e.g. /(?i:subexpr)/
210
- @current_position += 1
211
- else
212
- return PlaceHolderGroup.new
213
- end
214
- when %w(! =).include?(match[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/
215
- fail IllegalSyntaxError,
216
- 'Lookaheads are not regular; cannot generate examples'
217
- when %w(! =).include?(match[3]) # e.g. /(?<=lookbehind)/, /(?<!neglookbehind)/
218
- fail IllegalSyntaxError,
219
- 'Lookbehinds are not regular; cannot generate examples'
220
- else # e.g. /(?<name>namedgroup)/
221
- @current_position += (match[3].length + 3)
222
- group_id = match[3]
223
- end
224
- end
225
- MultiGroup.new(parse, group_id)
226
- end
227
- end
228
-
229
- def remember_old_regexp_options
230
- previous_ignorecase = @ignorecase
231
- previous_multiline = @multiline
232
- previous_extended = @extended
233
- group = yield
234
- @ignorecase = previous_ignorecase
235
- @multiline = previous_multiline
236
- @extended = previous_extended
237
- group
238
- end
239
-
240
- def regexp_options_toggle(on, off)
241
- regexp_option_toggle(on, off, '@ignorecase', 'i')
242
- regexp_option_toggle(on, off, '@multiline', 'm')
243
- regexp_option_toggle(on, off, '@extended', 'x')
244
- end
245
-
246
- def regexp_option_toggle(on, off, var, char)
247
- instance_variable_set(var, true) if on.include? char
248
- instance_variable_set(var, false) if off.include? char
249
- end
250
-
251
- def parse_char_group
252
- @current_position += 1 # Skip past opening "["
253
- chargroup_parser = ChargroupParser.new(rest_of_string)
254
- parsed_chars = chargroup_parser.result
255
- @current_position += (chargroup_parser.length - 1) # Step back to closing "]"
256
- CharGroup.new(parsed_chars, @ignorecase)
257
- end
258
-
259
- def parse_dot_group
260
- DotGroup.new(@multiline)
261
- end
262
-
263
- def parse_or_group(left_repeaters)
264
- @current_position += 1
265
- right_repeaters = parse
266
- OrGroup.new(left_repeaters, right_repeaters)
267
- end
268
-
269
- def parse_single_char_group(char)
270
- SingleCharGroup.new(char, @ignorecase)
271
- end
272
-
273
- def parse_backreference_group(group_id)
274
- BackReferenceGroup.new(group_id)
275
- end
276
-
277
- def parse_control_character(char)
278
- (char.ord % 32).chr # Black magic!
279
- # eval "?\\C-#{char.chr}" # Doesn't work for e.g. char = "?"
280
- end
281
-
282
- def parse_unicode_sequence(match)
283
- [match.to_i(16)].pack('U')
284
- end
285
-
286
- def parse_star_repeater(group)
287
- @current_position += 1
288
- parse_reluctant_or_possessive_repeater
289
- StarRepeater.new(group)
290
- end
291
-
292
- def parse_plus_repeater(group)
293
- @current_position += 1
294
- parse_reluctant_or_possessive_repeater
295
- PlusRepeater.new(group)
296
- end
297
-
298
- def parse_reluctant_or_possessive_repeater
299
- if next_char =~ /[?+]/
300
- # Don't treat these repeaters any differently when generating examples
301
- @current_position += 1
302
- end
303
- end
304
-
305
- def parse_question_mark_repeater(group)
306
- @current_position += 1
307
- parse_reluctant_or_possessive_repeater
308
- QuestionMarkRepeater.new(group)
309
- end
310
-
311
- def parse_range_repeater(group)
312
- match = rest_of_string.match(/\A\{(\d+)?(,)?(\d+)?\}/)
313
- @current_position += match[0].size
314
- min = match[1].to_i if match[1]
315
- has_comma = !match[2].nil?
316
- max = match[3].to_i if match[3]
317
- repeater = RangeRepeater.new(group, min, has_comma, max)
318
- parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
319
- end
320
-
321
- def parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
322
- # .{1}? should be equivalent to (?:.{1})?, i.e. NOT a "non-greedy quantifier"
323
- if min && !has_comma && !max && next_char == '?'
324
- repeater = parse_question_mark_repeater(repeater)
76
+ parse_range_repeater(group)
325
77
  else
326
- parse_reluctant_or_possessive_repeater
78
+ parse_one_time_repeater(group)
327
79
  end
328
- repeater
329
- end
330
-
331
- def raise_anchors_exception!
332
- fail IllegalSyntaxError,
333
- "Anchors ('#{next_char}') cannot be supported, as they are not regular"
334
80
  end
335
81
 
336
82
  def parse_one_time_repeater(group)
@@ -0,0 +1,8 @@
1
+ # A common helper used throughout various parser methods
2
+ module RegexpExamples
3
+ module CharsetNegationHelper
4
+ def negate_if(charset, is_negative)
5
+ is_negative ? (CharSets::Any.dup - charset) : charset
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,144 @@
1
+ module RegexpExamples
2
+ # A collection of related helper methods, utilised by the `Parser` class
3
+ module ParseAfterBackslashGroupHelper
4
+ protected
5
+
6
+ def parse_after_backslash_group
7
+ @current_position += 1
8
+ case
9
+ when rest_of_string =~ /\A(\d{1,3})/
10
+ parse_regular_backreference_group(Regexp.last_match(1))
11
+ when rest_of_string =~ /\Ak['<]([\w-]+)['>]/
12
+ parse_named_backreference_group(Regexp.last_match(1))
13
+ when BackslashCharMap.keys.include?(next_char)
14
+ parse_backslash_special_char
15
+ when rest_of_string =~ /\A(c|C-)(.)/
16
+ parse_backslash_control_char(Regexp.last_match(1), Regexp.last_match(2))
17
+ when rest_of_string =~ /\Ax(\h{1,2})/
18
+ parse_backslash_escape_sequence(Regexp.last_match(1))
19
+ when rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/
20
+ parse_backslash_unicode_sequence(Regexp.last_match(1))
21
+ when rest_of_string =~ /\A(p)\{(\^?)([^}]+)\}/i
22
+ parse_backslash_named_property(
23
+ Regexp.last_match(1), Regexp.last_match(2), Regexp.last_match(3)
24
+ )
25
+ when next_char == 'K' # Keep (special lookbehind that CAN be supported safely!)
26
+ PlaceHolderGroup.new
27
+ when next_char == 'R'
28
+ parse_backslash_linebreak
29
+ when next_char == 'g'
30
+ parse_backslash_subexpresion_call
31
+ when next_char =~ /[bB]/
32
+ parse_backslash_anchor
33
+ when next_char =~ /[AG]/
34
+ parse_backslash_start_of_string
35
+ when next_char =~ /[zZ]/
36
+ # TODO: /\Z/ should be treated as /\n?/
37
+ parse_backslash_end_of_string
38
+ else
39
+ parse_single_char_group(next_char)
40
+ end
41
+ end
42
+
43
+ def parse_regular_backreference_group(group_id)
44
+ @current_position += (group_id.length - 1) # In case of 10+ backrefs!
45
+ parse_backreference_group(group_id)
46
+ end
47
+
48
+ def parse_named_backreference_group(group_name)
49
+ @current_position += (group_name.length + 2)
50
+ group_id = if group_name.to_i < 0
51
+ # RELATIVE group number, e.g. /(a)(b)(c)(d) \k<-2>/
52
+ @num_groups + group_name.to_i + 1
53
+ else
54
+ group_name
55
+ end
56
+ parse_backreference_group(group_id)
57
+ end
58
+
59
+ def parse_backreference_group(group_id)
60
+ BackReferenceGroup.new(group_id)
61
+ end
62
+
63
+ def parse_backslash_special_char
64
+ CharGroup.new(
65
+ BackslashCharMap[next_char].dup,
66
+ @ignorecase
67
+ )
68
+ end
69
+
70
+ def parse_backslash_control_char(control_syntax, control_code)
71
+ @current_position += control_syntax.length
72
+ parse_single_char_group(parse_control_character(control_code))
73
+ end
74
+
75
+ def parse_backslash_escape_sequence(escape_sequence)
76
+ @current_position += escape_sequence.length
77
+ parse_single_char_group(parse_unicode_sequence(escape_sequence))
78
+ end
79
+
80
+ def parse_control_character(char)
81
+ (char.ord % 32).chr # Black magic!
82
+ # eval "?\\C-#{char.chr}" # Doesn't work for e.g. char = "?"
83
+ end
84
+
85
+ def parse_unicode_sequence(match)
86
+ [match.to_i(16)].pack('U')
87
+ end
88
+
89
+ def parse_backslash_unicode_sequence(full_hex_sequence)
90
+ @current_position += full_hex_sequence.length
91
+ sequence = full_hex_sequence.match(/\h{1,4}/)[0] # Strip off "{" and "}"
92
+ parse_single_char_group(parse_unicode_sequence(sequence))
93
+ end
94
+
95
+ def parse_backslash_named_property(p_negation, caret_negation, property_name)
96
+ @current_position += (caret_negation.length + # 0 or 1, if '^' is present
97
+ property_name.length +
98
+ 2) # Length of opening and closing brackets (always 2)
99
+ # Beware of double negatives! E.g. /\P{^Space}/
100
+ is_negative = (p_negation == 'P') ^ (caret_negation == '^')
101
+ CharGroup.new(
102
+ negate_if(NamedPropertyCharMap[property_name.downcase], is_negative),
103
+ @ignorecase
104
+ )
105
+ end
106
+
107
+ def parse_backslash_linebreak
108
+ CharGroup.new(
109
+ ["\r\n", "\n", "\v", "\f", "\r"],
110
+ @ignorecase
111
+ ) # Using "\r\n" as one character is a little bit hacky...
112
+ end
113
+
114
+ def parse_backslash_subexpresion_call
115
+ fail IllegalSyntaxError,
116
+ 'Subexpression calls (\\g) cannot be supported, as they are not regular'
117
+ end
118
+
119
+ def parse_backslash_anchor
120
+ raise_anchors_exception!
121
+ end
122
+
123
+ def parse_backslash_start_of_string
124
+ if @current_position == 1
125
+ PlaceHolderGroup.new
126
+ else
127
+ raise_anchors_exception!
128
+ end
129
+ end
130
+
131
+ def parse_backslash_end_of_string
132
+ if @current_position == (regexp_string.length - 1)
133
+ PlaceHolderGroup.new
134
+ else
135
+ raise_anchors_exception!
136
+ end
137
+ end
138
+
139
+ def raise_anchors_exception!
140
+ fail IllegalSyntaxError,
141
+ "Anchors ('#{next_char}') cannot be supported, as they are not regular"
142
+ end
143
+ end
144
+ end
@@ -0,0 +1,58 @@
1
+ module RegexpExamples
2
+ # A collection of related helper methods, utilised by the `Parser` class
3
+ module ParseGroupHelper
4
+ protected
5
+
6
+ def parse_caret
7
+ if @current_position == 0
8
+ PlaceHolderGroup.new # Ignore the "illegal" character
9
+ else
10
+ raise_anchors_exception!
11
+ end
12
+ end
13
+
14
+ def parse_dollar
15
+ if @current_position == (regexp_string.length - 1)
16
+ PlaceHolderGroup.new # Ignore the "illegal" character
17
+ else
18
+ raise_anchors_exception!
19
+ end
20
+ end
21
+
22
+ def parse_extended_whitespace
23
+ if @extended
24
+ skip_whitespace
25
+ PlaceHolderGroup.new # Ignore the whitespace/comment
26
+ else
27
+ parse_single_char_group(next_char)
28
+ end
29
+ end
30
+
31
+ def skip_whitespace
32
+ whitespace_chars = rest_of_string.match(/#.*|\s+/)[0]
33
+ @current_position += whitespace_chars.length - 1
34
+ end
35
+
36
+ def parse_single_char_group(char)
37
+ SingleCharGroup.new(char, @ignorecase)
38
+ end
39
+
40
+ def parse_char_group
41
+ @current_position += 1 # Skip past opening "["
42
+ chargroup_parser = ChargroupParser.new(rest_of_string)
43
+ chargroup_parser.parse
44
+ @current_position += (chargroup_parser.length - 1) # Step back to closing "]"
45
+ CharGroup.new(chargroup_parser.result, @ignorecase)
46
+ end
47
+
48
+ def parse_dot_group
49
+ DotGroup.new(@multiline)
50
+ end
51
+
52
+ def parse_or_group(left_repeaters)
53
+ @current_position += 1
54
+ right_repeaters = parse
55
+ OrGroup.new(left_repeaters, right_repeaters)
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,85 @@
1
+ module RegexpExamples
2
+ # A collection of related helper methods, utilised by the `Parser` class
3
+ module ParseMultiGroupHelper
4
+ protected
5
+
6
+ def parse_multi_group
7
+ # TODO: Clean up this ugly mess of a method!
8
+ @current_position += 1
9
+ @num_groups += 1
10
+ remember_old_regexp_options do
11
+ group_id = nil # init
12
+ rest_of_string.match(
13
+ /
14
+ \A
15
+ (\?)? # Is it a "special" group, i.e. starts with a "?"?
16
+ (
17
+ : # Non capture group
18
+ |! # Neglookahead
19
+ |= # Lookahead
20
+ |\# # Comment group
21
+ |< # Lookbehind or named capture
22
+ (
23
+ ! # Neglookbehind
24
+ |= # Lookbehind
25
+ |[^>]+ # Named capture
26
+ )
27
+ |[mix]*-?[mix]* # Option toggle
28
+ )?
29
+ /x
30
+ ) do |match|
31
+ case
32
+ when match[1].nil? # e.g. /(normal)/
33
+ group_id = @num_groups.to_s
34
+ when match[2] == ':' # e.g. /(?:nocapture)/
35
+ @current_position += 2
36
+ when match[2] == '#' # e.g. /(?#comment)/
37
+ comment_group = rest_of_string.match(/.*?[^\\](?:\\{2})*\)/)[0]
38
+ @current_position += comment_group.length
39
+ when match[2] =~ /\A(?=[mix-]+)([mix]*)-?([mix]*)/ # e.g. /(?i-mx)/
40
+ regexp_options_toggle(Regexp.last_match(1), Regexp.last_match(2))
41
+ @num_groups -= 1 # Toggle "groups" should not increase backref group count
42
+ @current_position += $&.length + 1
43
+ if next_char == ':' # e.g. /(?i:subexpr)/
44
+ @current_position += 1
45
+ else
46
+ return PlaceHolderGroup.new
47
+ end
48
+ when %w(! =).include?(match[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/
49
+ fail IllegalSyntaxError,
50
+ 'Lookaheads are not regular; cannot generate examples'
51
+ when %w(! =).include?(match[3]) # e.g. /(?<=lookbehind)/, /(?<!neglookbehind)/
52
+ fail IllegalSyntaxError,
53
+ 'Lookbehinds are not regular; cannot generate examples'
54
+ else # e.g. /(?<name>namedgroup)/
55
+ @current_position += (match[3].length + 3)
56
+ group_id = match[3]
57
+ end
58
+ end
59
+ MultiGroup.new(parse, group_id)
60
+ end
61
+ end
62
+
63
+ def remember_old_regexp_options
64
+ previous_ignorecase = @ignorecase
65
+ previous_multiline = @multiline
66
+ previous_extended = @extended
67
+ group = yield
68
+ @ignorecase = previous_ignorecase
69
+ @multiline = previous_multiline
70
+ @extended = previous_extended
71
+ group
72
+ end
73
+
74
+ def regexp_options_toggle(on, off)
75
+ regexp_option_toggle(on, off, '@ignorecase', 'i')
76
+ regexp_option_toggle(on, off, '@multiline', 'm')
77
+ regexp_option_toggle(on, off, '@extended', 'x')
78
+ end
79
+
80
+ def regexp_option_toggle(on, off, var, char)
81
+ instance_variable_set(var, true) if on.include? char
82
+ instance_variable_set(var, false) if off.include? char
83
+ end
84
+ end
85
+ end
@@ -0,0 +1,51 @@
1
+ module RegexpExamples
2
+ # A collection of related helper methods, utilised by the `Parser` class
3
+ module ParseRepeaterHelper
4
+ protected
5
+
6
+ def parse_star_repeater(group)
7
+ @current_position += 1
8
+ parse_reluctant_or_possessive_repeater
9
+ StarRepeater.new(group)
10
+ end
11
+
12
+ def parse_plus_repeater(group)
13
+ @current_position += 1
14
+ parse_reluctant_or_possessive_repeater
15
+ PlusRepeater.new(group)
16
+ end
17
+
18
+ def parse_reluctant_or_possessive_repeater
19
+ if next_char =~ /[?+]/
20
+ # Don't treat these repeaters any differently when generating examples
21
+ @current_position += 1
22
+ end
23
+ end
24
+
25
+ def parse_question_mark_repeater(group)
26
+ @current_position += 1
27
+ parse_reluctant_or_possessive_repeater
28
+ QuestionMarkRepeater.new(group)
29
+ end
30
+
31
+ def parse_range_repeater(group)
32
+ match = rest_of_string.match(/\A\{(\d+)?(,)?(\d+)?\}/)
33
+ @current_position += match[0].size
34
+ min = match[1].to_i if match[1]
35
+ has_comma = !match[2].nil?
36
+ max = match[3].to_i if match[3]
37
+ repeater = RangeRepeater.new(group, min, has_comma, max)
38
+ parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
39
+ end
40
+
41
+ def parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
42
+ # .{1}? should be equivalent to (?:.{1})?, i.e. NOT a "non-greedy quantifier"
43
+ if min && !has_comma && !max && next_char == '?'
44
+ repeater = parse_question_mark_repeater(repeater)
45
+ else
46
+ parse_reluctant_or_possessive_repeater
47
+ end
48
+ repeater
49
+ end
50
+ end
51
+ end