regexp-examples 1.1.3 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,24 @@
1
+ require_relative 'parser_helpers/parse_group_helper'
2
+ require_relative 'parser_helpers/parse_after_backslash_group_helper'
3
+ require_relative 'parser_helpers/parse_multi_group_helper'
4
+ require_relative 'parser_helpers/parse_repeater_helper'
5
+ require_relative 'parser_helpers/charset_negation_helper'
6
+
7
+ # :nodoc:
1
8
  module RegexpExamples
2
9
  IllegalSyntaxError = Class.new(StandardError)
10
+ # A Regexp parser, used to build a structured collection of objects that represents
11
+ # the regular expression.
12
+ # This object can then be used to generate strings that match the regular expression.
3
13
  class Parser
14
+ include ParseGroupHelper
15
+ include ParseAfterBackslashGroupHelper
16
+ include ParseMultiGroupHelper
17
+ include ParseRepeaterHelper
18
+ include CharsetNegationHelper
19
+
4
20
  attr_reader :regexp_string
21
+
5
22
  def initialize(regexp_string, regexp_options)
6
23
  @regexp_string = regexp_string
7
24
  @ignorecase = !(regexp_options & Regexp::IGNORECASE).zero?
@@ -27,310 +44,39 @@ module RegexpExamples
27
44
  def parse_group(repeaters)
28
45
  case next_char
29
46
  when '('
30
- group = parse_multi_group
47
+ parse_multi_group
31
48
  when '['
32
- group = parse_char_group
49
+ parse_char_group
33
50
  when '.'
34
- group = parse_dot_group
51
+ parse_dot_group
35
52
  when '|'
36
- group = parse_or_group(repeaters)
53
+ parse_or_group(repeaters)
37
54
  when '\\'
38
- group = parse_after_backslash_group
55
+ parse_after_backslash_group
39
56
  when '^'
40
- group = parse_caret
57
+ parse_caret
41
58
  when '$'
42
- group = parse_dollar
59
+ parse_dollar
43
60
  when /[#\s]/
44
- group = parse_extended_whitespace
61
+ parse_extended_whitespace
45
62
  else
46
- group = parse_single_char_group(next_char)
63
+ parse_single_char_group(next_char)
47
64
  end
48
- group
49
65
  end
50
66
 
51
67
  def parse_repeater(group)
52
68
  case next_char
53
69
  when '*'
54
- repeater = parse_star_repeater(group)
70
+ parse_star_repeater(group)
55
71
  when '+'
56
- repeater = parse_plus_repeater(group)
72
+ parse_plus_repeater(group)
57
73
  when '?'
58
- repeater = parse_question_mark_repeater(group)
74
+ parse_question_mark_repeater(group)
59
75
  when '{'
60
- repeater = parse_range_repeater(group)
61
- else
62
- repeater = parse_one_time_repeater(group)
63
- end
64
- repeater
65
- end
66
-
67
- def parse_caret
68
- if @current_position == 0
69
- return PlaceHolderGroup.new # Ignore the "illegal" character
70
- else
71
- raise_anchors_exception!
72
- end
73
- end
74
-
75
- def parse_dollar
76
- if @current_position == (regexp_string.length - 1)
77
- return PlaceHolderGroup.new # Ignore the "illegal" character
78
- else
79
- raise_anchors_exception!
80
- end
81
- end
82
-
83
- def parse_extended_whitespace
84
- if @extended
85
- skip_whitespace
86
- group = PlaceHolderGroup.new # Ignore the whitespace/comment
87
- else
88
- group = parse_single_char_group(next_char)
89
- end
90
- group
91
- end
92
-
93
- def skip_whitespace
94
- whitespace_chars = rest_of_string.match(/#.*|\s+/)[0]
95
- @current_position += whitespace_chars.length - 1
96
- end
97
-
98
- def parse_after_backslash_group
99
- @current_position += 1
100
- case
101
- when rest_of_string =~ /\A(\d{1,3})/
102
- @current_position += (Regexp.last_match(1).length - 1) # In case of 10+ backrefs!
103
- group = parse_backreference_group(Regexp.last_match(1))
104
- when rest_of_string =~ /\Ak['<]([\w-]+)['>]/ # Named capture group
105
- @current_position += (Regexp.last_match(1).length + 2)
106
- group_id = if Regexp.last_match(1).to_i < 0
107
- # RELATIVE group number, e.g. /(a)(b)(c)(d) \k<-2>/
108
- @num_groups + Regexp.last_match(1).to_i + 1
109
- else
110
- Regexp.last_match(1)
111
- end
112
- group = parse_backreference_group(group_id)
113
- when BackslashCharMap.keys.include?(next_char)
114
- group = CharGroup.new(
115
- BackslashCharMap[next_char].dup,
116
- @ignorecase
117
- )
118
- when rest_of_string =~ /\A(c|C-)(.)/ # Control character
119
- @current_position += Regexp.last_match(1).length
120
- group = parse_single_char_group(parse_control_character(Regexp.last_match(2)))
121
- when rest_of_string =~ /\Ax(\h{1,2})/ # Escape sequence
122
- @current_position += Regexp.last_match(1).length
123
- group = parse_single_char_group(parse_unicode_sequence(Regexp.last_match(1)))
124
- when rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/ # Unicode sequence
125
- @current_position += Regexp.last_match(1).length
126
- sequence = Regexp.last_match(1).match(/\h{1,4}/)[0] # Strip off "{" and "}"
127
- group = parse_single_char_group(parse_unicode_sequence(sequence))
128
- when rest_of_string =~ /\A(p)\{(\^?)([^}]+)\}/i # Named properties
129
- @current_position += (Regexp.last_match(2).length + # 0 or 1, of '^' is present
130
- Regexp.last_match(3).length + # Length of the property name
131
- 2) # Length of opening and closing brackets (always 2)
132
- # Beware of double negatives! E.g. /\P{^Space}/
133
- is_negative = (Regexp.last_match(1) == 'P') ^ (Regexp.last_match(2) == '^')
134
- group = CharGroup.new(
135
- if is_negative
136
- CharSets::Any.dup - NamedPropertyCharMap[Regexp.last_match(3).downcase]
137
- else
138
- NamedPropertyCharMap[Regexp.last_match(3).downcase]
139
- end,
140
- @ignorecase
141
- )
142
- when next_char == 'K' # Keep (special lookbehind that CAN be supported safely!)
143
- group = PlaceHolderGroup.new
144
- when next_char == 'R' # Linebreak
145
- group = CharGroup.new(
146
- ["\r\n", "\n", "\v", "\f", "\r"],
147
- @ignorecase
148
- ) # Using "\r\n" as one character is little bit hacky...
149
- when next_char == 'g' # Subexpression call
150
- fail IllegalSyntaxError,
151
- 'Subexpression calls (\\g) cannot be supported, as they are not regular'
152
- when next_char =~ /[bB]/ # Anchors
153
- raise_anchors_exception!
154
- when next_char =~ /[AG]/ # Start of string
155
- if @current_position == 1
156
- group = PlaceHolderGroup.new
157
- else
158
- raise_anchors_exception!
159
- end
160
- when next_char =~ /[zZ]/ # End of string
161
- if @current_position == (regexp_string.length - 1)
162
- # TODO: /\Z/ should be treated as /\n?/
163
- group = PlaceHolderGroup.new
164
- else
165
- raise_anchors_exception!
166
- end
167
- else
168
- group = parse_single_char_group(next_char)
169
- end
170
- group
171
- end
172
-
173
- def parse_multi_group
174
- @current_position += 1
175
- @num_groups += 1
176
- remember_old_regexp_options do
177
- group_id = nil # init
178
- rest_of_string.match(
179
- /
180
- \A
181
- (\?)? # Is it a "special" group, i.e. starts with a "?"?
182
- (
183
- : # Non capture group
184
- |! # Neglookahead
185
- |= # Lookahead
186
- |\# # Comment group
187
- |< # Lookbehind or named capture
188
- (
189
- ! # Neglookbehind
190
- |= # Lookbehind
191
- |[^>]+ # Named capture
192
- )
193
- |[mix]*-?[mix]* # Option toggle
194
- )?
195
- /x
196
- ) do |match|
197
- case
198
- when match[1].nil? # e.g. /(normal)/
199
- group_id = @num_groups.to_s
200
- when match[2] == ':' # e.g. /(?:nocapture)/
201
- @current_position += 2
202
- when match[2] == '#' # e.g. /(?#comment)/
203
- comment_group = rest_of_string.match(/.*?[^\\](?:\\{2})*\)/)[0]
204
- @current_position += comment_group.length
205
- when match[2] =~ /\A(?=[mix-]+)([mix]*)-?([mix]*)/ # e.g. /(?i-mx)/
206
- regexp_options_toggle(Regexp.last_match(1), Regexp.last_match(2))
207
- @num_groups -= 1 # Toggle "groups" should not increase backref group count
208
- @current_position += $&.length + 1
209
- if next_char == ':' # e.g. /(?i:subexpr)/
210
- @current_position += 1
211
- else
212
- return PlaceHolderGroup.new
213
- end
214
- when %w(! =).include?(match[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/
215
- fail IllegalSyntaxError,
216
- 'Lookaheads are not regular; cannot generate examples'
217
- when %w(! =).include?(match[3]) # e.g. /(?<=lookbehind)/, /(?<!neglookbehind)/
218
- fail IllegalSyntaxError,
219
- 'Lookbehinds are not regular; cannot generate examples'
220
- else # e.g. /(?<name>namedgroup)/
221
- @current_position += (match[3].length + 3)
222
- group_id = match[3]
223
- end
224
- end
225
- MultiGroup.new(parse, group_id)
226
- end
227
- end
228
-
229
- def remember_old_regexp_options
230
- previous_ignorecase = @ignorecase
231
- previous_multiline = @multiline
232
- previous_extended = @extended
233
- group = yield
234
- @ignorecase = previous_ignorecase
235
- @multiline = previous_multiline
236
- @extended = previous_extended
237
- group
238
- end
239
-
240
- def regexp_options_toggle(on, off)
241
- regexp_option_toggle(on, off, '@ignorecase', 'i')
242
- regexp_option_toggle(on, off, '@multiline', 'm')
243
- regexp_option_toggle(on, off, '@extended', 'x')
244
- end
245
-
246
- def regexp_option_toggle(on, off, var, char)
247
- instance_variable_set(var, true) if on.include? char
248
- instance_variable_set(var, false) if off.include? char
249
- end
250
-
251
- def parse_char_group
252
- @current_position += 1 # Skip past opening "["
253
- chargroup_parser = ChargroupParser.new(rest_of_string)
254
- parsed_chars = chargroup_parser.result
255
- @current_position += (chargroup_parser.length - 1) # Step back to closing "]"
256
- CharGroup.new(parsed_chars, @ignorecase)
257
- end
258
-
259
- def parse_dot_group
260
- DotGroup.new(@multiline)
261
- end
262
-
263
- def parse_or_group(left_repeaters)
264
- @current_position += 1
265
- right_repeaters = parse
266
- OrGroup.new(left_repeaters, right_repeaters)
267
- end
268
-
269
- def parse_single_char_group(char)
270
- SingleCharGroup.new(char, @ignorecase)
271
- end
272
-
273
- def parse_backreference_group(group_id)
274
- BackReferenceGroup.new(group_id)
275
- end
276
-
277
- def parse_control_character(char)
278
- (char.ord % 32).chr # Black magic!
279
- # eval "?\\C-#{char.chr}" # Doesn't work for e.g. char = "?"
280
- end
281
-
282
- def parse_unicode_sequence(match)
283
- [match.to_i(16)].pack('U')
284
- end
285
-
286
- def parse_star_repeater(group)
287
- @current_position += 1
288
- parse_reluctant_or_possessive_repeater
289
- StarRepeater.new(group)
290
- end
291
-
292
- def parse_plus_repeater(group)
293
- @current_position += 1
294
- parse_reluctant_or_possessive_repeater
295
- PlusRepeater.new(group)
296
- end
297
-
298
- def parse_reluctant_or_possessive_repeater
299
- if next_char =~ /[?+]/
300
- # Don't treat these repeaters any differently when generating examples
301
- @current_position += 1
302
- end
303
- end
304
-
305
- def parse_question_mark_repeater(group)
306
- @current_position += 1
307
- parse_reluctant_or_possessive_repeater
308
- QuestionMarkRepeater.new(group)
309
- end
310
-
311
- def parse_range_repeater(group)
312
- match = rest_of_string.match(/\A\{(\d+)?(,)?(\d+)?\}/)
313
- @current_position += match[0].size
314
- min = match[1].to_i if match[1]
315
- has_comma = !match[2].nil?
316
- max = match[3].to_i if match[3]
317
- repeater = RangeRepeater.new(group, min, has_comma, max)
318
- parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
319
- end
320
-
321
- def parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
322
- # .{1}? should be equivalent to (?:.{1})?, i.e. NOT a "non-greedy quantifier"
323
- if min && !has_comma && !max && next_char == '?'
324
- repeater = parse_question_mark_repeater(repeater)
76
+ parse_range_repeater(group)
325
77
  else
326
- parse_reluctant_or_possessive_repeater
78
+ parse_one_time_repeater(group)
327
79
  end
328
- repeater
329
- end
330
-
331
- def raise_anchors_exception!
332
- fail IllegalSyntaxError,
333
- "Anchors ('#{next_char}') cannot be supported, as they are not regular"
334
80
  end
335
81
 
336
82
  def parse_one_time_repeater(group)
@@ -0,0 +1,8 @@
1
+ # A common helper used throughout various parser methods
2
+ module RegexpExamples
3
+ module CharsetNegationHelper
4
+ def negate_if(charset, is_negative)
5
+ is_negative ? (CharSets::Any.dup - charset) : charset
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,144 @@
1
+ module RegexpExamples
2
+ # A collection of related helper methods, utilised by the `Parser` class
3
+ module ParseAfterBackslashGroupHelper
4
+ protected
5
+
6
+ def parse_after_backslash_group
7
+ @current_position += 1
8
+ case
9
+ when rest_of_string =~ /\A(\d{1,3})/
10
+ parse_regular_backreference_group(Regexp.last_match(1))
11
+ when rest_of_string =~ /\Ak['<]([\w-]+)['>]/
12
+ parse_named_backreference_group(Regexp.last_match(1))
13
+ when BackslashCharMap.keys.include?(next_char)
14
+ parse_backslash_special_char
15
+ when rest_of_string =~ /\A(c|C-)(.)/
16
+ parse_backslash_control_char(Regexp.last_match(1), Regexp.last_match(2))
17
+ when rest_of_string =~ /\Ax(\h{1,2})/
18
+ parse_backslash_escape_sequence(Regexp.last_match(1))
19
+ when rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/
20
+ parse_backslash_unicode_sequence(Regexp.last_match(1))
21
+ when rest_of_string =~ /\A(p)\{(\^?)([^}]+)\}/i
22
+ parse_backslash_named_property(
23
+ Regexp.last_match(1), Regexp.last_match(2), Regexp.last_match(3)
24
+ )
25
+ when next_char == 'K' # Keep (special lookbehind that CAN be supported safely!)
26
+ PlaceHolderGroup.new
27
+ when next_char == 'R'
28
+ parse_backslash_linebreak
29
+ when next_char == 'g'
30
+ parse_backslash_subexpresion_call
31
+ when next_char =~ /[bB]/
32
+ parse_backslash_anchor
33
+ when next_char =~ /[AG]/
34
+ parse_backslash_start_of_string
35
+ when next_char =~ /[zZ]/
36
+ # TODO: /\Z/ should be treated as /\n?/
37
+ parse_backslash_end_of_string
38
+ else
39
+ parse_single_char_group(next_char)
40
+ end
41
+ end
42
+
43
+ def parse_regular_backreference_group(group_id)
44
+ @current_position += (group_id.length - 1) # In case of 10+ backrefs!
45
+ parse_backreference_group(group_id)
46
+ end
47
+
48
+ def parse_named_backreference_group(group_name)
49
+ @current_position += (group_name.length + 2)
50
+ group_id = if group_name.to_i < 0
51
+ # RELATIVE group number, e.g. /(a)(b)(c)(d) \k<-2>/
52
+ @num_groups + group_name.to_i + 1
53
+ else
54
+ group_name
55
+ end
56
+ parse_backreference_group(group_id)
57
+ end
58
+
59
+ def parse_backreference_group(group_id)
60
+ BackReferenceGroup.new(group_id)
61
+ end
62
+
63
+ def parse_backslash_special_char
64
+ CharGroup.new(
65
+ BackslashCharMap[next_char].dup,
66
+ @ignorecase
67
+ )
68
+ end
69
+
70
+ def parse_backslash_control_char(control_syntax, control_code)
71
+ @current_position += control_syntax.length
72
+ parse_single_char_group(parse_control_character(control_code))
73
+ end
74
+
75
+ def parse_backslash_escape_sequence(escape_sequence)
76
+ @current_position += escape_sequence.length
77
+ parse_single_char_group(parse_unicode_sequence(escape_sequence))
78
+ end
79
+
80
+ def parse_control_character(char)
81
+ (char.ord % 32).chr # Black magic!
82
+ # eval "?\\C-#{char.chr}" # Doesn't work for e.g. char = "?"
83
+ end
84
+
85
+ def parse_unicode_sequence(match)
86
+ [match.to_i(16)].pack('U')
87
+ end
88
+
89
+ def parse_backslash_unicode_sequence(full_hex_sequence)
90
+ @current_position += full_hex_sequence.length
91
+ sequence = full_hex_sequence.match(/\h{1,4}/)[0] # Strip off "{" and "}"
92
+ parse_single_char_group(parse_unicode_sequence(sequence))
93
+ end
94
+
95
+ def parse_backslash_named_property(p_negation, caret_negation, property_name)
96
+ @current_position += (caret_negation.length + # 0 or 1, if '^' is present
97
+ property_name.length +
98
+ 2) # Length of opening and closing brackets (always 2)
99
+ # Beware of double negatives! E.g. /\P{^Space}/
100
+ is_negative = (p_negation == 'P') ^ (caret_negation == '^')
101
+ CharGroup.new(
102
+ negate_if(NamedPropertyCharMap[property_name.downcase], is_negative),
103
+ @ignorecase
104
+ )
105
+ end
106
+
107
+ def parse_backslash_linebreak
108
+ CharGroup.new(
109
+ ["\r\n", "\n", "\v", "\f", "\r"],
110
+ @ignorecase
111
+ ) # Using "\r\n" as one character is a little bit hacky...
112
+ end
113
+
114
+ def parse_backslash_subexpresion_call
115
+ fail IllegalSyntaxError,
116
+ 'Subexpression calls (\\g) cannot be supported, as they are not regular'
117
+ end
118
+
119
+ def parse_backslash_anchor
120
+ raise_anchors_exception!
121
+ end
122
+
123
+ def parse_backslash_start_of_string
124
+ if @current_position == 1
125
+ PlaceHolderGroup.new
126
+ else
127
+ raise_anchors_exception!
128
+ end
129
+ end
130
+
131
+ def parse_backslash_end_of_string
132
+ if @current_position == (regexp_string.length - 1)
133
+ PlaceHolderGroup.new
134
+ else
135
+ raise_anchors_exception!
136
+ end
137
+ end
138
+
139
+ def raise_anchors_exception!
140
+ fail IllegalSyntaxError,
141
+ "Anchors ('#{next_char}') cannot be supported, as they are not regular"
142
+ end
143
+ end
144
+ end
@@ -0,0 +1,58 @@
1
+ module RegexpExamples
2
+ # A collection of related helper methods, utilised by the `Parser` class
3
+ module ParseGroupHelper
4
+ protected
5
+
6
+ def parse_caret
7
+ if @current_position == 0
8
+ PlaceHolderGroup.new # Ignore the "illegal" character
9
+ else
10
+ raise_anchors_exception!
11
+ end
12
+ end
13
+
14
+ def parse_dollar
15
+ if @current_position == (regexp_string.length - 1)
16
+ PlaceHolderGroup.new # Ignore the "illegal" character
17
+ else
18
+ raise_anchors_exception!
19
+ end
20
+ end
21
+
22
+ def parse_extended_whitespace
23
+ if @extended
24
+ skip_whitespace
25
+ PlaceHolderGroup.new # Ignore the whitespace/comment
26
+ else
27
+ parse_single_char_group(next_char)
28
+ end
29
+ end
30
+
31
+ def skip_whitespace
32
+ whitespace_chars = rest_of_string.match(/#.*|\s+/)[0]
33
+ @current_position += whitespace_chars.length - 1
34
+ end
35
+
36
+ def parse_single_char_group(char)
37
+ SingleCharGroup.new(char, @ignorecase)
38
+ end
39
+
40
+ def parse_char_group
41
+ @current_position += 1 # Skip past opening "["
42
+ chargroup_parser = ChargroupParser.new(rest_of_string)
43
+ chargroup_parser.parse
44
+ @current_position += (chargroup_parser.length - 1) # Step back to closing "]"
45
+ CharGroup.new(chargroup_parser.result, @ignorecase)
46
+ end
47
+
48
+ def parse_dot_group
49
+ DotGroup.new(@multiline)
50
+ end
51
+
52
+ def parse_or_group(left_repeaters)
53
+ @current_position += 1
54
+ right_repeaters = parse
55
+ OrGroup.new(left_repeaters, right_repeaters)
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,85 @@
1
+ module RegexpExamples
2
+ # A collection of related helper methods, utilised by the `Parser` class
3
+ module ParseMultiGroupHelper
4
+ protected
5
+
6
+ def parse_multi_group
7
+ # TODO: Clean up this ugly mess of a method!
8
+ @current_position += 1
9
+ @num_groups += 1
10
+ remember_old_regexp_options do
11
+ group_id = nil # init
12
+ rest_of_string.match(
13
+ /
14
+ \A
15
+ (\?)? # Is it a "special" group, i.e. starts with a "?"?
16
+ (
17
+ : # Non capture group
18
+ |! # Neglookahead
19
+ |= # Lookahead
20
+ |\# # Comment group
21
+ |< # Lookbehind or named capture
22
+ (
23
+ ! # Neglookbehind
24
+ |= # Lookbehind
25
+ |[^>]+ # Named capture
26
+ )
27
+ |[mix]*-?[mix]* # Option toggle
28
+ )?
29
+ /x
30
+ ) do |match|
31
+ case
32
+ when match[1].nil? # e.g. /(normal)/
33
+ group_id = @num_groups.to_s
34
+ when match[2] == ':' # e.g. /(?:nocapture)/
35
+ @current_position += 2
36
+ when match[2] == '#' # e.g. /(?#comment)/
37
+ comment_group = rest_of_string.match(/.*?[^\\](?:\\{2})*\)/)[0]
38
+ @current_position += comment_group.length
39
+ when match[2] =~ /\A(?=[mix-]+)([mix]*)-?([mix]*)/ # e.g. /(?i-mx)/
40
+ regexp_options_toggle(Regexp.last_match(1), Regexp.last_match(2))
41
+ @num_groups -= 1 # Toggle "groups" should not increase backref group count
42
+ @current_position += $&.length + 1
43
+ if next_char == ':' # e.g. /(?i:subexpr)/
44
+ @current_position += 1
45
+ else
46
+ return PlaceHolderGroup.new
47
+ end
48
+ when %w(! =).include?(match[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/
49
+ fail IllegalSyntaxError,
50
+ 'Lookaheads are not regular; cannot generate examples'
51
+ when %w(! =).include?(match[3]) # e.g. /(?<=lookbehind)/, /(?<!neglookbehind)/
52
+ fail IllegalSyntaxError,
53
+ 'Lookbehinds are not regular; cannot generate examples'
54
+ else # e.g. /(?<name>namedgroup)/
55
+ @current_position += (match[3].length + 3)
56
+ group_id = match[3]
57
+ end
58
+ end
59
+ MultiGroup.new(parse, group_id)
60
+ end
61
+ end
62
+
63
+ def remember_old_regexp_options
64
+ previous_ignorecase = @ignorecase
65
+ previous_multiline = @multiline
66
+ previous_extended = @extended
67
+ group = yield
68
+ @ignorecase = previous_ignorecase
69
+ @multiline = previous_multiline
70
+ @extended = previous_extended
71
+ group
72
+ end
73
+
74
+ def regexp_options_toggle(on, off)
75
+ regexp_option_toggle(on, off, '@ignorecase', 'i')
76
+ regexp_option_toggle(on, off, '@multiline', 'm')
77
+ regexp_option_toggle(on, off, '@extended', 'x')
78
+ end
79
+
80
+ def regexp_option_toggle(on, off, var, char)
81
+ instance_variable_set(var, true) if on.include? char
82
+ instance_variable_set(var, false) if off.include? char
83
+ end
84
+ end
85
+ end
@@ -0,0 +1,51 @@
1
+ module RegexpExamples
2
+ # A collection of related helper methods, utilised by the `Parser` class
3
+ module ParseRepeaterHelper
4
+ protected
5
+
6
+ def parse_star_repeater(group)
7
+ @current_position += 1
8
+ parse_reluctant_or_possessive_repeater
9
+ StarRepeater.new(group)
10
+ end
11
+
12
+ def parse_plus_repeater(group)
13
+ @current_position += 1
14
+ parse_reluctant_or_possessive_repeater
15
+ PlusRepeater.new(group)
16
+ end
17
+
18
+ def parse_reluctant_or_possessive_repeater
19
+ if next_char =~ /[?+]/
20
+ # Don't treat these repeaters any differently when generating examples
21
+ @current_position += 1
22
+ end
23
+ end
24
+
25
+ def parse_question_mark_repeater(group)
26
+ @current_position += 1
27
+ parse_reluctant_or_possessive_repeater
28
+ QuestionMarkRepeater.new(group)
29
+ end
30
+
31
+ def parse_range_repeater(group)
32
+ match = rest_of_string.match(/\A\{(\d+)?(,)?(\d+)?\}/)
33
+ @current_position += match[0].size
34
+ min = match[1].to_i if match[1]
35
+ has_comma = !match[2].nil?
36
+ max = match[3].to_i if match[3]
37
+ repeater = RangeRepeater.new(group, min, has_comma, max)
38
+ parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
39
+ end
40
+
41
+ def parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
42
+ # .{1}? should be equivalent to (?:.{1})?, i.e. NOT a "non-greedy quantifier"
43
+ if min && !has_comma && !max && next_char == '?'
44
+ repeater = parse_question_mark_repeater(repeater)
45
+ else
46
+ parse_reluctant_or_possessive_repeater
47
+ end
48
+ repeater
49
+ end
50
+ end
51
+ end