regexp-examples 1.1.3 → 1.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/.travis.yml +0 -6
- data/db/unicode_ranges_2.1.pstore +1 -0
- data/db/unicode_ranges_2.3.pstore +0 -0
- data/db/unicode_ranges_2.4.pstore +0 -0
- data/lib/core_extensions/regexp/examples.rb +3 -0
- data/lib/regexp-examples/backreferences.rb +29 -13
- data/lib/regexp-examples/chargroup_parser.rb +15 -17
- data/lib/regexp-examples/constants.rb +10 -6
- data/lib/regexp-examples/groups.rb +11 -22
- data/lib/regexp-examples/helpers.rb +6 -7
- data/lib/regexp-examples/parser.rb +31 -285
- data/lib/regexp-examples/parser_helpers/charset_negation_helper.rb +8 -0
- data/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb +144 -0
- data/lib/regexp-examples/parser_helpers/parse_group_helper.rb +58 -0
- data/lib/regexp-examples/parser_helpers/parse_multi_group_helper.rb +85 -0
- data/lib/regexp-examples/parser_helpers/parse_repeater_helper.rb +51 -0
- data/lib/regexp-examples/repeaters.rb +21 -7
- data/lib/regexp-examples/unicode_char_ranges.rb +4 -0
- data/lib/regexp-examples/version.rb +2 -1
- data/lib/regexp-examples.rb +1 -1
- data/regexp-examples.gemspec +5 -4
- data/scripts/unicode_lister.rb +15 -11
- data/spec/helpers.rb +18 -0
- data/spec/regexp-examples_spec.rb +7 -15
- data/spec/regexp-random_example_spec.rb +4 -2
- data/spec/spec_helper.rb +10 -0
- metadata +14 -5
- data/db/unicode_ranges_2.1.pstore +0 -0
@@ -1,7 +1,24 @@
|
|
1
|
+
require_relative 'parser_helpers/parse_group_helper'
|
2
|
+
require_relative 'parser_helpers/parse_after_backslash_group_helper'
|
3
|
+
require_relative 'parser_helpers/parse_multi_group_helper'
|
4
|
+
require_relative 'parser_helpers/parse_repeater_helper'
|
5
|
+
require_relative 'parser_helpers/charset_negation_helper'
|
6
|
+
|
7
|
+
# :nodoc:
|
1
8
|
module RegexpExamples
|
2
9
|
IllegalSyntaxError = Class.new(StandardError)
|
10
|
+
# A Regexp parser, used to build a structured collection of objects that represents
|
11
|
+
# the regular expression.
|
12
|
+
# This object can then be used to generate strings that match the regular expression.
|
3
13
|
class Parser
|
14
|
+
include ParseGroupHelper
|
15
|
+
include ParseAfterBackslashGroupHelper
|
16
|
+
include ParseMultiGroupHelper
|
17
|
+
include ParseRepeaterHelper
|
18
|
+
include CharsetNegationHelper
|
19
|
+
|
4
20
|
attr_reader :regexp_string
|
21
|
+
|
5
22
|
def initialize(regexp_string, regexp_options)
|
6
23
|
@regexp_string = regexp_string
|
7
24
|
@ignorecase = !(regexp_options & Regexp::IGNORECASE).zero?
|
@@ -27,310 +44,39 @@ module RegexpExamples
|
|
27
44
|
def parse_group(repeaters)
|
28
45
|
case next_char
|
29
46
|
when '('
|
30
|
-
|
47
|
+
parse_multi_group
|
31
48
|
when '['
|
32
|
-
|
49
|
+
parse_char_group
|
33
50
|
when '.'
|
34
|
-
|
51
|
+
parse_dot_group
|
35
52
|
when '|'
|
36
|
-
|
53
|
+
parse_or_group(repeaters)
|
37
54
|
when '\\'
|
38
|
-
|
55
|
+
parse_after_backslash_group
|
39
56
|
when '^'
|
40
|
-
|
57
|
+
parse_caret
|
41
58
|
when '$'
|
42
|
-
|
59
|
+
parse_dollar
|
43
60
|
when /[#\s]/
|
44
|
-
|
61
|
+
parse_extended_whitespace
|
45
62
|
else
|
46
|
-
|
63
|
+
parse_single_char_group(next_char)
|
47
64
|
end
|
48
|
-
group
|
49
65
|
end
|
50
66
|
|
51
67
|
def parse_repeater(group)
|
52
68
|
case next_char
|
53
69
|
when '*'
|
54
|
-
|
70
|
+
parse_star_repeater(group)
|
55
71
|
when '+'
|
56
|
-
|
72
|
+
parse_plus_repeater(group)
|
57
73
|
when '?'
|
58
|
-
|
74
|
+
parse_question_mark_repeater(group)
|
59
75
|
when '{'
|
60
|
-
|
61
|
-
else
|
62
|
-
repeater = parse_one_time_repeater(group)
|
63
|
-
end
|
64
|
-
repeater
|
65
|
-
end
|
66
|
-
|
67
|
-
def parse_caret
|
68
|
-
if @current_position == 0
|
69
|
-
return PlaceHolderGroup.new # Ignore the "illegal" character
|
70
|
-
else
|
71
|
-
raise_anchors_exception!
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
def parse_dollar
|
76
|
-
if @current_position == (regexp_string.length - 1)
|
77
|
-
return PlaceHolderGroup.new # Ignore the "illegal" character
|
78
|
-
else
|
79
|
-
raise_anchors_exception!
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
def parse_extended_whitespace
|
84
|
-
if @extended
|
85
|
-
skip_whitespace
|
86
|
-
group = PlaceHolderGroup.new # Ignore the whitespace/comment
|
87
|
-
else
|
88
|
-
group = parse_single_char_group(next_char)
|
89
|
-
end
|
90
|
-
group
|
91
|
-
end
|
92
|
-
|
93
|
-
def skip_whitespace
|
94
|
-
whitespace_chars = rest_of_string.match(/#.*|\s+/)[0]
|
95
|
-
@current_position += whitespace_chars.length - 1
|
96
|
-
end
|
97
|
-
|
98
|
-
def parse_after_backslash_group
|
99
|
-
@current_position += 1
|
100
|
-
case
|
101
|
-
when rest_of_string =~ /\A(\d{1,3})/
|
102
|
-
@current_position += (Regexp.last_match(1).length - 1) # In case of 10+ backrefs!
|
103
|
-
group = parse_backreference_group(Regexp.last_match(1))
|
104
|
-
when rest_of_string =~ /\Ak['<]([\w-]+)['>]/ # Named capture group
|
105
|
-
@current_position += (Regexp.last_match(1).length + 2)
|
106
|
-
group_id = if Regexp.last_match(1).to_i < 0
|
107
|
-
# RELATIVE group number, e.g. /(a)(b)(c)(d) \k<-2>/
|
108
|
-
@num_groups + Regexp.last_match(1).to_i + 1
|
109
|
-
else
|
110
|
-
Regexp.last_match(1)
|
111
|
-
end
|
112
|
-
group = parse_backreference_group(group_id)
|
113
|
-
when BackslashCharMap.keys.include?(next_char)
|
114
|
-
group = CharGroup.new(
|
115
|
-
BackslashCharMap[next_char].dup,
|
116
|
-
@ignorecase
|
117
|
-
)
|
118
|
-
when rest_of_string =~ /\A(c|C-)(.)/ # Control character
|
119
|
-
@current_position += Regexp.last_match(1).length
|
120
|
-
group = parse_single_char_group(parse_control_character(Regexp.last_match(2)))
|
121
|
-
when rest_of_string =~ /\Ax(\h{1,2})/ # Escape sequence
|
122
|
-
@current_position += Regexp.last_match(1).length
|
123
|
-
group = parse_single_char_group(parse_unicode_sequence(Regexp.last_match(1)))
|
124
|
-
when rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/ # Unicode sequence
|
125
|
-
@current_position += Regexp.last_match(1).length
|
126
|
-
sequence = Regexp.last_match(1).match(/\h{1,4}/)[0] # Strip off "{" and "}"
|
127
|
-
group = parse_single_char_group(parse_unicode_sequence(sequence))
|
128
|
-
when rest_of_string =~ /\A(p)\{(\^?)([^}]+)\}/i # Named properties
|
129
|
-
@current_position += (Regexp.last_match(2).length + # 0 or 1, of '^' is present
|
130
|
-
Regexp.last_match(3).length + # Length of the property name
|
131
|
-
2) # Length of opening and closing brackets (always 2)
|
132
|
-
# Beware of double negatives! E.g. /\P{^Space}/
|
133
|
-
is_negative = (Regexp.last_match(1) == 'P') ^ (Regexp.last_match(2) == '^')
|
134
|
-
group = CharGroup.new(
|
135
|
-
if is_negative
|
136
|
-
CharSets::Any.dup - NamedPropertyCharMap[Regexp.last_match(3).downcase]
|
137
|
-
else
|
138
|
-
NamedPropertyCharMap[Regexp.last_match(3).downcase]
|
139
|
-
end,
|
140
|
-
@ignorecase
|
141
|
-
)
|
142
|
-
when next_char == 'K' # Keep (special lookbehind that CAN be supported safely!)
|
143
|
-
group = PlaceHolderGroup.new
|
144
|
-
when next_char == 'R' # Linebreak
|
145
|
-
group = CharGroup.new(
|
146
|
-
["\r\n", "\n", "\v", "\f", "\r"],
|
147
|
-
@ignorecase
|
148
|
-
) # Using "\r\n" as one character is little bit hacky...
|
149
|
-
when next_char == 'g' # Subexpression call
|
150
|
-
fail IllegalSyntaxError,
|
151
|
-
'Subexpression calls (\\g) cannot be supported, as they are not regular'
|
152
|
-
when next_char =~ /[bB]/ # Anchors
|
153
|
-
raise_anchors_exception!
|
154
|
-
when next_char =~ /[AG]/ # Start of string
|
155
|
-
if @current_position == 1
|
156
|
-
group = PlaceHolderGroup.new
|
157
|
-
else
|
158
|
-
raise_anchors_exception!
|
159
|
-
end
|
160
|
-
when next_char =~ /[zZ]/ # End of string
|
161
|
-
if @current_position == (regexp_string.length - 1)
|
162
|
-
# TODO: /\Z/ should be treated as /\n?/
|
163
|
-
group = PlaceHolderGroup.new
|
164
|
-
else
|
165
|
-
raise_anchors_exception!
|
166
|
-
end
|
167
|
-
else
|
168
|
-
group = parse_single_char_group(next_char)
|
169
|
-
end
|
170
|
-
group
|
171
|
-
end
|
172
|
-
|
173
|
-
def parse_multi_group
|
174
|
-
@current_position += 1
|
175
|
-
@num_groups += 1
|
176
|
-
remember_old_regexp_options do
|
177
|
-
group_id = nil # init
|
178
|
-
rest_of_string.match(
|
179
|
-
/
|
180
|
-
\A
|
181
|
-
(\?)? # Is it a "special" group, i.e. starts with a "?"?
|
182
|
-
(
|
183
|
-
: # Non capture group
|
184
|
-
|! # Neglookahead
|
185
|
-
|= # Lookahead
|
186
|
-
|\# # Comment group
|
187
|
-
|< # Lookbehind or named capture
|
188
|
-
(
|
189
|
-
! # Neglookbehind
|
190
|
-
|= # Lookbehind
|
191
|
-
|[^>]+ # Named capture
|
192
|
-
)
|
193
|
-
|[mix]*-?[mix]* # Option toggle
|
194
|
-
)?
|
195
|
-
/x
|
196
|
-
) do |match|
|
197
|
-
case
|
198
|
-
when match[1].nil? # e.g. /(normal)/
|
199
|
-
group_id = @num_groups.to_s
|
200
|
-
when match[2] == ':' # e.g. /(?:nocapture)/
|
201
|
-
@current_position += 2
|
202
|
-
when match[2] == '#' # e.g. /(?#comment)/
|
203
|
-
comment_group = rest_of_string.match(/.*?[^\\](?:\\{2})*\)/)[0]
|
204
|
-
@current_position += comment_group.length
|
205
|
-
when match[2] =~ /\A(?=[mix-]+)([mix]*)-?([mix]*)/ # e.g. /(?i-mx)/
|
206
|
-
regexp_options_toggle(Regexp.last_match(1), Regexp.last_match(2))
|
207
|
-
@num_groups -= 1 # Toggle "groups" should not increase backref group count
|
208
|
-
@current_position += $&.length + 1
|
209
|
-
if next_char == ':' # e.g. /(?i:subexpr)/
|
210
|
-
@current_position += 1
|
211
|
-
else
|
212
|
-
return PlaceHolderGroup.new
|
213
|
-
end
|
214
|
-
when %w(! =).include?(match[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/
|
215
|
-
fail IllegalSyntaxError,
|
216
|
-
'Lookaheads are not regular; cannot generate examples'
|
217
|
-
when %w(! =).include?(match[3]) # e.g. /(?<=lookbehind)/, /(?<!neglookbehind)/
|
218
|
-
fail IllegalSyntaxError,
|
219
|
-
'Lookbehinds are not regular; cannot generate examples'
|
220
|
-
else # e.g. /(?<name>namedgroup)/
|
221
|
-
@current_position += (match[3].length + 3)
|
222
|
-
group_id = match[3]
|
223
|
-
end
|
224
|
-
end
|
225
|
-
MultiGroup.new(parse, group_id)
|
226
|
-
end
|
227
|
-
end
|
228
|
-
|
229
|
-
def remember_old_regexp_options
|
230
|
-
previous_ignorecase = @ignorecase
|
231
|
-
previous_multiline = @multiline
|
232
|
-
previous_extended = @extended
|
233
|
-
group = yield
|
234
|
-
@ignorecase = previous_ignorecase
|
235
|
-
@multiline = previous_multiline
|
236
|
-
@extended = previous_extended
|
237
|
-
group
|
238
|
-
end
|
239
|
-
|
240
|
-
def regexp_options_toggle(on, off)
|
241
|
-
regexp_option_toggle(on, off, '@ignorecase', 'i')
|
242
|
-
regexp_option_toggle(on, off, '@multiline', 'm')
|
243
|
-
regexp_option_toggle(on, off, '@extended', 'x')
|
244
|
-
end
|
245
|
-
|
246
|
-
def regexp_option_toggle(on, off, var, char)
|
247
|
-
instance_variable_set(var, true) if on.include? char
|
248
|
-
instance_variable_set(var, false) if off.include? char
|
249
|
-
end
|
250
|
-
|
251
|
-
def parse_char_group
|
252
|
-
@current_position += 1 # Skip past opening "["
|
253
|
-
chargroup_parser = ChargroupParser.new(rest_of_string)
|
254
|
-
parsed_chars = chargroup_parser.result
|
255
|
-
@current_position += (chargroup_parser.length - 1) # Step back to closing "]"
|
256
|
-
CharGroup.new(parsed_chars, @ignorecase)
|
257
|
-
end
|
258
|
-
|
259
|
-
def parse_dot_group
|
260
|
-
DotGroup.new(@multiline)
|
261
|
-
end
|
262
|
-
|
263
|
-
def parse_or_group(left_repeaters)
|
264
|
-
@current_position += 1
|
265
|
-
right_repeaters = parse
|
266
|
-
OrGroup.new(left_repeaters, right_repeaters)
|
267
|
-
end
|
268
|
-
|
269
|
-
def parse_single_char_group(char)
|
270
|
-
SingleCharGroup.new(char, @ignorecase)
|
271
|
-
end
|
272
|
-
|
273
|
-
def parse_backreference_group(group_id)
|
274
|
-
BackReferenceGroup.new(group_id)
|
275
|
-
end
|
276
|
-
|
277
|
-
def parse_control_character(char)
|
278
|
-
(char.ord % 32).chr # Black magic!
|
279
|
-
# eval "?\\C-#{char.chr}" # Doesn't work for e.g. char = "?"
|
280
|
-
end
|
281
|
-
|
282
|
-
def parse_unicode_sequence(match)
|
283
|
-
[match.to_i(16)].pack('U')
|
284
|
-
end
|
285
|
-
|
286
|
-
def parse_star_repeater(group)
|
287
|
-
@current_position += 1
|
288
|
-
parse_reluctant_or_possessive_repeater
|
289
|
-
StarRepeater.new(group)
|
290
|
-
end
|
291
|
-
|
292
|
-
def parse_plus_repeater(group)
|
293
|
-
@current_position += 1
|
294
|
-
parse_reluctant_or_possessive_repeater
|
295
|
-
PlusRepeater.new(group)
|
296
|
-
end
|
297
|
-
|
298
|
-
def parse_reluctant_or_possessive_repeater
|
299
|
-
if next_char =~ /[?+]/
|
300
|
-
# Don't treat these repeaters any differently when generating examples
|
301
|
-
@current_position += 1
|
302
|
-
end
|
303
|
-
end
|
304
|
-
|
305
|
-
def parse_question_mark_repeater(group)
|
306
|
-
@current_position += 1
|
307
|
-
parse_reluctant_or_possessive_repeater
|
308
|
-
QuestionMarkRepeater.new(group)
|
309
|
-
end
|
310
|
-
|
311
|
-
def parse_range_repeater(group)
|
312
|
-
match = rest_of_string.match(/\A\{(\d+)?(,)?(\d+)?\}/)
|
313
|
-
@current_position += match[0].size
|
314
|
-
min = match[1].to_i if match[1]
|
315
|
-
has_comma = !match[2].nil?
|
316
|
-
max = match[3].to_i if match[3]
|
317
|
-
repeater = RangeRepeater.new(group, min, has_comma, max)
|
318
|
-
parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
|
319
|
-
end
|
320
|
-
|
321
|
-
def parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
|
322
|
-
# .{1}? should be equivalent to (?:.{1})?, i.e. NOT a "non-greedy quantifier"
|
323
|
-
if min && !has_comma && !max && next_char == '?'
|
324
|
-
repeater = parse_question_mark_repeater(repeater)
|
76
|
+
parse_range_repeater(group)
|
325
77
|
else
|
326
|
-
|
78
|
+
parse_one_time_repeater(group)
|
327
79
|
end
|
328
|
-
repeater
|
329
|
-
end
|
330
|
-
|
331
|
-
def raise_anchors_exception!
|
332
|
-
fail IllegalSyntaxError,
|
333
|
-
"Anchors ('#{next_char}') cannot be supported, as they are not regular"
|
334
80
|
end
|
335
81
|
|
336
82
|
def parse_one_time_repeater(group)
|
@@ -0,0 +1,144 @@
|
|
1
|
+
module RegexpExamples
|
2
|
+
# A collection of related helper methods, utilised by the `Parser` class
|
3
|
+
module ParseAfterBackslashGroupHelper
|
4
|
+
protected
|
5
|
+
|
6
|
+
def parse_after_backslash_group
|
7
|
+
@current_position += 1
|
8
|
+
case
|
9
|
+
when rest_of_string =~ /\A(\d{1,3})/
|
10
|
+
parse_regular_backreference_group(Regexp.last_match(1))
|
11
|
+
when rest_of_string =~ /\Ak['<]([\w-]+)['>]/
|
12
|
+
parse_named_backreference_group(Regexp.last_match(1))
|
13
|
+
when BackslashCharMap.keys.include?(next_char)
|
14
|
+
parse_backslash_special_char
|
15
|
+
when rest_of_string =~ /\A(c|C-)(.)/
|
16
|
+
parse_backslash_control_char(Regexp.last_match(1), Regexp.last_match(2))
|
17
|
+
when rest_of_string =~ /\Ax(\h{1,2})/
|
18
|
+
parse_backslash_escape_sequence(Regexp.last_match(1))
|
19
|
+
when rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/
|
20
|
+
parse_backslash_unicode_sequence(Regexp.last_match(1))
|
21
|
+
when rest_of_string =~ /\A(p)\{(\^?)([^}]+)\}/i
|
22
|
+
parse_backslash_named_property(
|
23
|
+
Regexp.last_match(1), Regexp.last_match(2), Regexp.last_match(3)
|
24
|
+
)
|
25
|
+
when next_char == 'K' # Keep (special lookbehind that CAN be supported safely!)
|
26
|
+
PlaceHolderGroup.new
|
27
|
+
when next_char == 'R'
|
28
|
+
parse_backslash_linebreak
|
29
|
+
when next_char == 'g'
|
30
|
+
parse_backslash_subexpresion_call
|
31
|
+
when next_char =~ /[bB]/
|
32
|
+
parse_backslash_anchor
|
33
|
+
when next_char =~ /[AG]/
|
34
|
+
parse_backslash_start_of_string
|
35
|
+
when next_char =~ /[zZ]/
|
36
|
+
# TODO: /\Z/ should be treated as /\n?/
|
37
|
+
parse_backslash_end_of_string
|
38
|
+
else
|
39
|
+
parse_single_char_group(next_char)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def parse_regular_backreference_group(group_id)
|
44
|
+
@current_position += (group_id.length - 1) # In case of 10+ backrefs!
|
45
|
+
parse_backreference_group(group_id)
|
46
|
+
end
|
47
|
+
|
48
|
+
def parse_named_backreference_group(group_name)
|
49
|
+
@current_position += (group_name.length + 2)
|
50
|
+
group_id = if group_name.to_i < 0
|
51
|
+
# RELATIVE group number, e.g. /(a)(b)(c)(d) \k<-2>/
|
52
|
+
@num_groups + group_name.to_i + 1
|
53
|
+
else
|
54
|
+
group_name
|
55
|
+
end
|
56
|
+
parse_backreference_group(group_id)
|
57
|
+
end
|
58
|
+
|
59
|
+
def parse_backreference_group(group_id)
|
60
|
+
BackReferenceGroup.new(group_id)
|
61
|
+
end
|
62
|
+
|
63
|
+
def parse_backslash_special_char
|
64
|
+
CharGroup.new(
|
65
|
+
BackslashCharMap[next_char].dup,
|
66
|
+
@ignorecase
|
67
|
+
)
|
68
|
+
end
|
69
|
+
|
70
|
+
def parse_backslash_control_char(control_syntax, control_code)
|
71
|
+
@current_position += control_syntax.length
|
72
|
+
parse_single_char_group(parse_control_character(control_code))
|
73
|
+
end
|
74
|
+
|
75
|
+
def parse_backslash_escape_sequence(escape_sequence)
|
76
|
+
@current_position += escape_sequence.length
|
77
|
+
parse_single_char_group(parse_unicode_sequence(escape_sequence))
|
78
|
+
end
|
79
|
+
|
80
|
+
def parse_control_character(char)
|
81
|
+
(char.ord % 32).chr # Black magic!
|
82
|
+
# eval "?\\C-#{char.chr}" # Doesn't work for e.g. char = "?"
|
83
|
+
end
|
84
|
+
|
85
|
+
def parse_unicode_sequence(match)
|
86
|
+
[match.to_i(16)].pack('U')
|
87
|
+
end
|
88
|
+
|
89
|
+
def parse_backslash_unicode_sequence(full_hex_sequence)
|
90
|
+
@current_position += full_hex_sequence.length
|
91
|
+
sequence = full_hex_sequence.match(/\h{1,4}/)[0] # Strip off "{" and "}"
|
92
|
+
parse_single_char_group(parse_unicode_sequence(sequence))
|
93
|
+
end
|
94
|
+
|
95
|
+
def parse_backslash_named_property(p_negation, caret_negation, property_name)
|
96
|
+
@current_position += (caret_negation.length + # 0 or 1, of '^' is present
|
97
|
+
property_name.length +
|
98
|
+
2) # Length of opening and closing brackets (always 2)
|
99
|
+
# Beware of double negatives! E.g. /\P{^Space}/
|
100
|
+
is_negative = (p_negation == 'P') ^ (caret_negation == '^')
|
101
|
+
CharGroup.new(
|
102
|
+
negate_if(NamedPropertyCharMap[property_name.downcase], is_negative),
|
103
|
+
@ignorecase
|
104
|
+
)
|
105
|
+
end
|
106
|
+
|
107
|
+
def parse_backslash_linebreak
|
108
|
+
CharGroup.new(
|
109
|
+
["\r\n", "\n", "\v", "\f", "\r"],
|
110
|
+
@ignorecase
|
111
|
+
) # Using "\r\n" as one character is little bit hacky...
|
112
|
+
end
|
113
|
+
|
114
|
+
def parse_backslash_subexpresion_call
|
115
|
+
fail IllegalSyntaxError,
|
116
|
+
'Subexpression calls (\\g) cannot be supported, as they are not regular'
|
117
|
+
end
|
118
|
+
|
119
|
+
def parse_backslash_anchor
|
120
|
+
raise_anchors_exception!
|
121
|
+
end
|
122
|
+
|
123
|
+
def parse_backslash_start_of_string
|
124
|
+
if @current_position == 1
|
125
|
+
PlaceHolderGroup.new
|
126
|
+
else
|
127
|
+
raise_anchors_exception!
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
def parse_backslash_end_of_string
|
132
|
+
if @current_position == (regexp_string.length - 1)
|
133
|
+
PlaceHolderGroup.new
|
134
|
+
else
|
135
|
+
raise_anchors_exception!
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
def raise_anchors_exception!
|
140
|
+
fail IllegalSyntaxError,
|
141
|
+
"Anchors ('#{next_char}') cannot be supported, as they are not regular"
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module RegexpExamples
|
2
|
+
# A collection of related helper methods, utilised by the `Parser` class
|
3
|
+
module ParseGroupHelper
|
4
|
+
protected
|
5
|
+
|
6
|
+
def parse_caret
|
7
|
+
if @current_position == 0
|
8
|
+
PlaceHolderGroup.new # Ignore the "illegal" character
|
9
|
+
else
|
10
|
+
raise_anchors_exception!
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def parse_dollar
|
15
|
+
if @current_position == (regexp_string.length - 1)
|
16
|
+
PlaceHolderGroup.new # Ignore the "illegal" character
|
17
|
+
else
|
18
|
+
raise_anchors_exception!
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def parse_extended_whitespace
|
23
|
+
if @extended
|
24
|
+
skip_whitespace
|
25
|
+
PlaceHolderGroup.new # Ignore the whitespace/comment
|
26
|
+
else
|
27
|
+
parse_single_char_group(next_char)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def skip_whitespace
|
32
|
+
whitespace_chars = rest_of_string.match(/#.*|\s+/)[0]
|
33
|
+
@current_position += whitespace_chars.length - 1
|
34
|
+
end
|
35
|
+
|
36
|
+
def parse_single_char_group(char)
|
37
|
+
SingleCharGroup.new(char, @ignorecase)
|
38
|
+
end
|
39
|
+
|
40
|
+
def parse_char_group
|
41
|
+
@current_position += 1 # Skip past opening "["
|
42
|
+
chargroup_parser = ChargroupParser.new(rest_of_string)
|
43
|
+
chargroup_parser.parse
|
44
|
+
@current_position += (chargroup_parser.length - 1) # Step back to closing "]"
|
45
|
+
CharGroup.new(chargroup_parser.result, @ignorecase)
|
46
|
+
end
|
47
|
+
|
48
|
+
def parse_dot_group
|
49
|
+
DotGroup.new(@multiline)
|
50
|
+
end
|
51
|
+
|
52
|
+
def parse_or_group(left_repeaters)
|
53
|
+
@current_position += 1
|
54
|
+
right_repeaters = parse
|
55
|
+
OrGroup.new(left_repeaters, right_repeaters)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module RegexpExamples
|
2
|
+
# A collection of related helper methods, utilised by the `Parser` class
|
3
|
+
module ParseMultiGroupHelper
|
4
|
+
protected
|
5
|
+
|
6
|
+
def parse_multi_group
|
7
|
+
# TODO: Clean up this ugly mess of a method!
|
8
|
+
@current_position += 1
|
9
|
+
@num_groups += 1
|
10
|
+
remember_old_regexp_options do
|
11
|
+
group_id = nil # init
|
12
|
+
rest_of_string.match(
|
13
|
+
/
|
14
|
+
\A
|
15
|
+
(\?)? # Is it a "special" group, i.e. starts with a "?"?
|
16
|
+
(
|
17
|
+
: # Non capture group
|
18
|
+
|! # Neglookahead
|
19
|
+
|= # Lookahead
|
20
|
+
|\# # Comment group
|
21
|
+
|< # Lookbehind or named capture
|
22
|
+
(
|
23
|
+
! # Neglookbehind
|
24
|
+
|= # Lookbehind
|
25
|
+
|[^>]+ # Named capture
|
26
|
+
)
|
27
|
+
|[mix]*-?[mix]* # Option toggle
|
28
|
+
)?
|
29
|
+
/x
|
30
|
+
) do |match|
|
31
|
+
case
|
32
|
+
when match[1].nil? # e.g. /(normal)/
|
33
|
+
group_id = @num_groups.to_s
|
34
|
+
when match[2] == ':' # e.g. /(?:nocapture)/
|
35
|
+
@current_position += 2
|
36
|
+
when match[2] == '#' # e.g. /(?#comment)/
|
37
|
+
comment_group = rest_of_string.match(/.*?[^\\](?:\\{2})*\)/)[0]
|
38
|
+
@current_position += comment_group.length
|
39
|
+
when match[2] =~ /\A(?=[mix-]+)([mix]*)-?([mix]*)/ # e.g. /(?i-mx)/
|
40
|
+
regexp_options_toggle(Regexp.last_match(1), Regexp.last_match(2))
|
41
|
+
@num_groups -= 1 # Toggle "groups" should not increase backref group count
|
42
|
+
@current_position += $&.length + 1
|
43
|
+
if next_char == ':' # e.g. /(?i:subexpr)/
|
44
|
+
@current_position += 1
|
45
|
+
else
|
46
|
+
return PlaceHolderGroup.new
|
47
|
+
end
|
48
|
+
when %w(! =).include?(match[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/
|
49
|
+
fail IllegalSyntaxError,
|
50
|
+
'Lookaheads are not regular; cannot generate examples'
|
51
|
+
when %w(! =).include?(match[3]) # e.g. /(?<=lookbehind)/, /(?<!neglookbehind)/
|
52
|
+
fail IllegalSyntaxError,
|
53
|
+
'Lookbehinds are not regular; cannot generate examples'
|
54
|
+
else # e.g. /(?<name>namedgroup)/
|
55
|
+
@current_position += (match[3].length + 3)
|
56
|
+
group_id = match[3]
|
57
|
+
end
|
58
|
+
end
|
59
|
+
MultiGroup.new(parse, group_id)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def remember_old_regexp_options
|
64
|
+
previous_ignorecase = @ignorecase
|
65
|
+
previous_multiline = @multiline
|
66
|
+
previous_extended = @extended
|
67
|
+
group = yield
|
68
|
+
@ignorecase = previous_ignorecase
|
69
|
+
@multiline = previous_multiline
|
70
|
+
@extended = previous_extended
|
71
|
+
group
|
72
|
+
end
|
73
|
+
|
74
|
+
def regexp_options_toggle(on, off)
|
75
|
+
regexp_option_toggle(on, off, '@ignorecase', 'i')
|
76
|
+
regexp_option_toggle(on, off, '@multiline', 'm')
|
77
|
+
regexp_option_toggle(on, off, '@extended', 'x')
|
78
|
+
end
|
79
|
+
|
80
|
+
def regexp_option_toggle(on, off, var, char)
|
81
|
+
instance_variable_set(var, true) if on.include? char
|
82
|
+
instance_variable_set(var, false) if off.include? char
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module RegexpExamples
|
2
|
+
# A collection of related helper methods, utilised by the `Parser` class
|
3
|
+
module ParseRepeaterHelper
|
4
|
+
protected
|
5
|
+
|
6
|
+
def parse_star_repeater(group)
|
7
|
+
@current_position += 1
|
8
|
+
parse_reluctant_or_possessive_repeater
|
9
|
+
StarRepeater.new(group)
|
10
|
+
end
|
11
|
+
|
12
|
+
def parse_plus_repeater(group)
|
13
|
+
@current_position += 1
|
14
|
+
parse_reluctant_or_possessive_repeater
|
15
|
+
PlusRepeater.new(group)
|
16
|
+
end
|
17
|
+
|
18
|
+
def parse_reluctant_or_possessive_repeater
|
19
|
+
if next_char =~ /[?+]/
|
20
|
+
# Don't treat these repeaters any differently when generating examples
|
21
|
+
@current_position += 1
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def parse_question_mark_repeater(group)
|
26
|
+
@current_position += 1
|
27
|
+
parse_reluctant_or_possessive_repeater
|
28
|
+
QuestionMarkRepeater.new(group)
|
29
|
+
end
|
30
|
+
|
31
|
+
def parse_range_repeater(group)
|
32
|
+
match = rest_of_string.match(/\A\{(\d+)?(,)?(\d+)?\}/)
|
33
|
+
@current_position += match[0].size
|
34
|
+
min = match[1].to_i if match[1]
|
35
|
+
has_comma = !match[2].nil?
|
36
|
+
max = match[3].to_i if match[3]
|
37
|
+
repeater = RangeRepeater.new(group, min, has_comma, max)
|
38
|
+
parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
|
39
|
+
end
|
40
|
+
|
41
|
+
def parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
|
42
|
+
# .{1}? should be equivalent to (?:.{1})?, i.e. NOT a "non-greedy quantifier"
|
43
|
+
if min && !has_comma && !max && next_char == '?'
|
44
|
+
repeater = parse_question_mark_repeater(repeater)
|
45
|
+
else
|
46
|
+
parse_reluctant_or_possessive_repeater
|
47
|
+
end
|
48
|
+
repeater
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|