regexp-examples 1.1.3 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/.travis.yml +0 -6
- data/db/unicode_ranges_2.1.pstore +1 -0
- data/db/unicode_ranges_2.3.pstore +0 -0
- data/db/unicode_ranges_2.4.pstore +0 -0
- data/lib/core_extensions/regexp/examples.rb +3 -0
- data/lib/regexp-examples/backreferences.rb +29 -13
- data/lib/regexp-examples/chargroup_parser.rb +15 -17
- data/lib/regexp-examples/constants.rb +10 -6
- data/lib/regexp-examples/groups.rb +11 -22
- data/lib/regexp-examples/helpers.rb +6 -7
- data/lib/regexp-examples/parser.rb +31 -285
- data/lib/regexp-examples/parser_helpers/charset_negation_helper.rb +8 -0
- data/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb +144 -0
- data/lib/regexp-examples/parser_helpers/parse_group_helper.rb +58 -0
- data/lib/regexp-examples/parser_helpers/parse_multi_group_helper.rb +85 -0
- data/lib/regexp-examples/parser_helpers/parse_repeater_helper.rb +51 -0
- data/lib/regexp-examples/repeaters.rb +21 -7
- data/lib/regexp-examples/unicode_char_ranges.rb +4 -0
- data/lib/regexp-examples/version.rb +2 -1
- data/lib/regexp-examples.rb +1 -1
- data/regexp-examples.gemspec +5 -4
- data/scripts/unicode_lister.rb +15 -11
- data/spec/helpers.rb +18 -0
- data/spec/regexp-examples_spec.rb +7 -15
- data/spec/regexp-random_example_spec.rb +4 -2
- data/spec/spec_helper.rb +10 -0
- metadata +14 -5
- data/db/unicode_ranges_2.1.pstore +0 -0
@@ -1,7 +1,24 @@
|
|
1
|
+
require_relative 'parser_helpers/parse_group_helper'
|
2
|
+
require_relative 'parser_helpers/parse_after_backslash_group_helper'
|
3
|
+
require_relative 'parser_helpers/parse_multi_group_helper'
|
4
|
+
require_relative 'parser_helpers/parse_repeater_helper'
|
5
|
+
require_relative 'parser_helpers/charset_negation_helper'
|
6
|
+
|
7
|
+
# :nodoc:
|
1
8
|
module RegexpExamples
|
2
9
|
IllegalSyntaxError = Class.new(StandardError)
|
10
|
+
# A Regexp parser, used to build a structured collection of objects that represents
|
11
|
+
# the regular expression.
|
12
|
+
# This object can then be used to generate strings that match the regular expression.
|
3
13
|
class Parser
|
14
|
+
include ParseGroupHelper
|
15
|
+
include ParseAfterBackslashGroupHelper
|
16
|
+
include ParseMultiGroupHelper
|
17
|
+
include ParseRepeaterHelper
|
18
|
+
include CharsetNegationHelper
|
19
|
+
|
4
20
|
attr_reader :regexp_string
|
21
|
+
|
5
22
|
def initialize(regexp_string, regexp_options)
|
6
23
|
@regexp_string = regexp_string
|
7
24
|
@ignorecase = !(regexp_options & Regexp::IGNORECASE).zero?
|
@@ -27,310 +44,39 @@ module RegexpExamples
|
|
27
44
|
def parse_group(repeaters)
|
28
45
|
case next_char
|
29
46
|
when '('
|
30
|
-
|
47
|
+
parse_multi_group
|
31
48
|
when '['
|
32
|
-
|
49
|
+
parse_char_group
|
33
50
|
when '.'
|
34
|
-
|
51
|
+
parse_dot_group
|
35
52
|
when '|'
|
36
|
-
|
53
|
+
parse_or_group(repeaters)
|
37
54
|
when '\\'
|
38
|
-
|
55
|
+
parse_after_backslash_group
|
39
56
|
when '^'
|
40
|
-
|
57
|
+
parse_caret
|
41
58
|
when '$'
|
42
|
-
|
59
|
+
parse_dollar
|
43
60
|
when /[#\s]/
|
44
|
-
|
61
|
+
parse_extended_whitespace
|
45
62
|
else
|
46
|
-
|
63
|
+
parse_single_char_group(next_char)
|
47
64
|
end
|
48
|
-
group
|
49
65
|
end
|
50
66
|
|
51
67
|
def parse_repeater(group)
|
52
68
|
case next_char
|
53
69
|
when '*'
|
54
|
-
|
70
|
+
parse_star_repeater(group)
|
55
71
|
when '+'
|
56
|
-
|
72
|
+
parse_plus_repeater(group)
|
57
73
|
when '?'
|
58
|
-
|
74
|
+
parse_question_mark_repeater(group)
|
59
75
|
when '{'
|
60
|
-
|
61
|
-
else
|
62
|
-
repeater = parse_one_time_repeater(group)
|
63
|
-
end
|
64
|
-
repeater
|
65
|
-
end
|
66
|
-
|
67
|
-
def parse_caret
|
68
|
-
if @current_position == 0
|
69
|
-
return PlaceHolderGroup.new # Ignore the "illegal" character
|
70
|
-
else
|
71
|
-
raise_anchors_exception!
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
def parse_dollar
|
76
|
-
if @current_position == (regexp_string.length - 1)
|
77
|
-
return PlaceHolderGroup.new # Ignore the "illegal" character
|
78
|
-
else
|
79
|
-
raise_anchors_exception!
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
def parse_extended_whitespace
|
84
|
-
if @extended
|
85
|
-
skip_whitespace
|
86
|
-
group = PlaceHolderGroup.new # Ignore the whitespace/comment
|
87
|
-
else
|
88
|
-
group = parse_single_char_group(next_char)
|
89
|
-
end
|
90
|
-
group
|
91
|
-
end
|
92
|
-
|
93
|
-
def skip_whitespace
|
94
|
-
whitespace_chars = rest_of_string.match(/#.*|\s+/)[0]
|
95
|
-
@current_position += whitespace_chars.length - 1
|
96
|
-
end
|
97
|
-
|
98
|
-
def parse_after_backslash_group
|
99
|
-
@current_position += 1
|
100
|
-
case
|
101
|
-
when rest_of_string =~ /\A(\d{1,3})/
|
102
|
-
@current_position += (Regexp.last_match(1).length - 1) # In case of 10+ backrefs!
|
103
|
-
group = parse_backreference_group(Regexp.last_match(1))
|
104
|
-
when rest_of_string =~ /\Ak['<]([\w-]+)['>]/ # Named capture group
|
105
|
-
@current_position += (Regexp.last_match(1).length + 2)
|
106
|
-
group_id = if Regexp.last_match(1).to_i < 0
|
107
|
-
# RELATIVE group number, e.g. /(a)(b)(c)(d) \k<-2>/
|
108
|
-
@num_groups + Regexp.last_match(1).to_i + 1
|
109
|
-
else
|
110
|
-
Regexp.last_match(1)
|
111
|
-
end
|
112
|
-
group = parse_backreference_group(group_id)
|
113
|
-
when BackslashCharMap.keys.include?(next_char)
|
114
|
-
group = CharGroup.new(
|
115
|
-
BackslashCharMap[next_char].dup,
|
116
|
-
@ignorecase
|
117
|
-
)
|
118
|
-
when rest_of_string =~ /\A(c|C-)(.)/ # Control character
|
119
|
-
@current_position += Regexp.last_match(1).length
|
120
|
-
group = parse_single_char_group(parse_control_character(Regexp.last_match(2)))
|
121
|
-
when rest_of_string =~ /\Ax(\h{1,2})/ # Escape sequence
|
122
|
-
@current_position += Regexp.last_match(1).length
|
123
|
-
group = parse_single_char_group(parse_unicode_sequence(Regexp.last_match(1)))
|
124
|
-
when rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/ # Unicode sequence
|
125
|
-
@current_position += Regexp.last_match(1).length
|
126
|
-
sequence = Regexp.last_match(1).match(/\h{1,4}/)[0] # Strip off "{" and "}"
|
127
|
-
group = parse_single_char_group(parse_unicode_sequence(sequence))
|
128
|
-
when rest_of_string =~ /\A(p)\{(\^?)([^}]+)\}/i # Named properties
|
129
|
-
@current_position += (Regexp.last_match(2).length + # 0 or 1, of '^' is present
|
130
|
-
Regexp.last_match(3).length + # Length of the property name
|
131
|
-
2) # Length of opening and closing brackets (always 2)
|
132
|
-
# Beware of double negatives! E.g. /\P{^Space}/
|
133
|
-
is_negative = (Regexp.last_match(1) == 'P') ^ (Regexp.last_match(2) == '^')
|
134
|
-
group = CharGroup.new(
|
135
|
-
if is_negative
|
136
|
-
CharSets::Any.dup - NamedPropertyCharMap[Regexp.last_match(3).downcase]
|
137
|
-
else
|
138
|
-
NamedPropertyCharMap[Regexp.last_match(3).downcase]
|
139
|
-
end,
|
140
|
-
@ignorecase
|
141
|
-
)
|
142
|
-
when next_char == 'K' # Keep (special lookbehind that CAN be supported safely!)
|
143
|
-
group = PlaceHolderGroup.new
|
144
|
-
when next_char == 'R' # Linebreak
|
145
|
-
group = CharGroup.new(
|
146
|
-
["\r\n", "\n", "\v", "\f", "\r"],
|
147
|
-
@ignorecase
|
148
|
-
) # Using "\r\n" as one character is little bit hacky...
|
149
|
-
when next_char == 'g' # Subexpression call
|
150
|
-
fail IllegalSyntaxError,
|
151
|
-
'Subexpression calls (\\g) cannot be supported, as they are not regular'
|
152
|
-
when next_char =~ /[bB]/ # Anchors
|
153
|
-
raise_anchors_exception!
|
154
|
-
when next_char =~ /[AG]/ # Start of string
|
155
|
-
if @current_position == 1
|
156
|
-
group = PlaceHolderGroup.new
|
157
|
-
else
|
158
|
-
raise_anchors_exception!
|
159
|
-
end
|
160
|
-
when next_char =~ /[zZ]/ # End of string
|
161
|
-
if @current_position == (regexp_string.length - 1)
|
162
|
-
# TODO: /\Z/ should be treated as /\n?/
|
163
|
-
group = PlaceHolderGroup.new
|
164
|
-
else
|
165
|
-
raise_anchors_exception!
|
166
|
-
end
|
167
|
-
else
|
168
|
-
group = parse_single_char_group(next_char)
|
169
|
-
end
|
170
|
-
group
|
171
|
-
end
|
172
|
-
|
173
|
-
def parse_multi_group
|
174
|
-
@current_position += 1
|
175
|
-
@num_groups += 1
|
176
|
-
remember_old_regexp_options do
|
177
|
-
group_id = nil # init
|
178
|
-
rest_of_string.match(
|
179
|
-
/
|
180
|
-
\A
|
181
|
-
(\?)? # Is it a "special" group, i.e. starts with a "?"?
|
182
|
-
(
|
183
|
-
: # Non capture group
|
184
|
-
|! # Neglookahead
|
185
|
-
|= # Lookahead
|
186
|
-
|\# # Comment group
|
187
|
-
|< # Lookbehind or named capture
|
188
|
-
(
|
189
|
-
! # Neglookbehind
|
190
|
-
|= # Lookbehind
|
191
|
-
|[^>]+ # Named capture
|
192
|
-
)
|
193
|
-
|[mix]*-?[mix]* # Option toggle
|
194
|
-
)?
|
195
|
-
/x
|
196
|
-
) do |match|
|
197
|
-
case
|
198
|
-
when match[1].nil? # e.g. /(normal)/
|
199
|
-
group_id = @num_groups.to_s
|
200
|
-
when match[2] == ':' # e.g. /(?:nocapture)/
|
201
|
-
@current_position += 2
|
202
|
-
when match[2] == '#' # e.g. /(?#comment)/
|
203
|
-
comment_group = rest_of_string.match(/.*?[^\\](?:\\{2})*\)/)[0]
|
204
|
-
@current_position += comment_group.length
|
205
|
-
when match[2] =~ /\A(?=[mix-]+)([mix]*)-?([mix]*)/ # e.g. /(?i-mx)/
|
206
|
-
regexp_options_toggle(Regexp.last_match(1), Regexp.last_match(2))
|
207
|
-
@num_groups -= 1 # Toggle "groups" should not increase backref group count
|
208
|
-
@current_position += $&.length + 1
|
209
|
-
if next_char == ':' # e.g. /(?i:subexpr)/
|
210
|
-
@current_position += 1
|
211
|
-
else
|
212
|
-
return PlaceHolderGroup.new
|
213
|
-
end
|
214
|
-
when %w(! =).include?(match[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/
|
215
|
-
fail IllegalSyntaxError,
|
216
|
-
'Lookaheads are not regular; cannot generate examples'
|
217
|
-
when %w(! =).include?(match[3]) # e.g. /(?<=lookbehind)/, /(?<!neglookbehind)/
|
218
|
-
fail IllegalSyntaxError,
|
219
|
-
'Lookbehinds are not regular; cannot generate examples'
|
220
|
-
else # e.g. /(?<name>namedgroup)/
|
221
|
-
@current_position += (match[3].length + 3)
|
222
|
-
group_id = match[3]
|
223
|
-
end
|
224
|
-
end
|
225
|
-
MultiGroup.new(parse, group_id)
|
226
|
-
end
|
227
|
-
end
|
228
|
-
|
229
|
-
def remember_old_regexp_options
|
230
|
-
previous_ignorecase = @ignorecase
|
231
|
-
previous_multiline = @multiline
|
232
|
-
previous_extended = @extended
|
233
|
-
group = yield
|
234
|
-
@ignorecase = previous_ignorecase
|
235
|
-
@multiline = previous_multiline
|
236
|
-
@extended = previous_extended
|
237
|
-
group
|
238
|
-
end
|
239
|
-
|
240
|
-
def regexp_options_toggle(on, off)
|
241
|
-
regexp_option_toggle(on, off, '@ignorecase', 'i')
|
242
|
-
regexp_option_toggle(on, off, '@multiline', 'm')
|
243
|
-
regexp_option_toggle(on, off, '@extended', 'x')
|
244
|
-
end
|
245
|
-
|
246
|
-
def regexp_option_toggle(on, off, var, char)
|
247
|
-
instance_variable_set(var, true) if on.include? char
|
248
|
-
instance_variable_set(var, false) if off.include? char
|
249
|
-
end
|
250
|
-
|
251
|
-
def parse_char_group
|
252
|
-
@current_position += 1 # Skip past opening "["
|
253
|
-
chargroup_parser = ChargroupParser.new(rest_of_string)
|
254
|
-
parsed_chars = chargroup_parser.result
|
255
|
-
@current_position += (chargroup_parser.length - 1) # Step back to closing "]"
|
256
|
-
CharGroup.new(parsed_chars, @ignorecase)
|
257
|
-
end
|
258
|
-
|
259
|
-
def parse_dot_group
|
260
|
-
DotGroup.new(@multiline)
|
261
|
-
end
|
262
|
-
|
263
|
-
def parse_or_group(left_repeaters)
|
264
|
-
@current_position += 1
|
265
|
-
right_repeaters = parse
|
266
|
-
OrGroup.new(left_repeaters, right_repeaters)
|
267
|
-
end
|
268
|
-
|
269
|
-
def parse_single_char_group(char)
|
270
|
-
SingleCharGroup.new(char, @ignorecase)
|
271
|
-
end
|
272
|
-
|
273
|
-
def parse_backreference_group(group_id)
|
274
|
-
BackReferenceGroup.new(group_id)
|
275
|
-
end
|
276
|
-
|
277
|
-
def parse_control_character(char)
|
278
|
-
(char.ord % 32).chr # Black magic!
|
279
|
-
# eval "?\\C-#{char.chr}" # Doesn't work for e.g. char = "?"
|
280
|
-
end
|
281
|
-
|
282
|
-
def parse_unicode_sequence(match)
|
283
|
-
[match.to_i(16)].pack('U')
|
284
|
-
end
|
285
|
-
|
286
|
-
def parse_star_repeater(group)
|
287
|
-
@current_position += 1
|
288
|
-
parse_reluctant_or_possessive_repeater
|
289
|
-
StarRepeater.new(group)
|
290
|
-
end
|
291
|
-
|
292
|
-
def parse_plus_repeater(group)
|
293
|
-
@current_position += 1
|
294
|
-
parse_reluctant_or_possessive_repeater
|
295
|
-
PlusRepeater.new(group)
|
296
|
-
end
|
297
|
-
|
298
|
-
def parse_reluctant_or_possessive_repeater
|
299
|
-
if next_char =~ /[?+]/
|
300
|
-
# Don't treat these repeaters any differently when generating examples
|
301
|
-
@current_position += 1
|
302
|
-
end
|
303
|
-
end
|
304
|
-
|
305
|
-
def parse_question_mark_repeater(group)
|
306
|
-
@current_position += 1
|
307
|
-
parse_reluctant_or_possessive_repeater
|
308
|
-
QuestionMarkRepeater.new(group)
|
309
|
-
end
|
310
|
-
|
311
|
-
def parse_range_repeater(group)
|
312
|
-
match = rest_of_string.match(/\A\{(\d+)?(,)?(\d+)?\}/)
|
313
|
-
@current_position += match[0].size
|
314
|
-
min = match[1].to_i if match[1]
|
315
|
-
has_comma = !match[2].nil?
|
316
|
-
max = match[3].to_i if match[3]
|
317
|
-
repeater = RangeRepeater.new(group, min, has_comma, max)
|
318
|
-
parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
|
319
|
-
end
|
320
|
-
|
321
|
-
def parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
|
322
|
-
# .{1}? should be equivalent to (?:.{1})?, i.e. NOT a "non-greedy quantifier"
|
323
|
-
if min && !has_comma && !max && next_char == '?'
|
324
|
-
repeater = parse_question_mark_repeater(repeater)
|
76
|
+
parse_range_repeater(group)
|
325
77
|
else
|
326
|
-
|
78
|
+
parse_one_time_repeater(group)
|
327
79
|
end
|
328
|
-
repeater
|
329
|
-
end
|
330
|
-
|
331
|
-
def raise_anchors_exception!
|
332
|
-
fail IllegalSyntaxError,
|
333
|
-
"Anchors ('#{next_char}') cannot be supported, as they are not regular"
|
334
80
|
end
|
335
81
|
|
336
82
|
def parse_one_time_repeater(group)
|
@@ -0,0 +1,144 @@
|
|
1
|
+
module RegexpExamples
  # A collection of related helper methods, utilised by the `Parser` class.
  #
  # These methods handle everything that can follow a backslash in the regexp
  # string: backreferences, escape sequences, named properties and anchors.
  #
  # The including object must supply `rest_of_string`, `next_char`,
  # `regexp_string`, `parse_single_char_group` and `negate_if`, and must
  # maintain `@current_position`, `@num_groups` and `@ignorecase`.
  module ParseAfterBackslashGroupHelper
    protected

    # Parses the construct following a backslash and returns the group object
    # that represents it.
    #
    # On entry `@current_position` points at the backslash; it is first moved
    # onto the character after it. Each branch then leaves the position on the
    # LAST character of the construct it consumed.
    #
    # Raises IllegalSyntaxError for constructs that cannot be supported.
    def parse_after_backslash_group
      @current_position += 1
      case
      when rest_of_string =~ /\A(\d{1,3})/
        parse_regular_backreference_group(Regexp.last_match(1))
      when rest_of_string =~ /\Ak['<]([\w-]+)['>]/
        parse_named_backreference_group(Regexp.last_match(1))
      when BackslashCharMap.key?(next_char)
        parse_backslash_special_char
      when rest_of_string =~ /\A(c|C-)(.)/
        parse_backslash_control_char(Regexp.last_match(1), Regexp.last_match(2))
      when rest_of_string =~ /\Ax(\h{1,2})/
        parse_backslash_escape_sequence(Regexp.last_match(1))
      when rest_of_string =~ /\Au(\h{4}|\{\h{1,6}\})/
        # The braced form accepts 1-6 hex digits, e.g. /\u{1F600}/
        parse_backslash_unicode_sequence(Regexp.last_match(1))
      when rest_of_string =~ /\A(p)\{(\^?)([^}]+)\}/i
        parse_backslash_named_property(
          Regexp.last_match(1), Regexp.last_match(2), Regexp.last_match(3)
        )
      when next_char == 'K' # Keep (special lookbehind that CAN be supported safely!)
        PlaceHolderGroup.new
      when next_char == 'R'
        parse_backslash_linebreak
      when next_char == 'g'
        parse_backslash_subexpresion_call
      when next_char =~ /[bB]/
        parse_backslash_anchor
      when next_char =~ /[AG]/
        parse_backslash_start_of_string
      when next_char =~ /[zZ]/
        # TODO: /\Z/ should be treated as /\n?/
        parse_backslash_end_of_string
      else
        parse_single_char_group(next_char)
      end
    end

    # Handles a numbered backreference such as /\1/ or /\12/.
    # `group_id` is the string of digits that followed the backslash.
    def parse_regular_backreference_group(group_id)
      @current_position += (group_id.length - 1) # In case of 10+ backrefs!
      parse_backreference_group(group_id)
    end

    # Handles /\k<name>/, /\k'name'/ and relative references like /\k<-2>/.
    # `group_name` is the text between the delimiters.
    def parse_named_backreference_group(group_name)
      # Step over "k", the opening delimiter and the name, landing on the
      # closing delimiter.
      @current_position += (group_name.length + 2)
      group_id = if group_name.to_i < 0
                   # RELATIVE group number, e.g. /(a)(b)(c)(d) \k<-2>/
                   @num_groups + group_name.to_i + 1
                 else
                   group_name
                 end
      parse_backreference_group(group_id)
    end

    # Wraps `group_id` in a BackReferenceGroup.
    def parse_backreference_group(group_id)
      BackReferenceGroup.new(group_id)
    end

    # Builds a CharGroup from the BackslashCharMap entry for the current
    # character. The entry is duplicated so the shared map is never mutated.
    def parse_backslash_special_char
      CharGroup.new(
        BackslashCharMap[next_char].dup,
        @ignorecase
      )
    end

    # Handles control characters written as /\cX/ or /\C-X/.
    # `control_syntax` is "c" or "C-"; `control_code` is the X.
    def parse_backslash_control_char(control_syntax, control_code)
      @current_position += control_syntax.length
      parse_single_char_group(parse_control_character(control_code))
    end

    # Handles hex escapes such as /\x41/. `escape_sequence` holds the 1-2 hex
    # digits that followed the "x".
    def parse_backslash_escape_sequence(escape_sequence)
      @current_position += escape_sequence.length
      parse_single_char_group(parse_unicode_sequence(escape_sequence))
    end

    # Maps `char` onto the character whose code is `char.ord % 32`.
    def parse_control_character(char)
      (char.ord % 32).chr # Black magic!
      # eval "?\\C-#{char.chr}" # Doesn't work for e.g. char = "?"
    end

    # Converts a string of hex digits into the corresponding UTF-8 character.
    def parse_unicode_sequence(match)
      [match.to_i(16)].pack('U')
    end

    # Handles /\uXXXX/ and /\u{X...}/. `full_hex_sequence` is everything after
    # the "u", including the braces when present.
    def parse_backslash_unicode_sequence(full_hex_sequence)
      @current_position += full_hex_sequence.length
      sequence = full_hex_sequence[/\h{1,6}/] # Strip off "{" and "}"
      parse_single_char_group(parse_unicode_sequence(sequence))
    end

    # Handles named properties: /\p{Name}/, /\P{Name}/ and /\p{^Name}/.
    #
    # p_negation     - "p" or "P"
    # caret_negation - "^" or the empty string
    # property_name  - the name inside the braces; looked up in lower case
    #
    # NOTE(review): `negate_if` is not defined in this module; presumably it
    # comes from another helper mixed into the parser - confirm.
    def parse_backslash_named_property(p_negation, caret_negation, property_name)
      @current_position += (caret_negation.length + # 0 or 1, if '^' is present
                            property_name.length +
                            2) # Length of opening and closing brackets (always 2)
      # Beware of double negatives! E.g. /\P{^Space}/
      is_negative = (p_negation == 'P') ^ (caret_negation == '^')
      CharGroup.new(
        negate_if(NamedPropertyCharMap[property_name.downcase], is_negative),
        @ignorecase
      )
    end

    # Handles /\R/ (generic linebreak).
    def parse_backslash_linebreak
      CharGroup.new(
        ["\r\n", "\n", "\v", "\f", "\r"],
        @ignorecase
      ) # Using "\r\n" as one character is a little bit hacky...
    end

    # Handles /\g<...>/, which is always rejected.
    def parse_backslash_subexpresion_call
      raise IllegalSyntaxError,
            'Subexpression calls (\\g) cannot be supported, as they are not regular'
    end

    # Handles /\b/ and /\B/, which are always rejected.
    def parse_backslash_anchor
      raise_anchors_exception!
    end

    # Handles /\A/ and /\G/: ignored when the backslash was the very first
    # character of the regexp string, rejected anywhere else.
    def parse_backslash_start_of_string
      if @current_position == 1
        PlaceHolderGroup.new
      else
        raise_anchors_exception!
      end
    end

    # Handles /\z/ and /\Z/: ignored when they end the regexp string,
    # rejected anywhere else.
    def parse_backslash_end_of_string
      if @current_position == (regexp_string.length - 1)
        PlaceHolderGroup.new
      else
        raise_anchors_exception!
      end
    end

    # Raises IllegalSyntaxError, naming the offending character.
    def raise_anchors_exception!
      raise IllegalSyntaxError,
            "Anchors ('#{next_char}') cannot be supported, as they are not regular"
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module RegexpExamples
  # Helper methods mixed into the `Parser` class for the simpler kinds of
  # group: anchors, extended-mode whitespace, single characters, character
  # sets, the dot and alternation.
  #
  # The including object must supply `rest_of_string`, `next_char`,
  # `regexp_string`, `parse` and `raise_anchors_exception!`.
  module ParseGroupHelper
    protected

    # "^" is tolerated only as the very first character of the regexp string;
    # anywhere else it is reported through `raise_anchors_exception!`.
    def parse_caret
      raise_anchors_exception! unless @current_position.zero?
      PlaceHolderGroup.new # Ignore the "illegal" character
    end

    # "$" is tolerated only as the very last character of the regexp string;
    # anywhere else it is reported through `raise_anchors_exception!`.
    def parse_dollar
      final_index = regexp_string.length - 1
      raise_anchors_exception! unless @current_position == final_index
      PlaceHolderGroup.new # Ignore the "illegal" character
    end

    # Whitespace and "#" are only special when the extended flag is on;
    # otherwise the current character is parsed as a plain character.
    def parse_extended_whitespace
      return parse_single_char_group(next_char) unless @extended

      skip_whitespace
      PlaceHolderGroup.new # Ignore the whitespace/comment
    end

    # Moves the position onto the last character of the run of whitespace
    # (or of the "#" comment) found in the remaining string.
    def skip_whitespace
      skipped = rest_of_string[/#.*|\s+/]
      @current_position += skipped.length - 1
    end

    # Wraps `char` in a SingleCharGroup that honours the ignore-case flag.
    def parse_single_char_group(char)
      SingleCharGroup.new(char, @ignorecase)
    end

    # Parses a "[...]" set by delegating to ChargroupParser, then builds a
    # CharGroup from its result.
    def parse_char_group
      @current_position += 1 # Skip past opening "["
      set_parser = ChargroupParser.new(rest_of_string).tap(&:parse)
      @current_position += (set_parser.length - 1) # Step back to closing "]"
      CharGroup.new(set_parser.result, @ignorecase)
    end

    # "." becomes a DotGroup, configured with the multiline flag.
    def parse_dot_group
      DotGroup.new(@multiline)
    end

    # "|": everything parsed so far is the left branch; whatever `parse`
    # returns from the next position onwards is the right branch.
    def parse_or_group(left_repeaters)
      @current_position += 1
      OrGroup.new(left_repeaters, parse)
    end
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module RegexpExamples
  # Helper methods mixed into the `Parser` class for parenthesised groups:
  # capturing, non-capturing, named, comment and option-toggle groups.
  #
  # The including object must supply `rest_of_string`, `next_char` and
  # `parse`, and must maintain `@current_position`, `@num_groups`,
  # `@ignorecase`, `@multiline` and `@extended`.
  module ParseMultiGroupHelper
    protected

    # Parses the group opened by the "(" at the current position.
    #
    # Returns a MultiGroup wrapping whatever `parse` yields for the group
    # body, or a PlaceHolderGroup for a bare option toggle such as /(?i)/.
    # Raises IllegalSyntaxError for lookaheads and lookbehinds.
    #
    # NOTE(review): the non-capturing and comment branches leave @num_groups
    # incremented, whereas the option-toggle branch decrements it again -
    # confirm whether that difference is intended.
    def parse_multi_group
      # TODO: Clean up this ugly mess of a method!
      @current_position += 1
      @num_groups += 1
      remember_old_regexp_options do
        group_id = nil # init
        rest_of_string.match(
          /
            \A
            (\?)? # Is it a "special" group, i.e. starts with a "?"?
            (
              : # Non capture group
              |! # Neglookahead
              |= # Lookahead
              |\# # Comment group
              |< # Lookbehind or named capture
              (
                ! # Neglookbehind
                |= # Lookbehind
                |[^>]+ # Named capture
              )
              |[mix]*-?[mix]* # Option toggle
            )?
          /x
        ) do |header|
          if header[1].nil? # e.g. /(normal)/
            group_id = @num_groups.to_s
          elsif header[2] == ':' # e.g. /(?:nocapture)/
            @current_position += 2
          elsif header[2] == '#' # e.g. /(?#comment)/
            comment_text = rest_of_string.match(/.*?[^\\](?:\\{2})*\)/)[0]
            @current_position += comment_text.length
          elsif header[2] =~ /\A(?=[mix-]+)([mix]*)-?([mix]*)/ # e.g. /(?i-mx)/
            toggle = Regexp.last_match
            regexp_options_toggle(toggle[1], toggle[2])
            @num_groups -= 1 # Toggle "groups" should not increase backref group count
            @current_position += toggle[0].length + 1
            # A bare toggle has no body. Returning from the whole method here
            # also skips the restore step in `remember_old_regexp_options`,
            # so the new flags stay switched on.
            return PlaceHolderGroup.new unless next_char == ':'

            @current_position += 1 # e.g. /(?i:subexpr)/
          elsif %w[! =].include?(header[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/
            raise IllegalSyntaxError,
                  'Lookaheads are not regular; cannot generate examples'
          elsif %w[! =].include?(header[3]) # e.g. /(?<=lookbehind)/, /(?<!neglookbehind)/
            raise IllegalSyntaxError,
                  'Lookbehinds are not regular; cannot generate examples'
          else # e.g. /(?<name>namedgroup)/
            @current_position += (header[3].length + 3)
            group_id = header[3]
          end
        end
        MultiGroup.new(parse, group_id)
      end
    end

    # Yields, then puts the three option flags back to the values they had
    # beforehand and returns the block's result. The flags are NOT restored
    # if the block exits early (non-local return or exception).
    def remember_old_regexp_options
      saved_flags = [@ignorecase, @multiline, @extended]
      outcome = yield
      @ignorecase, @multiline, @extended = saved_flags
      outcome
    end

    # Applies an option toggle to all three flags. `on` and `off` are the
    # letters found before and after the "-" respectively.
    def regexp_options_toggle(on, off)
      regexp_option_toggle(on, off, '@ignorecase', 'i')
      regexp_option_toggle(on, off, '@multiline', 'm')
      regexp_option_toggle(on, off, '@extended', 'x')
    end

    # Sets instance variable `var` to true when `char` occurs in `on`, then
    # to false when it occurs in `off` (so `off` wins if both contain it).
    def regexp_option_toggle(on, off, var, char)
      instance_variable_set(var, true) if on.include?(char)
      instance_variable_set(var, false) if off.include?(char)
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module RegexpExamples
  # A collection of related helper methods, utilised by the `Parser` class.
  #
  # These methods turn the quantifier that follows a group ("*", "+", "?" or
  # "{...}") into a repeater object wrapping that group.
  #
  # The including object must supply `rest_of_string`, `next_char` and
  # `parse_one_time_repeater`, and must maintain `@current_position`.
  module ParseRepeaterHelper
    protected

    # Handles "*", plus an optional reluctant/possessive suffix.
    def parse_star_repeater(group)
      @current_position += 1
      parse_reluctant_or_possessive_repeater
      StarRepeater.new(group)
    end

    # Handles "+", plus an optional reluctant/possessive suffix.
    def parse_plus_repeater(group)
      @current_position += 1
      parse_reluctant_or_possessive_repeater
      PlusRepeater.new(group)
    end

    # Skips over a trailing "?" or "+" modifier, if there is one.
    def parse_reluctant_or_possessive_repeater
      # Don't treat these repeaters any differently when generating examples
      @current_position += 1 if next_char =~ /[?+]/
    end

    # Handles "?", plus an optional reluctant/possessive suffix.
    def parse_question_mark_repeater(group)
      @current_position += 1
      parse_reluctant_or_possessive_repeater
      QuestionMarkRepeater.new(group)
    end

    # Handles "{n}", "{n,}", "{,m}" and "{n,m}".
    #
    # When the "{" does not open a quantifier containing at least one number
    # (e.g. /a{/, /a{x}/, /a{}/ or /a{,}/) the brace is ordinary text. In that
    # case nothing is consumed and the group is wrapped by
    # `parse_one_time_repeater`, leaving the "{" to be parsed as the next
    # group.
    def parse_range_repeater(group)
      match = rest_of_string.match(/\A\{(\d+)?(,)?(\d+)?\}/)
      return parse_one_time_repeater(group) unless match && (match[1] || match[3])

      @current_position += match[0].size
      min = match[1].to_i if match[1]
      has_comma = !match[2].nil?
      max = match[3].to_i if match[3]
      repeater = RangeRepeater.new(group, min, has_comma, max)
      parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
    end

    # Deals with whatever modifier follows a "{...}" quantifier and returns
    # the final repeater.
    def parse_reluctant_or_possessive_range_repeater(repeater, min, has_comma, max)
      # .{1}? should be equivalent to (?:.{1})?, i.e. NOT a "non-greedy quantifier"
      if min && !has_comma && !max && next_char == '?'
        repeater = parse_question_mark_repeater(repeater)
      else
        parse_reluctant_or_possessive_repeater
      end
      repeater
    end
  end
end
|