regexp_parser 0.5.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +242 -0
- data/Gemfile +1 -0
- data/README.md +21 -17
- data/Rakefile +31 -0
- data/lib/regexp_parser/expression.rb +11 -9
- data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
- data/lib/regexp_parser/expression/classes/backref.rb +21 -16
- data/lib/regexp_parser/expression/classes/escape.rb +81 -10
- data/lib/regexp_parser/expression/classes/group.rb +20 -20
- data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
- data/lib/regexp_parser/expression/classes/property.rb +6 -0
- data/lib/regexp_parser/expression/classes/set.rb +10 -93
- data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
- data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
- data/lib/regexp_parser/expression/methods/tests.rb +4 -14
- data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
- data/lib/regexp_parser/expression/quantifier.rb +3 -4
- data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
- data/lib/regexp_parser/expression/subexpression.rb +6 -10
- data/lib/regexp_parser/lexer.rb +13 -17
- data/lib/regexp_parser/parser.rb +170 -116
- data/lib/regexp_parser/scanner.rb +952 -2431
- data/lib/regexp_parser/scanner/char_type.rl +31 -0
- data/lib/regexp_parser/scanner/properties/long.yml +561 -0
- data/lib/regexp_parser/scanner/properties/short.yml +225 -0
- data/lib/regexp_parser/scanner/property.rl +7 -806
- data/lib/regexp_parser/scanner/scanner.rl +112 -154
- data/lib/regexp_parser/syntax/base.rb +4 -4
- data/lib/regexp_parser/syntax/tokens.rb +1 -0
- data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
- data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
- data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
- data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
- data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +2 -1
- data/test/expression/test_base.rb +2 -1
- data/test/expression/test_clone.rb +0 -57
- data/test/expression/test_set.rb +31 -8
- data/test/expression/test_strfregexp.rb +13 -4
- data/test/expression/test_subexpression.rb +25 -0
- data/test/expression/test_traverse.rb +25 -25
- data/test/helpers.rb +1 -0
- data/test/lexer/test_all.rb +1 -1
- data/test/lexer/test_conditionals.rb +9 -7
- data/test/lexer/test_nesting.rb +39 -21
- data/test/lexer/test_refcalls.rb +4 -4
- data/test/parser/set/test_intersections.rb +127 -0
- data/test/parser/set/test_ranges.rb +111 -0
- data/test/parser/test_all.rb +4 -1
- data/test/parser/test_escapes.rb +41 -9
- data/test/parser/test_groups.rb +22 -3
- data/test/parser/test_posix_classes.rb +27 -0
- data/test/parser/test_properties.rb +17 -290
- data/test/parser/test_refcalls.rb +66 -26
- data/test/parser/test_sets.rb +132 -129
- data/test/scanner/test_all.rb +1 -7
- data/test/scanner/test_conditionals.rb +16 -16
- data/test/scanner/test_errors.rb +0 -30
- data/test/scanner/test_escapes.rb +1 -2
- data/test/scanner/test_free_space.rb +28 -28
- data/test/scanner/test_groups.rb +35 -35
- data/test/scanner/test_meta.rb +1 -1
- data/test/scanner/test_properties.rb +87 -114
- data/test/scanner/test_refcalls.rb +18 -18
- data/test/scanner/test_scripts.rb +19 -351
- data/test/scanner/test_sets.rb +87 -60
- data/test/scanner/test_unicode_blocks.rb +4 -105
- data/test/support/warning_extractor.rb +1 -1
- data/test/syntax/test_syntax.rb +7 -0
- data/test/syntax/versions/test_1.8.rb +2 -4
- metadata +17 -7
- data/ChangeLog +0 -325
- data/test/scanner/test_emojis.rb +0 -31
@@ -7,22 +7,12 @@ module Regexp::Expression
|
|
7
7
|
# # is it a :group expression
|
8
8
|
# exp.type? :group
|
9
9
|
#
|
10
|
-
# # is it a :set,
|
11
|
-
# exp.type? [:set, :
|
10
|
+
# # is it a :set, or :meta
|
11
|
+
# exp.type? [:set, :meta]
|
12
12
|
#
|
13
13
|
def type?(test_type)
|
14
|
-
|
15
|
-
|
16
|
-
if test_type.include?(:*)
|
17
|
-
return (test_type.include?(type) or test_type.include?(:*))
|
18
|
-
else
|
19
|
-
return test_type.include?(type)
|
20
|
-
end
|
21
|
-
when Symbol
|
22
|
-
return (type == test_type or test_type == :*)
|
23
|
-
else
|
24
|
-
raise "Array or Symbol expected, #{test_type.class.name} given"
|
25
|
-
end
|
14
|
+
test_types = Array(test_type).map(&:to_sym)
|
15
|
+
test_types.include?(:*) || test_types.include?(type)
|
26
16
|
end
|
27
17
|
|
28
18
|
# Test if this expression has the given test_token, and optionally a given
|
@@ -45,7 +45,7 @@ module Regexp::Expression
|
|
45
45
|
# Returns a new array with the results of calling the given block once
|
46
46
|
# for every expression. If a block is not given, returns an array with
|
47
47
|
# each expression and its level index as an array.
|
48
|
-
def
|
48
|
+
def flat_map(include_self = false, &block)
|
49
49
|
result = []
|
50
50
|
|
51
51
|
each_expression(include_self) do |exp, index|
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
# abstract class
|
3
|
+
class SequenceOperation < Regexp::Expression::Subexpression
|
4
|
+
alias :sequences :expressions
|
5
|
+
alias :operands :expressions
|
6
|
+
alias :operator :text
|
7
|
+
|
8
|
+
def starts_at
|
9
|
+
expressions.first.starts_at
|
10
|
+
end
|
11
|
+
alias :ts :starts_at
|
12
|
+
|
13
|
+
def <<(exp)
|
14
|
+
expressions.last << exp
|
15
|
+
end
|
16
|
+
|
17
|
+
def add_sequence
|
18
|
+
exp = self.class::OPERAND.new(level, set_level, conditional_level)
|
19
|
+
exp.nesting_level = nesting_level + 1
|
20
|
+
expressions << exp
|
21
|
+
exp
|
22
|
+
end
|
23
|
+
|
24
|
+
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
25
|
+
sequences.last.last.quantify(token, text, min, max, mode)
|
26
|
+
sequences.last.last.quantify(token, text, min, max, mode)
|
27
|
+
end
|
28
|
+
|
29
|
+
def to_s(format = :full)
|
30
|
+
sequences.map { |e| e.to_s(format) }.join(text)
|
31
|
+
sequences.map { |e| e.to_s(format) }.join(text)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -10,26 +10,22 @@ module Regexp::Expression
|
|
10
10
|
end
|
11
11
|
|
12
12
|
# Override base method to clone the expressions as well.
|
13
|
-
def
|
14
|
-
|
15
|
-
|
16
|
-
copy
|
13
|
+
def initialize_clone(other)
|
14
|
+
other.expressions = expressions.map(&:clone)
|
15
|
+
super
|
17
16
|
end
|
18
17
|
|
19
18
|
def <<(exp)
|
20
19
|
if exp.is_a?(WhiteSpace) && last && last.is_a?(WhiteSpace)
|
21
20
|
last.merge(exp)
|
22
21
|
else
|
22
|
+
exp.nesting_level = nesting_level + 1
|
23
23
|
expressions << exp
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
end
|
30
|
-
|
31
|
-
%w[[] all? any? at count each each_with_index empty?
|
32
|
-
fetch find first index join last length values_at].each do |m|
|
27
|
+
%w[[] all? any? at collect count each each_with_index empty?
|
28
|
+
fetch find first index join last length map values_at].each do |m|
|
33
29
|
define_method(m) { |*args, &block| expressions.send(m, *args, &block) }
|
34
30
|
end
|
35
31
|
|
data/lib/regexp_parser/lexer.rb
CHANGED
@@ -4,9 +4,10 @@
|
|
4
4
|
# given syntax flavor.
|
5
5
|
class Regexp::Lexer
|
6
6
|
|
7
|
-
OPENING_TOKENS = [
|
8
|
-
|
9
|
-
|
7
|
+
OPENING_TOKENS = [
|
8
|
+
:capture, :passive, :lookahead, :nlookahead, :lookbehind, :nlookbehind,
|
9
|
+
:atomic, :options, :options_switch, :named, :absence
|
10
|
+
].freeze
|
10
11
|
|
11
12
|
CLOSING_TOKENS = [:close].freeze
|
12
13
|
|
@@ -36,6 +37,7 @@ class Regexp::Lexer
|
|
36
37
|
nesting, set_nesting, conditional_nesting)
|
37
38
|
|
38
39
|
current = merge_literal(current) if type == :literal and
|
40
|
+
set_nesting == 0 and
|
39
41
|
last and last.type == :literal
|
40
42
|
|
41
43
|
current = merge_condition(current) if type == :conditional and
|
@@ -66,29 +68,23 @@ class Regexp::Lexer
|
|
66
68
|
attr_accessor :tokens, :nesting, :set_nesting, :conditional_nesting
|
67
69
|
|
68
70
|
def ascend(type, token)
|
69
|
-
|
71
|
+
case type
|
72
|
+
when :group, :assertion
|
70
73
|
self.nesting = nesting - 1 if CLOSING_TOKENS.include?(token)
|
71
|
-
|
72
|
-
|
73
|
-
if type == :set or type == :subset
|
74
|
+
when :set
|
74
75
|
self.set_nesting = set_nesting - 1 if token == :close
|
75
|
-
|
76
|
-
|
77
|
-
if type == :conditional
|
76
|
+
when :conditional
|
78
77
|
self.conditional_nesting = conditional_nesting - 1 if token == :close
|
79
78
|
end
|
80
79
|
end
|
81
80
|
|
82
81
|
def descend(type, token)
|
83
|
-
|
82
|
+
case type
|
83
|
+
when :group, :assertion
|
84
84
|
self.nesting = nesting + 1 if OPENING_TOKENS.include?(token)
|
85
|
-
|
86
|
-
|
87
|
-
if type == :set or type == :subset
|
85
|
+
when :set
|
88
86
|
self.set_nesting = set_nesting + 1 if token == :open
|
89
|
-
|
90
|
-
|
91
|
-
if type == :conditional
|
87
|
+
when :conditional
|
92
88
|
self.conditional_nesting = conditional_nesting + 1 if token == :open
|
93
89
|
end
|
94
90
|
end
|
data/lib/regexp_parser/parser.rb
CHANGED
@@ -33,6 +33,8 @@ class Regexp::Parser
|
|
33
33
|
self.switching_options = false
|
34
34
|
self.conditional_nesting = []
|
35
35
|
|
36
|
+
self.captured_group_counts = Hash.new(0)
|
37
|
+
|
36
38
|
Regexp::Lexer.scan(input, syntax) do |token|
|
37
39
|
parse_token(token)
|
38
40
|
end
|
@@ -48,7 +50,7 @@ class Regexp::Parser
|
|
48
50
|
|
49
51
|
attr_accessor :root, :node, :nesting,
|
50
52
|
:options_stack, :switching_options, :conditional_nesting,
|
51
|
-
:
|
53
|
+
:captured_group_counts
|
52
54
|
|
53
55
|
def options_from_input(input)
|
54
56
|
return {} unless input.is_a?(::Regexp)
|
@@ -63,9 +65,28 @@ class Regexp::Parser
|
|
63
65
|
def nest(exp)
|
64
66
|
nesting.push(exp)
|
65
67
|
node << exp
|
68
|
+
update_transplanted_subtree(exp, node)
|
66
69
|
self.node = exp
|
67
70
|
end
|
68
71
|
|
72
|
+
# subtrees are transplanted to build Alternations, Intersections, Ranges
|
73
|
+
def update_transplanted_subtree(exp, new_parent)
|
74
|
+
exp.nesting_level = new_parent.nesting_level + 1
|
75
|
+
exp.respond_to?(:each) &&
|
76
|
+
exp.each { |subexp| update_transplanted_subtree(subexp, exp) }
|
77
|
+
end
|
78
|
+
|
79
|
+
def decrease_nesting
|
80
|
+
while nesting.last.is_a?(SequenceOperation)
|
81
|
+
nesting.pop
|
82
|
+
self.node = nesting.last
|
83
|
+
end
|
84
|
+
nesting.pop
|
85
|
+
yield(node) if block_given?
|
86
|
+
self.node = nesting.last
|
87
|
+
self.node = node.last if node.last.is_a?(SequenceOperation)
|
88
|
+
end
|
89
|
+
|
69
90
|
def nest_conditional(exp)
|
70
91
|
conditional_nesting.push(exp)
|
71
92
|
node << exp
|
@@ -73,6 +94,8 @@ class Regexp::Parser
|
|
73
94
|
end
|
74
95
|
|
75
96
|
def parse_token(token)
|
97
|
+
close_completed_character_set_range
|
98
|
+
|
76
99
|
case token.type
|
77
100
|
when :meta; meta(token)
|
78
101
|
when :quantifier; quantifier(token)
|
@@ -80,12 +103,14 @@ class Regexp::Parser
|
|
80
103
|
when :escape; escape(token)
|
81
104
|
when :group; group(token)
|
82
105
|
when :assertion; group(token)
|
83
|
-
when :set
|
106
|
+
when :set; set(token)
|
84
107
|
when :type; type(token)
|
85
108
|
when :backref; backref(token)
|
86
109
|
when :conditional; conditional(token)
|
87
110
|
when :keep; keep(token)
|
88
111
|
|
112
|
+
when :posixclass, :nonposixclass
|
113
|
+
posixclass(token)
|
89
114
|
when :property, :nonproperty
|
90
115
|
property(token)
|
91
116
|
|
@@ -104,17 +129,15 @@ class Regexp::Parser
|
|
104
129
|
when :open
|
105
130
|
open_set(token)
|
106
131
|
when :close
|
107
|
-
close_set
|
132
|
+
close_set
|
108
133
|
when :negate
|
109
134
|
negate_set
|
110
|
-
when :
|
111
|
-
|
112
|
-
when
|
113
|
-
|
114
|
-
when
|
115
|
-
|
116
|
-
when *Token::UnicodeProperty::All
|
117
|
-
append_set(token)
|
135
|
+
when :range
|
136
|
+
range(token)
|
137
|
+
when :intersection
|
138
|
+
intersection(token)
|
139
|
+
when :collation, :equivalent
|
140
|
+
node << Literal.new(token, active_opts)
|
118
141
|
else
|
119
142
|
raise UnknownTokenError.new('CharacterSet', token)
|
120
143
|
end
|
@@ -125,19 +148,7 @@ class Regexp::Parser
|
|
125
148
|
when :dot
|
126
149
|
node << CharacterType::Any.new(token, active_opts)
|
127
150
|
when :alternation
|
128
|
-
|
129
|
-
elsif node.last.is_a?(Alternation)
|
130
|
-
self.node = node.last
|
131
|
-
else
|
132
|
-
alt = Alternation.new(token, active_opts)
|
133
|
-
seq = Alternative.new(alt.level, alt.set_level, alt.conditional_level)
|
134
|
-
node.expressions.count.times { seq.insert(node.expressions.pop) }
|
135
|
-
alt.alternative(seq)
|
136
|
-
|
137
|
-
node << alt
|
138
|
-
self.node = alt
|
139
|
-
end
|
140
|
-
node.alternative
|
151
|
+
sequence_operation(Alternation, token)
|
141
152
|
else
|
142
153
|
raise UnknownTokenError.new('Meta', token)
|
143
154
|
end
|
@@ -147,16 +158,16 @@ class Regexp::Parser
|
|
147
158
|
case token.token
|
148
159
|
when :name_ref
|
149
160
|
node << Backreference::Name.new(token, active_opts)
|
150
|
-
when :
|
151
|
-
node << Backreference::
|
161
|
+
when :name_recursion_ref
|
162
|
+
node << Backreference::NameRecursionLevel.new(token, active_opts)
|
152
163
|
when :name_call
|
153
164
|
node << Backreference::NameCall.new(token, active_opts)
|
154
165
|
when :number, :number_ref
|
155
166
|
node << Backreference::Number.new(token, active_opts)
|
156
167
|
when :number_rel_ref
|
157
168
|
node << Backreference::NumberRelative.new(token, active_opts)
|
158
|
-
when :
|
159
|
-
node << Backreference::
|
169
|
+
when :number_recursion_ref
|
170
|
+
node << Backreference::NumberRecursionLevel.new(token, active_opts)
|
160
171
|
when :number_call
|
161
172
|
node << Backreference::NumberCall.new(token, active_opts)
|
162
173
|
when :number_rel_call
|
@@ -217,75 +228,81 @@ class Regexp::Parser
|
|
217
228
|
end
|
218
229
|
end
|
219
230
|
|
231
|
+
def posixclass(token)
|
232
|
+
node << PosixClass.new(token)
|
233
|
+
end
|
234
|
+
|
220
235
|
include Regexp::Expression::UnicodeProperty
|
221
236
|
|
222
237
|
def property(token)
|
223
238
|
case token.token
|
224
|
-
when :alnum;
|
225
|
-
when :alpha;
|
226
|
-
when :ascii;
|
227
|
-
when :blank;
|
228
|
-
when :cntrl;
|
229
|
-
when :digit;
|
230
|
-
when :graph;
|
231
|
-
when :lower;
|
232
|
-
when :print;
|
233
|
-
when :punct;
|
234
|
-
when :space;
|
235
|
-
when :upper;
|
236
|
-
when :word;
|
237
|
-
when :xdigit;
|
238
|
-
when :xposixpunct;
|
239
|
+
when :alnum; node << Alnum.new(token, active_opts)
|
240
|
+
when :alpha; node << Alpha.new(token, active_opts)
|
241
|
+
when :ascii; node << Ascii.new(token, active_opts)
|
242
|
+
when :blank; node << Blank.new(token, active_opts)
|
243
|
+
when :cntrl; node << Cntrl.new(token, active_opts)
|
244
|
+
when :digit; node << Digit.new(token, active_opts)
|
245
|
+
when :graph; node << Graph.new(token, active_opts)
|
246
|
+
when :lower; node << Lower.new(token, active_opts)
|
247
|
+
when :print; node << Print.new(token, active_opts)
|
248
|
+
when :punct; node << Punct.new(token, active_opts)
|
249
|
+
when :space; node << Space.new(token, active_opts)
|
250
|
+
when :upper; node << Upper.new(token, active_opts)
|
251
|
+
when :word; node << Word.new(token, active_opts)
|
252
|
+
when :xdigit; node << Xdigit.new(token, active_opts)
|
253
|
+
when :xposixpunct; node << XPosixPunct.new(token, active_opts)
|
239
254
|
|
240
255
|
# only in Oniguruma (old rubies)
|
241
|
-
when :newline;
|
242
|
-
|
243
|
-
when :any;
|
244
|
-
when :assigned;
|
245
|
-
|
246
|
-
when :
|
247
|
-
when :
|
248
|
-
when :
|
249
|
-
when :
|
250
|
-
when :
|
251
|
-
when :
|
252
|
-
|
253
|
-
|
254
|
-
when :
|
255
|
-
when :
|
256
|
-
when :
|
257
|
-
|
258
|
-
when :
|
259
|
-
|
260
|
-
when :
|
261
|
-
when :
|
262
|
-
|
263
|
-
when :
|
264
|
-
|
265
|
-
when :
|
266
|
-
when :
|
267
|
-
when :
|
268
|
-
when :
|
269
|
-
when :
|
270
|
-
when :
|
271
|
-
|
272
|
-
when :
|
273
|
-
|
274
|
-
when :
|
275
|
-
when :
|
276
|
-
|
277
|
-
when :
|
278
|
-
|
279
|
-
when :
|
280
|
-
when :
|
281
|
-
when :
|
282
|
-
|
283
|
-
when :
|
284
|
-
|
285
|
-
when :
|
286
|
-
when :
|
287
|
-
when :
|
288
|
-
when :
|
256
|
+
when :newline; node << Newline.new(token, active_opts)
|
257
|
+
|
258
|
+
when :any; node << Any.new(token, active_opts)
|
259
|
+
when :assigned; node << Assigned.new(token, active_opts)
|
260
|
+
|
261
|
+
when :letter; node << Letter::Any.new(token, active_opts)
|
262
|
+
when :cased_letter; node << Letter::Cased.new(token, active_opts)
|
263
|
+
when :uppercase_letter; node << Letter::Uppercase.new(token, active_opts)
|
264
|
+
when :lowercase_letter; node << Letter::Lowercase.new(token, active_opts)
|
265
|
+
when :titlecase_letter; node << Letter::Titlecase.new(token, active_opts)
|
266
|
+
when :modifier_letter; node << Letter::Modifier.new(token, active_opts)
|
267
|
+
when :other_letter; node << Letter::Other.new(token, active_opts)
|
268
|
+
|
269
|
+
when :mark; node << Mark::Any.new(token, active_opts)
|
270
|
+
when :combining_mark; node << Mark::Combining.new(token, active_opts)
|
271
|
+
when :nonspacing_mark; node << Mark::Nonspacing.new(token, active_opts)
|
272
|
+
when :spacing_mark; node << Mark::Spacing.new(token, active_opts)
|
273
|
+
when :enclosing_mark; node << Mark::Enclosing.new(token, active_opts)
|
274
|
+
|
275
|
+
when :number; node << Number::Any.new(token, active_opts)
|
276
|
+
when :decimal_number; node << Number::Decimal.new(token, active_opts)
|
277
|
+
when :letter_number; node << Number::Letter.new(token, active_opts)
|
278
|
+
when :other_number; node << Number::Other.new(token, active_opts)
|
279
|
+
|
280
|
+
when :punctuation; node << Punctuation::Any.new(token, active_opts)
|
281
|
+
when :connector_punctuation; node << Punctuation::Connector.new(token, active_opts)
|
282
|
+
when :dash_punctuation; node << Punctuation::Dash.new(token, active_opts)
|
283
|
+
when :open_punctuation; node << Punctuation::Open.new(token, active_opts)
|
284
|
+
when :close_punctuation; node << Punctuation::Close.new(token, active_opts)
|
285
|
+
when :initial_punctuation; node << Punctuation::Initial.new(token, active_opts)
|
286
|
+
when :final_punctuation; node << Punctuation::Final.new(token, active_opts)
|
287
|
+
when :other_punctuation; node << Punctuation::Other.new(token, active_opts)
|
288
|
+
|
289
|
+
when :separator; node << Separator::Any.new(token, active_opts)
|
290
|
+
when :space_separator; node << Separator::Space.new(token, active_opts)
|
291
|
+
when :line_separator; node << Separator::Line.new(token, active_opts)
|
292
|
+
when :paragraph_separator; node << Separator::Paragraph.new(token, active_opts)
|
293
|
+
|
294
|
+
when :symbol; node << Symbol::Any.new(token, active_opts)
|
295
|
+
when :math_symbol; node << Symbol::Math.new(token, active_opts)
|
296
|
+
when :currency_symbol; node << Symbol::Currency.new(token, active_opts)
|
297
|
+
when :modifier_symbol; node << Symbol::Modifier.new(token, active_opts)
|
298
|
+
when :other_symbol; node << Symbol::Other.new(token, active_opts)
|
299
|
+
|
300
|
+
when :other; node << Codepoint::Any.new(token, active_opts)
|
301
|
+
when :control; node << Codepoint::Control.new(token, active_opts)
|
302
|
+
when :format; node << Codepoint::Format.new(token, active_opts)
|
303
|
+
when :surrogate; node << Codepoint::Surrogate.new(token, active_opts)
|
304
|
+
when :private_use; node << Codepoint::PrivateUse.new(token, active_opts)
|
305
|
+
when :unassigned; node << Codepoint::Unassigned.new(token, active_opts)
|
289
306
|
|
290
307
|
when *Token::UnicodeProperty::Age
|
291
308
|
node << Age.new(token, active_opts)
|
@@ -346,13 +363,20 @@ class Regexp::Parser
|
|
346
363
|
node << EscapeSequence::Newline.new(token, active_opts)
|
347
364
|
when :carriage
|
348
365
|
node << EscapeSequence::Return.new(token, active_opts)
|
349
|
-
when :space
|
350
|
-
node << EscapeSequence::Space.new(token, active_opts)
|
351
366
|
when :tab
|
352
367
|
node << EscapeSequence::Tab.new(token, active_opts)
|
353
368
|
when :vertical_tab
|
354
369
|
node << EscapeSequence::VerticalTab.new(token, active_opts)
|
355
370
|
|
371
|
+
when :hex
|
372
|
+
node << EscapeSequence::Hex.new(token, active_opts)
|
373
|
+
when :octal
|
374
|
+
node << EscapeSequence::Octal.new(token, active_opts)
|
375
|
+
when :codepoint
|
376
|
+
node << EscapeSequence::Codepoint.new(token, active_opts)
|
377
|
+
when :codepoint_list
|
378
|
+
node << EscapeSequence::CodepointList.new(token, active_opts)
|
379
|
+
|
356
380
|
when :control
|
357
381
|
if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
|
358
382
|
node << EscapeSequence::MetaControl.new(token, active_opts)
|
@@ -447,7 +471,7 @@ class Regexp::Parser
|
|
447
471
|
mode = :greedy
|
448
472
|
end
|
449
473
|
|
450
|
-
range = range_text.gsub(/\{|\}/, '').split(',', 2)
|
474
|
+
range = range_text.gsub(/\{|\}/, '').split(',', 2)
|
451
475
|
min = range[0].empty? ? 0 : range[0]
|
452
476
|
max = range[1] ? (range[1].empty? ? -1 : range[1]) : min
|
453
477
|
|
@@ -456,7 +480,7 @@ class Regexp::Parser
|
|
456
480
|
|
457
481
|
def group(token)
|
458
482
|
case token.token
|
459
|
-
when :options
|
483
|
+
when :options, :options_switch
|
460
484
|
options_group(token)
|
461
485
|
when :close
|
462
486
|
close_group
|
@@ -470,8 +494,7 @@ class Regexp::Parser
|
|
470
494
|
def options_group(token)
|
471
495
|
positive, negative = token.text.split('-', 2)
|
472
496
|
negative ||= ''
|
473
|
-
self.switching_options =
|
474
|
-
# TODO: change this -^ to token.type == :options_switch in v1.0.0
|
497
|
+
self.switching_options = token.token.equal?(:options_switch)
|
475
498
|
|
476
499
|
new_options = active_opts.dup
|
477
500
|
|
@@ -491,9 +514,7 @@ class Regexp::Parser
|
|
491
514
|
|
492
515
|
options_stack << new_options
|
493
516
|
|
494
|
-
|
495
|
-
|
496
|
-
nest(exp)
|
517
|
+
nest(Group::Options.new(token, active_opts))
|
497
518
|
end
|
498
519
|
|
499
520
|
def open_group(token)
|
@@ -522,6 +543,12 @@ class Regexp::Parser
|
|
522
543
|
raise UnknownTokenError.new('Group type open', token)
|
523
544
|
end
|
524
545
|
|
546
|
+
if exp.capturing?
|
547
|
+
exp.number = total_captured_group_count + 1
|
548
|
+
exp.number_at_level = captured_group_count_at_level + 1
|
549
|
+
count_captured_group
|
550
|
+
end
|
551
|
+
|
525
552
|
# Push the active options to the stack again. This way we can simply pop the
|
526
553
|
# stack for any group we close, no matter if it had its own options or not.
|
527
554
|
options_stack << active_opts
|
@@ -530,38 +557,65 @@ class Regexp::Parser
|
|
530
557
|
end
|
531
558
|
|
532
559
|
def close_group
|
533
|
-
nesting.pop
|
534
560
|
options_stack.pop unless switching_options
|
535
561
|
self.switching_options = false
|
536
|
-
|
537
|
-
self.node = nesting.last
|
538
|
-
self.node = node.last if node.last and node.last.is_a?(Alternation)
|
562
|
+
decrease_nesting
|
539
563
|
end
|
540
564
|
|
541
565
|
def open_set(token)
|
542
566
|
token.token = :character
|
543
|
-
|
544
|
-
if token.type == :subset
|
545
|
-
current_set << CharacterSubSet.new(token, active_opts)
|
546
|
-
else
|
547
|
-
self.current_set = CharacterSet.new(token, active_opts)
|
548
|
-
node << current_set
|
549
|
-
end
|
567
|
+
nest(CharacterSet.new(token, active_opts))
|
550
568
|
end
|
551
569
|
|
552
570
|
def negate_set
|
553
|
-
|
571
|
+
node.negate
|
554
572
|
end
|
555
573
|
|
556
|
-
def
|
557
|
-
|
574
|
+
def close_set
|
575
|
+
decrease_nesting(&:close)
|
558
576
|
end
|
559
577
|
|
560
|
-
def
|
561
|
-
|
578
|
+
def range(token)
|
579
|
+
exp = CharacterSet::Range.new(token, active_opts)
|
580
|
+
scope = node.last.is_a?(CharacterSet::IntersectedSequence) ? node.last : node
|
581
|
+
exp << scope.expressions.pop
|
582
|
+
nest(exp)
|
583
|
+
end
|
584
|
+
|
585
|
+
def close_completed_character_set_range
|
586
|
+
decrease_nesting if node.is_a?(CharacterSet::Range) && node.complete?
|
587
|
+
end
|
588
|
+
|
589
|
+
def intersection(token)
|
590
|
+
sequence_operation(CharacterSet::Intersection, token)
|
591
|
+
end
|
592
|
+
|
593
|
+
def sequence_operation(klass, token)
|
594
|
+
if node.last.is_a?(klass)
|
595
|
+
self.node = node.last
|
596
|
+
elsif !node.is_a?(klass)
|
597
|
+
operator = klass.new(token, active_opts)
|
598
|
+
sequence = operator.add_sequence
|
599
|
+
sequence.expressions = node.expressions
|
600
|
+
node.expressions = []
|
601
|
+
nest(operator)
|
602
|
+
end
|
603
|
+
node.add_sequence
|
562
604
|
end
|
563
605
|
|
564
606
|
def active_opts
|
565
607
|
options_stack.last
|
566
608
|
end
|
609
|
+
|
610
|
+
def total_captured_group_count
|
611
|
+
captured_group_counts.values.reduce(0, :+)
|
612
|
+
end
|
613
|
+
|
614
|
+
def captured_group_count_at_level
|
615
|
+
captured_group_counts[node.level]
|
616
|
+
end
|
617
|
+
|
618
|
+
def count_captured_group
|
619
|
+
captured_group_counts[node.level] += 1
|
620
|
+
end
|
567
621
|
end # module Regexp::Parser
|