regexp_parser 0.4.13 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +15 -0
- data/lib/regexp_parser/expression.rb +43 -51
- data/lib/regexp_parser/expression/classes/alternation.rb +6 -7
- data/lib/regexp_parser/expression/classes/character_class.rb +11 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +10 -18
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
- data/lib/regexp_parser/expression/classes/group.rb +9 -21
- data/lib/regexp_parser/expression/classes/property.rb +2 -2
- data/lib/regexp_parser/expression/classes/set.rb +1 -12
- data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
- data/lib/regexp_parser/expression/quantifier.rb +9 -9
- data/lib/regexp_parser/expression/sequence.rb +5 -4
- data/lib/regexp_parser/expression/subexpression.rb +16 -59
- data/lib/regexp_parser/lexer.rb +31 -27
- data/lib/regexp_parser/parser.rb +179 -179
- data/lib/regexp_parser/scanner.rb +172 -166
- data/lib/regexp_parser/scanner/scanner.rl +44 -38
- data/lib/regexp_parser/syntax.rb +2 -53
- data/lib/regexp_parser/syntax/base.rb +13 -24
- data/lib/regexp_parser/syntax/tokens/character_class.rb +16 -0
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +26 -26
- data/lib/regexp_parser/syntax/version_lookup.rb +82 -0
- data/lib/regexp_parser/syntax/versions.rb +1 -5
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +30 -0
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +36 -0
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +11 -0
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +20 -0
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +9 -0
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +10 -0
- data/lib/regexp_parser/token.rb +6 -29
- data/lib/regexp_parser/version.rb +1 -1
- data/test/expression/test_strfregexp.rb +7 -0
- data/test/expression/test_to_h.rb +6 -0
- data/test/parser/test_properties.rb +12 -4
- data/test/support/warning_extractor.rb +3 -1
- data/test/syntax/test_all.rb +1 -1
- data/test/syntax/test_syntax.rb +5 -9
- data/test/syntax/{ruby → versions}/test_1.8.rb +14 -14
- data/test/syntax/{ruby → versions}/test_1.9.1.rb +7 -8
- data/test/syntax/{ruby → versions}/test_1.9.3.rb +7 -7
- data/test/syntax/versions/test_2.0.0.rb +37 -0
- data/test/syntax/{ruby → versions}/test_2.2.0.rb +7 -7
- data/test/syntax/versions/test_aliases.rb +129 -0
- data/test/syntax/{ruby → versions}/test_all.rb +1 -1
- metadata +73 -113
- data/lib/regexp_parser/syntax/ruby/1.8.6.rb +0 -37
- data/lib/regexp_parser/syntax/ruby/1.8.7.rb +0 -14
- data/lib/regexp_parser/syntax/ruby/1.8.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/1.9.1.rb +0 -45
- data/lib/regexp_parser/syntax/ruby/1.9.2.rb +0 -9
- data/lib/regexp_parser/syntax/ruby/1.9.3.rb +0 -19
- data/lib/regexp_parser/syntax/ruby/1.9.rb +0 -8
- data/lib/regexp_parser/syntax/ruby/2.0.0.rb +0 -23
- data/lib/regexp_parser/syntax/ruby/2.0.rb +0 -8
- data/lib/regexp_parser/syntax/ruby/2.1.0.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.10.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.2.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.3.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.4.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.5.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.6.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.7.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.8.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.9.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.rb +0 -8
- data/lib/regexp_parser/syntax/ruby/2.2.0.rb +0 -16
- data/lib/regexp_parser/syntax/ruby/2.2.1.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.10.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.2.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.3.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.4.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.5.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.6.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.7.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.8.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.9.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.rb +0 -8
- data/lib/regexp_parser/syntax/ruby/2.3.0.rb +0 -16
- data/lib/regexp_parser/syntax/ruby/2.3.1.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.3.2.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.3.3.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.3.4.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.3.5.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.3.6.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.3.7.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.3.rb +0 -8
- data/lib/regexp_parser/syntax/ruby/2.4.0.rb +0 -16
- data/lib/regexp_parser/syntax/ruby/2.4.1.rb +0 -15
- data/lib/regexp_parser/syntax/ruby/2.4.2.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.4.3.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.4.4.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.4.rb +0 -8
- data/lib/regexp_parser/syntax/ruby/2.5.0.rb +0 -16
- data/lib/regexp_parser/syntax/ruby/2.5.1.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.5.rb +0 -8
- data/lib/regexp_parser/syntax/ruby/2.6.0.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.6.rb +0 -8
- data/test/syntax/ruby/test_2.0.0.rb +0 -32
- data/test/syntax/ruby/test_files.rb +0 -353
@@ -25,18 +25,19 @@ module Regexp::Expression
|
|
25
25
|
end
|
26
26
|
|
27
27
|
def starts_at
|
28
|
-
|
28
|
+
expressions.first.starts_at
|
29
29
|
end
|
30
|
+
alias :ts :starts_at
|
30
31
|
|
31
32
|
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
32
33
|
offset = -1
|
33
34
|
target = expressions[offset]
|
34
|
-
while target
|
35
|
+
while target.is_a?(FreeSpace)
|
35
36
|
target = expressions[offset -= 1]
|
36
37
|
end
|
37
38
|
|
38
|
-
raise
|
39
|
-
|
39
|
+
target || raise(ArgumentError, "No valid target found for '#{text}' "\
|
40
|
+
'quantifier')
|
40
41
|
|
41
42
|
target.quantify(token, text, min, max, mode)
|
42
43
|
end
|
@@ -6,63 +6,31 @@ module Regexp::Expression
|
|
6
6
|
def initialize(token, options = {})
|
7
7
|
super
|
8
8
|
|
9
|
-
|
9
|
+
self.expressions = []
|
10
10
|
end
|
11
11
|
|
12
12
|
# Override base method to clone the expressions as well.
|
13
13
|
def clone
|
14
14
|
copy = super
|
15
|
-
copy.expressions =
|
15
|
+
copy.expressions = expressions.map(&:clone)
|
16
16
|
copy
|
17
17
|
end
|
18
18
|
|
19
19
|
def <<(exp)
|
20
|
-
if exp.is_a?(WhiteSpace)
|
21
|
-
|
22
|
-
@expressions.last.merge(exp)
|
20
|
+
if exp.is_a?(WhiteSpace) && last && last.is_a?(WhiteSpace)
|
21
|
+
last.merge(exp)
|
23
22
|
else
|
24
|
-
|
23
|
+
expressions << exp
|
25
24
|
end
|
26
25
|
end
|
27
26
|
|
28
27
|
def insert(exp)
|
29
|
-
|
28
|
+
expressions.insert(0, exp)
|
30
29
|
end
|
31
30
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
def each_with_index(&block)
|
37
|
-
@expressions.each_with_index {|e, i| yield e, i}
|
38
|
-
end
|
39
|
-
|
40
|
-
def first
|
41
|
-
@expressions.first
|
42
|
-
end
|
43
|
-
|
44
|
-
def last
|
45
|
-
@expressions.last
|
46
|
-
end
|
47
|
-
|
48
|
-
def [](index)
|
49
|
-
@expressions[index]
|
50
|
-
end
|
51
|
-
|
52
|
-
def length
|
53
|
-
@expressions.length
|
54
|
-
end
|
55
|
-
|
56
|
-
def empty?
|
57
|
-
@expressions.empty?
|
58
|
-
end
|
59
|
-
|
60
|
-
def all?(&block)
|
61
|
-
@expressions.all? {|exp| yield(exp) }
|
62
|
-
end
|
63
|
-
|
64
|
-
def ts
|
65
|
-
starts_at
|
31
|
+
%w[[] all? any? at count each each_with_index empty?
|
32
|
+
fetch find first index join last length values_at].each do |m|
|
33
|
+
define_method(m) { |*args, &block| expressions.send(m, *args, &block) }
|
66
34
|
end
|
67
35
|
|
68
36
|
def te
|
@@ -70,28 +38,17 @@ module Regexp::Expression
|
|
70
38
|
end
|
71
39
|
|
72
40
|
def to_s(format = :full)
|
73
|
-
s = ''
|
74
|
-
|
75
41
|
# Note: the format does not get passed down to subexpressions.
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
s << @expressions.map{|e| e.to_s}.join unless @expressions.empty?
|
80
|
-
else
|
81
|
-
s << @text.dup
|
82
|
-
s << @expressions.map{|e| e.to_s}.join unless @expressions.empty?
|
83
|
-
s << @quantifier if quantified?
|
84
|
-
end
|
85
|
-
|
86
|
-
s
|
42
|
+
# Note: cant use #text accessor, b/c it is overriden as def text; to_s end
|
43
|
+
# in Expression::Sequence, causing infinite recursion. Clean-up needed.
|
44
|
+
"#{@text}#{expressions.join}#{quantifier_affix(format)}"
|
87
45
|
end
|
88
46
|
|
89
47
|
def to_h
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
48
|
+
super.merge({
|
49
|
+
text: to_s(:base),
|
50
|
+
expressions: expressions.map(&:to_h)
|
51
|
+
})
|
94
52
|
end
|
95
53
|
end
|
96
|
-
|
97
54
|
end
|
data/lib/regexp_parser/lexer.rb
CHANGED
@@ -17,8 +17,10 @@ class Regexp::Lexer
|
|
17
17
|
def lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
|
18
18
|
syntax = Regexp::Syntax.new(syntax)
|
19
19
|
|
20
|
-
|
21
|
-
|
20
|
+
self.tokens = []
|
21
|
+
self.nesting = 0
|
22
|
+
self.set_nesting = 0
|
23
|
+
self.conditional_nesting = 0
|
22
24
|
|
23
25
|
last = nil
|
24
26
|
Regexp::Scanner.scan(input) do |type, token, text, ts, te|
|
@@ -31,7 +33,7 @@ class Regexp::Lexer
|
|
31
33
|
last and last.type == :literal
|
32
34
|
|
33
35
|
current = Regexp::Token.new(type, token, text, ts, te,
|
34
|
-
|
36
|
+
nesting, set_nesting, conditional_nesting)
|
35
37
|
|
36
38
|
current = merge_literal(current) if type == :literal and
|
37
39
|
last and last.type == :literal
|
@@ -39,19 +41,19 @@ class Regexp::Lexer
|
|
39
41
|
current = merge_condition(current) if type == :conditional and
|
40
42
|
[:condition, :condition_close].include?(token)
|
41
43
|
|
42
|
-
last.next
|
43
|
-
current.previous
|
44
|
+
last.next = current if last
|
45
|
+
current.previous = last if last
|
44
46
|
|
45
|
-
|
47
|
+
tokens << current
|
46
48
|
last = current
|
47
49
|
|
48
50
|
descend(type, token)
|
49
51
|
end
|
50
52
|
|
51
53
|
if block_given?
|
52
|
-
|
54
|
+
tokens.map { |t| block.call(t) }
|
53
55
|
else
|
54
|
-
|
56
|
+
tokens
|
55
57
|
end
|
56
58
|
end
|
57
59
|
|
@@ -59,33 +61,35 @@ class Regexp::Lexer
|
|
59
61
|
alias :scan :lex
|
60
62
|
end
|
61
63
|
|
62
|
-
|
64
|
+
private
|
65
|
+
|
66
|
+
attr_accessor :tokens, :nesting, :set_nesting, :conditional_nesting
|
63
67
|
|
64
68
|
def ascend(type, token)
|
65
69
|
if type == :group or type == :assertion
|
66
|
-
|
70
|
+
self.nesting = nesting - 1 if CLOSING_TOKENS.include?(token)
|
67
71
|
end
|
68
72
|
|
69
73
|
if type == :set or type == :subset
|
70
|
-
|
74
|
+
self.set_nesting = set_nesting - 1 if token == :close
|
71
75
|
end
|
72
76
|
|
73
77
|
if type == :conditional
|
74
|
-
|
78
|
+
self.conditional_nesting = conditional_nesting - 1 if token == :close
|
75
79
|
end
|
76
80
|
end
|
77
81
|
|
78
82
|
def descend(type, token)
|
79
83
|
if type == :group or type == :assertion
|
80
|
-
|
84
|
+
self.nesting = nesting + 1 if OPENING_TOKENS.include?(token)
|
81
85
|
end
|
82
86
|
|
83
87
|
if type == :set or type == :subset
|
84
|
-
|
88
|
+
self.set_nesting = set_nesting + 1 if token == :open
|
85
89
|
end
|
86
90
|
|
87
91
|
if type == :conditional
|
88
|
-
|
92
|
+
self.conditional_nesting = conditional_nesting + 1 if token == :open
|
89
93
|
end
|
90
94
|
end
|
91
95
|
|
@@ -105,20 +109,20 @@ class Regexp::Lexer
|
|
105
109
|
last_length = last.length
|
106
110
|
end
|
107
111
|
|
108
|
-
|
109
|
-
|
110
|
-
|
112
|
+
tokens.pop
|
113
|
+
tokens << Regexp::Token.new(:literal, :literal, lead, token.ts,
|
114
|
+
(token.te - last_length), nesting, set_nesting, conditional_nesting)
|
111
115
|
|
112
|
-
|
113
|
-
|
114
|
-
|
116
|
+
tokens << Regexp::Token.new(:literal, :literal, last,
|
117
|
+
(token.ts + lead_length),
|
118
|
+
token.te, nesting, set_nesting, conditional_nesting)
|
115
119
|
end
|
116
120
|
end
|
117
121
|
|
118
122
|
# called by scan to merge two consecutive literals. this happens when tokens
|
119
123
|
# get normalized (as in the case of posix/bre) and end up becoming literals.
|
120
124
|
def merge_literal(current)
|
121
|
-
last =
|
125
|
+
last = tokens.pop
|
122
126
|
|
123
127
|
Regexp::Token.new(
|
124
128
|
:literal,
|
@@ -126,16 +130,16 @@ class Regexp::Lexer
|
|
126
130
|
last.text + current.text,
|
127
131
|
last.ts,
|
128
132
|
current.te,
|
129
|
-
|
130
|
-
|
131
|
-
|
133
|
+
nesting,
|
134
|
+
set_nesting,
|
135
|
+
conditional_nesting,
|
132
136
|
)
|
133
137
|
end
|
134
138
|
|
135
139
|
def merge_condition(current)
|
136
|
-
last =
|
140
|
+
last = tokens.pop
|
137
141
|
Regexp::Token.new(:conditional, :condition, last.text + current.text,
|
138
|
-
last.ts, current.te,
|
142
|
+
last.ts, current.te, nesting, set_nesting, conditional_nesting)
|
139
143
|
end
|
140
144
|
|
141
145
|
end # module Regexp::Lexer
|
data/lib/regexp_parser/parser.rb
CHANGED
@@ -23,25 +23,33 @@ class Regexp::Parser
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def parse(input, syntax = "ruby/#{RUBY_VERSION}", &block)
|
26
|
-
|
26
|
+
root = Root.new(options_from_input(input))
|
27
27
|
|
28
|
-
|
29
|
-
|
30
|
-
|
28
|
+
self.root = root
|
29
|
+
self.node = root
|
30
|
+
self.nesting = [root]
|
31
|
+
|
32
|
+
self.options_stack = [root.options]
|
33
|
+
self.switching_options = false
|
34
|
+
self.conditional_nesting = []
|
31
35
|
|
32
36
|
Regexp::Lexer.scan(input, syntax) do |token|
|
33
|
-
parse_token
|
37
|
+
parse_token(token)
|
34
38
|
end
|
35
39
|
|
36
40
|
if block_given?
|
37
|
-
block.call
|
41
|
+
block.call(root)
|
38
42
|
else
|
39
|
-
|
43
|
+
root
|
40
44
|
end
|
41
45
|
end
|
42
46
|
|
43
47
|
private
|
44
48
|
|
49
|
+
attr_accessor :root, :node, :nesting,
|
50
|
+
:options_stack, :switching_options, :conditional_nesting,
|
51
|
+
:current_set
|
52
|
+
|
45
53
|
def options_from_input(input)
|
46
54
|
return {} unless input.is_a?(::Regexp)
|
47
55
|
|
@@ -53,17 +61,15 @@ class Regexp::Parser
|
|
53
61
|
end
|
54
62
|
|
55
63
|
def nest(exp)
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
@node = exp
|
64
|
+
nesting.push(exp)
|
65
|
+
node << exp
|
66
|
+
self.node = exp
|
60
67
|
end
|
61
68
|
|
62
69
|
def nest_conditional(exp)
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
@node = exp
|
70
|
+
conditional_nesting.push(exp)
|
71
|
+
node << exp
|
72
|
+
self.node = exp
|
67
73
|
end
|
68
74
|
|
69
75
|
def parse_token(token)
|
@@ -84,7 +90,7 @@ class Regexp::Parser
|
|
84
90
|
property(token)
|
85
91
|
|
86
92
|
when :literal
|
87
|
-
|
93
|
+
node << Literal.new(token, active_opts)
|
88
94
|
when :free_space
|
89
95
|
free_space(token)
|
90
96
|
|
@@ -117,28 +123,21 @@ class Regexp::Parser
|
|
117
123
|
def meta(token)
|
118
124
|
case token.token
|
119
125
|
when :dot
|
120
|
-
|
126
|
+
node << CharacterType::Any.new(token, active_opts)
|
121
127
|
when :alternation
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
seq = Alternative.new(alt.level, alt.set_level, alt.conditional_level)
|
126
|
-
|
127
|
-
while @node.expressions.last
|
128
|
-
seq.insert @node.expressions.pop
|
129
|
-
end
|
130
|
-
alt.alternative(seq)
|
131
|
-
|
132
|
-
@node << alt
|
133
|
-
@node = alt
|
134
|
-
@node.alternative
|
135
|
-
else
|
136
|
-
@node = @node.last
|
137
|
-
@node.alternative
|
138
|
-
end
|
128
|
+
if node.token == :alternation
|
129
|
+
elsif node.last.is_a?(Alternation)
|
130
|
+
self.node = node.last
|
139
131
|
else
|
140
|
-
|
132
|
+
alt = Alternation.new(token, active_opts)
|
133
|
+
seq = Alternative.new(alt.level, alt.set_level, alt.conditional_level)
|
134
|
+
node.expressions.count.times { seq.insert(node.expressions.pop) }
|
135
|
+
alt.alternative(seq)
|
136
|
+
|
137
|
+
node << alt
|
138
|
+
self.node = alt
|
141
139
|
end
|
140
|
+
node.alternative
|
142
141
|
else
|
143
142
|
raise UnknownTokenError.new('Meta', token)
|
144
143
|
end
|
@@ -147,21 +146,21 @@ class Regexp::Parser
|
|
147
146
|
def backref(token)
|
148
147
|
case token.token
|
149
148
|
when :name_ref
|
150
|
-
|
149
|
+
node << Backreference::Name.new(token, active_opts)
|
151
150
|
when :name_nest_ref
|
152
|
-
|
151
|
+
node << Backreference::NameNestLevel.new(token, active_opts)
|
153
152
|
when :name_call
|
154
|
-
|
153
|
+
node << Backreference::NameCall.new(token, active_opts)
|
155
154
|
when :number, :number_ref
|
156
|
-
|
155
|
+
node << Backreference::Number.new(token, active_opts)
|
157
156
|
when :number_rel_ref
|
158
|
-
|
157
|
+
node << Backreference::NumberRelative.new(token, active_opts)
|
159
158
|
when :number_nest_ref
|
160
|
-
|
159
|
+
node << Backreference::NumberNestLevel.new(token, active_opts)
|
161
160
|
when :number_call
|
162
|
-
|
161
|
+
node << Backreference::NumberCall.new(token, active_opts)
|
163
162
|
when :number_rel_call
|
164
|
-
|
163
|
+
node << Backreference::NumberCallRelative.new(token, active_opts)
|
165
164
|
else
|
166
165
|
raise UnknownTokenError.new('Backreference', token)
|
167
166
|
end
|
@@ -170,25 +169,25 @@ class Regexp::Parser
|
|
170
169
|
def type(token)
|
171
170
|
case token.token
|
172
171
|
when :digit
|
173
|
-
|
172
|
+
node << CharacterType::Digit.new(token, active_opts)
|
174
173
|
when :nondigit
|
175
|
-
|
174
|
+
node << CharacterType::NonDigit.new(token, active_opts)
|
176
175
|
when :hex
|
177
|
-
|
176
|
+
node << CharacterType::Hex.new(token, active_opts)
|
178
177
|
when :nonhex
|
179
|
-
|
178
|
+
node << CharacterType::NonHex.new(token, active_opts)
|
180
179
|
when :space
|
181
|
-
|
180
|
+
node << CharacterType::Space.new(token, active_opts)
|
182
181
|
when :nonspace
|
183
|
-
|
182
|
+
node << CharacterType::NonSpace.new(token, active_opts)
|
184
183
|
when :word
|
185
|
-
|
184
|
+
node << CharacterType::Word.new(token, active_opts)
|
186
185
|
when :nonword
|
187
|
-
|
186
|
+
node << CharacterType::NonWord.new(token, active_opts)
|
188
187
|
when :linebreak
|
189
|
-
|
188
|
+
node << CharacterType::Linebreak.new(token, active_opts)
|
190
189
|
when :xgrapheme
|
191
|
-
|
190
|
+
node << CharacterType::ExtendedGrapheme.new(token, active_opts)
|
192
191
|
else
|
193
192
|
raise UnknownTokenError.new('CharacterType', token)
|
194
193
|
end
|
@@ -199,20 +198,20 @@ class Regexp::Parser
|
|
199
198
|
when :open
|
200
199
|
nest_conditional(Conditional::Expression.new(token, active_opts))
|
201
200
|
when :condition
|
202
|
-
|
203
|
-
|
201
|
+
conditional_nesting.last.condition = Conditional::Condition.new(token, active_opts)
|
202
|
+
conditional_nesting.last.branch
|
204
203
|
when :separator
|
205
|
-
|
206
|
-
|
204
|
+
conditional_nesting.last.branch
|
205
|
+
self.node = conditional_nesting.last.branches.last
|
207
206
|
when :close
|
208
|
-
|
209
|
-
|
210
|
-
@node = if @conditional_nesting.empty?
|
211
|
-
@nesting.last
|
212
|
-
else
|
213
|
-
@conditional_nesting.last
|
214
|
-
end
|
207
|
+
conditional_nesting.pop
|
215
208
|
|
209
|
+
self.node =
|
210
|
+
if conditional_nesting.empty?
|
211
|
+
nesting.last
|
212
|
+
else
|
213
|
+
conditional_nesting.last
|
214
|
+
end
|
216
215
|
else
|
217
216
|
raise UnknownTokenError.new('Conditional', token)
|
218
217
|
end
|
@@ -222,86 +221,86 @@ class Regexp::Parser
|
|
222
221
|
|
223
222
|
def property(token)
|
224
223
|
case token.token
|
225
|
-
when :alnum;
|
226
|
-
when :alpha;
|
227
|
-
when :ascii;
|
228
|
-
when :blank;
|
229
|
-
when :cntrl;
|
230
|
-
when :digit;
|
231
|
-
when :graph;
|
232
|
-
when :lower;
|
233
|
-
when :print;
|
234
|
-
when :punct;
|
235
|
-
when :space;
|
236
|
-
when :upper;
|
237
|
-
when :word;
|
238
|
-
when :xdigit;
|
239
|
-
when :xposixpunct;
|
224
|
+
when :alnum; node << Alnum.new(token, active_opts)
|
225
|
+
when :alpha; node << Alpha.new(token, active_opts)
|
226
|
+
when :ascii; node << Ascii.new(token, active_opts)
|
227
|
+
when :blank; node << Blank.new(token, active_opts)
|
228
|
+
when :cntrl; node << Cntrl.new(token, active_opts)
|
229
|
+
when :digit; node << Digit.new(token, active_opts)
|
230
|
+
when :graph; node << Graph.new(token, active_opts)
|
231
|
+
when :lower; node << Lower.new(token, active_opts)
|
232
|
+
when :print; node << Print.new(token, active_opts)
|
233
|
+
when :punct; node << Punct.new(token, active_opts)
|
234
|
+
when :space; node << Space.new(token, active_opts)
|
235
|
+
when :upper; node << Upper.new(token, active_opts)
|
236
|
+
when :word; node << Word.new(token, active_opts)
|
237
|
+
when :xdigit; node << Xdigit.new(token, active_opts)
|
238
|
+
when :xposixpunct; node << XPosixPunct.new(token, active_opts)
|
240
239
|
|
241
240
|
# only in Oniguruma (old rubies)
|
242
|
-
when :newline;
|
243
|
-
|
244
|
-
when :any;
|
245
|
-
when :assigned;
|
246
|
-
|
247
|
-
when :letter_any;
|
248
|
-
when :letter_uppercase;
|
249
|
-
when :letter_lowercase;
|
250
|
-
when :letter_titlecase;
|
251
|
-
when :letter_modifier;
|
252
|
-
when :letter_other;
|
253
|
-
|
254
|
-
when :mark_any;
|
255
|
-
when :mark_nonspacing;
|
256
|
-
when :mark_spacing;
|
257
|
-
when :mark_enclosing;
|
258
|
-
|
259
|
-
when :number_any;
|
260
|
-
when :number_decimal;
|
261
|
-
when :number_letter;
|
262
|
-
when :number_other;
|
263
|
-
|
264
|
-
when :punct_any;
|
265
|
-
when :punct_connector;
|
266
|
-
when :punct_dash;
|
267
|
-
when :punct_open;
|
268
|
-
when :punct_close;
|
269
|
-
when :punct_initial;
|
270
|
-
when :punct_final;
|
271
|
-
when :punct_other;
|
272
|
-
|
273
|
-
when :separator_any;
|
274
|
-
when :separator_space;
|
275
|
-
when :separator_line;
|
276
|
-
when :separator_para;
|
277
|
-
|
278
|
-
when :symbol_any;
|
279
|
-
when :symbol_math;
|
280
|
-
when :symbol_currency;
|
281
|
-
when :symbol_modifier;
|
282
|
-
when :symbol_other;
|
283
|
-
|
284
|
-
when :other;
|
285
|
-
when :control;
|
286
|
-
when :format;
|
287
|
-
when :surrogate;
|
288
|
-
when :private_use;
|
289
|
-
when :unassigned;
|
241
|
+
when :newline; node << Newline.new(token, active_opts)
|
242
|
+
|
243
|
+
when :any; node << Any.new(token, active_opts)
|
244
|
+
when :assigned; node << Assigned.new(token, active_opts)
|
245
|
+
|
246
|
+
when :letter_any; node << Letter::Any.new(token, active_opts)
|
247
|
+
when :letter_uppercase; node << Letter::Uppercase.new(token, active_opts)
|
248
|
+
when :letter_lowercase; node << Letter::Lowercase.new(token, active_opts)
|
249
|
+
when :letter_titlecase; node << Letter::Titlecase.new(token, active_opts)
|
250
|
+
when :letter_modifier; node << Letter::Modifier.new(token, active_opts)
|
251
|
+
when :letter_other; node << Letter::Other.new(token, active_opts)
|
252
|
+
|
253
|
+
when :mark_any; node << Mark::Any.new(token, active_opts)
|
254
|
+
when :mark_nonspacing; node << Mark::Nonspacing.new(token, active_opts)
|
255
|
+
when :mark_spacing; node << Mark::Spacing.new(token, active_opts)
|
256
|
+
when :mark_enclosing; node << Mark::Enclosing.new(token, active_opts)
|
257
|
+
|
258
|
+
when :number_any; node << Number::Any.new(token, active_opts)
|
259
|
+
when :number_decimal; node << Number::Decimal.new(token, active_opts)
|
260
|
+
when :number_letter; node << Number::Letter.new(token, active_opts)
|
261
|
+
when :number_other; node << Number::Other.new(token, active_opts)
|
262
|
+
|
263
|
+
when :punct_any; node << Punctuation::Any.new(token, active_opts)
|
264
|
+
when :punct_connector; node << Punctuation::Connector.new(token, active_opts)
|
265
|
+
when :punct_dash; node << Punctuation::Dash.new(token, active_opts)
|
266
|
+
when :punct_open; node << Punctuation::Open.new(token, active_opts)
|
267
|
+
when :punct_close; node << Punctuation::Close.new(token, active_opts)
|
268
|
+
when :punct_initial; node << Punctuation::Initial.new(token, active_opts)
|
269
|
+
when :punct_final; node << Punctuation::Final.new(token, active_opts)
|
270
|
+
when :punct_other; node << Punctuation::Other.new(token, active_opts)
|
271
|
+
|
272
|
+
when :separator_any; node << Separator::Any.new(token, active_opts)
|
273
|
+
when :separator_space; node << Separator::Space.new(token, active_opts)
|
274
|
+
when :separator_line; node << Separator::Line.new(token, active_opts)
|
275
|
+
when :separator_para; node << Separator::Paragraph.new(token, active_opts)
|
276
|
+
|
277
|
+
when :symbol_any; node << Symbol::Any.new(token, active_opts)
|
278
|
+
when :symbol_math; node << Symbol::Math.new(token, active_opts)
|
279
|
+
when :symbol_currency; node << Symbol::Currency.new(token, active_opts)
|
280
|
+
when :symbol_modifier; node << Symbol::Modifier.new(token, active_opts)
|
281
|
+
when :symbol_other; node << Symbol::Other.new(token, active_opts)
|
282
|
+
|
283
|
+
when :other; node << Codepoint::Any.new(token, active_opts)
|
284
|
+
when :control; node << Codepoint::Control.new(token, active_opts)
|
285
|
+
when :format; node << Codepoint::Format.new(token, active_opts)
|
286
|
+
when :surrogate; node << Codepoint::Surrogate.new(token, active_opts)
|
287
|
+
when :private_use; node << Codepoint::PrivateUse.new(token, active_opts)
|
288
|
+
when :unassigned; node << Codepoint::Unassigned.new(token, active_opts)
|
290
289
|
|
291
290
|
when *Token::UnicodeProperty::Age
|
292
|
-
|
291
|
+
node << Age.new(token, active_opts)
|
293
292
|
|
294
293
|
when *Token::UnicodeProperty::Derived
|
295
|
-
|
294
|
+
node << Derived.new(token, active_opts)
|
296
295
|
|
297
296
|
when *Token::UnicodeProperty::Emoji
|
298
|
-
|
297
|
+
node << Emoji.new(token, active_opts)
|
299
298
|
|
300
299
|
when *Token::UnicodeProperty::Script
|
301
|
-
|
300
|
+
node << Script.new(token, active_opts)
|
302
301
|
|
303
302
|
when *Token::UnicodeProperty::UnicodeBlock
|
304
|
-
|
303
|
+
node << Block.new(token, active_opts)
|
305
304
|
|
306
305
|
else
|
307
306
|
raise UnknownTokenError.new('UnicodeProperty', token)
|
@@ -311,21 +310,21 @@ class Regexp::Parser
|
|
311
310
|
def anchor(token)
|
312
311
|
case token.token
|
313
312
|
when :bol
|
314
|
-
|
313
|
+
node << Anchor::BeginningOfLine.new(token, active_opts)
|
315
314
|
when :eol
|
316
|
-
|
315
|
+
node << Anchor::EndOfLine.new(token, active_opts)
|
317
316
|
when :bos
|
318
|
-
|
317
|
+
node << Anchor::BOS.new(token, active_opts)
|
319
318
|
when :eos
|
320
|
-
|
319
|
+
node << Anchor::EOS.new(token, active_opts)
|
321
320
|
when :eos_ob_eol
|
322
|
-
|
321
|
+
node << Anchor::EOSobEOL.new(token, active_opts)
|
323
322
|
when :word_boundary
|
324
|
-
|
323
|
+
node << Anchor::WordBoundary.new(token, active_opts)
|
325
324
|
when :nonword_boundary
|
326
|
-
|
325
|
+
node << Anchor::NonWordBoundary.new(token, active_opts)
|
327
326
|
when :match_start
|
328
|
-
|
327
|
+
node << Anchor::MatchStart.new(token, active_opts)
|
329
328
|
else
|
330
329
|
raise UnknownTokenError.new('Anchor', token)
|
331
330
|
end
|
@@ -335,58 +334,58 @@ class Regexp::Parser
|
|
335
334
|
case token.token
|
336
335
|
|
337
336
|
when :backspace
|
338
|
-
|
337
|
+
node << EscapeSequence::Backspace.new(token, active_opts)
|
339
338
|
|
340
339
|
when :escape
|
341
|
-
|
340
|
+
node << EscapeSequence::AsciiEscape.new(token, active_opts)
|
342
341
|
when :bell
|
343
|
-
|
342
|
+
node << EscapeSequence::Bell.new(token, active_opts)
|
344
343
|
when :form_feed
|
345
|
-
|
344
|
+
node << EscapeSequence::FormFeed.new(token, active_opts)
|
346
345
|
when :newline
|
347
|
-
|
346
|
+
node << EscapeSequence::Newline.new(token, active_opts)
|
348
347
|
when :carriage
|
349
|
-
|
348
|
+
node << EscapeSequence::Return.new(token, active_opts)
|
350
349
|
when :space
|
351
|
-
|
350
|
+
node << EscapeSequence::Space.new(token, active_opts)
|
352
351
|
when :tab
|
353
|
-
|
352
|
+
node << EscapeSequence::Tab.new(token, active_opts)
|
354
353
|
when :vertical_tab
|
355
|
-
|
354
|
+
node << EscapeSequence::VerticalTab.new(token, active_opts)
|
356
355
|
|
357
356
|
when :control
|
358
357
|
if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
|
359
|
-
|
358
|
+
node << EscapeSequence::MetaControl.new(token, active_opts)
|
360
359
|
else
|
361
|
-
|
360
|
+
node << EscapeSequence::Control.new(token, active_opts)
|
362
361
|
end
|
363
362
|
|
364
363
|
when :meta_sequence
|
365
364
|
if token.text =~ /\A\\M-\\[Cc]/
|
366
|
-
|
365
|
+
node << EscapeSequence::MetaControl.new(token, active_opts)
|
367
366
|
else
|
368
|
-
|
367
|
+
node << EscapeSequence::Meta.new(token, active_opts)
|
369
368
|
end
|
370
369
|
|
371
370
|
else
|
372
371
|
# treating everything else as a literal
|
373
|
-
|
372
|
+
node << EscapeSequence::Literal.new(token, active_opts)
|
374
373
|
end
|
375
374
|
end
|
376
375
|
|
377
376
|
def keep(token)
|
378
|
-
|
377
|
+
node << Keep::Mark.new(token, active_opts)
|
379
378
|
end
|
380
379
|
|
381
380
|
def free_space(token)
|
382
381
|
case token.token
|
383
382
|
when :comment
|
384
|
-
|
383
|
+
node << Comment.new(token, active_opts)
|
385
384
|
when :whitespace
|
386
|
-
if
|
387
|
-
|
385
|
+
if node.last.is_a?(WhiteSpace)
|
386
|
+
node.last.merge(WhiteSpace.new(token, active_opts))
|
388
387
|
else
|
389
|
-
|
388
|
+
node << WhiteSpace.new(token, active_opts)
|
390
389
|
end
|
391
390
|
else
|
392
391
|
raise UnknownTokenError.new('FreeSpace', token)
|
@@ -395,13 +394,13 @@ class Regexp::Parser
|
|
395
394
|
|
396
395
|
def quantifier(token)
|
397
396
|
offset = -1
|
398
|
-
target_node =
|
399
|
-
while target_node
|
400
|
-
target_node =
|
397
|
+
target_node = node.expressions[offset]
|
398
|
+
while target_node.is_a?(FreeSpace)
|
399
|
+
target_node = node.expressions[offset -= 1]
|
401
400
|
end
|
402
401
|
|
403
|
-
raise
|
404
|
-
|
402
|
+
target_node || raise(ArgumentError, 'No valid target found for '\
|
403
|
+
"'#{token.text}' ")
|
405
404
|
|
406
405
|
case token.token
|
407
406
|
when :zero_or_one
|
@@ -462,7 +461,7 @@ class Regexp::Parser
|
|
462
461
|
when :close
|
463
462
|
close_group
|
464
463
|
when :comment
|
465
|
-
|
464
|
+
node << Group::Comment.new(token, active_opts)
|
466
465
|
else
|
467
466
|
open_group(token)
|
468
467
|
end
|
@@ -471,7 +470,7 @@ class Regexp::Parser
|
|
471
470
|
def options_group(token)
|
472
471
|
positive, negative = token.text.split('-', 2)
|
473
472
|
negative ||= ''
|
474
|
-
|
473
|
+
self.switching_options = !token.text.include?(':')
|
475
474
|
# TODO: change this -^ to token.type == :options_switch in v1.0.0
|
476
475
|
|
477
476
|
new_options = active_opts.dup
|
@@ -490,7 +489,7 @@ class Regexp::Parser
|
|
490
489
|
new_options[flag.to_sym] = true
|
491
490
|
end
|
492
491
|
|
493
|
-
|
492
|
+
options_stack << new_options
|
494
493
|
|
495
494
|
exp = Group::Options.new(token, active_opts)
|
496
495
|
|
@@ -525,43 +524,44 @@ class Regexp::Parser
|
|
525
524
|
|
526
525
|
# Push the active options to the stack again. This way we can simply pop the
|
527
526
|
# stack for any group we close, no matter if it had its own options or not.
|
528
|
-
|
527
|
+
options_stack << active_opts
|
529
528
|
|
530
529
|
nest(exp)
|
531
530
|
end
|
532
531
|
|
533
532
|
def close_group
|
534
|
-
|
535
|
-
|
536
|
-
|
533
|
+
nesting.pop
|
534
|
+
options_stack.pop unless switching_options
|
535
|
+
self.switching_options = false
|
537
536
|
|
538
|
-
|
539
|
-
|
537
|
+
self.node = nesting.last
|
538
|
+
self.node = node.last if node.last and node.last.is_a?(Alternation)
|
540
539
|
end
|
541
540
|
|
542
541
|
def open_set(token)
|
543
542
|
token.token = :character
|
544
543
|
|
545
544
|
if token.type == :subset
|
546
|
-
|
545
|
+
current_set << CharacterSubSet.new(token, active_opts)
|
547
546
|
else
|
548
|
-
|
547
|
+
self.current_set = CharacterSet.new(token, active_opts)
|
548
|
+
node << current_set
|
549
549
|
end
|
550
550
|
end
|
551
551
|
|
552
552
|
def negate_set
|
553
|
-
|
553
|
+
current_set.negate
|
554
554
|
end
|
555
555
|
|
556
556
|
def append_set(token)
|
557
|
-
|
557
|
+
current_set << token.text
|
558
558
|
end
|
559
559
|
|
560
560
|
def close_set(token)
|
561
|
-
|
561
|
+
current_set.close
|
562
562
|
end
|
563
563
|
|
564
564
|
def active_opts
|
565
|
-
|
565
|
+
options_stack.last
|
566
566
|
end
|
567
567
|
end # module Regexp::Parser
|