regexp_parser 0.4.13 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +15 -0
- data/lib/regexp_parser/expression.rb +43 -51
- data/lib/regexp_parser/expression/classes/alternation.rb +6 -7
- data/lib/regexp_parser/expression/classes/character_class.rb +11 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +10 -18
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
- data/lib/regexp_parser/expression/classes/group.rb +9 -21
- data/lib/regexp_parser/expression/classes/property.rb +2 -2
- data/lib/regexp_parser/expression/classes/set.rb +1 -12
- data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
- data/lib/regexp_parser/expression/quantifier.rb +9 -9
- data/lib/regexp_parser/expression/sequence.rb +5 -4
- data/lib/regexp_parser/expression/subexpression.rb +16 -59
- data/lib/regexp_parser/lexer.rb +31 -27
- data/lib/regexp_parser/parser.rb +179 -179
- data/lib/regexp_parser/scanner.rb +172 -166
- data/lib/regexp_parser/scanner/scanner.rl +44 -38
- data/lib/regexp_parser/syntax.rb +2 -53
- data/lib/regexp_parser/syntax/base.rb +13 -24
- data/lib/regexp_parser/syntax/tokens/character_class.rb +16 -0
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +26 -26
- data/lib/regexp_parser/syntax/version_lookup.rb +82 -0
- data/lib/regexp_parser/syntax/versions.rb +1 -5
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +30 -0
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +36 -0
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +11 -0
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +20 -0
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +10 -0
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +9 -0
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +10 -0
- data/lib/regexp_parser/token.rb +6 -29
- data/lib/regexp_parser/version.rb +1 -1
- data/test/expression/test_strfregexp.rb +7 -0
- data/test/expression/test_to_h.rb +6 -0
- data/test/parser/test_properties.rb +12 -4
- data/test/support/warning_extractor.rb +3 -1
- data/test/syntax/test_all.rb +1 -1
- data/test/syntax/test_syntax.rb +5 -9
- data/test/syntax/{ruby → versions}/test_1.8.rb +14 -14
- data/test/syntax/{ruby → versions}/test_1.9.1.rb +7 -8
- data/test/syntax/{ruby → versions}/test_1.9.3.rb +7 -7
- data/test/syntax/versions/test_2.0.0.rb +37 -0
- data/test/syntax/{ruby → versions}/test_2.2.0.rb +7 -7
- data/test/syntax/versions/test_aliases.rb +129 -0
- data/test/syntax/{ruby → versions}/test_all.rb +1 -1
- metadata +73 -113
- data/lib/regexp_parser/syntax/ruby/1.8.6.rb +0 -37
- data/lib/regexp_parser/syntax/ruby/1.8.7.rb +0 -14
- data/lib/regexp_parser/syntax/ruby/1.8.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/1.9.1.rb +0 -45
- data/lib/regexp_parser/syntax/ruby/1.9.2.rb +0 -9
- data/lib/regexp_parser/syntax/ruby/1.9.3.rb +0 -19
- data/lib/regexp_parser/syntax/ruby/1.9.rb +0 -8
- data/lib/regexp_parser/syntax/ruby/2.0.0.rb +0 -23
- data/lib/regexp_parser/syntax/ruby/2.0.rb +0 -8
- data/lib/regexp_parser/syntax/ruby/2.1.0.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.10.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.2.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.3.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.4.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.5.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.6.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.7.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.8.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.9.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.1.rb +0 -8
- data/lib/regexp_parser/syntax/ruby/2.2.0.rb +0 -16
- data/lib/regexp_parser/syntax/ruby/2.2.1.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.10.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.2.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.3.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.4.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.5.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.6.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.7.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.8.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.9.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.2.rb +0 -8
- data/lib/regexp_parser/syntax/ruby/2.3.0.rb +0 -16
- data/lib/regexp_parser/syntax/ruby/2.3.1.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.3.2.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.3.3.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.3.4.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.3.5.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.3.6.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.3.7.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.3.rb +0 -8
- data/lib/regexp_parser/syntax/ruby/2.4.0.rb +0 -16
- data/lib/regexp_parser/syntax/ruby/2.4.1.rb +0 -15
- data/lib/regexp_parser/syntax/ruby/2.4.2.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.4.3.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.4.4.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.4.rb +0 -8
- data/lib/regexp_parser/syntax/ruby/2.5.0.rb +0 -16
- data/lib/regexp_parser/syntax/ruby/2.5.1.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.5.rb +0 -8
- data/lib/regexp_parser/syntax/ruby/2.6.0.rb +0 -13
- data/lib/regexp_parser/syntax/ruby/2.6.rb +0 -8
- data/test/syntax/ruby/test_2.0.0.rb +0 -32
- data/test/syntax/ruby/test_files.rb +0 -353
@@ -25,18 +25,19 @@ module Regexp::Expression
|
|
25
25
|
end
|
26
26
|
|
27
27
|
def starts_at
|
28
|
-
|
28
|
+
expressions.first.starts_at
|
29
29
|
end
|
30
|
+
alias :ts :starts_at
|
30
31
|
|
31
32
|
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
32
33
|
offset = -1
|
33
34
|
target = expressions[offset]
|
34
|
-
while target
|
35
|
+
while target.is_a?(FreeSpace)
|
35
36
|
target = expressions[offset -= 1]
|
36
37
|
end
|
37
38
|
|
38
|
-
raise
|
39
|
-
|
39
|
+
target || raise(ArgumentError, "No valid target found for '#{text}' "\
|
40
|
+
'quantifier')
|
40
41
|
|
41
42
|
target.quantify(token, text, min, max, mode)
|
42
43
|
end
|
@@ -6,63 +6,31 @@ module Regexp::Expression
|
|
6
6
|
def initialize(token, options = {})
|
7
7
|
super
|
8
8
|
|
9
|
-
|
9
|
+
self.expressions = []
|
10
10
|
end
|
11
11
|
|
12
12
|
# Override base method to clone the expressions as well.
|
13
13
|
def clone
|
14
14
|
copy = super
|
15
|
-
copy.expressions =
|
15
|
+
copy.expressions = expressions.map(&:clone)
|
16
16
|
copy
|
17
17
|
end
|
18
18
|
|
19
19
|
def <<(exp)
|
20
|
-
if exp.is_a?(WhiteSpace)
|
21
|
-
|
22
|
-
@expressions.last.merge(exp)
|
20
|
+
if exp.is_a?(WhiteSpace) && last && last.is_a?(WhiteSpace)
|
21
|
+
last.merge(exp)
|
23
22
|
else
|
24
|
-
|
23
|
+
expressions << exp
|
25
24
|
end
|
26
25
|
end
|
27
26
|
|
28
27
|
def insert(exp)
|
29
|
-
|
28
|
+
expressions.insert(0, exp)
|
30
29
|
end
|
31
30
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
def each_with_index(&block)
|
37
|
-
@expressions.each_with_index {|e, i| yield e, i}
|
38
|
-
end
|
39
|
-
|
40
|
-
def first
|
41
|
-
@expressions.first
|
42
|
-
end
|
43
|
-
|
44
|
-
def last
|
45
|
-
@expressions.last
|
46
|
-
end
|
47
|
-
|
48
|
-
def [](index)
|
49
|
-
@expressions[index]
|
50
|
-
end
|
51
|
-
|
52
|
-
def length
|
53
|
-
@expressions.length
|
54
|
-
end
|
55
|
-
|
56
|
-
def empty?
|
57
|
-
@expressions.empty?
|
58
|
-
end
|
59
|
-
|
60
|
-
def all?(&block)
|
61
|
-
@expressions.all? {|exp| yield(exp) }
|
62
|
-
end
|
63
|
-
|
64
|
-
def ts
|
65
|
-
starts_at
|
31
|
+
%w[[] all? any? at count each each_with_index empty?
|
32
|
+
fetch find first index join last length values_at].each do |m|
|
33
|
+
define_method(m) { |*args, &block| expressions.send(m, *args, &block) }
|
66
34
|
end
|
67
35
|
|
68
36
|
def te
|
@@ -70,28 +38,17 @@ module Regexp::Expression
|
|
70
38
|
end
|
71
39
|
|
72
40
|
def to_s(format = :full)
|
73
|
-
s = ''
|
74
|
-
|
75
41
|
# Note: the format does not get passed down to subexpressions.
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
s << @expressions.map{|e| e.to_s}.join unless @expressions.empty?
|
80
|
-
else
|
81
|
-
s << @text.dup
|
82
|
-
s << @expressions.map{|e| e.to_s}.join unless @expressions.empty?
|
83
|
-
s << @quantifier if quantified?
|
84
|
-
end
|
85
|
-
|
86
|
-
s
|
42
|
+
# Note: can't use the #text accessor, b/c it is overridden as `def text; to_s end`
|
43
|
+
# in Expression::Sequence, causing infinite recursion. Clean-up needed.
|
44
|
+
"#{@text}#{expressions.join}#{quantifier_affix(format)}"
|
87
45
|
end
|
88
46
|
|
89
47
|
def to_h
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
48
|
+
super.merge({
|
49
|
+
text: to_s(:base),
|
50
|
+
expressions: expressions.map(&:to_h)
|
51
|
+
})
|
94
52
|
end
|
95
53
|
end
|
96
|
-
|
97
54
|
end
|
data/lib/regexp_parser/lexer.rb
CHANGED
@@ -17,8 +17,10 @@ class Regexp::Lexer
|
|
17
17
|
def lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
|
18
18
|
syntax = Regexp::Syntax.new(syntax)
|
19
19
|
|
20
|
-
|
21
|
-
|
20
|
+
self.tokens = []
|
21
|
+
self.nesting = 0
|
22
|
+
self.set_nesting = 0
|
23
|
+
self.conditional_nesting = 0
|
22
24
|
|
23
25
|
last = nil
|
24
26
|
Regexp::Scanner.scan(input) do |type, token, text, ts, te|
|
@@ -31,7 +33,7 @@ class Regexp::Lexer
|
|
31
33
|
last and last.type == :literal
|
32
34
|
|
33
35
|
current = Regexp::Token.new(type, token, text, ts, te,
|
34
|
-
|
36
|
+
nesting, set_nesting, conditional_nesting)
|
35
37
|
|
36
38
|
current = merge_literal(current) if type == :literal and
|
37
39
|
last and last.type == :literal
|
@@ -39,19 +41,19 @@ class Regexp::Lexer
|
|
39
41
|
current = merge_condition(current) if type == :conditional and
|
40
42
|
[:condition, :condition_close].include?(token)
|
41
43
|
|
42
|
-
last.next
|
43
|
-
current.previous
|
44
|
+
last.next = current if last
|
45
|
+
current.previous = last if last
|
44
46
|
|
45
|
-
|
47
|
+
tokens << current
|
46
48
|
last = current
|
47
49
|
|
48
50
|
descend(type, token)
|
49
51
|
end
|
50
52
|
|
51
53
|
if block_given?
|
52
|
-
|
54
|
+
tokens.map { |t| block.call(t) }
|
53
55
|
else
|
54
|
-
|
56
|
+
tokens
|
55
57
|
end
|
56
58
|
end
|
57
59
|
|
@@ -59,33 +61,35 @@ class Regexp::Lexer
|
|
59
61
|
alias :scan :lex
|
60
62
|
end
|
61
63
|
|
62
|
-
|
64
|
+
private
|
65
|
+
|
66
|
+
attr_accessor :tokens, :nesting, :set_nesting, :conditional_nesting
|
63
67
|
|
64
68
|
def ascend(type, token)
|
65
69
|
if type == :group or type == :assertion
|
66
|
-
|
70
|
+
self.nesting = nesting - 1 if CLOSING_TOKENS.include?(token)
|
67
71
|
end
|
68
72
|
|
69
73
|
if type == :set or type == :subset
|
70
|
-
|
74
|
+
self.set_nesting = set_nesting - 1 if token == :close
|
71
75
|
end
|
72
76
|
|
73
77
|
if type == :conditional
|
74
|
-
|
78
|
+
self.conditional_nesting = conditional_nesting - 1 if token == :close
|
75
79
|
end
|
76
80
|
end
|
77
81
|
|
78
82
|
def descend(type, token)
|
79
83
|
if type == :group or type == :assertion
|
80
|
-
|
84
|
+
self.nesting = nesting + 1 if OPENING_TOKENS.include?(token)
|
81
85
|
end
|
82
86
|
|
83
87
|
if type == :set or type == :subset
|
84
|
-
|
88
|
+
self.set_nesting = set_nesting + 1 if token == :open
|
85
89
|
end
|
86
90
|
|
87
91
|
if type == :conditional
|
88
|
-
|
92
|
+
self.conditional_nesting = conditional_nesting + 1 if token == :open
|
89
93
|
end
|
90
94
|
end
|
91
95
|
|
@@ -105,20 +109,20 @@ class Regexp::Lexer
|
|
105
109
|
last_length = last.length
|
106
110
|
end
|
107
111
|
|
108
|
-
|
109
|
-
|
110
|
-
|
112
|
+
tokens.pop
|
113
|
+
tokens << Regexp::Token.new(:literal, :literal, lead, token.ts,
|
114
|
+
(token.te - last_length), nesting, set_nesting, conditional_nesting)
|
111
115
|
|
112
|
-
|
113
|
-
|
114
|
-
|
116
|
+
tokens << Regexp::Token.new(:literal, :literal, last,
|
117
|
+
(token.ts + lead_length),
|
118
|
+
token.te, nesting, set_nesting, conditional_nesting)
|
115
119
|
end
|
116
120
|
end
|
117
121
|
|
118
122
|
# called by scan to merge two consecutive literals. this happens when tokens
|
119
123
|
# get normalized (as in the case of posix/bre) and end up becoming literals.
|
120
124
|
def merge_literal(current)
|
121
|
-
last =
|
125
|
+
last = tokens.pop
|
122
126
|
|
123
127
|
Regexp::Token.new(
|
124
128
|
:literal,
|
@@ -126,16 +130,16 @@ class Regexp::Lexer
|
|
126
130
|
last.text + current.text,
|
127
131
|
last.ts,
|
128
132
|
current.te,
|
129
|
-
|
130
|
-
|
131
|
-
|
133
|
+
nesting,
|
134
|
+
set_nesting,
|
135
|
+
conditional_nesting,
|
132
136
|
)
|
133
137
|
end
|
134
138
|
|
135
139
|
def merge_condition(current)
|
136
|
-
last =
|
140
|
+
last = tokens.pop
|
137
141
|
Regexp::Token.new(:conditional, :condition, last.text + current.text,
|
138
|
-
last.ts, current.te,
|
142
|
+
last.ts, current.te, nesting, set_nesting, conditional_nesting)
|
139
143
|
end
|
140
144
|
|
141
145
|
end # class Regexp::Lexer
|
data/lib/regexp_parser/parser.rb
CHANGED
@@ -23,25 +23,33 @@ class Regexp::Parser
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def parse(input, syntax = "ruby/#{RUBY_VERSION}", &block)
|
26
|
-
|
26
|
+
root = Root.new(options_from_input(input))
|
27
27
|
|
28
|
-
|
29
|
-
|
30
|
-
|
28
|
+
self.root = root
|
29
|
+
self.node = root
|
30
|
+
self.nesting = [root]
|
31
|
+
|
32
|
+
self.options_stack = [root.options]
|
33
|
+
self.switching_options = false
|
34
|
+
self.conditional_nesting = []
|
31
35
|
|
32
36
|
Regexp::Lexer.scan(input, syntax) do |token|
|
33
|
-
parse_token
|
37
|
+
parse_token(token)
|
34
38
|
end
|
35
39
|
|
36
40
|
if block_given?
|
37
|
-
block.call
|
41
|
+
block.call(root)
|
38
42
|
else
|
39
|
-
|
43
|
+
root
|
40
44
|
end
|
41
45
|
end
|
42
46
|
|
43
47
|
private
|
44
48
|
|
49
|
+
attr_accessor :root, :node, :nesting,
|
50
|
+
:options_stack, :switching_options, :conditional_nesting,
|
51
|
+
:current_set
|
52
|
+
|
45
53
|
def options_from_input(input)
|
46
54
|
return {} unless input.is_a?(::Regexp)
|
47
55
|
|
@@ -53,17 +61,15 @@ class Regexp::Parser
|
|
53
61
|
end
|
54
62
|
|
55
63
|
def nest(exp)
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
@node = exp
|
64
|
+
nesting.push(exp)
|
65
|
+
node << exp
|
66
|
+
self.node = exp
|
60
67
|
end
|
61
68
|
|
62
69
|
def nest_conditional(exp)
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
@node = exp
|
70
|
+
conditional_nesting.push(exp)
|
71
|
+
node << exp
|
72
|
+
self.node = exp
|
67
73
|
end
|
68
74
|
|
69
75
|
def parse_token(token)
|
@@ -84,7 +90,7 @@ class Regexp::Parser
|
|
84
90
|
property(token)
|
85
91
|
|
86
92
|
when :literal
|
87
|
-
|
93
|
+
node << Literal.new(token, active_opts)
|
88
94
|
when :free_space
|
89
95
|
free_space(token)
|
90
96
|
|
@@ -117,28 +123,21 @@ class Regexp::Parser
|
|
117
123
|
def meta(token)
|
118
124
|
case token.token
|
119
125
|
when :dot
|
120
|
-
|
126
|
+
node << CharacterType::Any.new(token, active_opts)
|
121
127
|
when :alternation
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
seq = Alternative.new(alt.level, alt.set_level, alt.conditional_level)
|
126
|
-
|
127
|
-
while @node.expressions.last
|
128
|
-
seq.insert @node.expressions.pop
|
129
|
-
end
|
130
|
-
alt.alternative(seq)
|
131
|
-
|
132
|
-
@node << alt
|
133
|
-
@node = alt
|
134
|
-
@node.alternative
|
135
|
-
else
|
136
|
-
@node = @node.last
|
137
|
-
@node.alternative
|
138
|
-
end
|
128
|
+
if node.token == :alternation
|
129
|
+
elsif node.last.is_a?(Alternation)
|
130
|
+
self.node = node.last
|
139
131
|
else
|
140
|
-
|
132
|
+
alt = Alternation.new(token, active_opts)
|
133
|
+
seq = Alternative.new(alt.level, alt.set_level, alt.conditional_level)
|
134
|
+
node.expressions.count.times { seq.insert(node.expressions.pop) }
|
135
|
+
alt.alternative(seq)
|
136
|
+
|
137
|
+
node << alt
|
138
|
+
self.node = alt
|
141
139
|
end
|
140
|
+
node.alternative
|
142
141
|
else
|
143
142
|
raise UnknownTokenError.new('Meta', token)
|
144
143
|
end
|
@@ -147,21 +146,21 @@ class Regexp::Parser
|
|
147
146
|
def backref(token)
|
148
147
|
case token.token
|
149
148
|
when :name_ref
|
150
|
-
|
149
|
+
node << Backreference::Name.new(token, active_opts)
|
151
150
|
when :name_nest_ref
|
152
|
-
|
151
|
+
node << Backreference::NameNestLevel.new(token, active_opts)
|
153
152
|
when :name_call
|
154
|
-
|
153
|
+
node << Backreference::NameCall.new(token, active_opts)
|
155
154
|
when :number, :number_ref
|
156
|
-
|
155
|
+
node << Backreference::Number.new(token, active_opts)
|
157
156
|
when :number_rel_ref
|
158
|
-
|
157
|
+
node << Backreference::NumberRelative.new(token, active_opts)
|
159
158
|
when :number_nest_ref
|
160
|
-
|
159
|
+
node << Backreference::NumberNestLevel.new(token, active_opts)
|
161
160
|
when :number_call
|
162
|
-
|
161
|
+
node << Backreference::NumberCall.new(token, active_opts)
|
163
162
|
when :number_rel_call
|
164
|
-
|
163
|
+
node << Backreference::NumberCallRelative.new(token, active_opts)
|
165
164
|
else
|
166
165
|
raise UnknownTokenError.new('Backreference', token)
|
167
166
|
end
|
@@ -170,25 +169,25 @@ class Regexp::Parser
|
|
170
169
|
def type(token)
|
171
170
|
case token.token
|
172
171
|
when :digit
|
173
|
-
|
172
|
+
node << CharacterType::Digit.new(token, active_opts)
|
174
173
|
when :nondigit
|
175
|
-
|
174
|
+
node << CharacterType::NonDigit.new(token, active_opts)
|
176
175
|
when :hex
|
177
|
-
|
176
|
+
node << CharacterType::Hex.new(token, active_opts)
|
178
177
|
when :nonhex
|
179
|
-
|
178
|
+
node << CharacterType::NonHex.new(token, active_opts)
|
180
179
|
when :space
|
181
|
-
|
180
|
+
node << CharacterType::Space.new(token, active_opts)
|
182
181
|
when :nonspace
|
183
|
-
|
182
|
+
node << CharacterType::NonSpace.new(token, active_opts)
|
184
183
|
when :word
|
185
|
-
|
184
|
+
node << CharacterType::Word.new(token, active_opts)
|
186
185
|
when :nonword
|
187
|
-
|
186
|
+
node << CharacterType::NonWord.new(token, active_opts)
|
188
187
|
when :linebreak
|
189
|
-
|
188
|
+
node << CharacterType::Linebreak.new(token, active_opts)
|
190
189
|
when :xgrapheme
|
191
|
-
|
190
|
+
node << CharacterType::ExtendedGrapheme.new(token, active_opts)
|
192
191
|
else
|
193
192
|
raise UnknownTokenError.new('CharacterType', token)
|
194
193
|
end
|
@@ -199,20 +198,20 @@ class Regexp::Parser
|
|
199
198
|
when :open
|
200
199
|
nest_conditional(Conditional::Expression.new(token, active_opts))
|
201
200
|
when :condition
|
202
|
-
|
203
|
-
|
201
|
+
conditional_nesting.last.condition = Conditional::Condition.new(token, active_opts)
|
202
|
+
conditional_nesting.last.branch
|
204
203
|
when :separator
|
205
|
-
|
206
|
-
|
204
|
+
conditional_nesting.last.branch
|
205
|
+
self.node = conditional_nesting.last.branches.last
|
207
206
|
when :close
|
208
|
-
|
209
|
-
|
210
|
-
@node = if @conditional_nesting.empty?
|
211
|
-
@nesting.last
|
212
|
-
else
|
213
|
-
@conditional_nesting.last
|
214
|
-
end
|
207
|
+
conditional_nesting.pop
|
215
208
|
|
209
|
+
self.node =
|
210
|
+
if conditional_nesting.empty?
|
211
|
+
nesting.last
|
212
|
+
else
|
213
|
+
conditional_nesting.last
|
214
|
+
end
|
216
215
|
else
|
217
216
|
raise UnknownTokenError.new('Conditional', token)
|
218
217
|
end
|
@@ -222,86 +221,86 @@ class Regexp::Parser
|
|
222
221
|
|
223
222
|
def property(token)
|
224
223
|
case token.token
|
225
|
-
when :alnum;
|
226
|
-
when :alpha;
|
227
|
-
when :ascii;
|
228
|
-
when :blank;
|
229
|
-
when :cntrl;
|
230
|
-
when :digit;
|
231
|
-
when :graph;
|
232
|
-
when :lower;
|
233
|
-
when :print;
|
234
|
-
when :punct;
|
235
|
-
when :space;
|
236
|
-
when :upper;
|
237
|
-
when :word;
|
238
|
-
when :xdigit;
|
239
|
-
when :xposixpunct;
|
224
|
+
when :alnum; node << Alnum.new(token, active_opts)
|
225
|
+
when :alpha; node << Alpha.new(token, active_opts)
|
226
|
+
when :ascii; node << Ascii.new(token, active_opts)
|
227
|
+
when :blank; node << Blank.new(token, active_opts)
|
228
|
+
when :cntrl; node << Cntrl.new(token, active_opts)
|
229
|
+
when :digit; node << Digit.new(token, active_opts)
|
230
|
+
when :graph; node << Graph.new(token, active_opts)
|
231
|
+
when :lower; node << Lower.new(token, active_opts)
|
232
|
+
when :print; node << Print.new(token, active_opts)
|
233
|
+
when :punct; node << Punct.new(token, active_opts)
|
234
|
+
when :space; node << Space.new(token, active_opts)
|
235
|
+
when :upper; node << Upper.new(token, active_opts)
|
236
|
+
when :word; node << Word.new(token, active_opts)
|
237
|
+
when :xdigit; node << Xdigit.new(token, active_opts)
|
238
|
+
when :xposixpunct; node << XPosixPunct.new(token, active_opts)
|
240
239
|
|
241
240
|
# only in Oniguruma (old rubies)
|
242
|
-
when :newline;
|
243
|
-
|
244
|
-
when :any;
|
245
|
-
when :assigned;
|
246
|
-
|
247
|
-
when :letter_any;
|
248
|
-
when :letter_uppercase;
|
249
|
-
when :letter_lowercase;
|
250
|
-
when :letter_titlecase;
|
251
|
-
when :letter_modifier;
|
252
|
-
when :letter_other;
|
253
|
-
|
254
|
-
when :mark_any;
|
255
|
-
when :mark_nonspacing;
|
256
|
-
when :mark_spacing;
|
257
|
-
when :mark_enclosing;
|
258
|
-
|
259
|
-
when :number_any;
|
260
|
-
when :number_decimal;
|
261
|
-
when :number_letter;
|
262
|
-
when :number_other;
|
263
|
-
|
264
|
-
when :punct_any;
|
265
|
-
when :punct_connector;
|
266
|
-
when :punct_dash;
|
267
|
-
when :punct_open;
|
268
|
-
when :punct_close;
|
269
|
-
when :punct_initial;
|
270
|
-
when :punct_final;
|
271
|
-
when :punct_other;
|
272
|
-
|
273
|
-
when :separator_any;
|
274
|
-
when :separator_space;
|
275
|
-
when :separator_line;
|
276
|
-
when :separator_para;
|
277
|
-
|
278
|
-
when :symbol_any;
|
279
|
-
when :symbol_math;
|
280
|
-
when :symbol_currency;
|
281
|
-
when :symbol_modifier;
|
282
|
-
when :symbol_other;
|
283
|
-
|
284
|
-
when :other;
|
285
|
-
when :control;
|
286
|
-
when :format;
|
287
|
-
when :surrogate;
|
288
|
-
when :private_use;
|
289
|
-
when :unassigned;
|
241
|
+
when :newline; node << Newline.new(token, active_opts)
|
242
|
+
|
243
|
+
when :any; node << Any.new(token, active_opts)
|
244
|
+
when :assigned; node << Assigned.new(token, active_opts)
|
245
|
+
|
246
|
+
when :letter_any; node << Letter::Any.new(token, active_opts)
|
247
|
+
when :letter_uppercase; node << Letter::Uppercase.new(token, active_opts)
|
248
|
+
when :letter_lowercase; node << Letter::Lowercase.new(token, active_opts)
|
249
|
+
when :letter_titlecase; node << Letter::Titlecase.new(token, active_opts)
|
250
|
+
when :letter_modifier; node << Letter::Modifier.new(token, active_opts)
|
251
|
+
when :letter_other; node << Letter::Other.new(token, active_opts)
|
252
|
+
|
253
|
+
when :mark_any; node << Mark::Any.new(token, active_opts)
|
254
|
+
when :mark_nonspacing; node << Mark::Nonspacing.new(token, active_opts)
|
255
|
+
when :mark_spacing; node << Mark::Spacing.new(token, active_opts)
|
256
|
+
when :mark_enclosing; node << Mark::Enclosing.new(token, active_opts)
|
257
|
+
|
258
|
+
when :number_any; node << Number::Any.new(token, active_opts)
|
259
|
+
when :number_decimal; node << Number::Decimal.new(token, active_opts)
|
260
|
+
when :number_letter; node << Number::Letter.new(token, active_opts)
|
261
|
+
when :number_other; node << Number::Other.new(token, active_opts)
|
262
|
+
|
263
|
+
when :punct_any; node << Punctuation::Any.new(token, active_opts)
|
264
|
+
when :punct_connector; node << Punctuation::Connector.new(token, active_opts)
|
265
|
+
when :punct_dash; node << Punctuation::Dash.new(token, active_opts)
|
266
|
+
when :punct_open; node << Punctuation::Open.new(token, active_opts)
|
267
|
+
when :punct_close; node << Punctuation::Close.new(token, active_opts)
|
268
|
+
when :punct_initial; node << Punctuation::Initial.new(token, active_opts)
|
269
|
+
when :punct_final; node << Punctuation::Final.new(token, active_opts)
|
270
|
+
when :punct_other; node << Punctuation::Other.new(token, active_opts)
|
271
|
+
|
272
|
+
when :separator_any; node << Separator::Any.new(token, active_opts)
|
273
|
+
when :separator_space; node << Separator::Space.new(token, active_opts)
|
274
|
+
when :separator_line; node << Separator::Line.new(token, active_opts)
|
275
|
+
when :separator_para; node << Separator::Paragraph.new(token, active_opts)
|
276
|
+
|
277
|
+
when :symbol_any; node << Symbol::Any.new(token, active_opts)
|
278
|
+
when :symbol_math; node << Symbol::Math.new(token, active_opts)
|
279
|
+
when :symbol_currency; node << Symbol::Currency.new(token, active_opts)
|
280
|
+
when :symbol_modifier; node << Symbol::Modifier.new(token, active_opts)
|
281
|
+
when :symbol_other; node << Symbol::Other.new(token, active_opts)
|
282
|
+
|
283
|
+
when :other; node << Codepoint::Any.new(token, active_opts)
|
284
|
+
when :control; node << Codepoint::Control.new(token, active_opts)
|
285
|
+
when :format; node << Codepoint::Format.new(token, active_opts)
|
286
|
+
when :surrogate; node << Codepoint::Surrogate.new(token, active_opts)
|
287
|
+
when :private_use; node << Codepoint::PrivateUse.new(token, active_opts)
|
288
|
+
when :unassigned; node << Codepoint::Unassigned.new(token, active_opts)
|
290
289
|
|
291
290
|
when *Token::UnicodeProperty::Age
|
292
|
-
|
291
|
+
node << Age.new(token, active_opts)
|
293
292
|
|
294
293
|
when *Token::UnicodeProperty::Derived
|
295
|
-
|
294
|
+
node << Derived.new(token, active_opts)
|
296
295
|
|
297
296
|
when *Token::UnicodeProperty::Emoji
|
298
|
-
|
297
|
+
node << Emoji.new(token, active_opts)
|
299
298
|
|
300
299
|
when *Token::UnicodeProperty::Script
|
301
|
-
|
300
|
+
node << Script.new(token, active_opts)
|
302
301
|
|
303
302
|
when *Token::UnicodeProperty::UnicodeBlock
|
304
|
-
|
303
|
+
node << Block.new(token, active_opts)
|
305
304
|
|
306
305
|
else
|
307
306
|
raise UnknownTokenError.new('UnicodeProperty', token)
|
@@ -311,21 +310,21 @@ class Regexp::Parser
|
|
311
310
|
def anchor(token)
|
312
311
|
case token.token
|
313
312
|
when :bol
|
314
|
-
|
313
|
+
node << Anchor::BeginningOfLine.new(token, active_opts)
|
315
314
|
when :eol
|
316
|
-
|
315
|
+
node << Anchor::EndOfLine.new(token, active_opts)
|
317
316
|
when :bos
|
318
|
-
|
317
|
+
node << Anchor::BOS.new(token, active_opts)
|
319
318
|
when :eos
|
320
|
-
|
319
|
+
node << Anchor::EOS.new(token, active_opts)
|
321
320
|
when :eos_ob_eol
|
322
|
-
|
321
|
+
node << Anchor::EOSobEOL.new(token, active_opts)
|
323
322
|
when :word_boundary
|
324
|
-
|
323
|
+
node << Anchor::WordBoundary.new(token, active_opts)
|
325
324
|
when :nonword_boundary
|
326
|
-
|
325
|
+
node << Anchor::NonWordBoundary.new(token, active_opts)
|
327
326
|
when :match_start
|
328
|
-
|
327
|
+
node << Anchor::MatchStart.new(token, active_opts)
|
329
328
|
else
|
330
329
|
raise UnknownTokenError.new('Anchor', token)
|
331
330
|
end
|
@@ -335,58 +334,58 @@ class Regexp::Parser
|
|
335
334
|
case token.token
|
336
335
|
|
337
336
|
when :backspace
|
338
|
-
|
337
|
+
node << EscapeSequence::Backspace.new(token, active_opts)
|
339
338
|
|
340
339
|
when :escape
|
341
|
-
|
340
|
+
node << EscapeSequence::AsciiEscape.new(token, active_opts)
|
342
341
|
when :bell
|
343
|
-
|
342
|
+
node << EscapeSequence::Bell.new(token, active_opts)
|
344
343
|
when :form_feed
|
345
|
-
|
344
|
+
node << EscapeSequence::FormFeed.new(token, active_opts)
|
346
345
|
when :newline
|
347
|
-
|
346
|
+
node << EscapeSequence::Newline.new(token, active_opts)
|
348
347
|
when :carriage
|
349
|
-
|
348
|
+
node << EscapeSequence::Return.new(token, active_opts)
|
350
349
|
when :space
|
351
|
-
|
350
|
+
node << EscapeSequence::Space.new(token, active_opts)
|
352
351
|
when :tab
|
353
|
-
|
352
|
+
node << EscapeSequence::Tab.new(token, active_opts)
|
354
353
|
when :vertical_tab
|
355
|
-
|
354
|
+
node << EscapeSequence::VerticalTab.new(token, active_opts)
|
356
355
|
|
357
356
|
when :control
|
358
357
|
if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
|
359
|
-
|
358
|
+
node << EscapeSequence::MetaControl.new(token, active_opts)
|
360
359
|
else
|
361
|
-
|
360
|
+
node << EscapeSequence::Control.new(token, active_opts)
|
362
361
|
end
|
363
362
|
|
364
363
|
when :meta_sequence
|
365
364
|
if token.text =~ /\A\\M-\\[Cc]/
|
366
|
-
|
365
|
+
node << EscapeSequence::MetaControl.new(token, active_opts)
|
367
366
|
else
|
368
|
-
|
367
|
+
node << EscapeSequence::Meta.new(token, active_opts)
|
369
368
|
end
|
370
369
|
|
371
370
|
else
|
372
371
|
# treating everything else as a literal
|
373
|
-
|
372
|
+
node << EscapeSequence::Literal.new(token, active_opts)
|
374
373
|
end
|
375
374
|
end
|
376
375
|
|
377
376
|
def keep(token)
|
378
|
-
|
377
|
+
node << Keep::Mark.new(token, active_opts)
|
379
378
|
end
|
380
379
|
|
381
380
|
def free_space(token)
|
382
381
|
case token.token
|
383
382
|
when :comment
|
384
|
-
|
383
|
+
node << Comment.new(token, active_opts)
|
385
384
|
when :whitespace
|
386
|
-
if
|
387
|
-
|
385
|
+
if node.last.is_a?(WhiteSpace)
|
386
|
+
node.last.merge(WhiteSpace.new(token, active_opts))
|
388
387
|
else
|
389
|
-
|
388
|
+
node << WhiteSpace.new(token, active_opts)
|
390
389
|
end
|
391
390
|
else
|
392
391
|
raise UnknownTokenError.new('FreeSpace', token)
|
@@ -395,13 +394,13 @@ class Regexp::Parser
|
|
395
394
|
|
396
395
|
def quantifier(token)
|
397
396
|
offset = -1
|
398
|
-
target_node =
|
399
|
-
while target_node
|
400
|
-
target_node =
|
397
|
+
target_node = node.expressions[offset]
|
398
|
+
while target_node.is_a?(FreeSpace)
|
399
|
+
target_node = node.expressions[offset -= 1]
|
401
400
|
end
|
402
401
|
|
403
|
-
raise
|
404
|
-
|
402
|
+
target_node || raise(ArgumentError, 'No valid target found for '\
|
403
|
+
"'#{token.text}' ")
|
405
404
|
|
406
405
|
case token.token
|
407
406
|
when :zero_or_one
|
@@ -462,7 +461,7 @@ class Regexp::Parser
|
|
462
461
|
when :close
|
463
462
|
close_group
|
464
463
|
when :comment
|
465
|
-
|
464
|
+
node << Group::Comment.new(token, active_opts)
|
466
465
|
else
|
467
466
|
open_group(token)
|
468
467
|
end
|
@@ -471,7 +470,7 @@ class Regexp::Parser
|
|
471
470
|
def options_group(token)
|
472
471
|
positive, negative = token.text.split('-', 2)
|
473
472
|
negative ||= ''
|
474
|
-
|
473
|
+
self.switching_options = !token.text.include?(':')
|
475
474
|
# TODO: change this -^ to token.type == :options_switch in v1.0.0
|
476
475
|
|
477
476
|
new_options = active_opts.dup
|
@@ -490,7 +489,7 @@ class Regexp::Parser
|
|
490
489
|
new_options[flag.to_sym] = true
|
491
490
|
end
|
492
491
|
|
493
|
-
|
492
|
+
options_stack << new_options
|
494
493
|
|
495
494
|
exp = Group::Options.new(token, active_opts)
|
496
495
|
|
@@ -525,43 +524,44 @@ class Regexp::Parser
|
|
525
524
|
|
526
525
|
# Push the active options to the stack again. This way we can simply pop the
|
527
526
|
# stack for any group we close, no matter if it had its own options or not.
|
528
|
-
|
527
|
+
options_stack << active_opts
|
529
528
|
|
530
529
|
nest(exp)
|
531
530
|
end
|
532
531
|
|
533
532
|
def close_group
|
534
|
-
|
535
|
-
|
536
|
-
|
533
|
+
nesting.pop
|
534
|
+
options_stack.pop unless switching_options
|
535
|
+
self.switching_options = false
|
537
536
|
|
538
|
-
|
539
|
-
|
537
|
+
self.node = nesting.last
|
538
|
+
self.node = node.last if node.last and node.last.is_a?(Alternation)
|
540
539
|
end
|
541
540
|
|
542
541
|
def open_set(token)
|
543
542
|
token.token = :character
|
544
543
|
|
545
544
|
if token.type == :subset
|
546
|
-
|
545
|
+
current_set << CharacterSubSet.new(token, active_opts)
|
547
546
|
else
|
548
|
-
|
547
|
+
self.current_set = CharacterSet.new(token, active_opts)
|
548
|
+
node << current_set
|
549
549
|
end
|
550
550
|
end
|
551
551
|
|
552
552
|
def negate_set
|
553
|
-
|
553
|
+
current_set.negate
|
554
554
|
end
|
555
555
|
|
556
556
|
def append_set(token)
|
557
|
-
|
557
|
+
current_set << token.text
|
558
558
|
end
|
559
559
|
|
560
560
|
def close_set(token)
|
561
|
-
|
561
|
+
current_set.close
|
562
562
|
end
|
563
563
|
|
564
564
|
def active_opts
|
565
|
-
|
565
|
+
options_stack.last
|
566
566
|
end
|
567
567
|
end # class Regexp::Parser
|