regexp_parser 0.4.6 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +13 -0
- data/README.md +4 -3
- data/lib/regexp_parser/expression.rb +8 -8
- data/lib/regexp_parser/expression/classes/backref.rb +7 -7
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -2
- data/lib/regexp_parser/expression/classes/group.rb +2 -2
- data/lib/regexp_parser/expression/classes/root.rb +4 -18
- data/lib/regexp_parser/expression/classes/set.rb +1 -1
- data/lib/regexp_parser/expression/subexpression.rb +2 -2
- data/lib/regexp_parser/lexer.rb +10 -6
- data/lib/regexp_parser/parser.rb +202 -172
- data/lib/regexp_parser/scanner.rb +151 -148
- data/lib/regexp_parser/scanner/scanner.rl +44 -41
- data/lib/regexp_parser/syntax/tokens/backref.rb +1 -1
- data/lib/regexp_parser/version.rb +2 -2
- data/test/expression/test_to_h.rb +2 -2
- data/test/lexer/test_refcalls.rb +3 -0
- data/test/parser/test_errors.rb +13 -9
- data/test/parser/test_groups.rb +140 -14
- data/test/parser/test_refcalls.rb +13 -0
- data/test/scanner/test_free_space.rb +43 -0
- data/test/scanner/test_refcalls.rb +3 -0
- data/test/syntax/ruby/test_1.8.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: de01aa2d195d95dd0bee1afd232f85195562a8bf
|
4
|
+
data.tar.gz: b41cb58a4e07d681da7c16473f4f03d96a792ff9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 342d6218d5553f2f2f6975f202cf650cd74c9128379348526981d829188b38836dfd88bc46b8476212d4b10aeb628479109baff075e1297c4cf69aaa4fe8ff03
|
7
|
+
data.tar.gz: 80883d05ff9bb3f5f9f296aeeb5eabde013a362c6d6c82f4eeab87438050f93fea2224f407b170b65ff55d175937c55f56d49077e0ad43b0a55832989e221544
|
data/ChangeLog
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
Sun Oct 15 2017 Janosch Müller <janosch84@gmail.com>
|
2
|
+
|
3
|
+
* Fixed a thread safety issue (issue #45)
|
4
|
+
* Some public class methods that were only reliable for
|
5
|
+
internal use are now private instance methods (PR #46)
|
6
|
+
* Improved the usefulness of Expression#options (issue #43) -
|
7
|
+
#options and derived methods such as #i?, #m? and #x? are now
|
8
|
+
defined for all Expressions that are affected by such flags.
|
9
|
+
* Fixed scanning of whitespace following (?x) (commit 5c94bd2)
|
10
|
+
* Fixed a Parser bug where the #number attribute of traditional
|
11
|
+
numerical backreferences was not set correctly (commit 851b620)
|
12
|
+
* Bumped version to 0.4.7
|
13
|
+
|
1
14
|
Mon Sep 18 2017 Janosch Müller <janosch84@gmail.com>
|
2
15
|
|
3
16
|
* Added Parser support for hex escapes in sets (PR #36)
|
data/README.md
CHANGED
@@ -125,9 +125,10 @@ Regexp::Scanner.scan( /(cat?([bhm]at)){3,5}/ ).map {|token| token[2]}
|
|
125
125
|
|
126
126
|
* If the input is a ruby **Regexp** object, the scanner calls #source on it to
|
127
127
|
get its string representation. #source does not include the options of
|
128
|
-
the expression (m, i, and x) To include the options in the scan, #to_s
|
129
|
-
should be called on the **Regexp** before passing it to the scanner or
|
130
|
-
|
128
|
+
the expression (m, i, and x). To include the options in the scan, #to_s
|
129
|
+
should be called on the **Regexp** before passing it to the scanner or the
|
130
|
+
lexer. For the parser, however, this is not necessary. It automatically
|
131
|
+
exposes the options of a passed **Regexp** in the returned root expression.
|
131
132
|
|
132
133
|
* To keep the scanner simple(r) and fairly reusable for other purposes, it
|
133
134
|
does not perform lexical analysis on the tokens, sticking to the task
|
@@ -8,7 +8,7 @@ module Regexp::Expression
|
|
8
8
|
attr_accessor :quantifier
|
9
9
|
attr_accessor :options
|
10
10
|
|
11
|
-
def initialize(token)
|
11
|
+
def initialize(token, options = {})
|
12
12
|
@type = token.type
|
13
13
|
@token = token.token
|
14
14
|
@text = token.text
|
@@ -17,7 +17,7 @@ module Regexp::Expression
|
|
17
17
|
@set_level = token.set_level
|
18
18
|
@conditional_level = token.conditional_level
|
19
19
|
@quantifier = nil
|
20
|
-
@options =
|
20
|
+
@options = options
|
21
21
|
end
|
22
22
|
|
23
23
|
def clone
|
@@ -95,35 +95,35 @@ module Regexp::Expression
|
|
95
95
|
end
|
96
96
|
|
97
97
|
def multiline?
|
98
|
-
|
98
|
+
@options[:m] == true
|
99
99
|
end
|
100
100
|
alias :m? :multiline?
|
101
101
|
|
102
102
|
def case_insensitive?
|
103
|
-
|
103
|
+
@options[:i] == true
|
104
104
|
end
|
105
105
|
alias :i? :case_insensitive?
|
106
106
|
alias :ignore_case? :case_insensitive?
|
107
107
|
|
108
108
|
def free_spacing?
|
109
|
-
|
109
|
+
@options[:x] == true
|
110
110
|
end
|
111
111
|
alias :x? :free_spacing?
|
112
112
|
alias :extended? :free_spacing?
|
113
113
|
|
114
114
|
if RUBY_VERSION >= '2.0'
|
115
115
|
def default_classes?
|
116
|
-
|
116
|
+
@options[:d] == true
|
117
117
|
end
|
118
118
|
alias :d? :default_classes?
|
119
119
|
|
120
120
|
def ascii_classes?
|
121
|
-
|
121
|
+
@options[:a] == true
|
122
122
|
end
|
123
123
|
alias :a? :ascii_classes?
|
124
124
|
|
125
125
|
def unicode_classes?
|
126
|
-
|
126
|
+
@options[:u] == true
|
127
127
|
end
|
128
128
|
alias :u? :unicode_classes?
|
129
129
|
end
|
@@ -6,18 +6,18 @@ module Regexp::Expression
|
|
6
6
|
class Name < Backreference::Base
|
7
7
|
attr_reader :name
|
8
8
|
|
9
|
-
def initialize(token)
|
9
|
+
def initialize(token, options = {})
|
10
10
|
@name = token.text[3..-2]
|
11
|
-
super
|
11
|
+
super
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
15
15
|
class Number < Backreference::Base
|
16
16
|
attr_reader :number
|
17
17
|
|
18
|
-
def initialize(token)
|
19
|
-
@number = token.text[3..-2]
|
20
|
-
super
|
18
|
+
def initialize(token, options = {})
|
19
|
+
@number = token.text[token.token.equal?(:number) ? 1..-1 : 3..-2]
|
20
|
+
super
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
@@ -29,9 +29,9 @@ module Regexp::Expression
|
|
29
29
|
class NameCall < Backreference::Base
|
30
30
|
attr_reader :name
|
31
31
|
|
32
|
-
def initialize(token)
|
32
|
+
def initialize(token, options = {})
|
33
33
|
@name = token.text[3..-2]
|
34
|
-
super
|
34
|
+
super
|
35
35
|
end
|
36
36
|
end
|
37
37
|
|
@@ -11,8 +11,8 @@ module Regexp::Expression
|
|
11
11
|
class Branch < Regexp::Expression::Sequence; end
|
12
12
|
|
13
13
|
class Expression < Regexp::Expression::Subexpression
|
14
|
-
def initialize(token)
|
15
|
-
super
|
14
|
+
def initialize(token, options = {})
|
15
|
+
super
|
16
16
|
|
17
17
|
@condition = nil
|
18
18
|
@branches = []
|
@@ -1,26 +1,12 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
|
3
3
|
class Root < Regexp::Expression::Subexpression
|
4
|
-
def initialize
|
5
|
-
super
|
4
|
+
def initialize(options = {})
|
5
|
+
super(Regexp::Token.new(:expression, :root, '', 0), options)
|
6
6
|
end
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
end
|
11
|
-
alias :m? :multiline?
|
12
|
-
|
13
|
-
def case_insensitive?
|
14
|
-
@expressions[0].i?
|
15
|
-
end
|
16
|
-
alias :i? :case_insensitive?
|
17
|
-
alias :ignore_case? :case_insensitive?
|
18
|
-
|
19
|
-
def free_spacing?
|
20
|
-
@expressions[0].x?
|
21
|
-
end
|
22
|
-
alias :x? :free_spacing?
|
23
|
-
alias :extended? :free_spacing?
|
8
|
+
alias ignore_case? case_insensitive?
|
9
|
+
alias extended? free_spacing?
|
24
10
|
end
|
25
11
|
|
26
12
|
end
|
data/lib/regexp_parser/lexer.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
# collects emitted tokens into an array, calculates their nesting depth, and
|
3
3
|
# normalizes tokens for the parser, and checks if they are implemented by the
|
4
4
|
# given syntax flavor.
|
5
|
-
|
5
|
+
class Regexp::Lexer
|
6
6
|
|
7
7
|
OPENING_TOKENS = [:capture, :options, :passive, :atomic, :named, :absence,
|
8
8
|
:lookahead, :nlookahead, :lookbehind, :nlookbehind
|
@@ -11,6 +11,10 @@ module Regexp::Lexer
|
|
11
11
|
CLOSING_TOKENS = [:close].freeze
|
12
12
|
|
13
13
|
def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
|
14
|
+
new.lex(input, syntax, &block)
|
15
|
+
end
|
16
|
+
|
17
|
+
def lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
|
14
18
|
syntax = Regexp::Syntax.new(syntax)
|
15
19
|
|
16
20
|
@tokens = []
|
@@ -57,7 +61,7 @@ module Regexp::Lexer
|
|
57
61
|
|
58
62
|
protected
|
59
63
|
|
60
|
-
def
|
64
|
+
def ascend(type, token)
|
61
65
|
if type == :group or type == :assertion
|
62
66
|
@nesting -= 1 if CLOSING_TOKENS.include?(token)
|
63
67
|
end
|
@@ -71,7 +75,7 @@ module Regexp::Lexer
|
|
71
75
|
end
|
72
76
|
end
|
73
77
|
|
74
|
-
def
|
78
|
+
def descend(type, token)
|
75
79
|
if type == :group or type == :assertion
|
76
80
|
@nesting += 1 if OPENING_TOKENS.include?(token)
|
77
81
|
end
|
@@ -87,7 +91,7 @@ module Regexp::Lexer
|
|
87
91
|
|
88
92
|
# called by scan to break a literal run that is longer than one character
|
89
93
|
# into two separate tokens when it is followed by a quantifier
|
90
|
-
def
|
94
|
+
def break_literal(token)
|
91
95
|
text = token.text
|
92
96
|
if text.scan(/./mu).length > 1
|
93
97
|
lead = text.sub(/.\z/mu, "")
|
@@ -113,7 +117,7 @@ module Regexp::Lexer
|
|
113
117
|
|
114
118
|
# called by scan to merge two consecutive literals. this happens when tokens
|
115
119
|
# get normalized (as in the case of posix/bre) and end up becoming literals.
|
116
|
-
def
|
120
|
+
def merge_literal(current)
|
117
121
|
last = @tokens.pop
|
118
122
|
|
119
123
|
Regexp::Token.new(
|
@@ -128,7 +132,7 @@ module Regexp::Lexer
|
|
128
132
|
)
|
129
133
|
end
|
130
134
|
|
131
|
-
def
|
135
|
+
def merge_condition(current)
|
132
136
|
last = @tokens.pop
|
133
137
|
Regexp::Token.new(:conditional, :condition, last.text + current.text,
|
134
138
|
last.ts, current.te, @nesting, @set_nesting, @conditional_nesting)
|
data/lib/regexp_parser/parser.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'regexp_parser/expression'
|
2
2
|
|
3
|
-
|
3
|
+
class Regexp::Parser
|
4
4
|
include Regexp::Expression
|
5
5
|
include Regexp::Syntax
|
6
6
|
|
@@ -19,8 +19,14 @@ module Regexp::Parser
|
|
19
19
|
end
|
20
20
|
|
21
21
|
def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", &block)
|
22
|
-
|
22
|
+
new.parse(input, syntax, &block)
|
23
|
+
end
|
24
|
+
|
25
|
+
def parse(input, syntax = "ruby/#{RUBY_VERSION}", &block)
|
26
|
+
@nesting = [@root = @node = Root.new(options_from_input(input))]
|
23
27
|
|
28
|
+
@options_stack = [@root.options]
|
29
|
+
@switching_options = false
|
24
30
|
@conditional_nesting = []
|
25
31
|
|
26
32
|
Regexp::Lexer.scan(input, syntax) do |token|
|
@@ -34,21 +40,33 @@ module Regexp::Parser
|
|
34
40
|
end
|
35
41
|
end
|
36
42
|
|
37
|
-
|
43
|
+
private
|
44
|
+
|
45
|
+
def options_from_input(input)
|
46
|
+
return {} unless input.is_a?(::Regexp)
|
47
|
+
|
48
|
+
options = {}
|
49
|
+
options[:i] = true if input.options & ::Regexp::IGNORECASE != 0
|
50
|
+
options[:m] = true if input.options & ::Regexp::MULTILINE != 0
|
51
|
+
options[:x] = true if input.options & ::Regexp::EXTENDED != 0
|
52
|
+
options
|
53
|
+
end
|
54
|
+
|
55
|
+
def nest(exp)
|
38
56
|
@nesting.push exp
|
39
57
|
|
40
58
|
@node << exp
|
41
59
|
@node = exp
|
42
60
|
end
|
43
61
|
|
44
|
-
def
|
62
|
+
def nest_conditional(exp)
|
45
63
|
@conditional_nesting.push exp
|
46
64
|
|
47
65
|
@node << exp
|
48
66
|
@node = exp
|
49
67
|
end
|
50
68
|
|
51
|
-
def
|
69
|
+
def parse_token(token)
|
52
70
|
case token.type
|
53
71
|
when :meta; meta(token)
|
54
72
|
when :quantifier; quantifier(token)
|
@@ -66,7 +84,7 @@ module Regexp::Parser
|
|
66
84
|
property(token)
|
67
85
|
|
68
86
|
when :literal
|
69
|
-
@node << Literal.new(token)
|
87
|
+
@node << Literal.new(token, active_opts)
|
70
88
|
when :free_space
|
71
89
|
free_space(token)
|
72
90
|
|
@@ -75,7 +93,7 @@ module Regexp::Parser
|
|
75
93
|
end
|
76
94
|
end
|
77
95
|
|
78
|
-
def
|
96
|
+
def set(token)
|
79
97
|
case token.token
|
80
98
|
when :open
|
81
99
|
open_set(token)
|
@@ -96,14 +114,14 @@ module Regexp::Parser
|
|
96
114
|
end
|
97
115
|
end
|
98
116
|
|
99
|
-
def
|
117
|
+
def meta(token)
|
100
118
|
case token.token
|
101
119
|
when :dot
|
102
|
-
@node << CharacterType::Any.new(token)
|
120
|
+
@node << CharacterType::Any.new(token, active_opts)
|
103
121
|
when :alternation
|
104
122
|
unless @node.token == :alternation
|
105
123
|
unless @node.last.is_a?(Alternation)
|
106
|
-
alt = Alternation.new(token)
|
124
|
+
alt = Alternation.new(token, active_opts)
|
107
125
|
seq = Alternative.new(alt.level, alt.set_level, alt.conditional_level)
|
108
126
|
|
109
127
|
while @node.expressions.last
|
@@ -126,62 +144,62 @@ module Regexp::Parser
|
|
126
144
|
end
|
127
145
|
end
|
128
146
|
|
129
|
-
def
|
147
|
+
def backref(token)
|
130
148
|
case token.token
|
131
149
|
when :name_ref
|
132
|
-
@node << Backreference::Name.new(token)
|
150
|
+
@node << Backreference::Name.new(token, active_opts)
|
133
151
|
when :name_nest_ref
|
134
|
-
@node << Backreference::NameNestLevel.new(token)
|
152
|
+
@node << Backreference::NameNestLevel.new(token, active_opts)
|
135
153
|
when :name_call
|
136
|
-
@node << Backreference::NameCall.new(token)
|
154
|
+
@node << Backreference::NameCall.new(token, active_opts)
|
137
155
|
when :number, :number_ref
|
138
|
-
@node << Backreference::Number.new(token)
|
156
|
+
@node << Backreference::Number.new(token, active_opts)
|
139
157
|
when :number_rel_ref
|
140
|
-
@node << Backreference::NumberRelative.new(token)
|
158
|
+
@node << Backreference::NumberRelative.new(token, active_opts)
|
141
159
|
when :number_nest_ref
|
142
|
-
@node << Backreference::NumberNestLevel.new(token)
|
160
|
+
@node << Backreference::NumberNestLevel.new(token, active_opts)
|
143
161
|
when :number_call
|
144
|
-
@node << Backreference::NumberCall.new(token)
|
162
|
+
@node << Backreference::NumberCall.new(token, active_opts)
|
145
163
|
when :number_rel_call
|
146
|
-
@node << Backreference::NumberCallRelative.new(token)
|
164
|
+
@node << Backreference::NumberCallRelative.new(token, active_opts)
|
147
165
|
else
|
148
166
|
raise UnknownTokenError.new('Backreference', token)
|
149
167
|
end
|
150
168
|
end
|
151
169
|
|
152
|
-
def
|
170
|
+
def type(token)
|
153
171
|
case token.token
|
154
172
|
when :digit
|
155
|
-
@node << CharacterType::Digit.new(token)
|
173
|
+
@node << CharacterType::Digit.new(token, active_opts)
|
156
174
|
when :nondigit
|
157
|
-
@node << CharacterType::NonDigit.new(token)
|
175
|
+
@node << CharacterType::NonDigit.new(token, active_opts)
|
158
176
|
when :hex
|
159
|
-
@node << CharacterType::Hex.new(token)
|
177
|
+
@node << CharacterType::Hex.new(token, active_opts)
|
160
178
|
when :nonhex
|
161
|
-
@node << CharacterType::NonHex.new(token)
|
179
|
+
@node << CharacterType::NonHex.new(token, active_opts)
|
162
180
|
when :space
|
163
|
-
@node << CharacterType::Space.new(token)
|
181
|
+
@node << CharacterType::Space.new(token, active_opts)
|
164
182
|
when :nonspace
|
165
|
-
@node << CharacterType::NonSpace.new(token)
|
183
|
+
@node << CharacterType::NonSpace.new(token, active_opts)
|
166
184
|
when :word
|
167
|
-
@node << CharacterType::Word.new(token)
|
185
|
+
@node << CharacterType::Word.new(token, active_opts)
|
168
186
|
when :nonword
|
169
|
-
@node << CharacterType::NonWord.new(token)
|
187
|
+
@node << CharacterType::NonWord.new(token, active_opts)
|
170
188
|
when :linebreak
|
171
|
-
@node << CharacterType::Linebreak.new(token)
|
189
|
+
@node << CharacterType::Linebreak.new(token, active_opts)
|
172
190
|
when :xgrapheme
|
173
|
-
@node << CharacterType::ExtendedGrapheme.new(token)
|
191
|
+
@node << CharacterType::ExtendedGrapheme.new(token, active_opts)
|
174
192
|
else
|
175
193
|
raise UnknownTokenError.new('CharacterType', token)
|
176
194
|
end
|
177
195
|
end
|
178
196
|
|
179
|
-
def
|
197
|
+
def conditional(token)
|
180
198
|
case token.token
|
181
199
|
when :open
|
182
|
-
nest_conditional(Conditional::Expression.new(token))
|
200
|
+
nest_conditional(Conditional::Expression.new(token, active_opts))
|
183
201
|
when :condition
|
184
|
-
@conditional_nesting.last.condition(Conditional::Condition.new(token))
|
202
|
+
@conditional_nesting.last.condition(Conditional::Condition.new(token, active_opts))
|
185
203
|
@conditional_nesting.last.branch
|
186
204
|
when :separator
|
187
205
|
@conditional_nesting.last.branch
|
@@ -200,175 +218,174 @@ module Regexp::Parser
|
|
200
218
|
end
|
201
219
|
end
|
202
220
|
|
203
|
-
|
204
|
-
include Regexp::Expression::UnicodeProperty
|
221
|
+
include Regexp::Expression::UnicodeProperty
|
205
222
|
|
223
|
+
def property(token)
|
206
224
|
case token.token
|
207
|
-
when :alnum; @node << Alnum.new(token)
|
208
|
-
when :alpha; @node << Alpha.new(token)
|
209
|
-
when :any; @node << Any.new(token)
|
210
|
-
when :ascii; @node << Ascii.new(token)
|
211
|
-
when :blank; @node << Blank.new(token)
|
212
|
-
when :cntrl; @node << Cntrl.new(token)
|
213
|
-
when :digit; @node << Digit.new(token)
|
214
|
-
when :graph; @node << Graph.new(token)
|
215
|
-
when :lower; @node << Lower.new(token)
|
216
|
-
when :print; @node << Print.new(token)
|
217
|
-
when :punct; @node << Punct.new(token)
|
218
|
-
when :space; @node << Space.new(token)
|
219
|
-
when :upper; @node << Upper.new(token)
|
220
|
-
when :word; @node << Word.new(token)
|
221
|
-
when :xdigit; @node << Xdigit.new(token)
|
222
|
-
when :newline; @node << Newline.new(token)
|
223
|
-
|
224
|
-
when :letter_any; @node << Letter::Any.new(token)
|
225
|
-
when :letter_uppercase; @node << Letter::Uppercase.new(token)
|
226
|
-
when :letter_lowercase; @node << Letter::Lowercase.new(token)
|
227
|
-
when :letter_titlecase; @node << Letter::Titlecase.new(token)
|
228
|
-
when :letter_modifier; @node << Letter::Modifier.new(token)
|
229
|
-
when :letter_other; @node << Letter::Other.new(token)
|
230
|
-
|
231
|
-
when :mark_any; @node << Mark::Any.new(token)
|
232
|
-
when :mark_nonspacing; @node << Mark::Nonspacing.new(token)
|
233
|
-
when :mark_spacing; @node << Mark::Spacing.new(token)
|
234
|
-
when :mark_enclosing; @node << Mark::Enclosing.new(token)
|
235
|
-
|
236
|
-
when :number_any; @node << Number::Any.new(token)
|
237
|
-
when :number_decimal; @node << Number::Decimal.new(token)
|
238
|
-
when :number_letter; @node << Number::Letter.new(token)
|
239
|
-
when :number_other; @node << Number::Other.new(token)
|
240
|
-
|
241
|
-
when :punct_any; @node << Punctuation::Any.new(token)
|
242
|
-
when :punct_connector; @node << Punctuation::Connector.new(token)
|
243
|
-
when :punct_dash; @node << Punctuation::Dash.new(token)
|
244
|
-
when :punct_open; @node << Punctuation::Open.new(token)
|
245
|
-
when :punct_close; @node << Punctuation::Close.new(token)
|
246
|
-
when :punct_initial; @node << Punctuation::Initial.new(token)
|
247
|
-
when :punct_final; @node << Punctuation::Final.new(token)
|
248
|
-
when :punct_other; @node << Punctuation::Other.new(token)
|
249
|
-
|
250
|
-
when :separator_any; @node << Separator::Any.new(token)
|
251
|
-
when :separator_space; @node << Separator::Space.new(token)
|
252
|
-
when :separator_line; @node << Separator::Line.new(token)
|
253
|
-
when :separator_para; @node << Separator::Paragraph.new(token)
|
254
|
-
|
255
|
-
when :symbol_any; @node << Symbol::Any.new(token)
|
256
|
-
when :symbol_math; @node << Symbol::Math.new(token)
|
257
|
-
when :symbol_currency; @node << Symbol::Currency.new(token)
|
258
|
-
when :symbol_modifier; @node << Symbol::Modifier.new(token)
|
259
|
-
when :symbol_other; @node << Symbol::Other.new(token)
|
260
|
-
|
261
|
-
when :other; @node << Codepoint::Any.new(token)
|
262
|
-
when :control; @node << Codepoint::Control.new(token)
|
263
|
-
when :format; @node << Codepoint::Format.new(token)
|
264
|
-
when :surrogate; @node << Codepoint::Surrogate.new(token)
|
265
|
-
when :private_use; @node << Codepoint::PrivateUse.new(token)
|
266
|
-
when :unassigned; @node << Codepoint::Unassigned.new(token)
|
225
|
+
when :alnum; @node << Alnum.new(token, active_opts)
|
226
|
+
when :alpha; @node << Alpha.new(token, active_opts)
|
227
|
+
when :any; @node << Any.new(token, active_opts)
|
228
|
+
when :ascii; @node << Ascii.new(token, active_opts)
|
229
|
+
when :blank; @node << Blank.new(token, active_opts)
|
230
|
+
when :cntrl; @node << Cntrl.new(token, active_opts)
|
231
|
+
when :digit; @node << Digit.new(token, active_opts)
|
232
|
+
when :graph; @node << Graph.new(token, active_opts)
|
233
|
+
when :lower; @node << Lower.new(token, active_opts)
|
234
|
+
when :print; @node << Print.new(token, active_opts)
|
235
|
+
when :punct; @node << Punct.new(token, active_opts)
|
236
|
+
when :space; @node << Space.new(token, active_opts)
|
237
|
+
when :upper; @node << Upper.new(token, active_opts)
|
238
|
+
when :word; @node << Word.new(token, active_opts)
|
239
|
+
when :xdigit; @node << Xdigit.new(token, active_opts)
|
240
|
+
when :newline; @node << Newline.new(token, active_opts)
|
241
|
+
|
242
|
+
when :letter_any; @node << Letter::Any.new(token, active_opts)
|
243
|
+
when :letter_uppercase; @node << Letter::Uppercase.new(token, active_opts)
|
244
|
+
when :letter_lowercase; @node << Letter::Lowercase.new(token, active_opts)
|
245
|
+
when :letter_titlecase; @node << Letter::Titlecase.new(token, active_opts)
|
246
|
+
when :letter_modifier; @node << Letter::Modifier.new(token, active_opts)
|
247
|
+
when :letter_other; @node << Letter::Other.new(token, active_opts)
|
248
|
+
|
249
|
+
when :mark_any; @node << Mark::Any.new(token, active_opts)
|
250
|
+
when :mark_nonspacing; @node << Mark::Nonspacing.new(token, active_opts)
|
251
|
+
when :mark_spacing; @node << Mark::Spacing.new(token, active_opts)
|
252
|
+
when :mark_enclosing; @node << Mark::Enclosing.new(token, active_opts)
|
253
|
+
|
254
|
+
when :number_any; @node << Number::Any.new(token, active_opts)
|
255
|
+
when :number_decimal; @node << Number::Decimal.new(token, active_opts)
|
256
|
+
when :number_letter; @node << Number::Letter.new(token, active_opts)
|
257
|
+
when :number_other; @node << Number::Other.new(token, active_opts)
|
258
|
+
|
259
|
+
when :punct_any; @node << Punctuation::Any.new(token, active_opts)
|
260
|
+
when :punct_connector; @node << Punctuation::Connector.new(token, active_opts)
|
261
|
+
when :punct_dash; @node << Punctuation::Dash.new(token, active_opts)
|
262
|
+
when :punct_open; @node << Punctuation::Open.new(token, active_opts)
|
263
|
+
when :punct_close; @node << Punctuation::Close.new(token, active_opts)
|
264
|
+
when :punct_initial; @node << Punctuation::Initial.new(token, active_opts)
|
265
|
+
when :punct_final; @node << Punctuation::Final.new(token, active_opts)
|
266
|
+
when :punct_other; @node << Punctuation::Other.new(token, active_opts)
|
267
|
+
|
268
|
+
when :separator_any; @node << Separator::Any.new(token, active_opts)
|
269
|
+
when :separator_space; @node << Separator::Space.new(token, active_opts)
|
270
|
+
when :separator_line; @node << Separator::Line.new(token, active_opts)
|
271
|
+
when :separator_para; @node << Separator::Paragraph.new(token, active_opts)
|
272
|
+
|
273
|
+
when :symbol_any; @node << Symbol::Any.new(token, active_opts)
|
274
|
+
when :symbol_math; @node << Symbol::Math.new(token, active_opts)
|
275
|
+
when :symbol_currency; @node << Symbol::Currency.new(token, active_opts)
|
276
|
+
when :symbol_modifier; @node << Symbol::Modifier.new(token, active_opts)
|
277
|
+
when :symbol_other; @node << Symbol::Other.new(token, active_opts)
|
278
|
+
|
279
|
+
when :other; @node << Codepoint::Any.new(token, active_opts)
|
280
|
+
when :control; @node << Codepoint::Control.new(token, active_opts)
|
281
|
+
when :format; @node << Codepoint::Format.new(token, active_opts)
|
282
|
+
when :surrogate; @node << Codepoint::Surrogate.new(token, active_opts)
|
283
|
+
when :private_use; @node << Codepoint::PrivateUse.new(token, active_opts)
|
284
|
+
when :unassigned; @node << Codepoint::Unassigned.new(token, active_opts)
|
267
285
|
|
268
286
|
when *Token::UnicodeProperty::Age
|
269
|
-
@node << Age.new(token)
|
287
|
+
@node << Age.new(token, active_opts)
|
270
288
|
|
271
289
|
when *Token::UnicodeProperty::Derived
|
272
|
-
@node << Derived.new(token)
|
290
|
+
@node << Derived.new(token, active_opts)
|
273
291
|
|
274
292
|
when *Regexp::Syntax::Token::UnicodeProperty::Script
|
275
|
-
@node << Script.new(token)
|
293
|
+
@node << Script.new(token, active_opts)
|
276
294
|
|
277
295
|
when *Regexp::Syntax::Token::UnicodeProperty::UnicodeBlock
|
278
|
-
@node << Block.new(token)
|
296
|
+
@node << Block.new(token, active_opts)
|
279
297
|
|
280
298
|
else
|
281
299
|
raise UnknownTokenError.new('UnicodeProperty', token)
|
282
300
|
end
|
283
301
|
end
|
284
302
|
|
285
|
-
def
|
303
|
+
def anchor(token)
|
286
304
|
case token.token
|
287
305
|
when :bol
|
288
|
-
@node << Anchor::BeginningOfLine.new(token)
|
306
|
+
@node << Anchor::BeginningOfLine.new(token, active_opts)
|
289
307
|
when :eol
|
290
|
-
@node << Anchor::EndOfLine.new(token)
|
308
|
+
@node << Anchor::EndOfLine.new(token, active_opts)
|
291
309
|
when :bos
|
292
|
-
@node << Anchor::BOS.new(token)
|
310
|
+
@node << Anchor::BOS.new(token, active_opts)
|
293
311
|
when :eos
|
294
|
-
@node << Anchor::EOS.new(token)
|
312
|
+
@node << Anchor::EOS.new(token, active_opts)
|
295
313
|
when :eos_ob_eol
|
296
|
-
@node << Anchor::EOSobEOL.new(token)
|
314
|
+
@node << Anchor::EOSobEOL.new(token, active_opts)
|
297
315
|
when :word_boundary
|
298
|
-
@node << Anchor::WordBoundary.new(token)
|
316
|
+
@node << Anchor::WordBoundary.new(token, active_opts)
|
299
317
|
when :nonword_boundary
|
300
|
-
@node << Anchor::NonWordBoundary.new(token)
|
318
|
+
@node << Anchor::NonWordBoundary.new(token, active_opts)
|
301
319
|
when :match_start
|
302
|
-
@node << Anchor::MatchStart.new(token)
|
320
|
+
@node << Anchor::MatchStart.new(token, active_opts)
|
303
321
|
else
|
304
322
|
raise UnknownTokenError.new('Anchor', token)
|
305
323
|
end
|
306
324
|
end
|
307
325
|
|
308
|
-
def
|
326
|
+
def escape(token)
|
309
327
|
case token.token
|
310
328
|
|
311
329
|
when :backspace
|
312
|
-
@node << EscapeSequence::Backspace.new(token)
|
330
|
+
@node << EscapeSequence::Backspace.new(token, active_opts)
|
313
331
|
|
314
332
|
when :escape
|
315
|
-
@node << EscapeSequence::AsciiEscape.new(token)
|
333
|
+
@node << EscapeSequence::AsciiEscape.new(token, active_opts)
|
316
334
|
when :bell
|
317
|
-
@node << EscapeSequence::Bell.new(token)
|
335
|
+
@node << EscapeSequence::Bell.new(token, active_opts)
|
318
336
|
when :form_feed
|
319
|
-
@node << EscapeSequence::FormFeed.new(token)
|
337
|
+
@node << EscapeSequence::FormFeed.new(token, active_opts)
|
320
338
|
when :newline
|
321
|
-
@node << EscapeSequence::Newline.new(token)
|
339
|
+
@node << EscapeSequence::Newline.new(token, active_opts)
|
322
340
|
when :carriage
|
323
|
-
@node << EscapeSequence::Return.new(token)
|
341
|
+
@node << EscapeSequence::Return.new(token, active_opts)
|
324
342
|
when :space
|
325
|
-
@node << EscapeSequence::Space.new(token)
|
343
|
+
@node << EscapeSequence::Space.new(token, active_opts)
|
326
344
|
when :tab
|
327
|
-
@node << EscapeSequence::Tab.new(token)
|
345
|
+
@node << EscapeSequence::Tab.new(token, active_opts)
|
328
346
|
when :vertical_tab
|
329
|
-
@node << EscapeSequence::VerticalTab.new(token)
|
347
|
+
@node << EscapeSequence::VerticalTab.new(token, active_opts)
|
330
348
|
|
331
349
|
when :control
|
332
350
|
if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
|
333
|
-
@node << EscapeSequence::MetaControl.new(token)
|
351
|
+
@node << EscapeSequence::MetaControl.new(token, active_opts)
|
334
352
|
else
|
335
|
-
@node << EscapeSequence::Control.new(token)
|
353
|
+
@node << EscapeSequence::Control.new(token, active_opts)
|
336
354
|
end
|
337
355
|
|
338
356
|
when :meta_sequence
|
339
357
|
if token.text =~ /\A\\M-\\[Cc]/
|
340
|
-
@node << EscapeSequence::MetaControl.new(token)
|
358
|
+
@node << EscapeSequence::MetaControl.new(token, active_opts)
|
341
359
|
else
|
342
|
-
@node << EscapeSequence::Meta.new(token)
|
360
|
+
@node << EscapeSequence::Meta.new(token, active_opts)
|
343
361
|
end
|
344
362
|
|
345
363
|
else
|
346
364
|
# treating everything else as a literal
|
347
|
-
@node << EscapeSequence::Literal.new(token)
|
365
|
+
@node << EscapeSequence::Literal.new(token, active_opts)
|
348
366
|
end
|
349
367
|
end
|
350
368
|
|
351
|
-
|
352
|
-
|
353
|
-
@node << Keep::Mark.new(token)
|
369
|
+
def keep(token)
|
370
|
+
@node << Keep::Mark.new(token, active_opts)
|
354
371
|
end
|
355
372
|
|
356
|
-
def
|
373
|
+
def free_space(token)
|
357
374
|
case token.token
|
358
375
|
when :comment
|
359
|
-
@node << Comment.new(token)
|
376
|
+
@node << Comment.new(token, active_opts)
|
360
377
|
when :whitespace
|
361
378
|
if @node.last and @node.last.is_a?(WhiteSpace)
|
362
|
-
@node.last.merge(WhiteSpace.new(token))
|
379
|
+
@node.last.merge(WhiteSpace.new(token, active_opts))
|
363
380
|
else
|
364
|
-
@node << WhiteSpace.new(token)
|
381
|
+
@node << WhiteSpace.new(token, active_opts)
|
365
382
|
end
|
366
383
|
else
|
367
384
|
raise UnknownTokenError.new('FreeSpace', token)
|
368
385
|
end
|
369
386
|
end
|
370
387
|
|
371
|
-
def
|
388
|
+
def quantifier(token)
|
372
389
|
offset = -1
|
373
390
|
target_node = @node.expressions[offset]
|
374
391
|
while target_node and target_node.is_a?(FreeSpace)
|
@@ -378,15 +395,6 @@ module Regexp::Parser
|
|
378
395
|
raise ArgumentError.new("No valid target found for '#{token.text}' "+
|
379
396
|
"quantifier") unless target_node
|
380
397
|
|
381
|
-
unless target_node
|
382
|
-
if token.token == :zero_or_one
|
383
|
-
raise "Quantifier given without a target, or the syntax of the group " +
|
384
|
-
"or its options is incorrect"
|
385
|
-
else
|
386
|
-
raise "Quantifier `#{token.text}' given without a target"
|
387
|
-
end
|
388
|
-
end
|
389
|
-
|
390
398
|
case token.token
|
391
399
|
when :zero_or_one
|
392
400
|
target_node.quantify(:zero_or_one, token.text, 0, 1, :greedy)
|
@@ -417,7 +425,7 @@ module Regexp::Parser
|
|
417
425
|
end
|
418
426
|
end
|
419
427
|
|
420
|
-
def
|
428
|
+
def interval(target_node, token)
|
421
429
|
text = token.text
|
422
430
|
mchr = text[text.length-1].chr =~ /[?+]/ ? text[text.length-1].chr : nil
|
423
431
|
case mchr
|
@@ -439,91 +447,113 @@ module Regexp::Parser
|
|
439
447
|
target_node.quantify(:interval, text, min.to_i, max.to_i, mode)
|
440
448
|
end
|
441
449
|
|
442
|
-
def
|
450
|
+
def group(token)
|
443
451
|
case token.token
|
444
452
|
when :options
|
445
|
-
|
453
|
+
options_group(token)
|
446
454
|
when :close
|
447
455
|
close_group
|
448
456
|
when :comment
|
449
|
-
@node << Group::Comment.new(token)
|
457
|
+
@node << Group::Comment.new(token, active_opts)
|
450
458
|
else
|
451
459
|
open_group(token)
|
452
460
|
end
|
453
461
|
end
|
454
462
|
|
455
|
-
def
|
456
|
-
|
463
|
+
def options_group(token)
|
464
|
+
positive, negative = token.text.split('-', 2)
|
465
|
+
negative ||= ''
|
466
|
+
@switching_options = !token.text.include?(':')
|
467
|
+
# TODO: change this -^ to token.type == :options_switch in v1.0.0
|
468
|
+
|
469
|
+
new_options = active_opts.dup
|
457
470
|
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
471
|
+
# Negative options have precedence. E.g. /(?i-i)a/ is case-sensitive.
|
472
|
+
%w[i m x].each do |flag|
|
473
|
+
new_options[flag.to_sym] = true if positive.include?(flag)
|
474
|
+
new_options.delete(flag.to_sym) if negative.include?(flag)
|
475
|
+
end
|
476
|
+
|
477
|
+
# Any encoding flag overrides all previous encoding flags. If there are
|
478
|
+
# multiple encoding flags in an options string, the last one wins.
|
479
|
+
# E.g. /(?dau)\w/ matches UTF8 chars but /(?dua)\w/ only ASCII chars.
|
480
|
+
if (flag = positive.reverse[/[adu]/])
|
481
|
+
%w[a d u].each { |key| new_options.delete(key.to_sym) }
|
482
|
+
new_options[flag.to_sym] = true
|
483
|
+
end
|
484
|
+
|
485
|
+
@options_stack << new_options
|
486
|
+
|
487
|
+
exp = Group::Options.new(token, active_opts)
|
467
488
|
|
468
489
|
nest(exp)
|
469
490
|
end
|
470
491
|
|
471
|
-
def
|
492
|
+
def open_group(token)
|
472
493
|
case token.token
|
473
494
|
when :passive
|
474
|
-
exp = Group::Passive.new(token)
|
495
|
+
exp = Group::Passive.new(token, active_opts)
|
475
496
|
when :atomic
|
476
|
-
exp = Group::Atomic.new(token)
|
497
|
+
exp = Group::Atomic.new(token, active_opts)
|
477
498
|
when :named
|
478
|
-
exp = Group::Named.new(token)
|
499
|
+
exp = Group::Named.new(token, active_opts)
|
479
500
|
when :capture
|
480
|
-
exp = Group::Capture.new(token)
|
501
|
+
exp = Group::Capture.new(token, active_opts)
|
481
502
|
when :absence
|
482
|
-
exp = Group::Absence.new(token)
|
503
|
+
exp = Group::Absence.new(token, active_opts)
|
483
504
|
|
484
505
|
when :lookahead
|
485
|
-
exp = Assertion::Lookahead.new(token)
|
506
|
+
exp = Assertion::Lookahead.new(token, active_opts)
|
486
507
|
when :nlookahead
|
487
|
-
exp = Assertion::NegativeLookahead.new(token)
|
508
|
+
exp = Assertion::NegativeLookahead.new(token, active_opts)
|
488
509
|
when :lookbehind
|
489
|
-
exp = Assertion::Lookbehind.new(token)
|
510
|
+
exp = Assertion::Lookbehind.new(token, active_opts)
|
490
511
|
when :nlookbehind
|
491
|
-
exp = Assertion::NegativeLookbehind.new(token)
|
512
|
+
exp = Assertion::NegativeLookbehind.new(token, active_opts)
|
492
513
|
|
493
514
|
else
|
494
515
|
raise UnknownTokenError.new('Group type open', token)
|
495
516
|
end
|
496
517
|
|
518
|
+
# Push the active options to the stack again. This way we can simply pop the
|
519
|
+
# stack for any group we close, no matter if it had its own options or not.
|
520
|
+
@options_stack << active_opts
|
521
|
+
|
497
522
|
nest(exp)
|
498
523
|
end
|
499
524
|
|
500
|
-
def
|
525
|
+
def close_group
|
501
526
|
@nesting.pop
|
527
|
+
@options_stack.pop unless @switching_options
|
528
|
+
@switching_options = false
|
502
529
|
|
503
530
|
@node = @nesting.last
|
504
531
|
@node = @node.last if @node.last and @node.last.is_a?(Alternation)
|
505
532
|
end
|
506
533
|
|
507
|
-
def
|
534
|
+
def open_set(token)
|
508
535
|
token.token = :character
|
509
536
|
|
510
537
|
if token.type == :subset
|
511
|
-
@set << CharacterSubSet.new(token)
|
538
|
+
@set << CharacterSubSet.new(token, active_opts)
|
512
539
|
else
|
513
|
-
@node << (@set = CharacterSet.new(token))
|
540
|
+
@node << (@set = CharacterSet.new(token, active_opts))
|
514
541
|
end
|
515
542
|
end
|
516
543
|
|
517
|
-
def
|
544
|
+
def negate_set
|
518
545
|
@set.negate
|
519
546
|
end
|
520
547
|
|
521
|
-
def
|
548
|
+
def append_set(token)
|
522
549
|
@set << token.text
|
523
550
|
end
|
524
551
|
|
525
|
-
def
|
552
|
+
def close_set(token)
|
526
553
|
@set.close
|
527
554
|
end
|
528
555
|
|
556
|
+
def active_opts
|
557
|
+
@options_stack.last
|
558
|
+
end
|
529
559
|
end # module Regexp::Parser
|