regexp_parser 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +4 -0
- data/LICENSE +22 -0
- data/README.rdoc +307 -0
- data/Rakefile +91 -0
- data/lib/regexp_parser/ctype.rb +48 -0
- data/lib/regexp_parser/expression/property.rb +108 -0
- data/lib/regexp_parser/expression/set.rb +59 -0
- data/lib/regexp_parser/expression.rb +287 -0
- data/lib/regexp_parser/lexer.rb +105 -0
- data/lib/regexp_parser/parser.rb +417 -0
- data/lib/regexp_parser/scanner/property.rl +534 -0
- data/lib/regexp_parser/scanner/scanner.rl +712 -0
- data/lib/regexp_parser/scanner.rb +3325 -0
- data/lib/regexp_parser/syntax/ruby/1.8.6.rb +14 -0
- data/lib/regexp_parser/syntax/ruby/1.8.7.rb +14 -0
- data/lib/regexp_parser/syntax/ruby/1.8.rb +39 -0
- data/lib/regexp_parser/syntax/ruby/1.9.1.rb +39 -0
- data/lib/regexp_parser/syntax/ruby/1.9.2.rb +10 -0
- data/lib/regexp_parser/syntax/ruby/1.9.3.rb +24 -0
- data/lib/regexp_parser/syntax/ruby/1.9.rb +8 -0
- data/lib/regexp_parser/syntax/tokens.rb +332 -0
- data/lib/regexp_parser/syntax.rb +172 -0
- data/lib/regexp_parser.rb +45 -0
- data/test/helpers.rb +8 -0
- data/test/lexer/test_all.rb +26 -0
- data/test/lexer/test_literals.rb +120 -0
- data/test/lexer/test_nesting.rb +107 -0
- data/test/lexer/test_refcalls.rb +45 -0
- data/test/parser/test_all.rb +44 -0
- data/test/parser/test_alternation.rb +46 -0
- data/test/parser/test_anchors.rb +35 -0
- data/test/parser/test_errors.rb +59 -0
- data/test/parser/test_escapes.rb +48 -0
- data/test/parser/test_expression.rb +51 -0
- data/test/parser/test_groups.rb +69 -0
- data/test/parser/test_properties.rb +346 -0
- data/test/parser/test_quantifiers.rb +236 -0
- data/test/parser/test_refcalls.rb +101 -0
- data/test/parser/test_sets.rb +99 -0
- data/test/scanner/test_all.rb +30 -0
- data/test/scanner/test_anchors.rb +35 -0
- data/test/scanner/test_errors.rb +36 -0
- data/test/scanner/test_escapes.rb +49 -0
- data/test/scanner/test_groups.rb +41 -0
- data/test/scanner/test_literals.rb +85 -0
- data/test/scanner/test_meta.rb +36 -0
- data/test/scanner/test_properties.rb +315 -0
- data/test/scanner/test_quantifiers.rb +38 -0
- data/test/scanner/test_refcalls.rb +45 -0
- data/test/scanner/test_scripts.rb +314 -0
- data/test/scanner/test_sets.rb +80 -0
- data/test/scanner/test_types.rb +30 -0
- data/test/syntax/ruby/test_1.8.rb +57 -0
- data/test/syntax/ruby/test_1.9.1.rb +39 -0
- data/test/syntax/ruby/test_1.9.3.rb +38 -0
- data/test/syntax/ruby/test_all.rb +12 -0
- data/test/syntax/test_all.rb +19 -0
- data/test/test_all.rb +4 -0
- metadata +160 -0
@@ -0,0 +1,105 @@
|
|
1
|
+
# A very thin wrapper around the scanner that breaks quantified literal runs,
|
2
|
+
# collects emitted tokens into an array, calculates their nesting depth,
|
3
|
+
# normalizes tokens for the parser, and checks if they are implemented by the
|
4
|
+
# given syntax flavor.
|
5
|
+
module Regexp::Lexer
|
6
|
+
|
7
|
+
# Group/assertion tokens after which the nesting depth is increased.
OPENING_TOKENS = [
  :capture, :options, :passive, :atomic, :named,
  :lookahead, :nlookahead, :lookbehind, :nlookbehind
].freeze

# Group/assertion tokens before which the nesting depth is decreased.
CLOSING_TOKENS = [:close].freeze
|
12
|
+
|
13
|
+
# Runs Regexp::Scanner over +input+ and collects the emitted events into an
# array of Regexp::Token objects.
#
# Every event is normalized and checked against the syntax, the nesting
# counters are adjusted around it, a literal that is followed by a quantifier
# is handed to break_literal, and two consecutive literals are combined by
# merge_literal. Each token is linked to its neighbour through
# Token#next / Token#previous.
#
# input  - handed unchanged to Regexp::Scanner.scan.
# syntax - handed to Regexp::Syntax.new (default: 'ruby/1.9').
# block  - optional; when given it is called once per token, after the whole
#          input has been scanned.
#
# Returns the array of tokens.
#
# Working state is kept in module-level instance variables (@tokens,
# @nesting, @set_nesting) that are reset at the start of every call.
def self.scan(input, syntax = 'ruby/1.9', &block)
  syntax = Regexp::Syntax.new(syntax)

  @tokens = []
  @nesting, @set_nesting = 0, 0

  last = nil
  Regexp::Scanner.scan(input) do |type, token, text, ts, te|
    type, token = *syntax.normalize(type, token)
    syntax.check! type, token

    self.ascend(type, token)

    if type == :quantifier && last && last.type == :literal
      self.break_literal(last)
      # break_literal may have swapped the literal for two new tokens; link
      # against whatever now ends the list instead of the replaced object.
      last = @tokens.last
    end

    current = Regexp::Token.new(type, token, text, ts, te,
                                @nesting, @set_nesting)

    if type == :literal && last && last.type == :literal
      current = self.merge_literal(current)
      # merge_literal removed the previous literal from @tokens, so the
      # merged token's neighbour is the token before it (nil if none).
      last = @tokens.last
    end

    last.next(current) if last
    current.previous(last) if last

    @tokens << current
    last = current

    self.descend(type, token)
  end

  if block_given?
    @tokens.each {|t| block.call(t)}
  else
    @tokens
  end
end
|
50
|
+
|
51
|
+
# Called by scan before a token is recorded. Decrements @nesting for a
# group/assertion token listed in CLOSING_TOKENS, and @set_nesting for a
# set/subset :close token, so the closing token carries the outer depth.
def self.ascend(type, token)
  grouping = (type == :group || type == :assertion)
  @nesting -= 1 if grouping && CLOSING_TOKENS.include?(token)

  in_set = (type == :set || type == :subset)
  @set_nesting -= 1 if in_set && token == :close
end
|
60
|
+
|
61
|
+
# Called by scan after a token has been recorded. Increments @nesting for a
# group/assertion token listed in OPENING_TOKENS, and @set_nesting for a
# set/subset :open token, so only the tokens that follow get the new depth.
def self.descend(type, token)
  grouping = (type == :group || type == :assertion)
  @nesting += 1 if grouping && OPENING_TOKENS.include?(token)

  in_set = (type == :set || type == :subset)
  @set_nesting += 1 if in_set && token == :open
end
|
70
|
+
|
71
|
+
# Called by scan when a quantifier follows a literal token. If the literal's
# text is longer than one character, the token at the end of @tokens is
# replaced by two literal tokens: one holding everything except the final
# character, and one holding the final character alone. Literals of one
# character (or less) are left untouched.
def self.break_literal(token)
  text = token.text
  return if text.scan(/./mu).length <= 1

  head = text.sub(/.\z/mu, "")
  tail = text[/.\z/mu] || ''

  # Offsets are measured with bytesize on 1.9 and later, with length before.
  measure = RUBY_VERSION >= '1.9' ? :bytesize : :length
  head_size = head.send(measure)
  tail_size = tail.send(measure)

  @tokens.pop
  @tokens << Regexp::Token.new(:literal, :literal, head,
                               token.ts, (token.te - tail_size),
                               @nesting, @set_nesting)
  @tokens << Regexp::Token.new(:literal, :literal, tail,
                               (token.ts + head_size), token.te,
                               @nesting, @set_nesting)
end
|
96
|
+
|
97
|
+
# called by scan to merge two consecutive literals. this happens when tokens
# get normalized (as in the case of posix/bre) and end up becoming literals.
#
# Removes the most recent token from @tokens and returns a new literal token
# whose text is that token's text followed by +current+'s text, running from
# the removed token's start offset to +current+'s end offset. The merged
# token is not appended to @tokens here; the caller does that.
def self.merge_literal(current)
  previous = @tokens.pop
  Regexp::Token.new(:literal, :literal, previous.text + current.text,
                    previous.ts, current.te, @nesting, @set_nesting)
end
|
104
|
+
|
105
|
+
end # module Regexp::Lexer
|
@@ -0,0 +1,417 @@
|
|
1
|
+
require File.expand_path('../expression', __FILE__)
|
2
|
+
|
3
|
+
module Regexp::Parser
|
4
|
+
include Regexp::Expression
|
5
|
+
include Regexp::Syntax
|
6
|
+
|
7
|
+
# Common superclass of the parser's error classes. The message is handled by
# StandardError itself, so no constructor of its own is needed.
class ParserError < StandardError; end
|
12
|
+
|
13
|
+
# Raised by parse_token when a token's type has no handler. The message
# contains the type and the inspected token.
class UnknownTokenTypeError < ParserError
  def initialize(type, token)
    message = "Unknown #{type} type #{token.inspect}"
    super(message)
  end
end
|
18
|
+
|
19
|
+
# Raised by the per-type handlers when a token's type is handled but its
# token id is not. +type+ is a descriptive label supplied by the handler.
class UnknownTokenError < ParserError
  def initialize(type, token)
    message = "Unknown #{type} token #{token.token}"
    super(message)
  end
end
|
24
|
+
|
25
|
+
# Builds an expression tree for +input+.
#
# A fresh Root becomes both the current node and the bottom of the nesting
# stack; every token produced by Regexp::Lexer.scan is then passed to
# parse_token.
#
# input  - handed to Regexp::Lexer.scan.
# syntax - handed to Regexp::Lexer.scan (default: :any).
# block  - optional; when given it is called with the root and its result is
#          returned.
#
# Returns the root expression when no block is given.
def self.parse(input, syntax = :any, &block)
  @node = Root.new
  @root = @node
  @nesting = [@root]

  Regexp::Lexer.scan(input, syntax) { |token| self.parse_token(token) }

  return block.call(@root) if block_given?
  @root
end
|
38
|
+
|
39
|
+
# Makes +exp+ the current node: it is pushed on the nesting stack, appended
# to the node that was current so far, and then becomes the current node.
# Returns +exp+.
def self.nest(exp)
  @nesting << exp
  @node << exp
  @node = exp
end
|
45
|
+
|
46
|
+
# Dispatches a lexer token to the handler for its type. Literals are
# appended to the current node directly; groups and assertions share one
# handler, as do sets and subsets, and properties and non-properties.
#
# Raises UnknownTokenTypeError for any other type.
def self.parse_token(token)
  case token.type
  when :literal                then @node << Literal.new(token)
  when :meta                   then self.meta(token)
  when :quantifier             then self.quantifier(token)
  when :anchor                 then self.anchor(token)
  when :escape                 then self.escape(token)
  when :group, :assertion      then self.group(token)
  when :set, :subset           then self.set(token)
  when :type                   then self.type(token)
  when :backref                then self.backref(token)
  when :property, :nonproperty then self.property(token)
  else
    raise UnknownTokenTypeError.new(token.type, token)
  end
end
|
68
|
+
|
69
|
+
# Handles set and subset tokens: opens, closes or negates the current set,
# or appends the token to it when it is a member-like token or is listed in
# Token::Escape::All, Token::CharacterSet::All or Token::UnicodeProperty::All.
#
# Raises UnknownTokenError for anything else.
def self.set(token)
  case token.token
  when :open   then self.open_set(token)
  when :close  then self.close_set
  when :negate then self.negate_set
  when :member, :range, :escape, :collation, :equivalent
    self.append_set(token)
  # One splat per clause, as in the original clauses.
  when *Token::Escape::All          then self.append_set(token)
  when *Token::CharacterSet::All    then self.append_set(token)
  when *Token::UnicodeProperty::All then self.append_set(token)
  else
    raise UnknownTokenError.new('CharacterSet', token)
  end
end
|
89
|
+
|
90
|
+
# Handles meta tokens.
#
# :dot         - appends a CharacterType::Any to the current node.
# :alternation - unless the current node already is an alternation, moves
#                everything the current node holds so far into a Sequence,
#                makes that the first alternative of a new Alternation,
#                appends the Alternation to the current node and makes it
#                the current node. In both cases a new alternative is then
#                started on the (now current) alternation.
#
# Raises UnknownTokenError for any other meta token.
def self.meta(token)
  case token.token
  when :dot
    @node << CharacterType::Any.new(token)
  when :alternation
    unless @node.token == :alternation
      alternation  = Alternation.new(token)
      first_branch = Sequence.new
      first_branch.insert(@node.expressions.pop) while @node.expressions.last
      alternation.alternative(first_branch)

      @node << alternation
      @node = alternation
    end

    @node.alternative
  else
    raise UnknownTokenError.new('Meta', token)
  end
end
|
113
|
+
|
114
|
+
# Appends the Backreference expression that corresponds to the token to the
# current node. Raises UnknownTokenError for an unlisted token.
def self.backref(token)
  klass = case token.token
          when :name_ref             then Backreference::Name
          when :name_nest_ref        then Backreference::NameNestLevel
          when :name_call            then Backreference::NameCall
          when :number, :number_ref  then Backreference::Number
          when :number_rel_ref       then Backreference::NumberRelative
          when :number_nest_ref      then Backreference::NumberNestLevel
          when :number_call          then Backreference::NumberCall
          when :number_rel_call      then Backreference::NumberCallRelative
          else
            raise UnknownTokenError.new('Backreference', token)
          end

  @node << klass.new(token)
end
|
136
|
+
|
137
|
+
# Appends the CharacterType expression that corresponds to the token to the
# current node. Raises UnknownTokenError for an unlisted token.
def self.type(token)
  klass = case token.token
          when :digit    then CharacterType::Digit
          when :nondigit then CharacterType::NonDigit
          when :hex      then CharacterType::Hex
          when :nonhex   then CharacterType::NonHex
          when :space    then CharacterType::Space
          when :nonspace then CharacterType::NonSpace
          when :word     then CharacterType::Word
          when :nonword  then CharacterType::NonWord
          else
            raise UnknownTokenError.new('CharacterType', token)
          end

  @node << klass.new(token)
end
|
159
|
+
|
160
|
+
# Appends the UnicodeProperty expression that corresponds to the token to the
# current node. Tokens listed in Token::UnicodeProperty::Age, ::Derived and
# ::Script map to the Age, Derived and Script classes respectively.
#
# Raises UnknownTokenError for an unlisted token.
def self.property(token)
  # NOTE(review): this include is executed on every call and adds
  # UnicodeProperty to this module's ancestors; the unqualified class names
  # below are resolved through it.
  include Regexp::Expression::UnicodeProperty

  klass = case token.token
          when :alnum   then Alnum
          when :alpha   then Alpha
          when :any     then Any
          when :ascii   then Ascii
          when :blank   then Blank
          when :cntrl   then Cntrl
          when :digit   then Digit
          when :graph   then Graph
          when :lower   then Lower
          when :print   then Print
          when :punct   then Punct
          when :space   then Space
          when :upper   then Upper
          when :word    then Word
          when :xdigit  then Xdigit
          when :newline then Newline

          when :letter_any       then Letter::Any
          when :letter_uppercase then Letter::Uppercase
          when :letter_lowercase then Letter::Lowercase
          when :letter_titlecase then Letter::Titlecase
          when :letter_modifier  then Letter::Modifier
          when :letter_other     then Letter::Other

          when :mark_any        then Mark::Any
          when :mark_nonspacing then Mark::Nonspacing
          when :mark_spacing    then Mark::Spacing
          when :mark_enclosing  then Mark::Enclosing

          when :number_any     then Number::Any
          when :number_decimal then Number::Decimal
          when :number_letter  then Number::Letter
          when :number_other   then Number::Other

          when :punct_any       then Punctuation::Any
          when :punct_connector then Punctuation::Connector
          when :punct_dash      then Punctuation::Dash
          when :punct_open      then Punctuation::Open
          when :punct_close     then Punctuation::Close
          when :punct_initial   then Punctuation::Initial
          when :punct_final     then Punctuation::Final
          when :punct_other     then Punctuation::Other

          when :separator_any   then Separator::Any
          when :separator_space then Separator::Space
          when :separator_line  then Separator::Line
          when :separator_para  then Separator::Paragraph

          when :symbol_any      then Symbol::Any
          when :symbol_math     then Symbol::Math
          when :symbol_currency then Symbol::Currency
          when :symbol_modifier then Symbol::Modifier
          when :symbol_other    then Symbol::Other

          when :other       then Codepoint::Any
          when :control     then Codepoint::Control
          when :format      then Codepoint::Format
          when :surrogate   then Codepoint::Surrogate
          when :private_use then Codepoint::PrivateUse
          when :unassigned  then Codepoint::Unassigned

          when *Token::UnicodeProperty::Age
            Age
          when *Token::UnicodeProperty::Derived
            Derived
          when *Regexp::Syntax::Token::UnicodeProperty::Script
            Script
          else
            raise UnknownTokenError.new('UnicodeProperty', token)
          end

  @node << klass.new(token)
end
|
238
|
+
|
239
|
+
# Appends the Anchor expression that corresponds to the token to the current
# node. Raises UnknownTokenError for an unlisted token.
def self.anchor(token)
  klass = case token.token
          when :beginning_of_line then Anchor::BeginningOfLine
          when :end_of_line       then Anchor::EndOfLine
          when :bos               then Anchor::BOS
          when :eos               then Anchor::EOS
          when :eos_ob_eol        then Anchor::EOSobEOL
          when :word_boundary     then Anchor::WordBoundary
          when :nonword_boundary  then Anchor::NonWordBoundary
          when :match_start       then Anchor::MatchStart
          else
            raise UnknownTokenError.new('Anchor', token)
          end

  @node << klass.new(token)
end
|
261
|
+
|
262
|
+
# Appends the EscapeSequence expression that corresponds to the token to the
# current node. Unlike the other handlers this one never raises for an
# unlisted token: anything not named below becomes an EscapeSequence::Literal.
def self.escape(token)
  klass = case token.token
          when :backspace    then EscapeSequence::Backspace
          when :escape       then EscapeSequence::AsciiEscape
          when :bell         then EscapeSequence::Bell
          when :form_feed    then EscapeSequence::FormFeed
          when :newline      then EscapeSequence::Newline
          when :carriage     then EscapeSequence::Return
          when :space        then EscapeSequence::Space
          when :tab          then EscapeSequence::Tab
          when :vertical_tab then EscapeSequence::VerticalTab
          when :control      then EscapeSequence::Control
          else
            # treating everything else as a literal
            EscapeSequence::Literal
          end

  @node << klass.new(token)
end
|
293
|
+
|
294
|
+
# Applies a quantifier token to the last expression of the current node.
#
# Interval tokens are delegated to interval. Every other known token maps to
# a quantifier name, a minimum, a maximum (-1 is used for the open-ended
# ones) and a mode, which are passed to #quantify together with the token's
# text.
#
# Raises UnknownTokenError for an unlisted token.
def self.quantifier(token)
  kind = token.token
  return self.interval(token.text) if kind == :interval

  # token id => [name, min, max, mode]
  spec = {
    :zero_or_one             => [:zero_or_one,  0,  1, :greedy],
    :zero_or_one_reluctant   => [:zero_or_one,  0,  1, :reluctant],
    :zero_or_one_possessive  => [:zero_or_one,  0,  1, :possessive],

    :zero_or_more            => [:zero_or_more, 0, -1, :greedy],
    :zero_or_more_reluctant  => [:zero_or_more, 0, -1, :reluctant],
    :zero_or_more_possessive => [:zero_or_more, 0, -1, :possessive],

    :one_or_more             => [:one_or_more,  1, -1, :greedy],
    :one_or_more_reluctant   => [:one_or_more,  1, -1, :reluctant],
    :one_or_more_possessive  => [:one_or_more,  1, -1, :possessive]
  }[kind]

  raise UnknownTokenError.new('Quantifier', token) unless spec

  name, min, max, mode = spec
  @node.expressions.last.quantify(name, token.text, min, max, mode)
end
|
324
|
+
|
325
|
+
# Applies an interval quantifier such as "{2}", "{2,}", "{,5}" or "{2,5}",
# optionally followed by "?" (reluctant) or "+" (possessive), to the last
# expression of the current node.
#
# text - the quantifier's source text. It is not modified, and it is passed
#        to #quantify in full (mode suffix included), the same way the other
#        quantifiers pass their token text.
#
# A missing lower bound becomes 0, a missing upper bound becomes -1, and a
# single number is used for both bounds.
def self.interval(text)
  # text[-1, 1] yields a one-character String on 1.8 as well as 1.9+.
  mode = case text[-1, 1]
         when '?' then :reluctant
         when '+' then :possessive
         else :greedy
         end

  # Parse the bounds from a copy without the mode suffix; the caller's
  # string (the token's own text) must stay intact.
  body = mode == :greedy ? text : text[0...-1]

  bounds = body.gsub(/\{|\}/, '').split(',', 2).map {|part| part.strip}
  min = bounds[0].empty? ? 0 : bounds[0]
  max = bounds[1] ? (bounds[1].empty? ? -1 : bounds[1]) : min

  @node.expressions.last.quantify(:interval, text, min.to_i, max.to_i, mode)
end
|
339
|
+
|
340
|
+
# Handles group and assertion tokens: option groups go to options, :close
# goes to close_group, comments are appended to the current node as
# Group::Comment, and every other token is treated as an opening group.
def self.group(token)
  kind = token.token

  if kind == :options
    self.options(token)
  elsif kind == :close
    self.close_group
  elsif kind == :comment
    @node << Group::Comment.new(token)
  else
    self.open_group(token)
  end
end
|
352
|
+
|
353
|
+
# Opens a Group::Options expression. Its options hash has the keys :m, :i
# and :x; each is true when that letter occurs in the part of the token text
# before the first '-', and false otherwise.
def self.options(token)
  switched_on = token.text.split('-', 2).first

  exp = Group::Options.new(token)

  flags = {}
  [:m, :i, :x].each do |flag|
    flags[flag] = switched_on.include?(flag.to_s)
  end
  exp.options = flags

  self.nest exp
end
|
365
|
+
|
366
|
+
# Creates the Group or Assertion expression that corresponds to the token
# and nests it, making it the current node.
#
# Raises UnknownTokenError for an unlisted token.
def self.open_group(token)
  klass = case token.token
          when :passive     then Group::Passive
          when :atomic      then Group::Atomic
          when :named       then Group::Named
          when :capture     then Group::Capture

          when :lookahead   then Assertion::Lookahead
          when :nlookahead  then Assertion::NegativeLookahead
          when :lookbehind  then Assertion::Lookbehind
          when :nlookbehind then Assertion::NegativeLookbehind
          else
            raise UnknownTokenError.new('Group type open', token)
          end

  self.nest klass.new(token)
end
|
392
|
+
|
393
|
+
# Leaves the group that is currently open: it is dropped from the nesting
# stack and the entry below it becomes the current node again.
# Returns the new current node.
def self.close_group
  @nesting.pop
  @node = @nesting.last
end
|
397
|
+
|
398
|
+
# Opens a character set. A :subset token adds a CharacterSubSet to the set
# that is currently tracked; any other token creates a new CharacterSet,
# tracks it in @set and appends it to the current node.
def self.open_set(token)
  return @set << CharacterSubSet.new(token) if token.type == :subset

  @set = CharacterSet.new(token)
  @node << @set
end
|
405
|
+
|
406
|
+
# Marks the set that is currently tracked in @set as negated.
def self.negate_set
  @set.negate()
end
|
409
|
+
|
410
|
+
# Adds the token's text as a member of the set currently tracked in @set.
def self.append_set(token)
  member = token.text
  @set << member
end
|
413
|
+
|
414
|
+
# Called for a set's :close token. Nothing is done here; @set is left as it
# is. Returns nil.
def self.close_set
  nil
end
|
416
|
+
|
417
|
+
end # module Regexp::Parser
|