regexp_parser 0.4.6 → 0.4.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6bc36d64eb84ebef5287ca3c866c834339b6b563
4
- data.tar.gz: 59db2b321120f59697d3ce6e0c612dd745a41ffc
3
+ metadata.gz: de01aa2d195d95dd0bee1afd232f85195562a8bf
4
+ data.tar.gz: b41cb58a4e07d681da7c16473f4f03d96a792ff9
5
5
  SHA512:
6
- metadata.gz: fcf1c339c360217fbb2a1c4cedfd2eeeca199a52983fe706c7d661141a93b3793e1de0901d6c935b61dad30a78c4ad7f510bfec6abdf765f2c6e6a6edb3adefa
7
- data.tar.gz: d0fc3eb8fd70a252d60ed25f7472ad36e76d379d5660f5d9d28735b9faa7caf1e15bc69468849e91d20832e3249a582c53ce52b67c5a01e50b2f6325dcf1063e
6
+ metadata.gz: 342d6218d5553f2f2f6975f202cf650cd74c9128379348526981d829188b38836dfd88bc46b8476212d4b10aeb628479109baff075e1297c4cf69aaa4fe8ff03
7
+ data.tar.gz: 80883d05ff9bb3f5f9f296aeeb5eabde013a362c6d6c82f4eeab87438050f93fea2224f407b170b65ff55d175937c55f56d49077e0ad43b0a55832989e221544
data/ChangeLog CHANGED
@@ -1,3 +1,16 @@
1
+ Sun Oct 15 2017 Janosch Müller <janosch84@gmail.com>
2
+
3
+ * Fixed a thread safety issue (issue #45)
4
+ * Some public class methods that were only reliable for
5
+ internal use are now private instance methods (PR #46)
6
+ * Improved the usefulness of Expression#options (issue #43) -
7
+ #options and derived methods such as #i?, #m? and #x? are now
8
+ defined for all Expressions that are affected by such flags.
9
+ * Fixed scanning of whitespace following (?x) (commit 5c94bd2)
10
+ * Fixed a Parser bug where the #number attribute of traditional
11
+ numerical backreferences was not set correctly (commit 851b620)
12
+ * Bumped version to 0.4.7
13
+
1
14
  Mon Sep 18 2017 Janosch Müller <janosch84@gmail.com>
2
15
 
3
16
  * Added Parser support for hex escapes in sets (PR #36)
data/README.md CHANGED
@@ -125,9 +125,10 @@ Regexp::Scanner.scan( /(cat?([bhm]at)){3,5}/ ).map {|token| token[2]}
125
125
 
126
126
  * If the input is a ruby **Regexp** object, the scanner calls #source on it to
127
127
  get its string representation. #source does not include the options of
128
- the expression (m, i, and x) To include the options in the scan, #to_s
129
- should be called on the **Regexp** before passing it to the scanner or any
130
- of the other modules.
128
+ the expression (m, i, and x). To include the options in the scan, #to_s
129
+ should be called on the **Regexp** before passing it to the scanner or the
130
+ lexer. For the parser, however, this is not necessary. It automatically
131
+ exposes the options of a passed **Regexp** in the returned root expression.
131
132
 
132
133
  * To keep the scanner simple(r) and fairly reusable for other purposes, it
133
134
  does not perform lexical analysis on the tokens, sticking to the task
@@ -8,7 +8,7 @@ module Regexp::Expression
8
8
  attr_accessor :quantifier
9
9
  attr_accessor :options
10
10
 
11
- def initialize(token)
11
+ def initialize(token, options = {})
12
12
  @type = token.type
13
13
  @token = token.token
14
14
  @text = token.text
@@ -17,7 +17,7 @@ module Regexp::Expression
17
17
  @set_level = token.set_level
18
18
  @conditional_level = token.conditional_level
19
19
  @quantifier = nil
20
- @options = nil
20
+ @options = options
21
21
  end
22
22
 
23
23
  def clone
@@ -95,35 +95,35 @@ module Regexp::Expression
95
95
  end
96
96
 
97
97
  def multiline?
98
- (@options and @options[:m]) ? true : false
98
+ @options[:m] == true
99
99
  end
100
100
  alias :m? :multiline?
101
101
 
102
102
  def case_insensitive?
103
- (@options and @options[:i]) ? true : false
103
+ @options[:i] == true
104
104
  end
105
105
  alias :i? :case_insensitive?
106
106
  alias :ignore_case? :case_insensitive?
107
107
 
108
108
  def free_spacing?
109
- (@options and @options[:x]) ? true : false
109
+ @options[:x] == true
110
110
  end
111
111
  alias :x? :free_spacing?
112
112
  alias :extended? :free_spacing?
113
113
 
114
114
  if RUBY_VERSION >= '2.0'
115
115
  def default_classes?
116
- (@options and @options[:d]) ? true : false
116
+ @options[:d] == true
117
117
  end
118
118
  alias :d? :default_classes?
119
119
 
120
120
  def ascii_classes?
121
- (@options and @options[:a]) ? true : false
121
+ @options[:a] == true
122
122
  end
123
123
  alias :a? :ascii_classes?
124
124
 
125
125
  def unicode_classes?
126
- (@options and @options[:u]) ? true : false
126
+ @options[:u] == true
127
127
  end
128
128
  alias :u? :unicode_classes?
129
129
  end
@@ -6,18 +6,18 @@ module Regexp::Expression
6
6
  class Name < Backreference::Base
7
7
  attr_reader :name
8
8
 
9
- def initialize(token)
9
+ def initialize(token, options = {})
10
10
  @name = token.text[3..-2]
11
- super(token)
11
+ super
12
12
  end
13
13
  end
14
14
 
15
15
  class Number < Backreference::Base
16
16
  attr_reader :number
17
17
 
18
- def initialize(token)
19
- @number = token.text[3..-2]
20
- super(token)
18
+ def initialize(token, options = {})
19
+ @number = token.text[token.token.equal?(:number) ? 1..-1 : 3..-2]
20
+ super
21
21
  end
22
22
  end
23
23
 
@@ -29,9 +29,9 @@ module Regexp::Expression
29
29
  class NameCall < Backreference::Base
30
30
  attr_reader :name
31
31
 
32
- def initialize(token)
32
+ def initialize(token, options = {})
33
33
  @name = token.text[3..-2]
34
- super(token)
34
+ super
35
35
  end
36
36
  end
37
37
 
@@ -11,8 +11,8 @@ module Regexp::Expression
11
11
  class Branch < Regexp::Expression::Sequence; end
12
12
 
13
13
  class Expression < Regexp::Expression::Subexpression
14
- def initialize(token)
15
- super(token)
14
+ def initialize(token, options = {})
15
+ super
16
16
 
17
17
  @condition = nil
18
18
  @branches = []
@@ -36,9 +36,9 @@ module Regexp::Expression
36
36
  class Named < Group::Capture
37
37
  attr_reader :name
38
38
 
39
- def initialize(token)
39
+ def initialize(token, options = {})
40
40
  @name = token.text[3..-2]
41
- super(token)
41
+ super
42
42
  end
43
43
 
44
44
  def clone
@@ -1,26 +1,12 @@
1
1
  module Regexp::Expression
2
2
 
3
3
  class Root < Regexp::Expression::Subexpression
4
- def initialize
5
- super Regexp::Token.new(:expression, :root, '', 0)
4
+ def initialize(options = {})
5
+ super(Regexp::Token.new(:expression, :root, '', 0), options)
6
6
  end
7
7
 
8
- def multiline?
9
- @expressions[0].m?
10
- end
11
- alias :m? :multiline?
12
-
13
- def case_insensitive?
14
- @expressions[0].i?
15
- end
16
- alias :i? :case_insensitive?
17
- alias :ignore_case? :case_insensitive?
18
-
19
- def free_spacing?
20
- @expressions[0].x?
21
- end
22
- alias :x? :free_spacing?
23
- alias :extended? :free_spacing?
8
+ alias ignore_case? case_insensitive?
9
+ alias extended? free_spacing?
24
10
  end
25
11
 
26
12
  end
@@ -3,7 +3,7 @@ module Regexp::Expression
3
3
  class CharacterSet < Regexp::Expression::Base
4
4
  attr_accessor :members
5
5
 
6
- def initialize(token)
6
+ def initialize(token, options = {})
7
7
  @members = []
8
8
  @negative = false
9
9
  @closed = false
@@ -3,8 +3,8 @@ module Regexp::Expression
3
3
  class Subexpression < Regexp::Expression::Base
4
4
  attr_accessor :expressions
5
5
 
6
- def initialize(token)
7
- super(token)
6
+ def initialize(token, options = {})
7
+ super
8
8
 
9
9
  @expressions = []
10
10
  end
@@ -2,7 +2,7 @@
2
2
  # collects emitted tokens into an array, calculates their nesting depth, and
3
3
  # normalizes tokens for the parser, and checks if they are implemented by the
4
4
  # given syntax flavor.
5
- module Regexp::Lexer
5
+ class Regexp::Lexer
6
6
 
7
7
  OPENING_TOKENS = [:capture, :options, :passive, :atomic, :named, :absence,
8
8
  :lookahead, :nlookahead, :lookbehind, :nlookbehind
@@ -11,6 +11,10 @@ module Regexp::Lexer
11
11
  CLOSING_TOKENS = [:close].freeze
12
12
 
13
13
  def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
14
+ new.lex(input, syntax, &block)
15
+ end
16
+
17
+ def lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
14
18
  syntax = Regexp::Syntax.new(syntax)
15
19
 
16
20
  @tokens = []
@@ -57,7 +61,7 @@ module Regexp::Lexer
57
61
 
58
62
  protected
59
63
 
60
- def self.ascend(type, token)
64
+ def ascend(type, token)
61
65
  if type == :group or type == :assertion
62
66
  @nesting -= 1 if CLOSING_TOKENS.include?(token)
63
67
  end
@@ -71,7 +75,7 @@ module Regexp::Lexer
71
75
  end
72
76
  end
73
77
 
74
- def self.descend(type, token)
78
+ def descend(type, token)
75
79
  if type == :group or type == :assertion
76
80
  @nesting += 1 if OPENING_TOKENS.include?(token)
77
81
  end
@@ -87,7 +91,7 @@ module Regexp::Lexer
87
91
 
88
92
  # called by scan to break a literal run that is longer than one character
89
93
  # into two separate tokens when it is followed by a quantifier
90
- def self.break_literal(token)
94
+ def break_literal(token)
91
95
  text = token.text
92
96
  if text.scan(/./mu).length > 1
93
97
  lead = text.sub(/.\z/mu, "")
@@ -113,7 +117,7 @@ module Regexp::Lexer
113
117
 
114
118
  # called by scan to merge two consecutive literals. this happens when tokens
115
119
  # get normalized (as in the case of posix/bre) and end up becoming literals.
116
- def self.merge_literal(current)
120
+ def merge_literal(current)
117
121
  last = @tokens.pop
118
122
 
119
123
  Regexp::Token.new(
@@ -128,7 +132,7 @@ module Regexp::Lexer
128
132
  )
129
133
  end
130
134
 
131
- def self.merge_condition(current)
135
+ def merge_condition(current)
132
136
  last = @tokens.pop
133
137
  Regexp::Token.new(:conditional, :condition, last.text + current.text,
134
138
  last.ts, current.te, @nesting, @set_nesting, @conditional_nesting)
@@ -1,6 +1,6 @@
1
1
  require 'regexp_parser/expression'
2
2
 
3
- module Regexp::Parser
3
+ class Regexp::Parser
4
4
  include Regexp::Expression
5
5
  include Regexp::Syntax
6
6
 
@@ -19,8 +19,14 @@ module Regexp::Parser
19
19
  end
20
20
 
21
21
  def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", &block)
22
- @nesting = [@root = @node = Root.new]
22
+ new.parse(input, syntax, &block)
23
+ end
24
+
25
+ def parse(input, syntax = "ruby/#{RUBY_VERSION}", &block)
26
+ @nesting = [@root = @node = Root.new(options_from_input(input))]
23
27
 
28
+ @options_stack = [@root.options]
29
+ @switching_options = false
24
30
  @conditional_nesting = []
25
31
 
26
32
  Regexp::Lexer.scan(input, syntax) do |token|
@@ -34,21 +40,33 @@ module Regexp::Parser
34
40
  end
35
41
  end
36
42
 
37
- def self.nest(exp)
43
+ private
44
+
45
+ def options_from_input(input)
46
+ return {} unless input.is_a?(::Regexp)
47
+
48
+ options = {}
49
+ options[:i] = true if input.options & ::Regexp::IGNORECASE != 0
50
+ options[:m] = true if input.options & ::Regexp::MULTILINE != 0
51
+ options[:x] = true if input.options & ::Regexp::EXTENDED != 0
52
+ options
53
+ end
54
+
55
+ def nest(exp)
38
56
  @nesting.push exp
39
57
 
40
58
  @node << exp
41
59
  @node = exp
42
60
  end
43
61
 
44
- def self.nest_conditional(exp)
62
+ def nest_conditional(exp)
45
63
  @conditional_nesting.push exp
46
64
 
47
65
  @node << exp
48
66
  @node = exp
49
67
  end
50
68
 
51
- def self.parse_token(token)
69
+ def parse_token(token)
52
70
  case token.type
53
71
  when :meta; meta(token)
54
72
  when :quantifier; quantifier(token)
@@ -66,7 +84,7 @@ module Regexp::Parser
66
84
  property(token)
67
85
 
68
86
  when :literal
69
- @node << Literal.new(token)
87
+ @node << Literal.new(token, active_opts)
70
88
  when :free_space
71
89
  free_space(token)
72
90
 
@@ -75,7 +93,7 @@ module Regexp::Parser
75
93
  end
76
94
  end
77
95
 
78
- def self.set(token)
96
+ def set(token)
79
97
  case token.token
80
98
  when :open
81
99
  open_set(token)
@@ -96,14 +114,14 @@ module Regexp::Parser
96
114
  end
97
115
  end
98
116
 
99
- def self.meta(token)
117
+ def meta(token)
100
118
  case token.token
101
119
  when :dot
102
- @node << CharacterType::Any.new(token)
120
+ @node << CharacterType::Any.new(token, active_opts)
103
121
  when :alternation
104
122
  unless @node.token == :alternation
105
123
  unless @node.last.is_a?(Alternation)
106
- alt = Alternation.new(token)
124
+ alt = Alternation.new(token, active_opts)
107
125
  seq = Alternative.new(alt.level, alt.set_level, alt.conditional_level)
108
126
 
109
127
  while @node.expressions.last
@@ -126,62 +144,62 @@ module Regexp::Parser
126
144
  end
127
145
  end
128
146
 
129
- def self.backref(token)
147
+ def backref(token)
130
148
  case token.token
131
149
  when :name_ref
132
- @node << Backreference::Name.new(token)
150
+ @node << Backreference::Name.new(token, active_opts)
133
151
  when :name_nest_ref
134
- @node << Backreference::NameNestLevel.new(token)
152
+ @node << Backreference::NameNestLevel.new(token, active_opts)
135
153
  when :name_call
136
- @node << Backreference::NameCall.new(token)
154
+ @node << Backreference::NameCall.new(token, active_opts)
137
155
  when :number, :number_ref
138
- @node << Backreference::Number.new(token)
156
+ @node << Backreference::Number.new(token, active_opts)
139
157
  when :number_rel_ref
140
- @node << Backreference::NumberRelative.new(token)
158
+ @node << Backreference::NumberRelative.new(token, active_opts)
141
159
  when :number_nest_ref
142
- @node << Backreference::NumberNestLevel.new(token)
160
+ @node << Backreference::NumberNestLevel.new(token, active_opts)
143
161
  when :number_call
144
- @node << Backreference::NumberCall.new(token)
162
+ @node << Backreference::NumberCall.new(token, active_opts)
145
163
  when :number_rel_call
146
- @node << Backreference::NumberCallRelative.new(token)
164
+ @node << Backreference::NumberCallRelative.new(token, active_opts)
147
165
  else
148
166
  raise UnknownTokenError.new('Backreference', token)
149
167
  end
150
168
  end
151
169
 
152
- def self.type(token)
170
+ def type(token)
153
171
  case token.token
154
172
  when :digit
155
- @node << CharacterType::Digit.new(token)
173
+ @node << CharacterType::Digit.new(token, active_opts)
156
174
  when :nondigit
157
- @node << CharacterType::NonDigit.new(token)
175
+ @node << CharacterType::NonDigit.new(token, active_opts)
158
176
  when :hex
159
- @node << CharacterType::Hex.new(token)
177
+ @node << CharacterType::Hex.new(token, active_opts)
160
178
  when :nonhex
161
- @node << CharacterType::NonHex.new(token)
179
+ @node << CharacterType::NonHex.new(token, active_opts)
162
180
  when :space
163
- @node << CharacterType::Space.new(token)
181
+ @node << CharacterType::Space.new(token, active_opts)
164
182
  when :nonspace
165
- @node << CharacterType::NonSpace.new(token)
183
+ @node << CharacterType::NonSpace.new(token, active_opts)
166
184
  when :word
167
- @node << CharacterType::Word.new(token)
185
+ @node << CharacterType::Word.new(token, active_opts)
168
186
  when :nonword
169
- @node << CharacterType::NonWord.new(token)
187
+ @node << CharacterType::NonWord.new(token, active_opts)
170
188
  when :linebreak
171
- @node << CharacterType::Linebreak.new(token)
189
+ @node << CharacterType::Linebreak.new(token, active_opts)
172
190
  when :xgrapheme
173
- @node << CharacterType::ExtendedGrapheme.new(token)
191
+ @node << CharacterType::ExtendedGrapheme.new(token, active_opts)
174
192
  else
175
193
  raise UnknownTokenError.new('CharacterType', token)
176
194
  end
177
195
  end
178
196
 
179
- def self.conditional(token)
197
+ def conditional(token)
180
198
  case token.token
181
199
  when :open
182
- nest_conditional(Conditional::Expression.new(token))
200
+ nest_conditional(Conditional::Expression.new(token, active_opts))
183
201
  when :condition
184
- @conditional_nesting.last.condition(Conditional::Condition.new(token))
202
+ @conditional_nesting.last.condition(Conditional::Condition.new(token, active_opts))
185
203
  @conditional_nesting.last.branch
186
204
  when :separator
187
205
  @conditional_nesting.last.branch
@@ -200,175 +218,174 @@ module Regexp::Parser
200
218
  end
201
219
  end
202
220
 
203
- def self.property(token)
204
- include Regexp::Expression::UnicodeProperty
221
+ include Regexp::Expression::UnicodeProperty
205
222
 
223
+ def property(token)
206
224
  case token.token
207
- when :alnum; @node << Alnum.new(token)
208
- when :alpha; @node << Alpha.new(token)
209
- when :any; @node << Any.new(token)
210
- when :ascii; @node << Ascii.new(token)
211
- when :blank; @node << Blank.new(token)
212
- when :cntrl; @node << Cntrl.new(token)
213
- when :digit; @node << Digit.new(token)
214
- when :graph; @node << Graph.new(token)
215
- when :lower; @node << Lower.new(token)
216
- when :print; @node << Print.new(token)
217
- when :punct; @node << Punct.new(token)
218
- when :space; @node << Space.new(token)
219
- when :upper; @node << Upper.new(token)
220
- when :word; @node << Word.new(token)
221
- when :xdigit; @node << Xdigit.new(token)
222
- when :newline; @node << Newline.new(token)
223
-
224
- when :letter_any; @node << Letter::Any.new(token)
225
- when :letter_uppercase; @node << Letter::Uppercase.new(token)
226
- when :letter_lowercase; @node << Letter::Lowercase.new(token)
227
- when :letter_titlecase; @node << Letter::Titlecase.new(token)
228
- when :letter_modifier; @node << Letter::Modifier.new(token)
229
- when :letter_other; @node << Letter::Other.new(token)
230
-
231
- when :mark_any; @node << Mark::Any.new(token)
232
- when :mark_nonspacing; @node << Mark::Nonspacing.new(token)
233
- when :mark_spacing; @node << Mark::Spacing.new(token)
234
- when :mark_enclosing; @node << Mark::Enclosing.new(token)
235
-
236
- when :number_any; @node << Number::Any.new(token)
237
- when :number_decimal; @node << Number::Decimal.new(token)
238
- when :number_letter; @node << Number::Letter.new(token)
239
- when :number_other; @node << Number::Other.new(token)
240
-
241
- when :punct_any; @node << Punctuation::Any.new(token)
242
- when :punct_connector; @node << Punctuation::Connector.new(token)
243
- when :punct_dash; @node << Punctuation::Dash.new(token)
244
- when :punct_open; @node << Punctuation::Open.new(token)
245
- when :punct_close; @node << Punctuation::Close.new(token)
246
- when :punct_initial; @node << Punctuation::Initial.new(token)
247
- when :punct_final; @node << Punctuation::Final.new(token)
248
- when :punct_other; @node << Punctuation::Other.new(token)
249
-
250
- when :separator_any; @node << Separator::Any.new(token)
251
- when :separator_space; @node << Separator::Space.new(token)
252
- when :separator_line; @node << Separator::Line.new(token)
253
- when :separator_para; @node << Separator::Paragraph.new(token)
254
-
255
- when :symbol_any; @node << Symbol::Any.new(token)
256
- when :symbol_math; @node << Symbol::Math.new(token)
257
- when :symbol_currency; @node << Symbol::Currency.new(token)
258
- when :symbol_modifier; @node << Symbol::Modifier.new(token)
259
- when :symbol_other; @node << Symbol::Other.new(token)
260
-
261
- when :other; @node << Codepoint::Any.new(token)
262
- when :control; @node << Codepoint::Control.new(token)
263
- when :format; @node << Codepoint::Format.new(token)
264
- when :surrogate; @node << Codepoint::Surrogate.new(token)
265
- when :private_use; @node << Codepoint::PrivateUse.new(token)
266
- when :unassigned; @node << Codepoint::Unassigned.new(token)
225
+ when :alnum; @node << Alnum.new(token, active_opts)
226
+ when :alpha; @node << Alpha.new(token, active_opts)
227
+ when :any; @node << Any.new(token, active_opts)
228
+ when :ascii; @node << Ascii.new(token, active_opts)
229
+ when :blank; @node << Blank.new(token, active_opts)
230
+ when :cntrl; @node << Cntrl.new(token, active_opts)
231
+ when :digit; @node << Digit.new(token, active_opts)
232
+ when :graph; @node << Graph.new(token, active_opts)
233
+ when :lower; @node << Lower.new(token, active_opts)
234
+ when :print; @node << Print.new(token, active_opts)
235
+ when :punct; @node << Punct.new(token, active_opts)
236
+ when :space; @node << Space.new(token, active_opts)
237
+ when :upper; @node << Upper.new(token, active_opts)
238
+ when :word; @node << Word.new(token, active_opts)
239
+ when :xdigit; @node << Xdigit.new(token, active_opts)
240
+ when :newline; @node << Newline.new(token, active_opts)
241
+
242
+ when :letter_any; @node << Letter::Any.new(token, active_opts)
243
+ when :letter_uppercase; @node << Letter::Uppercase.new(token, active_opts)
244
+ when :letter_lowercase; @node << Letter::Lowercase.new(token, active_opts)
245
+ when :letter_titlecase; @node << Letter::Titlecase.new(token, active_opts)
246
+ when :letter_modifier; @node << Letter::Modifier.new(token, active_opts)
247
+ when :letter_other; @node << Letter::Other.new(token, active_opts)
248
+
249
+ when :mark_any; @node << Mark::Any.new(token, active_opts)
250
+ when :mark_nonspacing; @node << Mark::Nonspacing.new(token, active_opts)
251
+ when :mark_spacing; @node << Mark::Spacing.new(token, active_opts)
252
+ when :mark_enclosing; @node << Mark::Enclosing.new(token, active_opts)
253
+
254
+ when :number_any; @node << Number::Any.new(token, active_opts)
255
+ when :number_decimal; @node << Number::Decimal.new(token, active_opts)
256
+ when :number_letter; @node << Number::Letter.new(token, active_opts)
257
+ when :number_other; @node << Number::Other.new(token, active_opts)
258
+
259
+ when :punct_any; @node << Punctuation::Any.new(token, active_opts)
260
+ when :punct_connector; @node << Punctuation::Connector.new(token, active_opts)
261
+ when :punct_dash; @node << Punctuation::Dash.new(token, active_opts)
262
+ when :punct_open; @node << Punctuation::Open.new(token, active_opts)
263
+ when :punct_close; @node << Punctuation::Close.new(token, active_opts)
264
+ when :punct_initial; @node << Punctuation::Initial.new(token, active_opts)
265
+ when :punct_final; @node << Punctuation::Final.new(token, active_opts)
266
+ when :punct_other; @node << Punctuation::Other.new(token, active_opts)
267
+
268
+ when :separator_any; @node << Separator::Any.new(token, active_opts)
269
+ when :separator_space; @node << Separator::Space.new(token, active_opts)
270
+ when :separator_line; @node << Separator::Line.new(token, active_opts)
271
+ when :separator_para; @node << Separator::Paragraph.new(token, active_opts)
272
+
273
+ when :symbol_any; @node << Symbol::Any.new(token, active_opts)
274
+ when :symbol_math; @node << Symbol::Math.new(token, active_opts)
275
+ when :symbol_currency; @node << Symbol::Currency.new(token, active_opts)
276
+ when :symbol_modifier; @node << Symbol::Modifier.new(token, active_opts)
277
+ when :symbol_other; @node << Symbol::Other.new(token, active_opts)
278
+
279
+ when :other; @node << Codepoint::Any.new(token, active_opts)
280
+ when :control; @node << Codepoint::Control.new(token, active_opts)
281
+ when :format; @node << Codepoint::Format.new(token, active_opts)
282
+ when :surrogate; @node << Codepoint::Surrogate.new(token, active_opts)
283
+ when :private_use; @node << Codepoint::PrivateUse.new(token, active_opts)
284
+ when :unassigned; @node << Codepoint::Unassigned.new(token, active_opts)
267
285
 
268
286
  when *Token::UnicodeProperty::Age
269
- @node << Age.new(token)
287
+ @node << Age.new(token, active_opts)
270
288
 
271
289
  when *Token::UnicodeProperty::Derived
272
- @node << Derived.new(token)
290
+ @node << Derived.new(token, active_opts)
273
291
 
274
292
  when *Regexp::Syntax::Token::UnicodeProperty::Script
275
- @node << Script.new(token)
293
+ @node << Script.new(token, active_opts)
276
294
 
277
295
  when *Regexp::Syntax::Token::UnicodeProperty::UnicodeBlock
278
- @node << Block.new(token)
296
+ @node << Block.new(token, active_opts)
279
297
 
280
298
  else
281
299
  raise UnknownTokenError.new('UnicodeProperty', token)
282
300
  end
283
301
  end
284
302
 
285
- def self.anchor(token)
303
+ def anchor(token)
286
304
  case token.token
287
305
  when :bol
288
- @node << Anchor::BeginningOfLine.new(token)
306
+ @node << Anchor::BeginningOfLine.new(token, active_opts)
289
307
  when :eol
290
- @node << Anchor::EndOfLine.new(token)
308
+ @node << Anchor::EndOfLine.new(token, active_opts)
291
309
  when :bos
292
- @node << Anchor::BOS.new(token)
310
+ @node << Anchor::BOS.new(token, active_opts)
293
311
  when :eos
294
- @node << Anchor::EOS.new(token)
312
+ @node << Anchor::EOS.new(token, active_opts)
295
313
  when :eos_ob_eol
296
- @node << Anchor::EOSobEOL.new(token)
314
+ @node << Anchor::EOSobEOL.new(token, active_opts)
297
315
  when :word_boundary
298
- @node << Anchor::WordBoundary.new(token)
316
+ @node << Anchor::WordBoundary.new(token, active_opts)
299
317
  when :nonword_boundary
300
- @node << Anchor::NonWordBoundary.new(token)
318
+ @node << Anchor::NonWordBoundary.new(token, active_opts)
301
319
  when :match_start
302
- @node << Anchor::MatchStart.new(token)
320
+ @node << Anchor::MatchStart.new(token, active_opts)
303
321
  else
304
322
  raise UnknownTokenError.new('Anchor', token)
305
323
  end
306
324
  end
307
325
 
308
- def self.escape(token)
326
+ def escape(token)
309
327
  case token.token
310
328
 
311
329
  when :backspace
312
- @node << EscapeSequence::Backspace.new(token)
330
+ @node << EscapeSequence::Backspace.new(token, active_opts)
313
331
 
314
332
  when :escape
315
- @node << EscapeSequence::AsciiEscape.new(token)
333
+ @node << EscapeSequence::AsciiEscape.new(token, active_opts)
316
334
  when :bell
317
- @node << EscapeSequence::Bell.new(token)
335
+ @node << EscapeSequence::Bell.new(token, active_opts)
318
336
  when :form_feed
319
- @node << EscapeSequence::FormFeed.new(token)
337
+ @node << EscapeSequence::FormFeed.new(token, active_opts)
320
338
  when :newline
321
- @node << EscapeSequence::Newline.new(token)
339
+ @node << EscapeSequence::Newline.new(token, active_opts)
322
340
  when :carriage
323
- @node << EscapeSequence::Return.new(token)
341
+ @node << EscapeSequence::Return.new(token, active_opts)
324
342
  when :space
325
- @node << EscapeSequence::Space.new(token)
343
+ @node << EscapeSequence::Space.new(token, active_opts)
326
344
  when :tab
327
- @node << EscapeSequence::Tab.new(token)
345
+ @node << EscapeSequence::Tab.new(token, active_opts)
328
346
  when :vertical_tab
329
- @node << EscapeSequence::VerticalTab.new(token)
347
+ @node << EscapeSequence::VerticalTab.new(token, active_opts)
330
348
 
331
349
  when :control
332
350
  if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
333
- @node << EscapeSequence::MetaControl.new(token)
351
+ @node << EscapeSequence::MetaControl.new(token, active_opts)
334
352
  else
335
- @node << EscapeSequence::Control.new(token)
353
+ @node << EscapeSequence::Control.new(token, active_opts)
336
354
  end
337
355
 
338
356
  when :meta_sequence
339
357
  if token.text =~ /\A\\M-\\[Cc]/
340
- @node << EscapeSequence::MetaControl.new(token)
358
+ @node << EscapeSequence::MetaControl.new(token, active_opts)
341
359
  else
342
- @node << EscapeSequence::Meta.new(token)
360
+ @node << EscapeSequence::Meta.new(token, active_opts)
343
361
  end
344
362
 
345
363
  else
346
364
  # treating everything else as a literal
347
- @node << EscapeSequence::Literal.new(token)
365
+ @node << EscapeSequence::Literal.new(token, active_opts)
348
366
  end
349
367
  end
350
368
 
351
-
352
- def self.keep(token)
353
- @node << Keep::Mark.new(token)
369
+ def keep(token)
370
+ @node << Keep::Mark.new(token, active_opts)
354
371
  end
355
372
 
356
- def self.free_space(token)
373
+ def free_space(token)
357
374
  case token.token
358
375
  when :comment
359
- @node << Comment.new(token)
376
+ @node << Comment.new(token, active_opts)
360
377
  when :whitespace
361
378
  if @node.last and @node.last.is_a?(WhiteSpace)
362
- @node.last.merge(WhiteSpace.new(token))
379
+ @node.last.merge(WhiteSpace.new(token, active_opts))
363
380
  else
364
- @node << WhiteSpace.new(token)
381
+ @node << WhiteSpace.new(token, active_opts)
365
382
  end
366
383
  else
367
384
  raise UnknownTokenError.new('FreeSpace', token)
368
385
  end
369
386
  end
370
387
 
371
- def self.quantifier(token)
388
+ def quantifier(token)
372
389
  offset = -1
373
390
  target_node = @node.expressions[offset]
374
391
  while target_node and target_node.is_a?(FreeSpace)
@@ -378,15 +395,6 @@ module Regexp::Parser
378
395
  raise ArgumentError.new("No valid target found for '#{token.text}' "+
379
396
  "quantifier") unless target_node
380
397
 
381
- unless target_node
382
- if token.token == :zero_or_one
383
- raise "Quantifier given without a target, or the syntax of the group " +
384
- "or its options is incorrect"
385
- else
386
- raise "Quantifier `#{token.text}' given without a target"
387
- end
388
- end
389
-
390
398
  case token.token
391
399
  when :zero_or_one
392
400
  target_node.quantify(:zero_or_one, token.text, 0, 1, :greedy)
@@ -417,7 +425,7 @@ module Regexp::Parser
417
425
  end
418
426
  end
419
427
 
420
- def self.interval(target_node, token)
428
+ def interval(target_node, token)
421
429
  text = token.text
422
430
  mchr = text[text.length-1].chr =~ /[?+]/ ? text[text.length-1].chr : nil
423
431
  case mchr
@@ -439,91 +447,113 @@ module Regexp::Parser
439
447
  target_node.quantify(:interval, text, min.to_i, max.to_i, mode)
440
448
  end
441
449
 
442
- def self.group(token)
450
+ def group(token)
443
451
  case token.token
444
452
  when :options
445
- options(token)
453
+ options_group(token)
446
454
  when :close
447
455
  close_group
448
456
  when :comment
449
- @node << Group::Comment.new(token)
457
+ @node << Group::Comment.new(token, active_opts)
450
458
  else
451
459
  open_group(token)
452
460
  end
453
461
  end
454
462
 
455
- def self.options(token)
456
- opt = token.text.split('-', 2)
463
+ def options_group(token)
464
+ positive, negative = token.text.split('-', 2)
465
+ negative ||= ''
466
+ @switching_options = !token.text.include?(':')
467
+ # TODO: change this -^ to token.type == :options_switch in v1.0.0
468
+
469
+ new_options = active_opts.dup
457
470
 
458
- exp = Group::Options.new(token)
459
- exp.options = {
460
- :m => opt[0].include?('m') ? true : false,
461
- :i => opt[0].include?('i') ? true : false,
462
- :x => opt[0].include?('x') ? true : false,
463
- :d => opt[0].include?('d') ? true : false,
464
- :a => opt[0].include?('a') ? true : false,
465
- :u => opt[0].include?('u') ? true : false
466
- }
471
+ # Negative options have precedence. E.g. /(?i-i)a/ is case-sensitive.
472
+ %w[i m x].each do |flag|
473
+ new_options[flag.to_sym] = true if positive.include?(flag)
474
+ new_options.delete(flag.to_sym) if negative.include?(flag)
475
+ end
476
+
477
+ # Any encoding flag overrides all previous encoding flags. If there are
478
+ # multiple encoding flags in an options string, the last one wins.
479
+ # E.g. /(?dau)\w/ matches UTF8 chars but /(?dua)\w/ only ASCII chars.
480
+ if (flag = positive.reverse[/[adu]/])
481
+ %w[a d u].each { |key| new_options.delete(key.to_sym) }
482
+ new_options[flag.to_sym] = true
483
+ end
484
+
485
+ @options_stack << new_options
486
+
487
+ exp = Group::Options.new(token, active_opts)
467
488
 
468
489
  nest(exp)
469
490
  end
470
491
 
471
- def self.open_group(token)
492
+ def open_group(token)
472
493
  case token.token
473
494
  when :passive
474
- exp = Group::Passive.new(token)
495
+ exp = Group::Passive.new(token, active_opts)
475
496
  when :atomic
476
- exp = Group::Atomic.new(token)
497
+ exp = Group::Atomic.new(token, active_opts)
477
498
  when :named
478
- exp = Group::Named.new(token)
499
+ exp = Group::Named.new(token, active_opts)
479
500
  when :capture
480
- exp = Group::Capture.new(token)
501
+ exp = Group::Capture.new(token, active_opts)
481
502
  when :absence
482
- exp = Group::Absence.new(token)
503
+ exp = Group::Absence.new(token, active_opts)
483
504
 
484
505
  when :lookahead
485
- exp = Assertion::Lookahead.new(token)
506
+ exp = Assertion::Lookahead.new(token, active_opts)
486
507
  when :nlookahead
487
- exp = Assertion::NegativeLookahead.new(token)
508
+ exp = Assertion::NegativeLookahead.new(token, active_opts)
488
509
  when :lookbehind
489
- exp = Assertion::Lookbehind.new(token)
510
+ exp = Assertion::Lookbehind.new(token, active_opts)
490
511
  when :nlookbehind
491
- exp = Assertion::NegativeLookbehind.new(token)
512
+ exp = Assertion::NegativeLookbehind.new(token, active_opts)
492
513
 
493
514
  else
494
515
  raise UnknownTokenError.new('Group type open', token)
495
516
  end
496
517
 
518
+ # Push the active options to the stack again. This way we can simply pop the
519
+ # stack for any group we close, no matter if it had its own options or not.
520
+ @options_stack << active_opts
521
+
497
522
  nest(exp)
498
523
  end
499
524
 
500
- def self.close_group
525
+ def close_group
501
526
  @nesting.pop
527
+ @options_stack.pop unless @switching_options
528
+ @switching_options = false
502
529
 
503
530
  @node = @nesting.last
504
531
  @node = @node.last if @node.last and @node.last.is_a?(Alternation)
505
532
  end
506
533
 
507
- def self.open_set(token)
534
+ def open_set(token)
508
535
  token.token = :character
509
536
 
510
537
  if token.type == :subset
511
- @set << CharacterSubSet.new(token)
538
+ @set << CharacterSubSet.new(token, active_opts)
512
539
  else
513
- @node << (@set = CharacterSet.new(token))
540
+ @node << (@set = CharacterSet.new(token, active_opts))
514
541
  end
515
542
  end
516
543
 
517
- def self.negate_set
544
+ def negate_set
518
545
  @set.negate
519
546
  end
520
547
 
521
- def self.append_set(token)
548
+ def append_set(token)
522
549
  @set << token.text
523
550
  end
524
551
 
525
- def self.close_set(token)
552
+ def close_set(token)
526
553
  @set.close
527
554
  end
528
555
 
556
+ def active_opts
557
+ @options_stack.last
558
+ end
529
559
  end # module Regexp::Parser