regexp_parser 0.1.6 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. checksums.yaml +4 -4
  2. data/ChangeLog +57 -0
  3. data/Gemfile +8 -0
  4. data/LICENSE +1 -1
  5. data/README.md +225 -206
  6. data/Rakefile +9 -3
  7. data/lib/regexp_parser.rb +7 -11
  8. data/lib/regexp_parser/expression.rb +72 -14
  9. data/lib/regexp_parser/expression/classes/alternation.rb +3 -16
  10. data/lib/regexp_parser/expression/classes/conditional.rb +57 -0
  11. data/lib/regexp_parser/expression/classes/free_space.rb +17 -0
  12. data/lib/regexp_parser/expression/classes/keep.rb +7 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +28 -7
  14. data/lib/regexp_parser/expression/methods/strfregexp.rb +113 -0
  15. data/lib/regexp_parser/expression/methods/tests.rb +116 -0
  16. data/lib/regexp_parser/expression/methods/traverse.rb +63 -0
  17. data/lib/regexp_parser/expression/quantifier.rb +10 -0
  18. data/lib/regexp_parser/expression/sequence.rb +45 -0
  19. data/lib/regexp_parser/expression/subexpression.rb +29 -1
  20. data/lib/regexp_parser/lexer.rb +31 -8
  21. data/lib/regexp_parser/parser.rb +118 -45
  22. data/lib/regexp_parser/scanner.rb +1745 -1404
  23. data/lib/regexp_parser/scanner/property.rl +57 -3
  24. data/lib/regexp_parser/scanner/scanner.rl +161 -34
  25. data/lib/regexp_parser/syntax.rb +12 -2
  26. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +3 -3
  27. data/lib/regexp_parser/syntax/ruby/1.9.3.rb +2 -7
  28. data/lib/regexp_parser/syntax/ruby/2.0.0.rb +4 -1
  29. data/lib/regexp_parser/syntax/ruby/2.1.4.rb +13 -0
  30. data/lib/regexp_parser/syntax/ruby/2.1.5.rb +13 -0
  31. data/lib/regexp_parser/syntax/ruby/2.1.rb +2 -2
  32. data/lib/regexp_parser/syntax/ruby/2.2.0.rb +16 -0
  33. data/lib/regexp_parser/syntax/ruby/2.2.rb +8 -0
  34. data/lib/regexp_parser/syntax/tokens.rb +19 -2
  35. data/lib/regexp_parser/syntax/tokens/conditional.rb +22 -0
  36. data/lib/regexp_parser/syntax/tokens/keep.rb +14 -0
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +45 -4
  38. data/lib/regexp_parser/token.rb +23 -8
  39. data/lib/regexp_parser/version.rb +5 -0
  40. data/regexp_parser.gemspec +35 -0
  41. data/test/expression/test_all.rb +6 -1
  42. data/test/expression/test_base.rb +19 -0
  43. data/test/expression/test_conditionals.rb +114 -0
  44. data/test/expression/test_free_space.rb +33 -0
  45. data/test/expression/test_set.rb +61 -0
  46. data/test/expression/test_strfregexp.rb +214 -0
  47. data/test/expression/test_subexpression.rb +24 -0
  48. data/test/expression/test_tests.rb +99 -0
  49. data/test/expression/test_to_h.rb +48 -0
  50. data/test/expression/test_to_s.rb +46 -0
  51. data/test/expression/test_traverse.rb +164 -0
  52. data/test/lexer/test_all.rb +16 -3
  53. data/test/lexer/test_conditionals.rb +101 -0
  54. data/test/lexer/test_keep.rb +24 -0
  55. data/test/lexer/test_literals.rb +51 -51
  56. data/test/lexer/test_nesting.rb +62 -62
  57. data/test/lexer/test_refcalls.rb +18 -20
  58. data/test/parser/test_all.rb +18 -3
  59. data/test/parser/test_alternation.rb +11 -14
  60. data/test/parser/test_conditionals.rb +148 -0
  61. data/test/parser/test_escapes.rb +29 -5
  62. data/test/parser/test_free_space.rb +139 -0
  63. data/test/parser/test_groups.rb +40 -0
  64. data/test/parser/test_keep.rb +21 -0
  65. data/test/scanner/test_all.rb +8 -2
  66. data/test/scanner/test_conditionals.rb +166 -0
  67. data/test/scanner/test_escapes.rb +8 -5
  68. data/test/scanner/test_free_space.rb +133 -0
  69. data/test/scanner/test_groups.rb +28 -0
  70. data/test/scanner/test_keep.rb +33 -0
  71. data/test/scanner/test_properties.rb +4 -0
  72. data/test/scanner/test_scripts.rb +71 -1
  73. data/test/syntax/ruby/test_1.9.3.rb +2 -2
  74. data/test/syntax/ruby/test_2.0.0.rb +38 -0
  75. data/test/syntax/ruby/test_2.2.0.rb +38 -0
  76. data/test/syntax/ruby/test_all.rb +1 -8
  77. data/test/syntax/ruby/test_files.rb +104 -0
  78. data/test/test_all.rb +2 -1
  79. data/test/token/test_all.rb +2 -0
  80. data/test/token/test_token.rb +109 -0
  81. metadata +75 -21
  82. data/VERSION.yml +0 -5
  83. data/lib/regexp_parser/ctype.rb +0 -48
  84. data/test/syntax/ruby/test_2.x.rb +0 -46
@@ -0,0 +1,116 @@
1
+ module Regexp::Expression
2
+ class Base
3
+
4
+ # Test if this expression has the given test_type, which can be either
5
+ # a symbol or an array of symbols to check against the expression's type.
6
+ #
7
+ # # is it a :group expression
8
+ # exp.type? :group
9
+ #
10
+ # # is it a :set, :subset, or :meta
11
+ # exp.type? [:set, :subset, :meta]
12
+ #
13
+ def type?(test_type)
14
+ case test_type
15
+ when Array
16
+ if test_type.include?(:*)
17
+ return (test_type.include?(type) or test_type.include?(:*))
18
+ else
19
+ return test_type.include?(type)
20
+ end
21
+ when Symbol
22
+ return (type == test_type or test_type == :*)
23
+ else
24
+ raise "Array or Symbol expected, #{test_type.class.name} given"
25
+ end
26
+ end
27
+
28
+ # Test if this expression has the given test_token, and optionally a given
29
+ # test_type.
30
+ #
31
+ # # Any expressions
32
+ # exp.is? :* # always returns true
33
+ #
34
+ # # is it a :capture
35
+ # exp.is? :capture
36
+ #
37
+ # # is it a :character and a :set
38
+ # exp.is? :character, :set
39
+ #
40
+ # # is it a :meta :dot
41
+ # exp.is? :dot, :meta
42
+ #
43
+ # # is it a :meta or :escape :dot
44
+ # exp.is? :dot, [:meta, :escape]
45
+ #
46
+ def is?(test_token, test_type = nil)
47
+ return true if test_token === :*
48
+ token == test_token and (test_type ? type?(test_type) : true)
49
+ end
50
+
51
+ # Test if this expression matches an entry in the given scope spec.
52
+ #
53
+ # A scope spec can be one of:
54
+ #
55
+ # . An array: Interpreted as a set of tokens, tested for inclusion
56
+ # of the expression's token.
57
+ #
58
+ # . A hash: Where the key is interpreted as the expression type
59
+ # and the value is either a symbol or an array. In this
60
+ # case, when the scope is a hash, one_of? calls itself to
61
+ # evaluate the key's value.
62
+ #
63
+ # . A symbol: matches the expression's token or type, depending on
64
+ # the level of the call. If one_of? is called directly with
65
+ # a symbol then it will always be checked against the
66
+ # type of the expression. If it's being called for a value
67
+ # from a hash, it will be checked against the token of the
68
+ # expression.
69
+ #
70
+ # # any expression
71
+ # exp.one_of?(:*) # always true
72
+ #
73
+ # # like exp.type?(:group)
74
+ # exp.one_of?(:group)
75
+ #
76
+ # # any expression of type meta
77
+ # exp.one_of?(:meta => :*)
78
+ #
79
+ # # meta dots and alternations
80
+ # exp.one_of?(:meta => [:dot, :alternation])
81
+ #
82
+ # # meta dots and any set tokens
83
+ # exp.one_of?({meta: [:dot], set: :*})
84
+ #
85
+ def one_of?(scope, top = true)
86
+ case scope
87
+ when Array
88
+ if scope.include?(:*)
89
+ return (scope.include?(token) or scope.include?(:*))
90
+ else
91
+ return scope.include?(token)
92
+ end
93
+
94
+ when Hash
95
+ if scope.has_key?(:*)
96
+ test_type = scope.has_key?(type) ? type : :*
97
+ return one_of?(scope[test_type], false)
98
+ else
99
+ return (scope.has_key?(type) and one_of?(scope[type], false))
100
+ end
101
+
102
+ when Symbol
103
+ return true if scope == :*
104
+
105
+ return is?(scope) unless top
106
+ return type?(scope) if top
107
+
108
+ else
109
+ raise "Array, Hash, or Symbol expected, #{scope.class.name} given"
110
+ end
111
+
112
+ false
113
+ end
114
+
115
+ end
116
+ end
@@ -0,0 +1,63 @@
1
+ module Regexp::Expression
2
+ class Subexpression < Regexp::Expression::Base
3
+
4
+ # Traverses the subexpression (depth-first, pre-order) and calls the given
5
+ # block for each expression with three arguments; the traversal event,
6
+ # the expression, and the index of the expression within its parent.
7
+ #
8
+ # The event argument is passed as follows:
9
+ #
10
+ # - For subexpressions, :enter upon entering the subexpression, and
11
+ # :exit upon exiting it.
12
+ #
13
+ # - For terminal expressions, :visit is called once.
14
+ #
15
+ # Returns self.
16
+ def traverse(include_self = false, &block)
17
+ raise 'traverse requires a block' unless block_given?
18
+
19
+ block.call(:enter, self, 0) if include_self
20
+
21
+ each_with_index do |exp, index|
22
+ if exp.terminal?
23
+ block.call(:visit, exp, index)
24
+ else
25
+ block.call(:enter, exp, index)
26
+ exp.traverse(&block)
27
+ block.call(:exit, exp, index)
28
+ end
29
+ end
30
+
31
+ block.call(:exit, self, 0) if include_self
32
+
33
+ self
34
+ end
35
+ alias :walk :traverse
36
+
37
+ # Iterates over the expressions of this expression as an array, passing
38
+ # the expression and its index within its parent to the given block.
39
+ def each_expression(include_self = false, &block)
40
+ traverse(include_self) do |event, exp, index|
41
+ yield(exp, index) unless event == :exit
42
+ end
43
+ end
44
+
45
+ # Returns a new array with the results of calling the given block once
46
+ # for every expression. If a block is not given, returns an array of
47
+ # [expression, index] pairs, one for each expression.
48
+ def map(include_self = false, &block)
49
+ result = []
50
+
51
+ each_expression(include_self) do |exp, index|
52
+ if block_given?
53
+ result << yield(exp, index)
54
+ else
55
+ result << [exp, index]
56
+ end
57
+ end
58
+
59
+ result
60
+ end
61
+
62
+ end
63
+ end
@@ -21,6 +21,16 @@ module Regexp::Expression
21
21
  @text.dup
22
22
  end
23
23
  alias :to_str :to_s
24
+
25
+ def to_h
26
+ {
27
+ :token => token,
28
+ :text => text,
29
+ :mode => mode,
30
+ :min => min,
31
+ :max => max
32
+ }
33
+ end
24
34
  end
25
35
 
26
36
  end
@@ -0,0 +1,45 @@
1
+ module Regexp::Expression
2
+
3
+ # A sequence of expressions. Differs from a Subexpression in how it handles
4
+ # quantifiers, as it applies them to its last element instead of itself as
5
+ # a whole subexpression.
6
+ #
7
+ # Used as the base class for the Alternation alternatives and Conditional
8
+ # branches.
9
+ class Sequence < Regexp::Expression::Subexpression
10
+ def initialize(level, set_level, conditional_level)
11
+ super Regexp::Token.new(
12
+ :expression,
13
+ :sequence,
14
+ '',
15
+ nil, # ts
16
+ nil, # te
17
+ level,
18
+ set_level,
19
+ conditional_level
20
+ )
21
+ end
22
+
23
+ def text
24
+ to_s
25
+ end
26
+
27
+ def starts_at
28
+ @expressions.first.starts_at
29
+ end
30
+
31
+ def quantify(token, text, min = nil, max = nil, mode = :greedy)
32
+ offset = -1
33
+ target = expressions[offset]
34
+ while target and target.is_a?(FreeSpace)
35
+ target = expressions[offset -= 1]
36
+ end
37
+
38
+ raise ArgumentError.new("No valid target found for '#{text}' " +
39
+ "quantifier") unless target
40
+
41
+ target.quantify(token, text, min, max, mode)
42
+ end
43
+ end
44
+
45
+ end
@@ -17,7 +17,12 @@ module Regexp::Expression
17
17
  end
18
18
 
19
19
  def <<(exp)
20
- @expressions << exp
20
+ if exp.is_a?(WhiteSpace) and @expressions.last and
21
+ @expressions.last.is_a?(WhiteSpace)
22
+ @expressions.last.merge(exp)
23
+ else
24
+ @expressions << exp
25
+ end
21
26
  end
22
27
 
23
28
  def insert(exp)
@@ -48,6 +53,22 @@ module Regexp::Expression
48
53
  @expressions.length
49
54
  end
50
55
 
56
+ def empty?
57
+ @expressions.empty?
58
+ end
59
+
60
+ def all?(&block)
61
+ @expressions.all? {|exp| yield(exp) }
62
+ end
63
+
64
+ def ts
65
+ starts_at
66
+ end
67
+
68
+ def te
69
+ ts + to_s.length
70
+ end
71
+
51
72
  def to_s(format = :full)
52
73
  s = ''
53
74
 
@@ -64,6 +85,13 @@ module Regexp::Expression
64
85
 
65
86
  s
66
87
  end
88
+
89
+ def to_h
90
+ h = super
91
+ h[:text] = to_s(:base)
92
+ h[:expressions] = @expressions.map(&:to_h)
93
+ h
94
+ end
67
95
  end
68
96
 
69
97
  end
@@ -10,11 +10,11 @@ module Regexp::Lexer
10
10
 
11
11
  CLOSING_TOKENS = [:close].freeze
12
12
 
13
- def self.scan(input, syntax = "ruby/#{RUBY_VERSION}", &block)
13
+ def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
14
14
  syntax = Regexp::Syntax.new(syntax)
15
15
 
16
16
  @tokens = []
17
- @nesting, @set_nesting = 0, 0
17
+ @nesting, @set_nesting, @conditional_nesting = 0, 0, 0
18
18
 
19
19
  last = nil
20
20
  Regexp::Scanner.scan(input) do |type, token, text, ts, te|
@@ -27,11 +27,14 @@ module Regexp::Lexer
27
27
  last and last.type == :literal
28
28
 
29
29
  current = Regexp::Token.new(type, token, text, ts, te,
30
- @nesting, @set_nesting)
30
+ @nesting, @set_nesting, @conditional_nesting)
31
31
 
32
32
  current = merge_literal(current) if type == :literal and
33
33
  last and last.type == :literal
34
34
 
35
+ current = merge_condition(current) if type == :conditional and
36
+ [:condition, :condition_close].include?(token)
37
+
35
38
  last.next(current) if last
36
39
  current.previous(last) if last
37
40
 
@@ -42,12 +45,18 @@ module Regexp::Lexer
42
45
  end
43
46
 
44
47
  if block_given?
45
- @tokens.each {|t| block.call(t)}
48
+ @tokens.map {|t| block.call(t)}
46
49
  else
47
50
  @tokens
48
51
  end
49
52
  end
50
53
 
54
+ class << self
55
+ alias :scan :lex
56
+ end
57
+
58
+ protected
59
+
51
60
  def self.ascend(type, token)
52
61
  if type == :group or type == :assertion
53
62
  @nesting -= 1 if CLOSING_TOKENS.include?(token)
@@ -56,6 +65,10 @@ module Regexp::Lexer
56
65
  if type == :set or type == :subset
57
66
  @set_nesting -= 1 if token == :close
58
67
  end
68
+
69
+ if type == :conditional
70
+ @conditional_nesting -= 1 if token == :close
71
+ end
59
72
  end
60
73
 
61
74
  def self.descend(type, token)
@@ -66,6 +79,10 @@ module Regexp::Lexer
66
79
  if type == :set or type == :subset
67
80
  @set_nesting += 1 if token == :open
68
81
  end
82
+
83
+ if type == :conditional
84
+ @conditional_nesting += 1 if token == :open
85
+ end
69
86
  end
70
87
 
71
88
  # called by scan to break a literal run that is longer than one character
@@ -86,11 +103,11 @@ module Regexp::Lexer
86
103
 
87
104
  @tokens.pop
88
105
  @tokens << Regexp::Token.new(:literal, :literal, lead, token.ts,
89
- (token.te - last_length), @nesting, @set_nesting)
106
+ (token.te - last_length), @nesting, @set_nesting, @conditional_nesting)
90
107
 
91
108
  @tokens << Regexp::Token.new(:literal, :literal, last,
92
- (token.ts + lead_length),
93
- token.te, @nesting, @set_nesting)
109
+ (token.ts + lead_length),
110
+ token.te, @nesting, @set_nesting, @conditional_nesting)
94
111
  end
95
112
  end
96
113
 
@@ -99,7 +116,13 @@ module Regexp::Lexer
99
116
  def self.merge_literal(current)
100
117
  last = @tokens.pop
101
118
  replace = Regexp::Token.new(:literal, :literal, last.text + current.text,
102
- last.ts, current.te, @nesting, @set_nesting)
119
+ last.ts, current.te, @nesting, @set_nesting, @conditional_nesting)
120
+ end
121
+
122
+ def self.merge_condition(current)
123
+ last = @tokens.pop
124
+ Regexp::Token.new(:conditional, :condition, last.text + current.text,
125
+ last.ts, current.te, @nesting, @set_nesting, @conditional_nesting)
103
126
  end
104
127
 
105
128
  end # module Regexp::Lexer
@@ -1,18 +1,14 @@
1
- require File.expand_path('../expression', __FILE__)
1
+ require 'regexp_parser/expression'
2
2
 
3
3
  module Regexp::Parser
4
4
  include Regexp::Expression
5
5
  include Regexp::Syntax
6
6
 
7
- class ParserError < StandardError
8
- def initialize(what)
9
- super what
10
- end
11
- end
7
+ class ParserError < StandardError; end
12
8
 
13
9
  class UnknownTokenTypeError < ParserError
14
10
  def initialize(type, token)
15
- super "Unknown #{type} type #{token.inspect}"
11
+ super "Unknown token type #{type} #{token.inspect}"
16
12
  end
17
13
  end
18
14
 
@@ -25,8 +21,10 @@ module Regexp::Parser
25
21
  def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", &block)
26
22
  @nesting = [@root = @node = Root.new]
27
23
 
24
+ @conditional_nesting = []
25
+
28
26
  Regexp::Lexer.scan(input, syntax) do |token|
29
- self.parse_token token
27
+ parse_token token
30
28
  end
31
29
 
32
30
  if block_given?
@@ -43,23 +41,34 @@ module Regexp::Parser
43
41
  @node = exp
44
42
  end
45
43
 
44
+ def self.nest_conditional(exp)
45
+ @conditional_nesting.push exp
46
+
47
+ @node << exp
48
+ @node = exp
49
+ end
50
+
46
51
  def self.parse_token(token)
47
52
  case token.type
48
- when :meta; self.meta(token)
49
- when :quantifier; self.quantifier(token)
50
- when :anchor; self.anchor(token)
51
- when :escape; self.escape(token)
52
- when :group; self.group(token)
53
- when :assertion; self.group(token)
54
- when :set, :subset; self.set(token)
55
- when :type; self.type(token)
56
- when :backref; self.backref(token)
53
+ when :meta; meta(token)
54
+ when :quantifier; quantifier(token)
55
+ when :anchor; anchor(token)
56
+ when :escape; escape(token)
57
+ when :group; group(token)
58
+ when :assertion; group(token)
59
+ when :set, :subset; set(token)
60
+ when :type; type(token)
61
+ when :backref; backref(token)
62
+ when :conditional; conditional(token)
63
+ when :keep; keep(token)
57
64
 
58
65
  when :property, :nonproperty
59
- self.property(token)
66
+ property(token)
60
67
 
61
68
  when :literal
62
69
  @node << Literal.new(token)
70
+ when :free_space
71
+ free_space(token)
63
72
 
64
73
  else
65
74
  raise UnknownTokenTypeError.new(token.type, token)
@@ -69,19 +78,19 @@ module Regexp::Parser
69
78
  def self.set(token)
70
79
  case token.token
71
80
  when :open
72
- self.open_set(token)
81
+ open_set(token)
73
82
  when :close
74
- self.close_set(token)
83
+ close_set(token)
75
84
  when :negate
76
- self.negate_set
85
+ negate_set
77
86
  when :member, :range, :escape, :collation, :equivalent
78
- self.append_set(token)
87
+ append_set(token)
79
88
  when *Token::Escape::All
80
- self.append_set(token)
89
+ append_set(token)
81
90
  when *Token::CharacterSet::All
82
- self.append_set(token)
91
+ append_set(token)
83
92
  when *Token::UnicodeProperty::All
84
- self.append_set(token)
93
+ append_set(token)
85
94
  else
86
95
  raise UnknownTokenError.new('CharacterSet', token)
87
96
  end
@@ -95,7 +104,7 @@ module Regexp::Parser
95
104
  unless @node.token == :alternation
96
105
  unless @node.last.is_a?(Alternation)
97
106
  alt = Alternation.new(token)
98
- seq = Sequence.new
107
+ seq = Alternative.new(alt.level, alt.set_level, alt.conditional_level)
99
108
 
100
109
  while @node.expressions.last
101
110
  seq.insert @node.expressions.pop
@@ -163,6 +172,30 @@ module Regexp::Parser
163
172
  end
164
173
  end
165
174
 
175
+ def self.conditional(token)
176
+ case token.token
177
+ when :open
178
+ nest_conditional(Conditional::Expression.new(token))
179
+ when :condition
180
+ @conditional_nesting.last.condition(Conditional::Condition.new(token))
181
+ @conditional_nesting.last.branch
182
+ when :separator
183
+ @conditional_nesting.last.branch
184
+ @node = @conditional_nesting.last.branches.last
185
+ when :close
186
+ @conditional_nesting.pop
187
+
188
+ @node = if @conditional_nesting.empty?
189
+ @nesting.last
190
+ else
191
+ @conditional_nesting.last
192
+ end
193
+
194
+ else
195
+ raise UnknownTokenError.new('Conditional', token)
196
+ end
197
+ end
198
+
166
199
  def self.property(token)
167
200
  include Regexp::Expression::UnicodeProperty
168
201
 
@@ -291,14 +324,50 @@ module Regexp::Parser
291
324
  when :control
292
325
  @node << EscapeSequence::Control.new(token)
293
326
 
327
+ when :meta_sequence
328
+ if token.text =~ /\A\\M-\\C/
329
+ @node << EscapeSequence::MetaControl.new(token)
330
+ else
331
+ @node << EscapeSequence::Meta.new(token)
332
+ end
333
+
294
334
  else
295
335
  # treating everything else as a literal
296
336
  @node << EscapeSequence::Literal.new(token)
297
337
  end
298
338
  end
299
339
 
340
+
341
+ def self.keep(token)
342
+ @node << Keep::Mark.new(token)
343
+ end
344
+
345
+ def self.free_space(token)
346
+ case token.token
347
+ when :comment
348
+ @node << Comment.new(token)
349
+ when :whitespace
350
+ if @node.last and @node.last.is_a?(WhiteSpace)
351
+ @node.last.merge(WhiteSpace.new(token))
352
+ else
353
+ @node << WhiteSpace.new(token)
354
+ end
355
+ else
356
+ raise UnknownTokenError.new('FreeSpace', token)
357
+ end
358
+ end
359
+
300
360
  def self.quantifier(token)
301
- unless @node.expressions.last
361
+ offset = -1
362
+ target_node = @node.expressions[offset]
363
+ while target_node and target_node.is_a?(FreeSpace)
364
+ target_node = @node.expressions[offset -= 1]
365
+ end
366
+
367
+ raise ArgumentError.new("No valid target found for '#{token.text}' "+
368
+ "quantifier") unless target_node
369
+
370
+ unless target_node
302
371
  if token.token == :zero_or_one
303
372
  raise "Quantifier given without a target, or the syntax of the group " +
304
373
  "or its options is incorrect"
@@ -309,35 +378,36 @@ module Regexp::Parser
309
378
 
310
379
  case token.token
311
380
  when :zero_or_one
312
- @node.expressions.last.quantify(:zero_or_one, token.text, 0, 1, :greedy)
381
+ target_node.quantify(:zero_or_one, token.text, 0, 1, :greedy)
313
382
  when :zero_or_one_reluctant
314
- @node.expressions.last.quantify(:zero_or_one, token.text, 0, 1, :reluctant)
383
+ target_node.quantify(:zero_or_one, token.text, 0, 1, :reluctant)
315
384
  when :zero_or_one_possessive
316
- @node.expressions.last.quantify(:zero_or_one, token.text, 0, 1, :possessive)
385
+ target_node.quantify(:zero_or_one, token.text, 0, 1, :possessive)
317
386
 
318
387
  when :zero_or_more
319
- @node.expressions.last.quantify(:zero_or_more, token.text, 0, -1, :greedy)
388
+ target_node.quantify(:zero_or_more, token.text, 0, -1, :greedy)
320
389
  when :zero_or_more_reluctant
321
- @node.expressions.last.quantify(:zero_or_more, token.text, 0, -1, :reluctant)
390
+ target_node.quantify(:zero_or_more, token.text, 0, -1, :reluctant)
322
391
  when :zero_or_more_possessive
323
- @node.expressions.last.quantify(:zero_or_more, token.text, 0, -1, :possessive)
392
+ target_node.quantify(:zero_or_more, token.text, 0, -1, :possessive)
324
393
 
325
394
  when :one_or_more
326
- @node.expressions.last.quantify(:one_or_more, token.text, 1, -1, :greedy)
395
+ target_node.quantify(:one_or_more, token.text, 1, -1, :greedy)
327
396
  when :one_or_more_reluctant
328
- @node.expressions.last.quantify(:one_or_more, token.text, 1, -1, :reluctant)
397
+ target_node.quantify(:one_or_more, token.text, 1, -1, :reluctant)
329
398
  when :one_or_more_possessive
330
- @node.expressions.last.quantify(:one_or_more, token.text, 1, -1, :possessive)
399
+ target_node.quantify(:one_or_more, token.text, 1, -1, :possessive)
331
400
 
332
401
  when :interval
333
- self.interval(token.text)
402
+ interval(target_node, token)
334
403
 
335
404
  else
336
405
  raise UnknownTokenError.new('Quantifier', token)
337
406
  end
338
407
  end
339
408
 
340
- def self.interval(text)
409
+ def self.interval(target_node, token)
410
+ text = token.text
341
411
  mchr = text[text.length-1].chr =~ /[?+]/ ? text[text.length-1].chr : nil
342
412
  mode = case mchr
343
413
  when '?'; text.chop!; :reluctant
@@ -349,19 +419,19 @@ module Regexp::Parser
349
419
  min = range[0].empty? ? 0 : range[0]
350
420
  max = range[1] ? (range[1].empty? ? -1 : range[1]) : min
351
421
 
352
- @node.expressions.last.quantify(:interval, text, min.to_i, max.to_i, mode)
422
+ target_node.quantify(:interval, text, min.to_i, max.to_i, mode)
353
423
  end
354
424
 
355
425
  def self.group(token)
356
426
  case token.token
357
427
  when :options
358
- self.options(token)
428
+ options(token)
359
429
  when :close
360
- self.close_group
430
+ close_group
361
431
  when :comment
362
432
  @node << Group::Comment.new(token)
363
433
  else
364
- self.open_group(token)
434
+ open_group(token)
365
435
  end
366
436
  end
367
437
 
@@ -372,10 +442,13 @@ module Regexp::Parser
372
442
  exp.options = {
373
443
  :m => opt[0].include?('m') ? true : false,
374
444
  :i => opt[0].include?('i') ? true : false,
375
- :x => opt[0].include?('x') ? true : false
445
+ :x => opt[0].include?('x') ? true : false,
446
+ :d => opt[0].include?('d') ? true : false,
447
+ :a => opt[0].include?('a') ? true : false,
448
+ :u => opt[0].include?('u') ? true : false
376
449
  }
377
450
 
378
- self.nest exp
451
+ nest(exp)
379
452
  end
380
453
 
381
454
  def self.open_group(token)
@@ -402,7 +475,7 @@ module Regexp::Parser
402
475
  raise UnknownTokenError.new('Group type open', token)
403
476
  end
404
477
 
405
- self.nest exp
478
+ nest(exp)
406
479
  end
407
480
 
408
481
  def self.close_group