regexp_parser 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. checksums.yaml +4 -4
  2. data/ChangeLog +57 -0
  3. data/Gemfile +8 -0
  4. data/LICENSE +1 -1
  5. data/README.md +225 -206
  6. data/Rakefile +9 -3
  7. data/lib/regexp_parser.rb +7 -11
  8. data/lib/regexp_parser/expression.rb +72 -14
  9. data/lib/regexp_parser/expression/classes/alternation.rb +3 -16
  10. data/lib/regexp_parser/expression/classes/conditional.rb +57 -0
  11. data/lib/regexp_parser/expression/classes/free_space.rb +17 -0
  12. data/lib/regexp_parser/expression/classes/keep.rb +7 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +28 -7
  14. data/lib/regexp_parser/expression/methods/strfregexp.rb +113 -0
  15. data/lib/regexp_parser/expression/methods/tests.rb +116 -0
  16. data/lib/regexp_parser/expression/methods/traverse.rb +63 -0
  17. data/lib/regexp_parser/expression/quantifier.rb +10 -0
  18. data/lib/regexp_parser/expression/sequence.rb +45 -0
  19. data/lib/regexp_parser/expression/subexpression.rb +29 -1
  20. data/lib/regexp_parser/lexer.rb +31 -8
  21. data/lib/regexp_parser/parser.rb +118 -45
  22. data/lib/regexp_parser/scanner.rb +1745 -1404
  23. data/lib/regexp_parser/scanner/property.rl +57 -3
  24. data/lib/regexp_parser/scanner/scanner.rl +161 -34
  25. data/lib/regexp_parser/syntax.rb +12 -2
  26. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +3 -3
  27. data/lib/regexp_parser/syntax/ruby/1.9.3.rb +2 -7
  28. data/lib/regexp_parser/syntax/ruby/2.0.0.rb +4 -1
  29. data/lib/regexp_parser/syntax/ruby/2.1.4.rb +13 -0
  30. data/lib/regexp_parser/syntax/ruby/2.1.5.rb +13 -0
  31. data/lib/regexp_parser/syntax/ruby/2.1.rb +2 -2
  32. data/lib/regexp_parser/syntax/ruby/2.2.0.rb +16 -0
  33. data/lib/regexp_parser/syntax/ruby/2.2.rb +8 -0
  34. data/lib/regexp_parser/syntax/tokens.rb +19 -2
  35. data/lib/regexp_parser/syntax/tokens/conditional.rb +22 -0
  36. data/lib/regexp_parser/syntax/tokens/keep.rb +14 -0
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +45 -4
  38. data/lib/regexp_parser/token.rb +23 -8
  39. data/lib/regexp_parser/version.rb +5 -0
  40. data/regexp_parser.gemspec +35 -0
  41. data/test/expression/test_all.rb +6 -1
  42. data/test/expression/test_base.rb +19 -0
  43. data/test/expression/test_conditionals.rb +114 -0
  44. data/test/expression/test_free_space.rb +33 -0
  45. data/test/expression/test_set.rb +61 -0
  46. data/test/expression/test_strfregexp.rb +214 -0
  47. data/test/expression/test_subexpression.rb +24 -0
  48. data/test/expression/test_tests.rb +99 -0
  49. data/test/expression/test_to_h.rb +48 -0
  50. data/test/expression/test_to_s.rb +46 -0
  51. data/test/expression/test_traverse.rb +164 -0
  52. data/test/lexer/test_all.rb +16 -3
  53. data/test/lexer/test_conditionals.rb +101 -0
  54. data/test/lexer/test_keep.rb +24 -0
  55. data/test/lexer/test_literals.rb +51 -51
  56. data/test/lexer/test_nesting.rb +62 -62
  57. data/test/lexer/test_refcalls.rb +18 -20
  58. data/test/parser/test_all.rb +18 -3
  59. data/test/parser/test_alternation.rb +11 -14
  60. data/test/parser/test_conditionals.rb +148 -0
  61. data/test/parser/test_escapes.rb +29 -5
  62. data/test/parser/test_free_space.rb +139 -0
  63. data/test/parser/test_groups.rb +40 -0
  64. data/test/parser/test_keep.rb +21 -0
  65. data/test/scanner/test_all.rb +8 -2
  66. data/test/scanner/test_conditionals.rb +166 -0
  67. data/test/scanner/test_escapes.rb +8 -5
  68. data/test/scanner/test_free_space.rb +133 -0
  69. data/test/scanner/test_groups.rb +28 -0
  70. data/test/scanner/test_keep.rb +33 -0
  71. data/test/scanner/test_properties.rb +4 -0
  72. data/test/scanner/test_scripts.rb +71 -1
  73. data/test/syntax/ruby/test_1.9.3.rb +2 -2
  74. data/test/syntax/ruby/test_2.0.0.rb +38 -0
  75. data/test/syntax/ruby/test_2.2.0.rb +38 -0
  76. data/test/syntax/ruby/test_all.rb +1 -8
  77. data/test/syntax/ruby/test_files.rb +104 -0
  78. data/test/test_all.rb +2 -1
  79. data/test/token/test_all.rb +2 -0
  80. data/test/token/test_token.rb +109 -0
  81. metadata +75 -21
  82. data/VERSION.yml +0 -5
  83. data/lib/regexp_parser/ctype.rb +0 -48
  84. data/test/syntax/ruby/test_2.x.rb +0 -46
data/lib/regexp_parser/expression/methods/tests.rb
@@ -0,0 +1,116 @@
+ module Regexp::Expression
+ class Base
+
+ # Test if this expression has the given test_type, which can be either
+ # a symbol or an array of symbols to check against the expression's type.
+ #
+ # # is it a :group expression
+ # exp.type? :group
+ #
+ # # is it a :set, :subset, or :meta
+ # exp.type? [:set, :subset, :meta]
+ #
+ def type?(test_type)
+ case test_type
+ when Array
+ if test_type.include?(:*)
+ return (test_type.include?(type) or test_type.include?(:*))
+ else
+ return test_type.include?(type)
+ end
+ when Symbol
+ return (type == test_type or test_type == :*)
+ else
+ raise "Array or Symbol expected, #{test_type.class.name} given"
+ end
+ end
+
+ # Test if this expression has the given test_token, and optionally a given
+ # test_type.
+ #
+ # # Any expressions
+ # exp.is? :* # always returns true
+ #
+ # # is it a :capture
+ # exp.is? :capture
+ #
+ # # is it a :character and a :set
+ # exp.is? :character, :set
+ #
+ # # is it a :meta :dot
+ # exp.is? :dot, :meta
+ #
+ # # is it a :meta or :escape :dot
+ # exp.is? :dot, [:meta, :escape]
+ #
+ def is?(test_token, test_type = nil)
+ return true if test_token === :*
+ token == test_token and (test_type ? type?(test_type) : true)
+ end
+
+ # Test if this expression matches an entry in the given scope spec.
+ #
+ # A scope spec can be one of:
+ #
+ # . An array: Interpreted as a set of tokens, tested for inclusion
+ # of the expression's token.
+ #
+ # . A hash: Where the key is interpreted as the expression type
+ # and the value is either a symbol or an array. In this
+ # case, when the scope is a hash, one_of? calls itself to
+ # evaluate the key's value.
+ #
+ # . A symbol: matches the expression's token or type, depending on
+ # the level of the call. If one_of? is called directly with
+ # a symbol then it will always be checked against the
+ # type of the expression. If it's being called for a value
+ # from a hash, it will be checked against the token of the
+ # expression.
+ #
+ # # any expression
+ # exp.one_of?(:*) # always true
+ #
+ # # like exp.type?(:group)
+ # exp.one_of?(:group)
+ #
+ # # any expression of type meta
+ # exp.one_of?(:meta => :*)
+ #
+ # # meta dots and alternations
+ # exp.one_of?(:meta => [:dot, :alternation])
+ #
+ # # meta dots and any set tokens
+ # exp.one_of?({meta: [:dot], set: :*})
+ #
+ def one_of?(scope, top = true)
+ case scope
+ when Array
+ if scope.include?(:*)
+ return (scope.include?(token) or scope.include?(:*))
+ else
+ return scope.include?(token)
+ end
+
+ when Hash
+ if scope.has_key?(:*)
+ test_type = scope.has_key?(type) ? type : :*
+ return one_of?(scope[test_type], false)
+ else
+ return (scope.has_key?(type) and one_of?(scope[type], false))
+ end
+
+ when Symbol
+ return true if scope == :*
+
+ return is?(scope) unless top
+ return type?(scope) if top
+
+ else
+ raise "Array, Hash, or Symbol expected, #{scope.class.name} given"
+ end
+
+ false
+ end
+
+ end
+ end
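
For reference, a minimal usage sketch of the type?/is?/one_of? predicates added in tests.rb above, assuming regexp_parser 0.2.0 is installed; the values in the comments are expected results, not captured output:

require 'regexp_parser'

root = Regexp::Parser.parse(/(ab)/)
exp  = root.expressions.first       # the capture group

exp.type?(:group)                   # => true
exp.type?([:set, :group])           # => true, any listed type matches
exp.is?(:capture)                   # => true, token match
exp.is?(:capture, :group)           # => true, token and type both match
exp.one_of?(:group => [:capture])   # => true
exp.one_of?(:meta)                  # => false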
data/lib/regexp_parser/expression/methods/traverse.rb
@@ -0,0 +1,63 @@
+ module Regexp::Expression
+ class Subexpression < Regexp::Expression::Base
+
+ # Traverses the subexpression (depth-first, pre-order) and calls the given
+ # block for each expression with three arguments; the traversal event,
+ # the expression, and the index of the expression within its parent.
+ #
+ # The event argument is passed as follows:
+ #
+ # - For subexpressions, :enter upon entrering the subexpression, and
+ # :exit upon exiting it.
+ #
+ # - For terminal expressions, :visit is called once.
+ #
+ # Returns self.
+ def traverse(include_self = false, &block)
+ raise 'traverse requires a block' unless block_given?
+
+ block.call(:enter, self, 0) if include_self
+
+ each_with_index do |exp, index|
+ if exp.terminal?
+ block.call(:visit, exp, index)
+ else
+ block.call(:enter, exp, index)
+ exp.traverse(&block)
+ block.call(:exit, exp, index)
+ end
+ end
+
+ block.call(:exit, self, 0) if include_self
+
+ self
+ end
+ alias :walk :traverse
+
+ # Iterates over the expressions of this expression as an array, passing
+ # the expression and its index within its parent to the given block.
+ def each_expression(include_self = false, &block)
+ traverse(include_self) do |event, exp, index|
+ yield(exp, index) unless event == :exit
+ end
+ end
+
+ # Returns a new array with the results of calling the given block once
+ # for every expression. If a block is not given, returns an array with
+ # each expression and its level index as an array.
+ def map(include_self = false, &block)
+ result = []
+
+ each_expression(include_self) do |exp, index|
+ if block_given?
+ result << yield(exp, index)
+ else
+ result << [exp, index]
+ end
+ end
+
+ result
+ end
+
+ end
+ end
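
For reference, a minimal sketch of the traversal helpers added in traverse.rb above (traverse, each_expression, map), assuming regexp_parser 0.2.0; the output shape is indicative only:

require 'regexp_parser'

root = Regexp::Parser.parse(/a(b(c))/)

# Depth-first, pre-order walk: subexpressions yield :enter and :exit,
# terminal expressions yield :visit.
root.traverse do |event, exp, index|
  puts "#{event} #{exp.type}:#{exp.token} at #{index}"
end

# each_expression yields every expression exactly once; map without a
# block collects [expression, index] pairs.
root.each_expression {|exp, index| puts exp.to_s }
pairs = root.map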
data/lib/regexp_parser/expression/quantifier.rb
@@ -21,6 +21,16 @@ module Regexp::Expression
  @text.dup
  end
  alias :to_str :to_s
+
+ def to_h
+ {
+ :token => token,
+ :text => text,
+ :mode => mode,
+ :min => min,
+ :max => max
+ }
+ end
  end
 
  end
data/lib/regexp_parser/expression/sequence.rb
@@ -0,0 +1,45 @@
+ module Regexp::Expression
+
+ # A sequence of expressions. Differs from a Subexpressions by how it handles
+ # quantifiers, as it applies them to its last element instead of itself as
+ # a whole subexpression.
+ #
+ # Used as the base class for the Alternation alternatives and Conditional
+ # branches.
+ class Sequence < Regexp::Expression::Subexpression
+ def initialize(level, set_level, conditional_level)
+ super Regexp::Token.new(
+ :expression,
+ :sequence,
+ '',
+ nil, # ts
+ nil, # te
+ level,
+ set_level,
+ conditional_level
+ )
+ end
+
+ def text
+ to_s
+ end
+
+ def starts_at
+ @expressions.first.starts_at
+ end
+
+ def quantify(token, text, min = nil, max = nil, mode = :greedy)
+ offset = -1
+ target = expressions[offset]
+ while target and target.is_a?(FreeSpace)
+ target = expressions[offset -= 1]
+ end
+
+ raise ArgumentError.new("No valid target found for '#{text}' " +
+ "quantifier") unless target
+
+ target.quantify(token, text, min, max, mode)
+ end
+ end
+
+ end
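
A short sketch of the behaviour described in the class comment above: a quantifier that follows an alternative is applied to that alternative's last element, not to the branch as a whole. Assuming regexp_parser 0.2.0; the result in the comment is the expected value:

require 'regexp_parser'

root        = Regexp::Parser.parse(/ab|cd+/)
alternation = root.expressions.first

# The '+' ends up on the final literal of the second branch.
alternation.expressions.map {|alt| alt.to_s }  # => ["ab", "cd+"]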
data/lib/regexp_parser/expression/subexpression.rb
@@ -17,7 +17,12 @@ module Regexp::Expression
  end
 
  def <<(exp)
- @expressions << exp
+ if exp.is_a?(WhiteSpace) and @expressions.last and
+ @expressions.last.is_a?(WhiteSpace)
+ @expressions.last.merge(exp)
+ else
+ @expressions << exp
+ end
  end
 
  def insert(exp)
@@ -48,6 +53,22 @@ module Regexp::Expression
  @expressions.length
  end
 
+ def empty?
+ @expressions.empty?
+ end
+
+ def all?(&block)
+ @expressions.all? {|exp| yield(exp) }
+ end
+
+ def ts
+ starts_at
+ end
+
+ def te
+ ts + to_s.length
+ end
+
  def to_s(format = :full)
  s = ''
 
@@ -64,6 +85,13 @@ module Regexp::Expression
 
  s
  end
+
+ def to_h
+ h = super
+ h[:text] = to_s(:base)
+ h[:expressions] = @expressions.map(&:to_h)
+ h
+ end
  end
 
  end
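
For reference, a minimal sketch of the new to_h serialization: each subexpression returns its own hash plus an :expressions array built from its children. Assuming regexp_parser 0.2.0; results are expected, not verified:

require 'regexp_parser'

tree = Regexp::Parser.parse(/a+b/).to_h
tree[:expressions].length                     # => 2, the quantified 'a' and the literal 'b'
tree[:expressions].all? {|e| e.is_a?(Hash) }  # => true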
data/lib/regexp_parser/lexer.rb
@@ -10,11 +10,11 @@ module Regexp::Lexer
 
  CLOSING_TOKENS = [:close].freeze
 
- def self.scan(input, syntax = "ruby/#{RUBY_VERSION}", &block)
+ def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
  syntax = Regexp::Syntax.new(syntax)
 
  @tokens = []
- @nesting, @set_nesting = 0, 0
+ @nesting, @set_nesting, @conditional_nesting = 0, 0, 0
 
  last = nil
  Regexp::Scanner.scan(input) do |type, token, text, ts, te|
@@ -27,11 +27,14 @@ module Regexp::Lexer
  last and last.type == :literal
 
  current = Regexp::Token.new(type, token, text, ts, te,
- @nesting, @set_nesting)
+ @nesting, @set_nesting, @conditional_nesting)
 
  current = merge_literal(current) if type == :literal and
  last and last.type == :literal
 
+ current = merge_condition(current) if type == :conditional and
+ [:condition, :condition_close].include?(token)
+
  last.next(current) if last
  current.previous(last) if last
 
@@ -42,12 +45,18 @@
  end
 
  if block_given?
- @tokens.each {|t| block.call(t)}
+ @tokens.map {|t| block.call(t)}
  else
  @tokens
  end
  end
 
+ class << self
+ alias :scan :lex
+ end
+
+ protected
+
  def self.ascend(type, token)
  if type == :group or type == :assertion
  @nesting -= 1 if CLOSING_TOKENS.include?(token)
@@ -56,6 +65,10 @@
  if type == :set or type == :subset
  @set_nesting -= 1 if token == :close
  end
+
+ if type == :conditional
+ @conditional_nesting -= 1 if token == :close
+ end
  end
 
  def self.descend(type, token)
@@ -66,6 +79,10 @@
  if type == :set or type == :subset
  @set_nesting += 1 if token == :open
  end
+
+ if type == :conditional
+ @conditional_nesting += 1 if token == :open
+ end
  end
 
  # called by scan to break a literal run that is longer than one character
@@ -86,11 +103,11 @@
 
  @tokens.pop
  @tokens << Regexp::Token.new(:literal, :literal, lead, token.ts,
- (token.te - last_length), @nesting, @set_nesting)
+ (token.te - last_length), @nesting, @set_nesting, @conditional_nesting)
 
  @tokens << Regexp::Token.new(:literal, :literal, last,
- (token.ts + lead_length),
- token.te, @nesting, @set_nesting)
+ (token.ts + lead_length),
+ token.te, @nesting, @set_nesting, @conditional_nesting)
  end
  end
 
@@ -99,7 +116,13 @@
  def self.merge_literal(current)
  last = @tokens.pop
  replace = Regexp::Token.new(:literal, :literal, last.text + current.text,
- last.ts, current.te, @nesting, @set_nesting)
+ last.ts, current.te, @nesting, @set_nesting, @conditional_nesting)
+ end
+
+ def self.merge_condition(current)
+ last = @tokens.pop
+ Regexp::Token.new(:conditional, :condition, last.text + current.text,
+ last.ts, current.te, @nesting, @set_nesting, @conditional_nesting)
  end
 
  end # module Regexp::Lexer
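
For reference, the lexer's entry point is now named lex, with scan kept as an alias for backward compatibility. A minimal sketch assuming regexp_parser 0.2.0; the token list in the comment is illustrative:

require 'regexp_parser'

tokens = Regexp::Lexer.lex(/(?<year>\d{4})-\d{2}/)
tokens.map {|t| [t.type, t.token, t.text] }
# e.g. [[:group, :named, "(?<year>"], [:type, :digit, "\\d"], ...]

# A block receives each token in turn.
Regexp::Lexer.lex('ab+') {|token| puts token.text }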
data/lib/regexp_parser/parser.rb
@@ -1,18 +1,14 @@
- require File.expand_path('../expression', __FILE__)
+ require 'regexp_parser/expression'
 
  module Regexp::Parser
  include Regexp::Expression
  include Regexp::Syntax
 
- class ParserError < StandardError
- def initialize(what)
- super what
- end
- end
+ class ParserError < StandardError; end
 
  class UnknownTokenTypeError < ParserError
  def initialize(type, token)
- super "Unknown #{type} type #{token.inspect}"
+ super "Unknown token type #{type} #{token.inspect}"
  end
  end
 
@@ -25,8 +21,10 @@ module Regexp::Parser
  def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", &block)
  @nesting = [@root = @node = Root.new]
 
+ @conditional_nesting = []
+
  Regexp::Lexer.scan(input, syntax) do |token|
- self.parse_token token
+ parse_token token
  end
 
  if block_given?
@@ -43,23 +41,34 @@
  @node = exp
  end
 
+ def self.nest_conditional(exp)
+ @conditional_nesting.push exp
+
+ @node << exp
+ @node = exp
+ end
+
  def self.parse_token(token)
  case token.type
- when :meta; self.meta(token)
- when :quantifier; self.quantifier(token)
- when :anchor; self.anchor(token)
- when :escape; self.escape(token)
- when :group; self.group(token)
- when :assertion; self.group(token)
- when :set, :subset; self.set(token)
- when :type; self.type(token)
- when :backref; self.backref(token)
+ when :meta; meta(token)
+ when :quantifier; quantifier(token)
+ when :anchor; anchor(token)
+ when :escape; escape(token)
+ when :group; group(token)
+ when :assertion; group(token)
+ when :set, :subset; set(token)
+ when :type; type(token)
+ when :backref; backref(token)
+ when :conditional; conditional(token)
+ when :keep; keep(token)
 
  when :property, :nonproperty
- self.property(token)
+ property(token)
 
  when :literal
  @node << Literal.new(token)
+ when :free_space
+ free_space(token)
 
  else
  raise UnknownTokenTypeError.new(token.type, token)
@@ -69,19 +78,19 @@ module Regexp::Parser
  def self.set(token)
  case token.token
  when :open
- self.open_set(token)
+ open_set(token)
  when :close
- self.close_set(token)
+ close_set(token)
  when :negate
- self.negate_set
+ negate_set
  when :member, :range, :escape, :collation, :equivalent
- self.append_set(token)
+ append_set(token)
  when *Token::Escape::All
- self.append_set(token)
+ append_set(token)
  when *Token::CharacterSet::All
- self.append_set(token)
+ append_set(token)
  when *Token::UnicodeProperty::All
- self.append_set(token)
+ append_set(token)
  else
  raise UnknownTokenError.new('CharacterSet', token)
  end
@@ -95,7 +104,7 @@
  unless @node.token == :alternation
  unless @node.last.is_a?(Alternation)
  alt = Alternation.new(token)
- seq = Sequence.new
+ seq = Alternative.new(alt.level, alt.set_level, alt.conditional_level)
 
  while @node.expressions.last
  seq.insert @node.expressions.pop
@@ -163,6 +172,30 @@
  end
  end
 
+ def self.conditional(token)
+ case token.token
+ when :open
+ nest_conditional(Conditional::Expression.new(token))
+ when :condition
+ @conditional_nesting.last.condition(Conditional::Condition.new(token))
+ @conditional_nesting.last.branch
+ when :separator
+ @conditional_nesting.last.branch
+ @node = @conditional_nesting.last.branches.last
+ when :close
+ @conditional_nesting.pop
+
+ @node = if @conditional_nesting.empty?
+ @nesting.last
+ else
+ @conditional_nesting.last
+ end
+
+ else
+ raise UnknownTokenError.new('Conditional', token)
+ end
+ end
+
  def self.property(token)
  include Regexp::Expression::UnicodeProperty
 
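
A minimal sketch of the conditional support wired in above; the branches accessor appears in the parser code, and the results in the comments are expected rather than verified (assuming regexp_parser 0.2.0 and a syntax version that implements conditionals):

require 'regexp_parser'

root = Regexp::Parser.parse('(?<A>a)(?(<A>)b|c)', 'ruby/2.0.0')
cond = root.expressions.last

cond.type              # => :conditional
cond.branches.length   # => 2, the 'b' and 'c' branches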
@@ -291,14 +324,50 @@
  when :control
  @node << EscapeSequence::Control.new(token)
 
+ when :meta_sequence
+ if token.text =~ /\A\\M-\\C/
+ @node << EscapeSequence::MetaControl.new(token)
+ else
+ @node << EscapeSequence::Meta.new(token)
+ end
+
  else
  # treating everything else as a literal
  @node << EscapeSequence::Literal.new(token)
  end
  end
 
+
+ def self.keep(token)
+ @node << Keep::Mark.new(token)
+ end
+
+ def self.free_space(token)
+ case token.token
+ when :comment
+ @node << Comment.new(token)
+ when :whitespace
+ if @node.last and @node.last.is_a?(WhiteSpace)
+ @node.last.merge(WhiteSpace.new(token))
+ else
+ @node << WhiteSpace.new(token)
+ end
+ else
+ raise UnknownTokenError.new('FreeSpace', token)
+ end
+ end
+
  def self.quantifier(token)
- unless @node.expressions.last
+ offset = -1
+ target_node = @node.expressions[offset]
+ while target_node and target_node.is_a?(FreeSpace)
+ target_node = @node.expressions[offset -= 1]
+ end
+
+ raise ArgumentError.new("No valid target found for '#{token.text}' "+
+ "quantifier") unless target_node
+
+ unless target_node
 
  if token.token == :zero_or_one
  raise "Quantifier given without a target, or the syntax of the group " +
@@ -309,35 +378,36 @@
 
  case token.token
  when :zero_or_one
- @node.expressions.last.quantify(:zero_or_one, token.text, 0, 1, :greedy)
+ target_node.quantify(:zero_or_one, token.text, 0, 1, :greedy)
  when :zero_or_one_reluctant
- @node.expressions.last.quantify(:zero_or_one, token.text, 0, 1, :reluctant)
+ target_node.quantify(:zero_or_one, token.text, 0, 1, :reluctant)
  when :zero_or_one_possessive
- @node.expressions.last.quantify(:zero_or_one, token.text, 0, 1, :possessive)
+ target_node.quantify(:zero_or_one, token.text, 0, 1, :possessive)
 
  when :zero_or_more
- @node.expressions.last.quantify(:zero_or_more, token.text, 0, -1, :greedy)
+ target_node.quantify(:zero_or_more, token.text, 0, -1, :greedy)
  when :zero_or_more_reluctant
- @node.expressions.last.quantify(:zero_or_more, token.text, 0, -1, :reluctant)
+ target_node.quantify(:zero_or_more, token.text, 0, -1, :reluctant)
  when :zero_or_more_possessive
- @node.expressions.last.quantify(:zero_or_more, token.text, 0, -1, :possessive)
+ target_node.quantify(:zero_or_more, token.text, 0, -1, :possessive)
 
  when :one_or_more
- @node.expressions.last.quantify(:one_or_more, token.text, 1, -1, :greedy)
+ target_node.quantify(:one_or_more, token.text, 1, -1, :greedy)
  when :one_or_more_reluctant
- @node.expressions.last.quantify(:one_or_more, token.text, 1, -1, :reluctant)
+ target_node.quantify(:one_or_more, token.text, 1, -1, :reluctant)
  when :one_or_more_possessive
- @node.expressions.last.quantify(:one_or_more, token.text, 1, -1, :possessive)
+ target_node.quantify(:one_or_more, token.text, 1, -1, :possessive)
 
  when :interval
- self.interval(token.text)
+ interval(target_node, token)
 
  else
  raise UnknownTokenError.new('Quantifier', token)
  end
  end
 
- def self.interval(text)
+ def self.interval(target_node, token)
+ text = token.text
  mchr = text[text.length-1].chr =~ /[?+]/ ? text[text.length-1].chr : nil
  mode = case mchr
  when '?'; text.chop!; :reluctant
@@ -349,19 +419,19 @@
  min = range[0].empty? ? 0 : range[0]
  max = range[1] ? (range[1].empty? ? -1 : range[1]) : min
 
- @node.expressions.last.quantify(:interval, text, min.to_i, max.to_i, mode)
+ target_node.quantify(:interval, text, min.to_i, max.to_i, mode)
  end
 
  def self.group(token)
  case token.token
  when :options
- self.options(token)
+ options(token)
  when :close
- self.close_group
+ close_group
  when :comment
  @node << Group::Comment.new(token)
  else
- self.open_group(token)
+ open_group(token)
  end
  end
 
@@ -372,10 +442,13 @@ module Regexp::Parser
  exp.options = {
  :m => opt[0].include?('m') ? true : false,
  :i => opt[0].include?('i') ? true : false,
- :x => opt[0].include?('x') ? true : false
+ :x => opt[0].include?('x') ? true : false,
+ :d => opt[0].include?('d') ? true : false,
+ :a => opt[0].include?('a') ? true : false,
+ :u => opt[0].include?('u') ? true : false
  }
 
- self.nest exp
+ nest(exp)
  end
 
  def self.open_group(token)
@@ -402,7 +475,7 @@
  raise UnknownTokenError.new('Group type open', token)
  end
 
- self.nest exp
+ nest(exp)
  end
 
  def self.close_group