RubyGems - regexp_parser - Versions diffs - 1.7.1 → 2.1.1 - Mend

regexp_parser 1.7.1 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58)

checksums.yaml +4 -4
data/CHANGELOG.md +138 -0
data/Gemfile +6 -1
data/README.md +23 -11
data/Rakefile +8 -8
data/lib/regexp_parser/error.rb +4 -0
data/lib/regexp_parser/expression.rb +13 -21
data/lib/regexp_parser/expression/classes/backref.rb +5 -0
data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
data/lib/regexp_parser/expression/classes/free_space.rb +2 -2
data/lib/regexp_parser/expression/classes/group.rb +28 -3
data/lib/regexp_parser/expression/classes/property.rb +1 -1
data/lib/regexp_parser/expression/classes/root.rb +4 -16
data/lib/regexp_parser/expression/classes/set/range.rb +2 -1
data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
data/lib/regexp_parser/expression/quantifier.rb +10 -1
data/lib/regexp_parser/expression/sequence.rb +3 -19
data/lib/regexp_parser/expression/subexpression.rb +1 -1
data/lib/regexp_parser/lexer.rb +6 -6
data/lib/regexp_parser/parser.rb +325 -344
data/lib/regexp_parser/scanner.rb +1320 -1385
data/lib/regexp_parser/scanner/char_type.rl +11 -11
data/lib/regexp_parser/scanner/property.rl +2 -2
data/lib/regexp_parser/scanner/scanner.rl +231 -253
data/lib/regexp_parser/syntax.rb +8 -6
data/lib/regexp_parser/syntax/any.rb +3 -3
data/lib/regexp_parser/syntax/base.rb +1 -1
data/lib/regexp_parser/syntax/version_lookup.rb +4 -4
data/lib/regexp_parser/version.rb +1 -1
data/regexp_parser.gemspec +1 -1
data/spec/expression/base_spec.rb +10 -0
data/spec/expression/clone_spec.rb +36 -4
data/spec/expression/free_space_spec.rb +2 -2
data/spec/expression/methods/match_length_spec.rb +2 -2
data/spec/expression/subexpression_spec.rb +1 -1
data/spec/expression/to_s_spec.rb +39 -31
data/spec/lexer/literals_spec.rb +24 -49
data/spec/lexer/refcalls_spec.rb +5 -0
data/spec/parser/all_spec.rb +2 -2
data/spec/parser/errors_spec.rb +1 -1
data/spec/parser/escapes_spec.rb +1 -1
data/spec/parser/options_spec.rb +28 -0
data/spec/parser/quantifiers_spec.rb +16 -0
data/spec/parser/refcalls_spec.rb +5 -0
data/spec/parser/set/ranges_spec.rb +3 -3
data/spec/scanner/escapes_spec.rb +12 -1
data/spec/scanner/free_space_spec.rb +32 -0
data/spec/scanner/groups_spec.rb +10 -1
data/spec/scanner/literals_spec.rb +28 -38
data/spec/scanner/options_spec.rb +36 -0
data/spec/scanner/quantifiers_spec.rb +18 -13
data/spec/scanner/refcalls_spec.rb +19 -0
data/spec/scanner/sets_spec.rb +65 -16
data/spec/spec_helper.rb +1 -0
metadata +61 -60
data/spec/expression/root_spec.rb +0 -9
data/spec/expression/sequence_spec.rb +0 -9

data/lib/regexp_parser/expression/classes/property.rb CHANGED Viewed

@@ -7,7 +7,7 @@ module Regexp::Expression
       end
       def name
-        text =~ /\A\\[pP]\{([^}]+)\}\z/; $1
+        text[/\A\\[pP]\{([^}]+)\}\z/, 1]
       end
       def shortcut

data/lib/regexp_parser/expression/classes/root.rb CHANGED Viewed

@@ -1,24 +1,12 @@
 module Regexp::Expression
   class Root < Regexp::Expression::Subexpression
-    # TODO: this override is here for backwards compatibility, remove in 2.0.0
-    def initialize(*args)
-      unless args.first.is_a?(Regexp::Token)
-        warn('WARNING: Root.new without a Token argument is deprecated and '\
-             'will be removed in 2.0.0. Use Root.build for the old behavior.')
-        return super(self.class.build_token, *args)
-      end
-      super
+    def self.build(options = {})
+      new(build_token, options)
     end
-    class << self
-      def build(options = {})
-        new(build_token, options)
-      end
-      def build_token
-        Regexp::Token.new(:expression, :root, '', 0)
-      end
+    def self.build_token
+      Regexp::Token.new(:expression, :root, '', 0)
     end
   end
 end

data/lib/regexp_parser/expression/classes/set/range.rb CHANGED Viewed

@@ -7,7 +7,8 @@ module Regexp::Expression
       alias :ts :starts_at
       def <<(exp)
-        complete? && raise("Can't add more than 2 expressions to a Range")
+        complete? and raise Regexp::Parser::Error,
+          "Can't add more than 2 expressions to a Range"
         super
       end

data/lib/regexp_parser/expression/methods/match_length.rb CHANGED Viewed

@@ -10,7 +10,7 @@ class Regexp::MatchLength
     self.exp_class = exp.class
     self.min_rep = exp.repetitions.min
     self.max_rep = exp.repetitions.max
-    if base = opts[:base]
+    if (base = opts[:base])
       self.base_min = base
       self.base_max = base
       self.reify = ->{ '.' * base }
@@ -32,7 +32,7 @@ class Regexp::MatchLength
     end
   end
-  def endless_each(&block)
+  def endless_each
     return enum_for(__method__) unless block_given?
     (min..max).each { |num| yield(num) if include?(num) }
   end

data/lib/regexp_parser/expression/methods/traverse.rb CHANGED Viewed

@@ -36,7 +36,7 @@ module Regexp::Expression
     # Iterates over the expressions of this expression as an array, passing
     # the expression and its index within its parent to the given block.
-    def each_expression(include_self = false, &block)
+    def each_expression(include_self = false)
       return enum_for(__method__, include_self) unless block_given?
       traverse(include_self) do |event, exp, index|
@@ -47,7 +47,7 @@ module Regexp::Expression
     # Returns a new array with the results of calling the given block once
     # for every expression. If a block is not given, returns an array with
     # each expression and its level index as an array.
-    def flat_map(include_self = false, &block)
+    def flat_map(include_self = false)
       result = []
       each_expression(include_self) do |exp, index|

data/lib/regexp_parser/expression/quantifier.rb CHANGED Viewed

@@ -12,7 +12,7 @@ module Regexp::Expression
       @max   = max
     end
-    def initialize_clone(orig)
+    def initialize_copy(orig)
       @text = orig.text.dup
       super
     end
@@ -40,5 +40,14 @@ module Regexp::Expression
       RUBY
     end
     alias :lazy? :reluctant?
+    def ==(other)
+      other.class == self.class &&
+        other.token == token &&
+        other.mode == mode &&
+        other.min == min &&
+        other.max == max
+    end
+    alias :eq :==
   end
 end

data/lib/regexp_parser/expression/sequence.rb CHANGED Viewed

@@ -7,16 +7,6 @@ module Regexp::Expression
   # Used as the base class for the Alternation alternatives, Conditional
   # branches, and CharacterSet::Intersection intersected sequences.
   class Sequence < Regexp::Expression::Subexpression
-    # TODO: this override is here for backwards compatibility, remove in 2.0.0
-    def initialize(*args)
-      if args.count == 3
-        warn('WARNING: Sequence.new without a Regexp::Token argument is '\
-             'deprecated and will be removed in 2.0.0.')
-        return self.class.at_levels(*args)
-      end
-      super
-    end
     class << self
       def add_to(subexpression, params = {}, active_opts = {})
         sequence = at_levels(
@@ -51,17 +41,11 @@ module Regexp::Expression
     alias :ts :starts_at
     def quantify(token, text, min = nil, max = nil, mode = :greedy)
-      offset = -1
-      target = expressions[offset]
-      while target.is_a?(FreeSpace)
-        target = expressions[offset -= 1]
-      end
-      target || raise(ArgumentError, "No valid target found for '#{text}' "\
-                                     'quantifier')
+      target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
+      target or raise Regexp::Parser::Error,
+        "No valid target found for '#{text}' quantifier"
       target.quantify(token, text, min, max, mode)
     end
   end
 end

data/lib/regexp_parser/expression/subexpression.rb CHANGED Viewed

@@ -12,7 +12,7 @@ module Regexp::Expression
     end
     # Override base method to clone the expressions as well.
-    def initialize_clone(orig)
+    def initialize_copy(orig)
       self.expressions = orig.expressions.map(&:clone)
       super
     end

data/lib/regexp_parser/lexer.rb CHANGED Viewed

@@ -11,11 +11,11 @@ class Regexp::Lexer
   CLOSING_TOKENS = [:close].freeze
-  def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
-    new.lex(input, syntax, &block)
+  def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
+    new.lex(input, syntax, options: options, &block)
   end
-  def lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
+  def lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
     syntax = Regexp::Syntax.new(syntax)
     self.tokens = []
@@ -25,7 +25,7 @@ class Regexp::Lexer
     self.shift = 0
     last = nil
-    Regexp::Scanner.scan(input) do |type, token, text, ts, te|
+    Regexp::Scanner.scan(input, options: options) do |type, token, text, ts, te|
       type, token = *syntax.normalize(type, token)
       syntax.check! type, token
@@ -96,10 +96,10 @@ class Regexp::Lexer
     tokens.pop
     tokens << Regexp::Token.new(:literal, :literal, lead,
-              token.ts, (token.te - last.bytesize),
+              token.ts, (token.te - last.length),
               nesting, set_nesting, conditional_nesting)
     tokens << Regexp::Token.new(:literal, :literal, last,
-              (token.ts + lead.bytesize), token.te,
+              (token.ts + lead.length), token.te,
               nesting, set_nesting, conditional_nesting)
   end

data/lib/regexp_parser/parser.rb CHANGED Viewed

@@ -1,10 +1,10 @@
+require 'regexp_parser/error'
 require 'regexp_parser/expression'
 class Regexp::Parser
   include Regexp::Expression
-  include Regexp::Syntax
-  class ParserError < StandardError; end
+  class ParserError < Regexp::Parser::Error; end
   class UnknownTokenTypeError < ParserError
     def initialize(type, token)
@@ -18,12 +18,12 @@ class Regexp::Parser
     end
   end
-  def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", &block)
-    new.parse(input, syntax, &block)
+  def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
+    new.parse(input, syntax, options: options, &block)
   end
-  def parse(input, syntax = "ruby/#{RUBY_VERSION}", &block)
-    root = Root.build(options_from_input(input))
+  def parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
+    root = Root.build(extract_options(input, options))
     self.root = root
     self.node = root
@@ -35,7 +35,7 @@ class Regexp::Parser
     self.captured_group_counts = Hash.new(0)
-    Regexp::Lexer.scan(input, syntax) do |token|
+    Regexp::Lexer.scan(input, syntax, options: options) do |token|
       parse_token(token)
     end
@@ -54,105 +54,171 @@ class Regexp::Parser
                 :options_stack, :switching_options, :conditional_nesting,
                 :captured_group_counts
-  def options_from_input(input)
-    return {} unless input.is_a?(::Regexp)
+  def extract_options(input, options)
+    if options && !input.is_a?(String)
+      raise ArgumentError, 'options cannot be supplied unless parsing a String'
+    end
-    options = {}
-    options[:i] = true if input.options & ::Regexp::IGNORECASE != 0
-    options[:m] = true if input.options & ::Regexp::MULTILINE  != 0
-    options[:x] = true if input.options & ::Regexp::EXTENDED   != 0
-    options
-  end
+    options = input.options if input.is_a?(::Regexp)
-  def nest(exp)
-    nesting.push(exp)
-    node << exp
-    update_transplanted_subtree(exp, node)
-    self.node = exp
-  end
-  # subtrees are transplanted to build Alternations, Intersections, Ranges
-  def update_transplanted_subtree(exp, new_parent)
-    exp.nesting_level = new_parent.nesting_level + 1
-    exp.respond_to?(:each) &&
-      exp.each { |subexp| update_transplanted_subtree(subexp, exp) }
-  end
-  def decrease_nesting
-    while nesting.last.is_a?(SequenceOperation)
-      nesting.pop
-      self.node = nesting.last
-    end
-    nesting.pop
-    yield(node) if block_given?
-    self.node = nesting.last
-    self.node = node.last if node.last.is_a?(SequenceOperation)
-  end
+    return {} unless options
-  def nest_conditional(exp)
-    conditional_nesting.push(exp)
-    nest(exp)
+    enabled_options = {}
+    enabled_options[:i] = true if options & ::Regexp::IGNORECASE != 0
+    enabled_options[:m] = true if options & ::Regexp::MULTILINE  != 0
+    enabled_options[:x] = true if options & ::Regexp::EXTENDED   != 0
+    enabled_options
   end
   def parse_token(token)
-    close_completed_character_set_range
     case token.type
-    when :meta;         meta(token)
-    when :quantifier;   quantifier(token)
-    when :anchor;       anchor(token)
-    when :escape;       escape(token)
-    when :group;        group(token)
-    when :assertion;    group(token)
-    when :set;          set(token)
-    when :type;         type(token)
-    when :backref;      backref(token)
-    when :conditional;  conditional(token)
-    when :keep;         keep(token)
-    when :posixclass, :nonposixclass
-      posixclass(token)
-    when :property, :nonproperty
-      property(token)
-    when :literal
-      node << Literal.new(token, active_opts)
-    when :free_space
-      free_space(token)
+    when :anchor;                     anchor(token)
+    when :assertion, :group;          group(token)
+    when :backref;                    backref(token)
+    when :conditional;                conditional(token)
+    when :escape;                     escape(token)
+    when :free_space;                 free_space(token)
+    when :keep;                       keep(token)
+    when :literal;                    literal(token)
+    when :meta;                       meta(token)
+    when :posixclass, :nonposixclass; posixclass(token)
+    when :property, :nonproperty;     property(token)
+    when :quantifier;                 quantifier(token)
+    when :set;                        set(token)
+    when :type;                       type(token)
     else
       raise UnknownTokenTypeError.new(token.type, token)
     end
+    close_completed_character_set_range
   end
-  def set(token)
+  def anchor(token)
     case token.token
-    when :open
-      open_set(token)
-    when :close
-      close_set
-    when :negate
-      negate_set
-    when :range
-      range(token)
-    when :intersection
-      intersection(token)
-    when :collation, :equivalent
-      node << Literal.new(token, active_opts)
+    when :bol;              node << Anchor::BeginningOfLine.new(token, active_opts)
+    when :bos;              node << Anchor::BOS.new(token, active_opts)
+    when :eol;              node << Anchor::EndOfLine.new(token, active_opts)
+    when :eos;              node << Anchor::EOS.new(token, active_opts)
+    when :eos_ob_eol;       node << Anchor::EOSobEOL.new(token, active_opts)
+    when :match_start;      node << Anchor::MatchStart.new(token, active_opts)
+    when :nonword_boundary; node << Anchor::NonWordBoundary.new(token, active_opts)
+    when :word_boundary;    node << Anchor::WordBoundary.new(token, active_opts)
     else
-      raise UnknownTokenError.new('CharacterSet', token)
+      raise UnknownTokenError.new('Anchor', token)
     end
   end
-  def meta(token)
+  def group(token)
     case token.token
-    when :dot
-      node << CharacterType::Any.new(token, active_opts)
-    when :alternation
-      sequence_operation(Alternation, token)
+    when :options, :options_switch
+      options_group(token)
+    when :close
+      close_group
+    when :comment
+      node << Group::Comment.new(token, active_opts)
     else
-      raise UnknownTokenError.new('Meta', token)
+      open_group(token)
+    end
+  end
+  MOD_FLAGS = %w[i m x].map(&:to_sym)
+  ENC_FLAGS = %w[a d u].map(&:to_sym)
+  def options_group(token)
+    positive, negative = token.text.split('-', 2)
+    negative ||= ''
+    self.switching_options = token.token.equal?(:options_switch)
+    opt_changes = {}
+    new_active_opts = active_opts.dup
+    MOD_FLAGS.each do |flag|
+      if positive.include?(flag.to_s)
+        opt_changes[flag] = new_active_opts[flag] = true
+      end
+      if negative.include?(flag.to_s)
+        opt_changes[flag] = false
+        new_active_opts.delete(flag)
+      end
+    end
+    if (enc_flag = positive.reverse[/[adu]/])
+      enc_flag = enc_flag.to_sym
+      (ENC_FLAGS - [enc_flag]).each do |other|
+        opt_changes[other] = false if new_active_opts[other]
+        new_active_opts.delete(other)
+      end
+      opt_changes[enc_flag] = new_active_opts[enc_flag] = true
+    end
+    options_stack << new_active_opts
+    options_group = Group::Options.new(token, active_opts)
+    options_group.option_changes = opt_changes
+    nest(options_group)
+  end
+  def open_group(token)
+    group_class =
+      case token.token
+      when :absence;     Group::Absence
+      when :atomic;      Group::Atomic
+      when :capture;     Group::Capture
+      when :named;       Group::Named
+      when :passive;     Group::Passive
+      when :lookahead;   Assertion::Lookahead
+      when :lookbehind;  Assertion::Lookbehind
+      when :nlookahead;  Assertion::NegativeLookahead
+      when :nlookbehind; Assertion::NegativeLookbehind
+      else
+        raise UnknownTokenError.new('Group type open', token)
+      end
+    group = group_class.new(token, active_opts)
+    if group.capturing?
+      group.number          = total_captured_group_count + 1
+      group.number_at_level = captured_group_count_at_level + 1
+      count_captured_group
+    end
+    # Push the active options to the stack again. This way we can simply pop the
+    # stack for any group we close, no matter if it had its own options or not.
+    options_stack << active_opts
+    nest(group)
+  end
+  def total_captured_group_count
+    captured_group_counts.values.reduce(0, :+)
+  end
+  def captured_group_count_at_level
+    captured_group_counts[node.level]
+  end
+  def count_captured_group
+    captured_group_counts[node.level] += 1
+  end
+  def close_group
+    options_stack.pop unless switching_options
+    self.switching_options = false
+    decrease_nesting
+  end
+  def decrease_nesting
+    while nesting.last.is_a?(SequenceOperation)
+      nesting.pop
+      self.node = nesting.last
     end
+    nesting.pop
+    yield(node) if block_given?
+    self.node = nesting.last
+    self.node = node.last if node.last.is_a?(SequenceOperation)
   end
   def backref(token)
@@ -182,31 +248,9 @@ class Regexp::Parser
     end
   end
-  def type(token)
-    case token.token
-    when :digit
-      node << CharacterType::Digit.new(token, active_opts)
-    when :nondigit
-      node << CharacterType::NonDigit.new(token, active_opts)
-    when :hex
-      node << CharacterType::Hex.new(token, active_opts)
-    when :nonhex
-      node << CharacterType::NonHex.new(token, active_opts)
-    when :space
-      node << CharacterType::Space.new(token, active_opts)
-    when :nonspace
-      node << CharacterType::NonSpace.new(token, active_opts)
-    when :word
-      node << CharacterType::Word.new(token, active_opts)
-    when :nonword
-      node << CharacterType::NonWord.new(token, active_opts)
-    when :linebreak
-      node << CharacterType::Linebreak.new(token, active_opts)
-    when :xgrapheme
-      node << CharacterType::ExtendedGrapheme.new(token, active_opts)
-    else
-      raise UnknownTokenError.new('CharacterType', token)
-    end
+  def assign_effective_number(exp)
+    exp.effective_number =
+      exp.number + total_captured_group_count + (exp.number < 0 ? 1 : 0)
   end
   def conditional(token)
@@ -234,11 +278,118 @@ class Regexp::Parser
     end
   end
+  def nest_conditional(exp)
+    conditional_nesting.push(exp)
+    nest(exp)
+  end
+  def nest(exp)
+    nesting.push(exp)
+    node << exp
+    update_transplanted_subtree(exp, node)
+    self.node = exp
+  end
+  # subtrees are transplanted to build Alternations, Intersections, Ranges
+  def update_transplanted_subtree(exp, new_parent)
+    exp.nesting_level = new_parent.nesting_level + 1
+    exp.respond_to?(:each) &&
+      exp.each { |subexp| update_transplanted_subtree(subexp, exp) }
+  end
+  def escape(token)
+    case token.token
+    when :backspace;      node << EscapeSequence::Backspace.new(token, active_opts)
+    when :escape;         node << EscapeSequence::AsciiEscape.new(token, active_opts)
+    when :bell;           node << EscapeSequence::Bell.new(token, active_opts)
+    when :form_feed;      node << EscapeSequence::FormFeed.new(token, active_opts)
+    when :newline;        node << EscapeSequence::Newline.new(token, active_opts)
+    when :carriage;       node << EscapeSequence::Return.new(token, active_opts)
+    when :tab;            node << EscapeSequence::Tab.new(token, active_opts)
+    when :vertical_tab;   node << EscapeSequence::VerticalTab.new(token, active_opts)
+    when :codepoint;      node << EscapeSequence::Codepoint.new(token, active_opts)
+    when :codepoint_list; node << EscapeSequence::CodepointList.new(token, active_opts)
+    when :hex;            node << EscapeSequence::Hex.new(token, active_opts)
+    when :octal;          node << EscapeSequence::Octal.new(token, active_opts)
+    when :control
+      if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
+        node << EscapeSequence::MetaControl.new(token, active_opts)
+      else
+        node << EscapeSequence::Control.new(token, active_opts)
+      end
+    when :meta_sequence
+      if token.text =~ /\A\\M-\\[Cc]/
+        node << EscapeSequence::MetaControl.new(token, active_opts)
+      else
+        node << EscapeSequence::Meta.new(token, active_opts)
+      end
+    else
+      # treating everything else as a literal
+      # TODO: maybe split this up a bit more in v3.0.0?
+      # E.g. escaped quantifiers or set meta chars are not the same
+      # as stuff that would be a literal even without the backslash.
+      # Right now, they all end up here.
+      node << EscapeSequence::Literal.new(token, active_opts)
+    end
+  end
+  def free_space(token)
+    case token.token
+    when :comment
+      node << Comment.new(token, active_opts)
+    when :whitespace
+      if node.last.is_a?(WhiteSpace)
+        node.last.merge(WhiteSpace.new(token, active_opts))
+      else
+        node << WhiteSpace.new(token, active_opts)
+      end
+    else
+      raise UnknownTokenError.new('FreeSpace', token)
+    end
+  end
+  def keep(token)
+    node << Keep::Mark.new(token, active_opts)
+  end
+  def literal(token)
+    node << Literal.new(token, active_opts)
+  end
+  def meta(token)
+    case token.token
+    when :dot
+      node << CharacterType::Any.new(token, active_opts)
+    when :alternation
+      sequence_operation(Alternation, token)
+    else
+      raise UnknownTokenError.new('Meta', token)
+    end
+  end
+  def sequence_operation(klass, token)
+    unless node.is_a?(klass)
+      operator = klass.new(token, active_opts)
+      sequence = operator.add_sequence(active_opts)
+      sequence.expressions = node.expressions
+      node.expressions = []
+      nest(operator)
+    end
+    node.add_sequence(active_opts)
+  end
   def posixclass(token)
     node << PosixClass.new(token, active_opts)
   end
   include Regexp::Expression::UnicodeProperty
+  UPTokens = Regexp::Syntax::Token::UnicodeProperty
   def property(token)
     case token.token
@@ -310,128 +461,43 @@ class Regexp::Parser
     when :private_use;            node << Codepoint::PrivateUse.new(token, active_opts)
     when :unassigned;             node << Codepoint::Unassigned.new(token, active_opts)
-    when *Token::UnicodeProperty::Age
-      node << Age.new(token, active_opts)
-    when *Token::UnicodeProperty::Derived
-      node << Derived.new(token, active_opts)
-    when *Token::UnicodeProperty::Emoji
-      node << Emoji.new(token, active_opts)
-    when *Token::UnicodeProperty::Script
-      node << Script.new(token, active_opts)
-    when *Token::UnicodeProperty::UnicodeBlock
-      node << Block.new(token, active_opts)
+    when *UPTokens::Age;          node << Age.new(token, active_opts)
+    when *UPTokens::Derived;      node << Derived.new(token, active_opts)
+    when *UPTokens::Emoji;        node << Emoji.new(token, active_opts)
+    when *UPTokens::Script;       node << Script.new(token, active_opts)
+    when *UPTokens::UnicodeBlock; node << Block.new(token, active_opts)
     else
       raise UnknownTokenError.new('UnicodeProperty', token)
     end
   end
-  def anchor(token)
-    case token.token
-    when :bol
-      node << Anchor::BeginningOfLine.new(token, active_opts)
-    when :eol
-      node << Anchor::EndOfLine.new(token, active_opts)
-    when :bos
-      node << Anchor::BOS.new(token, active_opts)
-    when :eos
-      node << Anchor::EOS.new(token, active_opts)
-    when :eos_ob_eol
-      node << Anchor::EOSobEOL.new(token, active_opts)
-    when :word_boundary
-      node << Anchor::WordBoundary.new(token, active_opts)
-    when :nonword_boundary
-      node << Anchor::NonWordBoundary.new(token, active_opts)
-    when :match_start
-      node << Anchor::MatchStart.new(token, active_opts)
-    else
-      raise UnknownTokenError.new('Anchor', token)
-    end
-  end
-  def escape(token)
-    case token.token
-    when :backspace
-      node << EscapeSequence::Backspace.new(token, active_opts)
-    when :escape
-      node << EscapeSequence::AsciiEscape.new(token, active_opts)
-    when :bell
-      node << EscapeSequence::Bell.new(token, active_opts)
-    when :form_feed
-      node << EscapeSequence::FormFeed.new(token, active_opts)
-    when :newline
-      node << EscapeSequence::Newline.new(token, active_opts)
-    when :carriage
-      node << EscapeSequence::Return.new(token, active_opts)
-    when :tab
-      node << EscapeSequence::Tab.new(token, active_opts)
-    when :vertical_tab
-      node << EscapeSequence::VerticalTab.new(token, active_opts)
-    when :hex
-      node << EscapeSequence::Hex.new(token, active_opts)
-    when :octal
-      node << EscapeSequence::Octal.new(token, active_opts)
-    when :codepoint
-      node << EscapeSequence::Codepoint.new(token, active_opts)
-    when :codepoint_list
-      node << EscapeSequence::CodepointList.new(token, active_opts)
-    when :control
-      if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
-        node << EscapeSequence::MetaControl.new(token, active_opts)
-      else
-        node << EscapeSequence::Control.new(token, active_opts)
-      end
-    when :meta_sequence
-      if token.text =~ /\A\\M-\\[Cc]/
-        node << EscapeSequence::MetaControl.new(token, active_opts)
-      else
-        node << EscapeSequence::Meta.new(token, active_opts)
-      end
-    else
-      # treating everything else as a literal
-      node << EscapeSequence::Literal.new(token, active_opts)
-    end
-  end
-  def keep(token)
-    node << Keep::Mark.new(token, active_opts)
-  end
-  def free_space(token)
-    case token.token
-    when :comment
-      node << Comment.new(token, active_opts)
-    when :whitespace
-      if node.last.is_a?(WhiteSpace)
-        node.last.merge(WhiteSpace.new(token, active_opts))
-      else
-        node << WhiteSpace.new(token, active_opts)
-      end
-    else
-      raise UnknownTokenError.new('FreeSpace', token)
-    end
-  end
   def quantifier(token)
-    offset = -1
-    target_node = node.expressions[offset]
-    while target_node.is_a?(FreeSpace)
-      target_node = node.expressions[offset -= 1]
+    target_node = node.expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
+    target_node or raise ParserError, "No valid target found for '#{token.text}'"
+    # in case of chained quantifiers, wrap target in an implicit passive group
+    # description of the problem: https://github.com/ammar/regexp_parser/issues/3
+    # rationale for this solution: https://github.com/ammar/regexp_parser/pull/69
+    if target_node.quantified?
+      new_token = Regexp::Token.new(
+        :group,
+        :passive,
+        '', # text
+        target_node.ts,
+        nil, # te (unused)
+        target_node.level,
+        target_node.set_level,
+        target_node.conditional_level
+      )
+      new_group = Group::Passive.new(new_token, active_opts)
+      new_group.implicit = true
+      new_group << target_node
+      increase_level(target_node)
+      node.expressions[node.expressions.index(target_node)] = new_group
+      target_node = new_group
     end
-    target_node || raise(ArgumentError, 'No valid target found for '\
-                                        "'#{token.text}' ")
     case token.token
     when :zero_or_one
       target_node.quantify(:zero_or_one, token.text, 0, 1, :greedy)
@@ -462,6 +528,11 @@ class Regexp::Parser
     end
   end
+  def increase_level(exp)
+    exp.level += 1
+    exp.respond_to?(:each) && exp.each { |subexp| increase_level(subexp) }
+  end
   def interval(target_node, token)
     text = token.text
     mchr = text[text.length-1].chr =~ /[?+]/ ? text[text.length-1].chr : nil
@@ -484,100 +555,16 @@ class Regexp::Parser
     target_node.quantify(:interval, text, min.to_i, max.to_i, mode)
   end
-  def group(token)
-    case token.token
-    when :options, :options_switch
-      options_group(token)
-    when :close
-      close_group
-    when :comment
-      node << Group::Comment.new(token, active_opts)
-    else
-      open_group(token)
-    end
-  end
-  MOD_FLAGS = %w[i m x].map(&:to_sym)
-  ENC_FLAGS = %w[a d u].map(&:to_sym)
-  def options_group(token)
-    positive, negative = token.text.split('-', 2)
-    negative ||= ''
-    self.switching_options = token.token.equal?(:options_switch)
-    opt_changes = {}
-    new_active_opts = active_opts.dup
-    MOD_FLAGS.each do |flag|
-      if positive.include?(flag.to_s)
-        opt_changes[flag] = new_active_opts[flag] = true
-      end
-      if negative.include?(flag.to_s)
-        opt_changes[flag] = false
-        new_active_opts.delete(flag)
-      end
-    end
-    if (enc_flag = positive.reverse[/[adu]/])
-      enc_flag = enc_flag.to_sym
-      (ENC_FLAGS - [enc_flag]).each do |other|
-        opt_changes[other] = false if new_active_opts[other]
-        new_active_opts.delete(other)
-      end
-      opt_changes[enc_flag] = new_active_opts[enc_flag] = true
-    end
-    options_stack << new_active_opts
-    options_group = Group::Options.new(token, active_opts)
-    options_group.option_changes = opt_changes
-    nest(options_group)
-  end
-  def open_group(token)
+  def set(token)
     case token.token
-    when :passive
-      exp = Group::Passive.new(token, active_opts)
-    when :atomic
-      exp = Group::Atomic.new(token, active_opts)
-    when :named
-      exp = Group::Named.new(token, active_opts)
-    when :capture
-      exp = Group::Capture.new(token, active_opts)
-    when :absence
-      exp = Group::Absence.new(token, active_opts)
-    when :lookahead
-      exp = Assertion::Lookahead.new(token, active_opts)
-    when :nlookahead
-      exp = Assertion::NegativeLookahead.new(token, active_opts)
-    when :lookbehind
-      exp = Assertion::Lookbehind.new(token, active_opts)
-    when :nlookbehind
-      exp = Assertion::NegativeLookbehind.new(token, active_opts)
+    when :open;         open_set(token)
+    when :close;        close_set
+    when :negate;       negate_set
+    when :range;        range(token)
+    when :intersection; intersection(token)
     else
-      raise UnknownTokenError.new('Group type open', token)
-    end
-    if exp.capturing?
-      exp.number          = total_captured_group_count + 1
-      exp.number_at_level = captured_group_count_at_level + 1
-      count_captured_group
+      raise UnknownTokenError.new('CharacterSet', token)
     end
-    # Push the active options to the stack again. This way we can simply pop the
-    # stack for any group we close, no matter if it had its own options or not.
-    options_stack << active_opts
-    nest(exp)
-  end
-  def close_group
-    options_stack.pop unless switching_options
-    self.switching_options = false
-    decrease_nesting
   end
   def open_set(token)
@@ -600,51 +587,45 @@ class Regexp::Parser
     nest(exp)
   end
-  def close_completed_character_set_range
-    decrease_nesting if node.is_a?(CharacterSet::Range) && node.complete?
-  end
   def intersection(token)
     sequence_operation(CharacterSet::Intersection, token)
   end
-  def sequence_operation(klass, token)
-    unless node.is_a?(klass)
-      operator = klass.new(token, active_opts)
-      sequence = operator.add_sequence(active_opts)
-      sequence.expressions = node.expressions
-      node.expressions = []
-      nest(operator)
+  def type(token)
+    case token.token
+    when :digit;     node << CharacterType::Digit.new(token, active_opts)
+    when :hex;       node << CharacterType::Hex.new(token, active_opts)
+    when :linebreak; node << CharacterType::Linebreak.new(token, active_opts)
+    when :nondigit;  node << CharacterType::NonDigit.new(token, active_opts)
+    when :nonhex;    node << CharacterType::NonHex.new(token, active_opts)
+    when :nonspace;  node << CharacterType::NonSpace.new(token, active_opts)
+    when :nonword;   node << CharacterType::NonWord.new(token, active_opts)
+    when :space;     node << CharacterType::Space.new(token, active_opts)
+    when :word;      node << CharacterType::Word.new(token, active_opts)
+    when :xgrapheme; node << CharacterType::ExtendedGrapheme.new(token, active_opts)
+    else
+      raise UnknownTokenError.new('CharacterType', token)
     end
-    node.add_sequence(active_opts)
-  end
-  def active_opts
-    options_stack.last
-  end
-  def total_captured_group_count
-    captured_group_counts.values.reduce(0, :+)
-  end
-  def captured_group_count_at_level
-    captured_group_counts[node.level]
   end
-  def count_captured_group
-    captured_group_counts[node.level] += 1
+  def close_completed_character_set_range
+    decrease_nesting if node.is_a?(CharacterSet::Range) && node.complete?
   end
-  def assign_effective_number(exp)
-    exp.effective_number =
-      exp.number + total_captured_group_count + (exp.number < 0 ? 1 : 0)
+  def active_opts
+    options_stack.last
   end
+  # Assigns referenced expressions to refering expressions, e.g. if there is
+  # an instance of Backreference::Number, its #referenced_expression is set to
+  # the instance of Group::Capture that it refers to via its number.
   def assign_referenced_expressions
     targets = {}
+    # find all referencable expressions
     root.each_expression do |exp|
       exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
     end
+    # assign them to any refering expressions
     root.each_expression do |exp|
       exp.respond_to?(:reference) &&
         exp.referenced_expression = targets[exp.reference]