RubyGems - regexp_parser - Versions diffs - 1.8.2 → 2.1.0 - Mend

regexp_parser 1.8.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +93 -0
data/Gemfile +6 -1
data/README.md +1 -4
data/Rakefile +8 -8
data/lib/regexp_parser.rb +1 -0
data/lib/regexp_parser/error.rb +4 -0
data/lib/regexp_parser/expression.rb +5 -18
data/lib/regexp_parser/expression/classes/backref.rb +5 -0
data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
data/lib/regexp_parser/expression/classes/free_space.rb +2 -2
data/lib/regexp_parser/expression/classes/group.rb +28 -3
data/lib/regexp_parser/expression/classes/property.rb +1 -1
data/lib/regexp_parser/expression/classes/root.rb +4 -16
data/lib/regexp_parser/expression/classes/set/range.rb +2 -1
data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
data/lib/regexp_parser/expression/quantifier.rb +10 -1
data/lib/regexp_parser/expression/sequence.rb +3 -19
data/lib/regexp_parser/expression/subexpression.rb +1 -1
data/lib/regexp_parser/lexer.rb +2 -2
data/lib/regexp_parser/parser.rb +306 -332
data/lib/regexp_parser/scanner.rb +1272 -1338
data/lib/regexp_parser/scanner/char_type.rl +11 -11
data/lib/regexp_parser/scanner/property.rl +2 -2
data/lib/regexp_parser/scanner/scanner.rl +206 -238
data/lib/regexp_parser/syntax.rb +7 -7
data/lib/regexp_parser/syntax/any.rb +3 -3
data/lib/regexp_parser/syntax/base.rb +1 -1
data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
data/lib/regexp_parser/syntax/versions.rb +1 -1
data/lib/regexp_parser/version.rb +1 -1
data/spec/expression/base_spec.rb +10 -0
data/spec/expression/clone_spec.rb +36 -4
data/spec/expression/free_space_spec.rb +2 -2
data/spec/expression/methods/match_length_spec.rb +2 -2
data/spec/expression/subexpression_spec.rb +1 -1
data/spec/expression/to_s_spec.rb +39 -31
data/spec/lexer/literals_spec.rb +24 -49
data/spec/lexer/refcalls_spec.rb +5 -0
data/spec/parser/all_spec.rb +2 -2
data/spec/parser/errors_spec.rb +1 -1
data/spec/parser/escapes_spec.rb +1 -1
data/spec/parser/quantifiers_spec.rb +16 -0
data/spec/parser/refcalls_spec.rb +5 -0
data/spec/parser/set/ranges_spec.rb +3 -3
data/spec/scanner/escapes_spec.rb +8 -1
data/spec/scanner/groups_spec.rb +10 -1
data/spec/scanner/literals_spec.rb +28 -38
data/spec/scanner/quantifiers_spec.rb +18 -13
data/spec/scanner/refcalls_spec.rb +19 -0
data/spec/scanner/sets_spec.rb +65 -16
data/spec/spec_helper.rb +1 -0
metadata +4 -7
data/spec/expression/root_spec.rb +0 -9
data/spec/expression/sequence_spec.rb +0 -9

data/lib/regexp_parser/expression/sequence.rb CHANGED Viewed

@@ -7,16 +7,6 @@ module Regexp::Expression
   # Used as the base class for the Alternation alternatives, Conditional
   # branches, and CharacterSet::Intersection intersected sequences.
   class Sequence < Regexp::Expression::Subexpression
-    # TODO: this override is here for backwards compatibility, remove in 2.0.0
-    def initialize(*args)
-      if args.count == 3
-        warn('WARNING: Sequence.new without a Regexp::Token argument is '\
-             'deprecated and will be removed in 2.0.0.')
-        return self.class.at_levels(*args)
-      end
-      super
-    end
     class << self
       def add_to(subexpression, params = {}, active_opts = {})
         sequence = at_levels(
@@ -51,17 +41,11 @@ module Regexp::Expression
     alias :ts :starts_at
     def quantify(token, text, min = nil, max = nil, mode = :greedy)
-      offset = -1
-      target = expressions[offset]
-      while target.is_a?(FreeSpace)
-        target = expressions[offset -= 1]
-      end
-      target || raise(ArgumentError, "No valid target found for '#{text}' "\
-                                     'quantifier')
+      target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
+      target or raise Regexp::Parser::Error,
+        "No valid target found for '#{text}' quantifier"
       target.quantify(token, text, min, max, mode)
     end
   end
 end

data/lib/regexp_parser/expression/subexpression.rb CHANGED Viewed

@@ -12,7 +12,7 @@ module Regexp::Expression
     end
     # Override base method to clone the expressions as well.
-    def initialize_clone(orig)
+    def initialize_copy(orig)
       self.expressions = orig.expressions.map(&:clone)
       super
     end

data/lib/regexp_parser/lexer.rb CHANGED Viewed

@@ -96,10 +96,10 @@ class Regexp::Lexer
     tokens.pop
     tokens << Regexp::Token.new(:literal, :literal, lead,
-              token.ts, (token.te - last.bytesize),
+              token.ts, (token.te - last.length),
               nesting, set_nesting, conditional_nesting)
     tokens << Regexp::Token.new(:literal, :literal, last,
-              (token.ts + lead.bytesize), token.te,
+              (token.ts + lead.length), token.te,
               nesting, set_nesting, conditional_nesting)
   end

data/lib/regexp_parser/parser.rb CHANGED Viewed

@@ -2,9 +2,8 @@ require 'regexp_parser/expression'
 class Regexp::Parser
   include Regexp::Expression
-  include Regexp::Syntax
-  class ParserError < StandardError; end
+  class ParserError < Regexp::Parser::Error; end
   class UnknownTokenTypeError < ParserError
     def initialize(type, token)
@@ -70,95 +69,155 @@ class Regexp::Parser
     enabled_options
   end
-  def nest(exp)
-    nesting.push(exp)
-    node << exp
-    update_transplanted_subtree(exp, node)
-    self.node = exp
-  end
+  def parse_token(token)
+    case token.type
+    when :anchor;                     anchor(token)
+    when :assertion, :group;          group(token)
+    when :backref;                    backref(token)
+    when :conditional;                conditional(token)
+    when :escape;                     escape(token)
+    when :free_space;                 free_space(token)
+    when :keep;                       keep(token)
+    when :literal;                    literal(token)
+    when :meta;                       meta(token)
+    when :posixclass, :nonposixclass; posixclass(token)
+    when :property, :nonproperty;     property(token)
+    when :quantifier;                 quantifier(token)
+    when :set;                        set(token)
+    when :type;                       type(token)
+    else
+      raise UnknownTokenTypeError.new(token.type, token)
+    end
-  # subtrees are transplanted to build Alternations, Intersections, Ranges
-  def update_transplanted_subtree(exp, new_parent)
-    exp.nesting_level = new_parent.nesting_level + 1
-    exp.respond_to?(:each) &&
-      exp.each { |subexp| update_transplanted_subtree(subexp, exp) }
+    close_completed_character_set_range
   end
-  def decrease_nesting
-    while nesting.last.is_a?(SequenceOperation)
-      nesting.pop
-      self.node = nesting.last
+  def anchor(token)
+    case token.token
+    when :bol;              node << Anchor::BeginningOfLine.new(token, active_opts)
+    when :bos;              node << Anchor::BOS.new(token, active_opts)
+    when :eol;              node << Anchor::EndOfLine.new(token, active_opts)
+    when :eos;              node << Anchor::EOS.new(token, active_opts)
+    when :eos_ob_eol;       node << Anchor::EOSobEOL.new(token, active_opts)
+    when :match_start;      node << Anchor::MatchStart.new(token, active_opts)
+    when :nonword_boundary; node << Anchor::NonWordBoundary.new(token, active_opts)
+    when :word_boundary;    node << Anchor::WordBoundary.new(token, active_opts)
+    else
+      raise UnknownTokenError.new('Anchor', token)
     end
-    nesting.pop
-    yield(node) if block_given?
-    self.node = nesting.last
-    self.node = node.last if node.last.is_a?(SequenceOperation)
   end
-  def nest_conditional(exp)
-    conditional_nesting.push(exp)
-    nest(exp)
+  def group(token)
+    case token.token
+    when :options, :options_switch
+      options_group(token)
+    when :close
+      close_group
+    when :comment
+      node << Group::Comment.new(token, active_opts)
+    else
+      open_group(token)
+    end
   end
-  def parse_token(token)
-    close_completed_character_set_range
+  MOD_FLAGS = %w[i m x].map(&:to_sym)
+  ENC_FLAGS = %w[a d u].map(&:to_sym)
-    case token.type
-    when :meta;         meta(token)
-    when :quantifier;   quantifier(token)
-    when :anchor;       anchor(token)
-    when :escape;       escape(token)
-    when :group;        group(token)
-    when :assertion;    group(token)
-    when :set;          set(token)
-    when :type;         type(token)
-    when :backref;      backref(token)
-    when :conditional;  conditional(token)
-    when :keep;         keep(token)
-    when :posixclass, :nonposixclass
-      posixclass(token)
-    when :property, :nonproperty
-      property(token)
-    when :literal
-      node << Literal.new(token, active_opts)
-    when :free_space
-      free_space(token)
+  def options_group(token)
+    positive, negative = token.text.split('-', 2)
+    negative ||= ''
+    self.switching_options = token.token.equal?(:options_switch)
-    else
-      raise UnknownTokenTypeError.new(token.type, token)
+    opt_changes = {}
+    new_active_opts = active_opts.dup
+    MOD_FLAGS.each do |flag|
+      if positive.include?(flag.to_s)
+        opt_changes[flag] = new_active_opts[flag] = true
+      end
+      if negative.include?(flag.to_s)
+        opt_changes[flag] = false
+        new_active_opts.delete(flag)
+      end
+    end
+    if (enc_flag = positive.reverse[/[adu]/])
+      enc_flag = enc_flag.to_sym
+      (ENC_FLAGS - [enc_flag]).each do |other|
+        opt_changes[other] = false if new_active_opts[other]
+        new_active_opts.delete(other)
+      end
+      opt_changes[enc_flag] = new_active_opts[enc_flag] = true
     end
+    options_stack << new_active_opts
+    options_group = Group::Options.new(token, active_opts)
+    options_group.option_changes = opt_changes
+    nest(options_group)
   end
-  def set(token)
-    case token.token
-    when :open
-      open_set(token)
-    when :close
-      close_set
-    when :negate
-      negate_set
-    when :range
-      range(token)
-    when :intersection
-      intersection(token)
-    when :collation, :equivalent
-      node << Literal.new(token, active_opts)
-    else
-      raise UnknownTokenError.new('CharacterSet', token)
+  def open_group(token)
+    group_class =
+      case token.token
+      when :absence;     Group::Absence
+      when :atomic;      Group::Atomic
+      when :capture;     Group::Capture
+      when :named;       Group::Named
+      when :passive;     Group::Passive
+      when :lookahead;   Assertion::Lookahead
+      when :lookbehind;  Assertion::Lookbehind
+      when :nlookahead;  Assertion::NegativeLookahead
+      when :nlookbehind; Assertion::NegativeLookbehind
+      else
+        raise UnknownTokenError.new('Group type open', token)
+      end
+    group = group_class.new(token, active_opts)
+    if group.capturing?
+      group.number          = total_captured_group_count + 1
+      group.number_at_level = captured_group_count_at_level + 1
+      count_captured_group
     end
+    # Push the active options to the stack again. This way we can simply pop the
+    # stack for any group we close, no matter if it had its own options or not.
+    options_stack << active_opts
+    nest(group)
   end
-  def meta(token)
-    case token.token
-    when :dot
-      node << CharacterType::Any.new(token, active_opts)
-    when :alternation
-      sequence_operation(Alternation, token)
-    else
-      raise UnknownTokenError.new('Meta', token)
+  def total_captured_group_count
+    captured_group_counts.values.reduce(0, :+)
+  end
+  def captured_group_count_at_level
+    captured_group_counts[node.level]
+  end
+  def count_captured_group
+    captured_group_counts[node.level] += 1
+  end
+  def close_group
+    options_stack.pop unless switching_options
+    self.switching_options = false
+    decrease_nesting
+  end
+  def decrease_nesting
+    while nesting.last.is_a?(SequenceOperation)
+      nesting.pop
+      self.node = nesting.last
     end
+    nesting.pop
+    yield(node) if block_given?
+    self.node = nesting.last
+    self.node = node.last if node.last.is_a?(SequenceOperation)
   end
   def backref(token)
@@ -188,31 +247,9 @@ class Regexp::Parser
     end
   end
-  def type(token)
-    case token.token
-    when :digit
-      node << CharacterType::Digit.new(token, active_opts)
-    when :nondigit
-      node << CharacterType::NonDigit.new(token, active_opts)
-    when :hex
-      node << CharacterType::Hex.new(token, active_opts)
-    when :nonhex
-      node << CharacterType::NonHex.new(token, active_opts)
-    when :space
-      node << CharacterType::Space.new(token, active_opts)
-    when :nonspace
-      node << CharacterType::NonSpace.new(token, active_opts)
-    when :word
-      node << CharacterType::Word.new(token, active_opts)
-    when :nonword
-      node << CharacterType::NonWord.new(token, active_opts)
-    when :linebreak
-      node << CharacterType::Linebreak.new(token, active_opts)
-    when :xgrapheme
-      node << CharacterType::ExtendedGrapheme.new(token, active_opts)
-    else
-      raise UnknownTokenError.new('CharacterType', token)
-    end
+  def assign_effective_number(exp)
+    exp.effective_number =
+      exp.number + total_captured_group_count + (exp.number < 0 ? 1 : 0)
   end
   def conditional(token)
@@ -240,11 +277,118 @@ class Regexp::Parser
     end
   end
+  def nest_conditional(exp)
+    conditional_nesting.push(exp)
+    nest(exp)
+  end
+  def nest(exp)
+    nesting.push(exp)
+    node << exp
+    update_transplanted_subtree(exp, node)
+    self.node = exp
+  end
+  # subtrees are transplanted to build Alternations, Intersections, Ranges
+  def update_transplanted_subtree(exp, new_parent)
+    exp.nesting_level = new_parent.nesting_level + 1
+    exp.respond_to?(:each) &&
+      exp.each { |subexp| update_transplanted_subtree(subexp, exp) }
+  end
+  def escape(token)
+    case token.token
+    when :backspace;      node << EscapeSequence::Backspace.new(token, active_opts)
+    when :escape;         node << EscapeSequence::AsciiEscape.new(token, active_opts)
+    when :bell;           node << EscapeSequence::Bell.new(token, active_opts)
+    when :form_feed;      node << EscapeSequence::FormFeed.new(token, active_opts)
+    when :newline;        node << EscapeSequence::Newline.new(token, active_opts)
+    when :carriage;       node << EscapeSequence::Return.new(token, active_opts)
+    when :tab;            node << EscapeSequence::Tab.new(token, active_opts)
+    when :vertical_tab;   node << EscapeSequence::VerticalTab.new(token, active_opts)
+    when :codepoint;      node << EscapeSequence::Codepoint.new(token, active_opts)
+    when :codepoint_list; node << EscapeSequence::CodepointList.new(token, active_opts)
+    when :hex;            node << EscapeSequence::Hex.new(token, active_opts)
+    when :octal;          node << EscapeSequence::Octal.new(token, active_opts)
+    when :control
+      if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
+        node << EscapeSequence::MetaControl.new(token, active_opts)
+      else
+        node << EscapeSequence::Control.new(token, active_opts)
+      end
+    when :meta_sequence
+      if token.text =~ /\A\\M-\\[Cc]/
+        node << EscapeSequence::MetaControl.new(token, active_opts)
+      else
+        node << EscapeSequence::Meta.new(token, active_opts)
+      end
+    else
+      # treating everything else as a literal
+      # TODO: maybe split this up a bit more in v3.0.0?
+      # E.g. escaped quantifiers or set meta chars are not the same
+      # as stuff that would be a literal even without the backslash.
+      # Right now, they all end up here.
+      node << EscapeSequence::Literal.new(token, active_opts)
+    end
+  end
+  def free_space(token)
+    case token.token
+    when :comment
+      node << Comment.new(token, active_opts)
+    when :whitespace
+      if node.last.is_a?(WhiteSpace)
+        node.last.merge(WhiteSpace.new(token, active_opts))
+      else
+        node << WhiteSpace.new(token, active_opts)
+      end
+    else
+      raise UnknownTokenError.new('FreeSpace', token)
+    end
+  end
+  def keep(token)
+    node << Keep::Mark.new(token, active_opts)
+  end
+  def literal(token)
+    node << Literal.new(token, active_opts)
+  end
+  def meta(token)
+    case token.token
+    when :dot
+      node << CharacterType::Any.new(token, active_opts)
+    when :alternation
+      sequence_operation(Alternation, token)
+    else
+      raise UnknownTokenError.new('Meta', token)
+    end
+  end
+  def sequence_operation(klass, token)
+    unless node.is_a?(klass)
+      operator = klass.new(token, active_opts)
+      sequence = operator.add_sequence(active_opts)
+      sequence.expressions = node.expressions
+      node.expressions = []
+      nest(operator)
+    end
+    node.add_sequence(active_opts)
+  end
   def posixclass(token)
     node << PosixClass.new(token, active_opts)
   end
   include Regexp::Expression::UnicodeProperty
+  UPTokens = Regexp::Syntax::Token::UnicodeProperty
   def property(token)
     case token.token
@@ -316,128 +460,43 @@ class Regexp::Parser
     when :private_use;            node << Codepoint::PrivateUse.new(token, active_opts)
     when :unassigned;             node << Codepoint::Unassigned.new(token, active_opts)
-    when *Token::UnicodeProperty::Age
-      node << Age.new(token, active_opts)
-    when *Token::UnicodeProperty::Derived
-      node << Derived.new(token, active_opts)
-    when *Token::UnicodeProperty::Emoji
-      node << Emoji.new(token, active_opts)
-    when *Token::UnicodeProperty::Script
-      node << Script.new(token, active_opts)
-    when *Token::UnicodeProperty::UnicodeBlock
-      node << Block.new(token, active_opts)
+    when *UPTokens::Age;          node << Age.new(token, active_opts)
+    when *UPTokens::Derived;      node << Derived.new(token, active_opts)
+    when *UPTokens::Emoji;        node << Emoji.new(token, active_opts)
+    when *UPTokens::Script;       node << Script.new(token, active_opts)
+    when *UPTokens::UnicodeBlock; node << Block.new(token, active_opts)
     else
       raise UnknownTokenError.new('UnicodeProperty', token)
     end
   end
-  def anchor(token)
-    case token.token
-    when :bol
-      node << Anchor::BeginningOfLine.new(token, active_opts)
-    when :eol
-      node << Anchor::EndOfLine.new(token, active_opts)
-    when :bos
-      node << Anchor::BOS.new(token, active_opts)
-    when :eos
-      node << Anchor::EOS.new(token, active_opts)
-    when :eos_ob_eol
-      node << Anchor::EOSobEOL.new(token, active_opts)
-    when :word_boundary
-      node << Anchor::WordBoundary.new(token, active_opts)
-    when :nonword_boundary
-      node << Anchor::NonWordBoundary.new(token, active_opts)
-    when :match_start
-      node << Anchor::MatchStart.new(token, active_opts)
-    else
-      raise UnknownTokenError.new('Anchor', token)
-    end
-  end
-  def escape(token)
-    case token.token
-    when :backspace
-      node << EscapeSequence::Backspace.new(token, active_opts)
-    when :escape
-      node << EscapeSequence::AsciiEscape.new(token, active_opts)
-    when :bell
-      node << EscapeSequence::Bell.new(token, active_opts)
-    when :form_feed
-      node << EscapeSequence::FormFeed.new(token, active_opts)
-    when :newline
-      node << EscapeSequence::Newline.new(token, active_opts)
-    when :carriage
-      node << EscapeSequence::Return.new(token, active_opts)
-    when :tab
-      node << EscapeSequence::Tab.new(token, active_opts)
-    when :vertical_tab
-      node << EscapeSequence::VerticalTab.new(token, active_opts)
-    when :hex
-      node << EscapeSequence::Hex.new(token, active_opts)
-    when :octal
-      node << EscapeSequence::Octal.new(token, active_opts)
-    when :codepoint
-      node << EscapeSequence::Codepoint.new(token, active_opts)
-    when :codepoint_list
-      node << EscapeSequence::CodepointList.new(token, active_opts)
-    when :control
-      if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
-        node << EscapeSequence::MetaControl.new(token, active_opts)
-      else
-        node << EscapeSequence::Control.new(token, active_opts)
-      end
-    when :meta_sequence
-      if token.text =~ /\A\\M-\\[Cc]/
-        node << EscapeSequence::MetaControl.new(token, active_opts)
-      else
-        node << EscapeSequence::Meta.new(token, active_opts)
-      end
-    else
-      # treating everything else as a literal
-      node << EscapeSequence::Literal.new(token, active_opts)
-    end
-  end
-  def keep(token)
-    node << Keep::Mark.new(token, active_opts)
-  end
-  def free_space(token)
-    case token.token
-    when :comment
-      node << Comment.new(token, active_opts)
-    when :whitespace
-      if node.last.is_a?(WhiteSpace)
-        node.last.merge(WhiteSpace.new(token, active_opts))
-      else
-        node << WhiteSpace.new(token, active_opts)
-      end
-    else
-      raise UnknownTokenError.new('FreeSpace', token)
-    end
-  end
   def quantifier(token)
-    offset = -1
-    target_node = node.expressions[offset]
-    while target_node.is_a?(FreeSpace)
-      target_node = node.expressions[offset -= 1]
+    target_node = node.expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
+    target_node or raise ParserError, "No valid target found for '#{token.text}'"
+    # in case of chained quantifiers, wrap target in an implicit passive group
+    # description of the problem: https://github.com/ammar/regexp_parser/issues/3
+    # rationale for this solution: https://github.com/ammar/regexp_parser/pull/69
+    if target_node.quantified?
+      new_token = Regexp::Token.new(
+        :group,
+        :passive,
+        '', # text
+        target_node.ts,
+        nil, # te (unused)
+        target_node.level,
+        target_node.set_level,
+        target_node.conditional_level
+      )
+      new_group = Group::Passive.new(new_token, active_opts)
+      new_group.implicit = true
+      new_group << target_node
+      increase_level(target_node)
+      node.expressions[node.expressions.index(target_node)] = new_group
+      target_node = new_group
     end
-    target_node || raise(ArgumentError, 'No valid target found for '\
-                                        "'#{token.text}' ")
     case token.token
     when :zero_or_one
       target_node.quantify(:zero_or_one, token.text, 0, 1, :greedy)
@@ -468,6 +527,11 @@ class Regexp::Parser
     end
   end
+  def increase_level(exp)
+    exp.level += 1
+    exp.respond_to?(:each) && exp.each { |subexp| increase_level(subexp) }
+  end
   def interval(target_node, token)
     text = token.text
     mchr = text[text.length-1].chr =~ /[?+]/ ? text[text.length-1].chr : nil
@@ -490,100 +554,16 @@ class Regexp::Parser
     target_node.quantify(:interval, text, min.to_i, max.to_i, mode)
   end
-  def group(token)
-    case token.token
-    when :options, :options_switch
-      options_group(token)
-    when :close
-      close_group
-    when :comment
-      node << Group::Comment.new(token, active_opts)
-    else
-      open_group(token)
-    end
-  end
-  MOD_FLAGS = %w[i m x].map(&:to_sym)
-  ENC_FLAGS = %w[a d u].map(&:to_sym)
-  def options_group(token)
-    positive, negative = token.text.split('-', 2)
-    negative ||= ''
-    self.switching_options = token.token.equal?(:options_switch)
-    opt_changes = {}
-    new_active_opts = active_opts.dup
-    MOD_FLAGS.each do |flag|
-      if positive.include?(flag.to_s)
-        opt_changes[flag] = new_active_opts[flag] = true
-      end
-      if negative.include?(flag.to_s)
-        opt_changes[flag] = false
-        new_active_opts.delete(flag)
-      end
-    end
-    if (enc_flag = positive.reverse[/[adu]/])
-      enc_flag = enc_flag.to_sym
-      (ENC_FLAGS - [enc_flag]).each do |other|
-        opt_changes[other] = false if new_active_opts[other]
-        new_active_opts.delete(other)
-      end
-      opt_changes[enc_flag] = new_active_opts[enc_flag] = true
-    end
-    options_stack << new_active_opts
-    options_group = Group::Options.new(token, active_opts)
-    options_group.option_changes = opt_changes
-    nest(options_group)
-  end
-  def open_group(token)
+  def set(token)
     case token.token
-    when :passive
-      exp = Group::Passive.new(token, active_opts)
-    when :atomic
-      exp = Group::Atomic.new(token, active_opts)
-    when :named
-      exp = Group::Named.new(token, active_opts)
-    when :capture
-      exp = Group::Capture.new(token, active_opts)
-    when :absence
-      exp = Group::Absence.new(token, active_opts)
-    when :lookahead
-      exp = Assertion::Lookahead.new(token, active_opts)
-    when :nlookahead
-      exp = Assertion::NegativeLookahead.new(token, active_opts)
-    when :lookbehind
-      exp = Assertion::Lookbehind.new(token, active_opts)
-    when :nlookbehind
-      exp = Assertion::NegativeLookbehind.new(token, active_opts)
+    when :open;         open_set(token)
+    when :close;        close_set
+    when :negate;       negate_set
+    when :range;        range(token)
+    when :intersection; intersection(token)
     else
-      raise UnknownTokenError.new('Group type open', token)
-    end
-    if exp.capturing?
-      exp.number          = total_captured_group_count + 1
-      exp.number_at_level = captured_group_count_at_level + 1
-      count_captured_group
+      raise UnknownTokenError.new('CharacterSet', token)
     end
-    # Push the active options to the stack again. This way we can simply pop the
-    # stack for any group we close, no matter if it had its own options or not.
-    options_stack << active_opts
-    nest(exp)
-  end
-  def close_group
-    options_stack.pop unless switching_options
-    self.switching_options = false
-    decrease_nesting
   end
   def open_set(token)
@@ -606,51 +586,45 @@ class Regexp::Parser
     nest(exp)
   end
-  def close_completed_character_set_range
-    decrease_nesting if node.is_a?(CharacterSet::Range) && node.complete?
-  end
   def intersection(token)
     sequence_operation(CharacterSet::Intersection, token)
   end
-  def sequence_operation(klass, token)
-    unless node.is_a?(klass)
-      operator = klass.new(token, active_opts)
-      sequence = operator.add_sequence(active_opts)
-      sequence.expressions = node.expressions
-      node.expressions = []
-      nest(operator)
+  def type(token)
+    case token.token
+    when :digit;     node << CharacterType::Digit.new(token, active_opts)
+    when :hex;       node << CharacterType::Hex.new(token, active_opts)
+    when :linebreak; node << CharacterType::Linebreak.new(token, active_opts)
+    when :nondigit;  node << CharacterType::NonDigit.new(token, active_opts)
+    when :nonhex;    node << CharacterType::NonHex.new(token, active_opts)
+    when :nonspace;  node << CharacterType::NonSpace.new(token, active_opts)
+    when :nonword;   node << CharacterType::NonWord.new(token, active_opts)
+    when :space;     node << CharacterType::Space.new(token, active_opts)
+    when :word;      node << CharacterType::Word.new(token, active_opts)
+    when :xgrapheme; node << CharacterType::ExtendedGrapheme.new(token, active_opts)
+    else
+      raise UnknownTokenError.new('CharacterType', token)
     end
-    node.add_sequence(active_opts)
-  end
-  def active_opts
-    options_stack.last
-  end
-  def total_captured_group_count
-    captured_group_counts.values.reduce(0, :+)
-  end
-  def captured_group_count_at_level
-    captured_group_counts[node.level]
   end
-  def count_captured_group
-    captured_group_counts[node.level] += 1
+  def close_completed_character_set_range
+    decrease_nesting if node.is_a?(CharacterSet::Range) && node.complete?
   end
-  def assign_effective_number(exp)
-    exp.effective_number =
-      exp.number + total_captured_group_count + (exp.number < 0 ? 1 : 0)
+  def active_opts
+    options_stack.last
   end
+  # Assigns referenced expressions to referring expressions, e.g. if there is
+  # an instance of Backreference::Number, its #referenced_expression is set to
+  # the instance of Group::Capture that it refers to via its number.
   def assign_referenced_expressions
     targets = {}
+    # find all referenceable expressions
     root.each_expression do |exp|
       exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
     end
+    # assign them to any referring expressions
     root.each_expression do |exp|
       exp.respond_to?(:reference) &&
         exp.referenced_expression = targets[exp.reference]