RubyGems - regexp_parser - Versions diffs - 0.5.0 → 1.0.0 - Mend

regexp_parser 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +242 -0
data/Gemfile +1 -0
data/README.md +21 -17
data/Rakefile +31 -0
data/lib/regexp_parser/expression.rb +11 -9
data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
data/lib/regexp_parser/expression/classes/backref.rb +21 -16
data/lib/regexp_parser/expression/classes/escape.rb +81 -10
data/lib/regexp_parser/expression/classes/group.rb +20 -20
data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
data/lib/regexp_parser/expression/classes/property.rb +6 -0
data/lib/regexp_parser/expression/classes/set.rb +10 -93
data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
data/lib/regexp_parser/expression/methods/tests.rb +4 -14
data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
data/lib/regexp_parser/expression/quantifier.rb +3 -4
data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
data/lib/regexp_parser/expression/subexpression.rb +6 -10
data/lib/regexp_parser/lexer.rb +13 -17
data/lib/regexp_parser/parser.rb +170 -116
data/lib/regexp_parser/scanner.rb +952 -2431
data/lib/regexp_parser/scanner/char_type.rl +31 -0
data/lib/regexp_parser/scanner/properties/long.yml +561 -0
data/lib/regexp_parser/scanner/properties/short.yml +225 -0
data/lib/regexp_parser/scanner/property.rl +7 -806
data/lib/regexp_parser/scanner/scanner.rl +112 -154
data/lib/regexp_parser/syntax/base.rb +4 -4
data/lib/regexp_parser/syntax/tokens.rb +1 -0
data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
data/lib/regexp_parser/version.rb +1 -1
data/regexp_parser.gemspec +2 -1
data/test/expression/test_base.rb +2 -1
data/test/expression/test_clone.rb +0 -57
data/test/expression/test_set.rb +31 -8
data/test/expression/test_strfregexp.rb +13 -4
data/test/expression/test_subexpression.rb +25 -0
data/test/expression/test_traverse.rb +25 -25
data/test/helpers.rb +1 -0
data/test/lexer/test_all.rb +1 -1
data/test/lexer/test_conditionals.rb +9 -7
data/test/lexer/test_nesting.rb +39 -21
data/test/lexer/test_refcalls.rb +4 -4
data/test/parser/set/test_intersections.rb +127 -0
data/test/parser/set/test_ranges.rb +111 -0
data/test/parser/test_all.rb +4 -1
data/test/parser/test_escapes.rb +41 -9
data/test/parser/test_groups.rb +22 -3
data/test/parser/test_posix_classes.rb +27 -0
data/test/parser/test_properties.rb +17 -290
data/test/parser/test_refcalls.rb +66 -26
data/test/parser/test_sets.rb +132 -129
data/test/scanner/test_all.rb +1 -7
data/test/scanner/test_conditionals.rb +16 -16
data/test/scanner/test_errors.rb +0 -30
data/test/scanner/test_escapes.rb +1 -2
data/test/scanner/test_free_space.rb +28 -28
data/test/scanner/test_groups.rb +35 -35
data/test/scanner/test_meta.rb +1 -1
data/test/scanner/test_properties.rb +87 -114
data/test/scanner/test_refcalls.rb +18 -18
data/test/scanner/test_scripts.rb +19 -351
data/test/scanner/test_sets.rb +87 -60
data/test/scanner/test_unicode_blocks.rb +4 -105
data/test/support/warning_extractor.rb +1 -1
data/test/syntax/test_syntax.rb +7 -0
data/test/syntax/versions/test_1.8.rb +2 -4
metadata +17 -7
data/ChangeLog +0 -325
data/test/scanner/test_emojis.rb +0 -31

data/lib/regexp_parser/expression/methods/tests.rb CHANGED Viewed

@@ -7,22 +7,12 @@ module Regexp::Expression
     #   # is it a :group expression
     #   exp.type? :group
     #
-    #   # is it a :set, :subset, or :meta
-    #   exp.type? [:set, :subset, :meta]
+    #   # is it a :set, or :meta
+    #   exp.type? [:set, :meta]
     #
     def type?(test_type)
-      case test_type
-      when Array
-        if test_type.include?(:*)
-          return (test_type.include?(type) or test_type.include?(:*))
-        else
-          return test_type.include?(type)
-        end
-      when Symbol
-        return (type == test_type or test_type == :*)
-      else
-        raise "Array or Symbol expected, #{test_type.class.name} given"
-      end
+      test_types = Array(test_type).map(&:to_sym)
+      test_types.include?(:*) || test_types.include?(type)
     end
     # Test if this expression has the given test_token, and optionally a given

data/lib/regexp_parser/expression/methods/traverse.rb CHANGED Viewed

@@ -45,7 +45,7 @@ module Regexp::Expression
     # Returns a new array with the results of calling the given block once
     # for every expression. If a block is not given, returns an array with
     # each expression and its level index as an array.
-    def map(include_self = false, &block)
+    def flat_map(include_self = false, &block)
       result = []
       each_expression(include_self) do |exp, index|

data/lib/regexp_parser/expression/quantifier.rb CHANGED Viewed

@@ -11,10 +11,9 @@ module Regexp::Expression
       @max   = max
     end
-    def clone
-      copy = dup
-      copy.instance_variable_set(:@text, text.dup)
-      copy
+    def initialize_clone(other)
+      other.instance_variable_set(:@text, text.dup)
+      super
     end
     def to_s

data/lib/regexp_parser/expression/sequence_operation.rb ADDED Viewed

@@ -0,0 +1,34 @@
+module Regexp::Expression
+  # abstract class
+  class SequenceOperation < Regexp::Expression::Subexpression
+    alias :sequences :expressions
+    alias :operands :expressions
+    alias :operator :text
+    def starts_at
+      expressions.first.starts_at
+    end
+    alias :ts :starts_at
+    def <<(exp)
+      expressions.last << exp
+    end
+    def add_sequence
+      exp = self.class::OPERAND.new(level, set_level, conditional_level)
+      exp.nesting_level = nesting_level + 1
+      expressions << exp
+      exp
+    end
+    def quantify(token, text, min = nil, max = nil, mode = :greedy)
+      sequences.last.last.quantify(token, text, min, max, mode)
+    end
+    def to_s(format = :full)
+      sequences.map { |e| e.to_s(format) }.join(text)
+    end
+  end
+end

data/lib/regexp_parser/expression/subexpression.rb CHANGED Viewed

@@ -10,26 +10,22 @@ module Regexp::Expression
     end
     # Override base method to clone the expressions as well.
-    def clone
-      copy = super
-      copy.expressions = expressions.map(&:clone)
-      copy
+    def initialize_clone(other)
+      other.expressions = expressions.map(&:clone)
+      super
     end
     def <<(exp)
       if exp.is_a?(WhiteSpace) && last && last.is_a?(WhiteSpace)
         last.merge(exp)
       else
+        exp.nesting_level = nesting_level + 1
         expressions << exp
       end
     end
-    def insert(exp)
-      expressions.insert(0, exp)
-    end
-    %w[[] all? any? at count each each_with_index empty?
-       fetch find first index join last length values_at].each do |m|
+    %w[[] all? any? at collect count each each_with_index empty?
+       fetch find first index join last length map values_at].each do |m|
       define_method(m) { |*args, &block| expressions.send(m, *args, &block) }
     end

data/lib/regexp_parser/lexer.rb CHANGED Viewed

@@ -4,9 +4,10 @@
 # given syntax flavor.
 class Regexp::Lexer
-  OPENING_TOKENS = [:capture, :options, :passive, :atomic, :named, :absence,
-                    :lookahead, :nlookahead, :lookbehind, :nlookbehind
-                   ].freeze
+  OPENING_TOKENS = [
+    :capture, :passive, :lookahead, :nlookahead, :lookbehind, :nlookbehind,
+    :atomic, :options, :options_switch, :named, :absence
+  ].freeze
   CLOSING_TOKENS = [:close].freeze
@@ -36,6 +37,7 @@ class Regexp::Lexer
                 nesting, set_nesting, conditional_nesting)
       current = merge_literal(current) if type == :literal and
+        set_nesting == 0 and
         last and last.type == :literal
       current = merge_condition(current) if type == :conditional and
@@ -66,29 +68,23 @@ class Regexp::Lexer
   attr_accessor :tokens, :nesting, :set_nesting, :conditional_nesting
   def ascend(type, token)
-    if type == :group or type == :assertion
+    case type
+    when :group, :assertion
       self.nesting = nesting - 1 if CLOSING_TOKENS.include?(token)
-    end
-    if type == :set or type == :subset
+    when :set
       self.set_nesting = set_nesting - 1 if token == :close
-    end
-    if type == :conditional
+    when :conditional
       self.conditional_nesting = conditional_nesting - 1 if token == :close
     end
   end
   def descend(type, token)
-    if type == :group or type == :assertion
+    case type
+    when :group, :assertion
       self.nesting = nesting + 1 if OPENING_TOKENS.include?(token)
-    end
-    if type == :set or type == :subset
+    when :set
       self.set_nesting = set_nesting + 1 if token == :open
-    end
-    if type == :conditional
+    when :conditional
       self.conditional_nesting = conditional_nesting + 1 if token == :open
     end
   end

data/lib/regexp_parser/parser.rb CHANGED Viewed

@@ -33,6 +33,8 @@ class Regexp::Parser
     self.switching_options = false
     self.conditional_nesting = []
+    self.captured_group_counts = Hash.new(0)
     Regexp::Lexer.scan(input, syntax) do |token|
       parse_token(token)
     end
@@ -48,7 +50,7 @@ class Regexp::Parser
   attr_accessor :root, :node, :nesting,
                 :options_stack, :switching_options, :conditional_nesting,
-                :current_set
+                :captured_group_counts
   def options_from_input(input)
     return {} unless input.is_a?(::Regexp)
@@ -63,9 +65,28 @@ class Regexp::Parser
   def nest(exp)
     nesting.push(exp)
     node << exp
+    update_transplanted_subtree(exp, node)
     self.node = exp
   end
+  # subtrees are transplanted to build Alternations, Intersections, Ranges
+  def update_transplanted_subtree(exp, new_parent)
+    exp.nesting_level = new_parent.nesting_level + 1
+    exp.respond_to?(:each) &&
+      exp.each { |subexp| update_transplanted_subtree(subexp, exp) }
+  end
+  def decrease_nesting
+    while nesting.last.is_a?(SequenceOperation)
+      nesting.pop
+      self.node = nesting.last
+    end
+    nesting.pop
+    yield(node) if block_given?
+    self.node = nesting.last
+    self.node = node.last if node.last.is_a?(SequenceOperation)
+  end
   def nest_conditional(exp)
     conditional_nesting.push(exp)
     node << exp
@@ -73,6 +94,8 @@ class Regexp::Parser
   end
   def parse_token(token)
+    close_completed_character_set_range
     case token.type
     when :meta;         meta(token)
     when :quantifier;   quantifier(token)
@@ -80,12 +103,14 @@ class Regexp::Parser
     when :escape;       escape(token)
     when :group;        group(token)
     when :assertion;    group(token)
-    when :set, :subset; set(token)
+    when :set;          set(token)
     when :type;         type(token)
     when :backref;      backref(token)
     when :conditional;  conditional(token)
     when :keep;         keep(token)
+    when :posixclass, :nonposixclass
+      posixclass(token)
     when :property, :nonproperty
       property(token)
@@ -104,17 +129,15 @@ class Regexp::Parser
     when :open
       open_set(token)
     when :close
-      close_set(token)
+      close_set
     when :negate
       negate_set
-    when :member, :range, :escape, :collation, :equivalent
-      append_set(token)
-    when *Token::Escape::All
-      append_set(token)
-    when *Token::CharacterSet::All
-      append_set(token)
-    when *Token::UnicodeProperty::All
-      append_set(token)
+    when :range
+      range(token)
+    when :intersection
+      intersection(token)
+    when :collation, :equivalent
+      node << Literal.new(token, active_opts)
     else
       raise UnknownTokenError.new('CharacterSet', token)
     end
@@ -125,19 +148,7 @@ class Regexp::Parser
     when :dot
       node << CharacterType::Any.new(token, active_opts)
     when :alternation
-      if node.token == :alternation
-      elsif node.last.is_a?(Alternation)
-        self.node = node.last
-      else
-        alt = Alternation.new(token, active_opts)
-        seq = Alternative.new(alt.level, alt.set_level, alt.conditional_level)
-        node.expressions.count.times { seq.insert(node.expressions.pop) }
-        alt.alternative(seq)
-        node << alt
-        self.node = alt
-      end
-      node.alternative
+      sequence_operation(Alternation, token)
     else
       raise UnknownTokenError.new('Meta', token)
     end
@@ -147,16 +158,16 @@ class Regexp::Parser
     case token.token
     when :name_ref
       node << Backreference::Name.new(token, active_opts)
-    when :name_nest_ref
-      node << Backreference::NameNestLevel.new(token, active_opts)
+    when :name_recursion_ref
+      node << Backreference::NameRecursionLevel.new(token, active_opts)
     when :name_call
       node << Backreference::NameCall.new(token, active_opts)
     when :number, :number_ref
       node << Backreference::Number.new(token, active_opts)
     when :number_rel_ref
       node << Backreference::NumberRelative.new(token, active_opts)
-    when :number_nest_ref
-      node << Backreference::NumberNestLevel.new(token, active_opts)
+    when :number_recursion_ref
+      node << Backreference::NumberRecursionLevel.new(token, active_opts)
     when :number_call
       node << Backreference::NumberCall.new(token, active_opts)
     when :number_rel_call
@@ -217,75 +228,81 @@ class Regexp::Parser
     end
   end
+  def posixclass(token)
+    node << PosixClass.new(token)
+  end
   include Regexp::Expression::UnicodeProperty
   def property(token)
     case token.token
-    when :alnum;            node << Alnum.new(token, active_opts)
-    when :alpha;            node << Alpha.new(token, active_opts)
-    when :ascii;            node << Ascii.new(token, active_opts)
-    when :blank;            node << Blank.new(token, active_opts)
-    when :cntrl;            node << Cntrl.new(token, active_opts)
-    when :digit;            node << Digit.new(token, active_opts)
-    when :graph;            node << Graph.new(token, active_opts)
-    when :lower;            node << Lower.new(token, active_opts)
-    when :print;            node << Print.new(token, active_opts)
-    when :punct;            node << Punct.new(token, active_opts)
-    when :space;            node << Space.new(token, active_opts)
-    when :upper;            node << Upper.new(token, active_opts)
-    when :word;             node << Word.new(token, active_opts)
-    when :xdigit;           node << Xdigit.new(token, active_opts)
-    when :xposixpunct;      node << XPosixPunct.new(token, active_opts)
+    when :alnum;                  node << Alnum.new(token, active_opts)
+    when :alpha;                  node << Alpha.new(token, active_opts)
+    when :ascii;                  node << Ascii.new(token, active_opts)
+    when :blank;                  node << Blank.new(token, active_opts)
+    when :cntrl;                  node << Cntrl.new(token, active_opts)
+    when :digit;                  node << Digit.new(token, active_opts)
+    when :graph;                  node << Graph.new(token, active_opts)
+    when :lower;                  node << Lower.new(token, active_opts)
+    when :print;                  node << Print.new(token, active_opts)
+    when :punct;                  node << Punct.new(token, active_opts)
+    when :space;                  node << Space.new(token, active_opts)
+    when :upper;                  node << Upper.new(token, active_opts)
+    when :word;                   node << Word.new(token, active_opts)
+    when :xdigit;                 node << Xdigit.new(token, active_opts)
+    when :xposixpunct;            node << XPosixPunct.new(token, active_opts)
     # only in Oniguruma (old rubies)
-    when :newline;          node << Newline.new(token, active_opts)
-    when :any;              node << Any.new(token, active_opts)
-    when :assigned;         node << Assigned.new(token, active_opts)
-    when :letter_any;       node << Letter::Any.new(token, active_opts)
-    when :letter_uppercase; node << Letter::Uppercase.new(token, active_opts)
-    when :letter_lowercase; node << Letter::Lowercase.new(token, active_opts)
-    when :letter_titlecase; node << Letter::Titlecase.new(token, active_opts)
-    when :letter_modifier;  node << Letter::Modifier.new(token, active_opts)
-    when :letter_other;     node << Letter::Other.new(token, active_opts)
-    when :mark_any;         node << Mark::Any.new(token, active_opts)
-    when :mark_nonspacing;  node << Mark::Nonspacing.new(token, active_opts)
-    when :mark_spacing;     node << Mark::Spacing.new(token, active_opts)
-    when :mark_enclosing;   node << Mark::Enclosing.new(token, active_opts)
-    when :number_any;       node << Number::Any.new(token, active_opts)
-    when :number_decimal;   node << Number::Decimal.new(token, active_opts)
-    when :number_letter;    node << Number::Letter.new(token, active_opts)
-    when :number_other;     node << Number::Other.new(token, active_opts)
-    when :punct_any;        node << Punctuation::Any.new(token, active_opts)
-    when :punct_connector;  node << Punctuation::Connector.new(token, active_opts)
-    when :punct_dash;       node << Punctuation::Dash.new(token, active_opts)
-    when :punct_open;       node << Punctuation::Open.new(token, active_opts)
-    when :punct_close;      node << Punctuation::Close.new(token, active_opts)
-    when :punct_initial;    node << Punctuation::Initial.new(token, active_opts)
-    when :punct_final;      node << Punctuation::Final.new(token, active_opts)
-    when :punct_other;      node << Punctuation::Other.new(token, active_opts)
-    when :separator_any;    node << Separator::Any.new(token, active_opts)
-    when :separator_space;  node << Separator::Space.new(token, active_opts)
-    when :separator_line;   node << Separator::Line.new(token, active_opts)
-    when :separator_para;   node << Separator::Paragraph.new(token, active_opts)
-    when :symbol_any;       node << Symbol::Any.new(token, active_opts)
-    when :symbol_math;      node << Symbol::Math.new(token, active_opts)
-    when :symbol_currency;  node << Symbol::Currency.new(token, active_opts)
-    when :symbol_modifier;  node << Symbol::Modifier.new(token, active_opts)
-    when :symbol_other;     node << Symbol::Other.new(token, active_opts)
-    when :other;            node << Codepoint::Any.new(token, active_opts)
-    when :control;          node << Codepoint::Control.new(token, active_opts)
-    when :format;           node << Codepoint::Format.new(token, active_opts)
-    when :surrogate;        node << Codepoint::Surrogate.new(token, active_opts)
-    when :private_use;      node << Codepoint::PrivateUse.new(token, active_opts)
-    when :unassigned;       node << Codepoint::Unassigned.new(token, active_opts)
+    when :newline;                node << Newline.new(token, active_opts)
+    when :any;                    node << Any.new(token, active_opts)
+    when :assigned;               node << Assigned.new(token, active_opts)
+    when :letter;                 node << Letter::Any.new(token, active_opts)
+    when :cased_letter;           node << Letter::Cased.new(token, active_opts)
+    when :uppercase_letter;       node << Letter::Uppercase.new(token, active_opts)
+    when :lowercase_letter;       node << Letter::Lowercase.new(token, active_opts)
+    when :titlecase_letter;       node << Letter::Titlecase.new(token, active_opts)
+    when :modifier_letter;        node << Letter::Modifier.new(token, active_opts)
+    when :other_letter;           node << Letter::Other.new(token, active_opts)
+    when :mark;                   node << Mark::Any.new(token, active_opts)
+    when :combining_mark;         node << Mark::Combining.new(token, active_opts)
+    when :nonspacing_mark;        node << Mark::Nonspacing.new(token, active_opts)
+    when :spacing_mark;           node << Mark::Spacing.new(token, active_opts)
+    when :enclosing_mark;         node << Mark::Enclosing.new(token, active_opts)
+    when :number;                 node << Number::Any.new(token, active_opts)
+    when :decimal_number;         node << Number::Decimal.new(token, active_opts)
+    when :letter_number;          node << Number::Letter.new(token, active_opts)
+    when :other_number;           node << Number::Other.new(token, active_opts)
+    when :punctuation;            node << Punctuation::Any.new(token, active_opts)
+    when :connector_punctuation;  node << Punctuation::Connector.new(token, active_opts)
+    when :dash_punctuation;       node << Punctuation::Dash.new(token, active_opts)
+    when :open_punctuation;       node << Punctuation::Open.new(token, active_opts)
+    when :close_punctuation;      node << Punctuation::Close.new(token, active_opts)
+    when :initial_punctuation;    node << Punctuation::Initial.new(token, active_opts)
+    when :final_punctuation;      node << Punctuation::Final.new(token, active_opts)
+    when :other_punctuation;      node << Punctuation::Other.new(token, active_opts)
+    when :separator;              node << Separator::Any.new(token, active_opts)
+    when :space_separator;        node << Separator::Space.new(token, active_opts)
+    when :line_separator;         node << Separator::Line.new(token, active_opts)
+    when :paragraph_separator;    node << Separator::Paragraph.new(token, active_opts)
+    when :symbol;                 node << Symbol::Any.new(token, active_opts)
+    when :math_symbol;            node << Symbol::Math.new(token, active_opts)
+    when :currency_symbol;        node << Symbol::Currency.new(token, active_opts)
+    when :modifier_symbol;        node << Symbol::Modifier.new(token, active_opts)
+    when :other_symbol;           node << Symbol::Other.new(token, active_opts)
+    when :other;                  node << Codepoint::Any.new(token, active_opts)
+    when :control;                node << Codepoint::Control.new(token, active_opts)
+    when :format;                 node << Codepoint::Format.new(token, active_opts)
+    when :surrogate;              node << Codepoint::Surrogate.new(token, active_opts)
+    when :private_use;            node << Codepoint::PrivateUse.new(token, active_opts)
+    when :unassigned;             node << Codepoint::Unassigned.new(token, active_opts)
     when *Token::UnicodeProperty::Age
       node << Age.new(token, active_opts)
@@ -346,13 +363,20 @@ class Regexp::Parser
       node << EscapeSequence::Newline.new(token, active_opts)
     when :carriage
       node << EscapeSequence::Return.new(token, active_opts)
-    when :space
-      node << EscapeSequence::Space.new(token, active_opts)
     when :tab
       node << EscapeSequence::Tab.new(token, active_opts)
     when :vertical_tab
       node << EscapeSequence::VerticalTab.new(token, active_opts)
+    when :hex
+      node << EscapeSequence::Hex.new(token, active_opts)
+    when :octal
+      node << EscapeSequence::Octal.new(token, active_opts)
+    when :codepoint
+      node << EscapeSequence::Codepoint.new(token, active_opts)
+    when :codepoint_list
+      node << EscapeSequence::CodepointList.new(token, active_opts)
     when :control
       if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
         node << EscapeSequence::MetaControl.new(token, active_opts)
@@ -447,7 +471,7 @@ class Regexp::Parser
       mode = :greedy
     end
-    range = range_text.gsub(/\{|\}/, '').split(',', 2).each {|i| i.strip}
+    range = range_text.gsub(/\{|\}/, '').split(',', 2)
     min = range[0].empty? ? 0 : range[0]
     max = range[1] ? (range[1].empty? ? -1 : range[1]) : min
@@ -456,7 +480,7 @@ class Regexp::Parser
   def group(token)
     case token.token
-    when :options
+    when :options, :options_switch
       options_group(token)
     when :close
       close_group
@@ -470,8 +494,7 @@ class Regexp::Parser
   def options_group(token)
     positive, negative = token.text.split('-', 2)
     negative ||= ''
-    self.switching_options = !token.text.include?(':')
-    # TODO: change this -^ to token.type == :options_switch in v1.0.0
+    self.switching_options = token.token.equal?(:options_switch)
     new_options = active_opts.dup
@@ -491,9 +514,7 @@ class Regexp::Parser
     options_stack << new_options
-    exp = Group::Options.new(token, active_opts)
-    nest(exp)
+    nest(Group::Options.new(token, active_opts))
   end
   def open_group(token)
@@ -522,6 +543,12 @@ class Regexp::Parser
       raise UnknownTokenError.new('Group type open', token)
     end
+    if exp.capturing?
+      exp.number          = total_captured_group_count + 1
+      exp.number_at_level = captured_group_count_at_level + 1
+      count_captured_group
+    end
     # Push the active options to the stack again. This way we can simply pop the
     # stack for any group we close, no matter if it had its own options or not.
     options_stack << active_opts
@@ -530,38 +557,65 @@ class Regexp::Parser
   end
   def close_group
-    nesting.pop
     options_stack.pop unless switching_options
     self.switching_options = false
-    self.node = nesting.last
-    self.node = node.last if node.last and node.last.is_a?(Alternation)
+    decrease_nesting
   end
   def open_set(token)
     token.token = :character
-    if token.type == :subset
-      current_set << CharacterSubSet.new(token, active_opts)
-    else
-      self.current_set = CharacterSet.new(token, active_opts)
-      node << current_set
-    end
+    nest(CharacterSet.new(token, active_opts))
   end
   def negate_set
-    current_set.negate
+    node.negate
   end
-  def append_set(token)
-    current_set << token.text
+  def close_set
+    decrease_nesting(&:close)
   end
-  def close_set(token)
-    current_set.close
+  def range(token)
+    exp = CharacterSet::Range.new(token, active_opts)
+    scope = node.last.is_a?(CharacterSet::IntersectedSequence) ? node.last : node
+    exp << scope.expressions.pop
+    nest(exp)
+  end
+  def close_completed_character_set_range
+    decrease_nesting if node.is_a?(CharacterSet::Range) && node.complete?
+  end
+  def intersection(token)
+    sequence_operation(CharacterSet::Intersection, token)
+  end
+  def sequence_operation(klass, token)
+    if node.last.is_a?(klass)
+      self.node = node.last
+    elsif !node.is_a?(klass)
+      operator = klass.new(token, active_opts)
+      sequence = operator.add_sequence
+      sequence.expressions = node.expressions
+      node.expressions = []
+      nest(operator)
+    end
+    node.add_sequence
   end
   def active_opts
     options_stack.last
   end
+  def total_captured_group_count
+    captured_group_counts.values.reduce(0, :+)
+  end
+  def captured_group_count_at_level
+    captured_group_counts[node.level]
+  end
+  def count_captured_group
+    captured_group_counts[node.level] += 1
+  end
 end # module Regexp::Parser