regexp_parser 2.0.3 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +41 -3
- data/Gemfile +5 -1
- data/README.md +1 -1
- data/Rakefile +6 -6
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression.rb +3 -2
- data/lib/regexp_parser/expression/classes/backref.rb +5 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
- data/lib/regexp_parser/expression/classes/group.rb +6 -1
- data/lib/regexp_parser/expression/classes/property.rb +1 -1
- data/lib/regexp_parser/expression/classes/set/range.rb +2 -1
- data/lib/regexp_parser/expression/quantifier.rb +1 -1
- data/lib/regexp_parser/expression/sequence.rb +3 -9
- data/lib/regexp_parser/expression/subexpression.rb +1 -1
- data/lib/regexp_parser/parser.rb +282 -332
- data/lib/regexp_parser/scanner.rb +1019 -1006
- data/lib/regexp_parser/scanner/scanner.rl +56 -79
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/syntax/any.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/spec/expression/clone_spec.rb +36 -4
- data/spec/expression/free_space_spec.rb +2 -2
- data/spec/expression/methods/match_length_spec.rb +2 -2
- data/spec/lexer/refcalls_spec.rb +5 -0
- data/spec/parser/all_spec.rb +2 -2
- data/spec/parser/refcalls_spec.rb +5 -0
- data/spec/scanner/escapes_spec.rb +1 -1
- data/spec/scanner/refcalls_spec.rb +19 -0
- data/spec/scanner/sets_spec.rb +42 -11
- metadata +4 -3
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 077b8a0c90d90cf46e44671ec1335a5373eef72c61a0bcf4de43ba5217a188c3
         | 
| 4 | 
            +
              data.tar.gz: b9aed868af73adcdf40c09720c5d10091b25a53b25a792717ceb5591039a2931
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 9c04d9a6434c6e3f322e97e8e2a1c86b3ddda88bd8821368a37b92f5836e4c3df1dc27a79165303420c3e8d5eea31bda1483824da01a40ce30961b645ba65ddd
         | 
| 7 | 
            +
              data.tar.gz: 01e5c261e9dca0c4df7c696128dbc0520ca40aa6b9393cc8d6c3bdb8386470aeb773566000b811f98c1407038216c8d2c0b444c7955ea5a881ac759796f8a440
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    | @@ -1,14 +1,52 @@ | |
| 1 1 | 
             
            ## [Unreleased]
         | 
| 2 2 |  | 
| 3 | 
            +
            ## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            ### Fixed
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            - fixed `NameError` when requiring only `'regexp_parser/scanner'` in v2.1.0
         | 
| 8 | 
            +
              * thanks to [Jared White and Sam Ruby](https://github.com/ruby2js/ruby2js) for the report
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            ## [2.1.0] - 2021-02-22 - [Janosch Müller](mailto:janosch84@gmail.com)
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            ### Added
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            - common ancestor for all scanning/parsing/lexing errors
         | 
| 15 | 
            +
              * `Regexp::Parser::Error` can now be rescued as a catch-all
         | 
| 16 | 
            +
              * the following errors (and their many descendants) now inherit from it:
         | 
| 17 | 
            +
                - `Regexp::Expression::Conditional::TooManyBranches`
         | 
| 18 | 
            +
                - `Regexp::Parser::ParserError`
         | 
| 19 | 
            +
                - `Regexp::Scanner::ScannerError`
         | 
| 20 | 
            +
                - `Regexp::Scanner::ValidationError`
         | 
| 21 | 
            +
                - `Regexp::Syntax::SyntaxError`
         | 
| 22 | 
            +
              * it replaces `ArgumentError` in some rare cases (`Regexp::Parser.parse('?')`)
         | 
| 23 | 
            +
              * thanks to [sandstrom](https://github.com/sandstrom) for the cue
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            ### Fixed
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            - fixed scanning of whole-pattern recursion calls `\g<0>` and `\g'0'`
         | 
| 28 | 
            +
              * a regression in v2.0.1 had caused them to be scanned as literals
         | 
| 29 | 
            +
            - fixed scanning of some backreference and subexpression call edge cases
         | 
| 30 | 
            +
              * e.g. `\k<+1>`, `\g<x-1>`
         | 
| 31 | 
            +
            - fixed tokenization of some escapes in character sets
         | 
| 32 | 
            +
              * `.`, `|`, `{`, `}`, `(`, `)`, `^`, `$`, `?`, `+`, `*`
         | 
| 33 | 
            +
              * all of these correctly emitted `#type` `:literal` and `#token` `:literal` if *not* escaped
         | 
| 34 | 
            +
              * if escaped, they emitted e.g. `#type` `:escape` and `#token` `:group_open` for `[\(]`
         | 
| 35 | 
            +
              * the escaped versions now correctly emit `#type` `:escape` and `#token` `:literal`
         | 
| 36 | 
            +
            - fixed handling of control/metacontrol escapes in character sets
         | 
| 37 | 
            +
              * e.g. `[\cX]`, `[\M-\C-X]`
         | 
| 38 | 
            +
              * they were misread as a bunch of individual literals, escapes, and ranges
         | 
| 39 | 
            +
            - fixed some cases where calling `#dup`/`#clone` on expressions led to shared state
         | 
| 40 | 
            +
             | 
| 3 41 | 
             
            ## [2.0.3] - 2020-12-28 - [Janosch Müller](mailto:janosch84@gmail.com)
         | 
| 4 42 |  | 
| 5 43 | 
             
            ### Fixed
         | 
| 6 44 |  | 
| 7 45 | 
             
            - fixed error when scanning some unlikely and redundant but valid charset patterns
         | 
| 8 | 
            -
               | 
| 46 | 
            +
              * e.g. `/[[.a-b.]]/`, `/[[=e=]]/`,
         | 
| 9 47 | 
             
            - fixed ancestry of some error classes related to syntax version lookup
         | 
| 10 | 
            -
               | 
| 11 | 
            -
               | 
| 48 | 
            +
              * `NotImplementedError`, `InvalidVersionNameError`, `UnknownSyntaxNameError`
         | 
| 49 | 
            +
              * they now correctly inherit from `Regexp::Syntax::SyntaxError` instead of Ruby's `::SyntaxError`
         | 
| 12 50 |  | 
| 13 51 | 
             
            ## [2.0.2] - 2020-12-25 - [Janosch Müller](mailto:janosch84@gmail.com)
         | 
| 14 52 |  | 
    
        data/Gemfile
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | @@ -1,6 +1,6 @@ | |
| 1 1 | 
             
            # Regexp::Parser
         | 
| 2 2 |  | 
| 3 | 
            -
            [](http://badge.fury.io/rb/regexp_parser) [](https://github.com/ammar/regexp_parser/actions) [](https://codeclimate.com/github/ammar/regexp_parser/badges)
         | 
| 3 | 
            +
            [](http://badge.fury.io/rb/regexp_parser) [](https://github.com/ammar/regexp_parser/actions) [](https://github.com/ammar/regexp_parser/actions) [](https://codeclimate.com/github/ammar/regexp_parser/badges)
         | 
| 4 4 |  | 
| 5 5 | 
             
            A Ruby gem for tokenizing, parsing, and transforming regular expressions.
         | 
| 6 6 |  | 
    
        data/Rakefile
    CHANGED
    
    | @@ -7,8 +7,8 @@ require 'bundler' | |
| 7 7 | 
             
            require 'rubygems/package_task'
         | 
| 8 8 |  | 
| 9 9 |  | 
| 10 | 
            -
            RAGEL_SOURCE_DIR = File. | 
| 11 | 
            -
            RAGEL_OUTPUT_DIR = File. | 
| 10 | 
            +
            RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
         | 
| 11 | 
            +
            RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
         | 
| 12 12 | 
             
            RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
         | 
| 13 13 |  | 
| 14 14 |  | 
| @@ -26,10 +26,10 @@ end | |
| 26 26 | 
             
            namespace :ragel do
         | 
| 27 27 | 
             
              desc "Process the ragel source files and output ruby code"
         | 
| 28 28 | 
             
              task :rb do
         | 
| 29 | 
            -
                RAGEL_SOURCE_FILES.each do | | 
| 30 | 
            -
                  output_file = "#{RAGEL_OUTPUT_DIR}/#{ | 
| 29 | 
            +
                RAGEL_SOURCE_FILES.each do |source_file|
         | 
| 30 | 
            +
                  output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
         | 
| 31 31 | 
             
                  # using faster flat table driven FSM, about 25% larger code, but about 30% faster
         | 
| 32 | 
            -
                  sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{ | 
| 32 | 
            +
                  sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
         | 
| 33 33 |  | 
| 34 34 | 
             
                  contents = File.read(output_file)
         | 
| 35 35 |  | 
| @@ -61,7 +61,7 @@ namespace :props do | |
| 61 61 | 
             
              task :update do
         | 
| 62 62 | 
             
                require 'regexp_property_values'
         | 
| 63 63 | 
             
                RegexpPropertyValues.update
         | 
| 64 | 
            -
                dir = File. | 
| 64 | 
            +
                dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
         | 
| 65 65 |  | 
| 66 66 | 
             
                require 'psych'
         | 
| 67 67 | 
             
                write_hash_to_file = ->(hash, path) do
         | 
| @@ -1,5 +1,6 @@ | |
| 1 | 
            -
             | 
| 1 | 
            +
            require 'regexp_parser/error'
         | 
| 2 2 |  | 
| 3 | 
            +
            module Regexp::Expression
         | 
| 3 4 | 
             
              class Base
         | 
| 4 5 | 
             
                attr_accessor :type, :token
         | 
| 5 6 | 
             
                attr_accessor :text, :ts
         | 
| @@ -21,7 +22,7 @@ module Regexp::Expression | |
| 21 22 | 
             
                  self.options           = options
         | 
| 22 23 | 
             
                end
         | 
| 23 24 |  | 
| 24 | 
            -
                def  | 
| 25 | 
            +
                def initialize_copy(orig)
         | 
| 25 26 | 
             
                  self.text       = (orig.text       ? orig.text.dup         : nil)
         | 
| 26 27 | 
             
                  self.options    = (orig.options    ? orig.options.dup      : nil)
         | 
| 27 28 | 
             
                  self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
         | 
| @@ -2,6 +2,11 @@ module Regexp::Expression | |
| 2 2 | 
             
              module Backreference
         | 
| 3 3 | 
             
                class Base < Regexp::Expression::Base
         | 
| 4 4 | 
             
                  attr_accessor :referenced_expression
         | 
| 5 | 
            +
             | 
| 6 | 
            +
                  def initialize_copy(orig)
         | 
| 7 | 
            +
                    self.referenced_expression = orig.referenced_expression.dup
         | 
| 8 | 
            +
                    super
         | 
| 9 | 
            +
                  end
         | 
| 5 10 | 
             
                end
         | 
| 6 11 |  | 
| 7 12 | 
             
                class Number < Backreference::Base
         | 
| @@ -1,6 +1,6 @@ | |
| 1 1 | 
             
            module Regexp::Expression
         | 
| 2 2 | 
             
              module Conditional
         | 
| 3 | 
            -
                class TooManyBranches <  | 
| 3 | 
            +
                class TooManyBranches < Regexp::Parser::Error
         | 
| 4 4 | 
             
                  def initialize
         | 
| 5 5 | 
             
                    super('The conditional expression has more than 2 branches')
         | 
| 6 6 | 
             
                  end
         | 
| @@ -15,6 +15,11 @@ module Regexp::Expression | |
| 15 15 | 
             
                    ref = text.tr("'<>()", "")
         | 
| 16 16 | 
             
                    ref =~ /\D/ ? ref : Integer(ref)
         | 
| 17 17 | 
             
                  end
         | 
| 18 | 
            +
             | 
| 19 | 
            +
                  def initialize_copy(orig)
         | 
| 20 | 
            +
                    self.referenced_expression = orig.referenced_expression.dup
         | 
| 21 | 
            +
                    super
         | 
| 22 | 
            +
                  end
         | 
| 18 23 | 
             
                end
         | 
| 19 24 |  | 
| 20 25 | 
             
                class Branch < Regexp::Expression::Sequence; end
         | 
| @@ -53,6 +58,11 @@ module Regexp::Expression | |
| 53 58 | 
             
                  def to_s(format = :full)
         | 
| 54 59 | 
             
                    "#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
         | 
| 55 60 | 
             
                  end
         | 
| 61 | 
            +
             | 
| 62 | 
            +
                  def initialize_copy(orig)
         | 
| 63 | 
            +
                    self.referenced_expression = orig.referenced_expression.dup
         | 
| 64 | 
            +
                    super
         | 
| 65 | 
            +
                  end
         | 
| 56 66 | 
             
                end
         | 
| 57 67 | 
             
              end
         | 
| 58 68 | 
             
            end
         | 
| @@ -2,7 +2,7 @@ module Regexp::Expression | |
| 2 2 |  | 
| 3 3 | 
             
              class FreeSpace < Regexp::Expression::Base
         | 
| 4 4 | 
             
                def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
         | 
| 5 | 
            -
                  raise  | 
| 5 | 
            +
                  raise Regexp::Parser::Error, 'Can not quantify a free space object'
         | 
| 6 6 | 
             
                end
         | 
| 7 7 | 
             
              end
         | 
| 8 8 |  | 
| @@ -35,6 +35,11 @@ module Regexp::Expression | |
| 35 35 | 
             
                class Atomic  < Group::Base; end
         | 
| 36 36 | 
             
                class Options < Group::Base
         | 
| 37 37 | 
             
                  attr_accessor :option_changes
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                  def initialize_copy(orig)
         | 
| 40 | 
            +
                    self.option_changes = orig.option_changes.dup
         | 
| 41 | 
            +
                    super
         | 
| 42 | 
            +
                  end
         | 
| 38 43 | 
             
                end
         | 
| 39 44 |  | 
| 40 45 | 
             
                class Capture < Group::Base
         | 
| @@ -53,7 +58,7 @@ module Regexp::Expression | |
| 53 58 | 
             
                    super
         | 
| 54 59 | 
             
                  end
         | 
| 55 60 |  | 
| 56 | 
            -
                  def  | 
| 61 | 
            +
                  def initialize_copy(orig)
         | 
| 57 62 | 
             
                    @name = orig.name.dup
         | 
| 58 63 | 
             
                    super
         | 
| 59 64 | 
             
                  end
         | 
| @@ -41,17 +41,11 @@ module Regexp::Expression | |
| 41 41 | 
             
                alias :ts :starts_at
         | 
| 42 42 |  | 
| 43 43 | 
             
                def quantify(token, text, min = nil, max = nil, mode = :greedy)
         | 
| 44 | 
            -
                   | 
| 45 | 
            -
                  target  | 
| 46 | 
            -
             | 
| 47 | 
            -
                    target = expressions[offset -= 1]
         | 
| 48 | 
            -
                  end
         | 
| 49 | 
            -
             | 
| 50 | 
            -
                  target || raise(ArgumentError, "No valid target found for '#{text}' "\
         | 
| 51 | 
            -
                                                 'quantifier')
         | 
| 44 | 
            +
                  target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
         | 
| 45 | 
            +
                  target or raise Regexp::Parser::Error,
         | 
| 46 | 
            +
                    "No valid target found for '#{text}' quantifier"
         | 
| 52 47 |  | 
| 53 48 | 
             
                  target.quantify(token, text, min, max, mode)
         | 
| 54 49 | 
             
                end
         | 
| 55 50 | 
             
              end
         | 
| 56 | 
            -
             | 
| 57 51 | 
             
            end
         | 
    
        data/lib/regexp_parser/parser.rb
    CHANGED
    
    | @@ -1,10 +1,10 @@ | |
| 1 | 
            +
            require 'regexp_parser/error'
         | 
| 1 2 | 
             
            require 'regexp_parser/expression'
         | 
| 2 3 |  | 
| 3 4 | 
             
            class Regexp::Parser
         | 
| 4 5 | 
             
              include Regexp::Expression
         | 
| 5 | 
            -
              include Regexp::Syntax
         | 
| 6 6 |  | 
| 7 | 
            -
              class ParserError <  | 
| 7 | 
            +
              class ParserError < Regexp::Parser::Error; end
         | 
| 8 8 |  | 
| 9 9 | 
             
              class UnknownTokenTypeError < ParserError
         | 
| 10 10 | 
             
                def initialize(type, token)
         | 
| @@ -70,93 +70,155 @@ class Regexp::Parser | |
| 70 70 | 
             
                enabled_options
         | 
| 71 71 | 
             
              end
         | 
| 72 72 |  | 
| 73 | 
            -
              def  | 
| 74 | 
            -
                 | 
| 75 | 
            -
                 | 
| 76 | 
            -
                 | 
| 77 | 
            -
                 | 
| 78 | 
            -
             | 
| 73 | 
            +
              def parse_token(token)
         | 
| 74 | 
            +
                case token.type
         | 
| 75 | 
            +
                when :anchor;                     anchor(token)
         | 
| 76 | 
            +
                when :assertion, :group;          group(token)
         | 
| 77 | 
            +
                when :backref;                    backref(token)
         | 
| 78 | 
            +
                when :conditional;                conditional(token)
         | 
| 79 | 
            +
                when :escape;                     escape(token)
         | 
| 80 | 
            +
                when :free_space;                 free_space(token)
         | 
| 81 | 
            +
                when :keep;                       keep(token)
         | 
| 82 | 
            +
                when :literal;                    literal(token)
         | 
| 83 | 
            +
                when :meta;                       meta(token)
         | 
| 84 | 
            +
                when :posixclass, :nonposixclass; posixclass(token)
         | 
| 85 | 
            +
                when :property, :nonproperty;     property(token)
         | 
| 86 | 
            +
                when :quantifier;                 quantifier(token)
         | 
| 87 | 
            +
                when :set;                        set(token)
         | 
| 88 | 
            +
                when :type;                       type(token)
         | 
| 89 | 
            +
                else
         | 
| 90 | 
            +
                  raise UnknownTokenTypeError.new(token.type, token)
         | 
| 91 | 
            +
                end
         | 
| 79 92 |  | 
| 80 | 
            -
             | 
| 81 | 
            -
              def update_transplanted_subtree(exp, new_parent)
         | 
| 82 | 
            -
                exp.nesting_level = new_parent.nesting_level + 1
         | 
| 83 | 
            -
                exp.respond_to?(:each) &&
         | 
| 84 | 
            -
                  exp.each { |subexp| update_transplanted_subtree(subexp, exp) }
         | 
| 93 | 
            +
                close_completed_character_set_range
         | 
| 85 94 | 
             
              end
         | 
| 86 95 |  | 
| 87 | 
            -
              def  | 
| 88 | 
            -
                 | 
| 89 | 
            -
             | 
| 90 | 
            -
             | 
| 96 | 
            +
              def anchor(token)
         | 
| 97 | 
            +
                case token.token
         | 
| 98 | 
            +
                when :bol;              node << Anchor::BeginningOfLine.new(token, active_opts)
         | 
| 99 | 
            +
                when :bos;              node << Anchor::BOS.new(token, active_opts)
         | 
| 100 | 
            +
                when :eol;              node << Anchor::EndOfLine.new(token, active_opts)
         | 
| 101 | 
            +
                when :eos;              node << Anchor::EOS.new(token, active_opts)
         | 
| 102 | 
            +
                when :eos_ob_eol;       node << Anchor::EOSobEOL.new(token, active_opts)
         | 
| 103 | 
            +
                when :match_start;      node << Anchor::MatchStart.new(token, active_opts)
         | 
| 104 | 
            +
                when :nonword_boundary; node << Anchor::NonWordBoundary.new(token, active_opts)
         | 
| 105 | 
            +
                when :word_boundary;    node << Anchor::WordBoundary.new(token, active_opts)
         | 
| 106 | 
            +
                else
         | 
| 107 | 
            +
                  raise UnknownTokenError.new('Anchor', token)
         | 
| 91 108 | 
             
                end
         | 
| 92 | 
            -
                nesting.pop
         | 
| 93 | 
            -
                yield(node) if block_given?
         | 
| 94 | 
            -
                self.node = nesting.last
         | 
| 95 | 
            -
                self.node = node.last if node.last.is_a?(SequenceOperation)
         | 
| 96 109 | 
             
              end
         | 
| 97 110 |  | 
| 98 | 
            -
              def  | 
| 99 | 
            -
                 | 
| 100 | 
            -
                 | 
| 111 | 
            +
              def group(token)
         | 
| 112 | 
            +
                case token.token
         | 
| 113 | 
            +
                when :options, :options_switch
         | 
| 114 | 
            +
                  options_group(token)
         | 
| 115 | 
            +
                when :close
         | 
| 116 | 
            +
                  close_group
         | 
| 117 | 
            +
                when :comment
         | 
| 118 | 
            +
                  node << Group::Comment.new(token, active_opts)
         | 
| 119 | 
            +
                else
         | 
| 120 | 
            +
                  open_group(token)
         | 
| 121 | 
            +
                end
         | 
| 101 122 | 
             
              end
         | 
| 102 123 |  | 
| 103 | 
            -
               | 
| 104 | 
            -
             | 
| 124 | 
            +
              MOD_FLAGS = %w[i m x].map(&:to_sym)
         | 
| 125 | 
            +
              ENC_FLAGS = %w[a d u].map(&:to_sym)
         | 
| 105 126 |  | 
| 106 | 
            -
             | 
| 107 | 
            -
                 | 
| 108 | 
            -
                 | 
| 109 | 
            -
                 | 
| 110 | 
            -
                when :escape;       escape(token)
         | 
| 111 | 
            -
                when :group;        group(token)
         | 
| 112 | 
            -
                when :assertion;    group(token)
         | 
| 113 | 
            -
                when :set;          set(token)
         | 
| 114 | 
            -
                when :type;         type(token)
         | 
| 115 | 
            -
                when :backref;      backref(token)
         | 
| 116 | 
            -
                when :conditional;  conditional(token)
         | 
| 117 | 
            -
                when :keep;         keep(token)
         | 
| 118 | 
            -
             | 
| 119 | 
            -
                when :posixclass, :nonposixclass
         | 
| 120 | 
            -
                  posixclass(token)
         | 
| 121 | 
            -
                when :property, :nonproperty
         | 
| 122 | 
            -
                  property(token)
         | 
| 123 | 
            -
             | 
| 124 | 
            -
                when :literal
         | 
| 125 | 
            -
                  node << Literal.new(token, active_opts)
         | 
| 126 | 
            -
                when :free_space
         | 
| 127 | 
            -
                  free_space(token)
         | 
| 127 | 
            +
              def options_group(token)
         | 
| 128 | 
            +
                positive, negative = token.text.split('-', 2)
         | 
| 129 | 
            +
                negative ||= ''
         | 
| 130 | 
            +
                self.switching_options = token.token.equal?(:options_switch)
         | 
| 128 131 |  | 
| 129 | 
            -
                 | 
| 130 | 
            -
             | 
| 132 | 
            +
                opt_changes = {}
         | 
| 133 | 
            +
                new_active_opts = active_opts.dup
         | 
| 134 | 
            +
             | 
| 135 | 
            +
                MOD_FLAGS.each do |flag|
         | 
| 136 | 
            +
                  if positive.include?(flag.to_s)
         | 
| 137 | 
            +
                    opt_changes[flag] = new_active_opts[flag] = true
         | 
| 138 | 
            +
                  end
         | 
| 139 | 
            +
                  if negative.include?(flag.to_s)
         | 
| 140 | 
            +
                    opt_changes[flag] = false
         | 
| 141 | 
            +
                    new_active_opts.delete(flag)
         | 
| 142 | 
            +
                  end
         | 
| 143 | 
            +
                end
         | 
| 144 | 
            +
             | 
| 145 | 
            +
                if (enc_flag = positive.reverse[/[adu]/])
         | 
| 146 | 
            +
                  enc_flag = enc_flag.to_sym
         | 
| 147 | 
            +
                  (ENC_FLAGS - [enc_flag]).each do |other|
         | 
| 148 | 
            +
                    opt_changes[other] = false if new_active_opts[other]
         | 
| 149 | 
            +
                    new_active_opts.delete(other)
         | 
| 150 | 
            +
                  end
         | 
| 151 | 
            +
                  opt_changes[enc_flag] = new_active_opts[enc_flag] = true
         | 
| 131 152 | 
             
                end
         | 
| 153 | 
            +
             | 
| 154 | 
            +
                options_stack << new_active_opts
         | 
| 155 | 
            +
             | 
| 156 | 
            +
                options_group = Group::Options.new(token, active_opts)
         | 
| 157 | 
            +
                options_group.option_changes = opt_changes
         | 
| 158 | 
            +
             | 
| 159 | 
            +
                nest(options_group)
         | 
| 132 160 | 
             
              end
         | 
| 133 161 |  | 
| 134 | 
            -
              def  | 
| 135 | 
            -
                 | 
| 136 | 
            -
             | 
| 137 | 
            -
                   | 
| 138 | 
            -
             | 
| 139 | 
            -
                   | 
| 140 | 
            -
             | 
| 141 | 
            -
                   | 
| 142 | 
            -
             | 
| 143 | 
            -
                   | 
| 144 | 
            -
             | 
| 145 | 
            -
                   | 
| 146 | 
            -
             | 
| 147 | 
            -
             | 
| 162 | 
            +
              def open_group(token)
         | 
| 163 | 
            +
                group_class =
         | 
| 164 | 
            +
                  case token.token
         | 
| 165 | 
            +
                  when :absence;     Group::Absence
         | 
| 166 | 
            +
                  when :atomic;      Group::Atomic
         | 
| 167 | 
            +
                  when :capture;     Group::Capture
         | 
| 168 | 
            +
                  when :named;       Group::Named
         | 
| 169 | 
            +
                  when :passive;     Group::Passive
         | 
| 170 | 
            +
             | 
| 171 | 
            +
                  when :lookahead;   Assertion::Lookahead
         | 
| 172 | 
            +
                  when :lookbehind;  Assertion::Lookbehind
         | 
| 173 | 
            +
                  when :nlookahead;  Assertion::NegativeLookahead
         | 
| 174 | 
            +
                  when :nlookbehind; Assertion::NegativeLookbehind
         | 
| 175 | 
            +
             | 
| 176 | 
            +
                  else
         | 
| 177 | 
            +
                    raise UnknownTokenError.new('Group type open', token)
         | 
| 178 | 
            +
                  end
         | 
| 179 | 
            +
             | 
| 180 | 
            +
                group = group_class.new(token, active_opts)
         | 
| 181 | 
            +
             | 
| 182 | 
            +
                if group.capturing?
         | 
| 183 | 
            +
                  group.number          = total_captured_group_count + 1
         | 
| 184 | 
            +
                  group.number_at_level = captured_group_count_at_level + 1
         | 
| 185 | 
            +
                  count_captured_group
         | 
| 148 186 | 
             
                end
         | 
| 187 | 
            +
             | 
| 188 | 
            +
                # Push the active options to the stack again. This way we can simply pop the
         | 
| 189 | 
            +
                # stack for any group we close, no matter if it had its own options or not.
         | 
| 190 | 
            +
                options_stack << active_opts
         | 
| 191 | 
            +
             | 
| 192 | 
            +
                nest(group)
         | 
| 149 193 | 
             
              end
         | 
| 150 194 |  | 
| 151 | 
            -
              def  | 
| 152 | 
            -
                 | 
| 153 | 
            -
             | 
| 154 | 
            -
             | 
| 155 | 
            -
             | 
| 156 | 
            -
             | 
| 157 | 
            -
             | 
| 158 | 
            -
             | 
| 195 | 
            +
              def total_captured_group_count
         | 
| 196 | 
            +
                captured_group_counts.values.reduce(0, :+)
         | 
| 197 | 
            +
              end
         | 
| 198 | 
            +
             | 
| 199 | 
            +
              def captured_group_count_at_level
         | 
| 200 | 
            +
                captured_group_counts[node.level]
         | 
| 201 | 
            +
              end
         | 
| 202 | 
            +
             | 
| 203 | 
            +
              def count_captured_group
         | 
| 204 | 
            +
                captured_group_counts[node.level] += 1
         | 
| 205 | 
            +
              end
         | 
| 206 | 
            +
             | 
| 207 | 
            +
              def close_group
         | 
| 208 | 
            +
                options_stack.pop unless switching_options
         | 
| 209 | 
            +
                self.switching_options = false
         | 
| 210 | 
            +
                decrease_nesting
         | 
| 211 | 
            +
              end
         | 
| 212 | 
            +
             | 
| 213 | 
            +
              def decrease_nesting
         | 
| 214 | 
            +
                while nesting.last.is_a?(SequenceOperation)
         | 
| 215 | 
            +
                  nesting.pop
         | 
| 216 | 
            +
                  self.node = nesting.last
         | 
| 159 217 | 
             
                end
         | 
| 218 | 
            +
                nesting.pop
         | 
| 219 | 
            +
                yield(node) if block_given?
         | 
| 220 | 
            +
                self.node = nesting.last
         | 
| 221 | 
            +
                self.node = node.last if node.last.is_a?(SequenceOperation)
         | 
| 160 222 | 
             
              end
         | 
| 161 223 |  | 
| 162 224 | 
             
              def backref(token)
         | 
| @@ -186,31 +248,9 @@ class Regexp::Parser | |
| 186 248 | 
             
                end
         | 
| 187 249 | 
             
              end
         | 
| 188 250 |  | 
| 189 | 
            -
              def  | 
| 190 | 
            -
                 | 
| 191 | 
            -
             | 
| 192 | 
            -
                  node << CharacterType::Digit.new(token, active_opts)
         | 
| 193 | 
            -
                when :nondigit
         | 
| 194 | 
            -
                  node << CharacterType::NonDigit.new(token, active_opts)
         | 
| 195 | 
            -
                when :hex
         | 
| 196 | 
            -
                  node << CharacterType::Hex.new(token, active_opts)
         | 
| 197 | 
            -
                when :nonhex
         | 
| 198 | 
            -
                  node << CharacterType::NonHex.new(token, active_opts)
         | 
| 199 | 
            -
                when :space
         | 
| 200 | 
            -
                  node << CharacterType::Space.new(token, active_opts)
         | 
| 201 | 
            -
                when :nonspace
         | 
| 202 | 
            -
                  node << CharacterType::NonSpace.new(token, active_opts)
         | 
| 203 | 
            -
                when :word
         | 
| 204 | 
            -
                  node << CharacterType::Word.new(token, active_opts)
         | 
| 205 | 
            -
                when :nonword
         | 
| 206 | 
            -
                  node << CharacterType::NonWord.new(token, active_opts)
         | 
| 207 | 
            -
                when :linebreak
         | 
| 208 | 
            -
                  node << CharacterType::Linebreak.new(token, active_opts)
         | 
| 209 | 
            -
                when :xgrapheme
         | 
| 210 | 
            -
                  node << CharacterType::ExtendedGrapheme.new(token, active_opts)
         | 
| 211 | 
            -
                else
         | 
| 212 | 
            -
                  raise UnknownTokenError.new('CharacterType', token)
         | 
| 213 | 
            -
                end
         | 
| 251 | 
            +
              def assign_effective_number(exp)
         | 
| 252 | 
            +
                exp.effective_number =
         | 
| 253 | 
            +
                  exp.number + total_captured_group_count + (exp.number < 0 ? 1 : 0)
         | 
| 214 254 | 
             
              end
         | 
| 215 255 |  | 
| 216 256 | 
             
              def conditional(token)
         | 
| @@ -238,11 +278,118 @@ class Regexp::Parser | |
| 238 278 | 
             
                end
         | 
| 239 279 | 
             
              end
         | 
| 240 280 |  | 
| 281 | 
            +
              def nest_conditional(exp)
         | 
| 282 | 
            +
                conditional_nesting.push(exp)
         | 
| 283 | 
            +
                nest(exp)
         | 
| 284 | 
            +
              end
         | 
| 285 | 
            +
             | 
| 286 | 
            +
              def nest(exp)
         | 
| 287 | 
            +
                nesting.push(exp)
         | 
| 288 | 
            +
                node << exp
         | 
| 289 | 
            +
                update_transplanted_subtree(exp, node)
         | 
| 290 | 
            +
                self.node = exp
         | 
| 291 | 
            +
              end
         | 
| 292 | 
            +
             | 
| 293 | 
            +
              # subtrees are transplanted to build Alternations, Intersections, Ranges
         | 
| 294 | 
            +
              def update_transplanted_subtree(exp, new_parent)
         | 
| 295 | 
            +
                exp.nesting_level = new_parent.nesting_level + 1
         | 
| 296 | 
            +
                exp.respond_to?(:each) &&
         | 
| 297 | 
            +
                  exp.each { |subexp| update_transplanted_subtree(subexp, exp) }
         | 
| 298 | 
            +
              end
         | 
| 299 | 
            +
             | 
| 300 | 
            +
              def escape(token)
         | 
| 301 | 
            +
                case token.token
         | 
| 302 | 
            +
             | 
| 303 | 
            +
                when :backspace;      node << EscapeSequence::Backspace.new(token, active_opts)
         | 
| 304 | 
            +
             | 
| 305 | 
            +
                when :escape;         node << EscapeSequence::AsciiEscape.new(token, active_opts)
         | 
| 306 | 
            +
                when :bell;           node << EscapeSequence::Bell.new(token, active_opts)
         | 
| 307 | 
            +
                when :form_feed;      node << EscapeSequence::FormFeed.new(token, active_opts)
         | 
| 308 | 
            +
                when :newline;        node << EscapeSequence::Newline.new(token, active_opts)
         | 
| 309 | 
            +
                when :carriage;       node << EscapeSequence::Return.new(token, active_opts)
         | 
| 310 | 
            +
                when :tab;            node << EscapeSequence::Tab.new(token, active_opts)
         | 
| 311 | 
            +
                when :vertical_tab;   node << EscapeSequence::VerticalTab.new(token, active_opts)
         | 
| 312 | 
            +
             | 
| 313 | 
            +
                when :codepoint;      node << EscapeSequence::Codepoint.new(token, active_opts)
         | 
| 314 | 
            +
                when :codepoint_list; node << EscapeSequence::CodepointList.new(token, active_opts)
         | 
| 315 | 
            +
                when :hex;            node << EscapeSequence::Hex.new(token, active_opts)
         | 
| 316 | 
            +
                when :octal;          node << EscapeSequence::Octal.new(token, active_opts)
         | 
| 317 | 
            +
             | 
| 318 | 
            +
                when :control
         | 
| 319 | 
            +
                  if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
         | 
| 320 | 
            +
                    node << EscapeSequence::MetaControl.new(token, active_opts)
         | 
| 321 | 
            +
                  else
         | 
| 322 | 
            +
                    node << EscapeSequence::Control.new(token, active_opts)
         | 
| 323 | 
            +
                  end
         | 
| 324 | 
            +
             | 
| 325 | 
            +
                when :meta_sequence
         | 
| 326 | 
            +
                  if token.text =~ /\A\\M-\\[Cc]/
         | 
| 327 | 
            +
                    node << EscapeSequence::MetaControl.new(token, active_opts)
         | 
| 328 | 
            +
                  else
         | 
| 329 | 
            +
                    node << EscapeSequence::Meta.new(token, active_opts)
         | 
| 330 | 
            +
                  end
         | 
| 331 | 
            +
             | 
| 332 | 
            +
                else
         | 
| 333 | 
            +
                  # treating everything else as a literal
         | 
| 334 | 
            +
                  # TODO: maybe split this up a bit more in v3.0.0?
         | 
| 335 | 
            +
                  # E.g. escaped quantifiers or set meta chars are not the same
         | 
| 336 | 
            +
                  # as stuff that would be a literal even without the backslash.
         | 
| 337 | 
            +
                  # Right now, they all end up here.
         | 
| 338 | 
            +
                  node << EscapeSequence::Literal.new(token, active_opts)
         | 
| 339 | 
            +
                end
         | 
| 340 | 
            +
              end
         | 
| 341 | 
            +
             | 
| 342 | 
            +
              def free_space(token)
         | 
| 343 | 
            +
                case token.token
         | 
| 344 | 
            +
                when :comment
         | 
| 345 | 
            +
                  node << Comment.new(token, active_opts)
         | 
| 346 | 
            +
                when :whitespace
         | 
| 347 | 
            +
                  if node.last.is_a?(WhiteSpace)
         | 
| 348 | 
            +
                    node.last.merge(WhiteSpace.new(token, active_opts))
         | 
| 349 | 
            +
                  else
         | 
| 350 | 
            +
                    node << WhiteSpace.new(token, active_opts)
         | 
| 351 | 
            +
                  end
         | 
| 352 | 
            +
                else
         | 
| 353 | 
            +
                  raise UnknownTokenError.new('FreeSpace', token)
         | 
| 354 | 
            +
                end
         | 
| 355 | 
            +
              end
         | 
| 356 | 
            +
             | 
| 357 | 
            +
              def keep(token)
         | 
| 358 | 
            +
                node << Keep::Mark.new(token, active_opts)
         | 
| 359 | 
            +
              end
         | 
| 360 | 
            +
             | 
| 361 | 
            +
              def literal(token)
         | 
| 362 | 
            +
                node << Literal.new(token, active_opts)
         | 
| 363 | 
            +
              end
         | 
| 364 | 
            +
             | 
| 365 | 
            +
              def meta(token)
         | 
| 366 | 
            +
                case token.token
         | 
| 367 | 
            +
                when :dot
         | 
| 368 | 
            +
                  node << CharacterType::Any.new(token, active_opts)
         | 
| 369 | 
            +
                when :alternation
         | 
| 370 | 
            +
                  sequence_operation(Alternation, token)
         | 
| 371 | 
            +
                else
         | 
| 372 | 
            +
                  raise UnknownTokenError.new('Meta', token)
         | 
| 373 | 
            +
                end
         | 
| 374 | 
            +
              end
         | 
| 375 | 
            +
             | 
| 376 | 
            +
              def sequence_operation(klass, token)
         | 
| 377 | 
            +
                unless node.is_a?(klass)
         | 
| 378 | 
            +
                  operator = klass.new(token, active_opts)
         | 
| 379 | 
            +
                  sequence = operator.add_sequence(active_opts)
         | 
| 380 | 
            +
                  sequence.expressions = node.expressions
         | 
| 381 | 
            +
                  node.expressions = []
         | 
| 382 | 
            +
                  nest(operator)
         | 
| 383 | 
            +
                end
         | 
| 384 | 
            +
                node.add_sequence(active_opts)
         | 
| 385 | 
            +
              end
         | 
| 386 | 
            +
             | 
| 241 387 | 
             
              def posixclass(token)
         | 
| 242 388 | 
             
                node << PosixClass.new(token, active_opts)
         | 
| 243 389 | 
             
              end
         | 
| 244 390 |  | 
| 245 391 | 
             
              include Regexp::Expression::UnicodeProperty
         | 
| 392 | 
            +
              UPTokens = Regexp::Syntax::Token::UnicodeProperty
         | 
| 246 393 |  | 
| 247 394 | 
             
              def property(token)
         | 
| 248 395 | 
             
                case token.token
         | 
| @@ -314,127 +461,20 @@ class Regexp::Parser | |
| 314 461 | 
             
                when :private_use;            node << Codepoint::PrivateUse.new(token, active_opts)
         | 
| 315 462 | 
             
                when :unassigned;             node << Codepoint::Unassigned.new(token, active_opts)
         | 
| 316 463 |  | 
| 317 | 
            -
                when * | 
| 318 | 
            -
                  node <<  | 
| 319 | 
            -
             | 
| 320 | 
            -
                when * | 
| 321 | 
            -
             | 
| 322 | 
            -
             | 
| 323 | 
            -
                when *Token::UnicodeProperty::Emoji
         | 
| 324 | 
            -
                  node << Emoji.new(token, active_opts)
         | 
| 325 | 
            -
             | 
| 326 | 
            -
                when *Token::UnicodeProperty::Script
         | 
| 327 | 
            -
                  node << Script.new(token, active_opts)
         | 
| 328 | 
            -
             | 
| 329 | 
            -
                when *Token::UnicodeProperty::UnicodeBlock
         | 
| 330 | 
            -
                  node << Block.new(token, active_opts)
         | 
| 464 | 
            +
                when *UPTokens::Age;          node << Age.new(token, active_opts)
         | 
| 465 | 
            +
                when *UPTokens::Derived;      node << Derived.new(token, active_opts)
         | 
| 466 | 
            +
                when *UPTokens::Emoji;        node << Emoji.new(token, active_opts)
         | 
| 467 | 
            +
                when *UPTokens::Script;       node << Script.new(token, active_opts)
         | 
| 468 | 
            +
                when *UPTokens::UnicodeBlock; node << Block.new(token, active_opts)
         | 
| 331 469 |  | 
| 332 470 | 
             
                else
         | 
| 333 471 | 
             
                  raise UnknownTokenError.new('UnicodeProperty', token)
         | 
| 334 472 | 
             
                end
         | 
| 335 473 | 
             
              end
         | 
| 336 474 |  | 
| 337 | 
            -
              def anchor(token)
         | 
| 338 | 
            -
                case token.token
         | 
| 339 | 
            -
                when :bol
         | 
| 340 | 
            -
                  node << Anchor::BeginningOfLine.new(token, active_opts)
         | 
| 341 | 
            -
                when :eol
         | 
| 342 | 
            -
                  node << Anchor::EndOfLine.new(token, active_opts)
         | 
| 343 | 
            -
                when :bos
         | 
| 344 | 
            -
                  node << Anchor::BOS.new(token, active_opts)
         | 
| 345 | 
            -
                when :eos
         | 
| 346 | 
            -
                  node << Anchor::EOS.new(token, active_opts)
         | 
| 347 | 
            -
                when :eos_ob_eol
         | 
| 348 | 
            -
                  node << Anchor::EOSobEOL.new(token, active_opts)
         | 
| 349 | 
            -
                when :word_boundary
         | 
| 350 | 
            -
                  node << Anchor::WordBoundary.new(token, active_opts)
         | 
| 351 | 
            -
                when :nonword_boundary
         | 
| 352 | 
            -
                  node << Anchor::NonWordBoundary.new(token, active_opts)
         | 
| 353 | 
            -
                when :match_start
         | 
| 354 | 
            -
                  node << Anchor::MatchStart.new(token, active_opts)
         | 
| 355 | 
            -
                else
         | 
| 356 | 
            -
                  raise UnknownTokenError.new('Anchor', token)
         | 
| 357 | 
            -
                end
         | 
| 358 | 
            -
              end
         | 
| 359 | 
            -
             | 
| 360 | 
            -
              def escape(token)
         | 
| 361 | 
            -
                case token.token
         | 
| 362 | 
            -
             | 
| 363 | 
            -
                when :backspace
         | 
| 364 | 
            -
                  node << EscapeSequence::Backspace.new(token, active_opts)
         | 
| 365 | 
            -
             | 
| 366 | 
            -
                when :escape
         | 
| 367 | 
            -
                  node << EscapeSequence::AsciiEscape.new(token, active_opts)
         | 
| 368 | 
            -
                when :bell
         | 
| 369 | 
            -
                  node << EscapeSequence::Bell.new(token, active_opts)
         | 
| 370 | 
            -
                when :form_feed
         | 
| 371 | 
            -
                  node << EscapeSequence::FormFeed.new(token, active_opts)
         | 
| 372 | 
            -
                when :newline
         | 
| 373 | 
            -
                  node << EscapeSequence::Newline.new(token, active_opts)
         | 
| 374 | 
            -
                when :carriage
         | 
| 375 | 
            -
                  node << EscapeSequence::Return.new(token, active_opts)
         | 
| 376 | 
            -
                when :tab
         | 
| 377 | 
            -
                  node << EscapeSequence::Tab.new(token, active_opts)
         | 
| 378 | 
            -
                when :vertical_tab
         | 
| 379 | 
            -
                  node << EscapeSequence::VerticalTab.new(token, active_opts)
         | 
| 380 | 
            -
             | 
| 381 | 
            -
                when :hex
         | 
| 382 | 
            -
                  node << EscapeSequence::Hex.new(token, active_opts)
         | 
| 383 | 
            -
                when :octal
         | 
| 384 | 
            -
                  node << EscapeSequence::Octal.new(token, active_opts)
         | 
| 385 | 
            -
                when :codepoint
         | 
| 386 | 
            -
                  node << EscapeSequence::Codepoint.new(token, active_opts)
         | 
| 387 | 
            -
                when :codepoint_list
         | 
| 388 | 
            -
                  node << EscapeSequence::CodepointList.new(token, active_opts)
         | 
| 389 | 
            -
             | 
| 390 | 
            -
                when :control
         | 
| 391 | 
            -
                  if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
         | 
| 392 | 
            -
                    node << EscapeSequence::MetaControl.new(token, active_opts)
         | 
| 393 | 
            -
                  else
         | 
| 394 | 
            -
                    node << EscapeSequence::Control.new(token, active_opts)
         | 
| 395 | 
            -
                  end
         | 
| 396 | 
            -
             | 
| 397 | 
            -
                when :meta_sequence
         | 
| 398 | 
            -
                  if token.text =~ /\A\\M-\\[Cc]/
         | 
| 399 | 
            -
                    node << EscapeSequence::MetaControl.new(token, active_opts)
         | 
| 400 | 
            -
                  else
         | 
| 401 | 
            -
                    node << EscapeSequence::Meta.new(token, active_opts)
         | 
| 402 | 
            -
                  end
         | 
| 403 | 
            -
             | 
| 404 | 
            -
                else
         | 
| 405 | 
            -
                  # treating everything else as a literal
         | 
| 406 | 
            -
                  node << EscapeSequence::Literal.new(token, active_opts)
         | 
| 407 | 
            -
                end
         | 
| 408 | 
            -
              end
         | 
| 409 | 
            -
             | 
| 410 | 
            -
              def keep(token)
         | 
| 411 | 
            -
                node << Keep::Mark.new(token, active_opts)
         | 
| 412 | 
            -
              end
         | 
| 413 | 
            -
             | 
| 414 | 
            -
              def free_space(token)
         | 
| 415 | 
            -
                case token.token
         | 
| 416 | 
            -
                when :comment
         | 
| 417 | 
            -
                  node << Comment.new(token, active_opts)
         | 
| 418 | 
            -
                when :whitespace
         | 
| 419 | 
            -
                  if node.last.is_a?(WhiteSpace)
         | 
| 420 | 
            -
                    node.last.merge(WhiteSpace.new(token, active_opts))
         | 
| 421 | 
            -
                  else
         | 
| 422 | 
            -
                    node << WhiteSpace.new(token, active_opts)
         | 
| 423 | 
            -
                  end
         | 
| 424 | 
            -
                else
         | 
| 425 | 
            -
                  raise UnknownTokenError.new('FreeSpace', token)
         | 
| 426 | 
            -
                end
         | 
| 427 | 
            -
              end
         | 
| 428 | 
            -
             | 
| 429 475 | 
             
              def quantifier(token)
         | 
| 430 | 
            -
                 | 
| 431 | 
            -
                target_node  | 
| 432 | 
            -
                while target_node.is_a?(FreeSpace)
         | 
| 433 | 
            -
                  target_node = node.expressions[offset -= 1]
         | 
| 434 | 
            -
                end
         | 
| 435 | 
            -
             | 
| 436 | 
            -
                target_node || raise(ArgumentError, 'No valid target found for '\
         | 
| 437 | 
            -
                                                    "'#{token.text}' ")
         | 
| 476 | 
            +
                target_node = node.expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
         | 
| 477 | 
            +
                target_node or raise ParserError, "No valid target found for '#{token.text}'"
         | 
| 438 478 |  | 
| 439 479 | 
             
                # in case of chained quantifiers, wrap target in an implicit passive group
         | 
| 440 480 | 
             
                # description of the problem: https://github.com/ammar/regexp_parser/issues/3
         | 
| @@ -454,7 +494,7 @@ class Regexp::Parser | |
| 454 494 | 
             
                  new_group.implicit = true
         | 
| 455 495 | 
             
                  new_group << target_node
         | 
| 456 496 | 
             
                  increase_level(target_node)
         | 
| 457 | 
            -
                  node.expressions[ | 
| 497 | 
            +
                  node.expressions[node.expressions.index(target_node)] = new_group
         | 
| 458 498 | 
             
                  target_node = new_group
         | 
| 459 499 | 
             
                end
         | 
| 460 500 |  | 
| @@ -515,100 +555,16 @@ class Regexp::Parser | |
| 515 555 | 
             
                target_node.quantify(:interval, text, min.to_i, max.to_i, mode)
         | 
| 516 556 | 
             
              end
         | 
| 517 557 |  | 
| 518 | 
            -
              def  | 
| 519 | 
            -
                case token.token
         | 
| 520 | 
            -
                when :options, :options_switch
         | 
| 521 | 
            -
                  options_group(token)
         | 
| 522 | 
            -
                when :close
         | 
| 523 | 
            -
                  close_group
         | 
| 524 | 
            -
                when :comment
         | 
| 525 | 
            -
                  node << Group::Comment.new(token, active_opts)
         | 
| 526 | 
            -
                else
         | 
| 527 | 
            -
                  open_group(token)
         | 
| 528 | 
            -
                end
         | 
| 529 | 
            -
              end
         | 
| 530 | 
            -
             | 
| 531 | 
            -
              MOD_FLAGS = %w[i m x].map(&:to_sym)
         | 
| 532 | 
            -
              ENC_FLAGS = %w[a d u].map(&:to_sym)
         | 
| 533 | 
            -
             | 
| 534 | 
            -
              def options_group(token)
         | 
| 535 | 
            -
                positive, negative = token.text.split('-', 2)
         | 
| 536 | 
            -
                negative ||= ''
         | 
| 537 | 
            -
                self.switching_options = token.token.equal?(:options_switch)
         | 
| 538 | 
            -
             | 
| 539 | 
            -
                opt_changes = {}
         | 
| 540 | 
            -
                new_active_opts = active_opts.dup
         | 
| 541 | 
            -
             | 
| 542 | 
            -
                MOD_FLAGS.each do |flag|
         | 
| 543 | 
            -
                  if positive.include?(flag.to_s)
         | 
| 544 | 
            -
                    opt_changes[flag] = new_active_opts[flag] = true
         | 
| 545 | 
            -
                  end
         | 
| 546 | 
            -
                  if negative.include?(flag.to_s)
         | 
| 547 | 
            -
                    opt_changes[flag] = false
         | 
| 548 | 
            -
                    new_active_opts.delete(flag)
         | 
| 549 | 
            -
                  end
         | 
| 550 | 
            -
                end
         | 
| 551 | 
            -
             | 
| 552 | 
            -
                if (enc_flag = positive.reverse[/[adu]/])
         | 
| 553 | 
            -
                  enc_flag = enc_flag.to_sym
         | 
| 554 | 
            -
                  (ENC_FLAGS - [enc_flag]).each do |other|
         | 
| 555 | 
            -
                    opt_changes[other] = false if new_active_opts[other]
         | 
| 556 | 
            -
                    new_active_opts.delete(other)
         | 
| 557 | 
            -
                  end
         | 
| 558 | 
            -
                  opt_changes[enc_flag] = new_active_opts[enc_flag] = true
         | 
| 559 | 
            -
                end
         | 
| 560 | 
            -
             | 
| 561 | 
            -
                options_stack << new_active_opts
         | 
| 562 | 
            -
             | 
| 563 | 
            -
                options_group = Group::Options.new(token, active_opts)
         | 
| 564 | 
            -
                options_group.option_changes = opt_changes
         | 
| 565 | 
            -
             | 
| 566 | 
            -
                nest(options_group)
         | 
| 567 | 
            -
              end
         | 
| 568 | 
            -
             | 
| 569 | 
            -
              def open_group(token)
         | 
| 558 | 
            +
              def set(token)
         | 
| 570 559 | 
             
                case token.token
         | 
| 571 | 
            -
                when : | 
| 572 | 
            -
             | 
| 573 | 
            -
                when : | 
| 574 | 
            -
             | 
| 575 | 
            -
                when : | 
| 576 | 
            -
                  exp = Group::Named.new(token, active_opts)
         | 
| 577 | 
            -
                when :capture
         | 
| 578 | 
            -
                  exp = Group::Capture.new(token, active_opts)
         | 
| 579 | 
            -
                when :absence
         | 
| 580 | 
            -
                  exp = Group::Absence.new(token, active_opts)
         | 
| 581 | 
            -
             | 
| 582 | 
            -
                when :lookahead
         | 
| 583 | 
            -
                  exp = Assertion::Lookahead.new(token, active_opts)
         | 
| 584 | 
            -
                when :nlookahead
         | 
| 585 | 
            -
                  exp = Assertion::NegativeLookahead.new(token, active_opts)
         | 
| 586 | 
            -
                when :lookbehind
         | 
| 587 | 
            -
                  exp = Assertion::Lookbehind.new(token, active_opts)
         | 
| 588 | 
            -
                when :nlookbehind
         | 
| 589 | 
            -
                  exp = Assertion::NegativeLookbehind.new(token, active_opts)
         | 
| 590 | 
            -
             | 
| 560 | 
            +
                when :open;         open_set(token)
         | 
| 561 | 
            +
                when :close;        close_set
         | 
| 562 | 
            +
                when :negate;       negate_set
         | 
| 563 | 
            +
                when :range;        range(token)
         | 
| 564 | 
            +
                when :intersection; intersection(token)
         | 
| 591 565 | 
             
                else
         | 
| 592 | 
            -
                  raise UnknownTokenError.new(' | 
| 593 | 
            -
                end
         | 
| 594 | 
            -
             | 
| 595 | 
            -
                if exp.capturing?
         | 
| 596 | 
            -
                  exp.number          = total_captured_group_count + 1
         | 
| 597 | 
            -
                  exp.number_at_level = captured_group_count_at_level + 1
         | 
| 598 | 
            -
                  count_captured_group
         | 
| 566 | 
            +
                  raise UnknownTokenError.new('CharacterSet', token)
         | 
| 599 567 | 
             
                end
         | 
| 600 | 
            -
             | 
| 601 | 
            -
                # Push the active options to the stack again. This way we can simply pop the
         | 
| 602 | 
            -
                # stack for any group we close, no matter if it had its own options or not.
         | 
| 603 | 
            -
                options_stack << active_opts
         | 
| 604 | 
            -
             | 
| 605 | 
            -
                nest(exp)
         | 
| 606 | 
            -
              end
         | 
| 607 | 
            -
             | 
| 608 | 
            -
              def close_group
         | 
| 609 | 
            -
                options_stack.pop unless switching_options
         | 
| 610 | 
            -
                self.switching_options = false
         | 
| 611 | 
            -
                decrease_nesting
         | 
| 612 568 | 
             
              end
         | 
| 613 569 |  | 
| 614 570 | 
             
              def open_set(token)
         | 
| @@ -631,51 +587,45 @@ class Regexp::Parser | |
| 631 587 | 
             
                nest(exp)
         | 
| 632 588 | 
             
              end
         | 
| 633 589 |  | 
| 634 | 
            -
              def close_completed_character_set_range
         | 
| 635 | 
            -
                decrease_nesting if node.is_a?(CharacterSet::Range) && node.complete?
         | 
| 636 | 
            -
              end
         | 
| 637 | 
            -
             | 
| 638 590 | 
             
              def intersection(token)
         | 
| 639 591 | 
             
                sequence_operation(CharacterSet::Intersection, token)
         | 
| 640 592 | 
             
              end
         | 
| 641 593 |  | 
| 642 | 
            -
              def  | 
| 643 | 
            -
                 | 
| 644 | 
            -
             | 
| 645 | 
            -
             | 
| 646 | 
            -
             | 
| 647 | 
            -
             | 
| 648 | 
            -
             | 
| 594 | 
            +
              def type(token)
         | 
| 595 | 
            +
                case token.token
         | 
| 596 | 
            +
                when :digit;     node << CharacterType::Digit.new(token, active_opts)
         | 
| 597 | 
            +
                when :hex;       node << CharacterType::Hex.new(token, active_opts)
         | 
| 598 | 
            +
                when :linebreak; node << CharacterType::Linebreak.new(token, active_opts)
         | 
| 599 | 
            +
                when :nondigit;  node << CharacterType::NonDigit.new(token, active_opts)
         | 
| 600 | 
            +
                when :nonhex;    node << CharacterType::NonHex.new(token, active_opts)
         | 
| 601 | 
            +
                when :nonspace;  node << CharacterType::NonSpace.new(token, active_opts)
         | 
| 602 | 
            +
                when :nonword;   node << CharacterType::NonWord.new(token, active_opts)
         | 
| 603 | 
            +
                when :space;     node << CharacterType::Space.new(token, active_opts)
         | 
| 604 | 
            +
                when :word;      node << CharacterType::Word.new(token, active_opts)
         | 
| 605 | 
            +
                when :xgrapheme; node << CharacterType::ExtendedGrapheme.new(token, active_opts)
         | 
| 606 | 
            +
                else
         | 
| 607 | 
            +
                  raise UnknownTokenError.new('CharacterType', token)
         | 
| 649 608 | 
             
                end
         | 
| 650 | 
            -
                node.add_sequence(active_opts)
         | 
| 651 | 
            -
              end
         | 
| 652 | 
            -
             | 
| 653 | 
            -
              def active_opts
         | 
| 654 | 
            -
                options_stack.last
         | 
| 655 | 
            -
              end
         | 
| 656 | 
            -
             | 
| 657 | 
            -
              def total_captured_group_count
         | 
| 658 | 
            -
                captured_group_counts.values.reduce(0, :+)
         | 
| 659 | 
            -
              end
         | 
| 660 | 
            -
             | 
| 661 | 
            -
              def captured_group_count_at_level
         | 
| 662 | 
            -
                captured_group_counts[node.level]
         | 
| 663 609 | 
             
              end
         | 
| 664 610 |  | 
| 665 | 
            -
              def  | 
| 666 | 
            -
                 | 
| 611 | 
            +
              def close_completed_character_set_range
         | 
| 612 | 
            +
                decrease_nesting if node.is_a?(CharacterSet::Range) && node.complete?
         | 
| 667 613 | 
             
              end
         | 
| 668 614 |  | 
| 669 | 
            -
              def  | 
| 670 | 
            -
                 | 
| 671 | 
            -
                  exp.number + total_captured_group_count + (exp.number < 0 ? 1 : 0)
         | 
| 615 | 
            +
              def active_opts
         | 
| 616 | 
            +
                options_stack.last
         | 
| 672 617 | 
             
              end
         | 
| 673 618 |  | 
| 619 | 
            +
              # Assigns referenced expressions to refering expressions, e.g. if there is
         | 
| 620 | 
            +
              # an instance of Backreference::Number, its #referenced_expression is set to
         | 
| 621 | 
            +
              # the instance of Group::Capture that it refers to via its number.
         | 
| 674 622 | 
             
              def assign_referenced_expressions
         | 
| 675 623 | 
             
                targets = {}
         | 
| 624 | 
            +
                # find all referencable expressions
         | 
| 676 625 | 
             
                root.each_expression do |exp|
         | 
| 677 626 | 
             
                  exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
         | 
| 678 627 | 
             
                end
         | 
| 628 | 
            +
                # assign them to any refering expressions
         | 
| 679 629 | 
             
                root.each_expression do |exp|
         | 
| 680 630 | 
             
                  exp.respond_to?(:reference) &&
         | 
| 681 631 | 
             
                    exp.referenced_expression = targets[exp.reference]
         |