regexp_parser 2.9.0 → 2.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/lib/regexp_parser/expression/classes/backreference.rb +1 -20
 - data/lib/regexp_parser/expression/classes/conditional.rb +0 -14
 - data/lib/regexp_parser/expression/classes/escape_sequence.rb +18 -90
 - data/lib/regexp_parser/expression/classes/keep.rb +1 -1
 - data/lib/regexp_parser/expression/methods/escape_sequence_char.rb +5 -0
 - data/lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb +68 -0
 - data/lib/regexp_parser/expression/methods/referenced_expressions.rb +28 -0
 - data/lib/regexp_parser/expression.rb +37 -34
 - data/lib/regexp_parser/parser.rb +12 -9
 - data/lib/regexp_parser/scanner/errors/scanner_error.rb +1 -1
 - data/lib/regexp_parser/scanner/scanner.rl +5 -5
 - data/lib/regexp_parser/scanner.rb +728 -771
 - data/lib/regexp_parser/syntax/token.rb +13 -13
 - data/lib/regexp_parser/syntax/versions.rb +1 -1
 - data/lib/regexp_parser/syntax.rb +1 -1
 - data/lib/regexp_parser/version.rb +1 -1
 - data/lib/regexp_parser.rb +6 -6
 - data/regexp_parser.gemspec +2 -0
 - metadata +7 -3
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: f6ed5457d89738fa1076cf3875cd2d009973f02857ea68e055ef3ef74a78dc91
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: d67eb5f0cb37ad106574b2ae327eefcfc13c9d585cddec6661898f4d8166ebcc
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 6b8adbc3c4707fc4c823456ae1d7547f17568802de03008a17fef18a5f95af08b0e42d48ccdfab25a740603a58ab89c036d70cec94405701201e5a5af51ce392
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 9bea98a42ab64a9b45ddc5564cd077d7eb6d2ddc293844759bb8001aa9fefd8aa26b0e03fff7a286ccde9f7aeacacda9fbb187fe04082749d3c2605e0cece7b9
         
     | 
| 
         @@ -1,25 +1,6 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Regexp::Expression
         
     | 
| 
       2 
2 
     | 
    
         
             
              module Backreference
         
     | 
| 
       3 
     | 
    
         
            -
                class Base < Regexp::Expression::Base
         
     | 
| 
       4 
     | 
    
         
            -
                  attr_accessor :referenced_expression
         
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
                  def initialize_copy(orig)
         
     | 
| 
       7 
     | 
    
         
            -
                    exp_id = [self.class, self.starts_at]
         
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
                    # prevent infinite recursion for recursive subexp calls
         
     | 
| 
       10 
     | 
    
         
            -
                    copied = @@copied ||= {}
         
     | 
| 
       11 
     | 
    
         
            -
                    self.referenced_expression =
         
     | 
| 
       12 
     | 
    
         
            -
                      if copied[exp_id]
         
     | 
| 
       13 
     | 
    
         
            -
                        orig.referenced_expression
         
     | 
| 
       14 
     | 
    
         
            -
                      else
         
     | 
| 
       15 
     | 
    
         
            -
                        copied[exp_id] = true
         
     | 
| 
       16 
     | 
    
         
            -
                        orig.referenced_expression.dup
         
     | 
| 
       17 
     | 
    
         
            -
                      end
         
     | 
| 
       18 
     | 
    
         
            -
                    copied.clear
         
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
                    super
         
     | 
| 
       21 
     | 
    
         
            -
                  end
         
     | 
| 
       22 
     | 
    
         
            -
                end
         
     | 
| 
      
 3 
     | 
    
         
            +
                class Base < Regexp::Expression::Base; end
         
     | 
| 
       23 
4 
     | 
    
         | 
| 
       24 
5 
     | 
    
         
             
                class Number < Backreference::Base
         
     | 
| 
       25 
6 
     | 
    
         
             
                  attr_reader :number
         
     | 
| 
         @@ -7,26 +7,17 @@ module Regexp::Expression 
     | 
|
| 
       7 
7 
     | 
    
         
             
                end
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
9 
     | 
    
         
             
                class Condition < Regexp::Expression::Base
         
     | 
| 
       10 
     | 
    
         
            -
                  attr_accessor :referenced_expression
         
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
10 
     | 
    
         
             
                  # Name or number of the referenced capturing group that determines state.
         
     | 
| 
       13 
11 
     | 
    
         
             
                  # Returns a String if reference is by name, Integer if by number.
         
     | 
| 
       14 
12 
     | 
    
         
             
                  def reference
         
     | 
| 
       15 
13 
     | 
    
         
             
                    ref = text.tr("'<>()", "")
         
     | 
| 
       16 
14 
     | 
    
         
             
                    ref =~ /\D/ ? ref : Integer(ref)
         
     | 
| 
       17 
15 
     | 
    
         
             
                  end
         
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
                  def initialize_copy(orig)
         
     | 
| 
       20 
     | 
    
         
            -
                    self.referenced_expression = orig.referenced_expression.dup
         
     | 
| 
       21 
     | 
    
         
            -
                    super
         
     | 
| 
       22 
     | 
    
         
            -
                  end
         
     | 
| 
       23 
16 
     | 
    
         
             
                end
         
     | 
| 
       24 
17 
     | 
    
         | 
| 
       25 
18 
     | 
    
         
             
                class Branch < Regexp::Expression::Sequence; end
         
     | 
| 
       26 
19 
     | 
    
         | 
| 
       27 
20 
     | 
    
         
             
                class Expression < Regexp::Expression::Subexpression
         
     | 
| 
       28 
     | 
    
         
            -
                  attr_accessor :referenced_expression
         
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
21 
     | 
    
         
             
                  def <<(exp)
         
     | 
| 
       31 
22 
     | 
    
         
             
                    expressions.last << exp
         
     | 
| 
       32 
23 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -54,11 +45,6 @@ module Regexp::Expression 
     | 
|
| 
       54 
45 
     | 
    
         
             
                  def reference
         
     | 
| 
       55 
46 
     | 
    
         
             
                    condition.reference
         
     | 
| 
       56 
47 
     | 
    
         
             
                  end
         
     | 
| 
       57 
     | 
    
         
            -
             
     | 
| 
       58 
     | 
    
         
            -
                  def initialize_copy(orig)
         
     | 
| 
       59 
     | 
    
         
            -
                    self.referenced_expression = orig.referenced_expression.dup
         
     | 
| 
       60 
     | 
    
         
            -
                    super
         
     | 
| 
       61 
     | 
    
         
            -
                  end
         
     | 
| 
       62 
48 
     | 
    
         
             
                end
         
     | 
| 
       63 
49 
     | 
    
         
             
              end
         
     | 
| 
       64 
50 
     | 
    
         
             
            end
         
     | 
| 
         @@ -1,100 +1,28 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Regexp::Expression
         
     | 
| 
       2 
2 
     | 
    
         
             
              module EscapeSequence
         
     | 
| 
       3 
     | 
    
         
            -
                 
     | 
| 
       4 
     | 
    
         
            -
                  def codepoint
         
     | 
| 
       5 
     | 
    
         
            -
                    char.ord
         
     | 
| 
       6 
     | 
    
         
            -
                  end
         
     | 
| 
      
 3 
     | 
    
         
            +
                Base        = Class.new(Regexp::Expression::Base)
         
     | 
| 
       7 
4 
     | 
    
         | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
                      YAML.load(%Q(---\n"#{text}"\n))
         
     | 
| 
       17 
     | 
    
         
            -
                    end
         
     | 
| 
       18 
     | 
    
         
            -
                  end
         
     | 
| 
       19 
     | 
    
         
            -
                end
         
     | 
| 
      
 5 
     | 
    
         
            +
                AsciiEscape = Class.new(Base) # \e
         
     | 
| 
      
 6 
     | 
    
         
            +
                Backspace   = Class.new(Base) # \b
         
     | 
| 
      
 7 
     | 
    
         
            +
                Bell        = Class.new(Base) # \a
         
     | 
| 
      
 8 
     | 
    
         
            +
                FormFeed    = Class.new(Base) # \f
         
     | 
| 
      
 9 
     | 
    
         
            +
                Newline     = Class.new(Base) # \n
         
     | 
| 
      
 10 
     | 
    
         
            +
                Return      = Class.new(Base) # \r
         
     | 
| 
      
 11 
     | 
    
         
            +
                Tab         = Class.new(Base) # \t
         
     | 
| 
      
 12 
     | 
    
         
            +
                VerticalTab = Class.new(Base) # \v
         
     | 
| 
       20 
13 
     | 
    
         | 
| 
       21 
     | 
    
         
            -
                 
     | 
| 
       22 
     | 
    
         
            -
                  def char
         
     | 
| 
       23 
     | 
    
         
            -
                    text[1..-1]
         
     | 
| 
       24 
     | 
    
         
            -
                  end
         
     | 
| 
       25 
     | 
    
         
            -
                end
         
     | 
| 
      
 14 
     | 
    
         
            +
                Literal     = Class.new(Base) # e.g. \j, \@, \😀 (ineffectual escapes)
         
     | 
| 
       26 
15 
     | 
    
         | 
| 
       27 
     | 
    
         
            -
                 
     | 
| 
       28 
     | 
    
         
            -
                 
     | 
| 
       29 
     | 
    
         
            -
                 
     | 
| 
       30 
     | 
    
         
            -
                class FormFeed      < EscapeSequence::Base; end
         
     | 
| 
       31 
     | 
    
         
            -
                class Newline       < EscapeSequence::Base; end
         
     | 
| 
       32 
     | 
    
         
            -
                class Return        < EscapeSequence::Base; end
         
     | 
| 
       33 
     | 
    
         
            -
                class Tab           < EscapeSequence::Base; end
         
     | 
| 
       34 
     | 
    
         
            -
                class VerticalTab   < EscapeSequence::Base; end
         
     | 
| 
      
 16 
     | 
    
         
            +
                Octal       = Class.new(Base) # e.g. \012
         
     | 
| 
      
 17 
     | 
    
         
            +
                Hex         = Class.new(Base) # e.g. \x0A
         
     | 
| 
      
 18 
     | 
    
         
            +
                Codepoint   = Class.new(Base) # e.g. \u000A
         
     | 
| 
       35 
19 
     | 
    
         | 
| 
       36 
     | 
    
         
            -
                 
     | 
| 
       37 
     | 
    
         
            -
                class Codepoint     < EscapeSequence::Base; end
         
     | 
| 
      
 20 
     | 
    
         
            +
                CodepointList = Class.new(Base) # e.g. \u{A B}
         
     | 
| 
       38 
21 
     | 
    
         | 
| 
       39 
     | 
    
         
            -
                 
     | 
| 
       40 
     | 
    
         
            -
             
     | 
| 
       41 
     | 
    
         
            -
             
     | 
| 
       42 
     | 
    
         
            -
             
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
                  def codepoint
         
     | 
| 
       45 
     | 
    
         
            -
                    raise NoMethodError, 'CodepointList responds only to #codepoints'
         
     | 
| 
       46 
     | 
    
         
            -
                  end
         
     | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
       48 
     | 
    
         
            -
                  def chars
         
     | 
| 
       49 
     | 
    
         
            -
                    codepoints.map { |cp| cp.chr('utf-8') }
         
     | 
| 
       50 
     | 
    
         
            -
                  end
         
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
                  def codepoints
         
     | 
| 
       53 
     | 
    
         
            -
                    text.scan(/\h+/).map(&:hex)
         
     | 
| 
       54 
     | 
    
         
            -
                  end
         
     | 
| 
       55 
     | 
    
         
            -
                end
         
     | 
| 
       56 
     | 
    
         
            -
             
     | 
| 
       57 
     | 
    
         
            -
                class Octal < EscapeSequence::Base
         
     | 
| 
       58 
     | 
    
         
            -
                  def char
         
     | 
| 
       59 
     | 
    
         
            -
                    text[1..-1].to_i(8).chr('utf-8')
         
     | 
| 
       60 
     | 
    
         
            -
                  end
         
     | 
| 
       61 
     | 
    
         
            -
                end
         
     | 
| 
       62 
     | 
    
         
            -
             
     | 
| 
       63 
     | 
    
         
            -
                class AbstractMetaControlSequence < EscapeSequence::Base
         
     | 
| 
       64 
     | 
    
         
            -
                  def char
         
     | 
| 
       65 
     | 
    
         
            -
                    codepoint.chr('utf-8')
         
     | 
| 
       66 
     | 
    
         
            -
                  end
         
     | 
| 
       67 
     | 
    
         
            -
             
     | 
| 
       68 
     | 
    
         
            -
                  private
         
     | 
| 
       69 
     | 
    
         
            -
             
     | 
| 
       70 
     | 
    
         
            -
                  def control_sequence_to_s(control_sequence)
         
     | 
| 
       71 
     | 
    
         
            -
                    five_lsb = control_sequence.unpack('B*').first[-5..-1]
         
     | 
| 
       72 
     | 
    
         
            -
                    ["000#{five_lsb}"].pack('B*')
         
     | 
| 
       73 
     | 
    
         
            -
                  end
         
     | 
| 
       74 
     | 
    
         
            -
             
     | 
| 
       75 
     | 
    
         
            -
                  def meta_char_to_codepoint(meta_char)
         
     | 
| 
       76 
     | 
    
         
            -
                    byte_value = meta_char.ord
         
     | 
| 
       77 
     | 
    
         
            -
                    byte_value < 128 ? byte_value + 128 : byte_value
         
     | 
| 
       78 
     | 
    
         
            -
                  end
         
     | 
| 
       79 
     | 
    
         
            -
                end
         
     | 
| 
       80 
     | 
    
         
            -
             
     | 
| 
       81 
     | 
    
         
            -
                class Control < AbstractMetaControlSequence
         
     | 
| 
       82 
     | 
    
         
            -
                  def codepoint
         
     | 
| 
       83 
     | 
    
         
            -
                    control_sequence_to_s(text).ord
         
     | 
| 
       84 
     | 
    
         
            -
                  end
         
     | 
| 
       85 
     | 
    
         
            -
                end
         
     | 
| 
       86 
     | 
    
         
            -
             
     | 
| 
       87 
     | 
    
         
            -
                class Meta < AbstractMetaControlSequence
         
     | 
| 
       88 
     | 
    
         
            -
                  def codepoint
         
     | 
| 
       89 
     | 
    
         
            -
                    meta_char_to_codepoint(text[-1])
         
     | 
| 
       90 
     | 
    
         
            -
                  end
         
     | 
| 
       91 
     | 
    
         
            -
                end
         
     | 
| 
       92 
     | 
    
         
            -
             
     | 
| 
       93 
     | 
    
         
            -
                class MetaControl < AbstractMetaControlSequence
         
     | 
| 
       94 
     | 
    
         
            -
                  def codepoint
         
     | 
| 
       95 
     | 
    
         
            -
                    meta_char_to_codepoint(control_sequence_to_s(text))
         
     | 
| 
       96 
     | 
    
         
            -
                  end
         
     | 
| 
       97 
     | 
    
         
            -
                end
         
     | 
| 
      
 22 
     | 
    
         
            +
                AbstractMetaControlSequence = Class.new(Base)
         
     | 
| 
      
 23 
     | 
    
         
            +
                Control                     = Class.new(AbstractMetaControlSequence) # e.g. \cB
         
     | 
| 
      
 24 
     | 
    
         
            +
                Meta                        = Class.new(AbstractMetaControlSequence) # e.g. \M-Z
         
     | 
| 
      
 25 
     | 
    
         
            +
                MetaControl                 = Class.new(AbstractMetaControlSequence) # e.g. \M-\cX
         
     | 
| 
       98 
26 
     | 
    
         
             
              end
         
     | 
| 
       99 
27 
     | 
    
         | 
| 
       100 
28 
     | 
    
         
             
              # alias for symmetry between Token::* and Expression::*
         
     | 
| 
         @@ -1,6 +1,6 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Regexp::Expression
         
     | 
| 
       2 
2 
     | 
    
         
             
              module Keep
         
     | 
| 
       3 
     | 
    
         
            -
                #  
     | 
| 
      
 3 
     | 
    
         
            +
                # TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
         
     | 
| 
       4 
4 
     | 
    
         
             
                #       that contains all expressions to its left.
         
     | 
| 
       5 
5 
     | 
    
         
             
                class Mark < Regexp::Expression::Base; end
         
     | 
| 
       6 
6 
     | 
    
         
             
              end
         
     | 
| 
         @@ -0,0 +1,68 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Regexp::Expression::EscapeSequence
         
     | 
| 
      
 2 
     | 
    
         
            +
              AsciiEscape.class_eval { def codepoint; 0x1B end }
         
     | 
| 
      
 3 
     | 
    
         
            +
              Backspace.class_eval   { def codepoint; 0x8  end }
         
     | 
| 
      
 4 
     | 
    
         
            +
              Bell.class_eval        { def codepoint; 0x7  end }
         
     | 
| 
      
 5 
     | 
    
         
            +
              FormFeed.class_eval    { def codepoint; 0xC  end }
         
     | 
| 
      
 6 
     | 
    
         
            +
              Newline.class_eval     { def codepoint; 0xA  end }
         
     | 
| 
      
 7 
     | 
    
         
            +
              Return.class_eval      { def codepoint; 0xD  end }
         
     | 
| 
      
 8 
     | 
    
         
            +
              Tab.class_eval         { def codepoint; 0x9  end }
         
     | 
| 
      
 9 
     | 
    
         
            +
              VerticalTab.class_eval { def codepoint; 0xB  end }
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
              Literal.class_eval     { def codepoint; text[1].ord end }
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
              Octal.class_eval       { def codepoint; text[/\d+/].to_i(8) end }
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
              Hex.class_eval         { def codepoint; text[/\h+/].hex end }
         
     | 
| 
      
 16 
     | 
    
         
            +
              Codepoint.class_eval   { def codepoint; text[/\h+/].hex end }
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
              CodepointList.class_eval do
         
     | 
| 
      
 19 
     | 
    
         
            +
                # Maybe this should be a unique top-level expression class?
         
     | 
| 
      
 20 
     | 
    
         
            +
                def char
         
     | 
| 
      
 21 
     | 
    
         
            +
                  raise NoMethodError, 'CodepointList responds only to #chars'
         
     | 
| 
      
 22 
     | 
    
         
            +
                end
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
                def codepoint
         
     | 
| 
      
 25 
     | 
    
         
            +
                  raise NoMethodError, 'CodepointList responds only to #codepoints'
         
     | 
| 
      
 26 
     | 
    
         
            +
                end
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
                def chars
         
     | 
| 
      
 29 
     | 
    
         
            +
                  codepoints.map { |cp| cp.chr('utf-8') }
         
     | 
| 
      
 30 
     | 
    
         
            +
                end
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
                def codepoints
         
     | 
| 
      
 33 
     | 
    
         
            +
                  text.scan(/\h+/).map(&:hex)
         
     | 
| 
      
 34 
     | 
    
         
            +
                end
         
     | 
| 
      
 35 
     | 
    
         
            +
              end
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
      
 37 
     | 
    
         
            +
              AbstractMetaControlSequence.class_eval do
         
     | 
| 
      
 38 
     | 
    
         
            +
                private
         
     | 
| 
      
 39 
     | 
    
         
            +
             
     | 
| 
      
 40 
     | 
    
         
            +
                def control_sequence_to_s(control_sequence)
         
     | 
| 
      
 41 
     | 
    
         
            +
                  five_lsb = control_sequence.unpack('B*').first[-5..-1]
         
     | 
| 
      
 42 
     | 
    
         
            +
                  ["000#{five_lsb}"].pack('B*')
         
     | 
| 
      
 43 
     | 
    
         
            +
                end
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
      
 45 
     | 
    
         
            +
                def meta_char_to_codepoint(meta_char)
         
     | 
| 
      
 46 
     | 
    
         
            +
                  byte_value = meta_char.ord
         
     | 
| 
      
 47 
     | 
    
         
            +
                  byte_value < 128 ? byte_value + 128 : byte_value
         
     | 
| 
      
 48 
     | 
    
         
            +
                end
         
     | 
| 
      
 49 
     | 
    
         
            +
              end
         
     | 
| 
      
 50 
     | 
    
         
            +
             
     | 
| 
      
 51 
     | 
    
         
            +
              Control.class_eval do
         
     | 
| 
      
 52 
     | 
    
         
            +
                def codepoint
         
     | 
| 
      
 53 
     | 
    
         
            +
                  control_sequence_to_s(text).ord
         
     | 
| 
      
 54 
     | 
    
         
            +
                end
         
     | 
| 
      
 55 
     | 
    
         
            +
              end
         
     | 
| 
      
 56 
     | 
    
         
            +
             
     | 
| 
      
 57 
     | 
    
         
            +
              Meta.class_eval do
         
     | 
| 
      
 58 
     | 
    
         
            +
                def codepoint
         
     | 
| 
      
 59 
     | 
    
         
            +
                  meta_char_to_codepoint(text[-1])
         
     | 
| 
      
 60 
     | 
    
         
            +
                end
         
     | 
| 
      
 61 
     | 
    
         
            +
              end
         
     | 
| 
      
 62 
     | 
    
         
            +
             
     | 
| 
      
 63 
     | 
    
         
            +
              MetaControl.class_eval do
         
     | 
| 
      
 64 
     | 
    
         
            +
                def codepoint
         
     | 
| 
      
 65 
     | 
    
         
            +
                  meta_char_to_codepoint(control_sequence_to_s(text))
         
     | 
| 
      
 66 
     | 
    
         
            +
                end
         
     | 
| 
      
 67 
     | 
    
         
            +
              end
         
     | 
| 
      
 68 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,28 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Regexp::Expression
         
     | 
| 
      
 2 
     | 
    
         
            +
              module ReferencedExpressions
         
     | 
| 
      
 3 
     | 
    
         
            +
                attr_accessor :referenced_expressions
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
                def referenced_expression
         
     | 
| 
      
 6 
     | 
    
         
            +
                  referenced_expressions && referenced_expressions.first
         
     | 
| 
      
 7 
     | 
    
         
            +
                end
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
                def initialize_copy(orig)
         
     | 
| 
      
 10 
     | 
    
         
            +
                  exp_id = [self.class, self.starts_at]
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
                  # prevent infinite recursion for recursive subexp calls
         
     | 
| 
      
 13 
     | 
    
         
            +
                  copied = self.class.instance_eval { @copied_ref_exps ||= {} }
         
     | 
| 
      
 14 
     | 
    
         
            +
                  self.referenced_expressions =
         
     | 
| 
      
 15 
     | 
    
         
            +
                    if copied[exp_id]
         
     | 
| 
      
 16 
     | 
    
         
            +
                      orig.referenced_expressions
         
     | 
| 
      
 17 
     | 
    
         
            +
                    else
         
     | 
| 
      
 18 
     | 
    
         
            +
                      copied[exp_id] = true
         
     | 
| 
      
 19 
     | 
    
         
            +
                      orig.referenced_expressions && orig.referenced_expressions.map(&:dup)
         
     | 
| 
      
 20 
     | 
    
         
            +
                    end
         
     | 
| 
      
 21 
     | 
    
         
            +
                  copied.clear
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
                  super
         
     | 
| 
      
 24 
     | 
    
         
            +
                end
         
     | 
| 
      
 25 
     | 
    
         
            +
              end
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
              Base.include ReferencedExpressions
         
     | 
| 
      
 28 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -1,37 +1,40 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            require_relative 'error'
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
      
 3 
     | 
    
         
            +
            require_relative 'expression/shared'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require_relative 'expression/base'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require_relative 'expression/quantifier'
         
     | 
| 
      
 6 
     | 
    
         
            +
            require_relative 'expression/subexpression'
         
     | 
| 
      
 7 
     | 
    
         
            +
            require_relative 'expression/sequence'
         
     | 
| 
      
 8 
     | 
    
         
            +
            require_relative 'expression/sequence_operation'
         
     | 
| 
       9 
9 
     | 
    
         | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
      
 10 
     | 
    
         
            +
            require_relative 'expression/classes/alternation'
         
     | 
| 
      
 11 
     | 
    
         
            +
            require_relative 'expression/classes/anchor'
         
     | 
| 
      
 12 
     | 
    
         
            +
            require_relative 'expression/classes/backreference'
         
     | 
| 
      
 13 
     | 
    
         
            +
            require_relative 'expression/classes/character_set'
         
     | 
| 
      
 14 
     | 
    
         
            +
            require_relative 'expression/classes/character_set/intersection'
         
     | 
| 
      
 15 
     | 
    
         
            +
            require_relative 'expression/classes/character_set/range'
         
     | 
| 
      
 16 
     | 
    
         
            +
            require_relative 'expression/classes/character_type'
         
     | 
| 
      
 17 
     | 
    
         
            +
            require_relative 'expression/classes/conditional'
         
     | 
| 
      
 18 
     | 
    
         
            +
            require_relative 'expression/classes/escape_sequence'
         
     | 
| 
      
 19 
     | 
    
         
            +
            require_relative 'expression/classes/free_space'
         
     | 
| 
      
 20 
     | 
    
         
            +
            require_relative 'expression/classes/group'
         
     | 
| 
      
 21 
     | 
    
         
            +
            require_relative 'expression/classes/keep'
         
     | 
| 
      
 22 
     | 
    
         
            +
            require_relative 'expression/classes/literal'
         
     | 
| 
      
 23 
     | 
    
         
            +
            require_relative 'expression/classes/posix_class'
         
     | 
| 
      
 24 
     | 
    
         
            +
            require_relative 'expression/classes/root'
         
     | 
| 
      
 25 
     | 
    
         
            +
            require_relative 'expression/classes/unicode_property'
         
     | 
| 
       26 
26 
     | 
    
         | 
| 
       27 
     | 
    
         
            -
             
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
       33 
     | 
    
         
            -
             
     | 
| 
       34 
     | 
    
         
            -
             
     | 
| 
       35 
     | 
    
         
            -
             
     | 
| 
       36 
     | 
    
         
            -
             
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
      
 27 
     | 
    
         
            +
            require_relative 'expression/methods/construct'
         
     | 
| 
      
 28 
     | 
    
         
            +
            require_relative 'expression/methods/escape_sequence_char'
         
     | 
| 
      
 29 
     | 
    
         
            +
            require_relative 'expression/methods/escape_sequence_codepoint'
         
     | 
| 
      
 30 
     | 
    
         
            +
            require_relative 'expression/methods/human_name'
         
     | 
| 
      
 31 
     | 
    
         
            +
            require_relative 'expression/methods/match'
         
     | 
| 
      
 32 
     | 
    
         
            +
            require_relative 'expression/methods/match_length'
         
     | 
| 
      
 33 
     | 
    
         
            +
            require_relative 'expression/methods/negative'
         
     | 
| 
      
 34 
     | 
    
         
            +
            require_relative 'expression/methods/options'
         
     | 
| 
      
 35 
     | 
    
         
            +
            require_relative 'expression/methods/parts'
         
     | 
| 
      
 36 
     | 
    
         
            +
            require_relative 'expression/methods/printing'
         
     | 
| 
      
 37 
     | 
    
         
            +
            require_relative 'expression/methods/referenced_expressions'
         
     | 
| 
      
 38 
     | 
    
         
            +
            require_relative 'expression/methods/strfregexp'
         
     | 
| 
      
 39 
     | 
    
         
            +
            require_relative 'expression/methods/tests'
         
     | 
| 
      
 40 
     | 
    
         
            +
            require_relative 'expression/methods/traverse'
         
     | 
    
        data/lib/regexp_parser/parser.rb
    CHANGED
    
    | 
         @@ -1,5 +1,5 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            require_relative 'error'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require_relative 'expression'
         
     | 
| 
       3 
3 
     | 
    
         | 
| 
       4 
4 
     | 
    
         
             
            class Regexp::Parser
         
     | 
| 
       5 
5 
     | 
    
         
             
              include Regexp::Expression
         
     | 
| 
         @@ -575,21 +575,24 @@ class Regexp::Parser 
     | 
|
| 
       575 
575 
     | 
    
         
             
                options_stack.last
         
     | 
| 
       576 
576 
     | 
    
         
             
              end
         
     | 
| 
       577 
577 
     | 
    
         | 
| 
       578 
     | 
    
         
            -
              # Assigns referenced expressions to  
     | 
| 
      
 578 
     | 
    
         
            +
              # Assigns referenced expressions to referring expressions, e.g. if there is
         
     | 
| 
       579 
579 
     | 
    
         
             
              # an instance of Backreference::Number, its #referenced_expression is set to
         
     | 
| 
       580 
580 
     | 
    
         
             
              # the instance of Group::Capture that it refers to via its number.
         
     | 
| 
       581 
581 
     | 
    
         
             
              def assign_referenced_expressions
         
     | 
| 
       582 
     | 
    
         
            -
                # find all  
     | 
| 
       583 
     | 
    
         
            -
                targets = { 0 => root }
         
     | 
| 
      
 582 
     | 
    
         
            +
                # find all referenceable and referring expressions
         
     | 
| 
      
 583 
     | 
    
         
            +
                targets = { 0 => [root] }
         
     | 
| 
       584 
584 
     | 
    
         
             
                referrers = []
         
     | 
| 
       585 
585 
     | 
    
         
             
                root.each_expression do |exp|
         
     | 
| 
       586 
     | 
    
         
            -
                  exp. 
     | 
| 
       587 
     | 
    
         
            -
             
     | 
| 
      
 586 
     | 
    
         
            +
                  if exp.referential?
         
     | 
| 
      
 587 
     | 
    
         
            +
                    referrers << exp
         
     | 
| 
      
 588 
     | 
    
         
            +
                  elsif exp.is_a?(Group::Capture)
         
     | 
| 
      
 589 
     | 
    
         
            +
                    (targets[exp.identifier] ||= []) << exp
         
     | 
| 
      
 590 
     | 
    
         
            +
                  end
         
     | 
| 
       588 
591 
     | 
    
         
             
                end
         
     | 
| 
       589 
     | 
    
         
            -
                # assign  
     | 
| 
      
 592 
     | 
    
         
            +
                # assign referenced expressions to referring expressions
         
     | 
| 
       590 
593 
     | 
    
         
             
                # (in a second iteration because there might be forward references)
         
     | 
| 
       591 
594 
     | 
    
         
             
                referrers.each do |exp|
         
     | 
| 
       592 
     | 
    
         
            -
                  exp. 
     | 
| 
      
 595 
     | 
    
         
            +
                  exp.referenced_expressions = targets[exp.reference] ||
         
     | 
| 
       593 
596 
     | 
    
         
             
                    raise(ParserError, "Invalid reference #{exp.reference} at pos #{exp.ts}")
         
     | 
| 
       594 
597 
     | 
    
         
             
                end
         
     | 
| 
       595 
598 
     | 
    
         
             
              end
         
     | 
| 
         @@ -78,8 +78,8 @@ 
     | 
|
| 
       78 
78 
     | 
    
         
             
              # try to treat every other group head as options group, like Ruby
         
     | 
| 
       79 
79 
     | 
    
         
             
              group_options         = '?' . ( [^!#'():<=>~]+ . ':'? ) ?;
         
     | 
| 
       80 
80 
     | 
    
         | 
| 
       81 
     | 
    
         
            -
              group_name_id_ab      = ([ 
     | 
| 
       82 
     | 
    
         
            -
              group_name_id_sq      = ([^0-9\-'] 
     | 
| 
      
 81 
     | 
    
         
            +
              group_name_id_ab      = ([^!=0-9\->] | utf8_multibyte) . ([^>] | utf8_multibyte)*;
         
     | 
| 
      
 82 
     | 
    
         
            +
              group_name_id_sq      = ([^0-9\-']   | utf8_multibyte) . ([^'] | utf8_multibyte)*;
         
     | 
| 
       83 
83 
     | 
    
         
             
              group_number          = '-'? . [0-9]+;
         
     | 
| 
       84 
84 
     | 
    
         
             
              group_level           = [+\-] . [0-9]+;
         
     | 
| 
       85 
85 
     | 
    
         | 
| 
         @@ -640,9 +640,9 @@ 
     | 
|
| 
       640 
640 
     | 
    
         
             
              *|;
         
     | 
| 
       641 
641 
     | 
    
         
             
            }%%
         
     | 
| 
       642 
642 
     | 
    
         | 
| 
       643 
     | 
    
         
            -
             
     | 
| 
       644 
     | 
    
         
            -
             
     | 
| 
       645 
     | 
    
         
            -
             
     | 
| 
      
 643 
     | 
    
         
            +
            require_relative 'scanner/errors/scanner_error'
         
     | 
| 
      
 644 
     | 
    
         
            +
            require_relative 'scanner/errors/premature_end_error'
         
     | 
| 
      
 645 
     | 
    
         
            +
            require_relative 'scanner/errors/validation_error'
         
     | 
| 
       646 
646 
     | 
    
         | 
| 
       647 
647 
     | 
    
         
             
            class Regexp::Scanner
         
     | 
| 
       648 
648 
     | 
    
         
             
              # Scans the given regular expression text, or Regexp object and collects the
         
     |