regexp_parser 2.8.1 → 2.11.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/Gemfile +6 -4
 - data/LICENSE +1 -1
 - data/Rakefile +5 -3
 - data/lib/regexp_parser/error.rb +2 -0
 - data/lib/regexp_parser/expression/base.rb +2 -0
 - data/lib/regexp_parser/expression/classes/alternation.rb +2 -0
 - data/lib/regexp_parser/expression/classes/anchor.rb +2 -0
 - data/lib/regexp_parser/expression/classes/backreference.rb +3 -20
 - data/lib/regexp_parser/expression/classes/character_set/intersection.rb +2 -0
 - data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -0
 - data/lib/regexp_parser/expression/classes/character_set.rb +3 -4
 - data/lib/regexp_parser/expression/classes/character_type.rb +2 -0
 - data/lib/regexp_parser/expression/classes/conditional.rb +2 -14
 - data/lib/regexp_parser/expression/classes/escape_sequence.rb +26 -95
 - data/lib/regexp_parser/expression/classes/free_space.rb +2 -0
 - data/lib/regexp_parser/expression/classes/group.rb +2 -0
 - data/lib/regexp_parser/expression/classes/keep.rb +3 -1
 - data/lib/regexp_parser/expression/classes/literal.rb +2 -0
 - data/lib/regexp_parser/expression/classes/posix_class.rb +2 -4
 - data/lib/regexp_parser/expression/classes/root.rb +2 -0
 - data/lib/regexp_parser/expression/classes/unicode_property.rb +8 -9
 - data/lib/regexp_parser/expression/methods/construct.rb +2 -0
 - data/lib/regexp_parser/expression/methods/escape_sequence_char.rb +7 -0
 - data/lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb +76 -0
 - data/lib/regexp_parser/expression/methods/human_name.rb +2 -0
 - data/lib/regexp_parser/expression/methods/match.rb +2 -0
 - data/lib/regexp_parser/expression/methods/match_length.rb +2 -0
 - data/lib/regexp_parser/expression/methods/negative.rb +22 -0
 - data/lib/regexp_parser/expression/methods/options.rb +2 -0
 - data/lib/regexp_parser/expression/methods/parts.rb +2 -0
 - data/lib/regexp_parser/expression/methods/printing.rb +2 -0
 - data/lib/regexp_parser/expression/methods/referenced_expressions.rb +30 -0
 - data/lib/regexp_parser/expression/methods/strfregexp.rb +2 -0
 - data/lib/regexp_parser/expression/methods/tests.rb +2 -0
 - data/lib/regexp_parser/expression/methods/traverse.rb +2 -0
 - data/lib/regexp_parser/expression/quantifier.rb +3 -1
 - data/lib/regexp_parser/expression/sequence.rb +2 -0
 - data/lib/regexp_parser/expression/sequence_operation.rb +2 -0
 - data/lib/regexp_parser/expression/shared.rb +6 -3
 - data/lib/regexp_parser/expression/subexpression.rb +2 -0
 - data/lib/regexp_parser/expression.rb +39 -33
 - data/lib/regexp_parser/lexer.rb +2 -0
 - data/lib/regexp_parser/parser.rb +16 -9
 - data/lib/regexp_parser/scanner/errors/premature_end_error.rb +2 -0
 - data/lib/regexp_parser/scanner/errors/scanner_error.rb +3 -1
 - data/lib/regexp_parser/scanner/errors/validation_error.rb +2 -0
 - data/lib/regexp_parser/scanner/properties/long.csv +37 -0
 - data/lib/regexp_parser/scanner/properties/short.csv +9 -0
 - data/lib/regexp_parser/scanner/scanner.rl +62 -18
 - data/lib/regexp_parser/scanner.rb +1041 -936
 - data/lib/regexp_parser/syntax/any.rb +2 -0
 - data/lib/regexp_parser/syntax/base.rb +2 -0
 - data/lib/regexp_parser/syntax/token/anchor.rb +5 -3
 - data/lib/regexp_parser/syntax/token/assertion.rb +4 -2
 - data/lib/regexp_parser/syntax/token/backreference.rb +8 -6
 - data/lib/regexp_parser/syntax/token/character_set.rb +3 -1
 - data/lib/regexp_parser/syntax/token/character_type.rb +6 -4
 - data/lib/regexp_parser/syntax/token/conditional.rb +5 -3
 - data/lib/regexp_parser/syntax/token/escape.rb +9 -7
 - data/lib/regexp_parser/syntax/token/group.rb +8 -6
 - data/lib/regexp_parser/syntax/token/keep.rb +3 -1
 - data/lib/regexp_parser/syntax/token/meta.rb +4 -2
 - data/lib/regexp_parser/syntax/token/posix_class.rb +4 -2
 - data/lib/regexp_parser/syntax/token/quantifier.rb +8 -6
 - data/lib/regexp_parser/syntax/token/unicode_property.rb +79 -46
 - data/lib/regexp_parser/syntax/token/virtual.rb +5 -3
 - data/lib/regexp_parser/syntax/token.rb +18 -16
 - data/lib/regexp_parser/syntax/version_lookup.rb +4 -2
 - data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -0
 - data/lib/regexp_parser/syntax/versions/1.9.1.rb +2 -0
 - data/lib/regexp_parser/syntax/versions/1.9.3.rb +2 -0
 - data/lib/regexp_parser/syntax/versions/2.0.0.rb +2 -0
 - data/lib/regexp_parser/syntax/versions/2.2.0.rb +2 -0
 - data/lib/regexp_parser/syntax/versions/2.3.0.rb +2 -0
 - data/lib/regexp_parser/syntax/versions/2.4.0.rb +2 -0
 - data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -0
 - data/lib/regexp_parser/syntax/versions/2.5.0.rb +2 -0
 - data/lib/regexp_parser/syntax/versions/2.6.0.rb +2 -0
 - data/lib/regexp_parser/syntax/versions/2.6.2.rb +2 -0
 - data/lib/regexp_parser/syntax/versions/2.6.3.rb +2 -0
 - data/lib/regexp_parser/syntax/versions/3.1.0.rb +2 -0
 - data/lib/regexp_parser/syntax/versions/3.2.0.rb +2 -0
 - data/lib/regexp_parser/syntax/versions/3.5.0.rb +4 -0
 - data/lib/regexp_parser/syntax/versions.rb +3 -1
 - data/lib/regexp_parser/syntax.rb +3 -1
 - data/lib/regexp_parser/token.rb +2 -0
 - data/lib/regexp_parser/version.rb +3 -1
 - data/lib/regexp_parser.rb +8 -6
 - data/regexp_parser.gemspec +7 -5
 - metadata +12 -11
 - data/CHANGELOG.md +0 -691
 - data/README.md +0 -506
 
| 
         @@ -1,3 +1,5 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       1 
3 
     | 
    
         
             
            module Regexp::Expression
         
     | 
| 
       2 
4 
     | 
    
         
             
              module Shared
         
     | 
| 
       3 
5 
     | 
    
         
             
                module ClassMethods; end # filled in ./methods/*.rb
         
     | 
| 
         @@ -70,11 +72,12 @@ module Regexp::Expression 
     | 
|
| 
       70 
72 
     | 
    
         
             
                # lit.to_s(:original) # => 'a +' # with quantifier AND intermittent decorations
         
     | 
| 
       71 
73 
     | 
    
         
             
                #
         
     | 
| 
       72 
74 
     | 
    
         
             
                def to_s(format = :full)
         
     | 
| 
       73 
     | 
    
         
            -
                  base =  
     | 
| 
      
 75 
     | 
    
         
            +
                  base = ''.dup
         
     | 
| 
      
 76 
     | 
    
         
            +
                  parts.each do |part|
         
     | 
| 
       74 
77 
     | 
    
         
             
                    if part.instance_of?(String)
         
     | 
| 
       75 
     | 
    
         
            -
                       
     | 
| 
      
 78 
     | 
    
         
            +
                      base << part
         
     | 
| 
       76 
79 
     | 
    
         
             
                    elsif !part.custom_to_s_handling
         
     | 
| 
       77 
     | 
    
         
            -
                       
     | 
| 
      
 80 
     | 
    
         
            +
                      base << part.to_s(:original)
         
     | 
| 
       78 
81 
     | 
    
         
             
                    end
         
     | 
| 
       79 
82 
     | 
    
         
             
                  end
         
     | 
| 
       80 
83 
     | 
    
         
             
                  "#{base}#{pre_quantifier_decoration(format)}#{quantifier_affix(format)}"
         
     | 
| 
         @@ -1,36 +1,42 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
            require 'regexp_parser/expression/base'
         
     | 
| 
       5 
     | 
    
         
            -
            require 'regexp_parser/expression/quantifier'
         
     | 
| 
       6 
     | 
    
         
            -
            require 'regexp_parser/expression/subexpression'
         
     | 
| 
       7 
     | 
    
         
            -
            require 'regexp_parser/expression/sequence'
         
     | 
| 
       8 
     | 
    
         
            -
            require 'regexp_parser/expression/sequence_operation'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require_relative 'error'
         
     | 
| 
       9 
4 
     | 
    
         | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
            require 'regexp_parser/expression/classes/character_type'
         
     | 
| 
       17 
     | 
    
         
            -
            require 'regexp_parser/expression/classes/conditional'
         
     | 
| 
       18 
     | 
    
         
            -
            require 'regexp_parser/expression/classes/escape_sequence'
         
     | 
| 
       19 
     | 
    
         
            -
            require 'regexp_parser/expression/classes/free_space'
         
     | 
| 
       20 
     | 
    
         
            -
            require 'regexp_parser/expression/classes/group'
         
     | 
| 
       21 
     | 
    
         
            -
            require 'regexp_parser/expression/classes/keep'
         
     | 
| 
       22 
     | 
    
         
            -
            require 'regexp_parser/expression/classes/literal'
         
     | 
| 
       23 
     | 
    
         
            -
            require 'regexp_parser/expression/classes/posix_class'
         
     | 
| 
       24 
     | 
    
         
            -
            require 'regexp_parser/expression/classes/root'
         
     | 
| 
       25 
     | 
    
         
            -
            require 'regexp_parser/expression/classes/unicode_property'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require_relative 'expression/shared'
         
     | 
| 
      
 6 
     | 
    
         
            +
            require_relative 'expression/base'
         
     | 
| 
      
 7 
     | 
    
         
            +
            require_relative 'expression/quantifier'
         
     | 
| 
      
 8 
     | 
    
         
            +
            require_relative 'expression/subexpression'
         
     | 
| 
      
 9 
     | 
    
         
            +
            require_relative 'expression/sequence'
         
     | 
| 
      
 10 
     | 
    
         
            +
            require_relative 'expression/sequence_operation'
         
     | 
| 
       26 
11 
     | 
    
         | 
| 
       27 
     | 
    
         
            -
             
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
       33 
     | 
    
         
            -
             
     | 
| 
       34 
     | 
    
         
            -
             
     | 
| 
       35 
     | 
    
         
            -
             
     | 
| 
       36 
     | 
    
         
            -
             
     | 
| 
      
 12 
     | 
    
         
            +
            require_relative 'expression/classes/alternation'
         
     | 
| 
      
 13 
     | 
    
         
            +
            require_relative 'expression/classes/anchor'
         
     | 
| 
      
 14 
     | 
    
         
            +
            require_relative 'expression/classes/backreference'
         
     | 
| 
      
 15 
     | 
    
         
            +
            require_relative 'expression/classes/character_set'
         
     | 
| 
      
 16 
     | 
    
         
            +
            require_relative 'expression/classes/character_set/intersection'
         
     | 
| 
      
 17 
     | 
    
         
            +
            require_relative 'expression/classes/character_set/range'
         
     | 
| 
      
 18 
     | 
    
         
            +
            require_relative 'expression/classes/character_type'
         
     | 
| 
      
 19 
     | 
    
         
            +
            require_relative 'expression/classes/conditional'
         
     | 
| 
      
 20 
     | 
    
         
            +
            require_relative 'expression/classes/escape_sequence'
         
     | 
| 
      
 21 
     | 
    
         
            +
            require_relative 'expression/classes/free_space'
         
     | 
| 
      
 22 
     | 
    
         
            +
            require_relative 'expression/classes/group'
         
     | 
| 
      
 23 
     | 
    
         
            +
            require_relative 'expression/classes/keep'
         
     | 
| 
      
 24 
     | 
    
         
            +
            require_relative 'expression/classes/literal'
         
     | 
| 
      
 25 
     | 
    
         
            +
            require_relative 'expression/classes/posix_class'
         
     | 
| 
      
 26 
     | 
    
         
            +
            require_relative 'expression/classes/root'
         
     | 
| 
      
 27 
     | 
    
         
            +
            require_relative 'expression/classes/unicode_property'
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
            require_relative 'expression/methods/construct'
         
     | 
| 
      
 30 
     | 
    
         
            +
            require_relative 'expression/methods/escape_sequence_char'
         
     | 
| 
      
 31 
     | 
    
         
            +
            require_relative 'expression/methods/escape_sequence_codepoint'
         
     | 
| 
      
 32 
     | 
    
         
            +
            require_relative 'expression/methods/human_name'
         
     | 
| 
      
 33 
     | 
    
         
            +
            require_relative 'expression/methods/match'
         
     | 
| 
      
 34 
     | 
    
         
            +
            require_relative 'expression/methods/match_length'
         
     | 
| 
      
 35 
     | 
    
         
            +
            require_relative 'expression/methods/negative'
         
     | 
| 
      
 36 
     | 
    
         
            +
            require_relative 'expression/methods/options'
         
     | 
| 
      
 37 
     | 
    
         
            +
            require_relative 'expression/methods/parts'
         
     | 
| 
      
 38 
     | 
    
         
            +
            require_relative 'expression/methods/printing'
         
     | 
| 
      
 39 
     | 
    
         
            +
            require_relative 'expression/methods/referenced_expressions'
         
     | 
| 
      
 40 
     | 
    
         
            +
            require_relative 'expression/methods/strfregexp'
         
     | 
| 
      
 41 
     | 
    
         
            +
            require_relative 'expression/methods/tests'
         
     | 
| 
      
 42 
     | 
    
         
            +
            require_relative 'expression/methods/traverse'
         
     | 
    
        data/lib/regexp_parser/lexer.rb
    CHANGED
    
    
    
        data/lib/regexp_parser/parser.rb
    CHANGED
    
    | 
         @@ -1,5 +1,7 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require_relative 'error'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require_relative 'expression'
         
     | 
| 
       3 
5 
     | 
    
         | 
| 
       4 
6 
     | 
    
         
             
            class Regexp::Parser
         
     | 
| 
       5 
7 
     | 
    
         
             
              include Regexp::Expression
         
     | 
| 
         @@ -319,6 +321,7 @@ class Regexp::Parser 
     | 
|
| 
       319 
321 
     | 
    
         
             
                when :codepoint_list; node << EscapeSequence::CodepointList.new(token, active_opts)
         
     | 
| 
       320 
322 
     | 
    
         
             
                when :hex;            node << EscapeSequence::Hex.new(token, active_opts)
         
     | 
| 
       321 
323 
     | 
    
         
             
                when :octal;          node << EscapeSequence::Octal.new(token, active_opts)
         
     | 
| 
      
 324 
     | 
    
         
            +
                when :utf8_hex;       node << EscapeSequence::UTF8Hex.new(token, active_opts)
         
     | 
| 
       322 
325 
     | 
    
         | 
| 
       323 
326 
     | 
    
         
             
                when :control
         
     | 
| 
       324 
327 
     | 
    
         
             
                  if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
         
     | 
| 
         @@ -467,6 +470,7 @@ class Regexp::Parser 
     | 
|
| 
       467 
470 
     | 
    
         
             
                when *UPTokens::Age;          node << UP::Age.new(token, active_opts)
         
     | 
| 
       468 
471 
     | 
    
         
             
                when *UPTokens::Derived;      node << UP::Derived.new(token, active_opts)
         
     | 
| 
       469 
472 
     | 
    
         
             
                when *UPTokens::Emoji;        node << UP::Emoji.new(token, active_opts)
         
     | 
| 
      
 473 
     | 
    
         
            +
                when *UPTokens::Enumerated;   node << UP::Enumerated.new(token, active_opts)
         
     | 
| 
       470 
474 
     | 
    
         
             
                when *UPTokens::Script;       node << UP::Script.new(token, active_opts)
         
     | 
| 
       471 
475 
     | 
    
         
             
                when *UPTokens::UnicodeBlock; node << UP::Block.new(token, active_opts)
         
     | 
| 
       472 
476 
     | 
    
         | 
| 
         @@ -574,21 +578,24 @@ class Regexp::Parser 
     | 
|
| 
       574 
578 
     | 
    
         
             
                options_stack.last
         
     | 
| 
       575 
579 
     | 
    
         
             
              end
         
     | 
| 
       576 
580 
     | 
    
         | 
| 
       577 
     | 
    
         
            -
              # Assigns referenced expressions to  
     | 
| 
      
 581 
     | 
    
         
            +
              # Assigns referenced expressions to referring expressions, e.g. if there is
         
     | 
| 
       578 
582 
     | 
    
         
             
              # an instance of Backreference::Number, its #referenced_expression is set to
         
     | 
| 
       579 
583 
     | 
    
         
             
              # the instance of Group::Capture that it refers to via its number.
         
     | 
| 
       580 
584 
     | 
    
         
             
              def assign_referenced_expressions
         
     | 
| 
       581 
     | 
    
         
            -
                # find all  
     | 
| 
       582 
     | 
    
         
            -
                targets = { 0 => root }
         
     | 
| 
      
 585 
     | 
    
         
            +
                # find all referenceable and referring expressions
         
     | 
| 
      
 586 
     | 
    
         
            +
                targets = { 0 => [root] }
         
     | 
| 
       583 
587 
     | 
    
         
             
                referrers = []
         
     | 
| 
       584 
588 
     | 
    
         
             
                root.each_expression do |exp|
         
     | 
| 
       585 
     | 
    
         
            -
                  exp. 
     | 
| 
       586 
     | 
    
         
            -
             
     | 
| 
      
 589 
     | 
    
         
            +
                  if exp.referential?
         
     | 
| 
      
 590 
     | 
    
         
            +
                    referrers << exp
         
     | 
| 
      
 591 
     | 
    
         
            +
                  elsif exp.is_a?(Group::Capture)
         
     | 
| 
      
 592 
     | 
    
         
            +
                    (targets[exp.identifier] ||= []) << exp
         
     | 
| 
      
 593 
     | 
    
         
            +
                  end
         
     | 
| 
       587 
594 
     | 
    
         
             
                end
         
     | 
| 
       588 
     | 
    
         
            -
                # assign  
     | 
| 
      
 595 
     | 
    
         
            +
                # assign referenced expressions to referring expressions
         
     | 
| 
       589 
596 
     | 
    
         
             
                # (in a second iteration because there might be forward references)
         
     | 
| 
       590 
597 
     | 
    
         
             
                referrers.each do |exp|
         
     | 
| 
       591 
     | 
    
         
            -
                  exp. 
     | 
| 
      
 598 
     | 
    
         
            +
                  exp.referenced_expressions = targets[exp.reference] ||
         
     | 
| 
       592 
599 
     | 
    
         
             
                    raise(ParserError, "Invalid reference #{exp.reference} at pos #{exp.ts}")
         
     | 
| 
       593 
600 
     | 
    
         
             
                end
         
     | 
| 
       594 
601 
     | 
    
         
             
              end
         
     | 
| 
         @@ -8,6 +8,8 @@ age=12.1,age=12.1 
     | 
|
| 
       8 
8 
     | 
    
         
             
            age=13.0,age=13.0
         
     | 
| 
       9 
9 
     | 
    
         
             
            age=14.0,age=14.0
         
     | 
| 
       10 
10 
     | 
    
         
             
            age=15.0,age=15.0
         
     | 
| 
      
 11 
     | 
    
         
            +
            age=15.1,age=15.1
         
     | 
| 
      
 12 
     | 
    
         
            +
            age=16.0,age=16.0
         
     | 
| 
       11 
13 
     | 
    
         
             
            age=2.0,age=2.0
         
     | 
| 
       12 
14 
     | 
    
         
             
            age=2.1,age=2.1
         
     | 
| 
       13 
15 
     | 
    
         
             
            age=3.0,age=3.0
         
     | 
| 
         @@ -102,18 +104,33 @@ extendedpictographic,extended_pictographic 
     | 
|
| 
       102 
104 
     | 
    
         
             
            extender,extender
         
     | 
| 
       103 
105 
     | 
    
         
             
            finalpunctuation,final_punctuation
         
     | 
| 
       104 
106 
     | 
    
         
             
            format,format
         
     | 
| 
      
 107 
     | 
    
         
            +
            garay,garay
         
     | 
| 
       105 
108 
     | 
    
         
             
            georgian,georgian
         
     | 
| 
       106 
109 
     | 
    
         
             
            glagolitic,glagolitic
         
     | 
| 
       107 
110 
     | 
    
         
             
            gothic,gothic
         
     | 
| 
       108 
111 
     | 
    
         
             
            grantha,grantha
         
     | 
| 
       109 
112 
     | 
    
         
             
            graph,graph
         
     | 
| 
       110 
113 
     | 
    
         
             
            graphemebase,grapheme_base
         
     | 
| 
      
 114 
     | 
    
         
            +
            graphemeclusterbreak=control,grapheme_cluster_break=control
         
     | 
| 
      
 115 
     | 
    
         
            +
            graphemeclusterbreak=cr,grapheme_cluster_break=cr
         
     | 
| 
      
 116 
     | 
    
         
            +
            graphemeclusterbreak=extend,grapheme_cluster_break=extend
         
     | 
| 
      
 117 
     | 
    
         
            +
            graphemeclusterbreak=l,grapheme_cluster_break=l
         
     | 
| 
      
 118 
     | 
    
         
            +
            graphemeclusterbreak=lf,grapheme_cluster_break=lf
         
     | 
| 
      
 119 
     | 
    
         
            +
            graphemeclusterbreak=lv,grapheme_cluster_break=lv
         
     | 
| 
      
 120 
     | 
    
         
            +
            graphemeclusterbreak=lvt,grapheme_cluster_break=lvt
         
     | 
| 
      
 121 
     | 
    
         
            +
            graphemeclusterbreak=prepend,grapheme_cluster_break=prepend
         
     | 
| 
      
 122 
     | 
    
         
            +
            graphemeclusterbreak=regionalindicator,grapheme_cluster_break=regional_indicator
         
     | 
| 
      
 123 
     | 
    
         
            +
            graphemeclusterbreak=spacingmark,grapheme_cluster_break=spacingmark
         
     | 
| 
      
 124 
     | 
    
         
            +
            graphemeclusterbreak=t,grapheme_cluster_break=t
         
     | 
| 
      
 125 
     | 
    
         
            +
            graphemeclusterbreak=v,grapheme_cluster_break=v
         
     | 
| 
      
 126 
     | 
    
         
            +
            graphemeclusterbreak=zwj,grapheme_cluster_break=zwj
         
     | 
| 
       111 
127 
     | 
    
         
             
            graphemeextend,grapheme_extend
         
     | 
| 
       112 
128 
     | 
    
         
             
            graphemelink,grapheme_link
         
     | 
| 
       113 
129 
     | 
    
         
             
            greek,greek
         
     | 
| 
       114 
130 
     | 
    
         
             
            gujarati,gujarati
         
     | 
| 
       115 
131 
     | 
    
         
             
            gunjalagondi,gunjala_gondi
         
     | 
| 
       116 
132 
     | 
    
         
             
            gurmukhi,gurmukhi
         
     | 
| 
      
 133 
     | 
    
         
            +
            gurungkhema,gurung_khema
         
     | 
| 
       117 
134 
     | 
    
         
             
            han,han
         
     | 
| 
       118 
135 
     | 
    
         
             
            hangul,hangul
         
     | 
| 
       119 
136 
     | 
    
         
             
            hanifirohingya,hanifi_rohingya
         
     | 
| 
         @@ -123,11 +140,14 @@ hebrew,hebrew 
     | 
|
| 
       123 
140 
     | 
    
         
             
            hexdigit,hex_digit
         
     | 
| 
       124 
141 
     | 
    
         
             
            hiragana,hiragana
         
     | 
| 
       125 
142 
     | 
    
         
             
            hyphen,hyphen
         
     | 
| 
      
 143 
     | 
    
         
            +
            idcompatmathcontinue,id_compat_math_continue
         
     | 
| 
      
 144 
     | 
    
         
            +
            idcompatmathstart,id_compat_math_start
         
     | 
| 
       126 
145 
     | 
    
         
             
            idcontinue,id_continue
         
     | 
| 
       127 
146 
     | 
    
         
             
            ideographic,ideographic
         
     | 
| 
       128 
147 
     | 
    
         
             
            idsbinaryoperator,ids_binary_operator
         
     | 
| 
       129 
148 
     | 
    
         
             
            idstart,id_start
         
     | 
| 
       130 
149 
     | 
    
         
             
            idstrinaryoperator,ids_trinary_operator
         
     | 
| 
      
 150 
     | 
    
         
            +
            idsunaryoperator,ids_unary_operator
         
     | 
| 
       131 
151 
     | 
    
         
             
            imperialaramaic,imperial_aramaic
         
     | 
| 
       132 
152 
     | 
    
         
             
            inadlam,in_adlam
         
     | 
| 
       133 
153 
     | 
    
         
             
            inaegeannumbers,in_aegean_numbers
         
     | 
| 
         @@ -190,6 +210,7 @@ incjkunifiedideographsextensione,in_cjk_unified_ideographs_extension_e 
     | 
|
| 
       190 
210 
     | 
    
         
             
            incjkunifiedideographsextensionf,in_cjk_unified_ideographs_extension_f
         
     | 
| 
       191 
211 
     | 
    
         
             
            incjkunifiedideographsextensiong,in_cjk_unified_ideographs_extension_g
         
     | 
| 
       192 
212 
     | 
    
         
             
            incjkunifiedideographsextensionh,in_cjk_unified_ideographs_extension_h
         
     | 
| 
      
 213 
     | 
    
         
            +
            incjkunifiedideographsextensioni,in_cjk_unified_ideographs_extension_i
         
     | 
| 
       193 
214 
     | 
    
         
             
            incombiningdiacriticalmarks,in_combining_diacritical_marks
         
     | 
| 
       194 
215 
     | 
    
         
             
            incombiningdiacriticalmarksextended,in_combining_diacritical_marks_extended
         
     | 
| 
       195 
216 
     | 
    
         
             
            incombiningdiacriticalmarksforsymbols,in_combining_diacritical_marks_for_symbols
         
     | 
| 
         @@ -223,6 +244,7 @@ induployan,in_duployan 
     | 
|
| 
       223 
244 
     | 
    
         
             
            inearlydynasticcuneiform,in_early_dynastic_cuneiform
         
     | 
| 
       224 
245 
     | 
    
         
             
            inegyptianhieroglyphformatcontrols,in_egyptian_hieroglyph_format_controls
         
     | 
| 
       225 
246 
     | 
    
         
             
            inegyptianhieroglyphs,in_egyptian_hieroglyphs
         
     | 
| 
      
 247 
     | 
    
         
            +
            inegyptianhieroglyphsextendeda,in_egyptian_hieroglyphs_extended_a
         
     | 
| 
       226 
248 
     | 
    
         
             
            inelbasan,in_elbasan
         
     | 
| 
       227 
249 
     | 
    
         
             
            inelymaic,in_elymaic
         
     | 
| 
       228 
250 
     | 
    
         
             
            inemoticons,in_emoticons
         
     | 
| 
         @@ -235,6 +257,7 @@ inethiopicextended,in_ethiopic_extended 
     | 
|
| 
       235 
257 
     | 
    
         
             
            inethiopicextendeda,in_ethiopic_extended_a
         
     | 
| 
       236 
258 
     | 
    
         
             
            inethiopicextendedb,in_ethiopic_extended_b
         
     | 
| 
       237 
259 
     | 
    
         
             
            inethiopicsupplement,in_ethiopic_supplement
         
     | 
| 
      
 260 
     | 
    
         
            +
            ingaray,in_garay
         
     | 
| 
       238 
261 
     | 
    
         
             
            ingeneralpunctuation,in_general_punctuation
         
     | 
| 
       239 
262 
     | 
    
         
             
            ingeometricshapes,in_geometric_shapes
         
     | 
| 
       240 
263 
     | 
    
         
             
            ingeometricshapesextended,in_geometric_shapes_extended
         
     | 
| 
         @@ -250,6 +273,7 @@ ingreekextended,in_greek_extended 
     | 
|
| 
       250 
273 
     | 
    
         
             
            ingujarati,in_gujarati
         
     | 
| 
       251 
274 
     | 
    
         
             
            ingunjalagondi,in_gunjala_gondi
         
     | 
| 
       252 
275 
     | 
    
         
             
            ingurmukhi,in_gurmukhi
         
     | 
| 
      
 276 
     | 
    
         
            +
            ingurungkhema,in_gurung_khema
         
     | 
| 
       253 
277 
     | 
    
         
             
            inhalfwidthandfullwidthforms,in_halfwidth_and_fullwidth_forms
         
     | 
| 
       254 
278 
     | 
    
         
             
            inhangulcompatibilityjamo,in_hangul_compatibility_jamo
         
     | 
| 
       255 
279 
     | 
    
         
             
            inhanguljamo,in_hangul_jamo
         
     | 
| 
         @@ -291,6 +315,7 @@ inkhmer,in_khmer 
     | 
|
| 
       291 
315 
     | 
    
         
             
            inkhmersymbols,in_khmer_symbols
         
     | 
| 
       292 
316 
     | 
    
         
             
            inkhojki,in_khojki
         
     | 
| 
       293 
317 
     | 
    
         
             
            inkhudawadi,in_khudawadi
         
     | 
| 
      
 318 
     | 
    
         
            +
            inkiratrai,in_kirat_rai
         
     | 
| 
       294 
319 
     | 
    
         
             
            inlao,in_lao
         
     | 
| 
       295 
320 
     | 
    
         
             
            inlatin1supplement,in_latin_1_supplement
         
     | 
| 
       296 
321 
     | 
    
         
             
            inlatinextendeda,in_latin_extended_a
         
     | 
| 
         @@ -346,6 +371,7 @@ inmusicalsymbols,in_musical_symbols 
     | 
|
| 
       346 
371 
     | 
    
         
             
            inmyanmar,in_myanmar
         
     | 
| 
       347 
372 
     | 
    
         
             
            inmyanmarextendeda,in_myanmar_extended_a
         
     | 
| 
       348 
373 
     | 
    
         
             
            inmyanmarextendedb,in_myanmar_extended_b
         
     | 
| 
      
 374 
     | 
    
         
            +
            inmyanmarextendedc,in_myanmar_extended_c
         
     | 
| 
       349 
375 
     | 
    
         
             
            innabataean,in_nabataean
         
     | 
| 
       350 
376 
     | 
    
         
             
            innagmundari,in_nag_mundari
         
     | 
| 
       351 
377 
     | 
    
         
             
            innandinagari,in_nandinagari
         
     | 
| 
         @@ -367,6 +393,7 @@ inoldsogdian,in_old_sogdian 
     | 
|
| 
       367 
393 
     | 
    
         
             
            inoldsoutharabian,in_old_south_arabian
         
     | 
| 
       368 
394 
     | 
    
         
             
            inoldturkic,in_old_turkic
         
     | 
| 
       369 
395 
     | 
    
         
             
            inolduyghur,in_old_uyghur
         
     | 
| 
      
 396 
     | 
    
         
            +
            inolonal,in_ol_onal
         
     | 
| 
       370 
397 
     | 
    
         
             
            inopticalcharacterrecognition,in_optical_character_recognition
         
     | 
| 
       371 
398 
     | 
    
         
             
            inoriya,in_oriya
         
     | 
| 
       372 
399 
     | 
    
         
             
            inornamentaldingbats,in_ornamental_dingbats
         
     | 
| 
         @@ -406,6 +433,7 @@ inspacingmodifierletters,in_spacing_modifier_letters 
     | 
|
| 
       406 
433 
     | 
    
         
             
            inspecials,in_specials
         
     | 
| 
       407 
434 
     | 
    
         
             
            insundanese,in_sundanese
         
     | 
| 
       408 
435 
     | 
    
         
             
            insundanesesupplement,in_sundanese_supplement
         
     | 
| 
      
 436 
     | 
    
         
            +
            insunuwar,in_sunuwar
         
     | 
| 
       409 
437 
     | 
    
         
             
            insuperscriptsandsubscripts,in_superscripts_and_subscripts
         
     | 
| 
       410 
438 
     | 
    
         
             
            insupplementalarrowsa,in_supplemental_arrows_a
         
     | 
| 
       411 
439 
     | 
    
         
             
            insupplementalarrowsb,in_supplemental_arrows_b
         
     | 
| 
         @@ -419,6 +447,7 @@ insuttonsignwriting,in_sutton_signwriting 
     | 
|
| 
       419 
447 
     | 
    
         
             
            insylotinagri,in_syloti_nagri
         
     | 
| 
       420 
448 
     | 
    
         
             
            insymbolsandpictographsextendeda,in_symbols_and_pictographs_extended_a
         
     | 
| 
       421 
449 
     | 
    
         
             
            insymbolsforlegacycomputing,in_symbols_for_legacy_computing
         
     | 
| 
      
 450 
     | 
    
         
            +
            insymbolsforlegacycomputingsupplement,in_symbols_for_legacy_computing_supplement
         
     | 
| 
       422 
451 
     | 
    
         
             
            insyriac,in_syriac
         
     | 
| 
       423 
452 
     | 
    
         
             
            insyriacsupplement,in_syriac_supplement
         
     | 
| 
       424 
453 
     | 
    
         
             
            intagalog,in_tagalog
         
     | 
| 
         @@ -441,8 +470,10 @@ inthai,in_thai 
     | 
|
| 
       441 
470 
     | 
    
         
             
            intibetan,in_tibetan
         
     | 
| 
       442 
471 
     | 
    
         
             
            intifinagh,in_tifinagh
         
     | 
| 
       443 
472 
     | 
    
         
             
            intirhuta,in_tirhuta
         
     | 
| 
      
 473 
     | 
    
         
            +
            intodhri,in_todhri
         
     | 
| 
       444 
474 
     | 
    
         
             
            intoto,in_toto
         
     | 
| 
       445 
475 
     | 
    
         
             
            intransportandmapsymbols,in_transport_and_map_symbols
         
     | 
| 
      
 476 
     | 
    
         
            +
            intulutigalari,in_tulu_tigalari
         
     | 
| 
       446 
477 
     | 
    
         
             
            inugaritic,in_ugaritic
         
     | 
| 
       447 
478 
     | 
    
         
             
            inunifiedcanadianaboriginalsyllabics,in_unified_canadian_aboriginal_syllabics
         
     | 
| 
       448 
479 
     | 
    
         
             
            inunifiedcanadianaboriginalsyllabicsextended,in_unified_canadian_aboriginal_syllabics_extended
         
     | 
| 
         @@ -473,6 +504,7 @@ khitansmallscript,khitan_small_script 
     | 
|
| 
       473 
504 
     | 
    
         
             
            khmer,khmer
         
     | 
| 
       474 
505 
     | 
    
         
             
            khojki,khojki
         
     | 
| 
       475 
506 
     | 
    
         
             
            khudawadi,khudawadi
         
     | 
| 
      
 507 
     | 
    
         
            +
            kiratrai,kirat_rai
         
     | 
| 
       476 
508 
     | 
    
         
             
            lao,lao
         
     | 
| 
       477 
509 
     | 
    
         
             
            latin,latin
         
     | 
| 
       478 
510 
     | 
    
         
             
            lepcha,lepcha
         
     | 
| 
         @@ -506,6 +538,7 @@ meroiticcursive,meroitic_cursive 
     | 
|
| 
       506 
538 
     | 
    
         
             
            meroitichieroglyphs,meroitic_hieroglyphs
         
     | 
| 
       507 
539 
     | 
    
         
             
            miao,miao
         
     | 
| 
       508 
540 
     | 
    
         
             
            modi,modi
         
     | 
| 
      
 541 
     | 
    
         
            +
            modifiercombiningmark,modifier_combining_mark
         
     | 
| 
       509 
542 
     | 
    
         
             
            modifierletter,modifier_letter
         
     | 
| 
       510 
543 
     | 
    
         
             
            modifiersymbol,modifier_symbol
         
     | 
| 
       511 
544 
     | 
    
         
             
            mongolian,mongolian
         
     | 
| 
         @@ -535,6 +568,7 @@ oldsogdian,old_sogdian 
     | 
|
| 
       535 
568 
     | 
    
         
             
            oldsoutharabian,old_south_arabian
         
     | 
| 
       536 
569 
     | 
    
         
             
            oldturkic,old_turkic
         
     | 
| 
       537 
570 
     | 
    
         
             
            olduyghur,old_uyghur
         
     | 
| 
      
 571 
     | 
    
         
            +
            olonal,ol_onal
         
     | 
| 
       538 
572 
     | 
    
         
             
            openpunctuation,open_punctuation
         
     | 
| 
       539 
573 
     | 
    
         
             
            oriya,oriya
         
     | 
| 
       540 
574 
     | 
    
         
             
            osage,osage
         
     | 
| 
         @@ -588,6 +622,7 @@ space,space 
     | 
|
| 
       588 
622 
     | 
    
         
             
            spaceseparator,space_separator
         
     | 
| 
       589 
623 
     | 
    
         
             
            spacingmark,spacing_mark
         
     | 
| 
       590 
624 
     | 
    
         
             
            sundanese,sundanese
         
     | 
| 
      
 625 
     | 
    
         
            +
            sunuwar,sunuwar
         
     | 
| 
       591 
626 
     | 
    
         
             
            surrogate,surrogate
         
     | 
| 
       592 
627 
     | 
    
         
             
            sylotinagri,syloti_nagri
         
     | 
| 
       593 
628 
     | 
    
         
             
            symbol,symbol
         
     | 
| 
         @@ -609,7 +644,9 @@ tibetan,tibetan 
     | 
|
| 
       609 
644 
     | 
    
         
             
            tifinagh,tifinagh
         
     | 
| 
       610 
645 
     | 
    
         
             
            tirhuta,tirhuta
         
     | 
| 
       611 
646 
     | 
    
         
             
            titlecaseletter,titlecase_letter
         
     | 
| 
      
 647 
     | 
    
         
            +
            todhri,todhri
         
     | 
| 
       612 
648 
     | 
    
         
             
            toto,toto
         
     | 
| 
      
 649 
     | 
    
         
            +
            tulutigalari,tulu_tigalari
         
     | 
| 
       613 
650 
     | 
    
         
             
            ugaritic,ugaritic
         
     | 
| 
       614 
651 
     | 
    
         
             
            unassigned,unassigned
         
     | 
| 
       615 
652 
     | 
    
         
             
            unifiedideograph,unified_ideograph
         
     | 
| 
         @@ -58,6 +58,7 @@ epres,emoji_presentation 
     | 
|
| 
       58 
58 
     | 
    
         
             
            ethi,ethiopic
         
     | 
| 
       59 
59 
     | 
    
         
             
            ext,extender
         
     | 
| 
       60 
60 
     | 
    
         
             
            extpict,extended_pictographic
         
     | 
| 
      
 61 
     | 
    
         
            +
            gara,garay
         
     | 
| 
       61 
62 
     | 
    
         
             
            geor,georgian
         
     | 
| 
       62 
63 
     | 
    
         
             
            glag,glagolitic
         
     | 
| 
       63 
64 
     | 
    
         
             
            gong,gunjala_gondi
         
     | 
| 
         @@ -69,6 +70,7 @@ grek,greek 
     | 
|
| 
       69 
70 
     | 
    
         
             
            grext,grapheme_extend
         
     | 
| 
       70 
71 
     | 
    
         
             
            grlink,grapheme_link
         
     | 
| 
       71 
72 
     | 
    
         
             
            gujr,gujarati
         
     | 
| 
      
 73 
     | 
    
         
            +
            gukh,gurung_khema
         
     | 
| 
       72 
74 
     | 
    
         
             
            guru,gurmukhi
         
     | 
| 
       73 
75 
     | 
    
         
             
            hang,hangul
         
     | 
| 
       74 
76 
     | 
    
         
             
            hani,han
         
     | 
| 
         @@ -86,6 +88,7 @@ ideo,ideographic 
     | 
|
| 
       86 
88 
     | 
    
         
             
            ids,id_start
         
     | 
| 
       87 
89 
     | 
    
         
             
            idsb,ids_binary_operator
         
     | 
| 
       88 
90 
     | 
    
         
             
            idst,ids_trinary_operator
         
     | 
| 
      
 91 
     | 
    
         
            +
            idsu,ids_unary_operator
         
     | 
| 
       89 
92 
     | 
    
         
             
            ital,old_italic
         
     | 
| 
       90 
93 
     | 
    
         
             
            java,javanese
         
     | 
| 
       91 
94 
     | 
    
         
             
            joinc,join_control
         
     | 
| 
         @@ -96,6 +99,7 @@ khmr,khmer 
     | 
|
| 
       96 
99 
     | 
    
         
             
            khoj,khojki
         
     | 
| 
       97 
100 
     | 
    
         
             
            kits,khitan_small_script
         
     | 
| 
       98 
101 
     | 
    
         
             
            knda,kannada
         
     | 
| 
      
 102 
     | 
    
         
            +
            krai,kirat_rai
         
     | 
| 
       99 
103 
     | 
    
         
             
            kthi,kaithi
         
     | 
| 
       100 
104 
     | 
    
         
             
            l,letter
         
     | 
| 
       101 
105 
     | 
    
         
             
            lana,tai_tham
         
     | 
| 
         @@ -121,6 +125,7 @@ mand,mandaic 
     | 
|
| 
       121 
125 
     | 
    
         
             
            mani,manichaean
         
     | 
| 
       122 
126 
     | 
    
         
             
            marc,marchen
         
     | 
| 
       123 
127 
     | 
    
         
             
            mc,spacing_mark
         
     | 
| 
      
 128 
     | 
    
         
            +
            mcm,modifier_combining_mark
         
     | 
| 
       124 
129 
     | 
    
         
             
            me,enclosing_mark
         
     | 
| 
       125 
130 
     | 
    
         
             
            medf,medefaidrin
         
     | 
| 
       126 
131 
     | 
    
         
             
            mend,mende_kikakui
         
     | 
| 
         @@ -153,6 +158,7 @@ oids,other_id_start 
     | 
|
| 
       153 
158 
     | 
    
         
             
            olck,ol_chiki
         
     | 
| 
       154 
159 
     | 
    
         
             
            olower,other_lowercase
         
     | 
| 
       155 
160 
     | 
    
         
             
            omath,other_math
         
     | 
| 
      
 161 
     | 
    
         
            +
            onao,ol_onal
         
     | 
| 
       156 
162 
     | 
    
         
             
            orkh,old_turkic
         
     | 
| 
       157 
163 
     | 
    
         
             
            orya,oriya
         
     | 
| 
       158 
164 
     | 
    
         
             
            osge,osage
         
     | 
| 
         @@ -207,6 +213,7 @@ sora,sora_sompeng 
     | 
|
| 
       207 
213 
     | 
    
         
             
            soyo,soyombo
         
     | 
| 
       208 
214 
     | 
    
         
             
            sterm,sentence_terminal
         
     | 
| 
       209 
215 
     | 
    
         
             
            sund,sundanese
         
     | 
| 
      
 216 
     | 
    
         
            +
            sunu,sunuwar
         
     | 
| 
       210 
217 
     | 
    
         
             
            sylo,syloti_nagri
         
     | 
| 
       211 
218 
     | 
    
         
             
            syrc,syriac
         
     | 
| 
       212 
219 
     | 
    
         
             
            tagb,tagbanwa
         
     | 
| 
         @@ -224,6 +231,8 @@ thaa,thaana 
     | 
|
| 
       224 
231 
     | 
    
         
             
            tibt,tibetan
         
     | 
| 
       225 
232 
     | 
    
         
             
            tirh,tirhuta
         
     | 
| 
       226 
233 
     | 
    
         
             
            tnsa,tangsa
         
     | 
| 
      
 234 
     | 
    
         
            +
            todr,todhri
         
     | 
| 
      
 235 
     | 
    
         
            +
            tutg,tulu_tigalari
         
     | 
| 
       227 
236 
     | 
    
         
             
            ugar,ugaritic
         
     | 
| 
       228 
237 
     | 
    
         
             
            uideo,unified_ideograph
         
     | 
| 
       229 
238 
     | 
    
         
             
            vaii,vai
         
     | 
| 
         @@ -37,7 +37,8 @@ 
     | 
|
| 
       37 
37 
     | 
    
         
             
              octal_sequence        = [0-7]{1,3};
         
     | 
| 
       38 
38 
     | 
    
         | 
| 
       39 
39 
     | 
    
         
             
              hex_sequence          = 'x' . xdigit{1,2};
         
     | 
| 
       40 
     | 
    
         
            -
              hex_sequence_err      = 'x' . [^0- 
     | 
| 
      
 40 
     | 
    
         
            +
              hex_sequence_err      = 'x' . [^0-9A-Fa-f];
         
     | 
| 
      
 41 
     | 
    
         
            +
              high_hex_sequence     = 'x' . [89A-Fa-f] . xdigit . ( '\\x' . [89A-Fa-f] . xdigit )*;
         
     | 
| 
       41 
42 
     | 
    
         | 
| 
       42 
43 
     | 
    
         
             
              codepoint_single      = 'u' . xdigit{4};
         
     | 
| 
       43 
44 
     | 
    
         
             
              codepoint_list        = 'u{' . xdigit{1,6} . (space . xdigit{1,6})* . '}';
         
     | 
| 
         @@ -78,8 +79,8 @@ 
     | 
|
| 
       78 
79 
     | 
    
         
             
              # try to treat every other group head as options group, like Ruby
         
     | 
| 
       79 
80 
     | 
    
         
             
              group_options         = '?' . ( [^!#'():<=>~]+ . ':'? ) ?;
         
     | 
| 
       80 
81 
     | 
    
         | 
| 
       81 
     | 
    
         
            -
              group_name_id_ab      = ([ 
     | 
| 
       82 
     | 
    
         
            -
              group_name_id_sq      = ([^0-9\-'] 
     | 
| 
      
 82 
     | 
    
         
            +
              group_name_id_ab      = ([^!=0-9\->] | utf8_multibyte) . ([^>] | utf8_multibyte)*;
         
     | 
| 
      
 83 
     | 
    
         
            +
              group_name_id_sq      = ([^0-9\-']   | utf8_multibyte) . ([^'] | utf8_multibyte)*;
         
     | 
| 
       83 
84 
     | 
    
         
             
              group_number          = '-'? . [0-9]+;
         
     | 
| 
       84 
85 
     | 
    
         
             
              group_level           = [+\-] . [0-9]+;
         
     | 
| 
       85 
86 
     | 
    
         | 
| 
         @@ -210,7 +211,7 @@ 
     | 
|
| 
       210 
211 
     | 
    
         
             
                    type = :nonposixclass
         
     | 
| 
       211 
212 
     | 
    
         
             
                  end
         
     | 
| 
       212 
213 
     | 
    
         | 
| 
       213 
     | 
    
         
            -
                  unless  
     | 
| 
      
 214 
     | 
    
         
            +
                  unless POSIX_CLASSES[class_name]
         
     | 
| 
       214 
215 
     | 
    
         
             
                    raise ValidationError.for(:posix_class, text)
         
     | 
| 
       215 
216 
     | 
    
         
             
                  end
         
     | 
| 
       216 
217 
     | 
    
         | 
| 
         @@ -246,7 +247,7 @@ 
     | 
|
| 
       246 
247 
     | 
    
         
             
                # Treat all remaining escapes - those not supported in sets - as literal.
         
     | 
| 
       247 
248 
     | 
    
         
             
                # (This currently includes \^, \-, \&, \:, although these could potentially
         
     | 
| 
       248 
249 
     | 
    
         
             
                # be meta chars when not escaped, depending on their position in the set.)
         
     | 
| 
       249 
     | 
    
         
            -
                any > (escaped_set_alpha, 1) {
         
     | 
| 
      
 250 
     | 
    
         
            +
                (any | utf8_multibyte) > (escaped_set_alpha, 1) {
         
     | 
| 
       250 
251 
     | 
    
         
             
                  emit(:escape, :literal, copy(data, ts-1, te))
         
     | 
| 
       251 
252 
     | 
    
         
             
                  fret;
         
     | 
| 
       252 
253 
     | 
    
         
             
                };
         
     | 
| 
         @@ -256,9 +257,21 @@ 
     | 
|
| 
       256 
257 
     | 
    
         
             
              # escape sequence scanner
         
     | 
| 
       257 
258 
     | 
    
         
             
              # --------------------------------------------------------------------------
         
     | 
| 
       258 
259 
     | 
    
         
             
              escape_sequence := |*
         
     | 
| 
       259 
     | 
    
         
            -
                [1-9] {
         
     | 
| 
      
 260 
     | 
    
         
            +
                [1-9] . [0-9]* {
         
     | 
| 
       260 
261 
     | 
    
         
             
                  text = copy(data, ts-1, te)
         
     | 
| 
       261 
     | 
    
         
            -
             
     | 
| 
      
 262 
     | 
    
         
            +
             
     | 
| 
      
 263 
     | 
    
         
            +
                  # If not enough groups have been opened, there is a fallback to either an
         
     | 
| 
      
 264 
     | 
    
         
            +
                  # octal or literal interpretation for 2+ digit numerical escapes.
         
     | 
| 
      
 265 
     | 
    
         
            +
                  digits = text[1..-1]
         
     | 
| 
      
 266 
     | 
    
         
            +
                  if digits.size == 1 || digits.to_i <= capturing_group_count
         
     | 
| 
      
 267 
     | 
    
         
            +
                    emit(:backref, :number, text)
         
     | 
| 
      
 268 
     | 
    
         
            +
                  elsif digits =~ /\A[0-7]{2,}\z/
         
     | 
| 
      
 269 
     | 
    
         
            +
                    emit(:escape, :octal, text)
         
     | 
| 
      
 270 
     | 
    
         
            +
                  else
         
     | 
| 
      
 271 
     | 
    
         
            +
                    emit(:escape, :literal, text[0..1])
         
     | 
| 
      
 272 
     | 
    
         
            +
                    emit(:literal, :literal, text[2..-1])
         
     | 
| 
      
 273 
     | 
    
         
            +
                  end
         
     | 
| 
      
 274 
     | 
    
         
            +
             
     | 
| 
       262 
275 
     | 
    
         
             
                  fret;
         
     | 
| 
       263 
276 
     | 
    
         
             
                };
         
     | 
| 
       264 
277 
     | 
    
         | 
| 
         @@ -267,6 +280,13 @@ 
     | 
|
| 
       267 
280 
     | 
    
         
             
                  fret;
         
     | 
| 
       268 
281 
     | 
    
         
             
                };
         
     | 
| 
       269 
282 
     | 
    
         | 
| 
      
 283 
     | 
    
         
            +
                [8-9] . [0-9] { # special case, emits two tokens
         
     | 
| 
      
 284 
     | 
    
         
            +
                  text = copy(data, ts-1, te)
         
     | 
| 
      
 285 
     | 
    
         
            +
                  emit(:escape, :literal, text[0, 2])
         
     | 
| 
      
 286 
     | 
    
         
            +
                  emit(:literal, :literal, text[2])
         
     | 
| 
      
 287 
     | 
    
         
            +
                  fret;
         
     | 
| 
      
 288 
     | 
    
         
            +
                };
         
     | 
| 
      
 289 
     | 
    
         
            +
             
     | 
| 
       270 
290 
     | 
    
         
             
                meta_char {
         
     | 
| 
       271 
291 
     | 
    
         
             
                  case text = copy(data, ts-1, te)
         
     | 
| 
       272 
292 
     | 
    
         
             
                  when '\.';  emit(:escape, :dot,               text)
         
     | 
| 
         @@ -314,6 +334,16 @@ 
     | 
|
| 
       314 
334 
     | 
    
         
             
                  fret;
         
     | 
| 
       315 
335 
     | 
    
         
             
                };
         
     | 
| 
       316 
336 
     | 
    
         | 
| 
      
 337 
     | 
    
         
            +
                high_hex_sequence > (escaped_alpha, 5) {
         
     | 
| 
      
 338 
     | 
    
         
            +
                  text = copy(data, ts-1, te)
         
     | 
| 
      
 339 
     | 
    
         
            +
                  if regexp_encoding == Encoding::BINARY
         
     | 
| 
      
 340 
     | 
    
         
            +
                    text.split(/(?=\\)/).each { |part| emit(:escape, :hex, part) }
         
     | 
| 
      
 341 
     | 
    
         
            +
                  else
         
     | 
| 
      
 342 
     | 
    
         
            +
                    emit(:escape, :utf8_hex, text)
         
     | 
| 
      
 343 
     | 
    
         
            +
                  end
         
     | 
| 
      
 344 
     | 
    
         
            +
                  fret;
         
     | 
| 
      
 345 
     | 
    
         
            +
                };
         
     | 
| 
      
 346 
     | 
    
         
            +
             
     | 
| 
       317 
347 
     | 
    
         
             
                hex_sequence > (escaped_alpha, 5) @eof(premature_end_error) {
         
     | 
| 
       318 
348 
     | 
    
         
             
                  emit(:escape, :hex, copy(data, ts-1, te))
         
     | 
| 
       319 
349 
     | 
    
         
             
                  fret;
         
     | 
| 
         @@ -357,6 +387,7 @@ 
     | 
|
| 
       357 
387 
     | 
    
         
             
              conditional_expression := |*
         
     | 
| 
       358 
388 
     | 
    
         
             
                group_lookup . ')' {
         
     | 
| 
       359 
389 
     | 
    
         
             
                  text = copy(data, ts, te-1)
         
     | 
| 
      
 390 
     | 
    
         
            +
                  text =~ /[^0]/ or raise ValidationError.for(:backref, 'condition', 'invalid ref ID')
         
     | 
| 
       360 
391 
     | 
    
         
             
                  emit(:conditional, :condition, text)
         
     | 
| 
       361 
392 
     | 
    
         
             
                  emit(:conditional, :condition_close, ')')
         
     | 
| 
       362 
393 
     | 
    
         
             
                };
         
     | 
| 
         @@ -506,6 +537,7 @@ 
     | 
|
| 
       506 
537 
     | 
    
         
             
                };
         
     | 
| 
       507 
538 
     | 
    
         | 
| 
       508 
539 
     | 
    
         
             
                group_open @group_opened {
         
     | 
| 
      
 540 
     | 
    
         
            +
                  self.capturing_group_count = capturing_group_count + 1
         
     | 
| 
       509 
541 
     | 
    
         
             
                  text = copy(data, ts, te)
         
     | 
| 
       510 
542 
     | 
    
         
             
                  emit(:group, :capture, text)
         
     | 
| 
       511 
543 
     | 
    
         
             
                };
         
     | 
| 
         @@ -534,13 +566,13 @@ 
     | 
|
| 
       534 
566 
     | 
    
         
             
                  case text = copy(data, ts, te)
         
     | 
| 
       535 
567 
     | 
    
         
             
                  when /^\\k(.)[^0-9\-][^+\-]*['>]$/
         
     | 
| 
       536 
568 
     | 
    
         
             
                    emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text)
         
     | 
| 
       537 
     | 
    
         
            -
                  when /^\\k(.)[1-9]\d*['>]$/
         
     | 
| 
      
 569 
     | 
    
         
            +
                  when /^\\k(.)0*[1-9]\d*['>]$/
         
     | 
| 
       538 
570 
     | 
    
         
             
                    emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text)
         
     | 
| 
       539 
     | 
    
         
            -
                  when /^\\k(.)-[1-9]\d*['>]$/
         
     | 
| 
      
 571 
     | 
    
         
            +
                  when /^\\k(.)-0*[1-9]\d*['>]$/
         
     | 
| 
       540 
572 
     | 
    
         
             
                    emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text)
         
     | 
| 
       541 
573 
     | 
    
         
             
                  when /^\\k(.)[^0-9\-].*[+\-]\d+['>]$/
         
     | 
| 
       542 
574 
     | 
    
         
             
                    emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text)
         
     | 
| 
       543 
     | 
    
         
            -
                  when /^\\k(.)-?[1-9]\d*[+\-]\d+['>]$/
         
     | 
| 
      
 575 
     | 
    
         
            +
                  when /^\\k(.)-?0*[1-9]\d*[+\-]\d+['>]$/
         
     | 
| 
       544 
576 
     | 
    
         
             
                    emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text)
         
     | 
| 
       545 
577 
     | 
    
         
             
                  else
         
     | 
| 
       546 
578 
     | 
    
         
             
                    raise ValidationError.for(:backref, 'backreference', 'invalid ref ID')
         
     | 
| 
         @@ -553,9 +585,9 @@ 
     | 
|
| 
       553 
585 
     | 
    
         
             
                  case text = copy(data, ts, te)
         
     | 
| 
       554 
586 
     | 
    
         
             
                  when /^\\g(.)[^0-9+\-].*['>]$/
         
     | 
| 
       555 
587 
     | 
    
         
             
                    emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text)
         
     | 
| 
       556 
     | 
    
         
            -
                  when /^\\g(.)\d 
     | 
| 
      
 588 
     | 
    
         
            +
                  when /^\\g(.)(?:0|0*[1-9]\d*)['>]$/
         
     | 
| 
       557 
589 
     | 
    
         
             
                    emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text)
         
     | 
| 
       558 
     | 
    
         
            -
                  when /^\\g(.)[+-]\d 
     | 
| 
      
 590 
     | 
    
         
            +
                  when /^\\g(.)[+-]0*[1-9]\d*/
         
     | 
| 
       559 
591 
     | 
    
         
             
                    emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text)
         
     | 
| 
       560 
592 
     | 
    
         
             
                  else
         
     | 
| 
       561 
593 
     | 
    
         
             
                    raise ValidationError.for(:backref, 'subexpression call', 'invalid ref ID')
         
     | 
| 
         @@ -632,9 +664,9 @@ 
     | 
|
| 
       632 
664 
     | 
    
         
             
              *|;
         
     | 
| 
       633 
665 
     | 
    
         
             
            }%%
         
     | 
| 
       634 
666 
     | 
    
         | 
| 
       635 
     | 
    
         
            -
             
     | 
| 
       636 
     | 
    
         
            -
             
     | 
| 
       637 
     | 
    
         
            -
             
     | 
| 
      
 667 
     | 
    
         
            +
            require_relative 'scanner/errors/scanner_error'
         
     | 
| 
      
 668 
     | 
    
         
            +
            require_relative 'scanner/errors/premature_end_error'
         
     | 
| 
      
 669 
     | 
    
         
            +
            require_relative 'scanner/errors/validation_error'
         
     | 
| 
       638 
670 
     | 
    
         | 
| 
       639 
671 
     | 
    
         
             
            class Regexp::Scanner
         
     | 
| 
       640 
672 
     | 
    
         
             
              # Scans the given regular expression text, or Regexp object and collects the
         
     | 
| 
         @@ -654,6 +686,7 @@ class Regexp::Scanner 
     | 
|
| 
       654 
686 
     | 
    
         | 
| 
       655 
687 
     | 
    
         
             
                input = input_object.is_a?(Regexp) ? input_object.source : input_object
         
     | 
| 
       656 
688 
     | 
    
         
             
                self.free_spacing = free_spacing?(input_object, options)
         
     | 
| 
      
 689 
     | 
    
         
            +
                self.regexp_encoding = extract_encoding(input_object, options)
         
     | 
| 
       657 
690 
     | 
    
         
             
                self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}]
         
     | 
| 
       658 
691 
     | 
    
         | 
| 
       659 
692 
     | 
    
         
             
                data  = input.unpack("c*")
         
     | 
| 
         @@ -664,6 +697,7 @@ class Regexp::Scanner 
     | 
|
| 
       664 
697 
     | 
    
         | 
| 
       665 
698 
     | 
    
         
             
                self.set_depth = 0
         
     | 
| 
       666 
699 
     | 
    
         
             
                self.group_depth = 0
         
     | 
| 
      
 700 
     | 
    
         
            +
                self.capturing_group_count = 0
         
     | 
| 
       667 
701 
     | 
    
         
             
                self.conditional_stack = []
         
     | 
| 
       668 
702 
     | 
    
         
             
                self.char_pos = 0
         
     | 
| 
       669 
703 
     | 
    
         | 
| 
         @@ -703,10 +737,11 @@ class Regexp::Scanner 
     | 
|
| 
       703 
737 
     | 
    
         
             
                File.read("#{__dir__}/scanner/properties/#{name}.csv").scan(/(.+),(.+)/).to_h
         
     | 
| 
       704 
738 
     | 
    
         
             
              end
         
     | 
| 
       705 
739 
     | 
    
         | 
| 
       706 
     | 
    
         
            -
               
     | 
| 
      
 740 
     | 
    
         
            +
              # Use each_with_object for required_ruby_version >= 2.2, or #to_h for >= 2.6
         
     | 
| 
      
 741 
     | 
    
         
            +
              POSIX_CLASSES =
         
     | 
| 
       707 
742 
     | 
    
         
             
                %w[alnum alpha ascii blank cntrl digit graph
         
     | 
| 
       708 
743 
     | 
    
         
             
                   lower print punct space upper word xdigit]
         
     | 
| 
       709 
     | 
    
         
            -
             
     | 
| 
      
 744 
     | 
    
         
            +
                  .inject({}) { |o, e| o.merge(e => true) }.freeze
         
     | 
| 
       710 
745 
     | 
    
         | 
| 
       711 
746 
     | 
    
         
             
              # Emits an array with the details of the scanned pattern
         
     | 
| 
       712 
747 
     | 
    
         
             
              def emit(type, token, text)
         
     | 
| 
         @@ -734,16 +769,25 @@ class Regexp::Scanner 
     | 
|
| 
       734 
769 
     | 
    
         
             
                end
         
     | 
| 
       735 
770 
     | 
    
         
             
              end
         
     | 
| 
       736 
771 
     | 
    
         | 
| 
       737 
     | 
    
         
            -
              attr_accessor :literal_run # only public for #||= to work on ruby <= 2.5
         
     | 
| 
      
 772 
     | 
    
         
            +
              attr_accessor :capturing_group_count, :literal_run # only public for #||= to work on ruby <= 2.5
         
     | 
| 
       738 
773 
     | 
    
         | 
| 
       739 
774 
     | 
    
         
             
              private
         
     | 
| 
       740 
775 
     | 
    
         | 
| 
       741 
776 
     | 
    
         
             
              attr_accessor :block,
         
     | 
| 
       742 
777 
     | 
    
         
             
                            :collect_tokens, :tokens, :prev_token,
         
     | 
| 
       743 
778 
     | 
    
         
             
                            :free_spacing, :spacing_stack,
         
     | 
| 
      
 779 
     | 
    
         
            +
                            :regexp_encoding,
         
     | 
| 
       744 
780 
     | 
    
         
             
                            :group_depth, :set_depth, :conditional_stack,
         
     | 
| 
       745 
781 
     | 
    
         
             
                            :char_pos
         
     | 
| 
       746 
782 
     | 
    
         | 
| 
      
 783 
     | 
    
         
            +
              # Determines which encoding to assume for the pattern being scanned.
              #
              # input_object - the object handed to the scanner; when it is a
              #                Regexp, its own encoding is used.
              # options      - nil, or a bit mask of Regexp option flags (it is
              #                combined with Regexp::NOENCODING via `&`).
              #
              # Returns input_object.encoding for a Regexp, Encoding::BINARY when
              # the NOENCODING bit is set in options, and nil otherwise.
              def extract_encoding(input_object, options)
                if input_object.is_a?(::Regexp)
                  input_object.encoding
                elsif options && (options & Regexp::NOENCODING) != 0
                  # Integers are always truthy in Ruby (0 included), so the masked
                  # bits must be compared against zero explicitly; otherwise every
                  # option mask would be treated as if NOENCODING were set.
                  Encoding::BINARY
                end
              end
         
     | 
| 
      
 790 
     | 
    
         
            +
             
     | 
| 
       747 
791 
     | 
    
         
             
              def free_spacing?(input_object, options)
         
     | 
| 
       748 
792 
     | 
    
         
             
                if options && !input_object.is_a?(String)
         
     | 
| 
       749 
793 
     | 
    
         
             
                  raise ArgumentError, 'options cannot be supplied unless scanning a String'
         
     |