regexp_parser 1.7.0 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/Gemfile +9 -3
 - data/LICENSE +1 -1
 - data/Rakefile +6 -70
 - data/lib/regexp_parser/error.rb +4 -0
 - data/lib/regexp_parser/expression/base.rb +76 -0
 - data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
 - data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
 - data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
 - data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
 - data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +4 -8
 - data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
 - data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
 - data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
 - data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
 - data/lib/regexp_parser/expression/classes/group.rb +28 -15
 - data/lib/regexp_parser/expression/classes/keep.rb +2 -0
 - data/lib/regexp_parser/expression/classes/literal.rb +1 -5
 - data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
 - data/lib/regexp_parser/expression/classes/root.rb +4 -19
 - data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +11 -12
 - data/lib/regexp_parser/expression/methods/construct.rb +41 -0
 - data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
 - data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
 - data/lib/regexp_parser/expression/methods/negative.rb +20 -0
 - data/lib/regexp_parser/expression/methods/parts.rb +23 -0
 - data/lib/regexp_parser/expression/methods/printing.rb +26 -0
 - data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
 - data/lib/regexp_parser/expression/methods/tests.rb +47 -1
 - data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
 - data/lib/regexp_parser/expression/quantifier.rb +57 -17
 - data/lib/regexp_parser/expression/sequence.rb +11 -47
 - data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
 - data/lib/regexp_parser/expression/shared.rb +111 -0
 - data/lib/regexp_parser/expression/subexpression.rb +27 -19
 - data/lib/regexp_parser/expression.rb +15 -141
 - data/lib/regexp_parser/lexer.rb +83 -41
 - data/lib/regexp_parser/parser.rb +372 -429
 - data/lib/regexp_parser/scanner/char_type.rl +11 -11
 - data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
 - data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
 - data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
 - data/lib/regexp_parser/scanner/properties/long.csv +651 -0
 - data/lib/regexp_parser/scanner/properties/short.csv +249 -0
 - data/lib/regexp_parser/scanner/property.rl +4 -4
 - data/lib/regexp_parser/scanner/scanner.rl +303 -368
 - data/lib/regexp_parser/scanner.rb +1423 -1674
 - data/lib/regexp_parser/syntax/any.rb +2 -7
 - data/lib/regexp_parser/syntax/base.rb +92 -67
 - data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
 - data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
 - data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
 - data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
 - data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
 - data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
 - data/lib/regexp_parser/syntax/token/escape.rb +33 -0
 - data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
 - data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
 - data/lib/regexp_parser/syntax/token/meta.rb +20 -0
 - data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
 - data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
 - data/lib/regexp_parser/syntax/token/unicode_property.rb +751 -0
 - data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
 - data/lib/regexp_parser/syntax/token.rb +45 -0
 - data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
 - data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
 - data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
 - data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
 - data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
 - data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
 - data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
 - data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
 - data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
 - data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
 - data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
 - data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
 - data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
 - data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
 - data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
 - data/lib/regexp_parser/syntax/versions.rb +3 -1
 - data/lib/regexp_parser/syntax.rb +8 -6
 - data/lib/regexp_parser/token.rb +9 -20
 - data/lib/regexp_parser/version.rb +1 -1
 - data/lib/regexp_parser.rb +0 -2
 - data/regexp_parser.gemspec +19 -23
 - metadata +53 -171
 - data/CHANGELOG.md +0 -349
 - data/README.md +0 -470
 - data/lib/regexp_parser/scanner/properties/long.yml +0 -594
 - data/lib/regexp_parser/scanner/properties/short.yml +0 -237
 - data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
 - data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
 - data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
 - data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
 - data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
 - data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
 - data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
 - data/lib/regexp_parser/syntax/tokens.rb +0 -45
 - data/spec/expression/base_spec.rb +0 -94
 - data/spec/expression/clone_spec.rb +0 -120
 - data/spec/expression/conditional_spec.rb +0 -89
 - data/spec/expression/free_space_spec.rb +0 -27
 - data/spec/expression/methods/match_length_spec.rb +0 -161
 - data/spec/expression/methods/match_spec.rb +0 -25
 - data/spec/expression/methods/strfregexp_spec.rb +0 -224
 - data/spec/expression/methods/tests_spec.rb +0 -99
 - data/spec/expression/methods/traverse_spec.rb +0 -161
 - data/spec/expression/options_spec.rb +0 -128
 - data/spec/expression/root_spec.rb +0 -9
 - data/spec/expression/sequence_spec.rb +0 -9
 - data/spec/expression/subexpression_spec.rb +0 -50
 - data/spec/expression/to_h_spec.rb +0 -26
 - data/spec/expression/to_s_spec.rb +0 -100
 - data/spec/lexer/all_spec.rb +0 -22
 - data/spec/lexer/conditionals_spec.rb +0 -53
 - data/spec/lexer/escapes_spec.rb +0 -14
 - data/spec/lexer/keep_spec.rb +0 -10
 - data/spec/lexer/literals_spec.rb +0 -89
 - data/spec/lexer/nesting_spec.rb +0 -99
 - data/spec/lexer/refcalls_spec.rb +0 -55
 - data/spec/parser/all_spec.rb +0 -43
 - data/spec/parser/alternation_spec.rb +0 -88
 - data/spec/parser/anchors_spec.rb +0 -17
 - data/spec/parser/conditionals_spec.rb +0 -179
 - data/spec/parser/errors_spec.rb +0 -30
 - data/spec/parser/escapes_spec.rb +0 -121
 - data/spec/parser/free_space_spec.rb +0 -130
 - data/spec/parser/groups_spec.rb +0 -108
 - data/spec/parser/keep_spec.rb +0 -6
 - data/spec/parser/posix_classes_spec.rb +0 -8
 - data/spec/parser/properties_spec.rb +0 -115
 - data/spec/parser/quantifiers_spec.rb +0 -51
 - data/spec/parser/refcalls_spec.rb +0 -112
 - data/spec/parser/set/intersections_spec.rb +0 -127
 - data/spec/parser/set/ranges_spec.rb +0 -111
 - data/spec/parser/sets_spec.rb +0 -178
 - data/spec/parser/types_spec.rb +0 -18
 - data/spec/scanner/all_spec.rb +0 -18
 - data/spec/scanner/anchors_spec.rb +0 -21
 - data/spec/scanner/conditionals_spec.rb +0 -128
 - data/spec/scanner/errors_spec.rb +0 -68
 - data/spec/scanner/escapes_spec.rb +0 -53
 - data/spec/scanner/free_space_spec.rb +0 -133
 - data/spec/scanner/groups_spec.rb +0 -52
 - data/spec/scanner/keep_spec.rb +0 -10
 - data/spec/scanner/literals_spec.rb +0 -49
 - data/spec/scanner/meta_spec.rb +0 -18
 - data/spec/scanner/properties_spec.rb +0 -64
 - data/spec/scanner/quantifiers_spec.rb +0 -20
 - data/spec/scanner/refcalls_spec.rb +0 -36
 - data/spec/scanner/sets_spec.rb +0 -102
 - data/spec/scanner/types_spec.rb +0 -14
 - data/spec/spec_helper.rb +0 -15
 - data/spec/support/runner.rb +0 -42
 - data/spec/support/shared_examples.rb +0 -77
 - data/spec/support/warning_extractor.rb +0 -60
 - data/spec/syntax/syntax_spec.rb +0 -48
 - data/spec/syntax/syntax_token_map_spec.rb +0 -23
 - data/spec/syntax/versions/1.8.6_spec.rb +0 -17
 - data/spec/syntax/versions/1.9.1_spec.rb +0 -10
 - data/spec/syntax/versions/1.9.3_spec.rb +0 -9
 - data/spec/syntax/versions/2.0.0_spec.rb +0 -13
 - data/spec/syntax/versions/2.2.0_spec.rb +0 -9
 - data/spec/syntax/versions/aliases_spec.rb +0 -37
 - data/spec/token/token_spec.rb +0 -85
 - data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
 
| 
         @@ -10,17 +10,17 @@ 
     | 
|
| 
       10 
10 
     | 
    
         
             
              # --------------------------------------------------------------------------
         
     | 
| 
       11 
11 
     | 
    
         
             
              char_type := |*
         
     | 
| 
       12 
12 
     | 
    
         
             
                char_type_char {
         
     | 
| 
       13 
     | 
    
         
            -
                  case text =  
     | 
| 
       14 
     | 
    
         
            -
                  when '\d'; emit(:type, :digit,      text 
     | 
| 
       15 
     | 
    
         
            -
                  when '\D'; emit(:type, :nondigit,   text 
     | 
| 
       16 
     | 
    
         
            -
                  when '\h'; emit(:type, :hex,        text 
     | 
| 
       17 
     | 
    
         
            -
                  when '\H'; emit(:type, :nonhex,     text 
     | 
| 
       18 
     | 
    
         
            -
                  when '\s'; emit(:type, :space,      text 
     | 
| 
       19 
     | 
    
         
            -
                  when '\S'; emit(:type, :nonspace,   text 
     | 
| 
       20 
     | 
    
         
            -
                  when '\w'; emit(:type, :word,       text 
     | 
| 
       21 
     | 
    
         
            -
                  when '\W'; emit(:type, :nonword,    text 
     | 
| 
       22 
     | 
    
         
            -
                  when '\R'; emit(:type, :linebreak,  text 
     | 
| 
       23 
     | 
    
         
            -
                  when '\X'; emit(:type, :xgrapheme,  text 
     | 
| 
      
 13 
     | 
    
         
            +
                  case text = copy(data, ts-1, te)
         
     | 
| 
      
 14 
     | 
    
         
            +
                  when '\d'; emit(:type, :digit,      text)
         
     | 
| 
      
 15 
     | 
    
         
            +
                  when '\D'; emit(:type, :nondigit,   text)
         
     | 
| 
      
 16 
     | 
    
         
            +
                  when '\h'; emit(:type, :hex,        text)
         
     | 
| 
      
 17 
     | 
    
         
            +
                  when '\H'; emit(:type, :nonhex,     text)
         
     | 
| 
      
 18 
     | 
    
         
            +
                  when '\s'; emit(:type, :space,      text)
         
     | 
| 
      
 19 
     | 
    
         
            +
                  when '\S'; emit(:type, :nonspace,   text)
         
     | 
| 
      
 20 
     | 
    
         
            +
                  when '\w'; emit(:type, :word,       text)
         
     | 
| 
      
 21 
     | 
    
         
            +
                  when '\W'; emit(:type, :nonword,    text)
         
     | 
| 
      
 22 
     | 
    
         
            +
                  when '\R'; emit(:type, :linebreak,  text)
         
     | 
| 
      
 23 
     | 
    
         
            +
                  when '\X'; emit(:type, :xgrapheme,  text)
         
     | 
| 
       24 
24 
     | 
    
         
             
                  end
         
     | 
| 
       25 
25 
     | 
    
         
             
                  fret;
         
     | 
| 
       26 
26 
     | 
    
         
             
                };
         
     | 
| 
         @@ -0,0 +1,63 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            class Regexp::Scanner
         
     | 
| 
      
 2 
     | 
    
         
            +
              # Base for all scanner validation errors
         
     | 
| 
      
 3 
     | 
    
         
            +
              class ValidationError < ScannerError
         
     | 
| 
      
 4 
     | 
    
         
            +
                # Centralizes and unifies the handling of validation related errors.
         
     | 
| 
      
 5 
     | 
    
         
            +
                def self.for(type, problem, reason = nil)
         
     | 
| 
      
 6 
     | 
    
         
            +
                  types.fetch(type).new(problem, reason)
         
     | 
| 
      
 7 
     | 
    
         
            +
                end
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
                def self.types
         
     | 
| 
      
 10 
     | 
    
         
            +
                  @types ||= {
         
     | 
| 
      
 11 
     | 
    
         
            +
                    backref:      InvalidBackrefError,
         
     | 
| 
      
 12 
     | 
    
         
            +
                    group:        InvalidGroupError,
         
     | 
| 
      
 13 
     | 
    
         
            +
                    group_option: InvalidGroupOption,
         
     | 
| 
      
 14 
     | 
    
         
            +
                    posix_class:  UnknownPosixClassError,
         
     | 
| 
      
 15 
     | 
    
         
            +
                    property:     UnknownUnicodePropertyError,
         
     | 
| 
      
 16 
     | 
    
         
            +
                    sequence:     InvalidSequenceError,
         
     | 
| 
      
 17 
     | 
    
         
            +
                  }
         
     | 
| 
      
 18 
     | 
    
         
            +
                end
         
     | 
| 
      
 19 
     | 
    
         
            +
              end
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
              # Invalid sequence format. Used for escape sequences, mainly.
         
     | 
| 
      
 22 
     | 
    
         
            +
              class InvalidSequenceError < ValidationError
         
     | 
| 
      
 23 
     | 
    
         
            +
                def initialize(what = 'sequence', where = '')
         
     | 
| 
      
 24 
     | 
    
         
            +
                  super "Invalid #{what} at #{where}"
         
     | 
| 
      
 25 
     | 
    
         
            +
                end
         
     | 
| 
      
 26 
     | 
    
         
            +
              end
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
              # Invalid group. Used for named groups.
         
     | 
| 
      
 29 
     | 
    
         
            +
              class InvalidGroupError < ValidationError
         
     | 
| 
      
 30 
     | 
    
         
            +
                def initialize(what, reason)
         
     | 
| 
      
 31 
     | 
    
         
            +
                  super "Invalid #{what}, #{reason}."
         
     | 
| 
      
 32 
     | 
    
         
            +
                end
         
     | 
| 
      
 33 
     | 
    
         
            +
              end
         
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
      
 35 
     | 
    
         
            +
              # Invalid group option. Used for inline options.
         
     | 
| 
      
 36 
     | 
    
         
            +
              # TODO: should become InvalidGroupOptionError in v3.0.0 for consistency
         
     | 
| 
      
 37 
     | 
    
         
            +
              class InvalidGroupOption < ValidationError
         
     | 
| 
      
 38 
     | 
    
         
            +
                def initialize(option, text)
         
     | 
| 
      
 39 
     | 
    
         
            +
                  super "Invalid group option #{option} in #{text}"
         
     | 
| 
      
 40 
     | 
    
         
            +
                end
         
     | 
| 
      
 41 
     | 
    
         
            +
              end
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
      
 43 
     | 
    
         
            +
              # Invalid back reference. Used for name and number refs/calls.
         
     | 
| 
      
 44 
     | 
    
         
            +
              class InvalidBackrefError < ValidationError
         
     | 
| 
      
 45 
     | 
    
         
            +
                def initialize(what, reason)
         
     | 
| 
      
 46 
     | 
    
         
            +
                  super "Invalid back reference #{what}, #{reason}"
         
     | 
| 
      
 47 
     | 
    
         
            +
                end
         
     | 
| 
      
 48 
     | 
    
         
            +
              end
         
     | 
| 
      
 49 
     | 
    
         
            +
             
     | 
| 
      
 50 
     | 
    
         
            +
              # The property name was not recognized by the scanner.
         
     | 
| 
      
 51 
     | 
    
         
            +
              class UnknownUnicodePropertyError < ValidationError
         
     | 
| 
      
 52 
     | 
    
         
            +
                def initialize(name, _)
         
     | 
| 
      
 53 
     | 
    
         
            +
                  super "Unknown unicode character property name #{name}"
         
     | 
| 
      
 54 
     | 
    
         
            +
                end
         
     | 
| 
      
 55 
     | 
    
         
            +
              end
         
     | 
| 
      
 56 
     | 
    
         
            +
             
     | 
| 
      
 57 
     | 
    
         
            +
              # The POSIX class name was not recognized by the scanner.
         
     | 
| 
      
 58 
     | 
    
         
            +
              class UnknownPosixClassError < ValidationError
         
     | 
| 
      
 59 
     | 
    
         
            +
                def initialize(text, _)
         
     | 
| 
      
 60 
     | 
    
         
            +
                  super "Unknown POSIX class #{text}"
         
     | 
| 
      
 61 
     | 
    
         
            +
                end
         
     | 
| 
      
 62 
     | 
    
         
            +
              end
         
     | 
| 
      
 63 
     | 
    
         
            +
            end
         
     |