regexp_parser 1.7.0 → 2.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +8 -2
- data/LICENSE +1 -1
- data/Rakefile +6 -70
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +76 -0
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +3 -4
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
- data/lib/regexp_parser/expression/classes/group.rb +28 -15
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
- data/lib/regexp_parser/expression/classes/root.rb +4 -19
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +5 -3
- data/lib/regexp_parser/expression/methods/construct.rb +41 -0
- data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/methods/tests.rb +47 -1
- data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
- data/lib/regexp_parser/expression/quantifier.rb +57 -17
- data/lib/regexp_parser/expression/sequence.rb +11 -47
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +111 -0
- data/lib/regexp_parser/expression/subexpression.rb +27 -19
- data/lib/regexp_parser/expression.rb +14 -141
- data/lib/regexp_parser/lexer.rb +83 -41
- data/lib/regexp_parser/parser.rb +371 -429
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +633 -0
- data/lib/regexp_parser/scanner/properties/short.csv +248 -0
- data/lib/regexp_parser/scanner/property.rl +4 -4
- data/lib/regexp_parser/scanner/scanner.rl +303 -368
- data/lib/regexp_parser/scanner.rb +1423 -1674
- data/lib/regexp_parser/syntax/any.rb +2 -7
- data/lib/regexp_parser/syntax/base.rb +92 -67
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +33 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/token/meta.rb +20 -0
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +733 -0
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +3 -1
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +19 -23
- metadata +52 -171
- data/CHANGELOG.md +0 -349
- data/README.md +0 -470
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -94
- data/spec/expression/clone_spec.rb +0 -120
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -100
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -89
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -55
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -51
- data/spec/parser/refcalls_spec.rb +0 -112
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/errors_spec.rb +0 -68
- data/spec/scanner/escapes_spec.rb +0 -53
- data/spec/scanner/free_space_spec.rb +0 -133
- data/spec/scanner/groups_spec.rb +0 -52
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -49
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -20
- data/spec/scanner/refcalls_spec.rb +0 -36
- data/spec/scanner/sets_spec.rb +0 -102
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -15
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
- /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
| @@ -10,17 +10,17 @@ | |
| 10 10 | 
             
              # --------------------------------------------------------------------------
         | 
| 11 11 | 
             
              char_type := |*
         | 
| 12 12 | 
             
                char_type_char {
         | 
| 13 | 
            -
                  case text =  | 
| 14 | 
            -
                  when '\d'; emit(:type, :digit,      text | 
| 15 | 
            -
                  when '\D'; emit(:type, :nondigit,   text | 
| 16 | 
            -
                  when '\h'; emit(:type, :hex,        text | 
| 17 | 
            -
                  when '\H'; emit(:type, :nonhex,     text | 
| 18 | 
            -
                  when '\s'; emit(:type, :space,      text | 
| 19 | 
            -
                  when '\S'; emit(:type, :nonspace,   text | 
| 20 | 
            -
                  when '\w'; emit(:type, :word,       text | 
| 21 | 
            -
                  when '\W'; emit(:type, :nonword,    text | 
| 22 | 
            -
                  when '\R'; emit(:type, :linebreak,  text | 
| 23 | 
            -
                  when '\X'; emit(:type, :xgrapheme,  text | 
| 13 | 
            +
                  case text = copy(data, ts-1, te)
         | 
| 14 | 
            +
                  when '\d'; emit(:type, :digit,      text)
         | 
| 15 | 
            +
                  when '\D'; emit(:type, :nondigit,   text)
         | 
| 16 | 
            +
                  when '\h'; emit(:type, :hex,        text)
         | 
| 17 | 
            +
                  when '\H'; emit(:type, :nonhex,     text)
         | 
| 18 | 
            +
                  when '\s'; emit(:type, :space,      text)
         | 
| 19 | 
            +
                  when '\S'; emit(:type, :nonspace,   text)
         | 
| 20 | 
            +
                  when '\w'; emit(:type, :word,       text)
         | 
| 21 | 
            +
                  when '\W'; emit(:type, :nonword,    text)
         | 
| 22 | 
            +
                  when '\R'; emit(:type, :linebreak,  text)
         | 
| 23 | 
            +
                  when '\X'; emit(:type, :xgrapheme,  text)
         | 
| 24 24 | 
             
                  end
         | 
| 25 25 | 
             
                  fret;
         | 
| 26 26 | 
             
                };
         | 
| @@ -0,0 +1,63 @@ | |
| 1 | 
            +
            class Regexp::Scanner
         | 
| 2 | 
            +
              # Base for all scanner validation errors
         | 
| 3 | 
            +
              class ValidationError < ScannerError
         | 
| 4 | 
            +
                # Centralizes and unifies the handling of validation related errors.
         | 
| 5 | 
            +
                def self.for(type, problem, reason = nil)
         | 
| 6 | 
            +
                  types.fetch(type).new(problem, reason)
         | 
| 7 | 
            +
                end
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                def self.types
         | 
| 10 | 
            +
                  @types ||= {
         | 
| 11 | 
            +
                    backref:      InvalidBackrefError,
         | 
| 12 | 
            +
                    group:        InvalidGroupError,
         | 
| 13 | 
            +
                    group_option: InvalidGroupOption,
         | 
| 14 | 
            +
                    posix_class:  UnknownPosixClassError,
         | 
| 15 | 
            +
                    property:     UnknownUnicodePropertyError,
         | 
| 16 | 
            +
                    sequence:     InvalidSequenceError,
         | 
| 17 | 
            +
                  }
         | 
| 18 | 
            +
                end
         | 
| 19 | 
            +
              end
         | 
| 20 | 
            +
             | 
| 21 | 
            +
              # Invalid sequence format. Used for escape sequences, mainly.
         | 
| 22 | 
            +
              class InvalidSequenceError < ValidationError
         | 
| 23 | 
            +
                def initialize(what = 'sequence', where = '')
         | 
| 24 | 
            +
                  super "Invalid #{what} at #{where}"
         | 
| 25 | 
            +
                end
         | 
| 26 | 
            +
              end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
              # Invalid group. Used for named groups.
         | 
| 29 | 
            +
              class InvalidGroupError < ValidationError
         | 
| 30 | 
            +
                def initialize(what, reason)
         | 
| 31 | 
            +
                  super "Invalid #{what}, #{reason}."
         | 
| 32 | 
            +
                end
         | 
| 33 | 
            +
              end
         | 
| 34 | 
            +
             | 
| 35 | 
            +
              # Invalid group option. Used for inline options.
         | 
| 36 | 
            +
              # TODO: should become InvalidGroupOptionError in v3.0.0 for consistency
         | 
| 37 | 
            +
              class InvalidGroupOption < ValidationError
         | 
| 38 | 
            +
                def initialize(option, text)
         | 
| 39 | 
            +
                  super "Invalid group option #{option} in #{text}"
         | 
| 40 | 
            +
                end
         | 
| 41 | 
            +
              end
         | 
| 42 | 
            +
             | 
| 43 | 
            +
              # Invalid back reference. Used for named and numbered refs/calls.
         | 
| 44 | 
            +
              class InvalidBackrefError < ValidationError
         | 
| 45 | 
            +
                def initialize(what, reason)
         | 
| 46 | 
            +
                  super "Invalid back reference #{what}, #{reason}"
         | 
| 47 | 
            +
                end
         | 
| 48 | 
            +
              end
         | 
| 49 | 
            +
             | 
| 50 | 
            +
              # The property name was not recognized by the scanner.
         | 
| 51 | 
            +
              class UnknownUnicodePropertyError < ValidationError
         | 
| 52 | 
            +
                def initialize(name, _)
         | 
| 53 | 
            +
                  super "Unknown unicode character property name #{name}"
         | 
| 54 | 
            +
                end
         | 
| 55 | 
            +
              end
         | 
| 56 | 
            +
             | 
| 57 | 
            +
              # The POSIX class name was not recognized by the scanner.
         | 
| 58 | 
            +
              class UnknownPosixClassError < ValidationError
         | 
| 59 | 
            +
                def initialize(text, _)
         | 
| 60 | 
            +
                  super "Unknown POSIX class #{text}"
         | 
| 61 | 
            +
                end
         | 
| 62 | 
            +
              end
         | 
| 63 | 
            +
            end
         |