regexp_parser 0.5.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +242 -0
- data/Gemfile +1 -0
- data/README.md +21 -17
- data/Rakefile +31 -0
- data/lib/regexp_parser/expression.rb +11 -9
- data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
- data/lib/regexp_parser/expression/classes/backref.rb +21 -16
- data/lib/regexp_parser/expression/classes/escape.rb +81 -10
- data/lib/regexp_parser/expression/classes/group.rb +20 -20
- data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
- data/lib/regexp_parser/expression/classes/property.rb +6 -0
- data/lib/regexp_parser/expression/classes/set.rb +10 -93
- data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
- data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
- data/lib/regexp_parser/expression/methods/tests.rb +4 -14
- data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
- data/lib/regexp_parser/expression/quantifier.rb +3 -4
- data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
- data/lib/regexp_parser/expression/subexpression.rb +6 -10
- data/lib/regexp_parser/lexer.rb +13 -17
- data/lib/regexp_parser/parser.rb +170 -116
- data/lib/regexp_parser/scanner.rb +952 -2431
- data/lib/regexp_parser/scanner/char_type.rl +31 -0
- data/lib/regexp_parser/scanner/properties/long.yml +561 -0
- data/lib/regexp_parser/scanner/properties/short.yml +225 -0
- data/lib/regexp_parser/scanner/property.rl +7 -806
- data/lib/regexp_parser/scanner/scanner.rl +112 -154
- data/lib/regexp_parser/syntax/base.rb +4 -4
- data/lib/regexp_parser/syntax/tokens.rb +1 -0
- data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
- data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
- data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
- data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
- data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +2 -1
- data/test/expression/test_base.rb +2 -1
- data/test/expression/test_clone.rb +0 -57
- data/test/expression/test_set.rb +31 -8
- data/test/expression/test_strfregexp.rb +13 -4
- data/test/expression/test_subexpression.rb +25 -0
- data/test/expression/test_traverse.rb +25 -25
- data/test/helpers.rb +1 -0
- data/test/lexer/test_all.rb +1 -1
- data/test/lexer/test_conditionals.rb +9 -7
- data/test/lexer/test_nesting.rb +39 -21
- data/test/lexer/test_refcalls.rb +4 -4
- data/test/parser/set/test_intersections.rb +127 -0
- data/test/parser/set/test_ranges.rb +111 -0
- data/test/parser/test_all.rb +4 -1
- data/test/parser/test_escapes.rb +41 -9
- data/test/parser/test_groups.rb +22 -3
- data/test/parser/test_posix_classes.rb +27 -0
- data/test/parser/test_properties.rb +17 -290
- data/test/parser/test_refcalls.rb +66 -26
- data/test/parser/test_sets.rb +132 -129
- data/test/scanner/test_all.rb +1 -7
- data/test/scanner/test_conditionals.rb +16 -16
- data/test/scanner/test_errors.rb +0 -30
- data/test/scanner/test_escapes.rb +1 -2
- data/test/scanner/test_free_space.rb +28 -28
- data/test/scanner/test_groups.rb +35 -35
- data/test/scanner/test_meta.rb +1 -1
- data/test/scanner/test_properties.rb +87 -114
- data/test/scanner/test_refcalls.rb +18 -18
- data/test/scanner/test_scripts.rb +19 -351
- data/test/scanner/test_sets.rb +87 -60
- data/test/scanner/test_unicode_blocks.rb +4 -105
- data/test/support/warning_extractor.rb +1 -1
- data/test/syntax/test_syntax.rb +7 -0
- data/test/syntax/versions/test_1.8.rb +2 -4
- metadata +17 -7
- data/ChangeLog +0 -325
- data/test/scanner/test_emojis.rb +0 -31
    
        data/test/helpers.rb
    CHANGED
    
    
    
        data/test/lexer/test_all.rb
    CHANGED
    
    
| @@ -43,15 +43,17 @@ class LexerConditionals < Test::Unit::TestCase | |
| 43 43 | 
             
                  [11, :conditional, :condition,        '(<B>)',  25, 30, 3, 0, 2],
         | 
| 44 44 |  | 
| 45 45 | 
             
                  [12, :set,         :open,             '[',      30, 31, 3, 0, 2],
         | 
| 46 | 
            -
                  [13, : | 
| 47 | 
            -
                  [14, :set,         : | 
| 46 | 
            +
                  [13, :literal,     :literal,          'e',      31, 32, 3, 1, 2],
         | 
| 47 | 
            +
                  [14, :set,         :range,            '-',      32, 33, 3, 1, 2],
         | 
| 48 | 
            +
                  [15, :literal,     :literal,          'g',      33, 34, 3, 1, 2],
         | 
| 49 | 
            +
                  [16, :set,         :close,            ']',      34, 35, 3, 0, 2],
         | 
| 48 50 |  | 
| 49 | 
            -
                  [ | 
| 50 | 
            -
                  [ | 
| 51 | 
            -
                  [ | 
| 51 | 
            +
                  [17, :conditional, :separator,        '|',      35, 36, 3, 0, 2],
         | 
| 52 | 
            +
                  [23, :conditional, :close,            ')',      41, 42, 3, 0, 1],
         | 
| 53 | 
            +
                  [25, :conditional, :close,            ')',      43, 44, 2, 0, 0],
         | 
| 52 54 |  | 
| 53 | 
            -
                  [ | 
| 54 | 
            -
                  [ | 
| 55 | 
            +
                  [26, :group,       :close,            ')',      44, 45, 1, 0, 0],
         | 
| 56 | 
            +
                  [27, :group,       :close,            ')',      45, 46, 0, 0, 0]
         | 
| 55 57 | 
             
                ].each do |index, type, token, text, ts, te, level, set_level, conditional_level|
         | 
| 56 58 | 
             
                  struct = tokens.at(index)
         | 
| 57 59 |  | 
    
        data/test/lexer/test_nesting.rb
    CHANGED
    
    | @@ -62,38 +62,56 @@ class LexerNesting < Test::Unit::TestCase | |
| 62 62 |  | 
| 63 63 | 
             
                'a[b-e]f' => {
         | 
| 64 64 | 
             
                  1     => [:set,         :open,          '[',      1,  2, 0, 0, 0],
         | 
| 65 | 
            -
                  2     => [: | 
| 66 | 
            -
                  3     => [:set,         : | 
| 65 | 
            +
                  2     => [:literal,     :literal,       'b',      2,  3, 0, 1, 0],
         | 
| 66 | 
            +
                  3     => [:set,         :range,         '-',      3,  4, 0, 1, 0],
         | 
| 67 | 
            +
                  4     => [:literal,     :literal,       'e',      4,  5, 0, 1, 0],
         | 
| 68 | 
            +
                  5     => [:set,         :close,         ']',      5,  6, 0, 0, 0],
         | 
| 67 69 | 
             
                },
         | 
| 68 70 |  | 
| 69 | 
            -
                '[ | 
| 71 | 
            +
                '[[:word:]&&[^c]z]' => {
         | 
| 70 72 | 
             
                  0     => [:set,         :open,          '[',      0,  1, 0, 0, 0],
         | 
| 71 | 
            -
                   | 
| 72 | 
            -
                   | 
| 73 | 
            -
                   | 
| 74 | 
            -
                   | 
| 75 | 
            -
                   | 
| 76 | 
            -
                   | 
| 73 | 
            +
                  1     => [:posixclass,  :word, '[:word:]',        1,  9, 0, 1, 0],
         | 
| 74 | 
            +
                  2     => [:set,         :intersection,  '&&',     9, 11, 0, 1, 0],
         | 
| 75 | 
            +
                  3     => [:set,         :open,          '[',     11, 12, 0, 1, 0],
         | 
| 76 | 
            +
                  4     => [:set,         :negate,        '^',     12, 13, 0, 2, 0],
         | 
| 77 | 
            +
                  5     => [:literal,     :literal,       'c',     13, 14, 0, 2, 0],
         | 
| 78 | 
            +
                  6     => [:set,         :close,         ']',     14, 15, 0, 1, 0],
         | 
| 79 | 
            +
                  7     => [:literal,     :literal,       'z',     15, 16, 0, 1, 0],
         | 
| 80 | 
            +
                  8     => [:set,         :close,         ']',     16, 17, 0, 0, 0],
         | 
| 81 | 
            +
                },
         | 
| 82 | 
            +
             | 
| 83 | 
            +
                '[\p{word}&&[^c]z]' => {
         | 
| 84 | 
            +
                  0     => [:set,         :open,          '[',      0,  1, 0, 0, 0],
         | 
| 85 | 
            +
                  1     => [:property,    :word, '\p{word}',        1,  9, 0, 1, 0],
         | 
| 86 | 
            +
                  2     => [:set,         :intersection,  '&&',     9, 11, 0, 1, 0],
         | 
| 87 | 
            +
                  3     => [:set,         :open,          '[',     11, 12, 0, 1, 0],
         | 
| 88 | 
            +
                  4     => [:set,         :negate,        '^',     12, 13, 0, 2, 0],
         | 
| 89 | 
            +
                  5     => [:literal,     :literal,       'c',     13, 14, 0, 2, 0],
         | 
| 90 | 
            +
                  6     => [:set,         :close,         ']',     14, 15, 0, 1, 0],
         | 
| 91 | 
            +
                  7     => [:literal,     :literal,       'z',     15, 16, 0, 1, 0],
         | 
| 92 | 
            +
                  8     => [:set,         :close,         ']',     16, 17, 0, 0, 0],
         | 
| 77 93 | 
             
                },
         | 
| 78 94 |  | 
| 79 95 | 
             
                '[a[b[c[d-g]]]]' => {
         | 
| 80 96 | 
             
                  0     => [:set,         :open,          '[',      0,  1, 0, 0, 0],
         | 
| 81 | 
            -
                  1     => [: | 
| 82 | 
            -
                  2     => [: | 
| 83 | 
            -
                  3     => [: | 
| 84 | 
            -
                  4     => [: | 
| 85 | 
            -
                  5     => [: | 
| 86 | 
            -
                  6     => [: | 
| 87 | 
            -
                  7     => [: | 
| 88 | 
            -
                  8     => [: | 
| 89 | 
            -
                  9     => [: | 
| 90 | 
            -
             | 
| 91 | 
            -
             | 
| 97 | 
            +
                  1     => [:literal,     :literal,       'a',      1,  2, 0, 1, 0],
         | 
| 98 | 
            +
                  2     => [:set,         :open,          '[',      2,  3, 0, 1, 0],
         | 
| 99 | 
            +
                  3     => [:literal,     :literal,       'b',      3,  4, 0, 2, 0],
         | 
| 100 | 
            +
                  4     => [:set,         :open,          '[',      4,  5, 0, 2, 0],
         | 
| 101 | 
            +
                  5     => [:literal,     :literal,       'c',      5,  6, 0, 3, 0],
         | 
| 102 | 
            +
                  6     => [:set,         :open,          '[',      6,  7, 0, 3, 0],
         | 
| 103 | 
            +
                  7     => [:literal,     :literal,       'd',      7,  8, 0, 4, 0],
         | 
| 104 | 
            +
                  8     => [:set,         :range,         '-',      8,  9, 0, 4, 0],
         | 
| 105 | 
            +
                  9     => [:literal,     :literal,       'g',      9, 10, 0, 4, 0],
         | 
| 106 | 
            +
                  10    => [:set,         :close,         ']',     10, 11, 0, 3, 0],
         | 
| 107 | 
            +
                  11    => [:set,         :close,         ']',     11, 12, 0, 2, 0],
         | 
| 108 | 
            +
                  12    => [:set,         :close,         ']',     12, 13, 0, 1, 0],
         | 
| 109 | 
            +
                  13    => [:set,         :close,         ']',     13, 14, 0, 0, 0],
         | 
| 92 110 | 
             
                },
         | 
| 93 111 | 
             
              }
         | 
| 94 112 |  | 
| 95 113 | 
             
              tests.each_with_index do |(pattern, checks), count|
         | 
| 96 | 
            -
                define_method " | 
| 114 | 
            +
                define_method "test_lex_nesting_in_'#{pattern}'_#{count}" do
         | 
| 97 115 | 
             
                  tokens = RL.lex(pattern, 'ruby/1.9')
         | 
| 98 116 |  | 
| 99 117 | 
             
                  checks.each do |offset, (type, token, text, ts, te, level, set_level, conditional_level)|
         | 
    
        data/test/lexer/test_refcalls.rb
    CHANGED
    
    | @@ -27,11 +27,11 @@ class LexerRefCalls < Test::Unit::TestCase | |
| 27 27 | 
             
                "(abc)\\g'-1'"    => [3, :backref, :number_rel_call,    "\\g'-1'",    5, 11, 0, 0, 0],
         | 
| 28 28 |  | 
| 29 29 | 
             
                # Group back-references, with nesting level
         | 
| 30 | 
            -
                '(?<X>abc)\k<X-0>'  => [3, :backref, : | 
| 31 | 
            -
                "(?<X>abc)\\k'X-0'" => [3, :backref, : | 
| 30 | 
            +
                '(?<X>abc)\k<X-0>'  => [3, :backref, :name_recursion_ref,    '\k<X-0>',    9, 16, 0, 0, 0],
         | 
| 31 | 
            +
                "(?<X>abc)\\k'X-0'" => [3, :backref, :name_recursion_ref,    "\\k'X-0'",   9, 16, 0, 0, 0],
         | 
| 32 32 |  | 
| 33 | 
            -
                '(abc)\k<1-0>'      => [3, :backref, : | 
| 34 | 
            -
                "(abc)\\k'1-0'"     => [3, :backref, : | 
| 33 | 
            +
                '(abc)\k<1-0>'      => [3, :backref, :number_recursion_ref,  '\k<1-0>',    5, 12, 0, 0, 0],
         | 
| 34 | 
            +
                "(abc)\\k'1-0'"     => [3, :backref, :number_recursion_ref,  "\\k'1-0'",   5, 12, 0, 0, 0],
         | 
| 35 35 | 
             
              }
         | 
| 36 36 |  | 
| 37 37 | 
             
              tests.each_with_index do |(pattern, (index, type, token, text, ts, te, level, set_level, conditional_level)), count|
         | 
| @@ -0,0 +1,127 @@ | |
| 1 | 
            +
            require File.expand_path('../../../helpers', __FILE__)
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # edge cases with `...-&&...` and `...&&-...` are checked in test_ranges.rb
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            class ParserSetIntersections < Test::Unit::TestCase
         | 
| 6 | 
            +
              def test_parse_set_intersection
         | 
| 7 | 
            +
                root = RP.parse('[a&&z]')
         | 
| 8 | 
            +
                set  = root[0]
         | 
| 9 | 
            +
                ints = set[0]
         | 
| 10 | 
            +
             | 
| 11 | 
            +
                assert_equal 1, set.count
         | 
| 12 | 
            +
                assert_equal CharacterSet::Intersection, ints.class
         | 
| 13 | 
            +
                assert_equal 2, ints.count
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                seq1, seq2 = ints.expressions
         | 
| 16 | 
            +
                assert_equal CharacterSet::IntersectedSequence, seq1.class
         | 
| 17 | 
            +
                assert_equal 1, seq1.count
         | 
| 18 | 
            +
                assert_equal 'a', seq1.first.to_s
         | 
| 19 | 
            +
                assert_equal Literal, seq1.first.class
         | 
| 20 | 
            +
                assert_equal CharacterSet::IntersectedSequence, seq2.class
         | 
| 21 | 
            +
                assert_equal 1, seq2.count
         | 
| 22 | 
            +
                assert_equal 'z', seq2.first.to_s
         | 
| 23 | 
            +
                assert_equal Literal, seq2.first.class
         | 
| 24 | 
            +
             | 
| 25 | 
            +
                refute       set.matches?('a')
         | 
| 26 | 
            +
                refute       set.matches?('&')
         | 
| 27 | 
            +
                refute       set.matches?('z')
         | 
| 28 | 
            +
              end
         | 
| 29 | 
            +
             | 
| 30 | 
            +
              def test_parse_set_intersection_range_and_subset
         | 
| 31 | 
            +
                root = RP.parse('[a-z&&[^a]]')
         | 
| 32 | 
            +
                set  = root[0]
         | 
| 33 | 
            +
                ints = set[0]
         | 
| 34 | 
            +
             | 
| 35 | 
            +
                assert_equal 1, set.count
         | 
| 36 | 
            +
                assert_equal CharacterSet::Intersection, ints.class
         | 
| 37 | 
            +
                assert_equal 2, ints.count
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                seq1, seq2 = ints.expressions
         | 
| 40 | 
            +
                assert_equal CharacterSet::IntersectedSequence, seq1.class
         | 
| 41 | 
            +
                assert_equal 1, seq1.count
         | 
| 42 | 
            +
                assert_equal 'a-z', seq1.first.to_s
         | 
| 43 | 
            +
                assert_equal CharacterSet::Range, seq1.first.class
         | 
| 44 | 
            +
                assert_equal CharacterSet::IntersectedSequence, seq2.class
         | 
| 45 | 
            +
                assert_equal 1, seq2.count
         | 
| 46 | 
            +
                assert_equal '[^a]', seq2.first.to_s
         | 
| 47 | 
            +
                assert_equal CharacterSet, seq2.first.class
         | 
| 48 | 
            +
             | 
| 49 | 
            +
                refute       set.matches?('a')
         | 
| 50 | 
            +
                refute       set.matches?('&')
         | 
| 51 | 
            +
                assert       set.matches?('b')
         | 
| 52 | 
            +
              end
         | 
| 53 | 
            +
             | 
| 54 | 
            +
              def test_parse_set_intersection_trailing_range
         | 
| 55 | 
            +
                root = RP.parse('[a&&a-z]')
         | 
| 56 | 
            +
                set  = root[0]
         | 
| 57 | 
            +
                ints = set[0]
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                assert_equal 1, set.count
         | 
| 60 | 
            +
                assert_equal CharacterSet::Intersection, ints.class
         | 
| 61 | 
            +
                assert_equal 2, ints.count
         | 
| 62 | 
            +
             | 
| 63 | 
            +
                seq1, seq2 = ints.expressions
         | 
| 64 | 
            +
                assert_equal CharacterSet::IntersectedSequence, seq1.class
         | 
| 65 | 
            +
                assert_equal 1, seq1.count
         | 
| 66 | 
            +
                assert_equal 'a', seq1.first.to_s
         | 
| 67 | 
            +
                assert_equal Literal, seq1.first.class
         | 
| 68 | 
            +
                assert_equal CharacterSet::IntersectedSequence, seq2.class
         | 
| 69 | 
            +
                assert_equal 1, seq2.count
         | 
| 70 | 
            +
                assert_equal 'a-z', seq2.first.to_s
         | 
| 71 | 
            +
                assert_equal CharacterSet::Range, seq2.first.class
         | 
| 72 | 
            +
             | 
| 73 | 
            +
                assert       set.matches?('a')
         | 
| 74 | 
            +
                refute       set.matches?('&')
         | 
| 75 | 
            +
                refute       set.matches?('b')
         | 
| 76 | 
            +
              end
         | 
| 77 | 
            +
             | 
| 78 | 
            +
              def test_parse_set_intersection_type
         | 
| 79 | 
            +
                root = RP.parse('[a&&\w]')
         | 
| 80 | 
            +
                set  = root[0]
         | 
| 81 | 
            +
                ints = set[0]
         | 
| 82 | 
            +
             | 
| 83 | 
            +
                assert_equal 1, set.count
         | 
| 84 | 
            +
                assert_equal CharacterSet::Intersection, ints.class
         | 
| 85 | 
            +
                assert_equal 2, ints.count
         | 
| 86 | 
            +
             | 
| 87 | 
            +
                seq1, seq2 = ints.expressions
         | 
| 88 | 
            +
                assert_equal CharacterSet::IntersectedSequence, seq1.class
         | 
| 89 | 
            +
                assert_equal 1, seq1.count
         | 
| 90 | 
            +
                assert_equal 'a', seq1.first.to_s
         | 
| 91 | 
            +
                assert_equal Literal, seq1.first.class
         | 
| 92 | 
            +
                assert_equal CharacterSet::IntersectedSequence, seq2.class
         | 
| 93 | 
            +
                assert_equal 1, seq2.count
         | 
| 94 | 
            +
                assert_equal '\w', seq2.first.to_s
         | 
| 95 | 
            +
                assert_equal CharacterType::Word, seq2.first.class
         | 
| 96 | 
            +
             | 
| 97 | 
            +
                assert       set.matches?('a')
         | 
| 98 | 
            +
                refute       set.matches?('&')
         | 
| 99 | 
            +
                refute       set.matches?('b')
         | 
| 100 | 
            +
              end
         | 
| 101 | 
            +
             | 
| 102 | 
            +
              def test_parse_set_intersection_multipart
         | 
| 103 | 
            +
                root = RP.parse('[\h&&\w&&efg]')
         | 
| 104 | 
            +
                set  = root[0]
         | 
| 105 | 
            +
                ints = set[0]
         | 
| 106 | 
            +
             | 
| 107 | 
            +
                assert_equal 1, set.count
         | 
| 108 | 
            +
                assert_equal CharacterSet::Intersection, ints.class
         | 
| 109 | 
            +
                assert_equal 3, ints.count
         | 
| 110 | 
            +
             | 
| 111 | 
            +
                seq1, seq2, seq3 = ints.expressions
         | 
| 112 | 
            +
                assert_equal CharacterSet::IntersectedSequence, seq1.class
         | 
| 113 | 
            +
                assert_equal 1, seq1.count
         | 
| 114 | 
            +
                assert_equal '\h', seq1.first.to_s
         | 
| 115 | 
            +
                assert_equal CharacterSet::IntersectedSequence, seq2.class
         | 
| 116 | 
            +
                assert_equal 1, seq2.count
         | 
| 117 | 
            +
                assert_equal '\w', seq2.first.to_s
         | 
| 118 | 
            +
                assert_equal CharacterSet::IntersectedSequence, seq3.class
         | 
| 119 | 
            +
                assert_equal 3, seq3.count
         | 
| 120 | 
            +
                assert_equal 'efg', seq3.to_s
         | 
| 121 | 
            +
             | 
| 122 | 
            +
                assert       set.matches?('e')
         | 
| 123 | 
            +
                assert       set.matches?('f')
         | 
| 124 | 
            +
                refute       set.matches?('a')
         | 
| 125 | 
            +
                refute       set.matches?('g')
         | 
| 126 | 
            +
              end
         | 
| 127 | 
            +
            end
         | 
| @@ -0,0 +1,111 @@ | |
| 1 | 
            +
            require File.expand_path('../../../helpers', __FILE__)
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            class ParserSetRangs < Test::Unit::TestCase
         | 
| 4 | 
            +
              def test_parse_set_range
         | 
| 5 | 
            +
                root  = RP.parse('[a-z]')
         | 
| 6 | 
            +
                set   = root[0]
         | 
| 7 | 
            +
                range = set[0]
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                assert_equal 1, set.count
         | 
| 10 | 
            +
                assert_equal CharacterSet::Range, range.class
         | 
| 11 | 
            +
                assert_equal 2, range.count
         | 
| 12 | 
            +
                assert_equal 'a', range.first.to_s
         | 
| 13 | 
            +
                assert_equal Literal, range.first.class
         | 
| 14 | 
            +
                assert_equal 'z', range.last.to_s
         | 
| 15 | 
            +
                assert_equal Literal, range.last.class
         | 
| 16 | 
            +
                assert       set.matches?('m')
         | 
| 17 | 
            +
              end
         | 
| 18 | 
            +
             | 
| 19 | 
            +
              def test_parse_set_range_hex
         | 
| 20 | 
            +
                root  = RP.parse('[\x00-\x99]')
         | 
| 21 | 
            +
                set   = root[0]
         | 
| 22 | 
            +
                range = set[0]
         | 
| 23 | 
            +
             | 
| 24 | 
            +
                assert_equal 1, set.count
         | 
| 25 | 
            +
                assert_equal CharacterSet::Range, range.class
         | 
| 26 | 
            +
                assert_equal 2, range.count
         | 
| 27 | 
            +
                assert_equal '\x00', range.first.to_s
         | 
| 28 | 
            +
                assert_equal EscapeSequence::Hex, range.first.class
         | 
| 29 | 
            +
                assert_equal '\x99', range.last.to_s
         | 
| 30 | 
            +
                assert_equal EscapeSequence::Hex, range.last.class
         | 
| 31 | 
            +
                assert       set.matches?('\x50')
         | 
| 32 | 
            +
              end
         | 
| 33 | 
            +
             | 
| 34 | 
            +
              def test_parse_set_range_unicode
         | 
| 35 | 
            +
                root  = RP.parse('[\u{40 42}-\u1234]')
         | 
| 36 | 
            +
                set   = root[0]
         | 
| 37 | 
            +
                range = set[0]
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                assert_equal 1, set.count
         | 
| 40 | 
            +
                assert_equal CharacterSet::Range, range.class
         | 
| 41 | 
            +
                assert_equal 2, range.count
         | 
| 42 | 
            +
                assert_equal '\u{40 42}', range.first.to_s
         | 
| 43 | 
            +
                assert_equal EscapeSequence::CodepointList, range.first.class
         | 
| 44 | 
            +
                assert_equal '\u1234', range.last.to_s
         | 
| 45 | 
            +
                assert_equal EscapeSequence::Codepoint, range.last.class
         | 
| 46 | 
            +
                assert       set.matches?('\u600')
         | 
| 47 | 
            +
              end
         | 
| 48 | 
            +
             | 
| 49 | 
            +
              def test_parse_set_range_edge_case_leading_dash
         | 
| 50 | 
            +
                root  = RP.parse('[--z]')
         | 
| 51 | 
            +
                set   = root[0]
         | 
| 52 | 
            +
                range = set[0]
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                assert_equal 1, set.count
         | 
| 55 | 
            +
                assert_equal 2, range.count
         | 
| 56 | 
            +
                assert       set.matches?('a')
         | 
| 57 | 
            +
              end
         | 
| 58 | 
            +
             | 
| 59 | 
            +
              def test_parse_set_range_edge_case_trailing_dash
         | 
| 60 | 
            +
                root  = RP.parse('[!--]')
         | 
| 61 | 
            +
                set   = root[0]
         | 
| 62 | 
            +
                range = set[0]
         | 
| 63 | 
            +
             | 
| 64 | 
            +
                assert_equal 1, set.count
         | 
| 65 | 
            +
                assert_equal 2, range.count
         | 
| 66 | 
            +
                assert       set.matches?('$')
         | 
| 67 | 
            +
              end
         | 
| 68 | 
            +
             | 
| 69 | 
            +
              def test_parse_set_range_edge_case_leading_negate
         | 
| 70 | 
            +
                root = RP.parse('[^-z]')
         | 
| 71 | 
            +
                set  = root[0]
         | 
| 72 | 
            +
             | 
| 73 | 
            +
                assert_equal 2, set.count
         | 
| 74 | 
            +
                assert       set.matches?('a')
         | 
| 75 | 
            +
                refute       set.matches?('z')
         | 
| 76 | 
            +
              end
         | 
| 77 | 
            +
             | 
| 78 | 
            +
              def test_parse_set_range_edge_case_trailing_negate
         | 
| 79 | 
            +
                root  = RP.parse('[!-^]')
         | 
| 80 | 
            +
                set   = root[0]
         | 
| 81 | 
            +
                range = set[0]
         | 
| 82 | 
            +
             | 
| 83 | 
            +
                assert_equal 1, set.count
         | 
| 84 | 
            +
                assert_equal 2, range.count
         | 
| 85 | 
            +
                assert       set.matches?('$')
         | 
| 86 | 
            +
              end
         | 
| 87 | 
            +
             | 
| 88 | 
            +
              def test_parse_set_range_edge_case_leading_intersection
         | 
| 89 | 
            +
                root  = RP.parse('[[\-ab]&&-bc]')
         | 
| 90 | 
            +
                set   = root[0]
         | 
| 91 | 
            +
             | 
| 92 | 
            +
                assert_equal 1, set.count
         | 
| 93 | 
            +
                assert_equal '-bc', set.first.last.to_s
         | 
| 94 | 
            +
                assert       set.matches?('-')
         | 
| 95 | 
            +
                assert       set.matches?('b')
         | 
| 96 | 
            +
                refute       set.matches?('a')
         | 
| 97 | 
            +
                refute       set.matches?('c')
         | 
| 98 | 
            +
              end
         | 
| 99 | 
            +
             | 
| 100 | 
            +
              def test_parse_set_range_edge_case_trailing_intersection
         | 
| 101 | 
            +
                root  = RP.parse('[bc-&&[\-ab]]')
         | 
| 102 | 
            +
                set   = root[0]
         | 
| 103 | 
            +
             | 
| 104 | 
            +
                assert_equal 1, set.count
         | 
| 105 | 
            +
                assert_equal 'bc-', set.first.first.to_s
         | 
| 106 | 
            +
                assert       set.matches?('-')
         | 
| 107 | 
            +
                assert       set.matches?('b')
         | 
| 108 | 
            +
                refute       set.matches?('a')
         | 
| 109 | 
            +
                refute       set.matches?('c')
         | 
| 110 | 
            +
              end
         | 
| 111 | 
            +
            end
         | 
    
        data/test/parser/test_all.rb
    CHANGED
    
    | @@ -2,11 +2,14 @@ require File.expand_path("../../helpers", __FILE__) | |
| 2 2 |  | 
| 3 3 | 
             
            %w{
         | 
| 4 4 | 
             
              alternation anchors errors escapes free_space groups
         | 
| 5 | 
            -
              properties quantifiers refcalls sets types
         | 
| 5 | 
            +
              posix_classes properties quantifiers refcalls sets types
         | 
| 6 6 | 
             
            }.each do|tc|
         | 
| 7 7 | 
             
              require File.expand_path("../test_#{tc}", __FILE__)
         | 
| 8 8 | 
             
            end
         | 
| 9 9 |  | 
| 10 | 
            +
            require File.expand_path('../set/test_ranges.rb', __FILE__)
         | 
| 11 | 
            +
            require File.expand_path('../set/test_intersections.rb', __FILE__)
         | 
| 12 | 
            +
             | 
| 10 13 | 
             
            if RUBY_VERSION >= '2.0.0'
         | 
| 11 14 | 
             
              %w{conditionals keep}.each do|tc|
         | 
| 12 15 | 
             
                require File.expand_path("../test_#{tc}", __FILE__)
         | 
    
        data/test/parser/test_escapes.rb
    CHANGED
    
    | @@ -11,10 +11,6 @@ class TestParserEscapes < Test::Unit::TestCase | |
| 11 11 | 
             
                /a\tc/    => [1, :escape,   :tab,               EscapeSequence::Tab],
         | 
| 12 12 | 
             
                /a\vc/    => [1, :escape,   :vertical_tab,      EscapeSequence::VerticalTab],
         | 
| 13 13 |  | 
| 14 | 
            -
                # special cases
         | 
| 15 | 
            -
                /a\bc/    => [1, :anchor,   :word_boundary,     Anchor::WordBoundary],
         | 
| 16 | 
            -
                /a\sc/    => [1, :type,     :space,             CharacterType::Space],
         | 
| 17 | 
            -
             | 
| 18 14 | 
             
                # meta character escapes
         | 
| 19 15 | 
             
                /a\.c/    => [1, :escape,   :dot,               EscapeSequence::Literal],
         | 
| 20 16 | 
             
                /a\?c/    => [1, :escape,   :zero_or_one,       EscapeSequence::Literal],
         | 
| @@ -27,14 +23,15 @@ class TestParserEscapes < Test::Unit::TestCase | |
| 27 23 | 
             
                /a\}c/    => [1, :escape,   :interval_close,    EscapeSequence::Literal],
         | 
| 28 24 |  | 
| 29 25 | 
             
                # unicode escapes
         | 
| 30 | 
            -
                /a\u0640/       => [1, :escape, :codepoint,      EscapeSequence:: | 
| 31 | 
            -
                /a\u{41 1F60D}/ => [1, :escape, :codepoint_list, EscapeSequence:: | 
| 26 | 
            +
                /a\u0640/       => [1, :escape, :codepoint,      EscapeSequence::Codepoint],
         | 
| 27 | 
            +
                /a\u{41 1F60D}/ => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
         | 
| 28 | 
            +
                /a\u{10FFFF}/   => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
         | 
| 32 29 |  | 
| 33 30 | 
             
                 # hex escapes
         | 
| 34 | 
            -
                /a\xFF/n =>  [1, :escape, :hex,                 EscapeSequence:: | 
| 31 | 
            +
                /a\xFF/n =>  [1, :escape, :hex,                 EscapeSequence::Hex],
         | 
| 35 32 |  | 
| 36 33 | 
             
                # octal escapes
         | 
| 37 | 
            -
                /a\177/n =>  [1, :escape, :octal,               EscapeSequence:: | 
| 34 | 
            +
                /a\177/n =>  [1, :escape, :octal,               EscapeSequence::Octal],
         | 
| 38 35 | 
             
              }
         | 
| 39 36 |  | 
| 40 37 | 
             
              tests.each_with_index do |(pattern, (index, type, token, klass)), count|
         | 
| @@ -50,11 +47,35 @@ class TestParserEscapes < Test::Unit::TestCase | |
| 50 47 | 
             
                end
         | 
| 51 48 | 
             
              end
         | 
| 52 49 |  | 
| 50 | 
            +
              def test_parse_chars_and_codepoints
         | 
| 51 | 
            +
                root = RP.parse(/\n\?\101\x42\u0043\u{44 45}/)
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                assert_equal "\n",       root[0].char
         | 
| 54 | 
            +
                assert_equal 10,         root[0].codepoint
         | 
| 55 | 
            +
             | 
| 56 | 
            +
                assert_equal "?",        root[1].char
         | 
| 57 | 
            +
                assert_equal 63,         root[1].codepoint
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                assert_equal "A",        root[2].char
         | 
| 60 | 
            +
                assert_equal 65,         root[2].codepoint
         | 
| 61 | 
            +
             | 
| 62 | 
            +
                assert_equal "B",        root[3].char
         | 
| 63 | 
            +
                assert_equal 66,         root[3].codepoint
         | 
| 64 | 
            +
             | 
| 65 | 
            +
                assert_equal "C",        root[4].char
         | 
| 66 | 
            +
                assert_equal 67,         root[4].codepoint
         | 
| 67 | 
            +
             | 
| 68 | 
            +
                assert_equal ["D", "E"], root[5].chars
         | 
| 69 | 
            +
                assert_equal [68, 69],   root[5].codepoints
         | 
| 70 | 
            +
              end
         | 
| 71 | 
            +
             | 
| 53 72 | 
             
              def test_parse_escape_control_sequence_lower
         | 
| 54 73 | 
             
                root = RP.parse(/a\\\c2b/)
         | 
| 55 74 |  | 
| 56 75 | 
             
                assert_equal EscapeSequence::Control, root[2].class
         | 
| 57 76 | 
             
                assert_equal '\\c2',                  root[2].text
         | 
| 77 | 
            +
                assert_equal "\u0012",                root[2].char
         | 
| 78 | 
            +
                assert_equal 18,                      root[2].codepoint
         | 
| 58 79 | 
             
              end
         | 
| 59 80 |  | 
| 60 81 | 
             
              def test_parse_escape_control_sequence_upper
         | 
| @@ -62,6 +83,8 @@ class TestParserEscapes < Test::Unit::TestCase | |
| 62 83 |  | 
| 63 84 | 
             
                assert_equal EscapeSequence::Control, root[2].class
         | 
| 64 85 | 
             
                assert_equal '\\C-C',                 root[2].text
         | 
| 86 | 
            +
                assert_equal "\u0003",                root[2].char
         | 
| 87 | 
            +
                assert_equal 3,                       root[2].codepoint
         | 
| 65 88 | 
             
              end
         | 
| 66 89 |  | 
| 67 90 | 
             
              def test_parse_escape_meta_sequence
         | 
| @@ -69,6 +92,8 @@ class TestParserEscapes < Test::Unit::TestCase | |
| 69 92 |  | 
| 70 93 | 
             
                assert_equal EscapeSequence::Meta, root[2].class
         | 
| 71 94 | 
             
                assert_equal '\\M-Z',              root[2].text
         | 
| 95 | 
            +
                assert_equal "\u00DA",             root[2].char
         | 
| 96 | 
            +
                assert_equal 218,                  root[2].codepoint
         | 
| 72 97 | 
             
              end
         | 
| 73 98 |  | 
| 74 99 | 
             
              def test_parse_escape_meta_control_sequence
         | 
| @@ -76,6 +101,8 @@ class TestParserEscapes < Test::Unit::TestCase | |
| 76 101 |  | 
| 77 102 | 
             
                assert_equal EscapeSequence::MetaControl, root[2].class
         | 
| 78 103 | 
             
                assert_equal '\\M-\\C-X',                 root[2].text
         | 
| 104 | 
            +
                assert_equal "\u0098",                    root[2].char
         | 
| 105 | 
            +
                assert_equal 152,                         root[2].codepoint
         | 
| 79 106 | 
             
              end
         | 
| 80 107 |  | 
| 81 108 | 
             
              def test_parse_lower_c_meta_control_sequence
         | 
| @@ -83,6 +110,8 @@ class TestParserEscapes < Test::Unit::TestCase | |
| 83 110 |  | 
| 84 111 | 
             
                assert_equal EscapeSequence::MetaControl, root[2].class
         | 
| 85 112 | 
             
                assert_equal '\\M-\\cX',                  root[2].text
         | 
| 113 | 
            +
                assert_equal "\u0098",                    root[2].char
         | 
| 114 | 
            +
                assert_equal 152,                         root[2].codepoint
         | 
| 86 115 | 
             
              end
         | 
| 87 116 |  | 
| 88 117 | 
             
              def test_parse_escape_reverse_meta_control_sequence
         | 
| @@ -90,6 +119,8 @@ class TestParserEscapes < Test::Unit::TestCase | |
| 90 119 |  | 
| 91 120 | 
             
                assert_equal EscapeSequence::MetaControl, root[2].class
         | 
| 92 121 | 
             
                assert_equal '\\C-\\M-X',                 root[2].text
         | 
| 122 | 
            +
                assert_equal "\u0098",                    root[2].char
         | 
| 123 | 
            +
                assert_equal 152,                         root[2].codepoint
         | 
| 93 124 | 
             
              end
         | 
| 94 125 |  | 
| 95 126 | 
             
              def test_parse_escape_reverse_lower_c_meta_control_sequence
         | 
| @@ -97,6 +128,7 @@ class TestParserEscapes < Test::Unit::TestCase | |
| 97 128 |  | 
| 98 129 | 
             
                assert_equal EscapeSequence::MetaControl, root[2].class
         | 
| 99 130 | 
             
                assert_equal '\\c\\M-X',                  root[2].text
         | 
| 131 | 
            +
                assert_equal "\u0098",                    root[2].char
         | 
| 132 | 
            +
                assert_equal 152,                         root[2].codepoint
         | 
| 100 133 | 
             
              end
         | 
| 101 | 
            -
             | 
| 102 134 | 
             
            end
         |