RubyGems - regexp_parser - Versions diffs - 0.4.5 → 0.4.6 - Mend

regexp_parser 0.4.5 → 0.4.6

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry they were published to. It is provided for informational purposes only.

Files changed (25)

checksums.yaml +4 -4
data/ChangeLog +8 -0
data/README.md +6 -5
data/lib/regexp_parser/expression/classes/type.rb +11 -9
data/lib/regexp_parser/parser.rb +10 -2
data/lib/regexp_parser/scanner/property.rl +1 -2
data/lib/regexp_parser/scanner/scanner.rl +6 -2
data/lib/regexp_parser/scanner.rb +1098 -969
data/lib/regexp_parser/syntax/ruby/1.9.1.rb +1 -1
data/lib/regexp_parser/syntax/ruby/2.0.0.rb +4 -0
data/lib/regexp_parser/syntax/tokens/character_set.rb +5 -2
data/lib/regexp_parser/syntax/tokens/character_type.rb +3 -1
data/lib/regexp_parser/syntax/tokens/escape.rb +2 -0
data/lib/regexp_parser/version.rb +1 -1
data/test/parser/test_escapes.rb +24 -0
data/test/parser/test_properties.rb +12 -0
data/test/parser/test_sets.rb +10 -0
data/test/parser/test_types.rb +18 -0
data/test/scanner/test_errors.rb +5 -0
data/test/scanner/test_escapes.rb +3 -0
data/test/scanner/test_properties.rb +8 -0
data/test/scanner/test_sets.rb +7 -0
data/test/scanner/test_types.rb +3 -0
data/test/syntax/ruby/test_1.9.1.rb +2 -1
metadata +3 -3

data/lib/regexp_parser/syntax/ruby/1.9.1.rb CHANGED Viewed

@@ -15,7 +15,7 @@ module Regexp::Syntax
         implements :backref, Backreference::All +
           SubexpressionCall::All
-        implements :escape, Escape::Unicode + Escape::Hex
+        implements :escape, Escape::Unicode + Escape::Hex + Escape::Octal
         implements :type, CharacterType::Hex

data/lib/regexp_parser/syntax/ruby/2.0.0.rb CHANGED Viewed

@@ -12,6 +12,10 @@ module Regexp::Syntax
         implements :conditional, Conditional::All
         implements :property,    UnicodeProperty::V200
         implements :nonproperty, UnicodeProperty::V200
+        implements :type,        CharacterType::Clustered
+        implements :set,         CharacterSet::Clustered
+        implements :subset,      CharacterSet::Clustered
       end
     end

data/lib/regexp_parser/syntax/tokens/character_set.rb CHANGED Viewed

@@ -5,11 +5,14 @@ module Regexp::Syntax
       OpenClose = [:open, :close]
       Basic     = [:negate, :member, :range]
-      Extended  = Basic + [:escape, :intersection, :range_hex, :backspace]
+      Extended  = Basic + [:escape, :intersection, :backspace,
+                           :member_hex, :range_hex]
       Types     = [:type_digit, :type_nondigit, :type_hex, :type_nonhex,
                    :type_space, :type_nonspace, :type_word, :type_nonword]
+      Clustered = [:type_linebreak, :type_xgrapheme]
       module POSIX
         Standard  = [
           :class_alnum, :class_alpha, :class_blank, :class_cntrl,
@@ -30,7 +33,7 @@ module Regexp::Syntax
         All = Standard + StandardNegative + Extensions + ExtensionsNegative
       end
-      All = Basic + Extended + Types + POSIX::All
+      All = Basic + Extended + Types + Clustered + POSIX::All
       Type = :set
       module SubSet

data/lib/regexp_parser/syntax/tokens/character_type.rb CHANGED Viewed

@@ -6,7 +6,9 @@ module Regexp::Syntax
       Extended  = [:digit, :nondigit, :space, :nonspace, :word, :nonword]
       Hex       = [:hex, :nonhex]
-      All = Basic + Extended + Hex
+      Clustered = [:linebreak, :xgrapheme]
+      All = Basic + Extended + Hex + Clustered
       Type = :type
     end

data/lib/regexp_parser/syntax/tokens/escape.rb CHANGED Viewed

@@ -23,6 +23,8 @@ module Regexp::Syntax
       Hex   = [:hex]
+      Octal = [:octal]
       All   = Basic + Backreference + ASCII + Meta
       Type  = :escape
     end

data/lib/regexp_parser/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 class Regexp
   module Parser
-    VERSION = '0.4.5'
+    VERSION = '0.4.6'
   end
 end

data/test/parser/test_escapes.rb CHANGED Viewed

@@ -32,6 +32,9 @@ class TestParserEscapes < Test::Unit::TestCase
      # hex escapes
     /a\xFF/n =>  [1, :escape, :hex,                 EscapeSequence::Literal],
+    # octal escapes
+    /a\177/n =>  [1, :escape, :octal,               EscapeSequence::Literal],
   }
   tests.each_with_index do |(pattern, (index, type, token, klass)), count|
@@ -75,4 +78,25 @@ class TestParserEscapes < Test::Unit::TestCase
     assert_equal '\\M-\\C-X',                 root[2].text
   end
+  def test_parse_lower_c_meta_control_sequence
+    root = RP.parse(/\A\\\M-\cX/n)
+    assert_equal EscapeSequence::MetaControl, root[2].class
+    assert_equal '\\M-\\cX',                  root[2].text
+  end
+  def test_parse_escape_reverse_meta_control_sequence
+    root = RP.parse(/\A\\\C-\M-X/n)
+    assert_equal EscapeSequence::MetaControl, root[2].class
+    assert_equal '\\C-\\M-X',                 root[2].text
+  end
+  def test_parse_escape_reverse_lower_c_meta_control_sequence
+    root = RP.parse(/\A\\\c\M-X/n)
+    assert_equal EscapeSequence::MetaControl, root[2].class
+    assert_equal '\\c\\M-X',                  root[2].text
+  end
 end

data/test/parser/test_properties.rb CHANGED Viewed

@@ -318,6 +318,18 @@ class ParserProperties < Test::Unit::TestCase
     assert_equal true, t.expressions[1].negative?
   end
+  def test_parse_caret_nonproperty_negative
+    t = RP.parse 'ab\p{^L}cd', 'ruby/1.9'
+    assert_equal true, t.expressions[1].negative?
+  end
+  def test_parse_double_negated_property_negative
+    t = RP.parse 'ab\P{^L}cd', 'ruby/1.9'
+    assert_equal false, t.expressions[1].negative?
+  end
   def test_parse_property_age
     t = RP.parse 'ab\p{age=5.2}cd', 'ruby/1.9'

data/test/parser/test_sets.rb CHANGED Viewed

@@ -39,6 +39,16 @@ class TestParserSets < Test::Unit::TestCase
     assert_equal false, exp.include?(']')
   end
+  def test_parse_hex_members
+    root = RP.parse('[\x20\x24-\x26\x28]', :any)
+    exp  = root.expressions.at(0)
+    assert_equal true,  exp.include?('\x20')
+    assert_equal true,  exp.include?('\x24-\x26')
+    assert_equal true,  exp.include?('\x28')
+    assert_equal false, exp.include?(']')
+  end
   def test_parse_chat_type_set_members
     root = RP.parse('[\da-z]', :any)
     exp  = root.expressions.at(0)

data/test/parser/test_types.rb CHANGED Viewed

@@ -29,4 +29,22 @@ class TestParserTypes < Test::Unit::TestCase
     end
   end
+  tests_2_0 = {
+    'a\Rc'    => [1, :type,   :linebreak, CharacterType::Linebreak],
+    'a\Xc'    => [1, :type,   :xgrapheme, CharacterType::ExtendedGrapheme],
+  }
+  tests_2_0.each_with_index do |(pattern, (index, type, token, klass)), count|
+    define_method "test_parse_type_#{token}_#{count}" do
+      root = RP.parse(pattern, 'ruby/2.0')
+      exp  = root.expressions.at(index)
+      assert exp.is_a?( klass ),
+             "Expected #{klass}, but got #{exp.class.name}"
+      assert_equal type,  exp.type
+      assert_equal token, exp.token
+    end
+  end
 end

data/test/scanner/test_errors.rb CHANGED Viewed

@@ -72,14 +72,19 @@ class ScannerErrors < Test::Unit::TestCase
   def test_scanner_eof_in_control_sequence
     assert_raise( RS::PrematureEndError ) { RS.scan('\c') }
+    assert_raise( RS::PrematureEndError ) { RS.scan('\c\M') }
+    assert_raise( RS::PrematureEndError ) { RS.scan('\c\M-') }
     assert_raise( RS::PrematureEndError ) { RS.scan('\C') }
     assert_raise( RS::PrematureEndError ) { RS.scan('\C-') }
+    assert_raise( RS::PrematureEndError ) { RS.scan('\C-\M') }
+    assert_raise( RS::PrematureEndError ) { RS.scan('\C-\M-') }
   end
   def test_scanner_eof_in_meta_sequence
     assert_raise( RS::PrematureEndError ) { RS.scan('\M') }
     assert_raise( RS::PrematureEndError ) { RS.scan('\M-') }
     assert_raise( RS::PrematureEndError ) { RS.scan('\M-\\') }
+    assert_raise( RS::PrematureEndError ) { RS.scan('\M-\c') }
     assert_raise( RS::PrematureEndError ) { RS.scan('\M-\C') }
     assert_raise( RS::PrematureEndError ) { RS.scan('\M-\C-') }
   end

data/test/scanner/test_escapes.rb CHANGED Viewed

@@ -29,9 +29,12 @@ class ScannerEscapes < Test::Unit::TestCase
     /a\cBc/           => [1, :escape,  :control,          '\cB',            1,  4],
     /a\C-bc/          => [1, :escape,  :control,          '\C-b',           1,  5],
+    /a\c\M-Bc/n       => [1, :escape,  :control,          '\c\M-B',         1,  7],
+    /a\C-\M-Bc/n      => [1, :escape,  :control,          '\C-\M-B',        1,  8],
     /a\M-Bc/n         => [1, :escape,  :meta_sequence,    '\M-B',           1,  5],
     /a\M-\C-Bc/n      => [1, :escape,  :meta_sequence,    '\M-\C-B',        1,  8],
+    /a\M-\cBc/n       => [1, :escape,  :meta_sequence,    '\M-\cB',         1,  7],
     'ab\\\xcd'        => [1, :escape,  :backslash,        '\\\\',           2,  4],
     'ab\\\0cd'        => [1, :escape,  :backslash,        '\\\\',           2,  4],

data/test/scanner/test_properties.rb CHANGED Viewed

@@ -317,5 +317,13 @@ class ScannerProperties < Test::Unit::TestCase
       assert_equal :nonproperty, result[0]
       assert_equal token,        result[1]
     end
+    define_method "test_scan_double_negated_property_#{token}_#{count}" do
+      tokens = RS.scan("a\\P{^#{property}}c")
+      result = tokens.at(1)
+      assert_equal :property, result[0]
+      assert_equal token,     result[1]
+    end
   end
 end

data/test/scanner/test_sets.rb CHANGED Viewed

@@ -20,6 +20,8 @@ class ScannerSets < Test::Unit::TestCase
     '[<]'                   => [1, :set,    :member,          '<',          1, 2],
     '[>]'                   => [1, :set,    :member,          '>',          1, 2],
+    '[\x20]'                => [1, :set,    :member_hex,      '\x20',       1, 5],
     '[\.]'                  => [1, :set,    :escape,          '\.',         1, 3],
     '[\!]'                  => [1, :set,    :escape,          '\!',         1, 3],
     '[\#]'                  => [1, :set,    :escape,          '\#',         1, 3],
@@ -40,6 +42,9 @@ class ScannerSets < Test::Unit::TestCase
     '[\w]'                  => [1, :set,    :type_word,       '\w',         1, 3],
     '[\W]'                  => [1, :set,    :type_nonword,    '\W',         1, 3],
+    '[\R]'                  => [1, :set,    :type_linebreak,  '\R',         1, 3],
+    '[\X]'                  => [1, :set,    :type_xgrapheme,  '\X',         1, 3],
     '[a-c]'                 => [1, :set,    :range,           'a-c',        1, 4],
     '[a-c-]'                => [2, :set,    :member,          '-',          4, 6],
     '[a-c^]'                => [2, :set,    :member,          '^',          4, 5],
@@ -58,6 +63,8 @@ class ScannerSets < Test::Unit::TestCase
     '[a\p{digit}c]'         => [2, :set,    :digit,           '\p{digit}',  2, 11],
     '[a\P{digit}c]'         => [2, :set,    :digit,           '\P{digit}',  2, 11],
+    '[a\p{^digit}c]'        => [2, :set,    :digit,           '\p{^digit}', 2, 12],
+    '[a\P{^digit}c]'        => [2, :set,    :digit,           '\P{^digit}', 2, 12],
     '[a\p{ALPHA}c]'         => [2, :set,    :alpha,           '\p{ALPHA}',  2, 11],
     '[a\p{P}c]'             => [2, :set,    :punct_any,       '\p{P}',      2, 7],

data/test/scanner/test_types.rb CHANGED Viewed

@@ -14,6 +14,9 @@ class ScannerTypes < Test::Unit::TestCase
    'a\wc' => [1, :type,  :word,        '\w',  1, 3],
    'a\Wc' => [1, :type,  :nonword,     '\W',  1, 3],
+   'a\Rc' => [1, :type,  :linebreak,   '\R',  1, 3],
+   'a\Xc' => [1, :type,  :xgrapheme,   '\X',  1, 3],
   }
   tests.each do |(pattern, (index, type, token, text, ts, te))|

data/test/syntax/ruby/test_1.9.1.rb CHANGED Viewed

@@ -10,7 +10,8 @@ class TestSyntaxRuby_V191 < Test::Unit::TestCase
   tests = {
     :implements => {
       :escape => [
-        Escape::Backreference + Escape::ASCII + Escape::Meta + Escape::Unicode
+        Escape::Backreference + Escape::ASCII + Escape::Meta + Escape::Unicode +
+        Escape::Hex + Escape::Octal
       ].flatten,
       :type => [

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: regexp_parser
 version: !ruby/object:Gem::Version
-  version: 0.4.5
+  version: 0.4.6
 platform: ruby
 authors:
 - Ammar Ali
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-09-17 00:00:00.000000000 Z
+date: 2017-09-18 00:00:00.000000000 Z
 dependencies: []
 description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
 email:
@@ -197,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.6.13
+rubygems_version: 2.6.11
 signing_key:
 specification_version: 4
 summary: Scanner, lexer, parser for ruby's regular expressions