RubyGems - regexp_parser - Versions diffs - 0.1.6 → 0.2.0 - Mend

regexp_parser 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

checksums.yaml +4 -4
data/ChangeLog +57 -0
data/Gemfile +8 -0
data/LICENSE +1 -1
data/README.md +225 -206
data/Rakefile +9 -3
data/lib/regexp_parser.rb +7 -11
data/lib/regexp_parser/expression.rb +72 -14
data/lib/regexp_parser/expression/classes/alternation.rb +3 -16
data/lib/regexp_parser/expression/classes/conditional.rb +57 -0
data/lib/regexp_parser/expression/classes/free_space.rb +17 -0
data/lib/regexp_parser/expression/classes/keep.rb +7 -0
data/lib/regexp_parser/expression/classes/set.rb +28 -7
data/lib/regexp_parser/expression/methods/strfregexp.rb +113 -0
data/lib/regexp_parser/expression/methods/tests.rb +116 -0
data/lib/regexp_parser/expression/methods/traverse.rb +63 -0
data/lib/regexp_parser/expression/quantifier.rb +10 -0
data/lib/regexp_parser/expression/sequence.rb +45 -0
data/lib/regexp_parser/expression/subexpression.rb +29 -1
data/lib/regexp_parser/lexer.rb +31 -8
data/lib/regexp_parser/parser.rb +118 -45
data/lib/regexp_parser/scanner.rb +1745 -1404
data/lib/regexp_parser/scanner/property.rl +57 -3
data/lib/regexp_parser/scanner/scanner.rl +161 -34
data/lib/regexp_parser/syntax.rb +12 -2
data/lib/regexp_parser/syntax/ruby/1.9.1.rb +3 -3
data/lib/regexp_parser/syntax/ruby/1.9.3.rb +2 -7
data/lib/regexp_parser/syntax/ruby/2.0.0.rb +4 -1
data/lib/regexp_parser/syntax/ruby/2.1.4.rb +13 -0
data/lib/regexp_parser/syntax/ruby/2.1.5.rb +13 -0
data/lib/regexp_parser/syntax/ruby/2.1.rb +2 -2
data/lib/regexp_parser/syntax/ruby/2.2.0.rb +16 -0
data/lib/regexp_parser/syntax/ruby/2.2.rb +8 -0
data/lib/regexp_parser/syntax/tokens.rb +19 -2
data/lib/regexp_parser/syntax/tokens/conditional.rb +22 -0
data/lib/regexp_parser/syntax/tokens/keep.rb +14 -0
data/lib/regexp_parser/syntax/tokens/unicode_property.rb +45 -4
data/lib/regexp_parser/token.rb +23 -8
data/lib/regexp_parser/version.rb +5 -0
data/regexp_parser.gemspec +35 -0
data/test/expression/test_all.rb +6 -1
data/test/expression/test_base.rb +19 -0
data/test/expression/test_conditionals.rb +114 -0
data/test/expression/test_free_space.rb +33 -0
data/test/expression/test_set.rb +61 -0
data/test/expression/test_strfregexp.rb +214 -0
data/test/expression/test_subexpression.rb +24 -0
data/test/expression/test_tests.rb +99 -0
data/test/expression/test_to_h.rb +48 -0
data/test/expression/test_to_s.rb +46 -0
data/test/expression/test_traverse.rb +164 -0
data/test/lexer/test_all.rb +16 -3
data/test/lexer/test_conditionals.rb +101 -0
data/test/lexer/test_keep.rb +24 -0
data/test/lexer/test_literals.rb +51 -51
data/test/lexer/test_nesting.rb +62 -62
data/test/lexer/test_refcalls.rb +18 -20
data/test/parser/test_all.rb +18 -3
data/test/parser/test_alternation.rb +11 -14
data/test/parser/test_conditionals.rb +148 -0
data/test/parser/test_escapes.rb +29 -5
data/test/parser/test_free_space.rb +139 -0
data/test/parser/test_groups.rb +40 -0
data/test/parser/test_keep.rb +21 -0
data/test/scanner/test_all.rb +8 -2
data/test/scanner/test_conditionals.rb +166 -0
data/test/scanner/test_escapes.rb +8 -5
data/test/scanner/test_free_space.rb +133 -0
data/test/scanner/test_groups.rb +28 -0
data/test/scanner/test_keep.rb +33 -0
data/test/scanner/test_properties.rb +4 -0
data/test/scanner/test_scripts.rb +71 -1
data/test/syntax/ruby/test_1.9.3.rb +2 -2
data/test/syntax/ruby/test_2.0.0.rb +38 -0
data/test/syntax/ruby/test_2.2.0.rb +38 -0
data/test/syntax/ruby/test_all.rb +1 -8
data/test/syntax/ruby/test_files.rb +104 -0
data/test/test_all.rb +2 -1
data/test/token/test_all.rb +2 -0
data/test/token/test_token.rb +109 -0
metadata +75 -21
data/VERSION.yml +0 -5
data/lib/regexp_parser/ctype.rb +0 -48
data/test/syntax/ruby/test_2.x.rb +0 -46

data/test/parser/test_escapes.rb CHANGED Viewed

@@ -2,10 +2,6 @@ require File.expand_path("../../helpers", __FILE__)
 class TestParserEscapes < Test::Unit::TestCase
-  def test_parse_control_sequence_short
-    #root = RP.parse(/\b\d\\\c2\C-C\M-\C-2/)
-  end
   tests = {
     /a\ac/    => [1, :escape,   :bell,              EscapeSequence::Bell],
     /a\ec/    => [1, :escape,   :escape,            EscapeSequence::AsciiEscape],
@@ -33,7 +29,7 @@ class TestParserEscapes < Test::Unit::TestCase
   count = 0
   tests.each do |pattern, test|
-    define_method "test_parse_anchor_#{test[2]}_#{count+=1}" do
+    define_method "test_parse_escape_#{test[2]}_#{count+=1}" do
       root = RP.parse(pattern, 'ruby/1.9')
       exp  = root.expressions[test[0]]
@@ -45,4 +41,32 @@ class TestParserEscapes < Test::Unit::TestCase
     end
   end
+  def test_parse_escape_control_sequence_lower
+    root = RP.parse(/a\\\c2b/)
+    assert_equal( EscapeSequence::Control,  root[2].class )
+    assert_equal( '\\c2',                   root[2].text )
+  end
+  def test_parse_escape_control_sequence_upper
+    root = RP.parse(/\d\\\C-C\w/)
+    assert_equal( EscapeSequence::Control,  root[2].class )
+    assert_equal( '\\C-C',                  root[2].text )
+  end
+  def test_parse_escape_meta_sequence
+    root = RP.parse(/\Z\\\M-Z/n)
+    assert_equal( EscapeSequence::Meta,  root[2].class )
+    assert_equal( '\\M-Z',               root[2].text )
+  end
+  def test_parse_escape_meta_control_sequence
+    root = RP.parse(/\A\\\M-\C-X/n)
+    assert_equal( EscapeSequence::MetaControl,  root[2].class )
+    assert_equal( '\\M-\\C-X',                  root[2].text )
+  end
 end

data/test/parser/test_free_space.rb ADDED Viewed

@@ -0,0 +1,139 @@
+require File.expand_path("../../helpers", __FILE__)
+class ParserFreeSpace < Test::Unit::TestCase
+  def test_parse_free_space_spaces
+    regexp = /a ? b * c + d{2,4}/x
+    root   = RP.parse(regexp)
+    0.upto(6) do |i|
+      if i%2 == 1
+        # Consecutive spaces get merged by the parser, thus the two spaces.
+        assert_equal( WhiteSpace,   root[i].class )
+        assert_equal( '  ',    root[i].text )
+      else
+        assert_equal( Literal, root[i].class )
+        assert_equal( true,    root[i].quantified? )
+      end
+    end
+  end
+  def test_parse_non_free_space_literals
+    regexp = /a b c d/
+    root   = RP.parse(regexp)
+    assert_equal( Literal,    root.first.class )
+    assert_equal( 'a b c d',  root.first.text )
+  end
+  def test_parse_free_space_comments
+    regexp = %r{
+      a   ?     # One letter
+      b {2,5}   # Another one
+      [c-g]  +  # A set
+      (h|i|j) | # A group
+      klm *
+      nop +
+    }x
+    root = RP.parse(regexp)
+    alt = root.first
+    assert_equal( Alternation, alt.class )
+    alt_1 = alt.alternatives.first
+    assert_equal( Alternative, alt_1.class )
+    assert_equal( 15, alt_1.length )
+    [0, 2, 4, 6, 8, 12, 14].each do |i|
+      assert_equal( WhiteSpace, alt_1[i].class )
+    end
+    [3, 7, 11].each do |i|
+      assert_equal( Comment, alt_1[i].class )
+    end
+    alt_2 = alt.alternatives.last
+    assert_equal( Alternative, alt_2.class )
+    assert_equal( 7, alt_2.length )
+    [0, 2, 4, 6].each do |i|
+      assert_equal( WhiteSpace, alt_2[i].class )
+    end
+    assert_equal( Comment, alt_2[1].class )
+  end
+  def test_parse_free_space_nested_comments
+    # Tests depend on spacing and indentation, obviously.
+    regexp = %r{
+      # Group one
+      (
+       abc  # Comment one
+       \d?  # Optional \d
+      )+
+      # Group two
+      (
+       def  # Comment two
+       \s?  # Optional \s
+      )?
+    }x
+    root = RP.parse(regexp)
+    top_comment_1 = root[1]
+    assert_equal( Comment, top_comment_1.class )
+    assert_equal( "# Group one\n", top_comment_1.text )
+    assert_equal( 7, top_comment_1.starts_at )
+    top_comment_2 = root[5]
+    assert_equal( Comment, top_comment_2.class )
+    assert_equal( "# Group two\n", top_comment_2.text )
+    assert_equal( 95, top_comment_2.starts_at )
+    # Nested comments
+    [3, 7].each_with_index do |g, i|
+      group = root[g]
+      [3, 7].each do |c|
+        comment = group[c]
+        assert_equal( Comment, comment.class )
+        assert_equal( 14,      comment.text.length )
+      end
+    end
+  end
+  def test_parse_free_space_quantifiers
+    regexp = %r{
+      a
+      # comment 1
+      ?
+      (
+       b # comment 2
+       # comment 3
+       +
+      )
+      # comment 4
+      *
+    }x
+    root = RP.parse(regexp)
+    literal_1 = root[1]
+    assert_equal( Literal,        literal_1.class )
+    assert_equal( true,           literal_1.quantified? )
+    assert_equal( :zero_or_one,   literal_1.quantifier.token )
+    group = root[5]
+    assert_equal( Group::Capture, group.class )
+    assert_equal( true,           group.quantified? )
+    assert_equal( :zero_or_more,  group.quantifier.token )
+    literal_2 = group[1]
+    assert_equal( Literal,        literal_2.class )
+    assert_equal( true,           literal_2.quantified? )
+    assert_equal( :one_or_more,   literal_2.quantifier.token )
+  end
+end

data/test/parser/test_groups.rb CHANGED Viewed

@@ -34,6 +34,46 @@ class TestParserGroups < Test::Unit::TestCase
     assert_equal( false, t.expressions[0].expressions[1].x? )
   end
+  if RUBY_VERSION >= '2.0'
+    def test_parse_options_dau
+      t = RP.parse('(?dua:abc)')
+      assert_equal( true,  t.expressions[0].d? )
+      assert_equal( true,  t.expressions[0].a? )
+      assert_equal( true,  t.expressions[0].u? )
+    end
+    def test_parse_nested_options_dau
+      t = RP.parse('(?u:a(?d:b))')
+      assert_equal( true,  t.expressions[0].u? )
+      assert_equal( false, t.expressions[0].d? )
+      assert_equal( false, t.expressions[0].a? )
+      assert_equal( true,  t.expressions[0].expressions[1].d? )
+      assert_equal( false, t.expressions[0].expressions[1].a? )
+      assert_equal( false, t.expressions[0].expressions[1].u? )
+    end
+    def test_parse_nested_options_da
+      t = RP.parse('(?di-xm:a(?da-x:b))')
+      assert_equal( true,  t.expressions[0].d? )
+      assert_equal( true,  t.expressions[0].i? )
+      assert_equal( false, t.expressions[0].m? )
+      assert_equal( false, t.expressions[0].x? )
+      assert_equal( false, t.expressions[0].a? )
+      assert_equal( false, t.expressions[0].u? )
+      assert_equal( true,  t.expressions[0].expressions[1].d? )
+      assert_equal( true,  t.expressions[0].expressions[1].a? )
+      assert_equal( false, t.expressions[0].expressions[1].u? )
+      assert_equal( false, t.expressions[0].expressions[1].x? )
+      assert_equal( false, t.expressions[0].expressions[1].m? )
+      assert_equal( false, t.expressions[0].expressions[1].i? )
+    end
+  end
   def test_parse_lookahead
     t = RP.parse('(?=abc)(?!def)', 'ruby/1.8')

data/test/parser/test_keep.rb ADDED Viewed

@@ -0,0 +1,21 @@
+require File.expand_path("../../helpers", __FILE__)
+class ParserKeep < Test::Unit::TestCase
+  def test_parse_keep
+    regexp = /ab\Kcd/
+    root   = RP.parse(regexp)
+    assert_equal( Keep::Mark, root[1].class )
+    assert_equal( '\\K',      root[1].text )
+  end
+  def test_parse_keep_nested
+    regexp = /(a\\\Kb)/
+    root   = RP.parse(regexp)
+    assert_equal( Keep::Mark, root[0][2].class )
+    assert_equal( '\\K',      root[0][2].text )
+  end
+end

data/test/scanner/test_all.rb CHANGED Viewed

@@ -1,12 +1,18 @@
 require File.expand_path("../../helpers", __FILE__)
 %w{
-  anchors errors escapes groups literals meta properties
-  quantifiers scripts sets types
+  anchors errors escapes free_space groups literals
+  meta properties quantifiers scripts sets types
 }.each do|tc|
   require File.expand_path("../test_#{tc}", __FILE__)
 end
+if RUBY_VERSION >= '2.0.0'
+  %w{conditionals keep}.each do|tc|
+    require File.expand_path("../test_#{tc}", __FILE__)
+  end
+end
 class TestRegexpScanner < Test::Unit::TestCase
   def test_scanner_returns_an_array

data/test/scanner/test_conditionals.rb ADDED Viewed

@@ -0,0 +1,166 @@
+require File.expand_path("../../helpers", __FILE__)
+class ScannerConditionals < Test::Unit::TestCase
+  # Basic conditional scan token tests
+  tests = {
+    /(?(1)T|F)/           => [0, :conditional,  :open,              '(?',   0,  2],
+    /(?(2)T|F)/           => [1, :conditional,  :condition_open,    '(',    2,  3],
+    /(?(3)T|F)/           => [2, :conditional,  :condition,         '3',    3,  4],
+    /(?(4)T|F)/           => [3, :conditional,  :condition_close,   ')',    4,  5],
+    /(?(5)T|F)/           => [4, :literal,      :literal,           'T',    5,  6],
+    /(?(6)T|F)/           => [5, :conditional,  :separator,         '|',    6,  7],
+    /(?(7)T|F)/           => [6, :literal,      :literal,           'F',    7,  8],
+    /(?(8)T|F)/           => [7, :conditional,  :close,             ')',    8,  9],
+    /(?(1)TRUE)/          => [5, :conditional,  :close,             ')',    9, 10],
+    /(?(1)TRUE|)/         => [5, :conditional,  :separator,         '|',    9, 10],
+    /(?(2)TRUE|)/         => [6, :conditional,  :close,             ')',   10, 11],
+    /(?<N>A)(?(<N>)T|F)/  => [5, :conditional,  :condition,         '<N>', 10, 13],
+    /(?'N'A)(?('N')T|F)/  => [5, :conditional,  :condition,         "'N'", 10, 13],
+  }
+  count = 0
+  tests.each do |pattern, test|
+    define_method "test_scan_#{test[1]}_#{test[2]}_#{count+=1}" do
+      tokens = RS.scan(pattern)
+      token = tokens[test[0]]
+      assert_equal( test[1,5], token )
+    end
+  end
+  def test_scan_conditional_nested
+    regexp = /(a(b(c)))(?(1)(?(2)d|(?(3)e|f))|(?(2)(?(1)g|h)))/
+    tokens = RS.scan(regexp)
+    [ [ 0, :group,        :capture,         '(',   0,  1],
+      [ 1, :literal,      :literal,         'a',   1,  2],
+      [ 2, :group,        :capture,         '(',   2,  3],
+      [ 3, :literal,      :literal,         'b',   3,  4],
+      [ 4, :group,        :capture,         '(',   4,  5],
+      [ 5, :literal,      :literal,         'c',   5,  6],
+      [ 6, :group,        :close,           ')',   6,  7],
+      [ 7, :group,        :close,           ')',   7,  8],
+      [ 8, :group,        :close,           ')',   8,  9],
+      [ 9, :conditional,  :open,            '(?',  9, 11],
+      [10, :conditional,  :condition_open,  '(',  11, 12],
+      [11, :conditional,  :condition,       '1',  12, 13],
+      [12, :conditional,  :condition_close, ')',  13, 14],
+      [13, :conditional,  :open,            '(?', 14, 16],
+      [14, :conditional,  :condition_open,  '(',  16, 17],
+      [15, :conditional,  :condition,       '2',  17, 18],
+      [16, :conditional,  :condition_close, ')',  18, 19],
+      [17, :literal,      :literal,         'd',  19, 20],
+      [18, :conditional,  :separator,       '|',  20, 21],
+      [19, :conditional,  :open,            '(?', 21, 23],
+      [20, :conditional,  :condition_open,  '(',  23, 24],
+      [21, :conditional,  :condition,       '3',  24, 25],
+      [22, :conditional,  :condition_close, ')',  25, 26],
+      [23, :literal,      :literal,         'e',  26, 27],
+      [24, :conditional,  :separator,       '|',  27, 28],
+      [25, :literal,      :literal,         'f',  28, 29],
+      [26, :conditional,  :close,           ')',  29, 30],
+      [27, :conditional,  :close,           ')',  30, 31],
+      [28, :conditional,  :separator,       '|',  31, 32],
+      [29, :conditional,  :open,            '(?', 32, 34],
+      [30, :conditional,  :condition_open,  '(',  34, 35],
+      [31, :conditional,  :condition,       '2',  35, 36],
+      [32, :conditional,  :condition_close, ')',  36, 37],
+      [33, :conditional,  :open,            '(?', 37, 39],
+      [34, :conditional,  :condition_open,  '(',  39, 40],
+      [35, :conditional,  :condition,       '1',  40, 41],
+      [36, :conditional,  :condition_close, ')',  41, 42],
+      [37, :literal,      :literal,         'g',  42, 43],
+      [38, :conditional,  :separator,       '|',  43, 44],
+      [39, :literal,      :literal,         'h',  44, 45],
+      [40, :conditional,  :close,           ')',  45, 46],
+      [41, :conditional,  :close,           ')',  46, 47],
+      [42, :conditional,  :close,           ')',  47, 48]
+    ].each do |test|
+      assert_equal( test[1,5], tokens[test[0]] )
+    end
+  end
+  def test_scan_conditional_nested_groups
+    regexp = /((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))/
+    tokens = RS.scan(regexp)
+    [ [ 0, :group,        :capture,         '(',   0,  1],
+      [ 1, :group,        :capture,         '(',   1,  2],
+      [ 2, :literal,      :literal,         'a',   2,  3],
+      [ 3, :group,        :close,           ')',   3,  4],
+      [ 4, :meta,         :alternation,     '|',   4,  5],
+      [ 5, :group,        :capture,         '(',   5,  6],
+      [ 6, :literal,      :literal,         'b',   6,  7],
+      [ 7, :group,        :close,           ')',   7,  8],
+      [ 8, :meta,         :alternation,     '|',   8,  9],
+      [ 9, :group,        :capture,         '(',   9, 10],
+      [10, :conditional,  :open,            '(?', 10, 12],
+      [11, :conditional,  :condition_open,  '(',  12, 13],
+      [12, :conditional,  :condition,       '2',  13, 14],
+      [13, :conditional,  :condition_close, ')',  14, 15],
+      [14, :group,        :capture,         '(',  15, 16],
+      [15, :literal,      :literal,         'c',  16, 17],
+      [16, :group,        :capture,         '(',  17, 18],
+      [17, :literal,      :literal,         'd',  18, 19],
+      [18, :meta,         :alternation,     '|',  19, 20],
+      [19, :literal,      :literal,         'e',  20, 21],
+      [20, :group,        :close,           ')',  21, 22],
+      [21, :quantifier,   :one_or_more,     '+',  22, 23],
+      [22, :group,        :close,           ')',  23, 24],
+      [23, :quantifier,   :zero_or_one,     '?',  24, 25],
+      [24, :conditional,  :separator,       '|',  25, 26],
+      [25, :conditional,  :open,            '(?', 26, 28],
+      [26, :conditional,  :condition_open,  '(',  28, 29],
+      [27, :conditional,  :condition,       '3',  29, 30],
+      [28, :conditional,  :condition_close, ')',  30, 31],
+      [29, :literal,      :literal,         'f',  31, 32],
+      [30, :conditional,  :separator,       '|',  32, 33],
+      [31, :conditional,  :open,            '(?', 33, 35],
+      [32, :conditional,  :condition_open,  '(',  35, 36],
+      [33, :conditional,  :condition,       '4',  36, 37],
+      [34, :conditional,  :condition_close, ')',  37, 38],
+      [35, :group,        :capture,         '(',  38, 39],
+      [36, :literal,      :literal,         'g',  39, 40],
+      [37, :meta,         :alternation,     '|',  40, 41],
+      [38, :group,        :capture,         '(',  41, 42],
+      [39, :literal,      :literal,         'h',  42, 43],
+      [40, :group,        :close,           ')',  43, 44],
+      [41, :group,        :capture,         '(',  44, 45],
+      [42, :literal,      :literal,         'i',  45, 46],
+      [43, :group,        :close,           ')',  46, 47],
+      [44, :group,        :close,           ')',  47, 48],
+      [45, :conditional,  :close,           ')',  48, 49],
+      [46, :conditional,  :close,           ')',  49, 50],
+      [47, :conditional,  :close,           ')',  50, 51],
+      [48, :group,        :close,           ')',  51, 52],
+      [49, :group,        :close,           ')',  52, 53]
+    ].each do |test|
+      assert_equal( test[1,5], tokens[test[0]] )
+    end
+  end
+  def test_scan_conditional_nested_alternation
+    regexp = /(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p/
+    tokens = RS.scan(regexp)
+    [9, 11, 17, 19, 32, 34, 40, 42, 46, 48].each do |token|
+      assert_equal(:meta,         tokens[token][0])
+      assert_equal(:alternation,  tokens[token][1])
+      assert_equal('|',           tokens[token][2])
+      assert_equal(1,             tokens[token][4] - tokens[token][3])
+    end
+    [14, 37].each do |token|
+      assert_equal(:conditional,  tokens[token][0])
+      assert_equal(:separator,    tokens[token][1])
+      assert_equal('|',           tokens[token][2])
+      assert_equal(1,             tokens[token][4] - tokens[token][3])
+    end
+  end
+end