RubyGems - regexp_parser - Versions diffs - 0.1.0 - Mend

regexp_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

data/ChangeLog +4 -0
data/LICENSE +22 -0
data/README.rdoc +307 -0
data/Rakefile +91 -0
data/lib/regexp_parser/ctype.rb +48 -0
data/lib/regexp_parser/expression/property.rb +108 -0
data/lib/regexp_parser/expression/set.rb +59 -0
data/lib/regexp_parser/expression.rb +287 -0
data/lib/regexp_parser/lexer.rb +105 -0
data/lib/regexp_parser/parser.rb +417 -0
data/lib/regexp_parser/scanner/property.rl +534 -0
data/lib/regexp_parser/scanner/scanner.rl +712 -0
data/lib/regexp_parser/scanner.rb +3325 -0
data/lib/regexp_parser/syntax/ruby/1.8.6.rb +14 -0
data/lib/regexp_parser/syntax/ruby/1.8.7.rb +14 -0
data/lib/regexp_parser/syntax/ruby/1.8.rb +39 -0
data/lib/regexp_parser/syntax/ruby/1.9.1.rb +39 -0
data/lib/regexp_parser/syntax/ruby/1.9.2.rb +10 -0
data/lib/regexp_parser/syntax/ruby/1.9.3.rb +24 -0
data/lib/regexp_parser/syntax/ruby/1.9.rb +8 -0
data/lib/regexp_parser/syntax/tokens.rb +332 -0
data/lib/regexp_parser/syntax.rb +172 -0
data/lib/regexp_parser.rb +45 -0
data/test/helpers.rb +8 -0
data/test/lexer/test_all.rb +26 -0
data/test/lexer/test_literals.rb +120 -0
data/test/lexer/test_nesting.rb +107 -0
data/test/lexer/test_refcalls.rb +45 -0
data/test/parser/test_all.rb +44 -0
data/test/parser/test_alternation.rb +46 -0
data/test/parser/test_anchors.rb +35 -0
data/test/parser/test_errors.rb +59 -0
data/test/parser/test_escapes.rb +48 -0
data/test/parser/test_expression.rb +51 -0
data/test/parser/test_groups.rb +69 -0
data/test/parser/test_properties.rb +346 -0
data/test/parser/test_quantifiers.rb +236 -0
data/test/parser/test_refcalls.rb +101 -0
data/test/parser/test_sets.rb +99 -0
data/test/scanner/test_all.rb +30 -0
data/test/scanner/test_anchors.rb +35 -0
data/test/scanner/test_errors.rb +36 -0
data/test/scanner/test_escapes.rb +49 -0
data/test/scanner/test_groups.rb +41 -0
data/test/scanner/test_literals.rb +85 -0
data/test/scanner/test_meta.rb +36 -0
data/test/scanner/test_properties.rb +315 -0
data/test/scanner/test_quantifiers.rb +38 -0
data/test/scanner/test_refcalls.rb +45 -0
data/test/scanner/test_scripts.rb +314 -0
data/test/scanner/test_sets.rb +80 -0
data/test/scanner/test_types.rb +30 -0
data/test/syntax/ruby/test_1.8.rb +57 -0
data/test/syntax/ruby/test_1.9.1.rb +39 -0
data/test/syntax/ruby/test_1.9.3.rb +38 -0
data/test/syntax/ruby/test_all.rb +12 -0
data/test/syntax/test_all.rb +19 -0
data/test/test_all.rb +4 -0
metadata +160 -0

data/test/parser/test_quantifiers.rb ADDED Viewed

@@ -0,0 +1,236 @@
+require File.expand_path("../../helpers", __FILE__)
+class TestRegexpParserQuantifiers < Test::Unit::TestCase
+  # ?: zero-or-one
+  def test_parse_zero_or_one_greedy
+    t = RP.parse('a?bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :zero_or_one, t.expressions.first.quantifier.token )
+    assert_equal( 0, t.expressions.first.quantifier.min )
+    assert_equal( 1, t.expressions.first.quantifier.max )
+    assert_equal( :greedy, t.expressions.first.quantifier.mode )
+  end
+  def test_parse_zero_or_one_reluctant
+    t = RP.parse('a??bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :zero_or_one, t.expressions.first.quantifier.token )
+    assert_equal( 0, t.expressions.first.quantifier.min )
+    assert_equal( 1, t.expressions.first.quantifier.max )
+    assert_equal( :reluctant, t.expressions.first.quantifier.mode )
+    assert_equal( true, t.expressions.first.reluctant? )
+  end
+  def test_parse_zero_or_one_possessive
+    t = RP.parse('a?+bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :zero_or_one, t.expressions.first.quantifier.token )
+    assert_equal( 0, t.expressions.first.quantifier.min )
+    assert_equal( 1, t.expressions.first.quantifier.max )
+    assert_equal( :possessive, t.expressions.first.quantifier.mode )
+    assert_equal( true, t.expressions.first.possessive? )
+  end
+  # *: zero-or-more
+  def test_parse_zero_or_more_greedy
+    t = RP.parse('a*bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :zero_or_more, t.expressions.first.quantifier.token )
+    assert_equal( 0, t.expressions.first.quantifier.min )
+    assert_equal( -1, t.expressions.first.quantifier.max )
+    assert_equal( :greedy, t.expressions.first.quantifier.mode )
+  end
+  def test_parse_zero_or_more_reluctant
+    t = RP.parse('a*?bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :zero_or_more, t.expressions.first.quantifier.token )
+    assert_equal( 0, t.expressions.first.quantifier.min )
+    assert_equal( -1, t.expressions.first.quantifier.max )
+    assert_equal( :reluctant, t.expressions.first.quantifier.mode )
+    assert_equal( true, t.expressions.first.reluctant? )
+  end
+  def test_parse_zero_or_more_possessive
+    t = RP.parse('a*+bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :zero_or_more, t.expressions.first.quantifier.token )
+    assert_equal( 0, t.expressions.first.quantifier.min )
+    assert_equal( -1, t.expressions.first.quantifier.max )
+    assert_equal( :possessive, t.expressions.first.quantifier.mode )
+    assert_equal( true, t.expressions.first.possessive? )
+  end
+  # +: one-or-more
+  def test_parse_one_or_more_greedy
+    t = RP.parse('a+bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :one_or_more, t.expressions.first.quantifier.token )
+    assert_equal( 1, t.expressions.first.quantifier.min )
+    assert_equal( -1, t.expressions.first.quantifier.max )
+    assert_equal( :greedy, t.expressions.first.quantifier.mode )
+  end
+  def test_parse_one_or_more_reluctant
+    t = RP.parse('a+?bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :one_or_more, t.expressions.first.quantifier.token )
+    assert_equal( 1, t.expressions.first.quantifier.min )
+    assert_equal( -1, t.expressions.first.quantifier.max )
+    assert_equal( :reluctant, t.expressions.first.quantifier.mode )
+    assert_equal( true, t.expressions.first.reluctant? )
+  end
+  def test_parse_one_or_more_possessive
+    t = RP.parse('a++bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :one_or_more, t.expressions.first.quantifier.token )
+    assert_equal( 1, t.expressions.first.quantifier.min )
+    assert_equal( -1, t.expressions.first.quantifier.max )
+    assert_equal( :possessive, t.expressions.first.quantifier.mode )
+    assert_equal( true, t.expressions.first.possessive? )
+  end
+  # interval: min and max
+  def test_parse_intervals_min_max_greedy
+    t = RP.parse('a{2,4}bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :interval, t.expressions.first.quantifier.token )
+    assert_equal( 2, t.expressions.first.quantifier.min)
+    assert_equal( 4, t.expressions.first.quantifier.max)
+    assert_equal( :greedy, t.expressions.first.quantifier.mode )
+  end
+  def test_parse_intervals_min_max_reluctant
+    t = RP.parse('a{3,5}?bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :interval, t.expressions.first.quantifier.token )
+    assert_equal( 3, t.expressions.first.quantifier.min)
+    assert_equal( 5, t.expressions.first.quantifier.max)
+    assert_equal( :reluctant, t.expressions.first.quantifier.mode )
+    assert_equal( true, t.expressions.first.reluctant? )
+  end
+  def test_parse_intervals_min_max_possessive
+    t = RP.parse('a{2,4}+bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :interval, t.expressions.first.quantifier.token )
+    assert_equal( 2, t.expressions.first.quantifier.min)
+    assert_equal( 4, t.expressions.first.quantifier.max)
+    assert_equal( :possessive, t.expressions.first.quantifier.mode )
+    assert_equal( true, t.expressions.first.possessive? )
+  end
+  # interval: min only
+  def test_parse_intervals_min_only_greedy
+    t = RP.parse('a{2,}bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :interval, t.expressions.first.quantifier.token )
+    assert_equal( 2, t.expressions.first.quantifier.min)
+    assert_equal( -1, t.expressions.first.quantifier.max)
+    assert_equal( :greedy, t.expressions.first.quantifier.mode )
+  end
+  def test_parse_intervals_min_only_reluctant
+    t = RP.parse('a{2,}?bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :interval, t.expressions.first.quantifier.token )
+    assert_equal( 2, t.expressions.first.quantifier.min)
+    assert_equal( -1, t.expressions.first.quantifier.max)
+    assert_equal( :reluctant, t.expressions.first.quantifier.mode )
+    assert_equal( true, t.expressions.first.reluctant? )
+  end
+  def test_parse_intervals_min_only_possessive
+    t = RP.parse('a{3,}+bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :interval, t.expressions.first.quantifier.token )
+    assert_equal( 3, t.expressions.first.quantifier.min)
+    assert_equal( -1, t.expressions.first.quantifier.max)
+    assert_equal( :possessive, t.expressions.first.quantifier.mode )
+    assert_equal( true, t.expressions.first.possessive? )
+  end
+  # interval: max only
+  def test_parse_intervals_max_only_greedy
+    t = RP.parse('a{,2}bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :interval, t.expressions.first.quantifier.token )
+    assert_equal( 0, t.expressions.first.quantifier.min)
+    assert_equal( 2, t.expressions.first.quantifier.max)
+    assert_equal( :greedy, t.expressions.first.quantifier.mode )
+  end
+  def test_parse_intervals_max_only_reluctant
+    t = RP.parse('a{,4}?bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :interval, t.expressions.first.quantifier.token )
+    assert_equal( 0, t.expressions.first.quantifier.min)
+    assert_equal( 4, t.expressions.first.quantifier.max)
+    assert_equal( :reluctant, t.expressions.first.quantifier.mode )
+    assert_equal( true, t.expressions.first.reluctant? )
+  end
+  def test_parse_intervals_max_only_possessive
+    t = RP.parse('a{,3}+bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :interval, t.expressions.first.quantifier.token )
+    assert_equal( 0, t.expressions.first.quantifier.min)
+    assert_equal( 3, t.expressions.first.quantifier.max)
+    assert_equal( :possessive, t.expressions.first.quantifier.mode )
+    assert_equal( true, t.expressions.first.possessive? )
+  end
+  # interval: exact
+  def test_parse_intervals_exact_greedy
+    t = RP.parse('a{2}bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :interval, t.expressions.first.quantifier.token )
+    assert_equal( 2, t.expressions.first.quantifier.min)
+    assert_equal( 2, t.expressions.first.quantifier.max)
+    assert_equal( :greedy, t.expressions.first.quantifier.mode )
+  end
+  def test_parse_intervals_exact_reluctant
+    t = RP.parse('a{3}?bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :interval, t.expressions.first.quantifier.token )
+    assert_equal( 3, t.expressions.first.quantifier.min)
+    assert_equal( 3, t.expressions.first.quantifier.max)
+    assert_equal( :reluctant, t.expressions.first.quantifier.mode )
+    assert_equal( true, t.expressions.first.reluctant? )
+  end
+  def test_parse_intervals_exact_possessive
+    t = RP.parse('a{3}+bc')
+    assert_equal( true, t.expressions.first.quantified? )
+    assert_equal( :interval, t.expressions.first.quantifier.token )
+    assert_equal( 3, t.expressions.first.quantifier.min)
+    assert_equal( 3, t.expressions.first.quantifier.max)
+    assert_equal( :possessive, t.expressions.first.quantifier.mode )
+    assert_equal( true, t.expressions.first.possessive? )
+  end
+end

data/test/parser/test_refcalls.rb ADDED Viewed

@@ -0,0 +1,101 @@
+require File.expand_path("../../helpers", __FILE__)
+class TestParserGroups < Test::Unit::TestCase
+  def test_parse_backref_named_ab
+    t = RP.parse('(?<X>abc)\k<X>')[1]
+    assert_equal( true,  t.is_a?(Backreference::Name) )
+  end
+  def test_parse_backref_named_sq
+    t = RP.parse("(?<X>abc)\\k'X'")[1]
+    assert_equal( true,  t.is_a?(Backreference::Name) )
+  end
+  def test_parse_backref_number_ab
+    t = RP.parse('(abc)\k<1>')[1]
+    assert_equal( true,  t.is_a?(Backreference::Number) )
+  end
+  def test_parse_backref_number_sq
+    t = RP.parse("(abc)\\k'1'")[1]
+    assert_equal( true,  t.is_a?(Backreference::Number) )
+  end
+  def test_parse_backref_number_relative_ab
+    t = RP.parse('(abc)\k<-1>')[1]
+    assert_equal( true,  t.is_a?(Backreference::NumberRelative) )
+  end
+  def test_parse_backref_number_relative_sq
+    t = RP.parse("(abc)\\k'-1'")[1]
+    assert_equal( true,  t.is_a?(Backreference::NumberRelative) )
+  end
+  def test_parse_backref_name_call_ab
+    t = RP.parse('(?<X>abc)\g<X>')[1]
+    assert_equal( true,  t.is_a?(Backreference::NameCall) )
+  end
+  def test_parse_backref_name_call_sq
+    t = RP.parse("(?<X>abc)\\g'X'")[1]
+    assert_equal( true,  t.is_a?(Backreference::NameCall) )
+  end
+  def test_parse_backref_number_call_ab
+    t = RP.parse('(abc)\g<1>')[1]
+    assert_equal( true,  t.is_a?(Backreference::NumberCall) )
+  end
+  def test_parse_backref_number_call_sq
+    t = RP.parse("(abc)\\g'1'")[1]
+    assert_equal( true,  t.is_a?(Backreference::NumberCall) )
+  end
+  def test_parse_backref_number_relative_call_ab
+    t = RP.parse('(abc)\g<-1>')[1]
+    assert_equal( true,  t.is_a?(Backreference::NumberCallRelative) )
+  end
+  def test_parse_backref_number_relative_call_sq
+    t = RP.parse("(abc)\\g'-1'")[1]
+    assert_equal( true,  t.is_a?(Backreference::NumberCallRelative) )
+  end
+  def test_parse_backref_name_nest_level_ab
+    t = RP.parse('(?<X>abc)\k<X-0>')[1]
+    assert_equal( true,  t.is_a?(Backreference::NameNestLevel) )
+  end
+  def test_parse_backref_name_nest_level_sq
+    t = RP.parse("(?<X>abc)\\k'X-0'")[1]
+    assert_equal( true,  t.is_a?(Backreference::NameNestLevel) )
+  end
+  def test_parse_backref_number_nest_level_ab
+    t = RP.parse('(abc)\k<1-0>')[1]
+    assert_equal( true,  t.is_a?(Backreference::NumberNestLevel) )
+  end
+  def test_parse_backref_number_nest_level_sq
+    t = RP.parse("(abc)\\k'1-0'")[1]
+    assert_equal( true,  t.is_a?(Backreference::NumberNestLevel) )
+  end
+end

data/test/parser/test_sets.rb ADDED Viewed

@@ -0,0 +1,99 @@
+require File.expand_path("../../helpers", __FILE__)
+class TestParserSets < Test::Unit::TestCase
+  def test_parse_set_basic
+    exp = RP.parse('[a-c]+', :any).expressions[0]
+    assert_equal( true, exp.is_a?(CharacterSet) )
+    assert_equal( true, exp.include?('a-c') )
+    assert_equal( true, exp.quantified? )
+    assert_equal( 1,  exp.quantifier.min )
+    assert_equal( -1, exp.quantifier.max )
+  end
+  def test_parse_set_posix_class
+    exp = RP.parse('[[:digit:][:lower:]]+', 'ruby/1.9').expressions[0]
+    assert_equal( true,  exp.is_a?(CharacterSet) )
+    assert_equal( true,  exp.include?('[:digit:]') )
+    assert_equal( true,  exp.include?('[:lower:]') )
+    assert_equal( true,  exp.matches?("6") )
+    # TODO: figure out why this generate wrong string, but only after
+    # the assertion above (to_s "piles up")
+    #assert_equal( true,  exp.matches?("v") )
+    #assert_equal( false, exp.matches?("\x48") )
+  end
+  def test_parse_set_members
+    exp = RP.parse('[ac-eh]', :any)[0]
+    assert_equal( true,  exp.include?('a') )
+    assert_equal( true,  exp.include?('c-e') )
+    assert_equal( true,  exp.include?('h') )
+    assert_equal( false, exp.include?(']') )
+  end
+  def test_parse_set_collating_sequence
+    exp = RP.parse('[a[.span-ll.]h]', :any)[0]
+    assert_equal( true,  exp.include?('[.span-ll.]') )
+    assert_equal( false, exp.include?(']') )
+  end
+  def test_parse_set_character_equivalents
+    exp = RP.parse('[a[=e=]h]', :any)[0]
+    assert_equal( true,  exp.include?('[=e=]') )
+    assert_equal( false, exp.include?(']') )
+  end
+  def test_parse_set_nesting_tos
+    pattern = '[a[b[^c]]]'
+    assert_equal( pattern, RP.parse(pattern, 'ruby/1.9').to_s )
+  end
+  def test_parse_set_nesting_include
+    exp = RP.parse('[a[b[^c]]]', 'ruby/1.9')[0]
+    assert_equal( true, exp.is_a?(CharacterSet) )
+    assert_equal( true, exp.include?('a') )
+    assert_equal( true, exp.include?('b') )
+    assert_equal( true, exp.include?('c') )
+  end
+  # character subsets and negated posix classes are not available in ruby 1.8
+  if RUBY_VERSION >= '1.9'
+    def test_parse_set_nesting_matches
+      exp = RP.parse('[a[b[^c]]]', 'ruby/1.9')[0]
+      assert_equal( true,  exp.matches?("b") )
+      # TODO: figure out why this generate wrong string, but only after
+      # the assertion above (to_s "piles up")
+      #assert_equal( false, exp.matches?("c") )
+    end
+    def test_parse_set_nesting_not_matches
+      exp = RP.parse('[a[b[^c]]]', 'ruby/1.9')[0]
+      assert_equal( false, exp.matches?("c") )
+    end
+    def test_parse_set_negated_posix_class
+      exp = RP.parse('[[:^xdigit:][:^lower:]]+', 'ruby/1.9').expressions[0]
+      assert_equal( true,  exp.is_a?(CharacterSet) )
+      assert_equal( true,  exp.include?('[:^xdigit:]') )
+      assert_equal( true,  exp.include?('[:^lower:]') )
+      assert_equal( true,  exp.matches?("GT") )
+    end
+  end
+end

data/test/scanner/test_all.rb ADDED Viewed

@@ -0,0 +1,30 @@
+require File.expand_path("../../helpers", __FILE__)
+%w{
+  anchors errors escapes groups literals meta properties
+  quantifiers scripts sets types
+}.each do|tc|
+  require File.expand_path("../test_#{tc}", __FILE__)
+end
+class TestRegexpScanner < Test::Unit::TestCase
+  def test_scanner_returns_an_array
+    assert_instance_of( Array, RS.scan('abc') )
+  end
+  def test_scanner_returns_tokens_as_arrays
+    tokens = RS.scan('^abc+[^one]{2,3}\b\d\\\C-C$')
+    assert( tokens.all?{|token|
+      token.kind_of?(Array) and token.length == 5
+    }, "Not all tokens are arrays of 5 elements")
+  end
+  def test_scanner_token_count
+    re = /^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i
+    assert_equal(26, RS.scan(re).length )
+  end
+end