RubyGems - dhaka - Versions diffs - 2.1.0 → 2.2.0 - Mend

dhaka 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

data/lib/evaluator/evaluator.rb +18 -17
data/lib/grammar/grammar.rb +4 -5
data/lib/lexer/dfa.rb +63 -13
data/lib/lexer/lexeme.rb +3 -4
data/lib/lexer/lexer.rb +12 -3
data/lib/lexer/lexer_run.rb +22 -10
data/lib/lexer/regex_grammar.rb +88 -14
data/lib/lexer/regex_parser.rb +1523 -1401
data/lib/lexer/specification.rb +29 -3
data/lib/lexer/state.rb +32 -9
data/lib/lexer/state_machine.rb +2 -2
data/lib/parser/channel.rb +4 -4
data/lib/parser/parser.rb +17 -12
data/lib/parser/parser_state.rb +3 -1
data/test/chittagong/chittagong_lexer.rb +63 -63
data/test/chittagong/chittagong_lexer.rb.rej +189 -0
data/test/chittagong/chittagong_lexer_specification.rb +6 -8
data/test/chittagong/chittagong_parser.rb +659 -659
data/test/chittagong/chittagong_parser.rb.rej +1623 -0
data/test/{another_lalr_but_not_slr_grammar.rb → core/another_lalr_but_not_slr_grammar.rb} +1 -1
data/test/{compiled_parser_test.rb → core/compiled_parser_test.rb} +1 -1
data/test/core/dfa_test.rb +170 -0
data/test/{evaluator_test.rb → core/evaluator_test.rb} +3 -3
data/test/{grammar_test.rb → core/grammar_test.rb} +3 -3
data/test/{lalr_but_not_slr_grammar.rb → core/lalr_but_not_slr_grammar.rb} +0 -0
data/test/core/lexer_test.rb +139 -0
data/test/{malformed_grammar.rb → core/malformed_grammar.rb} +0 -0
data/test/{malformed_grammar_test.rb → core/malformed_grammar_test.rb} +1 -1
data/test/{nullable_grammar.rb → core/nullable_grammar.rb} +0 -0
data/test/{parse_result_test.rb → core/parse_result_test.rb} +1 -1
data/test/{parser_state_test.rb → core/parser_state_test.rb} +1 -1
data/test/{parser_test.rb → core/parser_test.rb} +2 -2
data/test/{precedence_grammar.rb → core/precedence_grammar.rb} +0 -0
data/test/{precedence_grammar_test.rb → core/precedence_grammar_test.rb} +1 -1
data/test/{rr_conflict_grammar.rb → core/rr_conflict_grammar.rb} +0 -0
data/test/{simple_grammar.rb → core/simple_grammar.rb} +0 -0
data/test/{sr_conflict_grammar.rb → core/sr_conflict_grammar.rb} +0 -0
metadata +25 -22
data/test/lexer_test.rb +0 -215

data/test/{another_lalr_but_not_slr_grammar.rb → core/another_lalr_but_not_slr_grammar.rb} RENAMED Viewed

@@ -1,4 +1,4 @@
-require File.dirname(__FILE__)+'/../lib/dhaka'
+require File.dirname(__FILE__)+'/../dhaka_test_helper'
 class AnotherLALRButNotSLRGrammar < Dhaka::Grammar

data/test/{compiled_parser_test.rb → core/compiled_parser_test.rb} RENAMED Viewed

@@ -1,4 +1,4 @@
-require File.dirname(__FILE__) + '/dhaka_test_helper'
+require File.dirname(__FILE__) + '/../dhaka_test_helper'
 require File.dirname(__FILE__) + "/simple_grammar"
 eval(Dhaka::Parser.new(SimpleGrammar).compile_to_ruby_source_as(:SimpleParser))

data/test/core/dfa_test.rb ADDED Viewed

@@ -0,0 +1,170 @@
+require File.dirname(__FILE__) + '/../dhaka_test_helper'
+class TestDFA < Test::Unit::TestCase
+  def test_build_AST_from_parse_tree_and_compute_follow_first_and_last
+    root      = Dhaka::LexerSupport::RegexParser.parse(Dhaka::LexerSupport::RegexTokenizer.tokenize("(a|b)*abb"))
+    star_node = root.left.left.left.left
+    or_node   = star_node.child
+    first_a   = or_node.children[0]
+    first_b   = or_node.children[1]
+    second_a  = root.left.left.left.right
+    second_b  = root.left.left.right
+    last_b    = root.left.right
+    sentinel  = root.right
+    assert(!root.nullable)
+    assert(!root.left.nullable)
+    assert(!root.left.left.nullable)
+    assert(star_node.nullable)
+    assert_equal(Set.new([first_a, first_b, second_a]), root.first)
+    assert_equal(Set.new([last_b]), root.left.last)
+    root.calculate_follow_sets
+    assert_equal(Set.new([first_a, first_b, second_a]), first_a.follow_set)
+    assert_equal(Set.new([first_a, first_b, second_a]), first_b.follow_set)
+    assert_equal(Set.new([second_b]), second_a.follow_set)
+    assert_equal(Set.new([last_b]), second_b.follow_set)
+    assert_equal(Set.new([sentinel]), last_b.follow_set)
+  end
+  def test_DFA_raises_exception_if_empty_regex
+    machine = Dhaka::LexerSupport::DFA.new("")
+    flunk "Should have thrown an unexpected end of regex exception"
+  rescue Dhaka::LexerSupport::InvalidRegexException => e
+    assert_equal("Unexpected end of regex.", e.message)
+  end
+  def test_DFA_raises_exception_if_error_parsing_regex
+    machine = Dhaka::LexerSupport::DFA.new("(a|b)*+abb")
+    flunk "Should have thrown an unexpected token exception"
+  rescue Dhaka::LexerSupport::InvalidRegexException => e
+    assert_equal("Unexpected token +: (a|b)*>>>+abb", e.message)
+  end
+  def test_match_a_regex
+    machine = Dhaka::LexerSupport::DFA.new("(a|b)*abb")
+    assert_full_match(machine, "abababb")
+    assert_full_match(machine, "ababaabb")
+    assert_empty(machine.match("abababab"))
+    assert_equal("abababb", machine.match("abababbc"))
+    assert_equal("abababb", machine.match("abababbaa"))
+  end
+  def test_match_a_regex_with_optional_characters_at_the_end
+    machine = Dhaka::LexerSupport::DFA.new("bad(c|d)+(ab)*")
+    assert_full_match(machine, "badccddabab")
+    assert_full_match(machine, "baddcc")
+    assert_empty(machine.match("badab"))
+    assert_empty(machine.match("bacdab"))
+  end
+  def test_match_a_nullable_regex
+    machine = Dhaka::LexerSupport::DFA.new("(ab)*")
+    assert_full_match(machine, "abab")
+    assert_full_match(machine, "ab")
+    assert_full_match(machine, "")
+    assert_equal("", machine.match("b"))
+  end
+  def test_match_a_regex_with_the_dot_character
+    machine = Dhaka::LexerSupport::DFA.new("ab.*cd")
+    assert_full_match(machine, "abacd")
+    assert_full_match(machine, "abcd")
+    assert_full_match(machine, "abAcd")
+    assert_empty(machine.match("ab999c"))
+  end
+  def test_match_a_regex_with_sets
+    machine = Dhaka::LexerSupport::DFA.new("ab[j-lu]*cd")
+    assert_empty(machine.match("abacd"))
+    assert_full_match(machine, "abcd")
+    assert_full_match(machine, "abjklucd")
+    assert_empty(machine.match("abijklucd"))
+    assert_empty(machine.match("ab999c"))
+  end
+  def test_match_a_regex_with_negative_sets
+    machine = Dhaka::LexerSupport::DFA.new("ab[^j-lr]*cd")
+    assert_full_match(machine, "abcd")
+    assert_empty(machine.match("abjcd"))
+    assert_empty(machine.match("abrcd"))
+    assert_empty(machine.match("abijklucd"))
+    assert_full_match(machine, "abyqcd")
+  end
+  def test_match_a_regex_with_sets_containing_escaped_characters
+    machine = Dhaka::LexerSupport::DFA.new("ab[\\^\\-.]*cd")
+    assert_full_match(machine, "abcd")
+    assert_empty(machine.match("abjcd"))
+    assert_full_match(machine, "ab^-.cd")
+    assert_empty(machine.match("abijklucd"))
+    assert_empty(machine.match("ab\\cd"))
+  end
+  def test_match_a_regex_using_unescaped_caret_and_dash_characters
+    machine = Dhaka::LexerSupport::DFA.new("(\\^-)+")
+    assert_full_match(machine, "^-")
+    assert_full_match(machine, "^-^-")
+    assert_empty(machine.match("?cd"))
+  end
+  def test_match_a_regex_using_escape_characters
+    machine = Dhaka::LexerSupport::DFA.new(%q/(-\?\(\)\\\\)*/)
+    assert_full_match(machine, "-?()\\")
+  end
+  def test_match_a_regex_using_lt_and_gt
+    machine = Dhaka::LexerSupport::DFA.new('<.+>')
+    assert_full_match(machine, "<ab>")
+    assert_full_match(machine, "<absdf><sdg><sse>")
+    assert_empty(machine.match("ab>"))
+  end
+  def test_simulating_curly_brace_quantifiers
+    machine = Dhaka::LexerSupport::DFA.new('aaa?a?a?')
+    assert_full_match(machine, "aa")
+    assert_full_match(machine, "aaa")
+    assert_full_match(machine, "aaaa")
+    assert_full_match(machine, "aaaaa")
+    assert_equal("aaaaa", machine.match("aaaaaa"))
+    assert_empty(machine.match("a"))
+  end
+  def test_matching_a_regex_with_lookahead
+    machine = Dhaka::LexerSupport::DFA.new('ab/cd')
+    assert_equal("ab", machine.match("abcd"))
+    assert_empty(machine.match("ab"))
+    assert_empty(machine.match("abef"))
+  end
+  def test_matching_a_regex_with_nullable_pre_lookahead_regex
+    machine = Dhaka::LexerSupport::DFA.new('(ab)*/cd')
+    assert_equal("ab", machine.match("abcd"))
+    assert_equal("ababab", machine.match("abababcd"))
+    assert_empty(machine.match("ababc"))
+    assert_empty(machine.match("abef"))
+  end
+  def test_matching_a_regex_with_post_lookahead_characters_in_common_with_pre_lookahead_characters
+    machine = Dhaka::LexerSupport::DFA.new('(ab)+/abcd')
+    assert_equal("ababab", machine.match("abababcd"))
+    assert_empty(machine.match("ab"))
+    assert_empty(machine.match("abef"))
+  end
+  def test_machine_with_nullable_lookahead
+    machine = Dhaka::LexerSupport::DFA.new(":/[aA\n\r\t]*")
+    assert_equal(":", machine.match(":"))
+  end
+  private
+    def assert_full_match(machine, input)
+      assert_equal(input, machine.match(input))
+    end
+    def assert_empty(input)
+      assert(input.empty?)
+    end
+end

data/test/{evaluator_test.rb → core/evaluator_test.rb} RENAMED Viewed

@@ -1,13 +1,13 @@
-require File.dirname(__FILE__) + '/dhaka_test_helper'
+require File.dirname(__FILE__) + '/../dhaka_test_helper'
 require File.dirname(__FILE__) + '/simple_grammar'
 class TestEvaluator < Test::Unit::TestCase
-  def test_throws_exception_if_evaluation_rules_not_completely_defined
+  def test_throws_exception_if_evaluation_rules_not_completely_defined_and_raise_error_option_set_to_true
     assert_raise(Dhaka::EvaluatorDefinitionError) do
       eval(
       "class IncompleteSimpleEvaluator < Dhaka::Evaluator
         self.grammar = SimpleGrammar
-        define_evaluation_rules do
+        define_evaluation_rules(:raise_error => true) do
           for_start do
             something
           end

data/test/{grammar_test.rb → core/grammar_test.rb} RENAMED Viewed

@@ -1,4 +1,4 @@
-require File.dirname(__FILE__) + '/dhaka_test_helper'
+require File.dirname(__FILE__) + '/../dhaka_test_helper'
 require File.dirname(__FILE__) + '/simple_grammar'
 class SimpleGrammarTest < Test::Unit::TestCase
@@ -43,7 +43,7 @@ class SimpleGrammarTest < Test::Unit::TestCase
     start_production  = @grammar.production_named('start')
     start_item        = Dhaka::Item.new(start_production, 0)
     kernel            = Set.new([start_item])
-    channels, closure = @grammar.closure(kernel)
+    closure, channels = @grammar.closure(kernel)
     expected_items    = Set.new(['_Start_ ::= -> S # []',
                               'S ::= -> E []',
                               'E ::= -> E - T []',
@@ -60,7 +60,7 @@ class SimpleGrammarTest < Test::Unit::TestCase
           'Spontaneous Channel from _Start_ ::= -> S # [] to S ::= -> E []'
           ])
     assert_equal(expected_items, Set.new(closure.values.collect{|item| item.to_s}))
-    assert_equal(expected_channels, Set.new(channels.collect{|item| item.to_s}))
+    assert_equal(expected_channels, Set.new(channels.values.collect{|set| set.to_a}.flatten.collect{|item| item.to_s}))
   end
   def test_export_grammar_to_bnf

data/test/{lalr_but_not_slr_grammar.rb → core/lalr_but_not_slr_grammar.rb} RENAMED Viewed

File without changes

data/test/core/lexer_test.rb ADDED Viewed

@@ -0,0 +1,139 @@
+require File.dirname(__FILE__) + '/../dhaka_test_helper'
+class TestLexer < Test::Unit::TestCase
+  class LexerSpec < Dhaka::LexerSpecification
+    for_pattern 'zz' do
+      "two zs"
+    end
+    for_pattern '\w(\w|\d)*' do
+      "word #{current_lexeme.value}"
+    end
+    # can optionally use Regexps as well
+    for_pattern(/(\d)*(\.\d+)?/) do
+      "number #{current_lexeme.value}"
+    end
+    for_pattern '<.*>' do
+      "tag #{current_lexeme.value}"
+    end
+    for_pattern ' +' do
+      #ignores whitespace
+    end
+    for_pattern "\n+" do
+      "newline"
+    end
+    for_pattern "\r+" do
+      "carriage return"
+    end
+  end
+  def test_lexer_with_valid_input
+    lexer = Dhaka::Lexer.new(LexerSpec)
+    eval(lexer.compile_to_ruby_source_as(:SomeLexer))
+    input = "these are words a z zz caPITALIZED word \r
+    this is a float 12.00 an integer 134 a float without a leading digit .2335 another word1"
+    results =  SomeLexer.lex(input).collect
+    assert_equal(
+    ["word these",
+     "word are",
+     "word words",
+     "word a",
+     "word z",
+     "two zs",
+     "word caPITALIZED",
+     "word word",
+     "carriage return",
+     "newline",
+     "word this",
+     "word is",
+     "word a",
+     "word float",
+     "number 12.00",
+     "word an",
+     "word integer",
+     "number 134",
+     "word a",
+     "word float",
+     "word without",
+     "word a",
+     "word leading",
+     "word digit",
+     "number .2335",
+     "word another",
+     "word word1"], results[0..-2])
+  end
+  def test_lexer_with_invalid_input
+    lexer = Dhaka::Lexer.new(LexerSpec)
+    result = lexer.lex("this will cause an error here 123.").each do |result|
+    end
+    assert(result.has_error?)
+    assert_equal(34, result.unexpected_char_index)
+  end
+  def test_lexer_with_greedy_character_consumption
+    lexer = Dhaka::Lexer.new(LexerSpec)
+    results = lexer.lex("<html></html>this is a word").collect
+    assert_equal(["tag <html></html>",
+     "word this",
+     "word is",
+     "word a",
+     "word word"], results[0..-2])
+  end
+  class LexerWithLookaheadsSpec < Dhaka::LexerSpecification
+    for_pattern '\s+' do
+      # ignore whitespace
+    end
+    for_pattern ':/\w+' do
+      "a symbol qualifier"
+    end
+    for_pattern(":/[^a-zA-Z \n\r\t]*") do
+      "a colon"
+    end
+    for_pattern "ab/cd" do
+      "ab followed by cd: #{current_lexeme.value}"
+    end
+    for_pattern "abc/e" do
+      "abc followed by e: #{current_lexeme.value}"
+    end
+    for_pattern '\w+' do
+      "word #{current_lexeme.value}"
+    end
+    for_pattern '\d+' do
+      "number #{current_lexeme.value}"
+    end
+  end
+  def test_lexer_with_regexes_that_use_lookaheads
+    lexer = Dhaka::Lexer.new(LexerWithLookaheadsSpec)
+    eval(lexer.compile_to_ruby_source_as(:LexerWithTrickyLookaheads))
+    results = LexerWithTrickyLookaheads.lex("234 : :whatever :1934 abcd ::").collect
+    assert_equal(["number 234",
+     "a colon",
+     "a symbol qualifier",
+     "word whatever",
+     "a colon",
+     "number 1934",
+     "ab followed by cd: ab",
+     "word cd",
+     "a colon",
+     "a colon"], results[0..-2])
+  end
+end

data/test/{malformed_grammar.rb → core/malformed_grammar.rb} RENAMED Viewed

File without changes

data/test/{malformed_grammar_test.rb → core/malformed_grammar_test.rb} RENAMED Viewed

@@ -1,4 +1,4 @@
-require File.dirname(__FILE__) + '/dhaka_test_helper'
+require File.dirname(__FILE__) + '/../dhaka_test_helper'
 require File.dirname(__FILE__) + "/malformed_grammar"
 class TestMalformedGrammar < Test::Unit::TestCase

data/test/{nullable_grammar.rb → core/nullable_grammar.rb} RENAMED Viewed

File without changes

data/test/{parse_result_test.rb → core/parse_result_test.rb} RENAMED Viewed

@@ -1,4 +1,4 @@
-require File.dirname(__FILE__) + '/dhaka_test_helper'
+require File.dirname(__FILE__) + '/../dhaka_test_helper'
 require File.dirname(__FILE__) + '/simple_grammar'
 class TestParseSuccessResult < Test::Unit::TestCase

data/test/{parser_state_test.rb → core/parser_state_test.rb} RENAMED Viewed

@@ -1,4 +1,4 @@
-require File.dirname(__FILE__) + '/dhaka_test_helper'
+require File.dirname(__FILE__) + '/../dhaka_test_helper'
 class TestParserState < Test::Unit::TestCase
   include Dhaka

data/test/{parser_test.rb → core/parser_test.rb} RENAMED Viewed

@@ -1,4 +1,4 @@
-require File.dirname(__FILE__) + '/dhaka_test_helper'
+require File.dirname(__FILE__) + '/../dhaka_test_helper'
 require File.dirname(__FILE__) + '/simple_grammar'
 require File.dirname(__FILE__) + '/nullable_grammar'
 require File.dirname(__FILE__) + '/lalr_but_not_slr_grammar'
@@ -23,7 +23,7 @@ class ParserTest < Test::Unit::TestCase
   def assert_collection_equal(expected, actual)
     assert_equal(expected.size, actual.size)
     actual.each do |actual_member|
-      assert(contains(actual_member, expected))
+      assert(contains(actual_member, expected), "Should have found #{actual_member} in expected set.")
     end
   end

data/test/{precedence_grammar.rb → core/precedence_grammar.rb} RENAMED Viewed

File without changes

data/test/{precedence_grammar_test.rb → core/precedence_grammar_test.rb} RENAMED Viewed

@@ -1,4 +1,4 @@
-require File.dirname(__FILE__) + '/dhaka_test_helper'
+require File.dirname(__FILE__) + '/../dhaka_test_helper'
 require File.dirname(__FILE__) + "/precedence_grammar"
 class TestPrecedenceGrammar < Test::Unit::TestCase

data/test/{rr_conflict_grammar.rb → core/rr_conflict_grammar.rb} RENAMED Viewed

File without changes

data/test/{simple_grammar.rb → core/simple_grammar.rb} RENAMED Viewed

File without changes

data/test/{sr_conflict_grammar.rb → core/sr_conflict_grammar.rb} RENAMED Viewed

File without changes

metadata CHANGED Viewed

@@ -1,10 +1,10 @@
 --- !ruby/object:Gem::Specification
-rubygems_version: 0.9.0
+rubygems_version: 0.9.2
 specification_version: 1
 name: dhaka
 version: !ruby/object:Gem::Version
-  version: 2.1.0
-date: 2007-03-11 00:00:00 -05:00
+  version: 2.2.0
+date: 2007-05-29 00:00:00 -04:00
 summary: An LALR1 parser generator written in Ruby
 require_paths:
 - lib
@@ -62,25 +62,6 @@ files:
 - lib/parser/token.rb
 - lib/tokenizer/tokenizer.rb
 - test/all_tests.rb
-- test/another_lalr_but_not_slr_grammar.rb
-- test/compiled_parser_test.rb
-- test/dhaka_test_helper.rb
-- test/evaluator_test.rb
-- test/fake_logger.rb
-- test/grammar_test.rb
-- test/lalr_but_not_slr_grammar.rb
-- test/lexer_test.rb
-- test/malformed_grammar.rb
-- test/malformed_grammar_test.rb
-- test/nullable_grammar.rb
-- test/parse_result_test.rb
-- test/parser_state_test.rb
-- test/parser_test.rb
-- test/precedence_grammar.rb
-- test/precedence_grammar_test.rb
-- test/rr_conflict_grammar.rb
-- test/simple_grammar.rb
-- test/sr_conflict_grammar.rb
 - test/arithmetic/arithmetic_evaluator.rb
 - test/arithmetic/arithmetic_evaluator_test.rb
 - test/arithmetic/arithmetic_grammar.rb
@@ -102,11 +83,33 @@ files:
 - test/chittagong/chittagong_evaluator_test.rb
 - test/chittagong/chittagong_grammar.rb
 - test/chittagong/chittagong_lexer.rb
+- test/chittagong/chittagong_lexer.rb.rej
 - test/chittagong/chittagong_lexer_specification.rb
 - test/chittagong/chittagong_lexer_test.rb
 - test/chittagong/chittagong_parser.rb
+- test/chittagong/chittagong_parser.rb.rej
 - test/chittagong/chittagong_parser_test.rb
 - test/chittagong/chittagong_test.rb
+- test/core/another_lalr_but_not_slr_grammar.rb
+- test/core/compiled_parser_test.rb
+- test/core/dfa_test.rb
+- test/core/evaluator_test.rb
+- test/core/grammar_test.rb
+- test/core/lalr_but_not_slr_grammar.rb
+- test/core/lexer_test.rb
+- test/core/malformed_grammar.rb
+- test/core/malformed_grammar_test.rb
+- test/core/nullable_grammar.rb
+- test/core/parse_result_test.rb
+- test/core/parser_state_test.rb
+- test/core/parser_test.rb
+- test/core/precedence_grammar.rb
+- test/core/precedence_grammar_test.rb
+- test/core/rr_conflict_grammar.rb
+- test/core/simple_grammar.rb
+- test/core/sr_conflict_grammar.rb
+- test/dhaka_test_helper.rb
+- test/fake_logger.rb
 - Rakefile
 test_files: []