RubyGems - dhaka - Versions diffs - 0.0.1 - Mend

dhaka 0.0.1

Files changed (43) hide show

data/lib/dhaka.rb +44 -0
data/lib/evaluator/evaluator.rb +70 -0
data/lib/grammar/closure_hash.rb +13 -0
data/lib/grammar/grammar.rb +129 -0
data/lib/grammar/grammar_symbol.rb +19 -0
data/lib/grammar/production.rb +14 -0
data/lib/parser/action.rb +51 -0
data/lib/parser/channel.rb +51 -0
data/lib/parser/compiled_parser.rb +35 -0
data/lib/parser/item.rb +37 -0
data/lib/parser/parse_result.rb +26 -0
data/lib/parser/parse_tree.rb +34 -0
data/lib/parser/parser.rb +125 -0
data/lib/parser/parser_methods.rb +10 -0
data/lib/parser/parser_run.rb +35 -0
data/lib/parser/parser_state.rb +66 -0
data/lib/parser/token.rb +15 -0
data/lib/tokenizer/tokenizer.rb +88 -0
data/test/all_tests.rb +11 -0
data/test/arithmetic_evaluator.rb +70 -0
data/test/arithmetic_evaluator_test.rb +55 -0
data/test/arithmetic_grammar.rb +38 -0
data/test/arithmetic_grammar_test.rb +11 -0
data/test/arithmetic_test_methods.rb +11 -0
data/test/arithmetic_tokenizer.rb +43 -0
data/test/arithmetic_tokenizer_test.rb +32 -0
data/test/bracket_grammar.rb +25 -0
data/test/bracket_tokenizer.rb +17 -0
data/test/brackets_test.rb +20 -0
data/test/compiled_arithmetic_parser.rb +252 -0
data/test/compiled_parser_test.rb +71 -0
data/test/evaluator_test.rb +8 -0
data/test/grammar_test.rb +70 -0
data/test/incomplete_arithmetic_evaluator.rb +60 -0
data/test/lalr_but_not_slr_grammar.rb +17 -0
data/test/malformed_grammar.rb +9 -0
data/test/malformed_grammar_test.rb +9 -0
data/test/nullable_grammar.rb +18 -0
data/test/parser_test.rb +168 -0
data/test/rr_conflict_grammar.rb +23 -0
data/test/simple_grammar.rb +24 -0
data/test/sr_conflict_grammar.rb +16 -0
metadata +87 -0

data/test/compiled_parser_test.rb ADDED

@@ -0,0 +1,71 @@
+require "test/unit"
+require "simple_grammar"
+require 'compiled_arithmetic_parser'
+require 'arithmetic_test_methods'
+class TestCompiledParser < Test::Unit::TestCase
+  include ArithmeticTestMethods
+  def test_compiled_parser_generates_syntax_tree_for_simple_grammar
+    grammar = SimpleGrammar
+    parser = Dhaka::Parser.new(grammar)
+    eval(parser.compile_to_ruby_source_as('Foo'))
+    syntax_tree = Foo.parse(build_tokens(['(','n','-','(','n','-','n',')',')','-','n','#'], Foo.grammar)).syntax_tree
+    assert_equal \
+      ["literal",
+       "term",
+       "literal",
+       "term",
+       "literal",
+       "subtraction",
+       "parenthetized_expression",
+       "subtraction",
+       "parenthetized_expression",
+       "term",
+       "literal",
+       "subtraction",
+       "expression",
+       "start"], syntax_tree.linearize
+  end
+  def test_compiled_parser_generates_syntax_tree_for_arithmetic_grammar
+    parser_input = ['(','n','-','(','n','/','n','-','n',')','/','n',')','#']
+    assert_equal \
+      ["getting_literals",
+       "factor",
+       "term",
+       "getting_literals",
+       "factor",
+       "getting_literals",
+       "division",
+       "term",
+       "getting_literals",
+       "factor",
+       "subtraction",
+       "unpacking_parenthetized_expression",
+       "factor",
+       "getting_literals",
+       "division",
+       "subtraction",
+       "unpacking_parenthetized_expression",
+       "factor",
+       "term",
+       "expression",
+       "start_production"], get_syntax_tree_with_compiled_arithmetic_parser(build_tokens(parser_input, CompiledArithmeticParser.grammar)).linearize
+  end
+  def test_parser_returns_nil_syntax_tree_if_empty_token_array
+    assert_nil CompiledArithmeticParser.parse([])
+  end
+  def test_parser_returns_error_result_with_index_of_bad_token_if_parse_error
+    parse_result = CompiledArithmeticParser.parse(build_tokens(['(', '-', ')', '#'], CompiledArithmeticParser.grammar))
+    assert parse_result.has_error?
+    assert_equal 1, parse_result.bad_token_index
+  end
+  def build_tokens(token_symbol_names, grammar)
+    token_symbol_names.collect {|symbol_name| Dhaka::Token.new(grammar.symbol_for_name(symbol_name), nil)}
+  end
+end

data/test/evaluator_test.rb ADDED

@@ -0,0 +1,8 @@
+require "test/unit"
+require File.dirname(__FILE__)+'/../lib/dhaka'
+class TestEvaluator < Test::Unit::TestCase
+  def test_throws_exception_if_evaluation_rules_not_completely_defined
+    assert_raise(Dhaka::EvaluatorDefinitionError) {require 'incomplete_arithmetic_evaluator'}
+  end
+end

data/test/grammar_test.rb ADDED

@@ -0,0 +1,70 @@
+#!/usr/bin/env ruby
+require 'test/unit'
+require 'simple_grammar'
+class SimpleGrammarTest < Test::Unit::TestCase
+  def setup
+    @grammar = SimpleGrammar
+  end
+  def test_loads_symbol_and_classifies_them
+    expected_non_terminals = Set.new(['E', 'S', 'T', Dhaka::START_SYMBOL_NAME])
+    expected_terminals = Set.new(['-', 'n', '(', ')', '#', Dhaka::END_SYMBOL_NAME])
+    assert_equal(expected_non_terminals, Set.new(@grammar.non_terminal_symbols.collect {|symbol| symbol.name}))
+    assert_equal(expected_terminals, Set.new(@grammar.terminal_symbols.collect {|symbol| symbol.name}))
+  end
+  def test_creates_productions
+    productions_for_E = @grammar.productions_for_symbol(@grammar.symbol_for_name('E'))
+    assert(productions_for_E.size==2)
+    expected_productions_for_E = Set.new(['subtraction E ::= E - T', 'term E ::= T'])
+    assert_equal(expected_productions_for_E, Set.new(productions_for_E.collect {|production| production.to_s}))
+    productions_for_start = @grammar.productions_for_symbol(@grammar.start_symbol)
+    assert(productions_for_start.size==1)
+    expected_productions_for_start = Set.new(['start _Start_ ::= S #'])
+    assert_equal(expected_productions_for_start, Set.new(productions_for_start.collect {|production| production.to_s}))
+  end
+  def test_symbols_in_productions_use_the_flyweight_pattern
+    assert_same(@grammar.production_named('subtraction').symbol, @grammar.production_named('term').symbol)
+    assert_same(@grammar.production_named('expression').expansion[0], @grammar.production_named('subtraction').expansion[0])
+  end
+  def test_first_with_non_terminal
+    expected_symbols = Set.new(['(', 'n'])
+    assert_equal(expected_symbols, Set.new(@grammar.first(@grammar.symbol_for_name('E')).collect {|symbol| symbol.name}))
+  end
+  def test_first_with_terminal
+    expected_symbols = Set.new(['('])
+    assert_equal(expected_symbols, Set.new(@grammar.first(@grammar.symbol_for_name('(')).collect {|symbol| symbol.name}))
+  end
+  def test_computes_closures_and_channels_given_a_kernel
+    start_production = @grammar.production_named('start')
+    start_item = Dhaka::Item.new(start_production, 0)
+    kernel = Set.new([start_item])
+    channels, closure = @grammar.closure(kernel)
+    expected_items = Set.new(['_Start_ ::= -> S # []',
+                              'S ::= -> E []',
+                              'E ::= -> E - T []',
+                              'E ::= -> T []',
+                              'T ::= -> n []',
+                              'T ::= -> ( E ) []'])
+    expected_channels = Set.new([
+          'Spontaneous Channel from E ::= -> E - T [] to E ::= -> E - T []',
+          'Spontaneous Channel from S ::= -> E [] to E ::= -> T []',
+          'Spontaneous Channel from E ::= -> T [] to T ::= -> n []',
+          'Spontaneous Channel from S ::= -> E [] to E ::= -> E - T []',
+          'Spontaneous Channel from E ::= -> T [] to T ::= -> ( E ) []',
+          'Spontaneous Channel from E ::= -> E - T [] to E ::= -> T []',
+          'Spontaneous Channel from _Start_ ::= -> S # [] to S ::= -> E []'
+          ])
+    assert_equal(expected_items, Set.new(closure.values.collect{|item| item.to_s}))
+    assert_equal(expected_channels, Set.new(channels.collect{|item| item.to_s}))
+  end
+end

data/test/incomplete_arithmetic_evaluator.rb ADDED

@@ -0,0 +1,60 @@
+require File.dirname(__FILE__)+'/../lib/dhaka'
+require 'arithmetic_grammar'
+class ArithmeticEvaluator < Dhaka::Evaluator
+  self.grammar = ArithmeticGrammar
+  define_evaluation_rules do
+    for_subtraction do
+      child_nodes[0] - child_nodes[2]
+    end
+    for_addition do
+      child_nodes[0] + child_nodes[2]
+    end
+    for_division do
+      child_nodes[0].to_f/child_nodes[2]
+    end
+    for_multiplication do
+      child_nodes[0] * child_nodes[2]
+    end
+    for_getting_literals do
+      child_nodes[0].token.value
+    end
+    for_start_production do
+      child_nodes[0]
+    end
+    for_empty_args do
+      []
+    end
+    for_evaluating_function do
+      child_nodes[0].call child_nodes[2]
+    end
+    for_concatenating_args do
+      [child_nodes[0]]+child_nodes[2]
+    end
+    for_single_args do
+      [child_nodes[0]]
+    end
+    for_min_function do
+      Proc.new {|args| args.inject {|min, elem| min = (elem < min ? elem : min)}}
+    end
+    for_max_function do
+      Proc.new {|args| args.inject {|max, elem| max = (elem > max ? elem : max)}}
+    end
+  end
+end

data/test/lalr_but_not_slr_grammar.rb ADDED

@@ -0,0 +1,17 @@
+require File.dirname(__FILE__)+'/../lib/dhaka'
+class LALRButNotSLRGrammar < Dhaka::Grammar
+  for_symbol(Dhaka::START_SYMBOL_NAME) do
+    start ['E']
+  end
+  for_symbol('E') do
+    E_Aa ['A', 'a']
+    E_bAc ['b', 'A', 'c']
+    E_dc ['d', 'c']
+    E_bda ['b', 'd', 'a']
+  end
+  for_symbol('A') do
+    A_d ['d']
+  end
+end

data/test/malformed_grammar.rb ADDED

@@ -0,0 +1,9 @@
+require File.dirname(__FILE__)+'/../lib/dhaka'
+class MalformedGrammar < Dhaka::Grammar
+  for_symbol('goo') do
+    foo ['boo']
+  end
+end

data/test/malformed_grammar_test.rb ADDED

@@ -0,0 +1,9 @@
+require "test/unit"
+require "malformed_grammar"
+class TestMalformedGrammar < Test::Unit::TestCase
+  def test_must_have_a_start_symbol_in_order_to_generate_a_parser
+    assert_raises(Dhaka::NoStartProductionsError) {Dhaka::Parser.new(MalformedGrammar)}
+  end
+end

data/test/nullable_grammar.rb ADDED

@@ -0,0 +1,18 @@
+require File.dirname(__FILE__)+'/../lib/dhaka'
+class NullableGrammar < Dhaka::Grammar
+  for_symbol(Dhaka::START_SYMBOL_NAME) do
+    tuple ['Tuple', '#']
+  end
+  for_symbol('Tuple') do
+    element_list ['(', 'Elements', ')']
+  end
+  for_symbol('Elements') do
+    empty_element_list []
+    concatenate_element_lists ['Character', 'Elements']
+  end
+  for_symbol('Character') do
+    literal_a ['a']
+    literal_b ['b']
+  end
+end

data/test/parser_test.rb ADDED

@@ -0,0 +1,168 @@
+#!/usr/bin/env ruby
+require 'test/unit'
+require 'simple_grammar'
+require 'arithmetic_grammar'
+require 'nullable_grammar'
+require 'lalr_but_not_slr_grammar'
+require 'rr_conflict_grammar'
+require 'sr_conflict_grammar'
+class ParserTest < Test::Unit::TestCase
+  def setup
+    @grammar = SimpleGrammar
+    @parser = Dhaka::Parser.new(@grammar)
+  end
+  def test_parser_generates_states_with_correct_items
+    expected_states = {}
+    expected_states[1] = Set.new(['_Start_ ::= -> S # [_End_]',
+                          'S ::= -> E [#]',
+                          'E ::= -> E - T [#-]',
+                          'E ::= -> T [#-]',
+                          'T ::= -> n [#-]',
+                          'T ::= -> ( E ) [#-]'])
+    expected_states[2] = Set.new(['E ::= T -> [#)-]'])
+    expected_states[3] = Set.new(['T ::= n -> [#)-]'])
+    expected_states[4] = Set.new(['S ::= E -> [#]',
+                          'E ::= E -> - T [#-]'])
+    expected_states[5] = Set.new(['_Start_ ::= S -> # [_End_]'])
+    expected_states[6] = Set.new(['T ::= ( -> E ) [#)-]',
+                          'E ::= -> E - T [)-]',
+                          'E ::= -> T [)-]',
+                          'T ::= -> n [)-]',
+                          'T ::= -> ( E ) [)-]'])
+    expected_states[7] = Set.new(['E ::= E - -> T [#)-]',
+                          'T ::= -> n [#)-]',
+                          'T ::= -> ( E ) [#)-]'])
+    expected_states[8] = Set.new(['E ::= E - T -> [#)-]'])
+    expected_states[9] = Set.new(['T ::= ( E -> ) [#)-]',
+                          'E ::= E -> - T [)-]'])
+    expected_states[10] = Set.new(['T ::= ( E ) -> [#)-]'])
+    expected_states[11] = Set.new(['_Start_ ::= S # -> [_End_]'])
+    actual_states = Set.new(@parser.states.collect {|state| Set.new(state.items.values.collect {|item| item.to_s})})
+    #write_parser(@parser)
+    expected_states.values.each do |state|
+      assert set_finder(state, actual_states), "expected #{state.to_a}"
+    end
+  end
+  def build_tokens(token_symbol_names, grammar)
+    token_symbol_names.collect {|symbol_name| Dhaka::Token.new(grammar.symbol_for_name(symbol_name), nil)}
+  end
+  def test_parser_generates_syntax_tree_given_a_stream_of_symbols
+    syntax_tree = @parser.parse(build_tokens(['(','n','-','(','n','-','n',')',')','-','n','#'], @grammar)).syntax_tree
+    assert_equal \
+      ["literal",
+       "term",
+       "literal",
+       "term",
+       "literal",
+       "subtraction",
+       "parenthetized_expression",
+       "subtraction",
+       "parenthetized_expression",
+       "term",
+       "literal",
+       "subtraction",
+       "expression",
+       "start"], syntax_tree.linearize
+  end
+  def get_linearized_parse_result(input, parser)
+    parser.parse(build_tokens(input, parser.grammar)).syntax_tree.linearize
+  end
+  def test_with_a_different_grammar_with_division
+    grammar = ArithmeticGrammar
+    parser = Dhaka::Parser.new(grammar)
+    #write_parser(parser)
+    parser_input = ['(','n','-','(','n','/','n','-','n',')','/','n',')','#']
+    assert_equal \
+      ["getting_literals",
+       "factor",
+       "term",
+       "getting_literals",
+       "factor",
+       "getting_literals",
+       "division",
+       "term",
+       "getting_literals",
+       "factor",
+       "subtraction",
+       "unpacking_parenthetized_expression",
+       "factor",
+       "getting_literals",
+       "division",
+       "subtraction",
+       "unpacking_parenthetized_expression",
+       "factor",
+       "term",
+       "expression",
+       "start_production"], get_linearized_parse_result(parser_input, parser)
+    parser_input = ['h','(','(','n',')','-','n',',','n',')','#']
+    assert_equal \
+      ["max_function",
+       "getting_literals",
+       "factor",
+       "term",
+       "unpacking_parenthetized_expression",
+       "factor",
+       "term",
+       "getting_literals",
+       "factor",
+       "subtraction",
+       "getting_literals",
+       "factor",
+       "term",
+       "single_args",
+       "concatenating_args",
+       "evaluating_function",
+       "function",
+       "factor",
+       "term",
+       "expression",
+       "start_production"], get_linearized_parse_result(parser_input, parser)
+  end
+  def test_with_a_grammar_with_nullables_after_terminals
+    grammar = NullableGrammar
+    parser = Dhaka::Parser.new(grammar)
+    parser_input = ['(','a',')','#']
+    assert_equal \
+      ["literal_a",
+       "empty_element_list",
+       "concatenate_element_lists",
+       "element_list",
+       "tuple"], get_linearized_parse_result(parser_input, parser)
+  end
+  def test_with_a_grammar_that_is_not_SLR
+    grammar = LALRButNotSLRGrammar
+    parser = Dhaka::Parser.new(grammar)
+    parser_input = ['b','d','c']
+    assert_equal(["A_d", "E_bAc", "start"], get_linearized_parse_result(parser_input, parser))
+  end
+  def test_with_a_grammar_that_should_generate_an_RR_conflict
+    grammar = RRConflictGrammar
+    assert_raise(Dhaka::ParserConflictError) { Dhaka::Parser.new(grammar) }
+  end
+  def test_with_a_grammar_that_should_generate_an_SR_conflict
+    grammar = SRConflictGrammar
+    assert_raise(Dhaka::ParserConflictError) { Dhaka::Parser.new(grammar) }
+  end
+  def set_finder(set1, set2)
+    set2.inject(false) {|result, member| result ||= member == set1}
+  end
+  def write_parser(parser)
+    File.open('parser.dot', 'w') do |file|
+      file << parser.to_dot
+    end
+  end
+end

data/test/rr_conflict_grammar.rb ADDED

@@ -0,0 +1,23 @@
+require File.dirname(__FILE__)+'/../lib/dhaka'
+class RRConflictGrammar < Dhaka::Grammar
+  for_symbol(Dhaka::START_SYMBOL_NAME) do
+    start ['S']
+  end
+  for_symbol('S') do
+    a_expansion ['A', 'c', 'd']
+    b_expansion ['B', 'c', 'e']
+  end
+  for_symbol('A') do
+    xy ['x', 'y']
+  end
+  for_symbol('B') do
+    xy_again ['x', 'y']
+  end
+end