RubyGems - aurum - Versions diffs - 0.1.0 - Mend

aurum 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

data/example/expression/expression.rb +29 -0
data/lib/aurum.rb +10 -0
data/lib/aurum/engine.rb +173 -0
data/lib/aurum/grammar.rb +234 -0
data/lib/aurum/lexical_table_generator.rb +423 -0
data/lib/aurum/parsing_table_generator.rb +445 -0
data/test/engine/lexer_test.rb +52 -0
data/test/engine/semantic_attributes_test.rb +15 -0
data/test/grammar_definition/character_class_definition_test.rb +28 -0
data/test/grammar_definition/grammar_definition_test.rb +54 -0
data/test/grammar_definition/lexical_definition_test.rb +56 -0
data/test/grammar_definition/operator_precedence_definition_test.rb +35 -0
data/test/grammar_definition/production_definition_test.rb +60 -0
data/test/lexical_table_generator/automata_test.rb +74 -0
data/test/lexical_table_generator/character_set_test.rb +73 -0
data/test/lexical_table_generator/interval_test.rb +36 -0
data/test/lexical_table_generator/pattern_test.rb +109 -0
data/test/lexical_table_generator/subset_determinizer_test.rb +19 -0
data/test/lexical_table_generator/table_generator_test.rb +126 -0
data/test/parsing_table_generator/augmented_grammar_test.rb +45 -0
data/test/parsing_table_generator/lalr_n_computation_test.rb +89 -0
data/test/parsing_table_generator/lr_0_automata_test.rb +91 -0
data/test/parsing_table_generator/lr_item_test.rb +33 -0
data/test/parsing_table_generator/parsing_table_state_test.rb +39 -0
data/test/parsing_table_generator/precedence_table_test.rb +28 -0
data/test/parsing_table_generator/production_test.rb +9 -0
data/test/test_helper.rb +103 -0
metadata +78 -0

data/test/lexical_table_generator/interval_test.rb ADDED Viewed

@@ -0,0 +1,36 @@
+$:.unshift(File.dirname(__FILE__) + '/../')
+require 'test_helper'
+class IntervalTest < Test::Unit::TestCase
+    def test_should_include_character_in_interval
+        interval = Aurum::CharacterSet::Interval.new ?a, ?c
+        assert interval.include?('b')
+        assert !interval.include?('d')
+    end
+    def test_should_use_lowest_as_first_and_highest_as_end
+        interval_a = Aurum::CharacterSet::Interval.new ?a, ?c
+        interval_b = Aurum::CharacterSet::Interval.new ?b, ?d
+        assert interval_a.merge!(interval_b)
+        assert_equal Aurum::CharacterSet::Interval.new(?a, ?d), interval_a
+    end
+    def test_should_cat_two_intervals
+        interval_a = Aurum::CharacterSet::Interval.new ?a, ?c
+        interval_b = Aurum::CharacterSet::Interval.new ?d, ?f
+        assert interval_a.merge!(interval_b)
+        assert_equal Aurum::CharacterSet::Interval.new(?a, ?f), interval_a
+    end
+    def test_should_return_nil_if_two_intervls_do_not_have_any_char_in_common
+        interval_a = Aurum::CharacterSet::Interval.new ?a, ?c
+        interval_b = Aurum::CharacterSet::Interval.new ?e, ?f
+        assert !interval_a.merge!(interval_b)
+    end
+    def test_should_def_interval_for_single_char
+        interval = Aurum::CharacterSet::Interval.new ?a
+        assert interval.include?('a')
+        assert !interval.include?('b')
+    end
+end

data/test/lexical_table_generator/pattern_test.rb ADDED Viewed

@@ -0,0 +1,109 @@
+$:.unshift(File.dirname(__FILE__) + '/../')
+require 'test_helper'
+class PatternTest < Test::Unit::TestCase
+    Epsilon = Aurum::Epsilon
+    def test_should_match_single_string
+        pattern = Aurum::Pattern.from_string 'a'
+        assert match?('a', pattern)
+        assert !match?('b', pattern)
+    end
+    def test_should_match_string_literal
+        pattern = Aurum::Pattern.from_string 'abc'
+        assert match?('abc', pattern)
+        assert !match?('abcabc', pattern)
+        assert !match?('bcd', pattern)
+    end
+    def test_should_match_character_set
+        char_set = Aurum::CharacterSet::Interval.new(?A, ?Z).to_char_set
+        pattern = Aurum::Pattern.from_char_set char_set
+        ('A'..'Z').each {|x| assert match?(x, pattern)}
+    end
+    def test_should_match_string_literal_zero_or_more_times
+        pattern = Aurum::Pattern.from_string('abc').kleene
+        assert match?('', pattern)
+        assert match?('abc' * 10, pattern)
+        assert !match?('ab', pattern)
+    end
+    def test_should_match_string_literal_one_or_more_times
+        pattern = Aurum::Pattern.from_string('abc').iterate
+        assert match?('abc', pattern)
+        assert match?('abc' * 10, pattern)
+        assert !match?('', pattern)
+    end
+    def test_should_match_string_literal_zero_or_one_time
+        pattern = Aurum::Pattern.from_string('abc').opt
+        assert match?('', pattern)
+        assert match?('abc', pattern)
+        assert !match?('abc' * 2, pattern)
+    end
+    def test_should_match_concate_string
+        first = Aurum::Pattern.from_string 'first'
+        second = Aurum::Pattern.from_string 'second'
+        pattern = Aurum::Pattern.concat(first, second)
+        assert match?('firstsecond', pattern)
+        assert !match?('first', pattern)
+        assert !match?('second', pattern)
+    end
+    def test_should_match_pattern_a_or_pattern_b
+        pattern_a = Aurum::Pattern.from_string 'patterna'
+        pattern_b = Aurum::Pattern.from_string 'patternb'
+        pattern = pattern_a | pattern_b
+        ['patterna', 'patternb'].each {|x| assert match?(x, pattern)}
+    end
+    def test_should_match_pattern_n_times
+        pattern = Aurum::Pattern.from_string 'pattern'
+        assert match?('pattern' * 5, pattern[5])
+        assert match?('pattern' * 10, pattern[10])
+    end
+    def test_should_match_pattern_n_to_m_times
+        pattern = Aurum::Pattern.from_string('pattern')[5, 7]
+        (5..7).each {|x| assert match?('pattern' * x, pattern)}
+        assert !match?('pattern' * 4, pattern)
+        assert !match?('pattern' * 8, pattern)
+    end
+    def test_should_match_everything_but_the_strings_matched_by_pattern
+        pattern = Aurum::Pattern.from_string 'pattern'
+        negative_pattern = pattern.negate
+        assert !match?('pattern', negative_pattern)
+        assert match?('anything', negative_pattern)
+    end
+    def test_should_match_everything_upto_first_occurrence_of_a_text_matched_by_pattern
+        pattern = ~ Aurum::Pattern.from_string('*/')
+        assert match?('comments */', pattern)
+        assert !match?('everything', pattern)
+    end
+    def match? expected_string, pattern
+        states = closure pattern.automata.table, [0]
+        expected_string.each_byte {|char| states = move(pattern.automata.table, states, char)}
+        states.include?(pattern.accept)
+    end
+    def move automata, states, char
+        result = []
+        states.each {|state| automata[state].each {|tran| result.concat(closure(automata, [tran.destination]))  if tran.symbols.include? char} }
+        result.uniq;
+    end
+    def closure automata, states
+        closure, unvisited = Set.new(states.dup), states.dup
+        filter = lambda {|x| x.symbols == Epsilon && !closure.include?(x.destination)}
+        while !unvisited.empty? do
+            automata[unvisited.pop].grep_each(filter){|tran| [closure, unvisited].each {|x| x << tran.destination}}
+        end
+        closure.to_a
+    end
+end

data/test/lexical_table_generator/subset_determinizer_test.rb ADDED Viewed

@@ -0,0 +1,19 @@
+$:.unshift(File.dirname(__FILE__) + '/../')
+require 'test_helper'
+require 'set'
+class SubsetDeterminizerTest < Test::Unit::TestCase
+    def test_should_create_equivalentDFA()
+        a, b, abb = Aurum::Pattern.from_string('a'), Aurum::Pattern.from_string('b'), Aurum::Pattern.from_string('abb')
+        pattern = Aurum::Pattern.concat((a | b).kleene, abb)
+        automata, accepts = pattern.automata.determinize [pattern.accept]
+        final = move automata.table, 'aaabbbaaabb'
+        assert accepts.include?(final)
+    end
+    def move table, source
+        state = 0
+        source.each_byte {|char| state = (table[state].find {|tran| tran.symbols.include? char}).destination}
+        state
+    end
+end

data/test/lexical_table_generator/table_generator_test.rb ADDED Viewed

@@ -0,0 +1,126 @@
+$:.unshift(File.dirname(__FILE__) + '/../')
+require 'test_helper'
+Aurum::LexicalTableGenerator.class_eval do
+    attr_reader :accept_states, :partitions, :lexical_automata
+	public :construct_automata, :make_initial_partitions, :refine_partitions
+    def table
+        @lexical_automata.table
+    end
+end
+class LexicalTableGeneratorTest < Test::Unit::TestCase
+    def test_should_construct_sub_automata_for_lexical_states
+        specification = {:initial => {PATTERN_A => 'recognize'},
+        :state_a => {PATTERN_B => 'recognize'}}
+        generator = Aurum::LexicalTableGenerator.new specification
+        generator.construct_automata
+        @table, @accepts, @lexical_states = generator.table, generator.accept_states, generator.lexical_states
+        assert recognize?(:initial, 'pattern_a')
+        assert !recognize?(:initial, 'pattern_b')
+        assert !recognize?(:state_a, 'pattern_a')
+        assert recognize?(:state_a, 'pattern_b')
+    end
+    def test_should_add_common_patterns_to_all_lexical_states
+        specification = {:initial => {PATTERN_A => 'recognize'},
+        :state_a => {PATTERN_B => 'recognize'},
+        :all => {PATTERN_C => 'recognize'}}
+        generator = Aurum::LexicalTableGenerator.new specification
+        generator.construct_automata
+        @table, @accepts, @lexical_states = generator.table, generator.accept_states, generator.lexical_states
+        assert recognize?(:initial, 'pattern_c')
+        assert recognize?(:state_a, 'pattern_b')
+        assert !recognize?(:all, 'pattern_b')
+    end
+    def test_initial_partiations_should_be_start_accepts_and_non_accepts
+        specification = {:initial => {PATTERN_A => 'recognize'},
+        :state_a => {PATTERN_B => 'recognize'}}
+        generator = Aurum::LexicalTableGenerator.new specification
+        generator.construct_automata
+        generator.make_initial_partitions
+        partitions = generator.partitions
+        assert_equal 3, partitions.size
+        assert partitions.include?([0])
+        assert partitions.include?(generator.accept_states.keys)
+        assert partitions.include?(generator.lexical_automata.all_states - generator.accept_states.keys - [0])
+    end
+    def test_should_not_split_accept_states_if_has_same_action
+        specification = {:initial => {PATTERN_A => 'recognize', PATTERN_B => 'recognize'}}
+        generator = Aurum::LexicalTableGenerator.new specification
+        generator.construct_automata
+        generator.make_initial_partitions
+        assert generator.partitions.include?(generator.accept_states.keys)
+    end
+    def test_should_split_accept_states_if_has_different_actions
+        specification = {:initial => {PATTERN_A => 'recognizeA', PATTERN_B => 'recognizeB'}}
+        generator = Aurum::LexicalTableGenerator.new specification
+        generator.construct_automata
+        generator.make_initial_partitions
+        assert !generator.partitions.include?(generator.accept_states.keys)
+    end
+    def test_should_partition_size_should_equal_to_state_size_if_min_dfa_given
+        specification = {:initial => {PATTERN_A => 'recognize'}}
+        generator = Aurum::LexicalTableGenerator.new specification
+        generator.construct_automata
+        generator.make_initial_partitions
+        generator.refine_partitions
+        assert_equal generator.table.size, generator.partitions.size
+    end
+    def test_should_partition_size_should_less_than_state_size
+        specification = {:initial => {ABABB => 'recognize'}}
+        generator = Aurum::LexicalTableGenerator.new specification
+        generator.construct_automata
+        generator.make_initial_partitions
+        generator.refine_partitions
+        assert generator.table.size > generator.partitions.size
+    end
+    def test_should_return_original_automata_if_min_dfa_given
+        specification = {:initial => {PATTERN_A => 'recognize'}}
+        generator = Aurum::LexicalTableGenerator.new specification
+        lexical_table, accepts = generator.lexical_table
+        assert generator.table.eql?(lexical_table)
+    end
+    def test_should_recognize_same_lexeme
+        specification = {:initial => {ABABB => 'recognize'},
+        :state_a => {PATTERN_B => 'recognize'},
+        :all => {PATTERN_C => 'recognize'}}
+        generator = Aurum::LexicalTableGenerator.new specification
+        @table, @accepts = generator.lexical_table
+        @lexical_states = generator.lexical_states
+        assert recognize?(:initial, 'aabaabaabb')
+        assert !recognize?(:initial, 'patterna')
+        assert recognize?(:state_a, 'pattern_b')
+        assert !recognize?(:all, 'pattern_b')
+    end
+    def recognize? lexical_state, source
+        begin
+            lexical_state = - @lexical_states.index(lexical_state) - 1
+            state = (@table[0].find {|tran| tran.symbols.include?(lexical_state)}).destination
+            source.each_byte {|char| state = (@table[state].find {|tran| tran.symbols.include? char}).destination}
+            @accepts.keys.include?(state)
+        rescue
+            false
+        end
+    end
+end

data/test/parsing_table_generator/augmented_grammar_test.rb ADDED Viewed

@@ -0,0 +1,45 @@
+$:.unshift(File.dirname(__FILE__) + '/../')
+require 'test_helper'
+require 'set'
+# Test-only: add readers for the generator's nullables and first_sets, which
+# AugmentedGrammarTest inspects.
+Aurum::ParsingTableGenerator.class_eval { attr_reader :nullables, :first_sets }
+class AugmentedGrammarTest < Test::Unit::TestCase
+    def atest_should_find_all_used_symbols
+        generator = parser_generator E=>[production(E, T)], T=>[production(T, F)], F=>[production(F, ID)]
+        generator.start_from E
+        assert_equal [E, T, F, ID], generator.symbols
+        generator.start_from T
+        assert_equal [T, F, ID], generator.symbols
+    end
+    def atest_should_find_all_used_productions
+        generator = parser_generator E=>[production(E, T)], T=>[production(T, F)], F=>[production(F, ID)]
+        generator.start_from E
+        assert_equal [production(START, E), production(E, T), production(T, F), production(F, ID)].to_set, generator.productions.to_set
+        generator.start_from T
+        assert_equal [production(START, T), production(T, F), production(F, ID)].to_set, generator.productions.to_set
+    end
+    def test_should_compute_nullable_nonterminals
+        generator = parser_generator E=>[production(E, T)], T=>[production(T)], F=>[production(F, T, ID)]
+        generator.start_from E
+        assert_equal [T, E, START].to_set, generator.nullables.to_set
+        generator.start_from F
+        assert_equal [T].to_set, generator.nullables.to_set
+    end
+    def atest_first_set_should_contain_terminals_left_depends_on_nt_dirctly
+        generator = parser_generator E=>[production(E, T, ID), production(E, T, T, T, terminal('other'))], T=>[production(T)]
+        generator.start_from E
+        assert_equal [ID, terminal('other')].to_set, generator.first_sets[E].to_set
+    end
+    def atest_should_contain_fist_set_of_nt_which_left_depends_on_nt_dirctly
+        generator = parser_generator E=>[production(E, T, ID), production(E, T, T, T, terminal('other'))], T=>[production(T)], F=>[production(F, T, E)]
+        generator.start_from F
+        assert_equal generator.first_sets[F].to_set, generator.first_sets[E].to_set
+    end
+end

data/test/parsing_table_generator/lalr_n_computation_test.rb ADDED Viewed

@@ -0,0 +1,89 @@
+$:.unshift(File.dirname(__FILE__) + '/../')
+require 'test_helper'
+Aurum::ParsingTableGenerator.class_eval do
+    attr_reader :states
+    public :construct_LR0_automata, :compute_LALR_1_lookahead, :compute_LALR_n_lookahead, :default_action
+end
+class LALRLookaheadComputationTest < Test::Unit::TestCase
+    def test_should_compute_reduce_action_for_inconsistent_states
+        generator = parser_generator EXPRESSION_GRAMMAR_LALR1
+        generator.start_from E
+        generator.construct_LR0_automata
+        states = generator.states.find_all {|x| !x.consistent?}
+        generator.compute_LALR_1_lookahead
+        assert_equal [reduce(0)].to_set, states[0][terminal('$eof')]
+        assert_equal [reduce(2)].to_set, states[1][terminal('+')]
+        assert_equal [reduce(2)].to_set, states[1][terminal(')')]
+        assert_equal [reduce(1)].to_set, states[2][terminal('+')]
+        assert_equal [reduce(1)].to_set, states[2][terminal(')')]
+    end
+    def test_should_replace_conficted_state_actions_with_lookahead_action
+        generator = parser_generator BNF_GRAMMAR_LALR2
+        generator.start_from BNF
+        generator.construct_LR0_automata
+        generator.compute_LALR_1_lookahead
+        conflicted_state = (generator.states.find_all {|x| x.conflicted? })[0]
+        generator.compute_LALR_n_lookahead
+        assert_equal [lookahead_shift(generator.states.length - 1)].to_set, conflicted_state[terminal('s')]
+        assert !conflicted_state.conflicted?
+    end
+    def test_should_add_reduce_action_to_lookahead_state
+        generator = parser_generator BNF_GRAMMAR_LALR2
+        generator.start_from BNF
+        generator.construct_LR0_automata
+        generator.compute_LALR_1_lookahead
+        generator.compute_LALR_n_lookahead
+        states = generator.states
+        assert_equal [read_reduce(6)].to_set, states[-1][terminal('s')]
+        assert_equal [read_reduce(6)].to_set, states[-1][terminal('$eof')]
+        assert_equal [reduce(4)].to_set, states[-1][terminal('->')]
+    end
+    def test_should_return_default_reduce_if_no_action_for_given_symbol
+        generator = parser_generator BNF_GRAMMAR_LALR2
+        generator.start_from BNF
+        generator.construct_LR0_automata
+        generator.compute_LALR_1_lookahead
+        generator.compute_LALR_n_lookahead
+        default_actions = generator.states.map {|x| generator.default_action x}
+        assert_equal [reduce(2), reduce(1), nil, reduce(5), reduce(4), reduce(4)], default_actions
+    end
+    def test_should_return_lookahead_level
+        generator = parser_generator SIMPLE_GRAMMAR_LR0
+        table, level = generator.start_from(E).parsing_table
+        assert_equal 0, level
+        generator = parser_generator EXPRESSION_GRAMMAR_LALR1
+        table, level = generator.start_from(E).parsing_table
+        assert_equal 1, level
+        generator = parser_generator IF_GRAMMAR_LALR2
+        table, level = generator.start_from(STATEMENT).parsing_table
+        assert_equal 2, level
+    end
+	def test_should_raise_error_if_grammar_not_lalr_n
+		begin
+			parser_generator(NOT_LALR_GRAMMAR).start_from(E).parsing_table
+			assert false
+		rescue RuntimeError => error
+			assert_equal 'not LALR(n)', error.message
+		end
+    end
+    def test_should_resolve_conflicts_for_expression
+        op_a, op_b = terminal('+'), terminal('*')
+        generator = parser_generator EXPRESSION_GRAMMAR, [[op_b], [op_a]]
+        generator.start_from E
+        generator.construct_LR0_automata
+        generator.compute_LALR_1_lookahead
+        state = generator.states.last
+        assert_equal 1, state[terminal('+')].size
+        assert state[terminal('+')].to_a.first.kind_of?(Aurum::ReduceAction)
+        assert_equal 1, state[terminal('*')].size
+        assert state[terminal('*')].to_a.first.kind_of?(Aurum::ShiftAction)
+    end
+end

data/test/parsing_table_generator/lr_0_automata_test.rb ADDED Viewed

@@ -0,0 +1,91 @@
+$:.unshift(File.dirname(__FILE__) + '/../')
+require 'test_helper'
+Aurum::ParsingTableGenerator.class_eval do
+    attr_reader :states
+	public :closure, :goto, :read_set, :construct_LR0_automata
+end
+class LR0AutomataTest < Test::Unit::TestCase
+    def test_closure_should_contain_items_themselves
+        generator = parser_generator E=>[production(E, ID)]
+        assert_equal [LR_item(0, E, ID)], generator.closure([LR_item(0, E, ID)])
+    end
+    def test_closure_should_contain_all_right_most_lr_items_of_dot_symbol
+        generator = parser_generator E=>[production(E, T)], T=>[production(T, ID), production(T, terminal('other'))]
+        closure = generator.closure [LR_item(0, E, T)]
+        [LR_item(0, T, ID), LR_item(0, T, terminal('other'))].each {|x| assert closure.include?(x)}
+    end
+    def test_should_return_goto_items_if_expected_symbol_given
+        generator = parser_generator E=>[production(E, T)], T=>[production(T, ID), production(T, terminal('other'))]
+        assert_equal [LR_item(1, E, T)], generator.goto([LR_item(0, E, T)], T)
+    end
+    def test_goto_items_should_be_closured_if_expected_symbol_given
+        generator = parser_generator E=>[production(E, T, T)], T=>[production(T, ID), production(T, terminal('other'))]
+        goto = generator.goto [LR_item(0, E, T, T)], T
+        [LR_item(0, T, ID), LR_item(0, T, terminal('other'))].each {|x| assert goto.include?(x)}
+    end
+    def test_should_use_LR0_items_of_collection_as_state
+        generator = parser_generator SIMPLE_GRAMMAR_LR0
+        generator.start_from E
+        generator.construct_LR0_automata
+        states = generator.states
+        assert 3, states.length
+        assert [LR_item(0, START, E), LR_item(0, T, terminal('+'), T), LR_item(0, T, ID)], states[0]
+        assert [LR_item(1, T, terminal('+'), T)], states[1]
+        assert [LR_item(2, T, terminal('+'), T), LR_item(0, T, ID)], states[2]
+    end
+    def test_should_add_shift_action_to_states
+        generator = parser_generator SIMPLE_GRAMMAR_LR0
+        generator.start_from E
+        generator.construct_LR0_automata
+        states = generator.states
+        assert_equal [shift(1)].to_set, states[0][T]
+        assert_equal [shift(2)].to_set, states[1][terminal('+')]
+    end
+    def test_should_add_read_reduce_action_to_states
+        generator = parser_generator SIMPLE_GRAMMAR_LR0
+        generator.start_from E
+        generator.construct_LR0_automata
+        states = generator.states
+         assert_equal [read_reduce(0)].to_set, states[0][E]
+        assert_equal [read_reduce(2)].to_set, states[0][ID]
+        assert_equal [read_reduce(2)].to_set, states[2][ID]
+        assert_equal [read_reduce(1)].to_set, states[2][T]
+    end
+    def test_should_return_all_predsucceors
+        generator = parser_generator EXPRESSION_GRAMMAR_LALR1
+        generator.start_from E
+        generator.construct_LR0_automata
+        states = generator.states
+        assert_equal [LR_item(0, START, E),
+        LR_item(0, E, E, terminal('+'), T),
+        LR_item(0, E, T),
+        LR_item(0, T, T, terminal('*'), F),
+        LR_item(0, T, F),
+        LR_item(0, F, terminal('('), E, terminal(')')),
+        LR_item(0, F, ID)], states[2].predsucceors([T])[0]
+        assert_equal [LR_item(1, F, terminal('('), E, terminal(')')),
+        LR_item(0, E, E, terminal('+'), T),
+        LR_item(0, E, T),
+        LR_item(0, T, T, terminal('*'), F),
+        LR_item(0, T, F),
+        LR_item(0, F, terminal('('), E, terminal(')')),
+        LR_item(0, F, ID)], states[2].predsucceors([T])[1]
+    end
+    def test_should_return_read_set_for_state
+        generator = parser_generator EXPRESSION_GRAMMAR_LALR1
+        generator.start_from E
+        generator.construct_LR0_automata
+        states = generator.states
+        assert_equal [terminal('id'), terminal('(')].to_set, generator.read_set(states[2], terminal('*')).to_set
+    end
+end