aurum 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. data/example/expression/expression.rb +29 -0
  2. data/lib/aurum.rb +10 -0
  3. data/lib/aurum/engine.rb +173 -0
  4. data/lib/aurum/grammar.rb +234 -0
  5. data/lib/aurum/lexical_table_generator.rb +423 -0
  6. data/lib/aurum/parsing_table_generator.rb +445 -0
  7. data/test/engine/lexer_test.rb +52 -0
  8. data/test/engine/semantic_attributes_test.rb +15 -0
  9. data/test/grammar_definition/character_class_definition_test.rb +28 -0
  10. data/test/grammar_definition/grammar_definition_test.rb +54 -0
  11. data/test/grammar_definition/lexical_definition_test.rb +56 -0
  12. data/test/grammar_definition/operator_precedence_definition_test.rb +35 -0
  13. data/test/grammar_definition/production_definition_test.rb +60 -0
  14. data/test/lexical_table_generator/automata_test.rb +74 -0
  15. data/test/lexical_table_generator/character_set_test.rb +73 -0
  16. data/test/lexical_table_generator/interval_test.rb +36 -0
  17. data/test/lexical_table_generator/pattern_test.rb +109 -0
  18. data/test/lexical_table_generator/subset_determinizer_test.rb +19 -0
  19. data/test/lexical_table_generator/table_generator_test.rb +126 -0
  20. data/test/parsing_table_generator/augmented_grammar_test.rb +45 -0
  21. data/test/parsing_table_generator/lalr_n_computation_test.rb +89 -0
  22. data/test/parsing_table_generator/lr_0_automata_test.rb +91 -0
  23. data/test/parsing_table_generator/lr_item_test.rb +33 -0
  24. data/test/parsing_table_generator/parsing_table_state_test.rb +39 -0
  25. data/test/parsing_table_generator/precedence_table_test.rb +28 -0
  26. data/test/parsing_table_generator/production_test.rb +9 -0
  27. data/test/test_helper.rb +103 -0
  28. metadata +78 -0
@@ -0,0 +1,52 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../')
2
+ require 'test_helper'
3
+
4
# Exercises Aurum::Lexer against small hand-written lexical specifications.
# A specification is a hash of lexical state => { pattern => action }.
# PATTERN_A, PATTERN_B and +terminal+ are not defined in this file
# (presumably provided by test_helper -- TODO confirm).
class LexerTest < Test::Unit::TestCase
  # A pattern bound to RecognizeTokenAction yields that token, carrying the matched text.
  def test_should_execute_recognize_token_action
    rules = { :initial => { PATTERN_A => Aurum::RecognizeTokenAction.new('tokenA') } }
    token = create_lexer(rules, 'pattern_a').next_symbol
    assert_equal(terminal('tokenA'), token)
    assert_equal('pattern_a', token.value)
  end

  # ChangeStateAction on PATTERN_A moves to :stateA, where PATTERN_B is recognized;
  # the returned symbol's value is expected to span both matched pieces.
  def test_should_execute_shift_to_lexical_state_action
    rules = {
      :initial => { PATTERN_A => Aurum::ChangeStateAction.new(:stateA) },
      :stateA => { PATTERN_B => Aurum::RecognizeTokenAction.new('tokenB') }
    }
    token = create_lexer(rules, 'pattern_apattern_b').next_symbol
    assert_equal(terminal('tokenB'), token)
    assert_equal('pattern_apattern_b', token.value)
  end

  # After tokenB is recognized in :stateA the lexer is sent back to :initial,
  # so the second PATTERN_B is reported as tokenC.
  def test_should_execute_shift_to_and_recognize_token_action
    rules = {
      :initial => {
        PATTERN_A => Aurum::ChangeStateAction.new(:stateA),
        PATTERN_B => Aurum::RecognizeTokenAction.new('tokenC')
      },
      :stateA => {
        PATTERN_B => Aurum::RecognizeTokenAndChangeStateAction.new('tokenB', :initial)
      }
    }
    lexer = create_lexer(rules, 'pattern_apattern_bpattern_b')
    %w[tokenB tokenC].each do |token_name|
      assert_equal(terminal(token_name), lexer.next_symbol)
    end
  end

  # Input matched by an IgnoreAction pattern produces no symbol; the first
  # symbol returned is the tokenA that follows it.
  def test_should_not_execute_ignore_action
    rules = {
      :initial => {
        PATTERN_A => Aurum::RecognizeTokenAction.new('tokenA'),
        PATTERN_B => Aurum::IgnoreAction
      }
    }
    token = create_lexer(rules, 'pattern_bpattern_a').next_symbol
    assert_equal(terminal('tokenA'), token)
    assert_equal('pattern_a', token.value)
  end

  # An empty input yields the '$eof' terminal straight away.
  def test_should_return_eof_for_empty_string
    rules = {
      :initial => {
        PATTERN_A => Aurum::RecognizeTokenAction.new('tokenA'),
        PATTERN_B => Aurum::IgnoreAction
      }
    }
    lexer = create_lexer(rules, '')
    assert_equal(terminal('$eof'), lexer.next_symbol)
  end

  # Builds a lexer over +input+ from the tables generated for +specification+.
  def create_lexer(specification, input)
    table_generator = Aurum::LexicalTableGenerator.new(specification)
    transitions, accept_states = table_generator.lexical_table
    Aurum::Lexer.new(transitions, accept_states, table_generator.lexical_states, input)
  end
end
@@ -0,0 +1,15 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../')
2
+ require 'test_helper'
3
+
4
# Checks that Aurum::Parser::SemanticAttributes accepts attributes of any name.
class SemanticAttributesTest < Test::Unit::TestCase
  # Ten times over: build a random five-letter lowercase name (codes 97..122),
  # assign a random integer below 100 through it and read the same value back.
  # The local must stay named +attr+ because the eval'd strings refer to it.
  # NOTE(review): a random name could coincide with a method the object
  # already has (e.g. "class") -- confirm whether that can make this flaky.
  def test_s_attribute_should_be_a_object_has_arbitrary_attributes
    attr = Aurum::Parser::SemanticAttributes.new
    (1..10).each do
      name = ''
      value = rand(100)
      (1..5).each { name << (97 + rand(26)) }
      eval "attr.#{name} = #{value}"
      assert_equal(value, eval("attr.#{name}"))
    end
  end
end
@@ -0,0 +1,28 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../')
2
+ require 'test_helper'
3
+
4
# Covers the character-class DSL of Aurum::CharacterClassDefinition,
# driven through instance_eval the way a grammar definition would use it.
class CharacterClassDefinitionTest < Test::Unit::TestCase
  def setup
    @character_class = Aurum::CharacterClassDefinition.new
  end

  # Defining alpha once registers a single :alpha entry containing a-z and A-F.
  def test_should_add_char_class_to_definition
    @character_class.instance_eval { alpha(range(?a, ?z) + string('ABCDEF')) }
    assert_equal(1, @character_class.definitions.size)
    letters = @character_class.definitions[:alpha]
    [(?a..?z), (?A..?F)].each do |span|
      span.each { |char| assert(letters.include?(char)) }
    end
  end

  # A second definition under the same name is not applied: the digits given
  # in the later call must be absent from :alpha.
  def test_should_not_redefine_char_class
    @character_class.instance_eval do
      alpha(range(?a, ?z) + range(?A, ?Z))
      alpha(range(?0, ?9))
    end
    assert_equal(1, @character_class.definitions.size)
    letters = @character_class.definitions[:alpha]
    (?0..?9).each { |digit| assert(!letters.include?(digit)) }
  end
end
@@ -0,0 +1,54 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../')
2
+ require 'test_helper'
3
+
4
# Arithmetic-expression grammar used as a fixture by the grammar definition tests.
class ExpressionGrammar < Aurum::Grammar
  # Named character classes referenced from the token definitions via the(...).
  character_classes do
    number(range(?0, ?9))
  end

  # Runs of spaces are ignored; _number is one or more characters of :number.
  tokens do
    ignore(string(' ').one_or_more)
    _number(the(:number).one_or_more)
  end

  # '*' and '/' are declared ahead of '+' and '-'.
  precedences do
    operator('*', '/')
    operator('+', '-')
  end

  # The brace block must stay attached to the last symbol of each production,
  # so it is written with { } inside the argument list (do/end would bind to
  # the outer call instead).
  productions do
    expression(expression, '+', expression { expression.value = expression1.value + expression2.value })
    expression(expression, '-', expression { expression.value = expression1.value - expression2.value })
    expression(expression, '*', expression { expression.value = expression1.value * expression2.value })
    expression(expression, '/', expression { expression.value = expression1.value / expression2.value })
    expression('(', expression, ')')
    expression(_number { expression.value = _number.value.to_i })
  end
end
28
+
29
# End-to-end checks of the lexer and parser produced from ExpressionGrammar.
class GrammarDefinitionTest < Test::Unit::TestCase
  # String literals used in productions ('+') show up as '$literal_' terminals
  # in the generated lexer, next to the declared _number token.
  def test_should_add_literal_to_lexer
    @lexer = ExpressionGrammar.lexer('21 + 35')
    [
      ['21', '_number'],
      ['+', '$literal_+'],
      ['35', '_number']
    ].each do |lexeme, token_name|
      assert_recognize(lexeme, terminal(token_name))
    end
  end

  # Parses several inputs with one parser started from 'expression' and checks
  # the computed values. The first lexer is created before the parser, as in
  # the original ordering.
  def test_should_create_parser
    @lexer = ExpressionGrammar.lexer('21 + 35')
    @parser = ExpressionGrammar.start_from('expression')
    assert_equal(56, @parser.parse(@lexer).value)

    [
      ['2 + 3 * 5', 17],
      ['(2 + 3) * 5', 25],
      ['2 + (3 + 5) * 7', 58]
    ].each do |source, expected_value|
      @lexer = ExpressionGrammar.lexer(source)
      assert_equal(expected_value, @parser.parse(@lexer).value)
    end
  end

  # Pulls the next symbol from @lexer and checks both its token and its text.
  def assert_recognize(lexeme, token)
    recognized = @lexer.next_symbol
    assert_equal(token, recognized)
    assert_equal(lexeme, recognized.value)
  end
end
@@ -0,0 +1,56 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../')
2
+ require 'test_helper'
3
+
4
# Verifies which action Aurum::LexicalSpecification stores for a pattern,
# and under which lexical state, for each of its definition methods.
class LexicalDefinitionTest < Test::Unit::TestCase
  def setup
    @specification = Aurum::LexicalSpecification.new
  end

  # A token-named call (_id) stores a RecognizeTokenAction under :initial.
  def test_should_add_token_recognized_action_to_pattern
    letters = @specification.range(?a, ?z)
    pattern = @specification._id(letters)
    stored = @specification.definitions[:initial][pattern]
    assert_equal(Aurum::RecognizeTokenAction.new('_id'), stored)
  end

  # shift_to stores a ChangeStateAction targeting the given state.
  def test_should_add_change_state_action_to_pattern
    pattern = @specification.shift_to(:string, '"')
    stored = @specification.definitions[:initial][pattern]
    assert_equal(Aurum::ChangeStateAction.new(:string), stored)
  end

  # match with a block stores a UserDefinedAction whose action is set.
  def test_should_add_user_define_action_to_pattern
    pattern = @specification.match('"') { user_defined }
    stored = @specification.definitions[:initial][pattern]
    assert(stored.kind_of?(Aurum::UserDefinedAction))
    assert(stored.action)
  end

  # ignore stores Aurum::IgnoreAction itself.
  def test_should_add_ignore_action_to_pattern
    pattern = @specification.ignore(' ')
    stored = @specification.definitions[:initial][pattern]
    assert_equal(Aurum::IgnoreAction, stored)
  end

  # recognize_and_shift_to stores the combined recognize-and-change-state action.
  def test_should_add_recognize_and_change_state_action_to_pattern
    pattern = @specification.recognize_and_shift_to(:_token, :string, 'token')
    stored = @specification.definitions[:initial][pattern]
    assert_equal(Aurum::RecognizeTokenAndChangeStateAction.new('_token', :string), stored)
  end

  # Patterns declared in the block given to shift_to land in the target state.
  def test_should_add_patterns_to_lexical_state
    @specification.shift_to(:state, 'state_begin') { _state_content(range(?a, ?z)) }
    assert_equal(1, @specification.definitions[:state].size)
  end

  # Patterns declared inside within(...) are added to every listed state.
  def test_should_add_patterns_to_all_states
    @specification.within(:state1, :state2) { _state_content(range(?a, ?z)) }
    [:state1, :state2].each do |state_name|
      assert_equal(1, @specification.definitions[state_name].size)
    end
  end
end
@@ -0,0 +1,35 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../')
2
+ require 'test_helper'
3
+
4
# Covers the precedence/associativity DSL of Aurum::OperatorPrecedenceDefinition.
class OperatorPrecedenceDefinitionTest < Test::Unit::TestCase
  def setup
    @precedence = Aurum::OperatorPrecedenceDefinition.new
  end

  # Each operator call contributes one entry, kept in declaration order,
  # holding the '$literal_' terminals of the operators passed to it.
  def test_should_define_opeators_precedence_according_to_the_order_they_defined
    @precedence.instance_eval do
      operator('*', '/')
      operator('+', '-')
    end
    levels = @precedence.precedences
    assert_equal(2, levels.size)
    assert_equal([terminal('$literal_*'), terminal('$literal_/')], levels[0])
    assert_equal([terminal('$literal_+'), terminal('$literal_-')], levels[1])
  end

  # Successive left declarations accumulate under :left.
  def test_should_define_left_associativity_of_operators
    @precedence.instance_eval do
      left(_plus)
      left(_minus)
    end
    left_operators = @precedence.associativities[:left]
    assert_equal([terminal('_plus'), terminal('_minus')], left_operators)
  end

  # Declaring the same operator twice keeps a single entry under :right.
  def test_should_define_right_associativity_of_operators
    @precedence.instance_eval do
      right(_plus)
      right(_plus)
    end
    right_operators = @precedence.associativities[:right]
    assert_equal([terminal('_plus')], right_operators)
  end
end
@@ -0,0 +1,60 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../')
2
+ require 'test_helper'
3
+
4
# Covers the production DSL of Aurum::ProductionDefinition.
# BNF, RLIST, +production+, +terminal+ and +nonterminal+ are not defined in
# this file (presumably provided by test_helper -- TODO confirm).
class ProductionDefinitionTest < Test::Unit::TestCase
  def setup
    @grammar = Aurum::ProductionDefinition.new
  end

  # One declared production ends up as a one-element set under its nonterminal.
  def test_should_add_prodcution_to_definition
    @grammar.instance_eval { bnf(bnf, rlist) }
    assert_equal(1, @grammar.__definitions.length)
    expected = [production(BNF, BNF, RLIST)].to_set
    assert_equal(expected, @grammar.__definitions[BNF])
  end

  # Declaring the identical production twice stores it only once.
  def test_should_not_add_duplication_production_to_nonterminal
    @grammar.instance_eval do
      bnf(rlist, bnf)
      bnf(rlist, bnf)
    end
    assert_equal(1, @grammar.__definitions.length)
    expected = [production(BNF, RLIST, BNF)].to_set
    assert_equal(expected, @grammar.__definitions[BNF])
  end

  # A production whose only symbol is _ is stored with no right-hand symbols.
  def test_should_add_empty_production_to_definition
    @grammar.instance_eval { bnf(_) }
    assert_equal(1, @grammar.__definitions.length)
    expected = [production(BNF)].to_set
    assert_equal(expected, @grammar.__definitions[BNF])
  end

  # A string literal in a production becomes a '$literal_' terminal.
  def test_should_treat_string_literal_as_terminal
    @grammar.instance_eval { t(t, '+', t) }
    t_symbol = nonterminal('t')
    assert_equal(1, @grammar.__definitions.length)
    expected = [production(t_symbol, t_symbol, terminal('$literal_+'), t_symbol)].to_set
    assert_equal(expected, @grammar.__definitions[t_symbol])
  end

  # Names that begin with an underscore are treated as terminals.
  def test_should_treat_symbol_start_with_underscore_as_terminal
    @grammar.instance_eval { t(_id, '+', _id) }
    t_symbol = nonterminal('t')
    id_symbol = terminal('_id')
    assert_equal(1, @grammar.__definitions.length)
    expected = [production(t_symbol, id_symbol, terminal('$literal_+'), id_symbol)].to_set
    assert_equal(expected, @grammar.__definitions[t_symbol])
  end

  # Only a block attached to the last symbol becomes the production's action;
  # a block on an earlier symbol (as in f) leaves the action unset.
  def test_should_use_action_of_last_symbol_as_production_action
    @grammar.instance_eval do
      t(_id, '+', _id {})
      f(t {}, t)
    end
    t_production = @grammar.__definitions[nonterminal('t')].to_a.first
    assert(t_production.action)
    f_production = @grammar.__definitions[nonterminal('f')].to_a.first
    assert(!f_production.action)
  end
end
@@ -0,0 +1,74 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../')
2
+ require 'test_helper'
3
+
4
# Test-only addition to Aurum::Automata: follow a single transition.
Aurum::Automata.class_eval do
  # Returns the destination of the first transition out of +start+ whose
  # symbols include +char+, or nil when no transition matches.
  def move(start, char)
    transition = @table[start].find { |candidate| candidate.symbols.include?(char) }
    return nil unless transition

    transition.destination
  end
end
10
+
11
# Covers Aurum::Automata: connecting states, merging, reversing, duplicating,
# and splitting overlapping transition symbols through #alphabet.
# Relies on the +move+ helper that this file adds to Aurum::Automata.
class AutomataTest < Test::Unit::TestCase
  # A connected transition can be followed with move.
  def test_should_connect_states
    automata = Aurum::Automata.new(2)
    automata.connect(0, 'a'.to_char_set, 1)
    assert_equal(1, automata.move(0, ?a))
  end

  # merge! copies the other automata's transitions into the receiver.
  def test_should_contain_all_transitions_in_merged_automata
    automata = Aurum::Automata.new(2)
    automata.connect(0, 'b'.to_char_set, 1)
    other_automata = Aurum::Automata.new
    other_automata.merge!(automata)
    assert_equal(1, other_automata.move(0, ?b))
  end

  # reverse yields an automata whose transitions point back to the source.
  def test_should_return_an_automata_with_reverse_transitions
    automata = Aurum::Automata.new(3)
    automata.connect(0, 'a'.to_char_set, 1)
    automata.connect(0, 'b'.to_char_set, 2)
    reverse_automata = automata.reverse
    assert_equal(0, reverse_automata.move(1, ?a))
    assert_equal(0, reverse_automata.move(2, ?b))
  end

  # dup keeps the existing transitions.
  def test_should_return_an_automata_with_same_transitions
    automata = Aurum::Automata.new(2)
    automata.connect(0, 'b'.to_char_set, 1)
    assert_equal(1, automata.dup.move(0, ?b))
  end

  # Disjoint intervals are reported unchanged, each with its own destination.
  def test_alphabet_should_get_symbol_ac_and_mq
    automata = Aurum::Automata.new(4)
    automata.connect(0, interval(?a, ?c), 1)
    automata.connect(2, interval(?m, ?q), 3)
    expected = [[interval(?a, ?c), [1]], [interval(?m, ?q), [3]]]
    assert_alphabet(expected, automata, [0, 2])
  end

  # Partially overlapping intervals are split into a-c, d-f (shared) and g.
  def test_alphabet_should_get_symbol_ac_df_and_g
    automata = Aurum::Automata.new(4)
    automata.connect(0, interval(?a, ?f), 1)
    automata.connect(2, interval(?d, ?g), 3)
    expected = [[interval(?a, ?c), [1]], [interval(?d, ?f), [1, 3]], [interval(?g), [3]]]
    assert_alphabet(expected, automata, [0, 2])
  end

  # An interval nested in a wider one splits the wider one into three parts.
  def test_alphabet_should_get_symbol_ac_dg_and_hz
    automata = Aurum::Automata.new(4)
    automata.connect(0, interval(?a, ?z), 1)
    automata.connect(2, interval(?d, ?g), 3)
    expected = [[interval(?a, ?c), [1]], [interval(?d, ?g), [1, 3]], [interval(?h, ?z), [1]]]
    assert_alphabet(expected, automata, [0, 2])
  end

  # Compares, in order, every (states, symbols) pair yielded by
  # automata.alphabet(states) with +expected+, a list of
  # [character set, states] pairs.
  #
  # The final count assertion makes the check fail when alphabet yields fewer
  # entries than expected (or never yields); without it such a run would pass
  # without comparing anything.
  def assert_alphabet(expected, automata, states)
    yielded = 0
    automata.alphabet(states) do |yielded_states, symbols|
      assert_equal(expected[yielded][0].intervals, symbols.intervals)
      assert_equal(expected[yielded][1], yielded_states)
      yielded += 1
    end
    assert_equal(expected.size, yielded)
  end

  # Builds a character set holding the single interval first..last;
  # +last+ defaults to +first+ for a one-character interval.
  def interval(first, last = first)
    Aurum::CharacterSet::Interval.new(first, last).to_char_set
  end
end
@@ -0,0 +1,73 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../')
2
+ require 'test_helper'
3
+
4
# Covers Aurum::CharacterSet: adding strings and ranges, merging and deleting
# intervals, conversion to points, and the + / - operators.
class CharacterSetTest < Test::Unit::TestCase
  # Each character of an appended string becomes its own interval.
  def test_should_add_string_literal_to_character_set
    letters = Aurum::CharacterSet.new
    letters << 'age'
    [?a, ?g, ?e].each do |char|
      assert(letters.intervals.include?(Aurum::CharacterSet::Interval.new(char)))
    end
  end

  # An appended range is stored as one interval.
  def test_should_add_range_to_character_set
    letters = Aurum::CharacterSet.new
    letters << (?a..?z)
    assert(letters.intervals.include?(Aurum::CharacterSet::Interval.new(?a, ?z)))
  end

  # Overlapping ranges are combined into one interval.
  def test_should_merge_intervals_in_character_set
    letters = Aurum::CharacterSet.new
    letters << (?a..?d)
    letters << (?b..?f)
    assert(letters.intervals.include?(Aurum::CharacterSet::Interval.new(?a, ?f)))
  end

  # Deleting the middle of an interval leaves its two ends behind.
  def test_should_delete_interval_in_character_set
    letters = Aurum::CharacterSet.new
    letters << (?a..?d)
    letters.delete('bc')
    [?a, ?d].each do |char|
      assert(letters.intervals.include?(Aurum::CharacterSet::Interval.new(char)))
    end
  end

  # to_points returns a start point and an end point per interval, each
  # tagged with the argument passed in.
  def test_should_return_all_points_in_character_set
    letters = Aurum::CharacterSet.new
    letters << (?a..?b)
    letters << (?d..?e)
    expected = [
      point(?a, true, 1),
      point(?b, false, 1),
      point(?d, true, 1),
      point(?e, false, 1)
    ]
    assert_equal(expected, letters.to_points(1))
  end

  # Adding two adjacent sets gives one interval spanning both.
  def test_should_return_sum_of_2_character_sets
    lower_half = Aurum::CharacterSet.new
    lower_half << (?a..?g)
    upper_half = Aurum::CharacterSet.new
    upper_half << (?h..?z)
    combined = lower_half + upper_half
    assert(combined.intervals.include?(Aurum::CharacterSet::Interval.new(?a, ?z)))
  end

  # A string can be added to a set directly.
  def test_should_return_sum_of_character_set_and_string
    upper_half = Aurum::CharacterSet.new
    upper_half << (?h..?z)
    combined = upper_half + 'gfedcba'
    assert(combined.intervals.include?(Aurum::CharacterSet::Interval.new(?a, ?z)))
  end

  # Subtracting a set removes its characters from the receiver's intervals.
  def test_should_return_sub_of_2_character_sets
    all_letters = Aurum::CharacterSet.new
    all_letters << (?a..?z)
    removed = Aurum::CharacterSet.new
    removed << (?a..?d)
    remainder = all_letters - removed
    assert(remainder.intervals.include?(Aurum::CharacterSet::Interval.new(?e, ?z)))
  end

  # A string can be subtracted from a set directly.
  def test_should_return_sub_of_character_set_and_string
    all_letters = Aurum::CharacterSet.new
    all_letters << (?a..?z)
    remainder = all_letters - 'abcd'
    assert(remainder.intervals.include?(Aurum::CharacterSet::Interval.new(?e, ?z)))
  end

  # Shorthand for constructing an Aurum::CharacterSet::Point.
  def point(char, start, destination)
    Aurum::CharacterSet::Point.new(char, start, destination)
  end
end