dhaka 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. data/lib/dhaka.rb +44 -0
  2. data/lib/evaluator/evaluator.rb +70 -0
  3. data/lib/grammar/closure_hash.rb +13 -0
  4. data/lib/grammar/grammar.rb +129 -0
  5. data/lib/grammar/grammar_symbol.rb +19 -0
  6. data/lib/grammar/production.rb +14 -0
  7. data/lib/parser/action.rb +51 -0
  8. data/lib/parser/channel.rb +51 -0
  9. data/lib/parser/compiled_parser.rb +35 -0
  10. data/lib/parser/item.rb +37 -0
  11. data/lib/parser/parse_result.rb +26 -0
  12. data/lib/parser/parse_tree.rb +34 -0
  13. data/lib/parser/parser.rb +125 -0
  14. data/lib/parser/parser_methods.rb +10 -0
  15. data/lib/parser/parser_run.rb +35 -0
  16. data/lib/parser/parser_state.rb +66 -0
  17. data/lib/parser/token.rb +15 -0
  18. data/lib/tokenizer/tokenizer.rb +88 -0
  19. data/test/all_tests.rb +11 -0
  20. data/test/arithmetic_evaluator.rb +70 -0
  21. data/test/arithmetic_evaluator_test.rb +55 -0
  22. data/test/arithmetic_grammar.rb +38 -0
  23. data/test/arithmetic_grammar_test.rb +11 -0
  24. data/test/arithmetic_test_methods.rb +11 -0
  25. data/test/arithmetic_tokenizer.rb +43 -0
  26. data/test/arithmetic_tokenizer_test.rb +32 -0
  27. data/test/bracket_grammar.rb +25 -0
  28. data/test/bracket_tokenizer.rb +17 -0
  29. data/test/brackets_test.rb +20 -0
  30. data/test/compiled_arithmetic_parser.rb +252 -0
  31. data/test/compiled_parser_test.rb +71 -0
  32. data/test/evaluator_test.rb +8 -0
  33. data/test/grammar_test.rb +70 -0
  34. data/test/incomplete_arithmetic_evaluator.rb +60 -0
  35. data/test/lalr_but_not_slr_grammar.rb +17 -0
  36. data/test/malformed_grammar.rb +9 -0
  37. data/test/malformed_grammar_test.rb +9 -0
  38. data/test/nullable_grammar.rb +18 -0
  39. data/test/parser_test.rb +168 -0
  40. data/test/rr_conflict_grammar.rb +23 -0
  41. data/test/simple_grammar.rb +24 -0
  42. data/test/sr_conflict_grammar.rb +16 -0
  43. metadata +87 -0
@@ -0,0 +1,71 @@
1
+ require "test/unit"
2
+ require "simple_grammar"
3
+ require 'compiled_arithmetic_parser'
4
+ require 'arithmetic_test_methods'
5
+
6
# Tests for parsers that have been compiled to Ruby source: one generated on
# the fly from SimpleGrammar and the pre-built CompiledArithmeticParser.
class TestCompiledParser < Test::Unit::TestCase
  include ArithmeticTestMethods

  # Compiles a parser for SimpleGrammar into a class named Foo, evaluates the
  # generated source and checks the linearized syntax tree Foo produces.
  def test_compiled_parser_generates_syntax_tree_for_simple_grammar
    parser = Dhaka::Parser.new(SimpleGrammar)
    # The evaluated text comes from the parser generator, not from external input.
    eval(parser.compile_to_ruby_source_as('Foo'))
    token_names = ['(', 'n', '-', '(', 'n', '-', 'n', ')', ')', '-', 'n', '#']
    syntax_tree = Foo.parse(build_tokens(token_names, Foo.grammar)).syntax_tree
    expected_nodes = %w[
      literal
      term
      literal
      term
      literal
      subtraction
      parenthetized_expression
      subtraction
      parenthetized_expression
      term
      literal
      subtraction
      expression
      start
    ]
    assert_equal expected_nodes, syntax_tree.linearize
  end

  # Parses an expression with subtraction and division through
  # CompiledArithmeticParser and checks the linearized syntax tree.
  def test_compiled_parser_generates_syntax_tree_for_arithmetic_grammar
    parser_input = ['(', 'n', '-', '(', 'n', '/', 'n', '-', 'n', ')', '/', 'n', ')', '#']
    expected_nodes = %w[
      getting_literals
      factor
      term
      getting_literals
      factor
      getting_literals
      division
      term
      getting_literals
      factor
      subtraction
      unpacking_parenthetized_expression
      factor
      getting_literals
      division
      subtraction
      unpacking_parenthetized_expression
      factor
      term
      expression
      start_production
    ]
    tokens = build_tokens(parser_input, CompiledArithmeticParser.grammar)
    assert_equal expected_nodes, get_syntax_tree_with_compiled_arithmetic_parser(tokens).linearize
  end

  # Parsing an empty token array is expected to return nil.
  def test_parser_returns_nil_syntax_tree_if_empty_token_array
    assert_nil CompiledArithmeticParser.parse([])
  end

  # A '-' directly after '(' is a parse error; the result must flag the error
  # and report index 1 as the offending token.
  def test_parser_returns_error_result_with_index_of_bad_token_if_parse_error
    tokens = build_tokens(['(', '-', ')', '#'], CompiledArithmeticParser.grammar)
    parse_result = CompiledArithmeticParser.parse(tokens)
    assert parse_result.has_error?
    assert_equal 1, parse_result.bad_token_index
  end

  # Builds one Dhaka::Token (with a nil value) per symbol name, resolving each
  # name through the given grammar.
  def build_tokens(token_symbol_names, grammar)
    token_symbol_names.map do |name|
      Dhaka::Token.new(grammar.symbol_for_name(name), nil)
    end
  end
end
@@ -0,0 +1,8 @@
1
+ require "test/unit"
2
+ require File.dirname(__FILE__)+'/../lib/dhaka'
3
+
4
# Checks that an evaluator whose rules are not completely defined is rejected.
class TestEvaluator < Test::Unit::TestCase
  # Loading the incomplete evaluator fixture must raise
  # Dhaka::EvaluatorDefinitionError.
  def test_throws_exception_if_evaluation_rules_not_completely_defined
    assert_raise(Dhaka::EvaluatorDefinitionError) do
      require 'incomplete_arithmetic_evaluator'
    end
  end
end
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env ruby
2
+ require 'test/unit'
3
+ require 'simple_grammar'
4
+
5
# Tests symbol classification, production lookup, FIRST sets and closure
# computation on SimpleGrammar.
class SimpleGrammarTest < Test::Unit::TestCase
  def setup
    @grammar = SimpleGrammar
  end

  # Terminal and non-terminal symbol names must match the expected sets.
  def test_loads_symbol_and_classifies_them
    expected_non_terminals = Set.new(['E', 'S', 'T', Dhaka::START_SYMBOL_NAME])
    expected_terminals = Set.new(['-', 'n', '(', ')', '#', Dhaka::END_SYMBOL_NAME])
    assert_equal(expected_non_terminals, names_of(@grammar.non_terminal_symbols))
    assert_equal(expected_terminals, names_of(@grammar.terminal_symbols))
  end

  # Productions for 'E' and for the start symbol are looked up and compared by
  # their string form.
  def test_creates_productions
    productions_for_e = @grammar.productions_for_symbol(@grammar.symbol_for_name('E'))
    # assert_equal (rather than assert(size == n)) reports the actual size on failure.
    assert_equal(2, productions_for_e.size)
    expected_productions_for_e = Set.new(['subtraction E ::= E - T', 'term E ::= T'])
    assert_equal(expected_productions_for_e, strings_of(productions_for_e))

    productions_for_start = @grammar.productions_for_symbol(@grammar.start_symbol)
    assert_equal(1, productions_for_start.size)
    expected_productions_for_start = Set.new(['start _Start_ ::= S #'])
    assert_equal(expected_productions_for_start, strings_of(productions_for_start))
  end

  # The same symbol object must be shared between the productions that use it.
  def test_symbols_in_productions_use_the_flyweight_pattern
    assert_same(@grammar.production_named('subtraction').symbol,
                @grammar.production_named('term').symbol)
    assert_same(@grammar.production_named('expression').expansion[0],
                @grammar.production_named('subtraction').expansion[0])
  end

  def test_first_with_non_terminal
    expected_symbols = Set.new(['(', 'n'])
    assert_equal(expected_symbols, names_of(@grammar.first(@grammar.symbol_for_name('E'))))
  end

  def test_first_with_terminal
    expected_symbols = Set.new(['('])
    assert_equal(expected_symbols, names_of(@grammar.first(@grammar.symbol_for_name('('))))
  end

  # Computes the closure of a kernel holding only the start item (position 0
  # of the 'start' production) and compares items and channels by string form.
  def test_computes_closures_and_channels_given_a_kernel
    start_item = Dhaka::Item.new(@grammar.production_named('start'), 0)
    channels, closure = @grammar.closure(Set.new([start_item]))
    expected_items = Set.new([
      '_Start_ ::= -> S # []',
      'S ::= -> E []',
      'E ::= -> E - T []',
      'E ::= -> T []',
      'T ::= -> n []',
      'T ::= -> ( E ) []'
    ])
    expected_channels = Set.new([
      'Spontaneous Channel from E ::= -> E - T [] to E ::= -> E - T []',
      'Spontaneous Channel from S ::= -> E [] to E ::= -> T []',
      'Spontaneous Channel from E ::= -> T [] to T ::= -> n []',
      'Spontaneous Channel from S ::= -> E [] to E ::= -> E - T []',
      'Spontaneous Channel from E ::= -> T [] to T ::= -> ( E ) []',
      'Spontaneous Channel from E ::= -> E - T [] to E ::= -> T []',
      'Spontaneous Channel from _Start_ ::= -> S # [] to S ::= -> E []'
    ])
    assert_equal(expected_items, strings_of(closure.values))
    assert_equal(expected_channels, strings_of(channels))
  end

  private

  # Set of the #name of every element in the given collection.
  def names_of(symbols)
    Set.new(symbols.map { |symbol| symbol.name })
  end

  # Set of the #to_s of every element in the given collection.
  def strings_of(objects)
    Set.new(objects.map { |object| object.to_s })
  end
end
@@ -0,0 +1,60 @@
1
+ require File.dirname(__FILE__)+'/../lib/dhaka'
2
+ require 'arithmetic_grammar'
3
+
4
# Evaluator fixture over ArithmeticGrammar.
# NOTE(review): this appears to be the file TestEvaluator requires as
# 'incomplete_arithmetic_evaluator' and expects to raise
# Dhaka::EvaluatorDefinitionError, so the rule set is presumably incomplete
# on purpose -- do not add rules here without confirming.
class ArithmeticEvaluator < Dhaka::Evaluator
  self.grammar = ArithmeticGrammar

  define_evaluation_rules do
    # Binary operators: operands are child nodes 0 and 2.
    for_subtraction { child_nodes[0] - child_nodes[2] }

    for_addition { child_nodes[0] + child_nodes[2] }

    # The left operand is converted with to_f before dividing.
    for_division { child_nodes[0].to_f / child_nodes[2] }

    for_multiplication { child_nodes[0] * child_nodes[2] }

    for_getting_literals { child_nodes[0].token.value }

    for_start_production { child_nodes[0] }

    # Argument lists are built up as plain arrays.
    for_empty_args { [] }

    # Child node 0 is the callable produced by a *_function rule below;
    # child node 2 is the argument array.
    for_evaluating_function { child_nodes[0].call(child_nodes[2]) }

    for_concatenating_args { [child_nodes[0]] + child_nodes[2] }

    for_single_args { [child_nodes[0]] }

    # Proc that folds its argument list down to the smallest element.
    for_min_function do
      Proc.new do |args|
        args.inject { |smallest, candidate| candidate < smallest ? candidate : smallest }
      end
    end

    # Proc that folds its argument list down to the largest element.
    for_max_function do
      Proc.new do |args|
        args.inject { |largest, candidate| candidate > largest ? candidate : largest }
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ require File.dirname(__FILE__)+'/../lib/dhaka'
2
+
3
# Grammar fixture whose start production expands to E, with E and A built
# from the symbols a, b, c and d. ParserTest uses it for its "not SLR" case.
class LALRButNotSLRGrammar < Dhaka::Grammar
  for_symbol(Dhaka::START_SYMBOL_NAME) { start(%w[E]) }

  for_symbol('E') do
    E_Aa(%w[A a])
    E_bAc(%w[b A c])
    E_dc(%w[d c])
    E_bda(%w[b d a])
  end

  for_symbol('A') { A_d(%w[d]) }
end
17
+
@@ -0,0 +1,9 @@
1
+ require File.dirname(__FILE__)+'/../lib/dhaka'
2
+
3
# Grammar fixture that declares a single production for 'goo' and nothing for
# Dhaka::START_SYMBOL_NAME; TestMalformedGrammar expects building a parser
# from it to raise Dhaka::NoStartProductionsError.
class MalformedGrammar < Dhaka::Grammar
  for_symbol('goo') { foo(%w[boo]) }
end
@@ -0,0 +1,9 @@
1
+ require "test/unit"
2
+
3
+ require "malformed_grammar"
4
+
5
# Checks that a grammar without start productions cannot be turned into a parser.
class TestMalformedGrammar < Test::Unit::TestCase
  # Constructing a parser for MalformedGrammar must raise
  # Dhaka::NoStartProductionsError.
  def test_must_have_a_start_symbol_in_order_to_generate_a_parser
    # assert_raise is the spelling used by the other tests in this suite.
    assert_raise(Dhaka::NoStartProductionsError) do
      Dhaka::Parser.new(MalformedGrammar)
    end
  end
end
@@ -0,0 +1,18 @@
1
+ require File.dirname(__FILE__)+'/../lib/dhaka'
2
+
3
# Grammar fixture for parenthesised lists of the characters 'a' and 'b'.
# 'Elements' has an empty expansion, which makes it nullable.
class NullableGrammar < Dhaka::Grammar
  for_symbol(Dhaka::START_SYMBOL_NAME) { tuple(['Tuple', '#']) }

  for_symbol('Tuple') { element_list(['(', 'Elements', ')']) }

  for_symbol('Elements') do
    empty_element_list([])
    concatenate_element_lists(%w[Character Elements])
  end

  for_symbol('Character') do
    literal_a(%w[a])
    literal_b(%w[b])
  end
end
@@ -0,0 +1,168 @@
1
+ #!/usr/bin/env ruby
2
+ require 'test/unit'
3
+ require 'simple_grammar'
4
+ require 'arithmetic_grammar'
5
+ require 'nullable_grammar'
6
+ require 'lalr_but_not_slr_grammar'
7
+ require 'rr_conflict_grammar'
8
+ require 'sr_conflict_grammar'
9
+
10
# Tests Dhaka::Parser: generated states for SimpleGrammar, syntax trees for
# several grammars, and conflict detection.
class ParserTest < Test::Unit::TestCase
  def setup
    @grammar = SimpleGrammar
    @parser = Dhaka::Parser.new(@grammar)
  end

  # Every expected item set must appear among the parser's states. The integer
  # keys only label the expected states; matching is by item-set equality.
  def test_parser_generates_states_with_correct_items
    expected_states = {
      1 => Set.new(['_Start_ ::= -> S # [_End_]',
                    'S ::= -> E [#]',
                    'E ::= -> E - T [#-]',
                    'E ::= -> T [#-]',
                    'T ::= -> n [#-]',
                    'T ::= -> ( E ) [#-]']),
      2 => Set.new(['E ::= T -> [#)-]']),
      3 => Set.new(['T ::= n -> [#)-]']),
      4 => Set.new(['S ::= E -> [#]',
                    'E ::= E -> - T [#-]']),
      5 => Set.new(['_Start_ ::= S -> # [_End_]']),
      6 => Set.new(['T ::= ( -> E ) [#)-]',
                    'E ::= -> E - T [)-]',
                    'E ::= -> T [)-]',
                    'T ::= -> n [)-]',
                    'T ::= -> ( E ) [)-]']),
      7 => Set.new(['E ::= E - -> T [#)-]',
                    'T ::= -> n [#)-]',
                    'T ::= -> ( E ) [#)-]']),
      8 => Set.new(['E ::= E - T -> [#)-]']),
      9 => Set.new(['T ::= ( E -> ) [#)-]',
                    'E ::= E -> - T [)-]']),
      10 => Set.new(['T ::= ( E ) -> [#)-]']),
      11 => Set.new(['_Start_ ::= S # -> [_End_]'])
    }
    actual_states = Set.new(
      @parser.states.map do |state|
        Set.new(state.items.values.map { |item| item.to_s })
      end
    )
    # write_parser(@parser)  # debugging aid: dumps the parser to parser.dot
    expected_states.each_value do |state|
      assert set_finder(state, actual_states), "expected #{state.to_a}"
    end
  end

  # Builds one Dhaka::Token (with a nil value) per symbol name, resolving each
  # name through the given grammar.
  def build_tokens(token_symbol_names, grammar)
    token_symbol_names.map do |symbol_name|
      Dhaka::Token.new(grammar.symbol_for_name(symbol_name), nil)
    end
  end

  def test_parser_generates_syntax_tree_given_a_stream_of_symbols
    token_names = ['(', 'n', '-', '(', 'n', '-', 'n', ')', ')', '-', 'n', '#']
    syntax_tree = @parser.parse(build_tokens(token_names, @grammar)).syntax_tree
    expected_nodes = %w[
      literal
      term
      literal
      term
      literal
      subtraction
      parenthetized_expression
      subtraction
      parenthetized_expression
      term
      literal
      subtraction
      expression
      start
    ]
    assert_equal expected_nodes, syntax_tree.linearize
  end

  # Tokenizes +input+ with the parser's own grammar, parses it and returns the
  # linearized syntax tree.
  def get_linearized_parse_result(input, parser)
    parser.parse(build_tokens(input, parser.grammar)).syntax_tree.linearize
  end

  # Parses two inputs with ArithmeticGrammar: one mixing subtraction and
  # division, one calling a function with two arguments.
  def test_with_a_different_grammar_with_division
    parser = Dhaka::Parser.new(ArithmeticGrammar)
    # write_parser(parser)  # debugging aid: dumps the parser to parser.dot
    parser_input = ['(', 'n', '-', '(', 'n', '/', 'n', '-', 'n', ')', '/', 'n', ')', '#']
    expected_nodes = %w[
      getting_literals
      factor
      term
      getting_literals
      factor
      getting_literals
      division
      term
      getting_literals
      factor
      subtraction
      unpacking_parenthetized_expression
      factor
      getting_literals
      division
      subtraction
      unpacking_parenthetized_expression
      factor
      term
      expression
      start_production
    ]
    assert_equal expected_nodes, get_linearized_parse_result(parser_input, parser)

    parser_input = ['h', '(', '(', 'n', ')', '-', 'n', ',', 'n', ')', '#']
    expected_nodes = %w[
      max_function
      getting_literals
      factor
      term
      unpacking_parenthetized_expression
      factor
      term
      getting_literals
      factor
      subtraction
      getting_literals
      factor
      term
      single_args
      concatenating_args
      evaluating_function
      function
      factor
      term
      expression
      start_production
    ]
    assert_equal expected_nodes, get_linearized_parse_result(parser_input, parser)
  end

  def test_with_a_grammar_with_nullables_after_terminals
    parser = Dhaka::Parser.new(NullableGrammar)
    parser_input = ['(', 'a', ')', '#']
    expected_nodes = %w[
      literal_a
      empty_element_list
      concatenate_element_lists
      element_list
      tuple
    ]
    assert_equal expected_nodes, get_linearized_parse_result(parser_input, parser)
  end

  def test_with_a_grammar_that_is_not_SLR
    parser = Dhaka::Parser.new(LALRButNotSLRGrammar)
    parser_input = ['b', 'd', 'c']
    assert_equal(%w[A_d E_bAc start], get_linearized_parse_result(parser_input, parser))
  end

  def test_with_a_grammar_that_should_generate_an_RR_conflict
    assert_raise(Dhaka::ParserConflictError) { Dhaka::Parser.new(RRConflictGrammar) }
  end

  def test_with_a_grammar_that_should_generate_an_SR_conflict
    assert_raise(Dhaka::ParserConflictError) { Dhaka::Parser.new(SRConflictGrammar) }
  end

  # Returns true when some member of +set2+ equals +set1+, false otherwise.
  # Stops at the first match.
  def set_finder(set1, set2)
    set2.any? { |member| member == set1 }
  end

  # Writes the parser's to_dot output to 'parser.dot' in the current directory.
  def write_parser(parser)
    File.open('parser.dot', 'w') do |file|
      file << parser.to_dot
    end
  end
end
@@ -0,0 +1,23 @@
1
+ require File.dirname(__FILE__)+'/../lib/dhaka'
2
+
3
# Grammar fixture in which 'A' and 'B' both expand to the same symbols
# ('x', 'y'); ParserTest expects building a parser from it to raise
# Dhaka::ParserConflictError.
class RRConflictGrammar < Dhaka::Grammar
  for_symbol(Dhaka::START_SYMBOL_NAME) { start(%w[S]) }

  for_symbol('S') do
    a_expansion(%w[A c d])
    b_expansion(%w[B c e])
  end

  for_symbol('A') { xy(%w[x y]) }

  for_symbol('B') { xy_again(%w[x y]) }
end
23
+