dhaka 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. data/lib/dhaka.rb +44 -0
  2. data/lib/evaluator/evaluator.rb +70 -0
  3. data/lib/grammar/closure_hash.rb +13 -0
  4. data/lib/grammar/grammar.rb +129 -0
  5. data/lib/grammar/grammar_symbol.rb +19 -0
  6. data/lib/grammar/production.rb +14 -0
  7. data/lib/parser/action.rb +51 -0
  8. data/lib/parser/channel.rb +51 -0
  9. data/lib/parser/compiled_parser.rb +35 -0
  10. data/lib/parser/item.rb +37 -0
  11. data/lib/parser/parse_result.rb +26 -0
  12. data/lib/parser/parse_tree.rb +34 -0
  13. data/lib/parser/parser.rb +125 -0
  14. data/lib/parser/parser_methods.rb +10 -0
  15. data/lib/parser/parser_run.rb +35 -0
  16. data/lib/parser/parser_state.rb +66 -0
  17. data/lib/parser/token.rb +15 -0
  18. data/lib/tokenizer/tokenizer.rb +88 -0
  19. data/test/all_tests.rb +11 -0
  20. data/test/arithmetic_evaluator.rb +70 -0
  21. data/test/arithmetic_evaluator_test.rb +55 -0
  22. data/test/arithmetic_grammar.rb +38 -0
  23. data/test/arithmetic_grammar_test.rb +11 -0
  24. data/test/arithmetic_test_methods.rb +11 -0
  25. data/test/arithmetic_tokenizer.rb +43 -0
  26. data/test/arithmetic_tokenizer_test.rb +32 -0
  27. data/test/bracket_grammar.rb +25 -0
  28. data/test/bracket_tokenizer.rb +17 -0
  29. data/test/brackets_test.rb +20 -0
  30. data/test/compiled_arithmetic_parser.rb +252 -0
  31. data/test/compiled_parser_test.rb +71 -0
  32. data/test/evaluator_test.rb +8 -0
  33. data/test/grammar_test.rb +70 -0
  34. data/test/incomplete_arithmetic_evaluator.rb +60 -0
  35. data/test/lalr_but_not_slr_grammar.rb +17 -0
  36. data/test/malformed_grammar.rb +9 -0
  37. data/test/malformed_grammar_test.rb +9 -0
  38. data/test/nullable_grammar.rb +18 -0
  39. data/test/parser_test.rb +168 -0
  40. data/test/rr_conflict_grammar.rb +23 -0
  41. data/test/simple_grammar.rb +24 -0
  42. data/test/sr_conflict_grammar.rb +16 -0
  43. metadata +87 -0
@@ -0,0 +1,71 @@
1
+ require "test/unit"
2
+ require "simple_grammar"
3
+ require 'compiled_arithmetic_parser'
4
+ require 'arithmetic_test_methods'
5
+
6
# Tests parsers that exist as generated Ruby source: one compiled on the fly
# from SimpleGrammar, and the pre-generated CompiledArithmeticParser.
class TestCompiledParser < Test::Unit::TestCase
  include ArithmeticTestMethods

  # Compiles a parser for SimpleGrammar to Ruby source named Foo, evaluates
  # that source, and compares the linearized syntax tree of a nested
  # subtraction against the expected sequence of production names.
  def test_compiled_parser_generates_syntax_tree_for_simple_grammar
    grammar = SimpleGrammar
    parser = Dhaka::Parser.new(grammar)
    # The generated source is evaluated in this method's binding, so the
    # locals above keep their original names.
    eval(parser.compile_to_ruby_source_as('Foo'))
    tokens = build_tokens(%w[( n - ( n - n ) ) - n #], Foo.grammar)
    expected = %w[
      literal
      term
      literal
      term
      literal
      subtraction
      parenthetized_expression
      subtraction
      parenthetized_expression
      term
      literal
      subtraction
      expression
      start
    ]
    assert_equal expected, Foo.parse(tokens).syntax_tree.linearize
  end

  # Parses an expression mixing subtraction and division with
  # CompiledArithmeticParser (through the ArithmeticTestMethods helper) and
  # checks the linearized syntax tree.
  def test_compiled_parser_generates_syntax_tree_for_arithmetic_grammar
    parser_input = %w[( n - ( n / n - n ) / n ) #]
    expected = %w[
      getting_literals
      factor
      term
      getting_literals
      factor
      getting_literals
      division
      term
      getting_literals
      factor
      subtraction
      unpacking_parenthetized_expression
      factor
      getting_literals
      division
      subtraction
      unpacking_parenthetized_expression
      factor
      term
      expression
      start_production
    ]
    tokens = build_tokens(parser_input, CompiledArithmeticParser.grammar)
    assert_equal expected, get_syntax_tree_with_compiled_arithmetic_parser(tokens).linearize
  end

  # Parsing an empty token array is expected to return nil.
  def test_parser_returns_nil_syntax_tree_if_empty_token_array
    assert_nil CompiledArithmeticParser.parse([])
  end

  # A '-' directly after '(' is a parse error; the result must flag the error
  # and report index 1 as the offending token.
  def test_parser_returns_error_result_with_index_of_bad_token_if_parse_error
    tokens = build_tokens(['(', '-', ')', '#'], CompiledArithmeticParser.grammar)
    parse_result = CompiledArithmeticParser.parse(tokens)
    assert parse_result.has_error?
    assert_equal 1, parse_result.bad_token_index
  end

  # Builds one Dhaka::Token (with a nil value) per symbol name, looking each
  # name up in the given grammar.
  def build_tokens(token_symbol_names, grammar)
    token_symbol_names.map do |symbol_name|
      Dhaka::Token.new(grammar.symbol_for_name(symbol_name), nil)
    end
  end
end
@@ -0,0 +1,8 @@
1
+ require "test/unit"
2
+ require File.dirname(__FILE__)+'/../lib/dhaka'
3
+
4
# Checks that an evaluator whose rule set is incomplete is rejected.
class TestEvaluator < Test::Unit::TestCase
  # Requiring the incomplete evaluator fixture is expected to raise
  # Dhaka::EvaluatorDefinitionError.
  def test_throws_exception_if_evaluation_rules_not_completely_defined
    assert_raise(Dhaka::EvaluatorDefinitionError) do
      require 'incomplete_arithmetic_evaluator'
    end
  end
end
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env ruby
2
+ require 'test/unit'
3
+ require 'simple_grammar'
4
+
5
# Tests grammar construction against SimpleGrammar: symbol classification,
# productions, first sets, and closure/channel computation.
class SimpleGrammarTest < Test::Unit::TestCase

  def setup
    @grammar = SimpleGrammar
  end

  # Every symbol name must end up in exactly the expected terminal or
  # non-terminal set, including the built-in start and end symbols.
  def test_loads_symbol_and_classifies_them
    expected_non_terminals = Set.new(['E', 'S', 'T', Dhaka::START_SYMBOL_NAME])
    expected_terminals = Set.new(['-', 'n', '(', ')', '#', Dhaka::END_SYMBOL_NAME])
    assert_equal(expected_non_terminals, Set.new(@grammar.non_terminal_symbols.map { |symbol| symbol.name }))
    assert_equal(expected_terminals, Set.new(@grammar.terminal_symbols.map { |symbol| symbol.name }))
  end

  # Checks the productions registered for 'E' and for the start symbol.
  def test_creates_productions
    e_productions = @grammar.productions_for_symbol(@grammar.symbol_for_name('E'))
    # assert_equal (instead of a boolean assert) reports the actual size on failure.
    assert_equal(2, e_productions.size)
    expected_e_productions = Set.new(['subtraction E ::= E - T', 'term E ::= T'])
    assert_equal(expected_e_productions, Set.new(e_productions.map { |production| production.to_s }))

    start_productions = @grammar.productions_for_symbol(@grammar.start_symbol)
    assert_equal(1, start_productions.size)
    expected_start_productions = Set.new(['start _Start_ ::= S #'])
    assert_equal(expected_start_productions, Set.new(start_productions.map { |production| production.to_s }))
  end

  # The same symbol object must be shared between productions, not duplicated.
  def test_symbols_in_productions_use_the_flyweight_pattern
    assert_same(@grammar.production_named('subtraction').symbol, @grammar.production_named('term').symbol)
    assert_same(@grammar.production_named('expression').expansion[0], @grammar.production_named('subtraction').expansion[0])
  end

  # first('E') is expected to contain the terminals '(' and 'n'.
  def test_first_with_non_terminal
    expected_symbols = Set.new(['(', 'n'])
    first_of_e = @grammar.first(@grammar.symbol_for_name('E'))
    assert_equal(expected_symbols, Set.new(first_of_e.map { |symbol| symbol.name }))
  end

  # first of a terminal is expected to be that terminal alone.
  def test_first_with_terminal
    expected_symbols = Set.new(['('])
    first_of_paren = @grammar.first(@grammar.symbol_for_name('('))
    assert_equal(expected_symbols, Set.new(first_of_paren.map { |symbol| symbol.name }))
  end

  # Computes the closure of the kernel holding only the start item and compares
  # the string forms of the resulting items and channels.
  def test_computes_closures_and_channels_given_a_kernel
    start_production = @grammar.production_named('start')
    start_item = Dhaka::Item.new(start_production, 0)
    kernel = Set.new([start_item])
    channels, closure = @grammar.closure(kernel)
    expected_items = Set.new([
      '_Start_ ::= -> S # []',
      'S ::= -> E []',
      'E ::= -> E - T []',
      'E ::= -> T []',
      'T ::= -> n []',
      'T ::= -> ( E ) []'
    ])
    expected_channels = Set.new([
      'Spontaneous Channel from E ::= -> E - T [] to E ::= -> E - T []',
      'Spontaneous Channel from S ::= -> E [] to E ::= -> T []',
      'Spontaneous Channel from E ::= -> T [] to T ::= -> n []',
      'Spontaneous Channel from S ::= -> E [] to E ::= -> E - T []',
      'Spontaneous Channel from E ::= -> T [] to T ::= -> ( E ) []',
      'Spontaneous Channel from E ::= -> E - T [] to E ::= -> T []',
      'Spontaneous Channel from _Start_ ::= -> S # [] to S ::= -> E []'
    ])
    assert_equal(expected_items, Set.new(closure.values.map { |item| item.to_s }))
    assert_equal(expected_channels, Set.new(channels.map { |item| item.to_s }))
  end

end
@@ -0,0 +1,60 @@
1
+ require File.dirname(__FILE__)+'/../lib/dhaka'
2
+ require 'arithmetic_grammar'
3
+
4
# Evaluator over ArithmeticGrammar that defines rules for only some productions.
# NOTE(review): this hunk appears to be test/incomplete_arithmetic_evaluator.rb,
# which TestEvaluator expects to raise Dhaka::EvaluatorDefinitionError when it
# is required -- the missing rules are presumably deliberate; confirm before
# adding any.
class ArithmeticEvaluator < Dhaka::Evaluator

  self.grammar = ArithmeticGrammar

  define_evaluation_rules do

    # Binary operators: operands are child nodes 0 and 2.
    for_subtraction { child_nodes[0] - child_nodes[2] }

    for_addition { child_nodes[0] + child_nodes[2] }

    # The left operand is converted to a float before dividing.
    for_division { child_nodes[0].to_f / child_nodes[2] }

    for_multiplication { child_nodes[0] * child_nodes[2] }

    # A literal evaluates to the value carried by its token.
    for_getting_literals { child_nodes[0].token.value }

    for_start_production { child_nodes[0] }

    # Argument lists are plain arrays.
    for_empty_args { [] }

    # Child 0 is the callable produced by a *_function rule, child 2 its arguments.
    for_evaluating_function { child_nodes[0].call(child_nodes[2]) }

    for_concatenating_args { [child_nodes[0]] + child_nodes[2] }

    for_single_args { [child_nodes[0]] }

    # Yields a proc that folds its argument list down to the smallest element.
    for_min_function do
      Proc.new { |args| args.inject { |lowest, elem| elem < lowest ? elem : lowest } }
    end

    # Yields a proc that folds its argument list down to the largest element.
    for_max_function do
      Proc.new { |args| args.inject { |highest, elem| elem > highest ? elem : highest } }
    end

  end

end
@@ -0,0 +1,17 @@
1
+ require File.dirname(__FILE__)+'/../lib/dhaka'
2
+
3
# Grammar fixture used by ParserTest#test_with_a_grammar_that_is_not_SLR.
# Terminal 'd' can start both the E_dc / E_bda expansions and the A_d
# reduction, so the parser must use lookahead to choose between them.
class LALRButNotSLRGrammar < Dhaka::Grammar
  for_symbol(Dhaka::START_SYMBOL_NAME) do
    start(%w[E])
  end

  for_symbol('E') do
    E_Aa(%w[A a])
    E_bAc(%w[b A c])
    E_dc(%w[d c])
    E_bda(%w[b d a])
  end

  for_symbol('A') do
    A_d(%w[d])
  end
end
17
+
@@ -0,0 +1,9 @@
1
+ require File.dirname(__FILE__)+'/../lib/dhaka'
2
+
3
# Grammar fixture that declares no production for Dhaka::START_SYMBOL_NAME.
# TestMalformedGrammar expects Dhaka::Parser.new to reject it with
# Dhaka::NoStartProductionsError.
class MalformedGrammar < Dhaka::Grammar

  for_symbol('goo') do
    foo(%w[boo])
  end

end
@@ -0,0 +1,9 @@
1
+ require "test/unit"
2
+
3
+ require "malformed_grammar"
4
+
5
# Checks that a grammar without start productions cannot be turned into a parser.
class TestMalformedGrammar < Test::Unit::TestCase
  # Building a parser for MalformedGrammar must raise
  # Dhaka::NoStartProductionsError. Uses assert_raise, the spelling used by the
  # other test classes in this suite, rather than the assert_raises alias.
  def test_must_have_a_start_symbol_in_order_to_generate_a_parser
    assert_raise(Dhaka::NoStartProductionsError) { Dhaka::Parser.new(MalformedGrammar) }
  end
end
@@ -0,0 +1,18 @@
1
+ require File.dirname(__FILE__)+'/../lib/dhaka'
2
+
3
# Grammar fixture for parenthesised lists of 'a'/'b' characters. 'Elements' has
# an empty expansion, which ParserTest#test_with_a_grammar_with_nullables_after_terminals
# exercises.
class NullableGrammar < Dhaka::Grammar
  for_symbol(Dhaka::START_SYMBOL_NAME) do
    tuple(%w[Tuple #])
  end

  for_symbol('Tuple') do
    element_list(%w[( Elements )])
  end

  for_symbol('Elements') do
    # Explicit parentheses make it unambiguous that [] is an argument
    # (an empty expansion), not an index operation.
    empty_element_list([])
    concatenate_element_lists(%w[Character Elements])
  end

  for_symbol('Character') do
    literal_a(%w[a])
    literal_b(%w[b])
  end
end
@@ -0,0 +1,168 @@
1
+ #!/usr/bin/env ruby
2
+ require 'test/unit'
3
+ require 'simple_grammar'
4
+ require 'arithmetic_grammar'
5
+ require 'nullable_grammar'
6
+ require 'lalr_but_not_slr_grammar'
7
+ require 'rr_conflict_grammar'
8
+ require 'sr_conflict_grammar'
9
+
10
# Tests Dhaka::Parser: state generation for SimpleGrammar, syntax trees for
# several grammars, and conflict detection.
class ParserTest < Test::Unit::TestCase

  def setup
    @grammar = SimpleGrammar
    @parser = Dhaka::Parser.new(@grammar)
  end

  # Every expected item set must appear among the parser's states. The integer
  # keys only label the expected states; matching is by content.
  def test_parser_generates_states_with_correct_items
    expected_states = {}
    expected_states[1] = Set.new(['_Start_ ::= -> S # [_End_]',
                                  'S ::= -> E [#]',
                                  'E ::= -> E - T [#-]',
                                  'E ::= -> T [#-]',
                                  'T ::= -> n [#-]',
                                  'T ::= -> ( E ) [#-]'])
    expected_states[2] = Set.new(['E ::= T -> [#)-]'])
    expected_states[3] = Set.new(['T ::= n -> [#)-]'])
    expected_states[4] = Set.new(['S ::= E -> [#]',
                                  'E ::= E -> - T [#-]'])
    expected_states[5] = Set.new(['_Start_ ::= S -> # [_End_]'])
    expected_states[6] = Set.new(['T ::= ( -> E ) [#)-]',
                                  'E ::= -> E - T [)-]',
                                  'E ::= -> T [)-]',
                                  'T ::= -> n [)-]',
                                  'T ::= -> ( E ) [)-]'])
    expected_states[7] = Set.new(['E ::= E - -> T [#)-]',
                                  'T ::= -> n [#)-]',
                                  'T ::= -> ( E ) [#)-]'])
    expected_states[8] = Set.new(['E ::= E - T -> [#)-]'])
    expected_states[9] = Set.new(['T ::= ( E -> ) [#)-]',
                                  'E ::= E -> - T [)-]'])
    expected_states[10] = Set.new(['T ::= ( E ) -> [#)-]'])
    expected_states[11] = Set.new(['_Start_ ::= S # -> [_End_]'])
    actual_states = Set.new(@parser.states.map { |state| Set.new(state.items.values.map { |item| item.to_s }) })
    #write_parser(@parser)
    expected_states.each_value do |state|
      assert set_finder(state, actual_states), "expected #{state.to_a}"
    end
  end

  # Builds one Dhaka::Token (with a nil value) per symbol name, looking each
  # name up in the given grammar.
  def build_tokens(token_symbol_names, grammar)
    token_symbol_names.map { |symbol_name| Dhaka::Token.new(grammar.symbol_for_name(symbol_name), nil) }
  end

  # Parses a nested subtraction with the SimpleGrammar parser and checks the
  # linearized syntax tree.
  def test_parser_generates_syntax_tree_given_a_stream_of_symbols
    tokens = build_tokens(['(', 'n', '-', '(', 'n', '-', 'n', ')', ')', '-', 'n', '#'], @grammar)
    syntax_tree = @parser.parse(tokens).syntax_tree
    assert_equal \
      ["literal",
       "term",
       "literal",
       "term",
       "literal",
       "subtraction",
       "parenthetized_expression",
       "subtraction",
       "parenthetized_expression",
       "term",
       "literal",
       "subtraction",
       "expression",
       "start"], syntax_tree.linearize
  end

  # Tokenizes the symbol names against the parser's own grammar, parses them,
  # and returns the linearized syntax tree.
  def get_linearized_parse_result(input, parser)
    parser.parse(build_tokens(input, parser.grammar)).syntax_tree.linearize
  end

  # Parses two inputs with an ArithmeticGrammar parser: one mixing subtraction
  # and division, one calling a function with two arguments.
  def test_with_a_different_grammar_with_division
    grammar = ArithmeticGrammar
    parser = Dhaka::Parser.new(grammar)
    #write_parser(parser)
    parser_input = ['(', 'n', '-', '(', 'n', '/', 'n', '-', 'n', ')', '/', 'n', ')', '#']
    assert_equal \
      ["getting_literals",
       "factor",
       "term",
       "getting_literals",
       "factor",
       "getting_literals",
       "division",
       "term",
       "getting_literals",
       "factor",
       "subtraction",
       "unpacking_parenthetized_expression",
       "factor",
       "getting_literals",
       "division",
       "subtraction",
       "unpacking_parenthetized_expression",
       "factor",
       "term",
       "expression",
       "start_production"], get_linearized_parse_result(parser_input, parser)

    parser_input = ['h', '(', '(', 'n', ')', '-', 'n', ',', 'n', ')', '#']
    assert_equal \
      ["max_function",
       "getting_literals",
       "factor",
       "term",
       "unpacking_parenthetized_expression",
       "factor",
       "term",
       "getting_literals",
       "factor",
       "subtraction",
       "getting_literals",
       "factor",
       "term",
       "single_args",
       "concatenating_args",
       "evaluating_function",
       "function",
       "factor",
       "term",
       "expression",
       "start_production"], get_linearized_parse_result(parser_input, parser)
  end

  # NullableGrammar has an empty expansion; it must be reduced after 'a'.
  def test_with_a_grammar_with_nullables_after_terminals
    grammar = NullableGrammar
    parser = Dhaka::Parser.new(grammar)
    parser_input = ['(', 'a', ')', '#']
    assert_equal \
      ["literal_a",
       "empty_element_list",
       "concatenate_element_lists",
       "element_list",
       "tuple"], get_linearized_parse_result(parser_input, parser)
  end

  def test_with_a_grammar_that_is_not_SLR
    grammar = LALRButNotSLRGrammar
    parser = Dhaka::Parser.new(grammar)
    parser_input = ['b', 'd', 'c']
    assert_equal(["A_d", "E_bAc", "start"], get_linearized_parse_result(parser_input, parser))
  end

  # Building a parser for a grammar with a reduce/reduce conflict must raise.
  def test_with_a_grammar_that_should_generate_an_RR_conflict
    grammar = RRConflictGrammar
    assert_raise(Dhaka::ParserConflictError) { Dhaka::Parser.new(grammar) }
  end

  # Building a parser for a grammar with a shift/reduce conflict must raise.
  def test_with_a_grammar_that_should_generate_an_SR_conflict
    grammar = SRConflictGrammar
    assert_raise(Dhaka::ParserConflictError) { Dhaka::Parser.new(grammar) }
  end

  # Returns true when some member of set2 equals set1, false otherwise.
  # any? stops at the first match and avoids assigning to a block parameter.
  def set_finder(set1, set2)
    set2.any? { |member| member == set1 }
  end

  # Debugging aid: writes the parser's to_dot output to parser.dot in the
  # current directory. Only referenced from commented-out calls in this class.
  def write_parser(parser)
    File.open('parser.dot', 'w') do |file|
      file << parser.to_dot
    end
  end

end
@@ -0,0 +1,23 @@
1
+ require File.dirname(__FILE__)+'/../lib/dhaka'
2
+
3
# Grammar fixture in which 'A' and 'B' both expand to 'x' 'y' and both are
# followed by 'c'. ParserTest expects Dhaka::Parser.new to raise
# Dhaka::ParserConflictError for it.
class RRConflictGrammar < Dhaka::Grammar

  for_symbol(Dhaka::START_SYMBOL_NAME) do
    start(%w[S])
  end

  for_symbol('S') do
    a_expansion(%w[A c d])
    b_expansion(%w[B c e])
  end

  for_symbol('A') do
    xy(%w[x y])
  end

  for_symbol('B') do
    xy_again(%w[x y])
  end

end
23
+