dhaka 1.0.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/dhaka.rb +1 -4
- data/lib/evaluator/evaluator.rb +65 -15
- data/lib/grammar/grammar.rb +30 -0
- data/lib/grammar/grammar_symbol.rb +1 -1
- data/lib/grammar/production.rb +1 -1
- data/lib/parser/action.rb +1 -3
- data/lib/parser/parse_result.rb +9 -7
- data/lib/parser/parse_tree.rb +9 -2
- data/lib/parser/parser.rb +7 -0
- data/lib/parser/parser_run.rb +12 -19
- data/lib/parser/token.rb +10 -7
- data/lib/tokenizer/tokenizer.rb +90 -17
- data/test/all_tests.rb +7 -6
- data/test/arithmetic_evaluator_test.rb +20 -20
- data/test/arithmetic_precedence_evaluator.rb +1 -1
- data/test/arithmetic_precedence_parser_test.rb +7 -7
- data/test/arithmetic_precedence_tokenizer.rb +3 -9
- data/test/arithmetic_test_methods.rb +2 -2
- data/test/arithmetic_tokenizer.rb +3 -9
- data/test/arithmetic_tokenizer_test.rb +14 -10
- data/test/bracket_tokenizer.rb +1 -1
- data/test/chittagong_driver_test.rb +261 -0
- data/test/chittagong_evaluator.rb +218 -47
- data/test/chittagong_evaluator_test.rb +18 -20
- data/test/chittagong_grammar.rb +61 -15
- data/test/chittagong_parser_test.rb +24 -12
- data/test/chittagong_test.rb +148 -6
- data/test/chittagong_tokenizer.rb +33 -21
- data/test/chittagong_tokenizer_test.rb +16 -8
- data/test/compiled_parser_test.rb +14 -12
- data/test/parser_test.rb +16 -16
- metadata +3 -2
@@ -6,42 +6,54 @@ require "fake_logger"
|
|
6
6
|
|
7
7
|
class TestChittagongParser < Test::Unit::TestCase
|
8
8
|
def setup
|
9
|
-
|
9
|
+
fake_logger = FakeLogger.new
|
10
|
+
@parser = Dhaka::Parser.new(ChittagongGrammar, fake_logger)
|
11
|
+
assert_equal(80, fake_logger.warnings.size)
|
12
|
+
assert_equal(0, fake_logger.errors.size)
|
13
|
+
eval(@parser.compile_to_ruby_source_as(:ChittagongParser)) unless Module.const_defined? :ChittagongParser
|
10
14
|
end
|
11
15
|
|
12
16
|
def test_parses_a_series_of_statements
|
13
17
|
token_stream = build_tokens(
|
14
18
|
'newline',
|
15
|
-
'
|
16
|
-
'print', '
|
19
|
+
'word_literal', '=', 'int_literal', 'newline',
|
20
|
+
'print', 'word_literal', 'newline',
|
17
21
|
'newline',
|
18
|
-
'
|
19
|
-
'newline'
|
22
|
+
'word_literal', '=', 'word_literal', 'newline',
|
23
|
+
'newline', Dhaka::END_SYMBOL_NAME
|
20
24
|
)
|
21
25
|
|
22
26
|
result = @parser.parse(token_stream)
|
23
27
|
|
24
28
|
assert_equal(["single_term",
|
25
29
|
"some_terms",
|
30
|
+
"variable_name",
|
26
31
|
"literal",
|
27
|
-
"
|
28
|
-
"
|
32
|
+
"assignment_statement",
|
33
|
+
"main_body_simple_statement",
|
34
|
+
"single_main_body_statement",
|
29
35
|
"single_term",
|
36
|
+
"variable_name",
|
30
37
|
"variable_reference",
|
31
38
|
"print_statement",
|
32
|
-
"
|
39
|
+
"main_body_simple_statement",
|
40
|
+
"multiple_main_body_statements",
|
33
41
|
"single_term",
|
34
42
|
"multiple_terms",
|
43
|
+
"variable_name",
|
44
|
+
"variable_name",
|
35
45
|
"variable_reference",
|
36
|
-
"
|
37
|
-
"
|
46
|
+
"assignment_statement",
|
47
|
+
"main_body_simple_statement",
|
48
|
+
"multiple_main_body_statements",
|
38
49
|
"single_term",
|
39
50
|
"multiple_terms",
|
40
51
|
"some_terms",
|
41
|
-
"program"], result.
|
52
|
+
"program"], result.parse_tree.linearize.collect {|node| node.production.name})
|
53
|
+
|
42
54
|
end
|
43
55
|
|
44
56
|
def build_tokens *symbol_names
|
45
|
-
symbol_names.collect {|symbol_name| Dhaka::Token.new(
|
57
|
+
symbol_names.collect {|symbol_name| Dhaka::Token.new(symbol_name, nil, nil)}
|
46
58
|
end
|
47
59
|
end
|
data/test/chittagong_test.rb
CHANGED
@@ -5,14 +5,28 @@ require "chittagong_tokenizer"
|
|
5
5
|
require "chittagong_evaluator"
|
6
6
|
require "fake_logger"
|
7
7
|
|
8
|
+
unless Object.const_defined? :ChittagongParser
|
9
|
+
eval(Dhaka::Parser.new(ChittagongGrammar, FakeLogger.new).compile_to_ruby_source_as(:ChittagongParser))
|
10
|
+
end
|
11
|
+
|
12
|
+
|
8
13
|
class TestChittagong < Test::Unit::TestCase
|
9
14
|
|
10
|
-
def
|
11
|
-
|
15
|
+
def fact(n)
|
16
|
+
return 1 if n==1
|
17
|
+
n * fact(n-1)
|
18
|
+
end
|
19
|
+
|
20
|
+
def program_output program
|
21
|
+
output_stream = []
|
22
|
+
parse_result = ChittagongParser.parse(ChittagongTokenizer.tokenize(program))
|
23
|
+
result = ChittagongEvaluator.new([{}], output_stream).evaluate(parse_result.parse_tree)
|
24
|
+
return result, output_stream
|
12
25
|
end
|
13
26
|
|
14
|
-
def
|
27
|
+
def test_iterative_fibonacci_without_functions
|
15
28
|
program = "
|
29
|
+
|
16
30
|
n = 1
|
17
31
|
a = 0
|
18
32
|
b = 1
|
@@ -25,9 +39,137 @@ class TestChittagong < Test::Unit::TestCase
|
|
25
39
|
end
|
26
40
|
|
27
41
|
"
|
28
|
-
|
29
|
-
output_stream =
|
30
|
-
|
42
|
+
|
43
|
+
result, output_stream = program_output(program)
|
44
|
+
assert_equal(["1", "1", "2", "3", "5", "8", "13", "21", "34"], output_stream)
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_iterative_fibonacci_with_functions
|
48
|
+
program = "
|
49
|
+
|
50
|
+
def fib(n)
|
51
|
+
i = 0
|
52
|
+
a = 0
|
53
|
+
b = 1
|
54
|
+
while i < n
|
55
|
+
c = a
|
56
|
+
a = b
|
57
|
+
b = c + b
|
58
|
+
i = i + 1
|
59
|
+
end
|
60
|
+
return b
|
61
|
+
end
|
62
|
+
|
63
|
+
x = 0
|
64
|
+
while x < 9
|
65
|
+
print fib(x)
|
66
|
+
x = x + 1
|
67
|
+
end
|
68
|
+
|
69
|
+
"
|
70
|
+
result, output_stream = program_output(program)
|
71
|
+
assert_equal(["1", "1", "2", "3", "5", "8", "13", "21", "34"], output_stream)
|
72
|
+
end
|
73
|
+
|
74
|
+
def test_recursive_fibonacci
|
75
|
+
program = "
|
76
|
+
|
77
|
+
def fib(n)
|
78
|
+
if n == 0
|
79
|
+
return 1
|
80
|
+
end
|
81
|
+
if n == -1
|
82
|
+
return 0
|
83
|
+
end
|
84
|
+
return fib(n-1) + fib(n-2)
|
85
|
+
end
|
86
|
+
|
87
|
+
x = 0
|
88
|
+
while x < 9
|
89
|
+
print fib(x)
|
90
|
+
x = x + 1
|
91
|
+
end
|
92
|
+
|
93
|
+
"
|
94
|
+
result, output_stream = program_output(program)
|
31
95
|
assert_equal(["1", "1", "2", "3", "5", "8", "13", "21", "34"], output_stream)
|
32
96
|
end
|
97
|
+
|
98
|
+
def test_recursive_factorial
|
99
|
+
program = "
|
100
|
+
def fact(n)
|
101
|
+
if n == 1
|
102
|
+
return 1
|
103
|
+
end
|
104
|
+
return n * fact(n-1)
|
105
|
+
end
|
106
|
+
|
107
|
+
n = 1
|
108
|
+
while n < 11
|
109
|
+
print fact(n)
|
110
|
+
n = n+1
|
111
|
+
end"
|
112
|
+
|
113
|
+
result, output_stream = program_output(program)
|
114
|
+
assert_equal((1..10).collect {|i| fact(i).to_s}, output_stream)
|
115
|
+
end
|
116
|
+
|
117
|
+
def test_various_things
|
118
|
+
program = "
|
119
|
+
|
120
|
+
a = 1
|
121
|
+
b = 2
|
122
|
+
c = 3
|
123
|
+
|
124
|
+
def foo(a, b, c)
|
125
|
+
print a
|
126
|
+
print b
|
127
|
+
print c
|
128
|
+
return c
|
129
|
+
print 999
|
130
|
+
end
|
131
|
+
|
132
|
+
foo(4, a, 6)
|
133
|
+
|
134
|
+
"
|
135
|
+
|
136
|
+
result, output_stream = program_output(program)
|
137
|
+
assert_equal(["4", "1", "6"], output_stream)
|
138
|
+
end
|
139
|
+
|
140
|
+
def test_if_else_block
|
141
|
+
program = "
|
142
|
+
|
143
|
+
def foo(a, b)
|
144
|
+
if a < b
|
145
|
+
print 1
|
146
|
+
else
|
147
|
+
print 2
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
foo(1, 2)
|
152
|
+
foo(2, 1)
|
153
|
+
|
154
|
+
"
|
155
|
+
|
156
|
+
result, output_stream = program_output(program)
|
157
|
+
assert_equal(["1", "2"], output_stream)
|
158
|
+
end
|
159
|
+
|
160
|
+
def test_no_arg_functions
|
161
|
+
program = "
|
162
|
+
|
163
|
+
def foo()
|
164
|
+
print 1
|
165
|
+
print 2
|
166
|
+
end
|
167
|
+
foo()
|
168
|
+
|
169
|
+
"
|
170
|
+
|
171
|
+
result, output_stream = program_output(program)
|
172
|
+
assert_equal(["1", "2"], output_stream)
|
173
|
+
end
|
174
|
+
|
33
175
|
end
|
@@ -4,32 +4,39 @@ require 'chittagong_grammar'
|
|
4
4
|
|
5
5
|
class ChittagongTokenizer < Dhaka::Tokenizer
|
6
6
|
|
7
|
-
KEYWORDS = ['print', 'if', 'else', 'end', 'while']
|
7
|
+
KEYWORDS = ['print', 'if', 'else', 'end', 'while', 'def', 'return']
|
8
8
|
|
9
9
|
digits = ('0'..'9').to_a
|
10
10
|
letters = ('a'..'z').to_a
|
11
11
|
parenths = ['(', ')']
|
12
|
-
operators = ['-', '+', '/', '*', '^', '!', '>', '<'
|
12
|
+
operators = ['-', '+', '/', '*', '^', '!', '>', '<']
|
13
|
+
equal_sign = ['=']
|
13
14
|
whitespace = [' ']
|
15
|
+
arg_separator = [',']
|
14
16
|
newline = ["\n"]
|
15
17
|
|
16
|
-
all_characters = digits + letters + parenths + operators + whitespace + newline
|
18
|
+
all_characters = digits + letters + parenths + operators + whitespace + newline + arg_separator + equal_sign
|
17
19
|
|
18
20
|
for_state Dhaka::TOKENIZER_IDLE_STATE do
|
19
|
-
for_characters(all_characters - (digits + letters + newline + whitespace)) do
|
20
|
-
|
21
|
+
for_characters(all_characters - (digits + letters + newline + whitespace + equal_sign)) do
|
22
|
+
create_token(curr_char, nil)
|
21
23
|
advance
|
22
24
|
end
|
25
|
+
for_characters(equal_sign) do
|
26
|
+
create_token('=', nil)
|
27
|
+
advance
|
28
|
+
switch_to :get_equal_sign_operator
|
29
|
+
end
|
23
30
|
for_characters digits do
|
24
|
-
|
31
|
+
create_token('int_literal', '')
|
25
32
|
switch_to :get_integer_literal
|
26
33
|
end
|
27
34
|
for_characters letters do
|
28
|
-
|
35
|
+
create_token(nil, '')
|
29
36
|
switch_to :get_word_literal
|
30
37
|
end
|
31
38
|
for_character newline do
|
32
|
-
|
39
|
+
create_token('newline', nil)
|
33
40
|
advance
|
34
41
|
end
|
35
42
|
for_character whitespace do
|
@@ -37,40 +44,45 @@ class ChittagongTokenizer < Dhaka::Tokenizer
|
|
37
44
|
end
|
38
45
|
end
|
39
46
|
|
47
|
+
for_state :get_equal_sign_operator do
|
48
|
+
for_characters all_characters - equal_sign do
|
49
|
+
switch_to Dhaka::TOKENIZER_IDLE_STATE
|
50
|
+
end
|
51
|
+
for_character equal_sign do
|
52
|
+
curr_token.symbol_name += '='
|
53
|
+
advance
|
54
|
+
switch_to Dhaka::TOKENIZER_IDLE_STATE
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
40
58
|
for_state :get_integer_literal do
|
41
59
|
for_characters all_characters - digits do
|
42
|
-
tokens << integer_literal_token(accumulator.to_i)
|
43
60
|
switch_to Dhaka::TOKENIZER_IDLE_STATE
|
44
61
|
end
|
45
62
|
for_characters digits do
|
46
|
-
|
63
|
+
curr_token.value += curr_char
|
47
64
|
advance
|
48
|
-
tokens << integer_literal_token(accumulator.to_i) unless curr_char
|
49
65
|
end
|
50
66
|
end
|
51
67
|
|
52
68
|
for_state :get_word_literal do
|
53
69
|
for_characters all_characters - letters do
|
54
|
-
|
70
|
+
curr_token.symbol_name = word_literal_symbol(curr_token.value)
|
55
71
|
switch_to Dhaka::TOKENIZER_IDLE_STATE
|
56
72
|
end
|
57
73
|
for_characters letters do
|
58
|
-
|
74
|
+
curr_token.value += curr_char
|
59
75
|
advance
|
60
|
-
|
76
|
+
curr_token.symbol_name = word_literal_symbol(curr_token.value) unless curr_char
|
61
77
|
end
|
62
78
|
end
|
63
79
|
|
64
|
-
def
|
80
|
+
def word_literal_symbol(value)
|
65
81
|
if KEYWORDS.include? value
|
66
|
-
|
82
|
+
value
|
67
83
|
else
|
68
|
-
|
84
|
+
'word_literal'
|
69
85
|
end
|
70
86
|
end
|
71
87
|
|
72
|
-
def integer_literal_token(value)
|
73
|
-
Dhaka::Token.new(ChittagongGrammar.symbol_for_name('int_literal'), value)
|
74
|
-
end
|
75
|
-
|
76
88
|
end
|
@@ -7,6 +7,7 @@ class TestChittagongTokenizer < Test::Unit::TestCase
|
|
7
7
|
input = "
|
8
8
|
x = 2 * 4
|
9
9
|
y = 2 * x
|
10
|
+
v = x == y
|
10
11
|
if x > y
|
11
12
|
print x
|
12
13
|
else
|
@@ -14,32 +15,39 @@ class TestChittagongTokenizer < Test::Unit::TestCase
|
|
14
15
|
end
|
15
16
|
"
|
16
17
|
assert_equal(["newline",
|
17
|
-
"
|
18
|
+
"word_literal",
|
18
19
|
"=",
|
19
20
|
"int_literal",
|
20
21
|
"*",
|
21
22
|
"int_literal",
|
22
23
|
"newline",
|
23
|
-
"
|
24
|
+
"word_literal",
|
24
25
|
"=",
|
25
26
|
"int_literal",
|
26
27
|
"*",
|
27
|
-
"
|
28
|
+
"word_literal",
|
29
|
+
"newline",
|
30
|
+
"word_literal",
|
31
|
+
"=",
|
32
|
+
"word_literal",
|
33
|
+
"==",
|
34
|
+
"word_literal",
|
28
35
|
"newline",
|
29
36
|
"if",
|
30
|
-
"
|
37
|
+
"word_literal",
|
31
38
|
">",
|
32
|
-
"
|
39
|
+
"word_literal",
|
33
40
|
"newline",
|
34
41
|
"print",
|
35
|
-
"
|
42
|
+
"word_literal",
|
36
43
|
"newline",
|
37
44
|
"else",
|
38
45
|
"newline",
|
39
46
|
"print",
|
40
|
-
"
|
47
|
+
"word_literal",
|
41
48
|
"newline",
|
42
49
|
"end",
|
43
|
-
"newline"
|
50
|
+
"newline",
|
51
|
+
Dhaka::END_SYMBOL_NAME], ChittagongTokenizer.tokenize(input).collect {|token| token.symbol_name})
|
44
52
|
end
|
45
53
|
end
|
@@ -9,8 +9,8 @@ eval(Dhaka::Parser.new(ArithmeticGrammar).compile_to_ruby_source_as('CompiledAri
|
|
9
9
|
class TestCompiledParser < Test::Unit::TestCase
|
10
10
|
include ArithmeticTestMethods
|
11
11
|
|
12
|
-
def
|
13
|
-
|
12
|
+
def test_compiled_parser_generates_parse_tree_for_simple_grammar
|
13
|
+
parse_tree = Foo.parse(build_tokens(['(','n','-','(','n','-','n',')',')','-','n','#',Dhaka::END_SYMBOL_NAME])).parse_tree
|
14
14
|
assert_equal \
|
15
15
|
["literal",
|
16
16
|
"term",
|
@@ -25,11 +25,11 @@ class TestCompiledParser < Test::Unit::TestCase
|
|
25
25
|
"literal",
|
26
26
|
"subtraction",
|
27
27
|
"expression",
|
28
|
-
"start"],
|
28
|
+
"start"], parse_tree.linearize.collect {|node| node.production.name}
|
29
29
|
end
|
30
30
|
|
31
|
-
def
|
32
|
-
parser_input = ['(','n','-','(','n','/','n','-','n',')','/','n',')']
|
31
|
+
def test_compiled_parser_generates_parse_tree_for_arithmetic_grammar
|
32
|
+
parser_input = ['(','n','-','(','n','/','n','-','n',')','/','n',')',Dhaka::END_SYMBOL_NAME]
|
33
33
|
assert_equal \
|
34
34
|
["getting_literals",
|
35
35
|
"factor",
|
@@ -50,21 +50,23 @@ class TestCompiledParser < Test::Unit::TestCase
|
|
50
50
|
"unpacking_parenthetized_expression",
|
51
51
|
"factor",
|
52
52
|
"term",
|
53
|
-
"expression"], parse(build_tokens(parser_input
|
53
|
+
"expression"], parse(build_tokens(parser_input)).linearize.collect {|node| node.production.name}
|
54
54
|
end
|
55
55
|
|
56
|
-
def
|
57
|
-
|
56
|
+
def test_parse_result_has_nil_parse_tree_if_empty_token_array
|
57
|
+
parse_result = CompiledArithmeticParser.parse([])
|
58
|
+
assert !CompiledArithmeticParser.parse([]).has_error?
|
59
|
+
assert_nil parse_result.parse_tree
|
58
60
|
end
|
59
61
|
|
60
62
|
def test_parser_returns_error_result_with_index_of_bad_token_if_parse_error
|
61
|
-
parse_result = CompiledArithmeticParser.parse(build_tokens(['(', '-', ')']
|
63
|
+
parse_result = CompiledArithmeticParser.parse(build_tokens(['(', '-', ')',Dhaka::END_SYMBOL_NAME]))
|
62
64
|
assert parse_result.has_error?
|
63
|
-
assert_equal
|
65
|
+
assert_equal '-', parse_result.unexpected_token.symbol_name
|
64
66
|
end
|
65
67
|
|
66
|
-
def build_tokens(token_symbol_names
|
67
|
-
token_symbol_names.collect {|symbol_name| Dhaka::Token.new(
|
68
|
+
def build_tokens(token_symbol_names)
|
69
|
+
token_symbol_names.collect {|symbol_name| Dhaka::Token.new(symbol_name, nil, nil)}
|
68
70
|
end
|
69
71
|
|
70
72
|
end
|