dhaka 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/dhaka.rb +1 -4
- data/lib/evaluator/evaluator.rb +65 -15
- data/lib/grammar/grammar.rb +30 -0
- data/lib/grammar/grammar_symbol.rb +1 -1
- data/lib/grammar/production.rb +1 -1
- data/lib/parser/action.rb +1 -3
- data/lib/parser/parse_result.rb +9 -7
- data/lib/parser/parse_tree.rb +9 -2
- data/lib/parser/parser.rb +7 -0
- data/lib/parser/parser_run.rb +12 -19
- data/lib/parser/token.rb +10 -7
- data/lib/tokenizer/tokenizer.rb +90 -17
- data/test/all_tests.rb +7 -6
- data/test/arithmetic_evaluator_test.rb +20 -20
- data/test/arithmetic_precedence_evaluator.rb +1 -1
- data/test/arithmetic_precedence_parser_test.rb +7 -7
- data/test/arithmetic_precedence_tokenizer.rb +3 -9
- data/test/arithmetic_test_methods.rb +2 -2
- data/test/arithmetic_tokenizer.rb +3 -9
- data/test/arithmetic_tokenizer_test.rb +14 -10
- data/test/bracket_tokenizer.rb +1 -1
- data/test/chittagong_driver_test.rb +261 -0
- data/test/chittagong_evaluator.rb +218 -47
- data/test/chittagong_evaluator_test.rb +18 -20
- data/test/chittagong_grammar.rb +61 -15
- data/test/chittagong_parser_test.rb +24 -12
- data/test/chittagong_test.rb +148 -6
- data/test/chittagong_tokenizer.rb +33 -21
- data/test/chittagong_tokenizer_test.rb +16 -8
- data/test/compiled_parser_test.rb +14 -12
- data/test/parser_test.rb +16 -16
- metadata +3 -2
@@ -6,42 +6,54 @@ require "fake_logger"
|
|
6
6
|
|
7
7
|
class TestChittagongParser < Test::Unit::TestCase
|
8
8
|
def setup
|
9
|
-
|
9
|
+
fake_logger = FakeLogger.new
|
10
|
+
@parser = Dhaka::Parser.new(ChittagongGrammar, fake_logger)
|
11
|
+
assert_equal(80, fake_logger.warnings.size)
|
12
|
+
assert_equal(0, fake_logger.errors.size)
|
13
|
+
eval(@parser.compile_to_ruby_source_as(:ChittagongParser)) unless Module.const_defined? :ChittagongParser
|
10
14
|
end
|
11
15
|
|
12
16
|
def test_parses_a_series_of_statements
|
13
17
|
token_stream = build_tokens(
|
14
18
|
'newline',
|
15
|
-
'
|
16
|
-
'print', '
|
19
|
+
'word_literal', '=', 'int_literal', 'newline',
|
20
|
+
'print', 'word_literal', 'newline',
|
17
21
|
'newline',
|
18
|
-
'
|
19
|
-
'newline'
|
22
|
+
'word_literal', '=', 'word_literal', 'newline',
|
23
|
+
'newline', Dhaka::END_SYMBOL_NAME
|
20
24
|
)
|
21
25
|
|
22
26
|
result = @parser.parse(token_stream)
|
23
27
|
|
24
28
|
assert_equal(["single_term",
|
25
29
|
"some_terms",
|
30
|
+
"variable_name",
|
26
31
|
"literal",
|
27
|
-
"
|
28
|
-
"
|
32
|
+
"assignment_statement",
|
33
|
+
"main_body_simple_statement",
|
34
|
+
"single_main_body_statement",
|
29
35
|
"single_term",
|
36
|
+
"variable_name",
|
30
37
|
"variable_reference",
|
31
38
|
"print_statement",
|
32
|
-
"
|
39
|
+
"main_body_simple_statement",
|
40
|
+
"multiple_main_body_statements",
|
33
41
|
"single_term",
|
34
42
|
"multiple_terms",
|
43
|
+
"variable_name",
|
44
|
+
"variable_name",
|
35
45
|
"variable_reference",
|
36
|
-
"
|
37
|
-
"
|
46
|
+
"assignment_statement",
|
47
|
+
"main_body_simple_statement",
|
48
|
+
"multiple_main_body_statements",
|
38
49
|
"single_term",
|
39
50
|
"multiple_terms",
|
40
51
|
"some_terms",
|
41
|
-
"program"], result.
|
52
|
+
"program"], result.parse_tree.linearize.collect {|node| node.production.name})
|
53
|
+
|
42
54
|
end
|
43
55
|
|
44
56
|
def build_tokens *symbol_names
|
45
|
-
symbol_names.collect {|symbol_name| Dhaka::Token.new(
|
57
|
+
symbol_names.collect {|symbol_name| Dhaka::Token.new(symbol_name, nil, nil)}
|
46
58
|
end
|
47
59
|
end
|
data/test/chittagong_test.rb
CHANGED
@@ -5,14 +5,28 @@ require "chittagong_tokenizer"
|
|
5
5
|
require "chittagong_evaluator"
|
6
6
|
require "fake_logger"
|
7
7
|
|
8
|
+
unless Object.const_defined? :ChittagongParser
|
9
|
+
eval(Dhaka::Parser.new(ChittagongGrammar, FakeLogger.new).compile_to_ruby_source_as(:ChittagongParser))
|
10
|
+
end
|
11
|
+
|
12
|
+
|
8
13
|
class TestChittagong < Test::Unit::TestCase
|
9
14
|
|
10
|
-
def
|
11
|
-
|
15
|
+
def fact(n)
|
16
|
+
return 1 if n==1
|
17
|
+
n * fact(n-1)
|
18
|
+
end
|
19
|
+
|
20
|
+
def program_output program
|
21
|
+
output_stream = []
|
22
|
+
parse_result = ChittagongParser.parse(ChittagongTokenizer.tokenize(program))
|
23
|
+
result = ChittagongEvaluator.new([{}], output_stream).evaluate(parse_result.parse_tree)
|
24
|
+
return result, output_stream
|
12
25
|
end
|
13
26
|
|
14
|
-
def
|
27
|
+
def test_iterative_fibonacci_without_functions
|
15
28
|
program = "
|
29
|
+
|
16
30
|
n = 1
|
17
31
|
a = 0
|
18
32
|
b = 1
|
@@ -25,9 +39,137 @@ class TestChittagong < Test::Unit::TestCase
|
|
25
39
|
end
|
26
40
|
|
27
41
|
"
|
28
|
-
|
29
|
-
output_stream =
|
30
|
-
|
42
|
+
|
43
|
+
result, output_stream = program_output(program)
|
44
|
+
assert_equal(["1", "1", "2", "3", "5", "8", "13", "21", "34"], output_stream)
|
45
|
+
end
|
46
|
+
|
47
|
+
def test_iterative_fibonacci_with_functions
|
48
|
+
program = "
|
49
|
+
|
50
|
+
def fib(n)
|
51
|
+
i = 0
|
52
|
+
a = 0
|
53
|
+
b = 1
|
54
|
+
while i < n
|
55
|
+
c = a
|
56
|
+
a = b
|
57
|
+
b = c + b
|
58
|
+
i = i + 1
|
59
|
+
end
|
60
|
+
return b
|
61
|
+
end
|
62
|
+
|
63
|
+
x = 0
|
64
|
+
while x < 9
|
65
|
+
print fib(x)
|
66
|
+
x = x + 1
|
67
|
+
end
|
68
|
+
|
69
|
+
"
|
70
|
+
result, output_stream = program_output(program)
|
71
|
+
assert_equal(["1", "1", "2", "3", "5", "8", "13", "21", "34"], output_stream)
|
72
|
+
end
|
73
|
+
|
74
|
+
def test_recursive_fibonacci
|
75
|
+
program = "
|
76
|
+
|
77
|
+
def fib(n)
|
78
|
+
if n == 0
|
79
|
+
return 1
|
80
|
+
end
|
81
|
+
if n == -1
|
82
|
+
return 0
|
83
|
+
end
|
84
|
+
return fib(n-1) + fib(n-2)
|
85
|
+
end
|
86
|
+
|
87
|
+
x = 0
|
88
|
+
while x < 9
|
89
|
+
print fib(x)
|
90
|
+
x = x + 1
|
91
|
+
end
|
92
|
+
|
93
|
+
"
|
94
|
+
result, output_stream = program_output(program)
|
31
95
|
assert_equal(["1", "1", "2", "3", "5", "8", "13", "21", "34"], output_stream)
|
32
96
|
end
|
97
|
+
|
98
|
+
def test_recursive_factorial
|
99
|
+
program = "
|
100
|
+
def fact(n)
|
101
|
+
if n == 1
|
102
|
+
return 1
|
103
|
+
end
|
104
|
+
return n * fact(n-1)
|
105
|
+
end
|
106
|
+
|
107
|
+
n = 1
|
108
|
+
while n < 11
|
109
|
+
print fact(n)
|
110
|
+
n = n+1
|
111
|
+
end"
|
112
|
+
|
113
|
+
result, output_stream = program_output(program)
|
114
|
+
assert_equal((1..10).collect {|i| fact(i).to_s}, output_stream)
|
115
|
+
end
|
116
|
+
|
117
|
+
def test_various_things
|
118
|
+
program = "
|
119
|
+
|
120
|
+
a = 1
|
121
|
+
b = 2
|
122
|
+
c = 3
|
123
|
+
|
124
|
+
def foo(a, b, c)
|
125
|
+
print a
|
126
|
+
print b
|
127
|
+
print c
|
128
|
+
return c
|
129
|
+
print 999
|
130
|
+
end
|
131
|
+
|
132
|
+
foo(4, a, 6)
|
133
|
+
|
134
|
+
"
|
135
|
+
|
136
|
+
result, output_stream = program_output(program)
|
137
|
+
assert_equal(["4", "1", "6"], output_stream)
|
138
|
+
end
|
139
|
+
|
140
|
+
def test_if_else_block
|
141
|
+
program = "
|
142
|
+
|
143
|
+
def foo(a, b)
|
144
|
+
if a < b
|
145
|
+
print 1
|
146
|
+
else
|
147
|
+
print 2
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
foo(1, 2)
|
152
|
+
foo(2, 1)
|
153
|
+
|
154
|
+
"
|
155
|
+
|
156
|
+
result, output_stream = program_output(program)
|
157
|
+
assert_equal(["1", "2"], output_stream)
|
158
|
+
end
|
159
|
+
|
160
|
+
def test_no_arg_functions
|
161
|
+
program = "
|
162
|
+
|
163
|
+
def foo()
|
164
|
+
print 1
|
165
|
+
print 2
|
166
|
+
end
|
167
|
+
foo()
|
168
|
+
|
169
|
+
"
|
170
|
+
|
171
|
+
result, output_stream = program_output(program)
|
172
|
+
assert_equal(["1", "2"], output_stream)
|
173
|
+
end
|
174
|
+
|
33
175
|
end
|
@@ -4,32 +4,39 @@ require 'chittagong_grammar'
|
|
4
4
|
|
5
5
|
class ChittagongTokenizer < Dhaka::Tokenizer
|
6
6
|
|
7
|
-
KEYWORDS = ['print', 'if', 'else', 'end', 'while']
|
7
|
+
KEYWORDS = ['print', 'if', 'else', 'end', 'while', 'def', 'return']
|
8
8
|
|
9
9
|
digits = ('0'..'9').to_a
|
10
10
|
letters = ('a'..'z').to_a
|
11
11
|
parenths = ['(', ')']
|
12
|
-
operators = ['-', '+', '/', '*', '^', '!', '>', '<'
|
12
|
+
operators = ['-', '+', '/', '*', '^', '!', '>', '<']
|
13
|
+
equal_sign = ['=']
|
13
14
|
whitespace = [' ']
|
15
|
+
arg_separator = [',']
|
14
16
|
newline = ["\n"]
|
15
17
|
|
16
|
-
all_characters = digits + letters + parenths + operators + whitespace + newline
|
18
|
+
all_characters = digits + letters + parenths + operators + whitespace + newline + arg_separator + equal_sign
|
17
19
|
|
18
20
|
for_state Dhaka::TOKENIZER_IDLE_STATE do
|
19
|
-
for_characters(all_characters - (digits + letters + newline + whitespace)) do
|
20
|
-
|
21
|
+
for_characters(all_characters - (digits + letters + newline + whitespace + equal_sign)) do
|
22
|
+
create_token(curr_char, nil)
|
21
23
|
advance
|
22
24
|
end
|
25
|
+
for_characters(equal_sign) do
|
26
|
+
create_token('=', nil)
|
27
|
+
advance
|
28
|
+
switch_to :get_equal_sign_operator
|
29
|
+
end
|
23
30
|
for_characters digits do
|
24
|
-
|
31
|
+
create_token('int_literal', '')
|
25
32
|
switch_to :get_integer_literal
|
26
33
|
end
|
27
34
|
for_characters letters do
|
28
|
-
|
35
|
+
create_token(nil, '')
|
29
36
|
switch_to :get_word_literal
|
30
37
|
end
|
31
38
|
for_character newline do
|
32
|
-
|
39
|
+
create_token('newline', nil)
|
33
40
|
advance
|
34
41
|
end
|
35
42
|
for_character whitespace do
|
@@ -37,40 +44,45 @@ class ChittagongTokenizer < Dhaka::Tokenizer
|
|
37
44
|
end
|
38
45
|
end
|
39
46
|
|
47
|
+
for_state :get_equal_sign_operator do
|
48
|
+
for_characters all_characters - equal_sign do
|
49
|
+
switch_to Dhaka::TOKENIZER_IDLE_STATE
|
50
|
+
end
|
51
|
+
for_character equal_sign do
|
52
|
+
curr_token.symbol_name += '='
|
53
|
+
advance
|
54
|
+
switch_to Dhaka::TOKENIZER_IDLE_STATE
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
40
58
|
for_state :get_integer_literal do
|
41
59
|
for_characters all_characters - digits do
|
42
|
-
tokens << integer_literal_token(accumulator.to_i)
|
43
60
|
switch_to Dhaka::TOKENIZER_IDLE_STATE
|
44
61
|
end
|
45
62
|
for_characters digits do
|
46
|
-
|
63
|
+
curr_token.value += curr_char
|
47
64
|
advance
|
48
|
-
tokens << integer_literal_token(accumulator.to_i) unless curr_char
|
49
65
|
end
|
50
66
|
end
|
51
67
|
|
52
68
|
for_state :get_word_literal do
|
53
69
|
for_characters all_characters - letters do
|
54
|
-
|
70
|
+
curr_token.symbol_name = word_literal_symbol(curr_token.value)
|
55
71
|
switch_to Dhaka::TOKENIZER_IDLE_STATE
|
56
72
|
end
|
57
73
|
for_characters letters do
|
58
|
-
|
74
|
+
curr_token.value += curr_char
|
59
75
|
advance
|
60
|
-
|
76
|
+
curr_token.symbol_name = word_literal_symbol(curr_token.value) unless curr_char
|
61
77
|
end
|
62
78
|
end
|
63
79
|
|
64
|
-
def
|
80
|
+
def word_literal_symbol(value)
|
65
81
|
if KEYWORDS.include? value
|
66
|
-
|
82
|
+
value
|
67
83
|
else
|
68
|
-
|
84
|
+
'word_literal'
|
69
85
|
end
|
70
86
|
end
|
71
87
|
|
72
|
-
def integer_literal_token(value)
|
73
|
-
Dhaka::Token.new(ChittagongGrammar.symbol_for_name('int_literal'), value)
|
74
|
-
end
|
75
|
-
|
76
88
|
end
|
@@ -7,6 +7,7 @@ class TestChittagongTokenizer < Test::Unit::TestCase
|
|
7
7
|
input = "
|
8
8
|
x = 2 * 4
|
9
9
|
y = 2 * x
|
10
|
+
v = x == y
|
10
11
|
if x > y
|
11
12
|
print x
|
12
13
|
else
|
@@ -14,32 +15,39 @@ class TestChittagongTokenizer < Test::Unit::TestCase
|
|
14
15
|
end
|
15
16
|
"
|
16
17
|
assert_equal(["newline",
|
17
|
-
"
|
18
|
+
"word_literal",
|
18
19
|
"=",
|
19
20
|
"int_literal",
|
20
21
|
"*",
|
21
22
|
"int_literal",
|
22
23
|
"newline",
|
23
|
-
"
|
24
|
+
"word_literal",
|
24
25
|
"=",
|
25
26
|
"int_literal",
|
26
27
|
"*",
|
27
|
-
"
|
28
|
+
"word_literal",
|
29
|
+
"newline",
|
30
|
+
"word_literal",
|
31
|
+
"=",
|
32
|
+
"word_literal",
|
33
|
+
"==",
|
34
|
+
"word_literal",
|
28
35
|
"newline",
|
29
36
|
"if",
|
30
|
-
"
|
37
|
+
"word_literal",
|
31
38
|
">",
|
32
|
-
"
|
39
|
+
"word_literal",
|
33
40
|
"newline",
|
34
41
|
"print",
|
35
|
-
"
|
42
|
+
"word_literal",
|
36
43
|
"newline",
|
37
44
|
"else",
|
38
45
|
"newline",
|
39
46
|
"print",
|
40
|
-
"
|
47
|
+
"word_literal",
|
41
48
|
"newline",
|
42
49
|
"end",
|
43
|
-
"newline"
|
50
|
+
"newline",
|
51
|
+
Dhaka::END_SYMBOL_NAME], ChittagongTokenizer.tokenize(input).collect {|token| token.symbol_name})
|
44
52
|
end
|
45
53
|
end
|
@@ -9,8 +9,8 @@ eval(Dhaka::Parser.new(ArithmeticGrammar).compile_to_ruby_source_as('CompiledAri
|
|
9
9
|
class TestCompiledParser < Test::Unit::TestCase
|
10
10
|
include ArithmeticTestMethods
|
11
11
|
|
12
|
-
def
|
13
|
-
|
12
|
+
def test_compiled_parser_generates_parse_tree_for_simple_grammar
|
13
|
+
parse_tree = Foo.parse(build_tokens(['(','n','-','(','n','-','n',')',')','-','n','#',Dhaka::END_SYMBOL_NAME])).parse_tree
|
14
14
|
assert_equal \
|
15
15
|
["literal",
|
16
16
|
"term",
|
@@ -25,11 +25,11 @@ class TestCompiledParser < Test::Unit::TestCase
|
|
25
25
|
"literal",
|
26
26
|
"subtraction",
|
27
27
|
"expression",
|
28
|
-
"start"],
|
28
|
+
"start"], parse_tree.linearize.collect {|node| node.production.name}
|
29
29
|
end
|
30
30
|
|
31
|
-
def
|
32
|
-
parser_input = ['(','n','-','(','n','/','n','-','n',')','/','n',')']
|
31
|
+
def test_compiled_parser_generates_parse_tree_for_arithmetic_grammar
|
32
|
+
parser_input = ['(','n','-','(','n','/','n','-','n',')','/','n',')',Dhaka::END_SYMBOL_NAME]
|
33
33
|
assert_equal \
|
34
34
|
["getting_literals",
|
35
35
|
"factor",
|
@@ -50,21 +50,23 @@ class TestCompiledParser < Test::Unit::TestCase
|
|
50
50
|
"unpacking_parenthetized_expression",
|
51
51
|
"factor",
|
52
52
|
"term",
|
53
|
-
"expression"], parse(build_tokens(parser_input
|
53
|
+
"expression"], parse(build_tokens(parser_input)).linearize.collect {|node| node.production.name}
|
54
54
|
end
|
55
55
|
|
56
|
-
def
|
57
|
-
|
56
|
+
def test_parse_result_has_nil_parse_tree_if_empty_token_array
|
57
|
+
parse_result = CompiledArithmeticParser.parse([])
|
58
|
+
assert !CompiledArithmeticParser.parse([]).has_error?
|
59
|
+
assert_nil parse_result.parse_tree
|
58
60
|
end
|
59
61
|
|
60
62
|
def test_parser_returns_error_result_with_index_of_bad_token_if_parse_error
|
61
|
-
parse_result = CompiledArithmeticParser.parse(build_tokens(['(', '-', ')']
|
63
|
+
parse_result = CompiledArithmeticParser.parse(build_tokens(['(', '-', ')',Dhaka::END_SYMBOL_NAME]))
|
62
64
|
assert parse_result.has_error?
|
63
|
-
assert_equal
|
65
|
+
assert_equal '-', parse_result.unexpected_token.symbol_name
|
64
66
|
end
|
65
67
|
|
66
|
-
def build_tokens(token_symbol_names
|
67
|
-
token_symbol_names.collect {|symbol_name| Dhaka::Token.new(
|
68
|
+
def build_tokens(token_symbol_names)
|
69
|
+
token_symbol_names.collect {|symbol_name| Dhaka::Token.new(symbol_name, nil, nil)}
|
68
70
|
end
|
69
71
|
|
70
72
|
end
|