foreverman-dhaka 2.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. data/Rakefile +64 -0
  2. data/lib/dhaka.rb +62 -0
  3. data/lib/dhaka/dot/dot.rb +29 -0
  4. data/lib/dhaka/evaluator/evaluator.rb +133 -0
  5. data/lib/dhaka/grammar/closure_hash.rb +15 -0
  6. data/lib/dhaka/grammar/grammar.rb +236 -0
  7. data/lib/dhaka/grammar/grammar_symbol.rb +27 -0
  8. data/lib/dhaka/grammar/precedence.rb +19 -0
  9. data/lib/dhaka/grammar/production.rb +36 -0
  10. data/lib/dhaka/lexer/accept_actions.rb +36 -0
  11. data/lib/dhaka/lexer/alphabet.rb +21 -0
  12. data/lib/dhaka/lexer/compiled_lexer.rb +46 -0
  13. data/lib/dhaka/lexer/dfa.rb +121 -0
  14. data/lib/dhaka/lexer/lexeme.rb +32 -0
  15. data/lib/dhaka/lexer/lexer.rb +70 -0
  16. data/lib/dhaka/lexer/lexer_run.rb +78 -0
  17. data/lib/dhaka/lexer/regex_grammar.rb +393 -0
  18. data/lib/dhaka/lexer/regex_parser.rb +2010 -0
  19. data/lib/dhaka/lexer/regex_tokenizer.rb +14 -0
  20. data/lib/dhaka/lexer/specification.rb +96 -0
  21. data/lib/dhaka/lexer/state.rb +68 -0
  22. data/lib/dhaka/lexer/state_machine.rb +37 -0
  23. data/lib/dhaka/parser/action.rb +55 -0
  24. data/lib/dhaka/parser/channel.rb +58 -0
  25. data/lib/dhaka/parser/compiled_parser.rb +51 -0
  26. data/lib/dhaka/parser/conflict.rb +54 -0
  27. data/lib/dhaka/parser/item.rb +43 -0
  28. data/lib/dhaka/parser/parse_result.rb +50 -0
  29. data/lib/dhaka/parser/parse_tree.rb +66 -0
  30. data/lib/dhaka/parser/parser.rb +165 -0
  31. data/lib/dhaka/parser/parser_methods.rb +11 -0
  32. data/lib/dhaka/parser/parser_run.rb +39 -0
  33. data/lib/dhaka/parser/parser_state.rb +74 -0
  34. data/lib/dhaka/parser/token.rb +22 -0
  35. data/lib/dhaka/runtime.rb +51 -0
  36. data/lib/dhaka/tokenizer/tokenizer.rb +190 -0
  37. data/test/all_tests.rb +5 -0
  38. data/test/arithmetic/arithmetic_evaluator.rb +64 -0
  39. data/test/arithmetic/arithmetic_evaluator_test.rb +43 -0
  40. data/test/arithmetic/arithmetic_grammar.rb +41 -0
  41. data/test/arithmetic/arithmetic_grammar_test.rb +9 -0
  42. data/test/arithmetic/arithmetic_test_methods.rb +9 -0
  43. data/test/arithmetic/arithmetic_tokenizer.rb +39 -0
  44. data/test/arithmetic/arithmetic_tokenizer_test.rb +38 -0
  45. data/test/arithmetic_precedence/arithmetic_precedence_evaluator.rb +43 -0
  46. data/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
  47. data/test/arithmetic_precedence/arithmetic_precedence_grammar_test.rb +30 -0
  48. data/test/arithmetic_precedence/arithmetic_precedence_lexer_specification.rb +23 -0
  49. data/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +33 -0
  50. data/test/brackets/bracket_grammar.rb +23 -0
  51. data/test/brackets/bracket_tokenizer.rb +22 -0
  52. data/test/brackets/brackets_test.rb +28 -0
  53. data/test/chittagong/chittagong_driver.rb +46 -0
  54. data/test/chittagong/chittagong_driver_test.rb +276 -0
  55. data/test/chittagong/chittagong_evaluator.rb +284 -0
  56. data/test/chittagong/chittagong_evaluator_test.rb +38 -0
  57. data/test/chittagong/chittagong_grammar.rb +104 -0
  58. data/test/chittagong/chittagong_lexer.rb +109 -0
  59. data/test/chittagong/chittagong_lexer_specification.rb +37 -0
  60. data/test/chittagong/chittagong_lexer_test.rb +58 -0
  61. data/test/chittagong/chittagong_parser.rb +879 -0
  62. data/test/chittagong/chittagong_parser_test.rb +55 -0
  63. data/test/chittagong/chittagong_test.rb +170 -0
  64. data/test/core/another_lalr_but_not_slr_grammar.rb +20 -0
  65. data/test/core/compiled_parser_test.rb +44 -0
  66. data/test/core/dfa_test.rb +170 -0
  67. data/test/core/evaluator_test.rb +22 -0
  68. data/test/core/grammar_test.rb +83 -0
  69. data/test/core/lalr_but_not_slr_grammar.rb +19 -0
  70. data/test/core/lexer_test.rb +139 -0
  71. data/test/core/malformed_grammar.rb +7 -0
  72. data/test/core/malformed_grammar_test.rb +8 -0
  73. data/test/core/nullable_grammar.rb +21 -0
  74. data/test/core/parse_result_test.rb +44 -0
  75. data/test/core/parser_state_test.rb +24 -0
  76. data/test/core/parser_test.rb +131 -0
  77. data/test/core/precedence_grammar.rb +17 -0
  78. data/test/core/precedence_grammar_test.rb +9 -0
  79. data/test/core/rr_conflict_grammar.rb +21 -0
  80. data/test/core/simple_grammar.rb +22 -0
  81. data/test/core/sr_conflict_grammar.rb +16 -0
  82. data/test/dhaka_test_helper.rb +18 -0
  83. data/test/fake_logger.rb +17 -0
  84. metadata +137 -0
@@ -0,0 +1,55 @@
1
+ require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
+ require File.dirname(__FILE__) + '/chittagong_grammar'
3
+ begin
4
+ require File.dirname(__FILE__) + "/chittagong_parser"
5
+ rescue LoadError
6
+ puts "Please run the rake command in the root folder to generate the lexer and parser required for this test."
7
+ exit
8
+ end
9
+
10
# Checks that the generated ChittagongParser reduces a hand-built token
# stream in the expected production order.
class TestChittagongParser < Test::Unit::TestCase
  # Parses an assignment, a print statement and a second assignment (each
  # terminated by newline tokens) and compares the production names of the
  # linearized parse result against the expected sequence.
  def test_parses_a_series_of_statements
    symbol_names = %w[
      newline
      word_literal = numeric_literal newline
      print word_literal newline
      newline
      word_literal = word_literal newline
      newline
    ] << Dhaka::END_SYMBOL_NAME

    parse_result = ChittagongParser.parse(build_tokens(*symbol_names))

    expected_productions = %w[
      single_term
      some_terms
      variable_name
      literal
      assignment_statement
      main_body_simple_statement
      single_main_body_statement
      single_term
      variable_name
      variable_reference
      print_statement
      main_body_simple_statement
      multiple_main_body_statements
      single_term
      multiple_terms
      variable_name
      variable_name
      variable_reference
      assignment_statement
      main_body_simple_statement
      multiple_main_body_statements
      single_term
      multiple_terms
      some_terms
      program
    ]
    actual_productions = parse_result.linearize.map { |node| node.production.name }

    assert_equal(expected_productions, actual_productions)
  end

  # Wraps each given symbol name in a Dhaka::Token; the two remaining
  # constructor arguments are passed as nil.
  #
  # symbol_names - any number of grammar symbol names.
  #
  # Returns an Array of Dhaka::Token, one per name, in the given order.
  def build_tokens(*symbol_names)
    symbol_names.map { |name| Dhaka::Token.new(name, nil, nil) }
  end
end
@@ -0,0 +1,170 @@
1
+ require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
+ require File.dirname(__FILE__) + "/chittagong_grammar"
3
+ require File.dirname(__FILE__) + "/chittagong_lexer_specification"
4
+ begin
5
+ require File.dirname(__FILE__) + "/chittagong_parser"
6
+ require File.dirname(__FILE__) + "/chittagong_lexer"
7
+ rescue LoadError
8
+ puts "Please run the rake command in the root folder to generate the lexer and parser required for this test."
9
+ exit
10
+ end
11
+ require File.dirname(__FILE__) + "/chittagong_evaluator"
12
+
13
# End-to-end tests for the Chittagong language: each test lexes, parses and
# evaluates a small program and checks what the program printed.
#
# The program strings are kept flush-left so that no whitespace is added to
# the text handed to the lexer.
class TestChittagong < Test::Unit::TestCase
  # Reference factorial used to compute the expected output of
  # test_recursive_factorial.
  #
  # n - Integer. Every value <= 1 is treated as the base case, so the
  #     recursion terminates for 0 or negative input as well.
  #
  # Returns the Integer n! (1 for n <= 1).
  def fact(n)
    return 1 if n <= 1
    n * fact(n - 1)
  end

  # Runs a Chittagong program through ChittagongLexer, ChittagongParser and
  # ChittagongEvaluator.
  #
  # program - String of Chittagong source code.
  #
  # Returns a two-element Array: the evaluator's result and the Array that was
  # handed to the evaluator as its output stream (the tests expect it to hold
  # one String per executed `print`).
  def program_output(program)
    output_stream = []
    parse_result = ChittagongParser.parse(ChittagongLexer.lex(program))
    # NOTE(review): `[{}]` is presumably the initial variable-scope stack;
    # confirm against ChittagongEvaluator.
    result = ChittagongEvaluator.new([{}], output_stream).evaluate(parse_result)
    [result, output_stream]
  end

  def test_iterative_fibonacci_without_functions
    program = "

n = 1
a = 0
b = 1
while n < 10
print b
c = a
a = b
b = c + b
n = n + 1
end

"

    output_stream = program_output(program).last
    assert_equal(["1.0", "1.0", "2.0", "3.0", "5.0", "8.0", "13.0", "21.0", "34.0"], output_stream)
  end

  def test_iterative_fibonacci_with_functions
    program = "

def fib(n)
i = 0
a = 0
b = 1
while i < n
c = a
a = b
b = c + b
i = i + 1
end
return b
end

x = 0
while x < 9
print fib(x)
x = x + 1
end

"
    output_stream = program_output(program).last
    assert_equal(["1.0", "1.0", "2.0", "3.0", "5.0", "8.0", "13.0", "21.0", "34.0"], output_stream)
  end

  def test_recursive_factorial
    program = "
def fact(n)
if n == 1
return 1
end
return n * fact(n-1)
end

n = 1
while n < 11
print fact(n)
n = n+1
end"

    output_stream = program_output(program).last
    assert_equal((1..10).map { |i| fact(i).to_f.to_s }, output_stream)
  end

  # Covers argument passing that shadows globals and checks that statements
  # after `return` (the `print 999`) produce no output.
  def test_various_things
    program = "

a = 1
b = 2
c = 3

def foo(a, b, c)
print a
print b
print c
return c
print 999
end

foo(4, a, 6)

"

    output_stream = program_output(program).last
    assert_equal(["4.0", "1.0", "6.0"], output_stream)
  end

  def test_if_else_block
    program = "

def foo(a, b)
if a < b
print 1
else
print 2
end
end

foo(1, 2)
foo(2, 1)

"

    output_stream = program_output(program).last
    assert_equal(["1.0", "2.0"], output_stream)
  end

  def test_no_arg_functions
    program = "

def foo()
print 1
print 2
end
foo()

"

    output_stream = program_output(program).last
    assert_equal(["1.0", "2.0"], output_stream)
  end

  # Compares the printed decimals numerically rather than as exact strings:
  # the number of digits produced by Float#to_s differs between Ruby versions,
  # so a literal such as "263.233029427781" only matches on some interpreters.
  def test_decimal_numbers
    program = "
print 0.2347 * 23.34
a = 1.012
b = 345.44
c = 0.234
print (a^b)/c
def foo(a)
print a
end
foo(3.4)
"

    output_stream = program_output(program).last
    expected_values = [5.477898, 263.233029427781, 3.4]

    assert_equal(expected_values.size, output_stream.size)
    expected_values.each_with_index do |expected, index|
      assert_in_delta(expected, Float(output_stream[index]), 1e-9)
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require File.dirname(__FILE__)+'/../dhaka_test_helper'
2
+
3
# Grammar fixture that, going by its name, is LALR(1) but not SLR(1).
#
# Assuming Dhaka's `production_name %w| symbols |` DSL, the rules read:
#   start -> L = R | R
#   L     -> * R | id
#   R     -> L
class AnotherLALRButNotSLRGrammar < Dhaka::Grammar
  for_symbol(Dhaka::START_SYMBOL_NAME) do
    assignment(%w[L = R])
    rhs(%w[R])
  end

  for_symbol('L') do
    contents(%w[* R])
    identifier(%w[id])
  end

  for_symbol('R') do
    l_value(%w[L])
  end
end
20
+
@@ -0,0 +1,44 @@
1
+ require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
+ require File.dirname(__FILE__) + "/simple_grammar"
3
+ eval(Dhaka::Parser.new(SimpleGrammar).compile_to_ruby_source_as(:SimpleParser)) # Evaluates Ruby source generated locally from SimpleGrammar (not external input); presumably this defines the SimpleParser constant the tests below call.
4
+
5
# Tests SimpleParser, the parser compiled to Ruby source from SimpleGrammar.
class TestCompiledParser < Test::Unit::TestCase
  # A well-formed token stream yields a parse tree whose linearized production
  # names match the expected reduction order.
  def test_compiled_parser_generates_parse_tree_for_simple_grammar
    symbol_names = %w[( n - ( n - n ) ) - n #] + [Dhaka::END_SYMBOL_NAME]
    parse_tree = SimpleParser.parse(build_tokens(symbol_names))

    expected_productions = %w[
      literal
      term
      literal
      term
      literal
      subtraction
      parenthetized_expression
      subtraction
      parenthetized_expression
      term
      literal
      subtraction
      expression
      start
    ]
    actual_productions = parse_tree.linearize.map { |node| node.production.name }

    assert_equal(expected_productions, actual_productions)
  end

  def test_parse_result_has_error_result_if_only_end_token_passed
    end_only = build_tokens([Dhaka::END_SYMBOL_NAME])
    outcome = SimpleParser.parse(end_only)
    assert(outcome.has_error?)
  end

  def test_parse_result_is_nil_if_no_end_token
    unterminated = build_tokens(%w[n - n])
    assert_nil(SimpleParser.parse(unterminated))
  end

  def test_parser_returns_error_result_with_index_of_bad_token_if_parse_error
    malformed = build_tokens(['(', '-', ')', Dhaka::END_SYMBOL_NAME])
    outcome = SimpleParser.parse(malformed)
    assert(outcome.has_error?)
    assert_equal('-', outcome.unexpected_token.symbol_name)
  end

  # Wraps each symbol name in a Dhaka::Token; the two remaining constructor
  # arguments are passed as nil.
  #
  # token_symbol_names - Array of grammar symbol names.
  #
  # Returns an Array of Dhaka::Token in the same order.
  def build_tokens(token_symbol_names)
    token_symbol_names.map { |name| Dhaka::Token.new(name, nil, nil) }
  end
end
@@ -0,0 +1,170 @@
1
+ require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
+
3
# Tests for Dhaka::LexerSupport::DFA and the regex syntax tree produced by
# RegexParser.
class TestDFA < Test::Unit::TestCase
  def test_build_AST_from_parse_tree_and_compute_follow_first_and_last
    tokens = Dhaka::LexerSupport::RegexTokenizer.tokenize("(a|b)*abb")
    root   = Dhaka::LexerSupport::RegexParser.parse(tokens)

    # Walk down the left spine of the tree; each level's right child is one of
    # the trailing symbols (sentinel, then b, b, a) and the spine ends at the
    # star node.
    up_to_last_b   = root.left
    up_to_second_b = up_to_last_b.left
    up_to_second_a = up_to_second_b.left

    star_node = up_to_second_a.left
    or_node   = star_node.child
    first_a   = or_node.children[0]
    first_b   = or_node.children[1]
    second_a  = up_to_second_a.right
    second_b  = up_to_second_b.right
    last_b    = up_to_last_b.right
    sentinel  = root.right

    assert(!root.nullable)
    assert(!up_to_last_b.nullable)
    assert(!up_to_second_b.nullable)
    assert(star_node.nullable)

    assert_equal(Set.new([first_a, first_b, second_a]), root.first)
    assert_equal(Set.new([last_b]), up_to_last_b.last)

    root.calculate_follow_sets

    # Sets are rebuilt for every assertion rather than shared across the
    # calculate_follow_sets call.
    assert_equal(Set.new([first_a, first_b, second_a]), first_a.follow_set)
    assert_equal(Set.new([first_a, first_b, second_a]), first_b.follow_set)
    assert_equal(Set.new([second_b]), second_a.follow_set)
    assert_equal(Set.new([last_b]), second_b.follow_set)
    assert_equal(Set.new([sentinel]), last_b.follow_set)
  end

  def test_DFA_raises_exception_if_empty_regex
    machine_for("")
    flunk "Should have thrown an unexpected end of regex exception"
  rescue Dhaka::LexerSupport::InvalidRegexException => error
    assert_equal("Unexpected end of regex.", error.message)
  end

  def test_DFA_raises_exception_if_error_parsing_regex
    machine_for("(a|b)*+abb")
    flunk "Should have thrown an unexpected token exception"
  rescue Dhaka::LexerSupport::InvalidRegexException => error
    assert_equal("Unexpected token +: (a|b)*>>>+abb", error.message)
  end

  def test_match_a_regex
    machine = machine_for("(a|b)*abb")
    ["abababb", "ababaabb"].each { |input| assert_full_match(machine, input) }
    assert_empty(machine.match("abababab"))
    ["abababbc", "abababbaa"].each do |input|
      assert_equal("abababb", machine.match(input))
    end
  end

  def test_match_a_regex_with_optional_characters_at_the_end
    machine = machine_for("bad(c|d)+(ab)*")
    ["badccddabab", "baddcc"].each { |input| assert_full_match(machine, input) }
    ["badab", "bacdab"].each { |input| assert_empty(machine.match(input)) }
  end

  def test_match_a_nullable_regex
    machine = machine_for("(ab)*")
    ["abab", "ab", ""].each { |input| assert_full_match(machine, input) }
    assert_equal("", machine.match("b"))
  end

  def test_match_a_regex_with_the_dot_character
    machine = machine_for("ab.*cd")
    ["abacd", "abcd", "abAcd"].each { |input| assert_full_match(machine, input) }
    assert_empty(machine.match("ab999c"))
  end

  def test_match_a_regex_with_sets
    machine = machine_for("ab[j-lu]*cd")
    assert_empty(machine.match("abacd"))
    ["abcd", "abjklucd"].each { |input| assert_full_match(machine, input) }
    ["abijklucd", "ab999c"].each { |input| assert_empty(machine.match(input)) }
  end

  def test_match_a_regex_with_negative_sets
    machine = machine_for("ab[^j-lr]*cd")
    assert_full_match(machine, "abcd")
    ["abjcd", "abrcd", "abijklucd"].each { |input| assert_empty(machine.match(input)) }
    assert_full_match(machine, "abyqcd")
  end

  def test_match_a_regex_with_sets_containing_escaped_characters
    machine = machine_for("ab[\\^\\-.]*cd")
    assert_full_match(machine, "abcd")
    assert_empty(machine.match("abjcd"))
    assert_full_match(machine, "ab^-.cd")
    assert_empty(machine.match("abijklucd"))
    assert_empty(machine.match("ab\\cd"))
  end

  def test_match_a_regex_using_unescaped_caret_and_dash_characters
    machine = machine_for("(\\^-)+")
    ["^-", "^-^-"].each { |input| assert_full_match(machine, input) }
    assert_empty(machine.match("?cd"))
  end

  def test_match_a_regex_using_escape_characters
    machine = machine_for(%q/(-\?\(\)\\\\)*/)
    assert_full_match(machine, "-?()\\")
  end

  def test_match_a_regex_using_lt_and_gt
    machine = machine_for('<.+>')
    ["<ab>", "<absdf><sdg><sse>"].each { |input| assert_full_match(machine, input) }
    assert_empty(machine.match("ab>"))
  end

  # 'aaa?a?a?' stands in for a{2,5}: two to five a's match in full, and a
  # sixth is left unmatched.
  def test_simulating_curly_brace_quantifiers
    machine = machine_for('aaa?a?a?')
    ["aa", "aaa", "aaaa", "aaaaa"].each { |input| assert_full_match(machine, input) }
    assert_equal("aaaaa", machine.match("aaaaaa"))
    assert_empty(machine.match("a"))
  end

  def test_matching_a_regex_with_lookahead
    machine = machine_for('ab/cd')
    assert_equal("ab", machine.match("abcd"))
    ["ab", "abef"].each { |input| assert_empty(machine.match(input)) }
  end

  def test_matching_a_regex_with_nullable_pre_lookahead_regex
    machine = machine_for('(ab)*/cd')
    assert_equal("ab", machine.match("abcd"))
    assert_equal("ababab", machine.match("abababcd"))
    ["ababc", "abef"].each { |input| assert_empty(machine.match(input)) }
  end

  def test_matching_a_regex_with_post_lookahead_characters_in_common_with_pre_lookahead_characters
    machine = machine_for('(ab)+/abcd')
    assert_equal("ababab", machine.match("abababcd"))
    ["ab", "abef"].each { |input| assert_empty(machine.match(input)) }
  end

  def test_machine_with_nullable_lookahead
    machine = machine_for(":/[aA\n\r\t]*")
    assert_equal(":", machine.match(":"))
  end

  private

  # Builds a DFA for the given regex source string.
  def machine_for(regex)
    Dhaka::LexerSupport::DFA.new(regex)
  end

  # Asserts that the machine's match on input is the entire input.
  def assert_full_match(machine, input)
    assert_equal(input, machine.match(input))
  end

  # Asserts that the given match result responds true to empty?.
  def assert_empty(input)
    assert(input.empty?)
  end
end