dhaka 2.0.1 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48)
  1. data/Rakefile +64 -0
  2. data/lib/dhaka.rb +12 -0
  3. data/lib/dot/dot.rb +29 -0
  4. data/lib/evaluator/evaluator.rb +35 -26
  5. data/lib/grammar/grammar.rb +42 -17
  6. data/lib/grammar/grammar_symbol.rb +4 -3
  7. data/lib/grammar/production.rb +9 -3
  8. data/lib/lexer/compiled_lexer.rb +46 -0
  9. data/lib/lexer/dfa.rb +71 -0
  10. data/lib/lexer/lexeme.rb +33 -0
  11. data/lib/lexer/lexer.rb +61 -0
  12. data/lib/lexer/lexer_run.rb +66 -0
  13. data/lib/lexer/regex_grammar.rb +368 -0
  14. data/lib/lexer/regex_parser.rb +1888 -0
  15. data/lib/lexer/regex_tokenizer.rb +14 -0
  16. data/lib/lexer/specification.rb +69 -0
  17. data/lib/lexer/state.rb +45 -0
  18. data/lib/lexer/state_machine.rb +37 -0
  19. data/lib/parser/action.rb +3 -3
  20. data/lib/parser/compiled_parser.rb +11 -3
  21. data/lib/parser/parse_result.rb +3 -5
  22. data/lib/parser/parse_tree.rb +6 -17
  23. data/lib/parser/parser.rb +15 -14
  24. data/lib/parser/parser_run.rb +4 -2
  25. data/lib/parser/parser_state.rb +16 -8
  26. data/lib/tokenizer/tokenizer.rb +5 -3
  27. data/test/arithmetic_precedence/arithmetic_precedence_lexer_specification.rb +23 -0
  28. data/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +4 -2
  29. data/test/chittagong/chittagong_driver.rb +12 -13
  30. data/test/chittagong/chittagong_driver_test.rb +18 -11
  31. data/test/chittagong/chittagong_evaluator.rb +7 -16
  32. data/test/chittagong/chittagong_evaluator_test.rb +7 -4
  33. data/test/chittagong/chittagong_grammar.rb +0 -6
  34. data/test/chittagong/chittagong_lexer.rb +109 -0
  35. data/test/chittagong/chittagong_lexer_specification.rb +39 -0
  36. data/test/chittagong/{chittagong_tokenizer_test.rb → chittagong_lexer_test.rb} +12 -6
  37. data/test/chittagong/chittagong_parser.rb +879 -0
  38. data/test/chittagong/chittagong_parser_test.rb +8 -10
  39. data/test/chittagong/chittagong_test.rb +17 -13
  40. data/test/compiled_parser_test.rb +7 -2
  41. data/test/evaluator_test.rb +0 -1
  42. data/test/grammar_test.rb +19 -1
  43. data/test/lexer_test.rb +215 -0
  44. data/test/parse_result_test.rb +8 -8
  45. data/test/parser_state_test.rb +0 -12
  46. metadata +21 -5
  47. data/test/arithmetic_precedence/arithmetic_precedence_tokenizer.rb +0 -39
  48. data/test/chittagong/chittagong_tokenizer.rb +0 -88
@@ -1,31 +1,29 @@
1
1
  require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
2
  require File.dirname(__FILE__) + '/chittagong_grammar'
3
+ begin
4
+ require File.dirname(__FILE__) + "/chittagong_parser"
5
+ rescue LoadError
6
+ puts "Please run the rake command in the root folder to generate the lexer and parser required for this test."
7
+ exit
8
+ end
3
9
 
4
10
  class TestChittagongParser < Test::Unit::TestCase
5
- def setup
6
- fake_logger = FakeLogger.new
7
- @parser = Dhaka::Parser.new(ChittagongGrammar, fake_logger)
8
- assert_equal(80, fake_logger.warnings.size)
9
- assert_equal(0, fake_logger.errors.size)
10
- eval(@parser.compile_to_ruby_source_as(:ChittagongParser)) unless defined? ChittagongParser
11
- end
12
11
 
13
12
  def test_parses_a_series_of_statements
14
13
  token_stream = build_tokens(
15
14
  'newline',
16
- 'word_literal', '=', 'int_literal', 'newline',
15
+ 'word_literal', '=', 'numeric_literal', 'newline',
17
16
  'print', 'word_literal', 'newline',
18
17
  'newline',
19
18
  'word_literal', '=', 'word_literal', 'newline',
20
19
  'newline', Dhaka::END_SYMBOL_NAME
21
20
  )
22
21
 
23
- result = @parser.parse(token_stream)
22
+ result = ChittagongParser.parse(token_stream)
24
23
 
25
24
  assert_equal(["single_term",
26
25
  "some_terms",
27
26
  "variable_name",
28
- "integer",
29
27
  "literal",
30
28
  "assignment_statement",
31
29
  "main_body_simple_statement",
@@ -1,10 +1,14 @@
1
1
  require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
2
  require File.dirname(__FILE__) + "/chittagong_grammar"
3
- require File.dirname(__FILE__) + "/chittagong_tokenizer"
4
- require File.dirname(__FILE__) + "/chittagong_evaluator"
5
- unless defined? ChittagongParser
6
- eval(Dhaka::Parser.new(ChittagongGrammar, FakeLogger.new).compile_to_ruby_source_as(:ChittagongParser))
3
+ require File.dirname(__FILE__) + "/chittagong_lexer_specification"
4
+ begin
5
+ require File.dirname(__FILE__) + "/chittagong_parser"
6
+ require File.dirname(__FILE__) + "/chittagong_lexer"
7
+ rescue LoadError
8
+ puts "Please run the rake command in the root folder to generate the lexer and parser required for this test."
9
+ exit
7
10
  end
11
+ require File.dirname(__FILE__) + "/chittagong_evaluator"
8
12
 
9
13
  class TestChittagong < Test::Unit::TestCase
10
14
 
@@ -15,7 +19,7 @@ class TestChittagong < Test::Unit::TestCase
15
19
 
16
20
  def program_output program
17
21
  output_stream = []
18
- parse_result = ChittagongParser.parse(ChittagongTokenizer.tokenize(program))
22
+ parse_result = ChittagongParser.parse(ChittagongLexer.lex(program))
19
23
  result = ChittagongEvaluator.new([{}], output_stream).evaluate(parse_result)
20
24
  return result, output_stream
21
25
  end
@@ -37,7 +41,7 @@ class TestChittagong < Test::Unit::TestCase
37
41
  "
38
42
 
39
43
  result, output_stream = program_output(program)
40
- assert_equal(["1", "1", "2", "3", "5", "8", "13", "21", "34"], output_stream)
44
+ assert_equal(["1.0", "1.0", "2.0", "3.0", "5.0", "8.0", "13.0", "21.0", "34.0"], output_stream)
41
45
  end
42
46
 
43
47
  def test_iterative_fibonacci_with_functions
@@ -64,7 +68,7 @@ class TestChittagong < Test::Unit::TestCase
64
68
 
65
69
  "
66
70
  result, output_stream = program_output(program)
67
- assert_equal(["1", "1", "2", "3", "5", "8", "13", "21", "34"], output_stream)
71
+ assert_equal(["1.0", "1.0", "2.0", "3.0", "5.0", "8.0", "13.0", "21.0", "34.0"], output_stream)
68
72
  end
69
73
 
70
74
  def test_recursive_factorial
@@ -83,7 +87,7 @@ class TestChittagong < Test::Unit::TestCase
83
87
  end"
84
88
 
85
89
  result, output_stream = program_output(program)
86
- assert_equal((1..10).collect {|i| fact(i).to_s}, output_stream)
90
+ assert_equal((1..10).collect {|i| fact(i).to_f.to_s}, output_stream)
87
91
  end
88
92
 
89
93
  def test_various_things
@@ -106,7 +110,7 @@ class TestChittagong < Test::Unit::TestCase
106
110
  "
107
111
 
108
112
  result, output_stream = program_output(program)
109
- assert_equal(["4", "1", "6"], output_stream)
113
+ assert_equal(["4.0", "1.0", "6.0"], output_stream)
110
114
  end
111
115
 
112
116
  def test_if_else_block
@@ -126,7 +130,7 @@ class TestChittagong < Test::Unit::TestCase
126
130
  "
127
131
 
128
132
  result, output_stream = program_output(program)
129
- assert_equal(["1", "2"], output_stream)
133
+ assert_equal(["1.0", "2.0"], output_stream)
130
134
  end
131
135
 
132
136
  def test_no_arg_functions
@@ -141,15 +145,15 @@ class TestChittagong < Test::Unit::TestCase
141
145
  "
142
146
 
143
147
  result, output_stream = program_output(program)
144
- assert_equal(["1", "2"], output_stream)
148
+ assert_equal(["1.0", "2.0"], output_stream)
145
149
  end
146
150
 
147
151
  def test_decimal_numbers
148
152
  program = "
149
- print .2347 * 23.34
153
+ print 0.2347 * 23.34
150
154
  a = 1.012
151
155
  b = 345.44
152
- c = .234
156
+ c = 0.234
153
157
  print (a^b)/c
154
158
  def foo(a)
155
159
  print a
@@ -4,7 +4,7 @@ eval(Dhaka::Parser.new(SimpleGrammar).compile_to_ruby_source_as(:SimpleParser))
4
4
 
5
5
  class TestCompiledParser < Test::Unit::TestCase
6
6
  def test_compiled_parser_generates_parse_tree_for_simple_grammar
7
- parse_tree = SimpleParser.parse(build_tokens(['(','n','-','(','n','-','n',')',')','-','n','#',Dhaka::END_SYMBOL_NAME]))
7
+ parse_tree = SimpleParser.parse(build_tokens(%w| ( n - ( n - n ) ) - n # | +[Dhaka::END_SYMBOL_NAME]))
8
8
  assert_equal \
9
9
  ["literal",
10
10
  "term",
@@ -26,9 +26,14 @@ class TestCompiledParser < Test::Unit::TestCase
26
26
  parse_result = SimpleParser.parse(build_tokens([Dhaka::END_SYMBOL_NAME]))
27
27
  assert parse_result.has_error?
28
28
  end
29
+
30
+ def test_parse_result_is_nil_if_no_end_token
31
+ parse_result = SimpleParser.parse(build_tokens(%w| n - n |))
32
+ assert_nil(parse_result)
33
+ end
29
34
 
30
35
  def test_parser_returns_error_result_with_index_of_bad_token_if_parse_error
31
- parse_result = SimpleParser.parse(build_tokens(['(', '-', ')',Dhaka::END_SYMBOL_NAME]))
36
+ parse_result = SimpleParser.parse(build_tokens(['(', '-', ')', Dhaka::END_SYMBOL_NAME]))
32
37
  assert parse_result.has_error?
33
38
  assert_equal '-', parse_result.unexpected_token.symbol_name
34
39
  end
@@ -18,6 +18,5 @@ class TestEvaluator < Test::Unit::TestCase
18
18
  end
19
19
  end")
20
20
  end
21
-
22
21
  end
23
22
  end
@@ -6,7 +6,7 @@ class SimpleGrammarTest < Test::Unit::TestCase
6
6
  @grammar = SimpleGrammar
7
7
  end
8
8
 
9
- def test_loads_symbol_and_classifies_them
9
+ def test_loads_symbols_and_classifies_them
10
10
  expected_non_terminals = Set.new(['E', 'S', 'T', Dhaka::START_SYMBOL_NAME])
11
11
  expected_terminals = Set.new(['-', 'n', '(', ')', '#', Dhaka::END_SYMBOL_NAME])
12
12
  assert_equal(expected_non_terminals, Set.new(@grammar.non_terminal_symbols.collect {|symbol| symbol.name}))
@@ -62,4 +62,22 @@ class SimpleGrammarTest < Test::Unit::TestCase
62
62
  assert_equal(expected_items, Set.new(closure.values.collect{|item| item.to_s}))
63
63
  assert_equal(expected_channels, Set.new(channels.collect{|item| item.to_s}))
64
64
  end
65
+
66
+ def test_export_grammar_to_bnf
67
+ assert_equal(
68
+ '
69
+ "_Start_" :
70
+ | "S" "#"
71
+
72
+ "S" :
73
+ | "E"
74
+
75
+ "E" :
76
+ | "E" "-" "T"
77
+ | "T"
78
+
79
+ "T" :
80
+ | "n"
81
+ | "(" "E" ")"', @grammar.to_bnf)
82
+ end
65
83
  end
@@ -0,0 +1,215 @@
1
+ require File.dirname(__FILE__) + '/dhaka_test_helper'
2
+
3
+ class TestLexer < Test::Unit::TestCase
4
+ def test_build_AST_from_parse_tree_and_compute_follow_first_and_last
5
+ root = Dhaka::LexerSupport::RegexParser.parse(Dhaka::LexerSupport::RegexTokenizer.tokenize("(a|b)*abb"))
6
+ star_node = root.left.left.left.left
7
+ or_node = star_node.child
8
+ first_a = or_node.children[0]
9
+ first_b = or_node.children[1]
10
+ second_a = root.left.left.left.right
11
+ second_b = root.left.left.right
12
+ last_b = root.left.right
13
+ sentinel = root.right
14
+
15
+ assert(!root.nullable)
16
+ assert(!root.left.nullable)
17
+ assert(!root.left.left.nullable)
18
+ assert(star_node.nullable)
19
+
20
+ assert_equal(Set.new([first_a, first_b, second_a]), root.first)
21
+ assert_equal(Set.new([last_b]), root.left.last)
22
+
23
+ root.calculate_follow_sets
24
+
25
+ assert_equal(Set.new([first_a, first_b, second_a]), first_a.follow_set)
26
+ assert_equal(Set.new([first_a, first_b, second_a]), first_b.follow_set)
27
+ assert_equal(Set.new([second_b]), second_a.follow_set)
28
+ assert_equal(Set.new([last_b]), second_b.follow_set)
29
+ assert_equal(Set.new([sentinel]), last_b.follow_set)
30
+ end
31
+
32
+ def test_DFA_raises_exception_if_empty_regex
33
+ machine = Dhaka::LexerSupport::DFA.new("")
34
+ flunk "Should have thrown an unexpected end of regex exception"
35
+ rescue Dhaka::LexerSupport::InvalidRegexException => e
36
+ assert_equal("Unexpected end of regex.", e.message)
37
+ end
38
+
39
+ def test_DFA_raises_exception_if_error_parsing_regex
40
+ machine = Dhaka::LexerSupport::DFA.new("(a|b)*+abb")
41
+ flunk "Should have thrown an unexpected token exception"
42
+ rescue Dhaka::LexerSupport::InvalidRegexException => e
43
+ assert_equal("Unexpected token +: (a|b)*>>>+abb", e.message)
44
+ end
45
+
46
+ def test_match_a_regex
47
+ machine = Dhaka::LexerSupport::DFA.new("(a|b)*abb")
48
+ assert(machine.matches("abababb"))
49
+ assert(machine.matches("ababaabb"))
50
+ assert(!machine.matches("abababab"))
51
+ assert(!machine.matches("abababbc"))
52
+ assert(!machine.matches("abababbaa"))
53
+ end
54
+
55
+ def test_match_a_regex_with_optional_characters_at_the_end
56
+ machine = Dhaka::LexerSupport::DFA.new("bad(c|d)+(ab)*")
57
+ assert(machine.matches("badccddabab"))
58
+ assert(machine.matches("baddcc"))
59
+ assert(!machine.matches("badab"))
60
+ assert(!machine.matches("bacdab"))
61
+ end
62
+
63
+ def test_match_a_nullable_regex
64
+ machine = Dhaka::LexerSupport::DFA.new("(ab)*")
65
+ assert(machine.matches("abab"))
66
+ assert(machine.matches("ab"))
67
+ assert(machine.matches(""))
68
+ assert(!machine.matches("b"))
69
+ end
70
+
71
+ def test_match_a_regex_with_the_dot_character
72
+ machine = Dhaka::LexerSupport::DFA.new("ab.*cd")
73
+ assert(machine.matches("abacd"))
74
+ assert(machine.matches("abcd"))
75
+ assert(machine.matches("abAcd"))
76
+ assert(!machine.matches("ab999c"))
77
+ end
78
+
79
+ def test_match_a_regex_with_sets
80
+ machine = Dhaka::LexerSupport::DFA.new("ab[j-lu]*cd")
81
+ assert(!machine.matches("abacd"))
82
+ assert(machine.matches("abcd"))
83
+ assert(machine.matches("abjklucd"))
84
+ assert(!machine.matches("abijklucd"))
85
+ assert(!machine.matches("ab999c"))
86
+ end
87
+
88
+ def test_match_a_regex_with_negative_sets
89
+ machine = Dhaka::LexerSupport::DFA.new("ab[^j-lr]*cd")
90
+ assert(machine.matches("abcd"))
91
+ assert(!machine.matches("abjcd"))
92
+ assert(!machine.matches("abrcd"))
93
+ assert(!machine.matches("abijklucd"))
94
+ assert(machine.matches("abyqcd"))
95
+ end
96
+
97
+ def test_match_a_regex_with_sets_containing_escaped_characters
98
+ machine = Dhaka::LexerSupport::DFA.new("ab[\\^\\-.]*cd")
99
+ assert(machine.matches("abcd"))
100
+ assert(!machine.matches("abjcd"))
101
+ assert(machine.matches("ab^-.cd"))
102
+ assert(!machine.matches("abijklucd"))
103
+ assert(!machine.matches("ab\\cd"))
104
+ end
105
+
106
+ def test_match_a_regex_using_unescaped_caret_and_dash_characters
107
+ machine = Dhaka::LexerSupport::DFA.new("(\\^-)+")
108
+ assert(machine.matches("^-"))
109
+ assert(machine.matches("^-^-"))
110
+ assert(!machine.matches("?cd"))
111
+ end
112
+
113
+ def test_match_a_regex_using_escape_characters
114
+ machine = Dhaka::LexerSupport::DFA.new(%q/(-\?\(\)\\\\)*/)
115
+ assert(machine.matches("-?()\\"))
116
+ end
117
+
118
+ def test_match_a_regex_using_lt_and_gt
119
+ machine = Dhaka::LexerSupport::DFA.new('<.+>')
120
+ assert(machine.matches("<ab>"))
121
+ assert(machine.matches("<absdf><sdg><sse>"))
122
+ assert(!machine.matches("ab>"))
123
+ end
124
+
125
+ def test_simulating_curly_brace_quantifiers
126
+ machine = Dhaka::LexerSupport::DFA.new('aaa?a?a?')
127
+ assert(machine.matches("aa"))
128
+ assert(machine.matches("aaa"))
129
+ assert(machine.matches("aaaa"))
130
+ assert(machine.matches("aaaaa"))
131
+ assert(!machine.matches("aaaaaa"))
132
+ assert(!machine.matches("a"))
133
+ end
134
+
135
+ class LexerSpec < Dhaka::LexerSpecification
136
+
137
+ for_pattern 'zz' do
138
+ "recognized two zs"
139
+ end
140
+
141
+ for_pattern '\w(\w|\d)*' do
142
+ "recognized word token #{current_lexeme.value}"
143
+ end
144
+
145
+ for_pattern '(\d)*(\.\d+)?' do
146
+ "recognized number #{current_lexeme.value}"
147
+ end
148
+
149
+ for_pattern '<.*>' do
150
+ "recognized tag #{current_lexeme.value}"
151
+ end
152
+
153
+ for_pattern ' +' do
154
+ #ignores whitespace
155
+ end
156
+
157
+ for_pattern "\n+" do
158
+ "recognized newline"
159
+ end
160
+ end
161
+
162
+ def test_lexer_with_valid_input
163
+ lexer = Dhaka::Lexer.new(LexerSpec)
164
+ eval(lexer.compile_to_ruby_source_as(:SomeLexer))
165
+ input = "these are words a z zz caPITALIZED word
166
+ this is a float 12.00 an integer 134 a float without a leading digit .2335 another word1"
167
+ results = SomeLexer.lex(input).collect
168
+ assert_equal(
169
+ ["recognized word token these",
170
+ "recognized word token are",
171
+ "recognized word token words",
172
+ "recognized word token a",
173
+ "recognized word token z",
174
+ "recognized two zs",
175
+ "recognized word token caPITALIZED",
176
+ "recognized word token word",
177
+ "recognized newline",
178
+ "recognized word token this",
179
+ "recognized word token is",
180
+ "recognized word token a",
181
+ "recognized word token float",
182
+ "recognized number 12.00",
183
+ "recognized word token an",
184
+ "recognized word token integer",
185
+ "recognized number 134",
186
+ "recognized word token a",
187
+ "recognized word token float",
188
+ "recognized word token without",
189
+ "recognized word token a",
190
+ "recognized word token leading",
191
+ "recognized word token digit",
192
+ "recognized number .2335",
193
+ "recognized word token another",
194
+ "recognized word token word1"], results[0..-2])
195
+ end
196
+
197
+ def test_lexer_with_invalid_input
198
+ lexer = Dhaka::Lexer.new(LexerSpec)
199
+ result = lexer.lex("this will cause an error here 123.").each do |result|
200
+ end
201
+ assert(result.has_error?)
202
+ assert_equal(34, result.unexpected_char_index)
203
+ end
204
+
205
+ def test_lexer_with_greedy_character_consumption
206
+ lexer = Dhaka::Lexer.new(LexerSpec)
207
+ results = lexer.lex("<html></html>this is a word").collect
208
+ assert_equal(["recognized tag <html></html>",
209
+ "recognized word token this",
210
+ "recognized word token is",
211
+ "recognized word token a",
212
+ "recognized word token word"], results[0..-2])
213
+ end
214
+
215
+ end
@@ -7,13 +7,13 @@ class TestParseSuccessResult < Test::Unit::TestCase
7
7
  def composite_node(production, child_nodes, dot_name)
8
8
  node = ParseTreeCompositeNode.new(SimpleGrammar.production_named(production))
9
9
  node.child_nodes.concat child_nodes
10
- node.stubs(:dot_name).returns(dot_name)
10
+ node.stubs(:object_id).returns(dot_name)
11
11
  node
12
12
  end
13
13
 
14
14
  def leaf_node(token, value, dot_name)
15
15
  node = ParseTreeLeafNode.new(Token.new(token, value, nil))
16
- node.stubs(:dot_name).returns(dot_name)
16
+ node.stubs(:object_id).returns(dot_name)
17
17
  node
18
18
  end
19
19
 
@@ -25,17 +25,17 @@ class TestParseSuccessResult < Test::Unit::TestCase
25
25
  result = ParseSuccessResult.new(tree)
26
26
  assert_equal(
27
27
  %(digraph x {
28
- node [fontsize="10" shape=box size="5"]
28
+ node [fontsize="10" shape="box" size="5"]
29
29
  expression [label="subtraction E ::= E - T"]
30
- expression -> first_term
30
+ expression -> first_term
31
31
  first_term [label="literal T ::= n"]
32
- first_term -> literal_1
32
+ first_term -> literal_1
33
33
  literal_1 [label="n : 1"]
34
- expression -> subtraction_operator
34
+ expression -> subtraction_operator
35
35
  subtraction_operator [label="-"]
36
- expression -> second_term
36
+ expression -> second_term
37
37
  second_term [label="literal T ::= n"]
38
- second_term -> literal_2
38
+ second_term -> literal_2
39
39
  literal_2 [label="n : 2"]
40
40
  }),
41
41
  result.to_dot)