dhaka 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. data/Rakefile +64 -0
  2. data/lib/dhaka.rb +12 -0
  3. data/lib/dot/dot.rb +29 -0
  4. data/lib/evaluator/evaluator.rb +35 -26
  5. data/lib/grammar/grammar.rb +42 -17
  6. data/lib/grammar/grammar_symbol.rb +4 -3
  7. data/lib/grammar/production.rb +9 -3
  8. data/lib/lexer/compiled_lexer.rb +46 -0
  9. data/lib/lexer/dfa.rb +71 -0
  10. data/lib/lexer/lexeme.rb +33 -0
  11. data/lib/lexer/lexer.rb +61 -0
  12. data/lib/lexer/lexer_run.rb +66 -0
  13. data/lib/lexer/regex_grammar.rb +368 -0
  14. data/lib/lexer/regex_parser.rb +1888 -0
  15. data/lib/lexer/regex_tokenizer.rb +14 -0
  16. data/lib/lexer/specification.rb +69 -0
  17. data/lib/lexer/state.rb +45 -0
  18. data/lib/lexer/state_machine.rb +37 -0
  19. data/lib/parser/action.rb +3 -3
  20. data/lib/parser/compiled_parser.rb +11 -3
  21. data/lib/parser/parse_result.rb +3 -5
  22. data/lib/parser/parse_tree.rb +6 -17
  23. data/lib/parser/parser.rb +15 -14
  24. data/lib/parser/parser_run.rb +4 -2
  25. data/lib/parser/parser_state.rb +16 -8
  26. data/lib/tokenizer/tokenizer.rb +5 -3
  27. data/test/arithmetic_precedence/arithmetic_precedence_lexer_specification.rb +23 -0
  28. data/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +4 -2
  29. data/test/chittagong/chittagong_driver.rb +12 -13
  30. data/test/chittagong/chittagong_driver_test.rb +18 -11
  31. data/test/chittagong/chittagong_evaluator.rb +7 -16
  32. data/test/chittagong/chittagong_evaluator_test.rb +7 -4
  33. data/test/chittagong/chittagong_grammar.rb +0 -6
  34. data/test/chittagong/chittagong_lexer.rb +109 -0
  35. data/test/chittagong/chittagong_lexer_specification.rb +39 -0
  36. data/test/chittagong/{chittagong_tokenizer_test.rb → chittagong_lexer_test.rb} +12 -6
  37. data/test/chittagong/chittagong_parser.rb +879 -0
  38. data/test/chittagong/chittagong_parser_test.rb +8 -10
  39. data/test/chittagong/chittagong_test.rb +17 -13
  40. data/test/compiled_parser_test.rb +7 -2
  41. data/test/evaluator_test.rb +0 -1
  42. data/test/grammar_test.rb +19 -1
  43. data/test/lexer_test.rb +215 -0
  44. data/test/parse_result_test.rb +8 -8
  45. data/test/parser_state_test.rb +0 -12
  46. metadata +21 -5
  47. data/test/arithmetic_precedence/arithmetic_precedence_tokenizer.rb +0 -39
  48. data/test/chittagong/chittagong_tokenizer.rb +0 -88
@@ -1,31 +1,29 @@
1
1
  require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
2
  require File.dirname(__FILE__) + '/chittagong_grammar'
3
+ begin
4
+ require File.dirname(__FILE__) + "/chittagong_parser"
5
+ rescue LoadError
6
+ puts "Please run the rake command in the root folder to generate the lexer and parser required for this test."
7
+ exit
8
+ end
3
9
 
4
10
  class TestChittagongParser < Test::Unit::TestCase
5
- def setup
6
- fake_logger = FakeLogger.new
7
- @parser = Dhaka::Parser.new(ChittagongGrammar, fake_logger)
8
- assert_equal(80, fake_logger.warnings.size)
9
- assert_equal(0, fake_logger.errors.size)
10
- eval(@parser.compile_to_ruby_source_as(:ChittagongParser)) unless defined? ChittagongParser
11
- end
12
11
 
13
12
  def test_parses_a_series_of_statements
14
13
  token_stream = build_tokens(
15
14
  'newline',
16
- 'word_literal', '=', 'int_literal', 'newline',
15
+ 'word_literal', '=', 'numeric_literal', 'newline',
17
16
  'print', 'word_literal', 'newline',
18
17
  'newline',
19
18
  'word_literal', '=', 'word_literal', 'newline',
20
19
  'newline', Dhaka::END_SYMBOL_NAME
21
20
  )
22
21
 
23
- result = @parser.parse(token_stream)
22
+ result = ChittagongParser.parse(token_stream)
24
23
 
25
24
  assert_equal(["single_term",
26
25
  "some_terms",
27
26
  "variable_name",
28
- "integer",
29
27
  "literal",
30
28
  "assignment_statement",
31
29
  "main_body_simple_statement",
@@ -1,10 +1,14 @@
1
1
  require File.dirname(__FILE__) + '/../dhaka_test_helper'
2
2
  require File.dirname(__FILE__) + "/chittagong_grammar"
3
- require File.dirname(__FILE__) + "/chittagong_tokenizer"
4
- require File.dirname(__FILE__) + "/chittagong_evaluator"
5
- unless defined? ChittagongParser
6
- eval(Dhaka::Parser.new(ChittagongGrammar, FakeLogger.new).compile_to_ruby_source_as(:ChittagongParser))
3
+ require File.dirname(__FILE__) + "/chittagong_lexer_specification"
4
+ begin
5
+ require File.dirname(__FILE__) + "/chittagong_parser"
6
+ require File.dirname(__FILE__) + "/chittagong_lexer"
7
+ rescue LoadError
8
+ puts "Please run the rake command in the root folder to generate the lexer and parser required for this test."
9
+ exit
7
10
  end
11
+ require File.dirname(__FILE__) + "/chittagong_evaluator"
8
12
 
9
13
  class TestChittagong < Test::Unit::TestCase
10
14
 
@@ -15,7 +19,7 @@ class TestChittagong < Test::Unit::TestCase
15
19
 
16
20
  def program_output program
17
21
  output_stream = []
18
- parse_result = ChittagongParser.parse(ChittagongTokenizer.tokenize(program))
22
+ parse_result = ChittagongParser.parse(ChittagongLexer.lex(program))
19
23
  result = ChittagongEvaluator.new([{}], output_stream).evaluate(parse_result)
20
24
  return result, output_stream
21
25
  end
@@ -37,7 +41,7 @@ class TestChittagong < Test::Unit::TestCase
37
41
  "
38
42
 
39
43
  result, output_stream = program_output(program)
40
- assert_equal(["1", "1", "2", "3", "5", "8", "13", "21", "34"], output_stream)
44
+ assert_equal(["1.0", "1.0", "2.0", "3.0", "5.0", "8.0", "13.0", "21.0", "34.0"], output_stream)
41
45
  end
42
46
 
43
47
  def test_iterative_fibonacci_with_functions
@@ -64,7 +68,7 @@ class TestChittagong < Test::Unit::TestCase
64
68
 
65
69
  "
66
70
  result, output_stream = program_output(program)
67
- assert_equal(["1", "1", "2", "3", "5", "8", "13", "21", "34"], output_stream)
71
+ assert_equal(["1.0", "1.0", "2.0", "3.0", "5.0", "8.0", "13.0", "21.0", "34.0"], output_stream)
68
72
  end
69
73
 
70
74
  def test_recursive_factorial
@@ -83,7 +87,7 @@ class TestChittagong < Test::Unit::TestCase
83
87
  end"
84
88
 
85
89
  result, output_stream = program_output(program)
86
- assert_equal((1..10).collect {|i| fact(i).to_s}, output_stream)
90
+ assert_equal((1..10).collect {|i| fact(i).to_f.to_s}, output_stream)
87
91
  end
88
92
 
89
93
  def test_various_things
@@ -106,7 +110,7 @@ class TestChittagong < Test::Unit::TestCase
106
110
  "
107
111
 
108
112
  result, output_stream = program_output(program)
109
- assert_equal(["4", "1", "6"], output_stream)
113
+ assert_equal(["4.0", "1.0", "6.0"], output_stream)
110
114
  end
111
115
 
112
116
  def test_if_else_block
@@ -126,7 +130,7 @@ class TestChittagong < Test::Unit::TestCase
126
130
  "
127
131
 
128
132
  result, output_stream = program_output(program)
129
- assert_equal(["1", "2"], output_stream)
133
+ assert_equal(["1.0", "2.0"], output_stream)
130
134
  end
131
135
 
132
136
  def test_no_arg_functions
@@ -141,15 +145,15 @@ class TestChittagong < Test::Unit::TestCase
141
145
  "
142
146
 
143
147
  result, output_stream = program_output(program)
144
- assert_equal(["1", "2"], output_stream)
148
+ assert_equal(["1.0", "2.0"], output_stream)
145
149
  end
146
150
 
147
151
  def test_decimal_numbers
148
152
  program = "
149
- print .2347 * 23.34
153
+ print 0.2347 * 23.34
150
154
  a = 1.012
151
155
  b = 345.44
152
- c = .234
156
+ c = 0.234
153
157
  print (a^b)/c
154
158
  def foo(a)
155
159
  print a
@@ -4,7 +4,7 @@ eval(Dhaka::Parser.new(SimpleGrammar).compile_to_ruby_source_as(:SimpleParser))
4
4
 
5
5
  class TestCompiledParser < Test::Unit::TestCase
6
6
  def test_compiled_parser_generates_parse_tree_for_simple_grammar
7
- parse_tree = SimpleParser.parse(build_tokens(['(','n','-','(','n','-','n',')',')','-','n','#',Dhaka::END_SYMBOL_NAME]))
7
+ parse_tree = SimpleParser.parse(build_tokens(%w| ( n - ( n - n ) ) - n # | +[Dhaka::END_SYMBOL_NAME]))
8
8
  assert_equal \
9
9
  ["literal",
10
10
  "term",
@@ -26,9 +26,14 @@ class TestCompiledParser < Test::Unit::TestCase
26
26
  parse_result = SimpleParser.parse(build_tokens([Dhaka::END_SYMBOL_NAME]))
27
27
  assert parse_result.has_error?
28
28
  end
29
+
30
+ def test_parse_result_is_nil_if_no_end_token
31
+ parse_result = SimpleParser.parse(build_tokens(%w| n - n |))
32
+ assert_nil(parse_result)
33
+ end
29
34
 
30
35
  def test_parser_returns_error_result_with_index_of_bad_token_if_parse_error
31
- parse_result = SimpleParser.parse(build_tokens(['(', '-', ')',Dhaka::END_SYMBOL_NAME]))
36
+ parse_result = SimpleParser.parse(build_tokens(['(', '-', ')', Dhaka::END_SYMBOL_NAME]))
32
37
  assert parse_result.has_error?
33
38
  assert_equal '-', parse_result.unexpected_token.symbol_name
34
39
  end
@@ -18,6 +18,5 @@ class TestEvaluator < Test::Unit::TestCase
18
18
  end
19
19
  end")
20
20
  end
21
-
22
21
  end
23
22
  end
@@ -6,7 +6,7 @@ class SimpleGrammarTest < Test::Unit::TestCase
6
6
  @grammar = SimpleGrammar
7
7
  end
8
8
 
9
- def test_loads_symbol_and_classifies_them
9
+ def test_loads_symbols_and_classifies_them
10
10
  expected_non_terminals = Set.new(['E', 'S', 'T', Dhaka::START_SYMBOL_NAME])
11
11
  expected_terminals = Set.new(['-', 'n', '(', ')', '#', Dhaka::END_SYMBOL_NAME])
12
12
  assert_equal(expected_non_terminals, Set.new(@grammar.non_terminal_symbols.collect {|symbol| symbol.name}))
@@ -62,4 +62,22 @@ class SimpleGrammarTest < Test::Unit::TestCase
62
62
  assert_equal(expected_items, Set.new(closure.values.collect{|item| item.to_s}))
63
63
  assert_equal(expected_channels, Set.new(channels.collect{|item| item.to_s}))
64
64
  end
65
+
66
+ def test_export_grammar_to_bnf
67
+ assert_equal(
68
+ '
69
+ "_Start_" :
70
+ | "S" "#"
71
+
72
+ "S" :
73
+ | "E"
74
+
75
+ "E" :
76
+ | "E" "-" "T"
77
+ | "T"
78
+
79
+ "T" :
80
+ | "n"
81
+ | "(" "E" ")"', @grammar.to_bnf)
82
+ end
65
83
  end
@@ -0,0 +1,215 @@
1
+ require File.dirname(__FILE__) + '/dhaka_test_helper'
2
+
3
+ class TestLexer < Test::Unit::TestCase
4
+ def test_build_AST_from_parse_tree_and_compute_follow_first_and_last
5
+ root = Dhaka::LexerSupport::RegexParser.parse(Dhaka::LexerSupport::RegexTokenizer.tokenize("(a|b)*abb"))
6
+ star_node = root.left.left.left.left
7
+ or_node = star_node.child
8
+ first_a = or_node.children[0]
9
+ first_b = or_node.children[1]
10
+ second_a = root.left.left.left.right
11
+ second_b = root.left.left.right
12
+ last_b = root.left.right
13
+ sentinel = root.right
14
+
15
+ assert(!root.nullable)
16
+ assert(!root.left.nullable)
17
+ assert(!root.left.left.nullable)
18
+ assert(star_node.nullable)
19
+
20
+ assert_equal(Set.new([first_a, first_b, second_a]), root.first)
21
+ assert_equal(Set.new([last_b]), root.left.last)
22
+
23
+ root.calculate_follow_sets
24
+
25
+ assert_equal(Set.new([first_a, first_b, second_a]), first_a.follow_set)
26
+ assert_equal(Set.new([first_a, first_b, second_a]), first_b.follow_set)
27
+ assert_equal(Set.new([second_b]), second_a.follow_set)
28
+ assert_equal(Set.new([last_b]), second_b.follow_set)
29
+ assert_equal(Set.new([sentinel]), last_b.follow_set)
30
+ end
31
+
32
+ def test_DFA_raises_exception_if_empty_regex
33
+ machine = Dhaka::LexerSupport::DFA.new("")
34
+ flunk "Should have thrown an unexpected end of regex exception"
35
+ rescue Dhaka::LexerSupport::InvalidRegexException => e
36
+ assert_equal("Unexpected end of regex.", e.message)
37
+ end
38
+
39
+ def test_DFA_raises_exception_if_error_parsing_regex
40
+ machine = Dhaka::LexerSupport::DFA.new("(a|b)*+abb")
41
+ flunk "Should have thrown an unexpected token exception"
42
+ rescue Dhaka::LexerSupport::InvalidRegexException => e
43
+ assert_equal("Unexpected token +: (a|b)*>>>+abb", e.message)
44
+ end
45
+
46
+ def test_match_a_regex
47
+ machine = Dhaka::LexerSupport::DFA.new("(a|b)*abb")
48
+ assert(machine.matches("abababb"))
49
+ assert(machine.matches("ababaabb"))
50
+ assert(!machine.matches("abababab"))
51
+ assert(!machine.matches("abababbc"))
52
+ assert(!machine.matches("abababbaa"))
53
+ end
54
+
55
+ def test_match_a_regex_with_optional_characters_at_the_end
56
+ machine = Dhaka::LexerSupport::DFA.new("bad(c|d)+(ab)*")
57
+ assert(machine.matches("badccddabab"))
58
+ assert(machine.matches("baddcc"))
59
+ assert(!machine.matches("badab"))
60
+ assert(!machine.matches("bacdab"))
61
+ end
62
+
63
+ def test_match_a_nullable_regex
64
+ machine = Dhaka::LexerSupport::DFA.new("(ab)*")
65
+ assert(machine.matches("abab"))
66
+ assert(machine.matches("ab"))
67
+ assert(machine.matches(""))
68
+ assert(!machine.matches("b"))
69
+ end
70
+
71
+ def test_match_a_regex_with_the_dot_character
72
+ machine = Dhaka::LexerSupport::DFA.new("ab.*cd")
73
+ assert(machine.matches("abacd"))
74
+ assert(machine.matches("abcd"))
75
+ assert(machine.matches("abAcd"))
76
+ assert(!machine.matches("ab999c"))
77
+ end
78
+
79
+ def test_match_a_regex_with_sets
80
+ machine = Dhaka::LexerSupport::DFA.new("ab[j-lu]*cd")
81
+ assert(!machine.matches("abacd"))
82
+ assert(machine.matches("abcd"))
83
+ assert(machine.matches("abjklucd"))
84
+ assert(!machine.matches("abijklucd"))
85
+ assert(!machine.matches("ab999c"))
86
+ end
87
+
88
+ def test_match_a_regex_with_negative_sets
89
+ machine = Dhaka::LexerSupport::DFA.new("ab[^j-lr]*cd")
90
+ assert(machine.matches("abcd"))
91
+ assert(!machine.matches("abjcd"))
92
+ assert(!machine.matches("abrcd"))
93
+ assert(!machine.matches("abijklucd"))
94
+ assert(machine.matches("abyqcd"))
95
+ end
96
+
97
+ def test_match_a_regex_with_sets_containing_escaped_characters
98
+ machine = Dhaka::LexerSupport::DFA.new("ab[\\^\\-.]*cd")
99
+ assert(machine.matches("abcd"))
100
+ assert(!machine.matches("abjcd"))
101
+ assert(machine.matches("ab^-.cd"))
102
+ assert(!machine.matches("abijklucd"))
103
+ assert(!machine.matches("ab\\cd"))
104
+ end
105
+
106
+ def test_match_a_regex_using_unescaped_caret_and_dash_characters
107
+ machine = Dhaka::LexerSupport::DFA.new("(\\^-)+")
108
+ assert(machine.matches("^-"))
109
+ assert(machine.matches("^-^-"))
110
+ assert(!machine.matches("?cd"))
111
+ end
112
+
113
+ def test_match_a_regex_using_escape_characters
114
+ machine = Dhaka::LexerSupport::DFA.new(%q/(-\?\(\)\\\\)*/)
115
+ assert(machine.matches("-?()\\"))
116
+ end
117
+
118
+ def test_match_a_regex_using_lt_and_gt
119
+ machine = Dhaka::LexerSupport::DFA.new('<.+>')
120
+ assert(machine.matches("<ab>"))
121
+ assert(machine.matches("<absdf><sdg><sse>"))
122
+ assert(!machine.matches("ab>"))
123
+ end
124
+
125
+ def test_simulating_curly_brace_quantifiers
126
+ machine = Dhaka::LexerSupport::DFA.new('aaa?a?a?')
127
+ assert(machine.matches("aa"))
128
+ assert(machine.matches("aaa"))
129
+ assert(machine.matches("aaaa"))
130
+ assert(machine.matches("aaaaa"))
131
+ assert(!machine.matches("aaaaaa"))
132
+ assert(!machine.matches("a"))
133
+ end
134
+
135
+ class LexerSpec < Dhaka::LexerSpecification
136
+
137
+ for_pattern 'zz' do
138
+ "recognized two zs"
139
+ end
140
+
141
+ for_pattern '\w(\w|\d)*' do
142
+ "recognized word token #{current_lexeme.value}"
143
+ end
144
+
145
+ for_pattern '(\d)*(\.\d+)?' do
146
+ "recognized number #{current_lexeme.value}"
147
+ end
148
+
149
+ for_pattern '<.*>' do
150
+ "recognized tag #{current_lexeme.value}"
151
+ end
152
+
153
+ for_pattern ' +' do
154
+ #ignores whitespace
155
+ end
156
+
157
+ for_pattern "\n+" do
158
+ "recognized newline"
159
+ end
160
+ end
161
+
162
+ def test_lexer_with_valid_input
163
+ lexer = Dhaka::Lexer.new(LexerSpec)
164
+ eval(lexer.compile_to_ruby_source_as(:SomeLexer))
165
+ input = "these are words a z zz caPITALIZED word
166
+ this is a float 12.00 an integer 134 a float without a leading digit .2335 another word1"
167
+ results = SomeLexer.lex(input).collect
168
+ assert_equal(
169
+ ["recognized word token these",
170
+ "recognized word token are",
171
+ "recognized word token words",
172
+ "recognized word token a",
173
+ "recognized word token z",
174
+ "recognized two zs",
175
+ "recognized word token caPITALIZED",
176
+ "recognized word token word",
177
+ "recognized newline",
178
+ "recognized word token this",
179
+ "recognized word token is",
180
+ "recognized word token a",
181
+ "recognized word token float",
182
+ "recognized number 12.00",
183
+ "recognized word token an",
184
+ "recognized word token integer",
185
+ "recognized number 134",
186
+ "recognized word token a",
187
+ "recognized word token float",
188
+ "recognized word token without",
189
+ "recognized word token a",
190
+ "recognized word token leading",
191
+ "recognized word token digit",
192
+ "recognized number .2335",
193
+ "recognized word token another",
194
+ "recognized word token word1"], results[0..-2])
195
+ end
196
+
197
+ def test_lexer_with_invalid_input
198
+ lexer = Dhaka::Lexer.new(LexerSpec)
199
+ result = lexer.lex("this will cause an error here 123.").each do |result|
200
+ end
201
+ assert(result.has_error?)
202
+ assert_equal(34, result.unexpected_char_index)
203
+ end
204
+
205
+ def test_lexer_with_greedy_character_consumption
206
+ lexer = Dhaka::Lexer.new(LexerSpec)
207
+ results = lexer.lex("<html></html>this is a word").collect
208
+ assert_equal(["recognized tag <html></html>",
209
+ "recognized word token this",
210
+ "recognized word token is",
211
+ "recognized word token a",
212
+ "recognized word token word"], results[0..-2])
213
+ end
214
+
215
+ end
@@ -7,13 +7,13 @@ class TestParseSuccessResult < Test::Unit::TestCase
7
7
  def composite_node(production, child_nodes, dot_name)
8
8
  node = ParseTreeCompositeNode.new(SimpleGrammar.production_named(production))
9
9
  node.child_nodes.concat child_nodes
10
- node.stubs(:dot_name).returns(dot_name)
10
+ node.stubs(:object_id).returns(dot_name)
11
11
  node
12
12
  end
13
13
 
14
14
  def leaf_node(token, value, dot_name)
15
15
  node = ParseTreeLeafNode.new(Token.new(token, value, nil))
16
- node.stubs(:dot_name).returns(dot_name)
16
+ node.stubs(:object_id).returns(dot_name)
17
17
  node
18
18
  end
19
19
 
@@ -25,17 +25,17 @@ class TestParseSuccessResult < Test::Unit::TestCase
25
25
  result = ParseSuccessResult.new(tree)
26
26
  assert_equal(
27
27
  %(digraph x {
28
- node [fontsize="10" shape=box size="5"]
28
+ node [fontsize="10" shape="box" size="5"]
29
29
  expression [label="subtraction E ::= E - T"]
30
- expression -> first_term
30
+ expression -> first_term
31
31
  first_term [label="literal T ::= n"]
32
- first_term -> literal_1
32
+ first_term -> literal_1
33
33
  literal_1 [label="n : 1"]
34
- expression -> subtraction_operator
34
+ expression -> subtraction_operator
35
35
  subtraction_operator [label="-"]
36
- expression -> second_term
36
+ expression -> second_term
37
37
  second_term [label="literal T ::= n"]
38
- second_term -> literal_2
38
+ second_term -> literal_2
39
39
  literal_2 [label="n : 2"]
40
40
  }),
41
41
  result.to_dot)