dhaka 2.1.0 → 2.2.0

Files changed (39)
  1. data/lib/evaluator/evaluator.rb +18 -17
  2. data/lib/grammar/grammar.rb +4 -5
  3. data/lib/lexer/dfa.rb +63 -13
  4. data/lib/lexer/lexeme.rb +3 -4
  5. data/lib/lexer/lexer.rb +12 -3
  6. data/lib/lexer/lexer_run.rb +22 -10
  7. data/lib/lexer/regex_grammar.rb +88 -14
  8. data/lib/lexer/regex_parser.rb +1523 -1401
  9. data/lib/lexer/specification.rb +29 -3
  10. data/lib/lexer/state.rb +32 -9
  11. data/lib/lexer/state_machine.rb +2 -2
  12. data/lib/parser/channel.rb +4 -4
  13. data/lib/parser/parser.rb +17 -12
  14. data/lib/parser/parser_state.rb +3 -1
  15. data/test/chittagong/chittagong_lexer.rb +63 -63
  16. data/test/chittagong/chittagong_lexer.rb.rej +189 -0
  17. data/test/chittagong/chittagong_lexer_specification.rb +6 -8
  18. data/test/chittagong/chittagong_parser.rb +659 -659
  19. data/test/chittagong/chittagong_parser.rb.rej +1623 -0
  20. data/test/{another_lalr_but_not_slr_grammar.rb → core/another_lalr_but_not_slr_grammar.rb} +1 -1
  21. data/test/{compiled_parser_test.rb → core/compiled_parser_test.rb} +1 -1
  22. data/test/core/dfa_test.rb +170 -0
  23. data/test/{evaluator_test.rb → core/evaluator_test.rb} +3 -3
  24. data/test/{grammar_test.rb → core/grammar_test.rb} +3 -3
  25. data/test/{lalr_but_not_slr_grammar.rb → core/lalr_but_not_slr_grammar.rb} +0 -0
  26. data/test/core/lexer_test.rb +139 -0
  27. data/test/{malformed_grammar.rb → core/malformed_grammar.rb} +0 -0
  28. data/test/{malformed_grammar_test.rb → core/malformed_grammar_test.rb} +1 -1
  29. data/test/{nullable_grammar.rb → core/nullable_grammar.rb} +0 -0
  30. data/test/{parse_result_test.rb → core/parse_result_test.rb} +1 -1
  31. data/test/{parser_state_test.rb → core/parser_state_test.rb} +1 -1
  32. data/test/{parser_test.rb → core/parser_test.rb} +2 -2
  33. data/test/{precedence_grammar.rb → core/precedence_grammar.rb} +0 -0
  34. data/test/{precedence_grammar_test.rb → core/precedence_grammar_test.rb} +1 -1
  35. data/test/{rr_conflict_grammar.rb → core/rr_conflict_grammar.rb} +0 -0
  36. data/test/{simple_grammar.rb → core/simple_grammar.rb} +0 -0
  37. data/test/{sr_conflict_grammar.rb → core/sr_conflict_grammar.rb} +0 -0
  38. metadata +25 -22
  39. data/test/lexer_test.rb +0 -215
data/test/lexer_test.rb DELETED
@@ -1,215 +0,0 @@
- require File.dirname(__FILE__) + '/dhaka_test_helper'
-
- class TestLexer < Test::Unit::TestCase
-   def test_build_AST_from_parse_tree_and_compute_follow_first_and_last
-     root = Dhaka::LexerSupport::RegexParser.parse(Dhaka::LexerSupport::RegexTokenizer.tokenize("(a|b)*abb"))
-     star_node = root.left.left.left.left
-     or_node = star_node.child
-     first_a = or_node.children[0]
-     first_b = or_node.children[1]
-     second_a = root.left.left.left.right
-     second_b = root.left.left.right
-     last_b = root.left.right
-     sentinel = root.right
-
-     assert(!root.nullable)
-     assert(!root.left.nullable)
-     assert(!root.left.left.nullable)
-     assert(star_node.nullable)
-
-     assert_equal(Set.new([first_a, first_b, second_a]), root.first)
-     assert_equal(Set.new([last_b]), root.left.last)
-
-     root.calculate_follow_sets
-
-     assert_equal(Set.new([first_a, first_b, second_a]), first_a.follow_set)
-     assert_equal(Set.new([first_a, first_b, second_a]), first_b.follow_set)
-     assert_equal(Set.new([second_b]), second_a.follow_set)
-     assert_equal(Set.new([last_b]), second_b.follow_set)
-     assert_equal(Set.new([sentinel]), last_b.follow_set)
-   end
-
-   def test_DFA_raises_exception_if_empty_regex
-     machine = Dhaka::LexerSupport::DFA.new("")
-     flunk "Should have thrown an unexpected end of regex exception"
-   rescue Dhaka::LexerSupport::InvalidRegexException => e
-     assert_equal("Unexpected end of regex.", e.message)
-   end
-
-   def test_DFA_raises_exception_if_error_parsing_regex
-     machine = Dhaka::LexerSupport::DFA.new("(a|b)*+abb")
-     flunk "Should have thrown an unexpected token exception"
-   rescue Dhaka::LexerSupport::InvalidRegexException => e
-     assert_equal("Unexpected token +: (a|b)*>>>+abb", e.message)
-   end
-
-   def test_match_a_regex
-     machine = Dhaka::LexerSupport::DFA.new("(a|b)*abb")
-     assert(machine.matches("abababb"))
-     assert(machine.matches("ababaabb"))
-     assert(!machine.matches("abababab"))
-     assert(!machine.matches("abababbc"))
-     assert(!machine.matches("abababbaa"))
-   end
-
-   def test_match_a_regex_with_optional_characters_at_the_end
-     machine = Dhaka::LexerSupport::DFA.new("bad(c|d)+(ab)*")
-     assert(machine.matches("badccddabab"))
-     assert(machine.matches("baddcc"))
-     assert(!machine.matches("badab"))
-     assert(!machine.matches("bacdab"))
-   end
-
-   def test_match_a_nullable_regex
-     machine = Dhaka::LexerSupport::DFA.new("(ab)*")
-     assert(machine.matches("abab"))
-     assert(machine.matches("ab"))
-     assert(machine.matches(""))
-     assert(!machine.matches("b"))
-   end
-
-   def test_match_a_regex_with_the_dot_character
-     machine = Dhaka::LexerSupport::DFA.new("ab.*cd")
-     assert(machine.matches("abacd"))
-     assert(machine.matches("abcd"))
-     assert(machine.matches("abAcd"))
-     assert(!machine.matches("ab999c"))
-   end
-
-   def test_match_a_regex_with_sets
-     machine = Dhaka::LexerSupport::DFA.new("ab[j-lu]*cd")
-     assert(!machine.matches("abacd"))
-     assert(machine.matches("abcd"))
-     assert(machine.matches("abjklucd"))
-     assert(!machine.matches("abijklucd"))
-     assert(!machine.matches("ab999c"))
-   end
-
-   def test_match_a_regex_with_negative_sets
-     machine = Dhaka::LexerSupport::DFA.new("ab[^j-lr]*cd")
-     assert(machine.matches("abcd"))
-     assert(!machine.matches("abjcd"))
-     assert(!machine.matches("abrcd"))
-     assert(!machine.matches("abijklucd"))
-     assert(machine.matches("abyqcd"))
-   end
-
-   def test_match_a_regex_with_sets_containing_escaped_characters
-     machine = Dhaka::LexerSupport::DFA.new("ab[\\^\\-.]*cd")
-     assert(machine.matches("abcd"))
-     assert(!machine.matches("abjcd"))
-     assert(machine.matches("ab^-.cd"))
-     assert(!machine.matches("abijklucd"))
-     assert(!machine.matches("ab\\cd"))
-   end
-
-   def test_match_a_regex_using_unescaped_caret_and_dash_characters
-     machine = Dhaka::LexerSupport::DFA.new("(\\^-)+")
-     assert(machine.matches("^-"))
-     assert(machine.matches("^-^-"))
-     assert(!machine.matches("?cd"))
-   end
-
-   def test_match_a_regex_using_escape_characters
-     machine = Dhaka::LexerSupport::DFA.new(%q/(-\?\(\)\\\\)*/)
-     assert(machine.matches("-?()\\"))
-   end
-
-   def test_match_a_regex_using_lt_and_gt
-     machine = Dhaka::LexerSupport::DFA.new('<.+>')
-     assert(machine.matches("<ab>"))
-     assert(machine.matches("<absdf><sdg><sse>"))
-     assert(!machine.matches("ab>"))
-   end
-
-   def test_simulating_curly_brace_quantifiers
-     machine = Dhaka::LexerSupport::DFA.new('aaa?a?a?')
-     assert(machine.matches("aa"))
-     assert(machine.matches("aaa"))
-     assert(machine.matches("aaaa"))
-     assert(machine.matches("aaaaa"))
-     assert(!machine.matches("aaaaaa"))
-     assert(!machine.matches("a"))
-   end
-
-   class LexerSpec < Dhaka::LexerSpecification
-
-     for_pattern 'zz' do
-       "recognized two zs"
-     end
-
-     for_pattern '\w(\w|\d)*' do
-       "recognized word token #{current_lexeme.value}"
-     end
-
-     for_pattern '(\d)*(\.\d+)?' do
-       "recognized number #{current_lexeme.value}"
-     end
-
-     for_pattern '<.*>' do
-       "recognized tag #{current_lexeme.value}"
-     end
-
-     for_pattern ' +' do
-       #ignores whitespace
-     end
-
-     for_pattern "\n+" do
-       "recognized newline"
-     end
-   end
-
-   def test_lexer_with_valid_input
-     lexer = Dhaka::Lexer.new(LexerSpec)
-     eval(lexer.compile_to_ruby_source_as(:SomeLexer))
-     input = "these are words a z zz caPITALIZED word
- this is a float 12.00 an integer 134 a float without a leading digit .2335 another word1"
-     results = SomeLexer.lex(input).collect
-     assert_equal(
-       ["recognized word token these",
-        "recognized word token are",
-        "recognized word token words",
-        "recognized word token a",
-        "recognized word token z",
-        "recognized two zs",
-        "recognized word token caPITALIZED",
-        "recognized word token word",
-        "recognized newline",
-        "recognized word token this",
-        "recognized word token is",
-        "recognized word token a",
-        "recognized word token float",
-        "recognized number 12.00",
-        "recognized word token an",
-        "recognized word token integer",
-        "recognized number 134",
-        "recognized word token a",
-        "recognized word token float",
-        "recognized word token without",
-        "recognized word token a",
-        "recognized word token leading",
-        "recognized word token digit",
-        "recognized number .2335",
-        "recognized word token another",
-        "recognized word token word1"], results[0..-2])
-   end
-
-   def test_lexer_with_invalid_input
-     lexer = Dhaka::Lexer.new(LexerSpec)
-     result = lexer.lex("this will cause an error here 123.").each do |result|
-     end
-     assert(result.has_error?)
-     assert_equal(34, result.unexpected_char_index)
-   end
-
-   def test_lexer_with_greedy_character_consumption
-     lexer = Dhaka::Lexer.new(LexerSpec)
-     results = lexer.lex("<html></html>this is a word").collect
-     assert_equal(["recognized tag <html></html>",
-                   "recognized word token this",
-                   "recognized word token is",
-                   "recognized word token a",
-                   "recognized word token word"], results[0..-2])
-   end
-
- end