dhaka 2.1.0 → 2.2.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. data/lib/evaluator/evaluator.rb +18 -17
  2. data/lib/grammar/grammar.rb +4 -5
  3. data/lib/lexer/dfa.rb +63 -13
  4. data/lib/lexer/lexeme.rb +3 -4
  5. data/lib/lexer/lexer.rb +12 -3
  6. data/lib/lexer/lexer_run.rb +22 -10
  7. data/lib/lexer/regex_grammar.rb +88 -14
  8. data/lib/lexer/regex_parser.rb +1523 -1401
  9. data/lib/lexer/specification.rb +29 -3
  10. data/lib/lexer/state.rb +32 -9
  11. data/lib/lexer/state_machine.rb +2 -2
  12. data/lib/parser/channel.rb +4 -4
  13. data/lib/parser/parser.rb +17 -12
  14. data/lib/parser/parser_state.rb +3 -1
  15. data/test/chittagong/chittagong_lexer.rb +63 -63
  16. data/test/chittagong/chittagong_lexer.rb.rej +189 -0
  17. data/test/chittagong/chittagong_lexer_specification.rb +6 -8
  18. data/test/chittagong/chittagong_parser.rb +659 -659
  19. data/test/chittagong/chittagong_parser.rb.rej +1623 -0
  20. data/test/{another_lalr_but_not_slr_grammar.rb → core/another_lalr_but_not_slr_grammar.rb} +1 -1
  21. data/test/{compiled_parser_test.rb → core/compiled_parser_test.rb} +1 -1
  22. data/test/core/dfa_test.rb +170 -0
  23. data/test/{evaluator_test.rb → core/evaluator_test.rb} +3 -3
  24. data/test/{grammar_test.rb → core/grammar_test.rb} +3 -3
  25. data/test/{lalr_but_not_slr_grammar.rb → core/lalr_but_not_slr_grammar.rb} +0 -0
  26. data/test/core/lexer_test.rb +139 -0
  27. data/test/{malformed_grammar.rb → core/malformed_grammar.rb} +0 -0
  28. data/test/{malformed_grammar_test.rb → core/malformed_grammar_test.rb} +1 -1
  29. data/test/{nullable_grammar.rb → core/nullable_grammar.rb} +0 -0
  30. data/test/{parse_result_test.rb → core/parse_result_test.rb} +1 -1
  31. data/test/{parser_state_test.rb → core/parser_state_test.rb} +1 -1
  32. data/test/{parser_test.rb → core/parser_test.rb} +2 -2
  33. data/test/{precedence_grammar.rb → core/precedence_grammar.rb} +0 -0
  34. data/test/{precedence_grammar_test.rb → core/precedence_grammar_test.rb} +1 -1
  35. data/test/{rr_conflict_grammar.rb → core/rr_conflict_grammar.rb} +0 -0
  36. data/test/{simple_grammar.rb → core/simple_grammar.rb} +0 -0
  37. data/test/{sr_conflict_grammar.rb → core/sr_conflict_grammar.rb} +0 -0
  38. metadata +25 -22
  39. data/test/lexer_test.rb +0 -215
data/test/lexer_test.rb DELETED
@@ -1,215 +0,0 @@
1
- require File.dirname(__FILE__) + '/dhaka_test_helper'
2
-
3
- class TestLexer < Test::Unit::TestCase
4
- def test_build_AST_from_parse_tree_and_compute_follow_first_and_last
5
- root = Dhaka::LexerSupport::RegexParser.parse(Dhaka::LexerSupport::RegexTokenizer.tokenize("(a|b)*abb"))
6
- star_node = root.left.left.left.left
7
- or_node = star_node.child
8
- first_a = or_node.children[0]
9
- first_b = or_node.children[1]
10
- second_a = root.left.left.left.right
11
- second_b = root.left.left.right
12
- last_b = root.left.right
13
- sentinel = root.right
14
-
15
- assert(!root.nullable)
16
- assert(!root.left.nullable)
17
- assert(!root.left.left.nullable)
18
- assert(star_node.nullable)
19
-
20
- assert_equal(Set.new([first_a, first_b, second_a]), root.first)
21
- assert_equal(Set.new([last_b]), root.left.last)
22
-
23
- root.calculate_follow_sets
24
-
25
- assert_equal(Set.new([first_a, first_b, second_a]), first_a.follow_set)
26
- assert_equal(Set.new([first_a, first_b, second_a]), first_b.follow_set)
27
- assert_equal(Set.new([second_b]), second_a.follow_set)
28
- assert_equal(Set.new([last_b]), second_b.follow_set)
29
- assert_equal(Set.new([sentinel]), last_b.follow_set)
30
- end
31
-
32
- def test_DFA_raises_exception_if_empty_regex
33
- machine = Dhaka::LexerSupport::DFA.new("")
34
- flunk "Should have thrown an unexpected end of regex exception"
35
- rescue Dhaka::LexerSupport::InvalidRegexException => e
36
- assert_equal("Unexpected end of regex.", e.message)
37
- end
38
-
39
- def test_DFA_raises_exception_if_error_parsing_regex
40
- machine = Dhaka::LexerSupport::DFA.new("(a|b)*+abb")
41
- flunk "Should have thrown an unexpected token exception"
42
- rescue Dhaka::LexerSupport::InvalidRegexException => e
43
- assert_equal("Unexpected token +: (a|b)*>>>+abb", e.message)
44
- end
45
-
46
- def test_match_a_regex
47
- machine = Dhaka::LexerSupport::DFA.new("(a|b)*abb")
48
- assert(machine.matches("abababb"))
49
- assert(machine.matches("ababaabb"))
50
- assert(!machine.matches("abababab"))
51
- assert(!machine.matches("abababbc"))
52
- assert(!machine.matches("abababbaa"))
53
- end
54
-
55
- def test_match_a_regex_with_optional_characters_at_the_end
56
- machine = Dhaka::LexerSupport::DFA.new("bad(c|d)+(ab)*")
57
- assert(machine.matches("badccddabab"))
58
- assert(machine.matches("baddcc"))
59
- assert(!machine.matches("badab"))
60
- assert(!machine.matches("bacdab"))
61
- end
62
-
63
- def test_match_a_nullable_regex
64
- machine = Dhaka::LexerSupport::DFA.new("(ab)*")
65
- assert(machine.matches("abab"))
66
- assert(machine.matches("ab"))
67
- assert(machine.matches(""))
68
- assert(!machine.matches("b"))
69
- end
70
-
71
- def test_match_a_regex_with_the_dot_character
72
- machine = Dhaka::LexerSupport::DFA.new("ab.*cd")
73
- assert(machine.matches("abacd"))
74
- assert(machine.matches("abcd"))
75
- assert(machine.matches("abAcd"))
76
- assert(!machine.matches("ab999c"))
77
- end
78
-
79
- def test_match_a_regex_with_sets
80
- machine = Dhaka::LexerSupport::DFA.new("ab[j-lu]*cd")
81
- assert(!machine.matches("abacd"))
82
- assert(machine.matches("abcd"))
83
- assert(machine.matches("abjklucd"))
84
- assert(!machine.matches("abijklucd"))
85
- assert(!machine.matches("ab999c"))
86
- end
87
-
88
- def test_match_a_regex_with_negative_sets
89
- machine = Dhaka::LexerSupport::DFA.new("ab[^j-lr]*cd")
90
- assert(machine.matches("abcd"))
91
- assert(!machine.matches("abjcd"))
92
- assert(!machine.matches("abrcd"))
93
- assert(!machine.matches("abijklucd"))
94
- assert(machine.matches("abyqcd"))
95
- end
96
-
97
- def test_match_a_regex_with_sets_containing_escaped_characters
98
- machine = Dhaka::LexerSupport::DFA.new("ab[\\^\\-.]*cd")
99
- assert(machine.matches("abcd"))
100
- assert(!machine.matches("abjcd"))
101
- assert(machine.matches("ab^-.cd"))
102
- assert(!machine.matches("abijklucd"))
103
- assert(!machine.matches("ab\\cd"))
104
- end
105
-
106
- def test_match_a_regex_using_unescaped_caret_and_dash_characters
107
- machine = Dhaka::LexerSupport::DFA.new("(\\^-)+")
108
- assert(machine.matches("^-"))
109
- assert(machine.matches("^-^-"))
110
- assert(!machine.matches("?cd"))
111
- end
112
-
113
- def test_match_a_regex_using_escape_characters
114
- machine = Dhaka::LexerSupport::DFA.new(%q/(-\?\(\)\\\\)*/)
115
- assert(machine.matches("-?()\\"))
116
- end
117
-
118
- def test_match_a_regex_using_lt_and_gt
119
- machine = Dhaka::LexerSupport::DFA.new('<.+>')
120
- assert(machine.matches("<ab>"))
121
- assert(machine.matches("<absdf><sdg><sse>"))
122
- assert(!machine.matches("ab>"))
123
- end
124
-
125
- def test_simulating_curly_brace_quantifiers
126
- machine = Dhaka::LexerSupport::DFA.new('aaa?a?a?')
127
- assert(machine.matches("aa"))
128
- assert(machine.matches("aaa"))
129
- assert(machine.matches("aaaa"))
130
- assert(machine.matches("aaaaa"))
131
- assert(!machine.matches("aaaaaa"))
132
- assert(!machine.matches("a"))
133
- end
134
-
135
- class LexerSpec < Dhaka::LexerSpecification
136
-
137
- for_pattern 'zz' do
138
- "recognized two zs"
139
- end
140
-
141
- for_pattern '\w(\w|\d)*' do
142
- "recognized word token #{current_lexeme.value}"
143
- end
144
-
145
- for_pattern '(\d)*(\.\d+)?' do
146
- "recognized number #{current_lexeme.value}"
147
- end
148
-
149
- for_pattern '<.*>' do
150
- "recognized tag #{current_lexeme.value}"
151
- end
152
-
153
- for_pattern ' +' do
154
- #ignores whitespace
155
- end
156
-
157
- for_pattern "\n+" do
158
- "recognized newline"
159
- end
160
- end
161
-
162
- def test_lexer_with_valid_input
163
- lexer = Dhaka::Lexer.new(LexerSpec)
164
- eval(lexer.compile_to_ruby_source_as(:SomeLexer))
165
- input = "these are words a z zz caPITALIZED word
166
- this is a float 12.00 an integer 134 a float without a leading digit .2335 another word1"
167
- results = SomeLexer.lex(input).collect
168
- assert_equal(
169
- ["recognized word token these",
170
- "recognized word token are",
171
- "recognized word token words",
172
- "recognized word token a",
173
- "recognized word token z",
174
- "recognized two zs",
175
- "recognized word token caPITALIZED",
176
- "recognized word token word",
177
- "recognized newline",
178
- "recognized word token this",
179
- "recognized word token is",
180
- "recognized word token a",
181
- "recognized word token float",
182
- "recognized number 12.00",
183
- "recognized word token an",
184
- "recognized word token integer",
185
- "recognized number 134",
186
- "recognized word token a",
187
- "recognized word token float",
188
- "recognized word token without",
189
- "recognized word token a",
190
- "recognized word token leading",
191
- "recognized word token digit",
192
- "recognized number .2335",
193
- "recognized word token another",
194
- "recognized word token word1"], results[0..-2])
195
- end
196
-
197
- def test_lexer_with_invalid_input
198
- lexer = Dhaka::Lexer.new(LexerSpec)
199
- result = lexer.lex("this will cause an error here 123.").each do |result|
200
- end
201
- assert(result.has_error?)
202
- assert_equal(34, result.unexpected_char_index)
203
- end
204
-
205
- def test_lexer_with_greedy_character_consumption
206
- lexer = Dhaka::Lexer.new(LexerSpec)
207
- results = lexer.lex("<html></html>this is a word").collect
208
- assert_equal(["recognized tag <html></html>",
209
- "recognized word token this",
210
- "recognized word token is",
211
- "recognized word token a",
212
- "recognized word token word"], results[0..-2])
213
- end
214
-
215
- end