dhaka 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/evaluator/evaluator.rb +18 -17
- data/lib/grammar/grammar.rb +4 -5
- data/lib/lexer/dfa.rb +63 -13
- data/lib/lexer/lexeme.rb +3 -4
- data/lib/lexer/lexer.rb +12 -3
- data/lib/lexer/lexer_run.rb +22 -10
- data/lib/lexer/regex_grammar.rb +88 -14
- data/lib/lexer/regex_parser.rb +1523 -1401
- data/lib/lexer/specification.rb +29 -3
- data/lib/lexer/state.rb +32 -9
- data/lib/lexer/state_machine.rb +2 -2
- data/lib/parser/channel.rb +4 -4
- data/lib/parser/parser.rb +17 -12
- data/lib/parser/parser_state.rb +3 -1
- data/test/chittagong/chittagong_lexer.rb +63 -63
- data/test/chittagong/chittagong_lexer.rb.rej +189 -0
- data/test/chittagong/chittagong_lexer_specification.rb +6 -8
- data/test/chittagong/chittagong_parser.rb +659 -659
- data/test/chittagong/chittagong_parser.rb.rej +1623 -0
- data/test/{another_lalr_but_not_slr_grammar.rb → core/another_lalr_but_not_slr_grammar.rb} +1 -1
- data/test/{compiled_parser_test.rb → core/compiled_parser_test.rb} +1 -1
- data/test/core/dfa_test.rb +170 -0
- data/test/{evaluator_test.rb → core/evaluator_test.rb} +3 -3
- data/test/{grammar_test.rb → core/grammar_test.rb} +3 -3
- data/test/{lalr_but_not_slr_grammar.rb → core/lalr_but_not_slr_grammar.rb} +0 -0
- data/test/core/lexer_test.rb +139 -0
- data/test/{malformed_grammar.rb → core/malformed_grammar.rb} +0 -0
- data/test/{malformed_grammar_test.rb → core/malformed_grammar_test.rb} +1 -1
- data/test/{nullable_grammar.rb → core/nullable_grammar.rb} +0 -0
- data/test/{parse_result_test.rb → core/parse_result_test.rb} +1 -1
- data/test/{parser_state_test.rb → core/parser_state_test.rb} +1 -1
- data/test/{parser_test.rb → core/parser_test.rb} +2 -2
- data/test/{precedence_grammar.rb → core/precedence_grammar.rb} +0 -0
- data/test/{precedence_grammar_test.rb → core/precedence_grammar_test.rb} +1 -1
- data/test/{rr_conflict_grammar.rb → core/rr_conflict_grammar.rb} +0 -0
- data/test/{simple_grammar.rb → core/simple_grammar.rb} +0 -0
- data/test/{sr_conflict_grammar.rb → core/sr_conflict_grammar.rb} +0 -0
- metadata +25 -22
- data/test/lexer_test.rb +0 -215
data/test/lexer_test.rb
DELETED
@@ -1,215 +0,0 @@
-require File.dirname(__FILE__) + '/dhaka_test_helper'
-
-class TestLexer < Test::Unit::TestCase
-  def test_build_AST_from_parse_tree_and_compute_follow_first_and_last
-    root = Dhaka::LexerSupport::RegexParser.parse(Dhaka::LexerSupport::RegexTokenizer.tokenize("(a|b)*abb"))
-    star_node = root.left.left.left.left
-    or_node = star_node.child
-    first_a = or_node.children[0]
-    first_b = or_node.children[1]
-    second_a = root.left.left.left.right
-    second_b = root.left.left.right
-    last_b = root.left.right
-    sentinel = root.right
-
-    assert(!root.nullable)
-    assert(!root.left.nullable)
-    assert(!root.left.left.nullable)
-    assert(star_node.nullable)
-
-    assert_equal(Set.new([first_a, first_b, second_a]), root.first)
-    assert_equal(Set.new([last_b]), root.left.last)
-
-    root.calculate_follow_sets
-
-    assert_equal(Set.new([first_a, first_b, second_a]), first_a.follow_set)
-    assert_equal(Set.new([first_a, first_b, second_a]), first_b.follow_set)
-    assert_equal(Set.new([second_b]), second_a.follow_set)
-    assert_equal(Set.new([last_b]), second_b.follow_set)
-    assert_equal(Set.new([sentinel]), last_b.follow_set)
-  end
-
-  def test_DFA_raises_exception_if_empty_regex
-    machine = Dhaka::LexerSupport::DFA.new("")
-    flunk "Should have thrown an unexpected end of regex exception"
-  rescue Dhaka::LexerSupport::InvalidRegexException => e
-    assert_equal("Unexpected end of regex.", e.message)
-  end
-
-  def test_DFA_raises_exception_if_error_parsing_regex
-    machine = Dhaka::LexerSupport::DFA.new("(a|b)*+abb")
-    flunk "Should have thrown an unexpected token exception"
-  rescue Dhaka::LexerSupport::InvalidRegexException => e
-    assert_equal("Unexpected token +: (a|b)*>>>+abb", e.message)
-  end
-
-  def test_match_a_regex
-    machine = Dhaka::LexerSupport::DFA.new("(a|b)*abb")
-    assert(machine.matches("abababb"))
-    assert(machine.matches("ababaabb"))
-    assert(!machine.matches("abababab"))
-    assert(!machine.matches("abababbc"))
-    assert(!machine.matches("abababbaa"))
-  end
-
-  def test_match_a_regex_with_optional_characters_at_the_end
-    machine = Dhaka::LexerSupport::DFA.new("bad(c|d)+(ab)*")
-    assert(machine.matches("badccddabab"))
-    assert(machine.matches("baddcc"))
-    assert(!machine.matches("badab"))
-    assert(!machine.matches("bacdab"))
-  end
-
-  def test_match_a_nullable_regex
-    machine = Dhaka::LexerSupport::DFA.new("(ab)*")
-    assert(machine.matches("abab"))
-    assert(machine.matches("ab"))
-    assert(machine.matches(""))
-    assert(!machine.matches("b"))
-  end
-
-  def test_match_a_regex_with_the_dot_character
-    machine = Dhaka::LexerSupport::DFA.new("ab.*cd")
-    assert(machine.matches("abacd"))
-    assert(machine.matches("abcd"))
-    assert(machine.matches("abAcd"))
-    assert(!machine.matches("ab999c"))
-  end
-
-  def test_match_a_regex_with_sets
-    machine = Dhaka::LexerSupport::DFA.new("ab[j-lu]*cd")
-    assert(!machine.matches("abacd"))
-    assert(machine.matches("abcd"))
-    assert(machine.matches("abjklucd"))
-    assert(!machine.matches("abijklucd"))
-    assert(!machine.matches("ab999c"))
-  end
-
-  def test_match_a_regex_with_negative_sets
-    machine = Dhaka::LexerSupport::DFA.new("ab[^j-lr]*cd")
-    assert(machine.matches("abcd"))
-    assert(!machine.matches("abjcd"))
-    assert(!machine.matches("abrcd"))
-    assert(!machine.matches("abijklucd"))
-    assert(machine.matches("abyqcd"))
-  end
-
-  def test_match_a_regex_with_sets_containing_escaped_characters
-    machine = Dhaka::LexerSupport::DFA.new("ab[\\^\\-.]*cd")
-    assert(machine.matches("abcd"))
-    assert(!machine.matches("abjcd"))
-    assert(machine.matches("ab^-.cd"))
-    assert(!machine.matches("abijklucd"))
-    assert(!machine.matches("ab\\cd"))
-  end
-
-  def test_match_a_regex_using_unescaped_caret_and_dash_characters
-    machine = Dhaka::LexerSupport::DFA.new("(\\^-)+")
-    assert(machine.matches("^-"))
-    assert(machine.matches("^-^-"))
-    assert(!machine.matches("?cd"))
-  end
-
-  def test_match_a_regex_using_escape_characters
-    machine = Dhaka::LexerSupport::DFA.new(%q/(-\?\(\)\\\\)*/)
-    assert(machine.matches("-?()\\"))
-  end
-
-  def test_match_a_regex_using_lt_and_gt
-    machine = Dhaka::LexerSupport::DFA.new('<.+>')
-    assert(machine.matches("<ab>"))
-    assert(machine.matches("<absdf><sdg><sse>"))
-    assert(!machine.matches("ab>"))
-  end
-
-  def test_simulating_curly_brace_quantifiers
-    machine = Dhaka::LexerSupport::DFA.new('aaa?a?a?')
-    assert(machine.matches("aa"))
-    assert(machine.matches("aaa"))
-    assert(machine.matches("aaaa"))
-    assert(machine.matches("aaaaa"))
-    assert(!machine.matches("aaaaaa"))
-    assert(!machine.matches("a"))
-  end
-
-  class LexerSpec < Dhaka::LexerSpecification
-
-    for_pattern 'zz' do
-      "recognized two zs"
-    end
-
-    for_pattern '\w(\w|\d)*' do
-      "recognized word token #{current_lexeme.value}"
-    end
-
-    for_pattern '(\d)*(\.\d+)?' do
-      "recognized number #{current_lexeme.value}"
-    end
-
-    for_pattern '<.*>' do
-      "recognized tag #{current_lexeme.value}"
-    end
-
-    for_pattern ' +' do
-      # ignores whitespace
-    end
-
-    for_pattern "\n+" do
-      "recognized newline"
-    end
-  end
-
-  def test_lexer_with_valid_input
-    lexer = Dhaka::Lexer.new(LexerSpec)
-    eval(lexer.compile_to_ruby_source_as(:SomeLexer))
-    input = "these are words a z zz caPITALIZED word
-    this is a float 12.00 an integer 134 a float without a leading digit .2335 another word1"
-    results = SomeLexer.lex(input).collect
-    assert_equal(
-      ["recognized word token these",
-       "recognized word token are",
-       "recognized word token words",
-       "recognized word token a",
-       "recognized word token z",
-       "recognized two zs",
-       "recognized word token caPITALIZED",
-       "recognized word token word",
-       "recognized newline",
-       "recognized word token this",
-       "recognized word token is",
-       "recognized word token a",
-       "recognized word token float",
-       "recognized number 12.00",
-       "recognized word token an",
-       "recognized word token integer",
-       "recognized number 134",
-       "recognized word token a",
-       "recognized word token float",
-       "recognized word token without",
-       "recognized word token a",
-       "recognized word token leading",
-       "recognized word token digit",
-       "recognized number .2335",
-       "recognized word token another",
-       "recognized word token word1"], results[0..-2])
-  end
-
-  def test_lexer_with_invalid_input
-    lexer = Dhaka::Lexer.new(LexerSpec)
-    result = lexer.lex("this will cause an error here 123.").each do |result|
-    end
-    assert(result.has_error?)
-    assert_equal(34, result.unexpected_char_index)
-  end
-
-  def test_lexer_with_greedy_character_consumption
-    lexer = Dhaka::Lexer.new(LexerSpec)
-    results = lexer.lex("<html></html>this is a word").collect
-    assert_equal(["recognized tag <html></html>",
-                  "recognized word token this",
-                  "recognized word token is",
-                  "recognized word token a",
-                  "recognized word token word"], results[0..-2])
-  end
-
-end