natalie_parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +22 -0
- data/Dockerfile +26 -0
- data/Gemfile +10 -0
- data/LICENSE +21 -0
- data/README.md +55 -0
- data/Rakefile +242 -0
- data/ext/natalie_parser/extconf.rb +9 -0
- data/ext/natalie_parser/mri_creator.hpp +139 -0
- data/ext/natalie_parser/natalie_parser.cpp +144 -0
- data/include/natalie_parser/creator/debug_creator.hpp +113 -0
- data/include/natalie_parser/creator.hpp +108 -0
- data/include/natalie_parser/lexer/interpolated_string_lexer.hpp +64 -0
- data/include/natalie_parser/lexer/regexp_lexer.hpp +37 -0
- data/include/natalie_parser/lexer/word_array_lexer.hpp +57 -0
- data/include/natalie_parser/lexer.hpp +135 -0
- data/include/natalie_parser/node/alias_node.hpp +35 -0
- data/include/natalie_parser/node/arg_node.hpp +74 -0
- data/include/natalie_parser/node/array_node.hpp +34 -0
- data/include/natalie_parser/node/array_pattern_node.hpp +28 -0
- data/include/natalie_parser/node/assignment_node.hpp +34 -0
- data/include/natalie_parser/node/back_ref_node.hpp +28 -0
- data/include/natalie_parser/node/begin_block_node.hpp +25 -0
- data/include/natalie_parser/node/begin_node.hpp +52 -0
- data/include/natalie_parser/node/begin_rescue_node.hpp +47 -0
- data/include/natalie_parser/node/bignum_node.hpp +37 -0
- data/include/natalie_parser/node/block_node.hpp +55 -0
- data/include/natalie_parser/node/block_pass_node.hpp +33 -0
- data/include/natalie_parser/node/break_node.hpp +32 -0
- data/include/natalie_parser/node/call_node.hpp +85 -0
- data/include/natalie_parser/node/case_in_node.hpp +40 -0
- data/include/natalie_parser/node/case_node.hpp +52 -0
- data/include/natalie_parser/node/case_when_node.hpp +43 -0
- data/include/natalie_parser/node/class_node.hpp +39 -0
- data/include/natalie_parser/node/colon2_node.hpp +44 -0
- data/include/natalie_parser/node/colon3_node.hpp +34 -0
- data/include/natalie_parser/node/constant_node.hpp +26 -0
- data/include/natalie_parser/node/def_node.hpp +55 -0
- data/include/natalie_parser/node/defined_node.hpp +33 -0
- data/include/natalie_parser/node/encoding_node.hpp +26 -0
- data/include/natalie_parser/node/end_block_node.hpp +25 -0
- data/include/natalie_parser/node/evaluate_to_string_node.hpp +37 -0
- data/include/natalie_parser/node/false_node.hpp +23 -0
- data/include/natalie_parser/node/fixnum_node.hpp +36 -0
- data/include/natalie_parser/node/float_node.hpp +36 -0
- data/include/natalie_parser/node/hash_node.hpp +34 -0
- data/include/natalie_parser/node/hash_pattern_node.hpp +27 -0
- data/include/natalie_parser/node/identifier_node.hpp +123 -0
- data/include/natalie_parser/node/if_node.hpp +43 -0
- data/include/natalie_parser/node/infix_op_node.hpp +46 -0
- data/include/natalie_parser/node/interpolated_node.hpp +33 -0
- data/include/natalie_parser/node/interpolated_regexp_node.hpp +28 -0
- data/include/natalie_parser/node/interpolated_shell_node.hpp +22 -0
- data/include/natalie_parser/node/interpolated_string_node.hpp +31 -0
- data/include/natalie_parser/node/interpolated_symbol_key_node.hpp +18 -0
- data/include/natalie_parser/node/interpolated_symbol_node.hpp +28 -0
- data/include/natalie_parser/node/iter_node.hpp +45 -0
- data/include/natalie_parser/node/keyword_arg_node.hpp +25 -0
- data/include/natalie_parser/node/keyword_splat_node.hpp +38 -0
- data/include/natalie_parser/node/logical_and_node.hpp +40 -0
- data/include/natalie_parser/node/logical_or_node.hpp +40 -0
- data/include/natalie_parser/node/match_node.hpp +38 -0
- data/include/natalie_parser/node/module_node.hpp +32 -0
- data/include/natalie_parser/node/multiple_assignment_arg_node.hpp +32 -0
- data/include/natalie_parser/node/multiple_assignment_node.hpp +37 -0
- data/include/natalie_parser/node/next_node.hpp +37 -0
- data/include/natalie_parser/node/nil_node.hpp +23 -0
- data/include/natalie_parser/node/nil_sexp_node.hpp +23 -0
- data/include/natalie_parser/node/node.hpp +155 -0
- data/include/natalie_parser/node/node_with_args.hpp +47 -0
- data/include/natalie_parser/node/not_match_node.hpp +35 -0
- data/include/natalie_parser/node/not_node.hpp +37 -0
- data/include/natalie_parser/node/nth_ref_node.hpp +27 -0
- data/include/natalie_parser/node/op_assign_accessor_node.hpp +74 -0
- data/include/natalie_parser/node/op_assign_and_node.hpp +34 -0
- data/include/natalie_parser/node/op_assign_node.hpp +47 -0
- data/include/natalie_parser/node/op_assign_or_node.hpp +34 -0
- data/include/natalie_parser/node/pin_node.hpp +33 -0
- data/include/natalie_parser/node/range_node.hpp +52 -0
- data/include/natalie_parser/node/redo_node.hpp +20 -0
- data/include/natalie_parser/node/regexp_node.hpp +36 -0
- data/include/natalie_parser/node/retry_node.hpp +20 -0
- data/include/natalie_parser/node/return_node.hpp +34 -0
- data/include/natalie_parser/node/safe_call_node.hpp +31 -0
- data/include/natalie_parser/node/sclass_node.hpp +37 -0
- data/include/natalie_parser/node/self_node.hpp +23 -0
- data/include/natalie_parser/node/shadow_arg_node.hpp +40 -0
- data/include/natalie_parser/node/shell_node.hpp +32 -0
- data/include/natalie_parser/node/splat_node.hpp +39 -0
- data/include/natalie_parser/node/splat_value_node.hpp +32 -0
- data/include/natalie_parser/node/stabby_proc_node.hpp +29 -0
- data/include/natalie_parser/node/string_node.hpp +42 -0
- data/include/natalie_parser/node/super_node.hpp +44 -0
- data/include/natalie_parser/node/symbol_key_node.hpp +19 -0
- data/include/natalie_parser/node/symbol_node.hpp +30 -0
- data/include/natalie_parser/node/to_array_node.hpp +33 -0
- data/include/natalie_parser/node/true_node.hpp +23 -0
- data/include/natalie_parser/node/unary_op_node.hpp +41 -0
- data/include/natalie_parser/node/undef_node.hpp +31 -0
- data/include/natalie_parser/node/until_node.hpp +21 -0
- data/include/natalie_parser/node/while_node.hpp +52 -0
- data/include/natalie_parser/node/yield_node.hpp +29 -0
- data/include/natalie_parser/node.hpp +89 -0
- data/include/natalie_parser/parser.hpp +218 -0
- data/include/natalie_parser/token.hpp +842 -0
- data/include/tm/defer.hpp +34 -0
- data/include/tm/hashmap.hpp +826 -0
- data/include/tm/macros.hpp +16 -0
- data/include/tm/optional.hpp +223 -0
- data/include/tm/owned_ptr.hpp +186 -0
- data/include/tm/recursion_guard.hpp +156 -0
- data/include/tm/shared_ptr.hpp +259 -0
- data/include/tm/string.hpp +1447 -0
- data/include/tm/tests.hpp +78 -0
- data/include/tm/vector.hpp +796 -0
- data/lib/natalie_parser/sexp.rb +36 -0
- data/lib/natalie_parser/version.rb +5 -0
- data/lib/natalie_parser.rb +3 -0
- data/natalie_parser.gemspec +23 -0
- data/src/lexer/interpolated_string_lexer.cpp +88 -0
- data/src/lexer/regexp_lexer.cpp +95 -0
- data/src/lexer/word_array_lexer.cpp +134 -0
- data/src/lexer.cpp +1703 -0
- data/src/node/alias_node.cpp +11 -0
- data/src/node/assignment_node.cpp +33 -0
- data/src/node/begin_node.cpp +29 -0
- data/src/node/begin_rescue_node.cpp +33 -0
- data/src/node/class_node.cpp +22 -0
- data/src/node/interpolated_regexp_node.cpp +19 -0
- data/src/node/interpolated_shell_node.cpp +25 -0
- data/src/node/interpolated_string_node.cpp +111 -0
- data/src/node/interpolated_symbol_node.cpp +25 -0
- data/src/node/match_node.cpp +14 -0
- data/src/node/module_node.cpp +21 -0
- data/src/node/multiple_assignment_node.cpp +37 -0
- data/src/node/node.cpp +10 -0
- data/src/node/node_with_args.cpp +35 -0
- data/src/node/op_assign_node.cpp +36 -0
- data/src/node/string_node.cpp +33 -0
- data/src/parser.cpp +2972 -0
- data/src/token.cpp +27 -0
- metadata +186 -0
data/src/lexer.cpp
ADDED
|
@@ -0,0 +1,1703 @@
|
|
|
1
|
+
#include <errno.h>
|
|
2
|
+
#include <limits>
|
|
3
|
+
#include <stdlib.h>
|
|
4
|
+
|
|
5
|
+
#include "natalie_parser/lexer.hpp"
|
|
6
|
+
#include "natalie_parser/lexer/interpolated_string_lexer.hpp"
|
|
7
|
+
#include "natalie_parser/lexer/regexp_lexer.hpp"
|
|
8
|
+
#include "natalie_parser/lexer/word_array_lexer.hpp"
|
|
9
|
+
#include "natalie_parser/token.hpp"
|
|
10
|
+
|
|
11
|
+
namespace NatalieParser {
|
|
12
|
+
|
|
13
|
+
// Lexes the entire input and returns the full token stream.
//
// Beyond simply looping over next_token(), this performs three stream-level
// clean-ups:
//   1. Plain comments are dropped entirely.
//   2. Consecutive doc tokens (=begin/=end blocks) are merged into one and
//      attached to the next token that can carry documentation.
//   3. Newlines are collapsed around tokens that allow it, so the parser
//      never sees a newline that would split a logical expression.
//
// Returns a heap-allocated vector wrapped in SharedPtr; the final token is
// either Eof or the first invalid token encountered.
SharedPtr<Vector<Token>> Lexer::tokens() {
    SharedPtr<Vector<Token>> tokens = new Vector<Token> {};
    bool skip_next_newline = false;
    // Accumulates doc text until a token that can_have_doc() shows up.
    Token last_doc_token;
    for (;;) {
        auto token = next_token();
        if (token.is_comment())
            continue;

        if (token.is_doc()) {
            // Merge adjacent doc blocks into a single doc string.
            if (last_doc_token)
                last_doc_token.literal_string()->append(*token.literal_string());
            else
                last_doc_token = token;
            continue;
        }

        // get rid of newlines after certain tokens
        if (skip_next_newline) {
            if (token.is_newline())
                continue;
            else
                skip_next_newline = false;
        }

        // get rid of newlines before certain tokens (pop any newline run
        // already pushed onto the stream)
        while (token.can_follow_collapsible_newline() && !tokens->is_empty() && tokens->last().is_newline())
            tokens->pop();

        if (last_doc_token) {
            if (token.can_have_doc()) {
                // Attach the accumulated doc text to this token.
                token.set_doc(last_doc_token.literal_string());
                last_doc_token = {};
            } else if (!token.is_end_of_line()) {
                // A non-documentable, non-newline token breaks the
                // doc/definition adjacency, so the pending doc is discarded.
                last_doc_token = {};
            }
        }

        tokens->push(token);

        m_last_token = token;

        if (token.is_eof())
            return tokens;
        // Stop at the first invalid token; the parser reports the error.
        if (!token.is_valid())
            return tokens;
        if (token.can_precede_collapsible_newline())
            skip_next_newline = true;
    };
    TM_UNREACHABLE();
}
|
|
64
|
+
|
|
65
|
+
// Produces the next token from the input.
//
// If a nested lexer is active (e.g. inside an interpolated string, regexp,
// or word array), its tokens are yielded first. Once the nested lexer
// reports Eof, this lexer optionally adopts the nested cursor position,
// tears the nested lexer down, and resumes lexing its own input.
Token Lexer::next_token() {
    if (m_nested_lexer) {
        auto nested_token = m_nested_lexer->next_token();
        if (!nested_token.is_eof())
            return nested_token;
        // Nested lexer is exhausted: sync our cursor if it consumed
        // characters from our input, then dispose of it.
        if (m_nested_lexer->alters_parent_cursor_position()) {
            m_index = m_nested_lexer->m_index;
            m_cursor_line = m_nested_lexer->m_cursor_line;
            m_cursor_column = m_nested_lexer->m_cursor_column;
        }
        delete m_nested_lexer;
        m_nested_lexer = nullptr;
    }
    m_whitespace_precedes = skip_whitespace();
    // Record where the token starts before consuming any of its characters.
    m_token_line = m_cursor_line;
    m_token_column = m_cursor_column;
    return build_next_token();
}
|
|
85
|
+
|
|
86
|
+
// Returns true if `c` can appear inside an identifier: ASCII alphanumerics,
// underscore, or any non-ASCII byte (>= 128, i.e. part of a UTF-8 sequence).
// NUL is never an identifier character.
bool is_identifier_char(char c) {
    if (!c) return false;
    // Cast through unsigned char before classifying: on platforms where
    // `char` is signed, a UTF-8 continuation byte is negative, and passing
    // a negative value (other than EOF) to isalnum() is undefined behavior.
    // Checking the high-byte case first also keeps such bytes out of
    // isalnum() entirely.
    unsigned char uc = (unsigned char)c;
    return uc >= 128 || isalnum(uc) || c == '_';
}
|
|
90
|
+
|
|
91
|
+
// Returns true if `c` may end a Ruby method name: `?` (predicate methods)
// or `!` (bang methods).
bool is_message_suffix(char c) {
    switch (c) {
    case '?':
    case '!':
        return true;
    default:
        // Covers NUL and every other character.
        return false;
    }
}
|
|
95
|
+
|
|
96
|
+
bool is_identifier_char_or_message_suffix(char c) {
|
|
97
|
+
return is_identifier_char(c) || is_message_suffix(c);
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
bool Lexer::match(size_t bytes, const char *compare) {
|
|
101
|
+
if (m_index + bytes > m_size)
|
|
102
|
+
return false;
|
|
103
|
+
if (strncmp(compare, m_input->c_str() + m_index, bytes) == 0) {
|
|
104
|
+
if (m_index + bytes < m_size && is_identifier_char_or_message_suffix(m_input->at(m_index + bytes)))
|
|
105
|
+
return false;
|
|
106
|
+
advance(bytes);
|
|
107
|
+
return true;
|
|
108
|
+
}
|
|
109
|
+
return false;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
void Lexer::advance() {
|
|
113
|
+
auto c = current_char();
|
|
114
|
+
m_index++;
|
|
115
|
+
if (c == '\n') {
|
|
116
|
+
m_cursor_line++;
|
|
117
|
+
m_cursor_column = 0;
|
|
118
|
+
} else {
|
|
119
|
+
m_cursor_column++;
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
void Lexer::advance(size_t bytes) {
|
|
124
|
+
for (size_t i = 0; i < bytes; i++) {
|
|
125
|
+
advance();
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
// NOTE: this does not work across lines
|
|
130
|
+
// Moves the cursor `bytes` bytes backward. Only valid within a single
// line: m_cursor_line is never adjusted, so rewinding across a newline
// would leave the column count wrong (and potentially underflowed).
void Lexer::rewind(size_t bytes) {
    // NOTE(review): the return value of this call is discarded and no state
    // appears to depend on it here — confirm whether current_char() has a
    // required side effect or whether this line can be removed.
    current_char();
    m_cursor_column -= bytes;
    m_index -= bytes;
}
|
|
135
|
+
|
|
136
|
+
bool Lexer::skip_whitespace() {
|
|
137
|
+
bool whitespace_found = false;
|
|
138
|
+
char c = current_char();
|
|
139
|
+
while (c == ' ' || c == '\t' || (c == '\\' && peek() == '\n')) {
|
|
140
|
+
whitespace_found = true;
|
|
141
|
+
advance();
|
|
142
|
+
if (c == '\\') advance();
|
|
143
|
+
c = current_char();
|
|
144
|
+
}
|
|
145
|
+
return whitespace_found;
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
Token Lexer::build_next_token() {
|
|
149
|
+
if (m_index >= m_size)
|
|
150
|
+
return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column };
|
|
151
|
+
if (m_start_char && current_char() == m_start_char) {
|
|
152
|
+
m_pair_depth++;
|
|
153
|
+
} else if (m_stop_char && current_char() == m_stop_char) {
|
|
154
|
+
if (m_pair_depth == 0)
|
|
155
|
+
return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column };
|
|
156
|
+
m_pair_depth--;
|
|
157
|
+
} else if (m_index == 0 && current_char() == '\xEF') {
|
|
158
|
+
// UTF-8 BOM
|
|
159
|
+
advance(); // \xEF
|
|
160
|
+
if (current_char() == '\xBB') advance();
|
|
161
|
+
if (current_char() == '\xBF') advance();
|
|
162
|
+
}
|
|
163
|
+
Token token;
|
|
164
|
+
switch (current_char()) {
|
|
165
|
+
case '=': {
|
|
166
|
+
advance();
|
|
167
|
+
switch (current_char()) {
|
|
168
|
+
case '=': {
|
|
169
|
+
advance();
|
|
170
|
+
switch (current_char()) {
|
|
171
|
+
case '=': {
|
|
172
|
+
advance();
|
|
173
|
+
return Token { Token::Type::EqualEqualEqual, m_file, m_token_line, m_token_column };
|
|
174
|
+
}
|
|
175
|
+
default:
|
|
176
|
+
return Token { Token::Type::EqualEqual, m_file, m_token_line, m_token_column };
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
case '>':
|
|
180
|
+
advance();
|
|
181
|
+
return Token { Token::Type::HashRocket, m_file, m_token_line, m_token_column };
|
|
182
|
+
case '~':
|
|
183
|
+
advance();
|
|
184
|
+
return Token { Token::Type::Match, m_file, m_token_line, m_token_column };
|
|
185
|
+
default:
|
|
186
|
+
if (m_cursor_column == 1 && match(5, "begin")) {
|
|
187
|
+
SharedPtr<String> doc = new String("=begin");
|
|
188
|
+
char c = current_char();
|
|
189
|
+
do {
|
|
190
|
+
doc->append_char(c);
|
|
191
|
+
c = next();
|
|
192
|
+
} while (c && !(m_cursor_column == 0 && match(4, "=end")));
|
|
193
|
+
doc->append("=end\n");
|
|
194
|
+
return Token { Token::Type::Doc, doc, m_file, m_token_line, m_token_column };
|
|
195
|
+
}
|
|
196
|
+
auto token = Token { Token::Type::Equal, m_file, m_token_line, m_token_column };
|
|
197
|
+
token.set_whitespace_precedes(m_whitespace_precedes);
|
|
198
|
+
return token;
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
case '+':
|
|
202
|
+
advance();
|
|
203
|
+
switch (current_char()) {
|
|
204
|
+
case '=':
|
|
205
|
+
advance();
|
|
206
|
+
return Token { Token::Type::PlusEqual, m_file, m_token_line, m_token_column };
|
|
207
|
+
case '@':
|
|
208
|
+
if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
|
|
209
|
+
advance();
|
|
210
|
+
SharedPtr<String> lit = new String("+@");
|
|
211
|
+
return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
|
|
212
|
+
} else {
|
|
213
|
+
return Token { Token::Type::Plus, m_file, m_token_line, m_token_column };
|
|
214
|
+
}
|
|
215
|
+
default:
|
|
216
|
+
return Token { Token::Type::Plus, m_file, m_token_line, m_token_column };
|
|
217
|
+
}
|
|
218
|
+
case '-':
|
|
219
|
+
advance();
|
|
220
|
+
switch (current_char()) {
|
|
221
|
+
case '>':
|
|
222
|
+
advance();
|
|
223
|
+
return Token { Token::Type::Arrow, m_file, m_token_line, m_token_column };
|
|
224
|
+
case '=':
|
|
225
|
+
advance();
|
|
226
|
+
return Token { Token::Type::MinusEqual, m_file, m_token_line, m_token_column };
|
|
227
|
+
case '@':
|
|
228
|
+
if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
|
|
229
|
+
advance();
|
|
230
|
+
SharedPtr<String> lit = new String("-@");
|
|
231
|
+
return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
|
|
232
|
+
} else {
|
|
233
|
+
return Token { Token::Type::Minus, m_file, m_token_line, m_token_column };
|
|
234
|
+
}
|
|
235
|
+
default:
|
|
236
|
+
return Token { Token::Type::Minus, m_file, m_token_line, m_token_column };
|
|
237
|
+
}
|
|
238
|
+
case '*':
|
|
239
|
+
advance();
|
|
240
|
+
switch (current_char()) {
|
|
241
|
+
case '*':
|
|
242
|
+
advance();
|
|
243
|
+
switch (current_char()) {
|
|
244
|
+
case '=':
|
|
245
|
+
advance();
|
|
246
|
+
return Token { Token::Type::StarStarEqual, m_file, m_token_line, m_token_column };
|
|
247
|
+
default:
|
|
248
|
+
return Token { Token::Type::StarStar, m_file, m_token_line, m_token_column };
|
|
249
|
+
}
|
|
250
|
+
case '=':
|
|
251
|
+
advance();
|
|
252
|
+
return Token { Token::Type::StarEqual, m_file, m_token_line, m_token_column };
|
|
253
|
+
default:
|
|
254
|
+
return Token { Token::Type::Star, m_file, m_token_line, m_token_column };
|
|
255
|
+
}
|
|
256
|
+
case '/': {
|
|
257
|
+
advance();
|
|
258
|
+
if (!m_last_token)
|
|
259
|
+
return consume_regexp('/', '/');
|
|
260
|
+
switch (m_last_token.type()) {
|
|
261
|
+
case Token::Type::Comma:
|
|
262
|
+
case Token::Type::Doc:
|
|
263
|
+
case Token::Type::LBracket:
|
|
264
|
+
case Token::Type::LCurlyBrace:
|
|
265
|
+
case Token::Type::LParen:
|
|
266
|
+
case Token::Type::Match:
|
|
267
|
+
case Token::Type::Newline:
|
|
268
|
+
return consume_regexp('/', '/');
|
|
269
|
+
case Token::Type::DefKeyword:
|
|
270
|
+
return Token { Token::Type::Slash, m_file, m_token_line, m_token_column };
|
|
271
|
+
default: {
|
|
272
|
+
switch (current_char()) {
|
|
273
|
+
case ' ':
|
|
274
|
+
return Token { Token::Type::Slash, m_file, m_token_line, m_token_column };
|
|
275
|
+
case '=':
|
|
276
|
+
advance();
|
|
277
|
+
return Token { Token::Type::SlashEqual, m_file, m_token_line, m_token_column };
|
|
278
|
+
default:
|
|
279
|
+
if (m_whitespace_precedes) {
|
|
280
|
+
return consume_regexp('/', '/');
|
|
281
|
+
} else {
|
|
282
|
+
return Token { Token::Type::Slash, m_file, m_token_line, m_token_column };
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
case '%':
|
|
289
|
+
advance();
|
|
290
|
+
switch (current_char()) {
|
|
291
|
+
case '=':
|
|
292
|
+
advance();
|
|
293
|
+
return Token { Token::Type::PercentEqual, m_file, m_token_line, m_token_column };
|
|
294
|
+
case 'q':
|
|
295
|
+
switch (peek()) {
|
|
296
|
+
case '[':
|
|
297
|
+
advance(2);
|
|
298
|
+
return consume_single_quoted_string('[', ']');
|
|
299
|
+
case '{':
|
|
300
|
+
advance(2);
|
|
301
|
+
return consume_single_quoted_string('{', '}');
|
|
302
|
+
case '<':
|
|
303
|
+
advance(2);
|
|
304
|
+
return consume_single_quoted_string('<', '>');
|
|
305
|
+
case '(':
|
|
306
|
+
advance(2);
|
|
307
|
+
return consume_single_quoted_string('(', ')');
|
|
308
|
+
default: {
|
|
309
|
+
char c = peek();
|
|
310
|
+
if (char_can_be_string_or_regexp_delimiter(c)) {
|
|
311
|
+
advance(2);
|
|
312
|
+
return consume_single_quoted_string(c, c);
|
|
313
|
+
} else {
|
|
314
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
case 'Q':
|
|
319
|
+
switch (peek()) {
|
|
320
|
+
case '[':
|
|
321
|
+
advance(2);
|
|
322
|
+
return consume_double_quoted_string('[', ']');
|
|
323
|
+
case '{':
|
|
324
|
+
advance(2);
|
|
325
|
+
return consume_double_quoted_string('{', '}');
|
|
326
|
+
case '<':
|
|
327
|
+
advance(2);
|
|
328
|
+
return consume_double_quoted_string('<', '>');
|
|
329
|
+
case '(':
|
|
330
|
+
advance(2);
|
|
331
|
+
return consume_double_quoted_string('(', ')');
|
|
332
|
+
default: {
|
|
333
|
+
char c = peek();
|
|
334
|
+
if (char_can_be_string_or_regexp_delimiter(c)) {
|
|
335
|
+
advance(2);
|
|
336
|
+
return consume_double_quoted_string(c, c);
|
|
337
|
+
} else {
|
|
338
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
case 'r':
|
|
343
|
+
switch (peek()) {
|
|
344
|
+
case '[':
|
|
345
|
+
advance(2);
|
|
346
|
+
return consume_regexp('[', ']');
|
|
347
|
+
case '{':
|
|
348
|
+
advance(2);
|
|
349
|
+
return consume_regexp('{', '}');
|
|
350
|
+
case '(':
|
|
351
|
+
advance(2);
|
|
352
|
+
return consume_regexp('(', ')');
|
|
353
|
+
case '<':
|
|
354
|
+
advance(2);
|
|
355
|
+
return consume_regexp('<', '>');
|
|
356
|
+
default: {
|
|
357
|
+
char c = peek();
|
|
358
|
+
if (char_can_be_string_or_regexp_delimiter(c)) {
|
|
359
|
+
advance(2);
|
|
360
|
+
return consume_regexp(c, c);
|
|
361
|
+
} else {
|
|
362
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
|
363
|
+
}
|
|
364
|
+
}
|
|
365
|
+
}
|
|
366
|
+
case 'x':
|
|
367
|
+
switch (peek()) {
|
|
368
|
+
case '/': {
|
|
369
|
+
advance(2);
|
|
370
|
+
return consume_double_quoted_string('/', '/', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
|
|
371
|
+
}
|
|
372
|
+
case '[': {
|
|
373
|
+
advance(2);
|
|
374
|
+
return consume_double_quoted_string('[', ']', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
|
|
375
|
+
}
|
|
376
|
+
case '{': {
|
|
377
|
+
advance(2);
|
|
378
|
+
return consume_double_quoted_string('{', '}', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
|
|
379
|
+
}
|
|
380
|
+
case '(': {
|
|
381
|
+
advance(2);
|
|
382
|
+
return consume_double_quoted_string('(', ')', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
|
|
383
|
+
}
|
|
384
|
+
default:
|
|
385
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
|
386
|
+
}
|
|
387
|
+
case 'w':
|
|
388
|
+
switch (peek()) {
|
|
389
|
+
case '/':
|
|
390
|
+
case '|': {
|
|
391
|
+
char c = next();
|
|
392
|
+
advance();
|
|
393
|
+
return consume_quoted_array_without_interpolation(c, c, Token::Type::PercentLowerW);
|
|
394
|
+
}
|
|
395
|
+
case '[':
|
|
396
|
+
advance(2);
|
|
397
|
+
return consume_quoted_array_without_interpolation('[', ']', Token::Type::PercentLowerW);
|
|
398
|
+
case '{':
|
|
399
|
+
advance(2);
|
|
400
|
+
return consume_quoted_array_without_interpolation('{', '}', Token::Type::PercentLowerW);
|
|
401
|
+
case '<':
|
|
402
|
+
advance(2);
|
|
403
|
+
return consume_quoted_array_without_interpolation('<', '>', Token::Type::PercentLowerW);
|
|
404
|
+
case '(':
|
|
405
|
+
advance(2);
|
|
406
|
+
return consume_quoted_array_without_interpolation('(', ')', Token::Type::PercentLowerW);
|
|
407
|
+
default:
|
|
408
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
|
409
|
+
}
|
|
410
|
+
case 'W':
|
|
411
|
+
switch (peek()) {
|
|
412
|
+
case '/':
|
|
413
|
+
case '|': {
|
|
414
|
+
char c = next();
|
|
415
|
+
advance();
|
|
416
|
+
return consume_quoted_array_with_interpolation(0, c, Token::Type::PercentUpperW);
|
|
417
|
+
}
|
|
418
|
+
case '[':
|
|
419
|
+
advance(2);
|
|
420
|
+
return consume_quoted_array_with_interpolation('[', ']', Token::Type::PercentUpperW);
|
|
421
|
+
case '{':
|
|
422
|
+
advance(2);
|
|
423
|
+
return consume_quoted_array_with_interpolation('{', '}', Token::Type::PercentUpperW);
|
|
424
|
+
case '<':
|
|
425
|
+
advance(2);
|
|
426
|
+
return consume_quoted_array_with_interpolation('<', '>', Token::Type::PercentUpperW);
|
|
427
|
+
case '(':
|
|
428
|
+
advance(2);
|
|
429
|
+
return consume_quoted_array_with_interpolation('(', ')', Token::Type::PercentUpperW);
|
|
430
|
+
default:
|
|
431
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
|
432
|
+
}
|
|
433
|
+
case 'i':
|
|
434
|
+
switch (peek()) {
|
|
435
|
+
case '|':
|
|
436
|
+
case '/': {
|
|
437
|
+
char c = next();
|
|
438
|
+
advance();
|
|
439
|
+
return consume_quoted_array_without_interpolation(c, c, Token::Type::PercentLowerI);
|
|
440
|
+
}
|
|
441
|
+
case '[':
|
|
442
|
+
advance(2);
|
|
443
|
+
return consume_quoted_array_without_interpolation('[', ']', Token::Type::PercentLowerI);
|
|
444
|
+
case '{':
|
|
445
|
+
advance(2);
|
|
446
|
+
return consume_quoted_array_without_interpolation('{', '}', Token::Type::PercentLowerI);
|
|
447
|
+
case '<':
|
|
448
|
+
advance(2);
|
|
449
|
+
return consume_quoted_array_without_interpolation('<', '>', Token::Type::PercentLowerI);
|
|
450
|
+
case '(':
|
|
451
|
+
advance(2);
|
|
452
|
+
return consume_quoted_array_without_interpolation('(', ')', Token::Type::PercentLowerI);
|
|
453
|
+
default:
|
|
454
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
|
455
|
+
}
|
|
456
|
+
case 'I':
|
|
457
|
+
switch (peek()) {
|
|
458
|
+
case '|':
|
|
459
|
+
case '/': {
|
|
460
|
+
char c = next();
|
|
461
|
+
advance();
|
|
462
|
+
return consume_quoted_array_with_interpolation(0, c, Token::Type::PercentUpperI);
|
|
463
|
+
}
|
|
464
|
+
case '[':
|
|
465
|
+
advance(2);
|
|
466
|
+
return consume_quoted_array_with_interpolation('[', ']', Token::Type::PercentUpperI);
|
|
467
|
+
case '{':
|
|
468
|
+
advance(2);
|
|
469
|
+
return consume_quoted_array_with_interpolation('{', '}', Token::Type::PercentUpperI);
|
|
470
|
+
case '<':
|
|
471
|
+
advance(2);
|
|
472
|
+
return consume_quoted_array_with_interpolation('<', '>', Token::Type::PercentUpperI);
|
|
473
|
+
case '(':
|
|
474
|
+
advance(2);
|
|
475
|
+
return consume_quoted_array_with_interpolation('(', ')', Token::Type::PercentUpperI);
|
|
476
|
+
default:
|
|
477
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
|
478
|
+
}
|
|
479
|
+
case '[':
|
|
480
|
+
advance();
|
|
481
|
+
return consume_double_quoted_string('[', ']');
|
|
482
|
+
case '{':
|
|
483
|
+
advance();
|
|
484
|
+
return consume_double_quoted_string('{', '}');
|
|
485
|
+
case '<':
|
|
486
|
+
advance();
|
|
487
|
+
return consume_double_quoted_string('<', '>');
|
|
488
|
+
case '(':
|
|
489
|
+
if (m_last_token.type() == Token::Type::DefKeyword || m_last_token.type() == Token::Type::Dot) {
|
|
490
|
+
// It's a trap! This looks like a %(string) but it's a method def/call!
|
|
491
|
+
break;
|
|
492
|
+
}
|
|
493
|
+
advance();
|
|
494
|
+
return consume_double_quoted_string('(', ')');
|
|
495
|
+
default: {
|
|
496
|
+
auto c = current_char();
|
|
497
|
+
if (char_can_be_string_or_regexp_delimiter(c)) {
|
|
498
|
+
advance();
|
|
499
|
+
return consume_double_quoted_string(c, c);
|
|
500
|
+
}
|
|
501
|
+
break;
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
|
505
|
+
case '!':
|
|
506
|
+
advance();
|
|
507
|
+
switch (current_char()) {
|
|
508
|
+
case '=':
|
|
509
|
+
advance();
|
|
510
|
+
return Token { Token::Type::NotEqual, m_file, m_token_line, m_token_column };
|
|
511
|
+
case '~':
|
|
512
|
+
advance();
|
|
513
|
+
return Token { Token::Type::NotMatch, m_file, m_token_line, m_token_column };
|
|
514
|
+
case '@':
|
|
515
|
+
if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
|
|
516
|
+
advance();
|
|
517
|
+
SharedPtr<String> lit = new String("!@");
|
|
518
|
+
return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
|
|
519
|
+
} else {
|
|
520
|
+
return Token { Token::Type::Not, m_file, m_token_line, m_token_column };
|
|
521
|
+
}
|
|
522
|
+
default:
|
|
523
|
+
return Token { Token::Type::Not, m_file, m_token_line, m_token_column };
|
|
524
|
+
}
|
|
525
|
+
case '<':
|
|
526
|
+
advance();
|
|
527
|
+
switch (current_char()) {
|
|
528
|
+
case '<': {
|
|
529
|
+
advance();
|
|
530
|
+
switch (current_char()) {
|
|
531
|
+
case '~':
|
|
532
|
+
case '-': {
|
|
533
|
+
auto next = peek();
|
|
534
|
+
if (isalpha(next))
|
|
535
|
+
return consume_heredoc();
|
|
536
|
+
switch (next) {
|
|
537
|
+
case '_':
|
|
538
|
+
case '"':
|
|
539
|
+
case '`':
|
|
540
|
+
case '\'':
|
|
541
|
+
return consume_heredoc();
|
|
542
|
+
default:
|
|
543
|
+
return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column };
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
case '=':
|
|
547
|
+
advance();
|
|
548
|
+
return Token { Token::Type::LeftShiftEqual, m_file, m_token_line, m_token_column };
|
|
549
|
+
default:
|
|
550
|
+
if (!m_whitespace_precedes) {
|
|
551
|
+
if (token_is_first_on_line())
|
|
552
|
+
return consume_heredoc();
|
|
553
|
+
else if (m_last_token.can_precede_heredoc_that_looks_like_left_shift_operator())
|
|
554
|
+
return consume_heredoc();
|
|
555
|
+
else
|
|
556
|
+
return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column };
|
|
557
|
+
}
|
|
558
|
+
if (isalpha(current_char()))
|
|
559
|
+
return consume_heredoc();
|
|
560
|
+
switch (current_char()) {
|
|
561
|
+
case '_':
|
|
562
|
+
case '"':
|
|
563
|
+
case '`':
|
|
564
|
+
case '\'':
|
|
565
|
+
return consume_heredoc();
|
|
566
|
+
default:
|
|
567
|
+
return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column };
|
|
568
|
+
}
|
|
569
|
+
}
|
|
570
|
+
}
|
|
571
|
+
case '=':
|
|
572
|
+
advance();
|
|
573
|
+
switch (current_char()) {
|
|
574
|
+
case '>':
|
|
575
|
+
advance();
|
|
576
|
+
return Token { Token::Type::Comparison, m_file, m_token_line, m_token_column };
|
|
577
|
+
default:
|
|
578
|
+
return Token { Token::Type::LessThanOrEqual, m_file, m_token_line, m_token_column };
|
|
579
|
+
}
|
|
580
|
+
default:
|
|
581
|
+
return Token { Token::Type::LessThan, m_file, m_token_line, m_token_column };
|
|
582
|
+
}
|
|
583
|
+
case '>':
|
|
584
|
+
advance();
|
|
585
|
+
switch (current_char()) {
|
|
586
|
+
case '>':
|
|
587
|
+
advance();
|
|
588
|
+
switch (current_char()) {
|
|
589
|
+
case '=':
|
|
590
|
+
advance();
|
|
591
|
+
return Token { Token::Type::RightShiftEqual, m_file, m_token_line, m_token_column };
|
|
592
|
+
default:
|
|
593
|
+
return Token { Token::Type::RightShift, m_file, m_token_line, m_token_column };
|
|
594
|
+
}
|
|
595
|
+
case '=':
|
|
596
|
+
advance();
|
|
597
|
+
return Token { Token::Type::GreaterThanOrEqual, m_file, m_token_line, m_token_column };
|
|
598
|
+
default:
|
|
599
|
+
return Token { Token::Type::GreaterThan, m_file, m_token_line, m_token_column };
|
|
600
|
+
}
|
|
601
|
+
case '&':
|
|
602
|
+
advance();
|
|
603
|
+
switch (current_char()) {
|
|
604
|
+
case '&':
|
|
605
|
+
advance();
|
|
606
|
+
switch (current_char()) {
|
|
607
|
+
case '=':
|
|
608
|
+
advance();
|
|
609
|
+
return Token { Token::Type::AmpersandAmpersandEqual, m_file, m_token_line, m_token_column };
|
|
610
|
+
default:
|
|
611
|
+
return Token { Token::Type::AmpersandAmpersand, m_file, m_token_line, m_token_column };
|
|
612
|
+
}
|
|
613
|
+
case '=':
|
|
614
|
+
advance();
|
|
615
|
+
return Token { Token::Type::AmpersandEqual, m_file, m_token_line, m_token_column };
|
|
616
|
+
case '.':
|
|
617
|
+
advance();
|
|
618
|
+
return Token { Token::Type::SafeNavigation, m_file, m_token_line, m_token_column };
|
|
619
|
+
default:
|
|
620
|
+
return Token { Token::Type::Ampersand, m_file, m_token_line, m_token_column };
|
|
621
|
+
}
|
|
622
|
+
case '|':
|
|
623
|
+
advance();
|
|
624
|
+
switch (current_char()) {
|
|
625
|
+
case '|':
|
|
626
|
+
advance();
|
|
627
|
+
switch (current_char()) {
|
|
628
|
+
case '=':
|
|
629
|
+
advance();
|
|
630
|
+
return Token { Token::Type::PipePipeEqual, m_file, m_token_line, m_token_column };
|
|
631
|
+
default:
|
|
632
|
+
return Token { Token::Type::PipePipe, m_file, m_token_line, m_token_column };
|
|
633
|
+
}
|
|
634
|
+
case '=':
|
|
635
|
+
advance();
|
|
636
|
+
return Token { Token::Type::PipeEqual, m_file, m_token_line, m_token_column };
|
|
637
|
+
default:
|
|
638
|
+
return Token { Token::Type::Pipe, m_file, m_token_line, m_token_column };
|
|
639
|
+
}
|
|
640
|
+
case '^':
|
|
641
|
+
advance();
|
|
642
|
+
switch (current_char()) {
|
|
643
|
+
case '=':
|
|
644
|
+
advance();
|
|
645
|
+
return Token { Token::Type::CaretEqual, m_file, m_token_line, m_token_column };
|
|
646
|
+
default:
|
|
647
|
+
return Token { Token::Type::Caret, m_file, m_token_line, m_token_column };
|
|
648
|
+
}
|
|
649
|
+
case '~':
|
|
650
|
+
advance();
|
|
651
|
+
switch (current_char()) {
|
|
652
|
+
case '@':
|
|
653
|
+
if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
|
|
654
|
+
advance();
|
|
655
|
+
SharedPtr<String> lit = new String("~@");
|
|
656
|
+
return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
|
|
657
|
+
} else {
|
|
658
|
+
return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column };
|
|
659
|
+
}
|
|
660
|
+
default:
|
|
661
|
+
return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column };
|
|
662
|
+
}
|
|
663
|
+
case '?': {
|
|
664
|
+
auto c = next();
|
|
665
|
+
if (isspace(c)) {
|
|
666
|
+
m_open_ternary = true;
|
|
667
|
+
return Token { Token::Type::TernaryQuestion, m_file, m_token_line, m_token_column };
|
|
668
|
+
} else {
|
|
669
|
+
advance();
|
|
670
|
+
if (c == '\\') {
|
|
671
|
+
auto buf = new String();
|
|
672
|
+
auto result = consume_escaped_byte(*buf);
|
|
673
|
+
if (!result.first)
|
|
674
|
+
return Token { result.second, current_char(), m_file, m_token_line, m_token_column };
|
|
675
|
+
return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column };
|
|
676
|
+
} else {
|
|
677
|
+
return Token { Token::Type::String, c, m_file, m_token_line, m_token_column };
|
|
678
|
+
}
|
|
679
|
+
}
|
|
680
|
+
}
|
|
681
|
+
case ':': {
|
|
682
|
+
auto c = next();
|
|
683
|
+
if (c == ':') {
|
|
684
|
+
advance();
|
|
685
|
+
return Token { Token::Type::ConstantResolution, m_file, m_token_line, m_token_column };
|
|
686
|
+
} else if (m_last_token.type() == Token::Type::InterpolatedStringEnd && !m_whitespace_precedes && !m_open_ternary) {
|
|
687
|
+
return Token { Token::Type::InterpolatedStringSymbolKey, m_file, m_token_line, m_token_column };
|
|
688
|
+
} else if (c == '"') {
|
|
689
|
+
advance();
|
|
690
|
+
return consume_double_quoted_string('"', '"', Token::Type::InterpolatedSymbolBegin, Token::Type::InterpolatedSymbolEnd);
|
|
691
|
+
} else if (c == '\'') {
|
|
692
|
+
advance();
|
|
693
|
+
auto string = consume_single_quoted_string('\'', '\'');
|
|
694
|
+
return Token { Token::Type::Symbol, string.literal(), m_file, m_token_line, m_token_column };
|
|
695
|
+
} else if (isspace(c)) {
|
|
696
|
+
m_open_ternary = false;
|
|
697
|
+
auto token = Token { Token::Type::TernaryColon, m_file, m_token_line, m_token_column };
|
|
698
|
+
token.set_whitespace_precedes(m_whitespace_precedes);
|
|
699
|
+
return token;
|
|
700
|
+
} else {
|
|
701
|
+
return consume_symbol();
|
|
702
|
+
}
|
|
703
|
+
}
|
|
704
|
+
case '@':
|
|
705
|
+
switch (peek()) {
|
|
706
|
+
case '@': {
|
|
707
|
+
// kinda janky, but we gotta trick consume_word and then prepend the '@' back on the front
|
|
708
|
+
advance();
|
|
709
|
+
auto token = consume_word(Token::Type::ClassVariable);
|
|
710
|
+
token.set_literal(String::format("@{}", token.literal()));
|
|
711
|
+
return token;
|
|
712
|
+
}
|
|
713
|
+
default:
|
|
714
|
+
return consume_word(Token::Type::InstanceVariable);
|
|
715
|
+
}
|
|
716
|
+
case '$':
|
|
717
|
+
if (peek() == '&') {
|
|
718
|
+
advance(2);
|
|
719
|
+
return Token { Token::Type::BackRef, '&', m_file, m_token_line, m_token_column };
|
|
720
|
+
} else if (peek() >= '1' && peek() <= '9') {
|
|
721
|
+
return consume_nth_ref();
|
|
722
|
+
} else {
|
|
723
|
+
return consume_global_variable();
|
|
724
|
+
}
|
|
725
|
+
case '.':
|
|
726
|
+
advance();
|
|
727
|
+
switch (current_char()) {
|
|
728
|
+
case '.':
|
|
729
|
+
advance();
|
|
730
|
+
switch (current_char()) {
|
|
731
|
+
case '.':
|
|
732
|
+
advance();
|
|
733
|
+
return Token { Token::Type::DotDotDot, m_file, m_token_line, m_token_column };
|
|
734
|
+
default:
|
|
735
|
+
return Token { Token::Type::DotDot, m_file, m_token_line, m_token_column };
|
|
736
|
+
}
|
|
737
|
+
default:
|
|
738
|
+
return Token { Token::Type::Dot, m_file, m_token_line, m_token_column };
|
|
739
|
+
}
|
|
740
|
+
case '{':
|
|
741
|
+
advance();
|
|
742
|
+
return Token { Token::Type::LCurlyBrace, m_file, m_token_line, m_token_column };
|
|
743
|
+
case '[': {
|
|
744
|
+
advance();
|
|
745
|
+
switch (current_char()) {
|
|
746
|
+
case ']':
|
|
747
|
+
advance();
|
|
748
|
+
switch (current_char()) {
|
|
749
|
+
case '=':
|
|
750
|
+
advance();
|
|
751
|
+
return Token { Token::Type::LBracketRBracketEqual, m_file, m_token_line, m_token_column };
|
|
752
|
+
default:
|
|
753
|
+
auto token = Token { Token::Type::LBracketRBracket, m_file, m_token_line, m_token_column };
|
|
754
|
+
token.set_whitespace_precedes(m_whitespace_precedes);
|
|
755
|
+
return token;
|
|
756
|
+
}
|
|
757
|
+
default:
|
|
758
|
+
auto token = Token { Token::Type::LBracket, m_file, m_token_line, m_token_column };
|
|
759
|
+
token.set_whitespace_precedes(m_whitespace_precedes);
|
|
760
|
+
return token;
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
case '(': {
|
|
764
|
+
advance();
|
|
765
|
+
auto token = Token { Token::Type::LParen, m_file, m_token_line, m_token_column };
|
|
766
|
+
token.set_whitespace_precedes(m_whitespace_precedes);
|
|
767
|
+
return token;
|
|
768
|
+
}
|
|
769
|
+
case '}':
|
|
770
|
+
advance();
|
|
771
|
+
return Token { Token::Type::RCurlyBrace, m_file, m_token_line, m_token_column };
|
|
772
|
+
case ']':
|
|
773
|
+
advance();
|
|
774
|
+
return Token { Token::Type::RBracket, m_file, m_token_line, m_token_column };
|
|
775
|
+
case ')':
|
|
776
|
+
advance();
|
|
777
|
+
return Token { Token::Type::RParen, m_file, m_token_line, m_token_column };
|
|
778
|
+
case '\n': {
|
|
779
|
+
advance();
|
|
780
|
+
auto token = Token { Token::Type::Newline, m_file, m_token_line, m_token_column };
|
|
781
|
+
if (!m_heredoc_stack.is_empty()) {
|
|
782
|
+
auto new_index = m_heredoc_stack.last();
|
|
783
|
+
while (m_index < new_index)
|
|
784
|
+
advance();
|
|
785
|
+
m_heredoc_stack.clear();
|
|
786
|
+
}
|
|
787
|
+
return token;
|
|
788
|
+
}
|
|
789
|
+
case ';':
|
|
790
|
+
advance();
|
|
791
|
+
return Token { Token::Type::Semicolon, m_file, m_token_line, m_token_column };
|
|
792
|
+
case ',':
|
|
793
|
+
advance();
|
|
794
|
+
return Token { Token::Type::Comma, m_file, m_token_line, m_token_column };
|
|
795
|
+
case '"':
|
|
796
|
+
advance();
|
|
797
|
+
return consume_double_quoted_string('"', '"');
|
|
798
|
+
case '\'':
|
|
799
|
+
advance();
|
|
800
|
+
return consume_single_quoted_string('\'', '\'');
|
|
801
|
+
case '`': {
|
|
802
|
+
advance();
|
|
803
|
+
return consume_double_quoted_string('`', '`', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
|
|
804
|
+
}
|
|
805
|
+
case '#':
|
|
806
|
+
if (token_is_first_on_line()) {
|
|
807
|
+
SharedPtr<String> doc = new String();
|
|
808
|
+
bool found_comment_marker = true;
|
|
809
|
+
char c = current_char();
|
|
810
|
+
while (c) {
|
|
811
|
+
if (!found_comment_marker) {
|
|
812
|
+
if (c == '#')
|
|
813
|
+
found_comment_marker = true;
|
|
814
|
+
else if (!isspace(c))
|
|
815
|
+
break;
|
|
816
|
+
}
|
|
817
|
+
if (c == '\n' || c == '\r') {
|
|
818
|
+
doc->append_char(c);
|
|
819
|
+
found_comment_marker = false;
|
|
820
|
+
} else if (found_comment_marker)
|
|
821
|
+
doc->append_char(c);
|
|
822
|
+
c = next();
|
|
823
|
+
}
|
|
824
|
+
return Token { Token::Type::Doc, doc, m_file, m_token_line, m_token_column };
|
|
825
|
+
} else {
|
|
826
|
+
char c;
|
|
827
|
+
do {
|
|
828
|
+
c = next();
|
|
829
|
+
} while (c && c != '\n' && c != '\r');
|
|
830
|
+
return Token { Token::Type::Comment, m_file, m_token_line, m_token_column };
|
|
831
|
+
}
|
|
832
|
+
case '0':
|
|
833
|
+
case '1':
|
|
834
|
+
case '2':
|
|
835
|
+
case '3':
|
|
836
|
+
case '4':
|
|
837
|
+
case '5':
|
|
838
|
+
case '6':
|
|
839
|
+
case '7':
|
|
840
|
+
case '8':
|
|
841
|
+
case '9': {
|
|
842
|
+
auto token = consume_numeric();
|
|
843
|
+
return token;
|
|
844
|
+
}
|
|
845
|
+
};
|
|
846
|
+
|
|
847
|
+
Token keyword_token;
|
|
848
|
+
|
|
849
|
+
if (!m_last_token.is_dot() && match(4, "self")) {
|
|
850
|
+
if (current_char() == '.')
|
|
851
|
+
keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column };
|
|
852
|
+
else
|
|
853
|
+
rewind(4);
|
|
854
|
+
}
|
|
855
|
+
|
|
856
|
+
if (!m_last_token.is_dot() && !m_last_token.is_def_keyword()) {
|
|
857
|
+
if (match(12, "__ENCODING__"))
|
|
858
|
+
keyword_token = { Token::Type::ENCODINGKeyword, m_file, m_token_line, m_token_column };
|
|
859
|
+
else if (match(8, "__LINE__"))
|
|
860
|
+
keyword_token = { Token::Type::LINEKeyword, m_file, m_token_line, m_token_column };
|
|
861
|
+
else if (match(8, "__FILE__"))
|
|
862
|
+
keyword_token = { Token::Type::FILEKeyword, m_file, m_token_line, m_token_column };
|
|
863
|
+
else if (match(5, "BEGIN"))
|
|
864
|
+
keyword_token = { Token::Type::BEGINKeyword, m_file, m_token_line, m_token_column };
|
|
865
|
+
else if (match(3, "END"))
|
|
866
|
+
keyword_token = { Token::Type::ENDKeyword, m_file, m_token_line, m_token_column };
|
|
867
|
+
else if (match(5, "alias"))
|
|
868
|
+
keyword_token = { Token::Type::AliasKeyword, m_file, m_token_line, m_token_column };
|
|
869
|
+
else if (match(3, "and"))
|
|
870
|
+
keyword_token = { Token::Type::AndKeyword, m_file, m_token_line, m_token_column };
|
|
871
|
+
else if (match(5, "begin"))
|
|
872
|
+
keyword_token = { Token::Type::BeginKeyword, m_file, m_token_line, m_token_column };
|
|
873
|
+
else if (match(5, "break"))
|
|
874
|
+
keyword_token = { Token::Type::BreakKeyword, m_file, m_token_line, m_token_column };
|
|
875
|
+
else if (match(4, "case"))
|
|
876
|
+
keyword_token = { Token::Type::CaseKeyword, m_file, m_token_line, m_token_column };
|
|
877
|
+
else if (match(5, "class"))
|
|
878
|
+
keyword_token = { Token::Type::ClassKeyword, m_file, m_token_line, m_token_column };
|
|
879
|
+
else if (match(8, "defined?"))
|
|
880
|
+
keyword_token = { Token::Type::DefinedKeyword, m_file, m_token_line, m_token_column };
|
|
881
|
+
else if (match(3, "def"))
|
|
882
|
+
keyword_token = { Token::Type::DefKeyword, m_file, m_token_line, m_token_column };
|
|
883
|
+
else if (match(2, "do"))
|
|
884
|
+
keyword_token = { Token::Type::DoKeyword, m_file, m_token_line, m_token_column };
|
|
885
|
+
else if (match(4, "else"))
|
|
886
|
+
keyword_token = { Token::Type::ElseKeyword, m_file, m_token_line, m_token_column };
|
|
887
|
+
else if (match(5, "elsif"))
|
|
888
|
+
keyword_token = { Token::Type::ElsifKeyword, m_file, m_token_line, m_token_column };
|
|
889
|
+
else if (match(3, "end"))
|
|
890
|
+
keyword_token = { Token::Type::EndKeyword, m_file, m_token_line, m_token_column };
|
|
891
|
+
else if (match(6, "ensure"))
|
|
892
|
+
keyword_token = { Token::Type::EnsureKeyword, m_file, m_token_line, m_token_column };
|
|
893
|
+
else if (match(5, "false"))
|
|
894
|
+
keyword_token = { Token::Type::FalseKeyword, m_file, m_token_line, m_token_column };
|
|
895
|
+
else if (match(3, "for"))
|
|
896
|
+
keyword_token = { Token::Type::ForKeyword, m_file, m_token_line, m_token_column };
|
|
897
|
+
else if (match(2, "if"))
|
|
898
|
+
keyword_token = { Token::Type::IfKeyword, m_file, m_token_line, m_token_column };
|
|
899
|
+
else if (match(2, "in"))
|
|
900
|
+
keyword_token = { Token::Type::InKeyword, m_file, m_token_line, m_token_column };
|
|
901
|
+
else if (match(6, "module"))
|
|
902
|
+
keyword_token = { Token::Type::ModuleKeyword, m_file, m_token_line, m_token_column };
|
|
903
|
+
else if (match(4, "next"))
|
|
904
|
+
keyword_token = { Token::Type::NextKeyword, m_file, m_token_line, m_token_column };
|
|
905
|
+
else if (match(3, "nil"))
|
|
906
|
+
keyword_token = { Token::Type::NilKeyword, m_file, m_token_line, m_token_column };
|
|
907
|
+
else if (match(3, "not"))
|
|
908
|
+
keyword_token = { Token::Type::NotKeyword, m_file, m_token_line, m_token_column };
|
|
909
|
+
else if (match(2, "or"))
|
|
910
|
+
keyword_token = { Token::Type::OrKeyword, m_file, m_token_line, m_token_column };
|
|
911
|
+
else if (match(4, "redo"))
|
|
912
|
+
keyword_token = { Token::Type::RedoKeyword, m_file, m_token_line, m_token_column };
|
|
913
|
+
else if (match(6, "rescue"))
|
|
914
|
+
keyword_token = { Token::Type::RescueKeyword, m_file, m_token_line, m_token_column };
|
|
915
|
+
else if (match(5, "retry"))
|
|
916
|
+
keyword_token = { Token::Type::RetryKeyword, m_file, m_token_line, m_token_column };
|
|
917
|
+
else if (match(6, "return"))
|
|
918
|
+
keyword_token = { Token::Type::ReturnKeyword, m_file, m_token_line, m_token_column };
|
|
919
|
+
else if (match(4, "self"))
|
|
920
|
+
keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column };
|
|
921
|
+
else if (match(5, "super"))
|
|
922
|
+
keyword_token = { Token::Type::SuperKeyword, m_file, m_token_line, m_token_column };
|
|
923
|
+
else if (match(4, "then"))
|
|
924
|
+
keyword_token = { Token::Type::ThenKeyword, m_file, m_token_line, m_token_column };
|
|
925
|
+
else if (match(4, "true"))
|
|
926
|
+
keyword_token = { Token::Type::TrueKeyword, m_file, m_token_line, m_token_column };
|
|
927
|
+
else if (match(5, "undef"))
|
|
928
|
+
keyword_token = { Token::Type::UndefKeyword, m_file, m_token_line, m_token_column };
|
|
929
|
+
else if (match(6, "unless"))
|
|
930
|
+
keyword_token = { Token::Type::UnlessKeyword, m_file, m_token_line, m_token_column };
|
|
931
|
+
else if (match(5, "until"))
|
|
932
|
+
keyword_token = { Token::Type::UntilKeyword, m_file, m_token_line, m_token_column };
|
|
933
|
+
else if (match(4, "when"))
|
|
934
|
+
keyword_token = { Token::Type::WhenKeyword, m_file, m_token_line, m_token_column };
|
|
935
|
+
else if (match(5, "while"))
|
|
936
|
+
keyword_token = { Token::Type::WhileKeyword, m_file, m_token_line, m_token_column };
|
|
937
|
+
else if (match(5, "yield"))
|
|
938
|
+
keyword_token = { Token::Type::YieldKeyword, m_file, m_token_line, m_token_column };
|
|
939
|
+
}
|
|
940
|
+
|
|
941
|
+
// if a colon comes next, it's not a keyword -- it's a symbol!
|
|
942
|
+
if (keyword_token && current_char() == ':' && peek() != ':' && !m_open_ternary) {
|
|
943
|
+
advance(); // :
|
|
944
|
+
auto name = keyword_token.type_value();
|
|
945
|
+
return Token { Token::Type::SymbolKey, name, m_file, m_token_line, m_token_column };
|
|
946
|
+
} else if (keyword_token) {
|
|
947
|
+
return keyword_token;
|
|
948
|
+
}
|
|
949
|
+
|
|
950
|
+
auto c = current_char();
|
|
951
|
+
if ((c >= 'a' && c <= 'z') || c == '_') {
|
|
952
|
+
return consume_bare_name();
|
|
953
|
+
} else if (c >= 'A' && c <= 'Z') {
|
|
954
|
+
return consume_constant();
|
|
955
|
+
} else {
|
|
956
|
+
auto buf = consume_non_whitespace();
|
|
957
|
+
auto token = Token { Token::Type::Invalid, buf, m_file, m_token_line, m_token_column };
|
|
958
|
+
return token;
|
|
959
|
+
}
|
|
960
|
+
|
|
961
|
+
TM_UNREACHABLE();
|
|
962
|
+
}
|
|
963
|
+
|
|
964
|
+
// Lex the name portion of a symbol literal; the leading ':' has already been
// consumed by the caller. Handles sigil-prefixed names (:@ivar, :@@cvar,
// :$gvar), operator symbols (:+, :<=>, :[]=, ...), and plain identifiers
// (:foo, :foo?, :foo!, :foo=). Returns a Symbol token whose literal is the
// accumulated name, or an Invalid token for unrecognizable input.
Token Lexer::consume_symbol() {
    char c = current_char();
    SharedPtr<String> buf = new String("");
    // append the given char to the buffer and return the next input char
    auto gobble = [&buf, this](char c) -> char { buf->append_char(c); return next(); };
    switch (c) {
    case '@':
        // instance (:@foo) or class (:@@foo) variable name
        c = gobble(c);
        if (c == '@') c = gobble(c);
        do {
            c = gobble(c);
        } while (is_identifier_char(c));
        break;
    case '$':
        // global variable name (:$foo)
        c = gobble(c);
        do {
            c = gobble(c);
        } while (is_identifier_char(c));
        break;
    case '~':
        c = gobble(c);
        // a trailing '@' (:~@) is consumed but not stored in the name
        if (c == '@') advance();
        break;
    case '+':
    case '-': {
        // :+ / :- and their unary forms :+@ / :-@
        c = gobble(c);
        if (c == '@') gobble(c);
        break;
    }
    case '&':
    case '|':
    case '^':
    case '%':
    case '/': {
        // single-character operator symbols
        gobble(c);
        break;
    }
    case '*':
        // :* or :**
        c = gobble(c);
        if (c == '*')
            gobble(c);
        break;
    case '=':
        // :==, :===, or :=~ -- a bare '=' is not a valid symbol
        switch (peek()) {
        case '=':
            c = gobble(c);
            c = gobble(c);
            if (c == '=') gobble(c);
            break;
        case '~':
            c = gobble(c);
            gobble(c);
            break;
        default:
            return Token { Token::Type::Invalid, c, m_file, m_token_line, m_token_column };
        }
        break;
    case '!':
        // :!, :!=, :!~, or :!@
        c = gobble(c);
        switch (c) {
        case '=':
        case '~':
        case '@':
            gobble(c);
            // intentional fallthrough
        default:
            break;
        }
        break;
    case '>':
        // :>, :>=, or :>>
        c = gobble(c);
        switch (c) {
        case '=':
        case '>':
            gobble(c);
            // intentional fallthrough
        default:
            break;
        }
        break;
    case '<':
        // :<, :<=, :<=>, or :<<
        c = gobble(c);
        switch (c) {
        case '=':
            c = gobble(c);
            if (c == '>') gobble(c);
            break;
        case '<':
            gobble(c);
            // intentional fallthrough
        default:
            break;
        }
        break;
    case '[':
        // :[] or :[]= -- a lone '[' is not a valid symbol
        if (peek() == ']') {
            c = gobble(c);
            c = gobble(c);
            if (c == '=') gobble(c);
        } else {
            return Token { Token::Type::Invalid, c, m_file, m_token_line, m_token_column };
        }
        break;
    default:
        // plain identifier, possibly ending in '?', '!', or '='
        do {
            c = gobble(c);
        } while (is_identifier_char(c));
        switch (c) {
        case '?':
        case '!':
        case '=':
            switch (peek()) {
            case '>':
                // NOTE(review): when '>' follows, the trailing char is left
                // unconsumed -- presumably so a following "=>" survives; confirm
                break;
            default:
                gobble(c);
            }
            // intentional fallthrough
        default:
            break;
        }
    }
    return Token { Token::Type::Symbol, buf, m_file, m_token_line, m_token_column };
}
|
|
1083
|
+
|
|
1084
|
+
// Collect an identifier token of the given type: the current char plus any
// following identifier chars, with an optional trailing '?' or '!'
// (valid at the end of Ruby method names).
Token Lexer::consume_word(Token::Type type) {
    SharedPtr<String> name = new String("");
    char ch = current_char();
    name->append_char(ch);
    ch = next();
    while (is_identifier_char(ch)) {
        name->append_char(ch);
        ch = next();
    }
    if (ch == '?' || ch == '!') {
        advance();
        name->append_char(ch);
    }
    return Token { type, name, m_file, m_token_line, m_token_column };
}
|
|
1102
|
+
|
|
1103
|
+
// Lex a lowercase identifier; "name:" (but not "name::") is turned into a
// symbol hash key when the previous token allows one.
Token Lexer::consume_bare_name() {
    auto token = consume_word(Token::Type::BareName);
    bool is_symbol_key = current_char() == ':'
        && peek() != ':'
        && m_last_token.can_precede_symbol_key();
    if (is_symbol_key) {
        advance();
        token.set_type(Token::Type::SymbolKey);
    }
    return token;
}
|
|
1112
|
+
|
|
1113
|
+
// Lex a constant name; "Name:" (but not "Name::") is turned into a symbol
// hash key when the previous token allows one.
Token Lexer::consume_constant() {
    auto token = consume_word(Token::Type::Constant);
    bool is_symbol_key = current_char() == ':'
        && peek() != ':'
        && m_last_token.can_precede_symbol_key();
    if (is_symbol_key) {
        advance();
        token.set_type(Token::Type::SymbolKey);
    }
    return token;
}
|
|
1122
|
+
|
|
1123
|
+
// Lex a global variable; the current char is '$'. Special one-character
// globals ($?, $!, $~, $", ...) are built by hand; "$-X" globals take one
// extra char; anything else is an ordinary identifier-style global ($foo).
Token Lexer::consume_global_variable() {
    static const char specials[] = "?!=@&`'\"+/\\;<>$*.,:_~";
    char c = peek();
    for (const char *p = specials; *p; ++p) {
        if (*p != c)
            continue;
        advance(); // step onto the special char
        SharedPtr<String> buf = new String("$");
        buf->append_char(current_char());
        advance();
        return Token { Token::Type::GlobalVariable, buf, m_file, m_token_line, m_token_column };
    }
    if (c == '-') {
        // "$-" plus exactly one more character (e.g. $-0, $-i)
        SharedPtr<String> buf = new String("$-");
        advance(2);
        buf->append_char(current_char());
        advance();
        return Token { Token::Type::GlobalVariable, buf, m_file, m_token_line, m_token_column };
    }
    return consume_word(Token::Type::GlobalVariable);
}
|
|
1164
|
+
|
|
1165
|
+
bool is_valid_heredoc(bool with_dash, SharedPtr<String> doc, String heredoc_name) {
|
|
1166
|
+
if (!doc->ends_with(heredoc_name))
|
|
1167
|
+
return false;
|
|
1168
|
+
if (doc->length() - heredoc_name.length() == 0)
|
|
1169
|
+
return true;
|
|
1170
|
+
auto prefix = (*doc)[doc->length() - heredoc_name.length() - 1];
|
|
1171
|
+
return with_dash ? isspace(prefix) : prefix == '\n';
|
|
1172
|
+
}
|
|
1173
|
+
|
|
1174
|
+
// Compute the smallest leading-whitespace width across the non-blank lines of
// `doc` (used by squiggly heredocs to decide how much to dedent). Only lines
// terminated by '\n' contribute; an all-whitespace line is ignored.
size_t get_heredoc_indent(SharedPtr<String> doc) {
    if (doc->is_empty())
        return 0;
    size_t min_indent = std::numeric_limits<size_t>::max();
    size_t indent = 0;
    bool blank_so_far = true; // only whitespace seen on the current line
    for (size_t i = 0; i < doc->length(); i++) {
        char c = (*doc)[i];
        if (c == '\n') {
            if (!blank_so_far && indent < min_indent)
                min_indent = indent;
            indent = 0;
            blank_so_far = true;
        } else if (isspace(c)) {
            if (blank_so_far)
                indent++;
        } else {
            blank_so_far = false;
        }
    }
    return min_indent;
}
|
|
1196
|
+
|
|
1197
|
+
// Strip the common leading indentation (as computed by get_heredoc_indent)
// from every line of a squiggly (<<~) heredoc, replacing `doc` with the
// dedented copy.
void dedent_heredoc(SharedPtr<String> &doc) {
    size_t indent = get_heredoc_indent(doc);
    if (indent == 0)
        return;
    SharedPtr<String> dedented = new String("");
    size_t start = 0; // index of the first char of the current line
    for (size_t i = 0; i < doc->length(); i++) {
        if ((*doc)[i] != '\n')
            continue;
        start += indent; // skip the common indentation
        if (start < i)
            dedented->append(doc->substring(start, i - start));
        dedented->append_char('\n');
        start = i + 1;
    }
    doc = dedented;
}
|
|
1215
|
+
|
|
1216
|
+
// Lex a heredoc; the cursor sits just past "<<" on a possible '-'/'~'
// modifier. The body is read ahead via a separate index (heredoc_index)
// without moving the main cursor off the current line; the index just past
// the terminator is pushed onto m_heredoc_stack so normal tokenizing can
// finish the start line and later jump over the body. Returns a String token
// for single-quoted heredocs, or an interpolation-begin token (with a nested
// InterpolatedStringLexer installed) for interpolating ones.
Token Lexer::consume_heredoc() {
    bool with_dash = false;
    bool should_dedent = false;
    // optional modifier: <<- allows an indented terminator, <<~ also dedents
    switch (current_char()) {
    case '-':
        advance();
        with_dash = true;
        break;
    case '~':
        advance();
        with_dash = true;
        should_dedent = true;
        break;
    }

    auto begin_type = Token::Type::InterpolatedStringBegin;
    auto end_type = Token::Type::InterpolatedStringEnd;
    bool should_interpolate = true;
    char delimiter = 0;
    String heredoc_name = "";
    // optional quoting around the terminator name selects the heredoc flavor:
    // "..." interpolated string, `...` shell command, '...' raw string
    switch (current_char()) {
    case '"':
        delimiter = '"';
        break;
    case '`':
        begin_type = Token::Type::InterpolatedShellBegin;
        end_type = Token::Type::InterpolatedShellEnd;
        delimiter = '`';
        break;
    case '\'':
        should_interpolate = false;
        delimiter = '\'';
        break;
    default:
        delimiter = 0;
    }

    if (delimiter) {
        // quoted terminator: read chars up to the closing quote; the name may
        // not span a line break
        char c = next();
        while (c != delimiter) {
            switch (c) {
            case '\n':
            case '\r':
            case 0:
                return Token { Token::Type::UnterminatedString, "heredoc identifier", m_file, m_token_line, m_token_column };
            default:
                heredoc_name.append_char(c);
                c = next();
            }
        }
        advance();
    } else {
        // bare terminator: an ordinary identifier
        heredoc_name = String(consume_word(Token::Type::BareName).literal());
    }

    SharedPtr<String> doc = new String("");
    size_t heredoc_index = m_index;
    // read-ahead accessor; returns 0 past end of input
    auto get_char = [&heredoc_index, this]() { return (heredoc_index >= m_size) ? 0 : m_input->at(heredoc_index); };

    if (m_heredoc_stack.is_empty()) {
        // start consuming the heredoc on the next line
        while (get_char() != '\n') {
            if (heredoc_index >= m_size)
                return Token { Token::Type::UnterminatedString, "heredoc", m_file, m_token_line, m_token_column };
            heredoc_index++;
        }
        heredoc_index++;
    } else {
        // start consuming the heredoc right after the last one
        heredoc_index = m_heredoc_stack.last();
    }

    // consume the heredoc until we find the delimiter, either '\n' (if << was used) or any whitespace (if <<- was used) followed by "DELIM\n"
    for (;;) {
        if (heredoc_index >= m_size) {
            if (is_valid_heredoc(with_dash, doc, heredoc_name))
                break;
            return Token { Token::Type::UnterminatedString, doc, m_file, m_token_line, m_token_column };
        }
        char c = get_char();
        heredoc_index++;
        if (c == '\n' && is_valid_heredoc(with_dash, doc, heredoc_name))
            break;
        doc->append_char(c);
    }

    // chop the delimiter and any trailing space off the string
    doc->truncate(doc->length() - heredoc_name.length());
    doc->strip_trailing_spaces();

    if (should_dedent)
        dedent_heredoc(doc);

    // We have to keep tokenizing on the line where the heredoc was started, and then jump to the line after the heredoc.
    // This index is used to jump to the end of the heredoc later.
    m_heredoc_stack.push(heredoc_index);

    auto token = Token { Token::Type::String, doc, m_file, m_token_line, m_token_column };

    if (should_interpolate) {
        // hand the raw body to a nested lexer; callers see Begin ... End tokens
        m_nested_lexer = new InterpolatedStringLexer { *this, token, end_type };
        return Token { begin_type, m_file, m_token_line, m_token_column };
    }

    return token;
}
|
|
1322
|
+
|
|
1323
|
+
// Lex a numeric literal starting at the current char (a digit). Handles the
// 0d/0D, 0o/0O, 0x/0X, and 0b/0B base prefixes, '_' digit separators, and
// hands off to consume_numeric_as_float() when a decimal point or exponent
// follows a plain decimal. Returns a Fixnum/Bignum token (via
// chars_to_fixnum_or_bignum_token), a Float token, or an Invalid token when
// a base prefix is not followed by a valid digit.
//
// The four base-prefixed branches previously repeated the same
// consume-digits loop; it is factored into one helper parameterized by the
// digit predicate. Behavior is unchanged.
Token Lexer::consume_numeric() {
    SharedPtr<String> chars = new String;
    // Consume digits (per is_digit) into chars, skipping a single '_'
    // separator after each digit. The caller has positioned the lexer so that
    // next() yields the first candidate digit. Returns false, without
    // consuming further, when that first char is not a valid digit.
    auto consume_digits = [&](auto is_digit) -> bool {
        char c = next();
        if (!is_digit(c))
            return false;
        do {
            chars->append_char(c);
            c = next();
            if (c == '_')
                c = next();
        } while (is_digit(c));
        return true;
    };
    if (current_char() == '0') {
        switch (peek()) {
        case 'd':
        case 'D':
            // explicit decimal: digits only, no prefix stored
            advance(); // '0'
            if (!consume_digits([](char c) { return isdigit(c) != 0; }))
                return Token { Token::Type::Invalid, current_char(), m_file, m_cursor_line, m_cursor_column };
            return chars_to_fixnum_or_bignum_token(chars, 10, 0);
        case 'o':
        case 'O':
            // octal: store a normalized "0o" prefix, skipped via offset 2
            chars->append_char('0');
            chars->append_char('o');
            advance(); // '0'
            if (!consume_digits([](char c) { return c >= '0' && c <= '7'; }))
                return Token { Token::Type::Invalid, current_char(), m_file, m_cursor_line, m_cursor_column };
            return chars_to_fixnum_or_bignum_token(chars, 8, 2);
        case 'x':
        case 'X':
            // hexadecimal: store a normalized "0x" prefix, skipped via offset 2
            chars->append_char('0');
            chars->append_char('x');
            advance(); // '0'
            if (!consume_digits([](char c) { return isxdigit(c) != 0; }))
                return Token { Token::Type::Invalid, current_char(), m_file, m_cursor_line, m_cursor_column };
            return chars_to_fixnum_or_bignum_token(chars, 16, 2);
        case 'b':
        case 'B':
            // binary: store a normalized "0b" prefix, skipped via offset 2
            chars->append_char('0');
            chars->append_char('b');
            advance(); // '0'
            if (!consume_digits([](char c) { return c == '0' || c == '1'; }))
                return Token { Token::Type::Invalid, current_char(), m_file, m_cursor_line, m_cursor_column };
            return chars_to_fixnum_or_bignum_token(chars, 2, 2);
        }
    }
    // plain decimal: the current char is the first digit
    char c = current_char();
    do {
        chars->append_char(c);
        c = next();
        if (c == '_')
            c = next();
    } while (isdigit(c));
    if ((c == '.' && isdigit(peek())) || (c == 'e' || c == 'E'))
        return consume_numeric_as_float(chars);
    else
        return chars_to_fixnum_or_bignum_token(chars, 10, 0);
}
|
|
1403
|
+
|
|
1404
|
+
const long long max_fixnum = std::numeric_limits<long long>::max() / 2; // 63 bits for MRI
|
|
1405
|
+
|
|
1406
|
+
Token Lexer::chars_to_fixnum_or_bignum_token(SharedPtr<String> chars, int base, int offset) {
|
|
1407
|
+
errno = 0;
|
|
1408
|
+
auto fixnum = strtoll(chars->c_str() + offset, nullptr, base);
|
|
1409
|
+
if (errno != 0 || fixnum > max_fixnum)
|
|
1410
|
+
return Token { Token::Type::Bignum, chars, m_file, m_token_line, m_token_column };
|
|
1411
|
+
else
|
|
1412
|
+
return Token { Token::Type::Fixnum, fixnum, m_file, m_token_line, m_token_column };
|
|
1413
|
+
}
|
|
1414
|
+
|
|
1415
|
+
// Continue lexing a numeric literal as a Float. `chars` already holds the
// integer part; the current char is either '.' (fraction follows) or
// 'e'/'E' (exponent follows). '_' digit separators are skipped.
Token Lexer::consume_numeric_as_float(SharedPtr<String> chars) {
    char ch = current_char();
    if (ch == '.') {
        chars->append_char('.');
        ch = next();
        do {
            chars->append_char(ch);
            ch = next();
            if (ch == '_')
                ch = next(); // skip digit separator
        } while (isdigit(ch));
    }
    if (ch == 'e' || ch == 'E') {
        chars->append_char('e'); // normalize the exponent marker
        ch = next();
        if (ch == '-' || ch == '+') {
            chars->append_char(ch);
            ch = next();
        }
        // the exponent must contain at least one digit
        if (!isdigit(ch))
            return Token { Token::Type::Invalid, ch, m_file, m_cursor_line, m_cursor_column };
        do {
            chars->append_char(ch);
            ch = next();
            if (ch == '_')
                ch = next();
        } while (isdigit(ch));
    }
    double value = atof(chars->c_str());
    return Token { Token::Type::Float, value, m_file, m_token_line, m_token_column };
}
|
|
1446
|
+
|
|
1447
|
+
// Lex a numbered regexp capture reference like $1 or $12; the current char
// is '$' and the caller has verified the next char is a digit.
Token Lexer::consume_nth_ref() {
    long long value = 0;
    char digit = next();
    do {
        value = value * 10 + (digit - '0');
        digit = next();
    } while (isdigit(digit));
    return Token { Token::Type::NthRef, value, m_file, m_token_line, m_token_column };
}
|
|
1457
|
+
|
|
1458
|
+
long long Lexer::consume_hex_number(int max_length, bool allow_underscore) {
|
|
1459
|
+
char c = current_char();
|
|
1460
|
+
int length = 0;
|
|
1461
|
+
long long number = 0;
|
|
1462
|
+
do {
|
|
1463
|
+
number *= 16;
|
|
1464
|
+
if (c >= 'a' && c <= 'f')
|
|
1465
|
+
number += c - 'a' + 10;
|
|
1466
|
+
else if (c >= 'A' && c <= 'F')
|
|
1467
|
+
number += c - 'A' + 10;
|
|
1468
|
+
else
|
|
1469
|
+
number += c - '0';
|
|
1470
|
+
c = next();
|
|
1471
|
+
if (allow_underscore && c == '_')
|
|
1472
|
+
c = next();
|
|
1473
|
+
} while (isxdigit(c) && (max_length == 0 || ++length < max_length));
|
|
1474
|
+
return number;
|
|
1475
|
+
}
|
|
1476
|
+
|
|
1477
|
+
long long Lexer::consume_octal_number(int max_length, bool allow_underscore) {
|
|
1478
|
+
char c = current_char();
|
|
1479
|
+
int length = 0;
|
|
1480
|
+
long long number = 0;
|
|
1481
|
+
do {
|
|
1482
|
+
number *= 8;
|
|
1483
|
+
number += c - '0';
|
|
1484
|
+
c = next();
|
|
1485
|
+
if (allow_underscore && c == '_')
|
|
1486
|
+
c = next();
|
|
1487
|
+
} while (c >= '0' && c <= '7' && (max_length == 0 || ++length < max_length));
|
|
1488
|
+
return number;
|
|
1489
|
+
}
|
|
1490
|
+
|
|
1491
|
+
// public domain
|
|
1492
|
+
// https://gist.github.com/Miouyouyou/864130e8734afe3f806512b14022226f
|
|
1493
|
+
void Lexer::utf32_codepoint_to_utf8(String &buf, long long codepoint) {
    // Encode a single Unicode codepoint as 1-4 UTF-8 bytes appended to buf.
    if (codepoint < 0x80) {
        // 1 byte: 0xxxxxxx (ASCII passthrough)
        buf.append_char(codepoint);
    } else if (codepoint < 0x800) {
        // 2 bytes: 110yyyyy 10xxxxxx
        buf.append_char(0xC0 | (codepoint >> 6));
        buf.append_char(0x80 | (codepoint & 0x3F));
    } else if (codepoint < 0x10000) {
        // 3 bytes: 1110zzzz 10yyyyyy 10xxxxxx
        buf.append_char(0xE0 | (codepoint >> 12));
        buf.append_char(0x80 | ((codepoint >> 6) & 0x3F));
        buf.append_char(0x80 | (codepoint & 0x3F));
    } else if (codepoint < 0x200000) {
        // 4 bytes: 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
        buf.append_char(0xF0 | (codepoint >> 18));
        buf.append_char(0x80 | ((codepoint >> 12) & 0x3F));
        buf.append_char(0x80 | ((codepoint >> 6) & 0x3F));
        buf.append_char(0x80 | (codepoint & 0x3F));
    } else {
        // beyond the 4-byte UTF-8 range -- callers never pass such values
        TM_UNREACHABLE();
    }
}
|
|
1512
|
+
|
|
1513
|
+
// Decode the character(s) following a backslash in a double-quoted-style
// literal (the cursor sits on the character after the backslash) and append
// the resulting byte(s) to `buf`.
// Returns { true, Token::Type::String } on success, or { false, <error type> }
// when the escape is malformed (bad \u digits, malformed \c / \M sequences).
std::pair<bool, Token::Type> Lexer::consume_escaped_byte(String &buf) {
    // Decode a control escape (\cX or \C-X), optionally wrapping a meta
    // escape (\c\M-X). `meta` is true when we are already inside a \M-
    // escape. Returns the decoded byte value, or -1 when an embedded \M is
    // not followed by '-'.
    auto control_character = [&](bool meta) {
        char c = next();
        if (c == '-') // accept both \cX and \C-X spellings
            c = next();
        int num = 0;
        // handle the combined form \c\M-X (control-of-meta)
        if (!meta && c == '\\' && peek() == 'M') {
            advance(); // M
            c = next();
            if (c != '-')
                return -1;
            meta = true;
            c = next();
        }
        if (c == '?')
            num = 127; // \c? is DEL
        else if (c >= ' ' && c <= '>')
            num = c - ' ';
        else if (c >= '@' && c <= '_')
            num = c - '@';
        else if (c >= '`' && c <= '~')
            num = c - '`';
        // NOTE(review): characters outside these ranges silently decode to 0
        // -- presumably matching MRI's lexer; confirm against ruby behavior.
        if (meta)
            return num + 128; // meta sets the high bit
        else
            return num;
    };
    auto c = current_char();
    if (c >= '0' && c <= '7') {
        // octal: up to 3 digits, e.g. \177
        auto number = consume_octal_number(3);
        buf.append_char(number);
    } else if (c == 'x') {
        // hex: 1-2 digits
        advance();
        auto number = consume_hex_number(2);
        buf.append_char(number);
    } else if (c == 'u') {
        c = next();
        if (c == '{') {
            c = next();
            // unicode characters, space separated, 1-6 hex digits
            while (c != '}') {
                // a non-hex-digit here also covers EOF ('\0'), so an
                // unterminated \u{ cannot loop forever
                if (!isxdigit(c))
                    return { false, Token::Type::InvalidUnicodeEscape };
                auto codepoint = consume_hex_number(6);
                utf32_codepoint_to_utf8(buf, codepoint);
                c = current_char();
                while (c == ' ')
                    c = next();
            }
            if (c == '}')
                advance();
        } else {
            // unicode: 4 hex digits
            auto codepoint = consume_hex_number(4);
            utf32_codepoint_to_utf8(buf, codepoint);
        }
    } else {
        // single-character escapes and \c / \M control/meta sequences
        switch (c) {
        case 'a':
            buf.append_char('\a');
            break;
        case 'b':
            buf.append_char('\b');
            break;
        case 'c':
        case 'C': {
            int num = control_character(false);
            if (num == -1)
                return { false, Token::Type::InvalidCharacterEscape };
            buf.append_char((unsigned char)num);
            break;
        }
        case 'e':
            buf.append_char('\e');
            break;
        case 'f':
            buf.append_char('\f');
            break;
        case 'M': {
            // meta escape: \M-X, optionally wrapping a control escape \M-\cX
            c = next();
            if (c != '-')
                return { false, Token::Type::InvalidCharacterEscape };
            c = next();
            int num = 0;
            if (c == '\\' && (peek() == 'c' || peek() == 'C')) {
                advance();
                // control_character(true) cannot return -1: the -1 path is
                // only reachable when called with meta == false
                num = control_character(true);
            } else {
                num = (int)c + 128;
            }
            buf.append_char((unsigned char)num);
            break;
        }
        case 'n':
            buf.append_char('\n');
            break;
        case 'r':
            buf.append_char('\r');
            break;
        case 's':
            buf.append_char((unsigned char)32);
            break;
        case 't':
            buf.append_char('\t');
            break;
        case 'v':
            buf.append_char('\v');
            break;
        case '\n':
            // escaped newline: line continuation, emits nothing
            break;
        default:
            // unknown escape: the character stands for itself
            buf.append_char(c);
            break;
        }
        advance();
    }
    return { true, Token::Type::String };
}
|
|
1632
|
+
|
|
1633
|
+
bool Lexer::token_is_first_on_line() const {
|
|
1634
|
+
return !m_last_token || m_last_token.is_newline();
|
|
1635
|
+
}
|
|
1636
|
+
|
|
1637
|
+
Token Lexer::consume_double_quoted_string(char start_char, char stop_char, Token::Type begin_type, Token::Type end_type) {
    // Interpolated strings are lexed by a dedicated nested lexer; this method
    // only installs it and emits the opening token.
    auto *interpolated_lexer = new InterpolatedStringLexer { *this, start_char, stop_char, end_type };
    m_nested_lexer = interpolated_lexer;
    return Token { begin_type, start_char, m_file, m_token_line, m_token_column };
}
|
|
1641
|
+
|
|
1642
|
+
Token Lexer::consume_single_quoted_string(char start_char, char stop_char) {
    // Lex a non-interpolated string ('...' or %q with arbitrary delimiters).
    // Only \<delimiter> and \\ are escapes; any other backslash pair is kept
    // verbatim. When start and stop delimiters differ (e.g. %q(...)), nested
    // balanced delimiter pairs are allowed inside the literal.
    SharedPtr<String> out = new String("");
    int nesting = 0;
    for (char c = current_char(); c; c = next()) {
        if (c == '\\') {
            char escaped = next();
            if (escaped != stop_char && escaped != '\\')
                out->append_char('\\'); // not a recognized escape: keep the backslash
            out->append_char(escaped);
            continue;
        }
        if (c == start_char && start_char != stop_char) {
            nesting++;
            out->append_char(c);
            continue;
        }
        if (c != stop_char) {
            out->append_char(c);
            continue;
        }
        if (nesting > 0) {
            // closing a nested delimiter pair, still inside the literal
            nesting--;
            out->append_char(c);
            continue;
        }
        advance(); // consume the closing delimiter
        if (current_char() == ':' && !m_open_ternary) {
            advance(); // ':' -- this was a quoted hash key, e.g. 'foo': 1
            return Token { Token::Type::SymbolKey, out, m_file, m_token_line, m_token_column };
        }
        return Token { Token::Type::String, out, m_file, m_token_line, m_token_column };
    }
    // ran off the end of input without a closing delimiter
    return Token { Token::Type::UnterminatedString, start_char, m_file, m_token_line, m_token_column };
}
|
|
1678
|
+
|
|
1679
|
+
Token Lexer::consume_quoted_array_without_interpolation(char start_char, char stop_char, Token::Type type) {
    // Word arrays (%w / %i) are split by a nested WordArrayLexer with
    // interpolation disabled; this method only emits the opening token.
    auto *word_lexer = new WordArrayLexer { *this, start_char, stop_char, false };
    m_nested_lexer = word_lexer;
    return Token { type, start_char, m_file, m_token_line, m_token_column };
}
|
|
1683
|
+
|
|
1684
|
+
Token Lexer::consume_quoted_array_with_interpolation(char start_char, char stop_char, Token::Type type) {
    // Word arrays (%W / %I) are split by a nested WordArrayLexer with
    // interpolation enabled; this method only emits the opening token.
    auto *word_lexer = new WordArrayLexer { *this, start_char, stop_char, true };
    m_nested_lexer = word_lexer;
    return Token { type, start_char, m_file, m_token_line, m_token_column };
}
|
|
1688
|
+
|
|
1689
|
+
Token Lexer::consume_regexp(char start_char, char stop_char) {
    // Regexp bodies are lexed by a nested RegexpLexer; this method only
    // installs it and emits the regexp-begin token.
    auto *regexp_lexer = new RegexpLexer { *this, start_char, stop_char };
    m_nested_lexer = regexp_lexer;
    return Token { Token::Type::InterpolatedRegexpBegin, start_char, m_file, m_token_line, m_token_column };
}
|
|
1693
|
+
|
|
1694
|
+
SharedPtr<String> Lexer::consume_non_whitespace() {
    // Collect characters up to (but not including) the next space, tab,
    // newline, carriage return, or end of input. The current character is
    // consumed unconditionally, mirroring the caller's expectation that the
    // cursor starts on a non-whitespace character.
    SharedPtr<String> word = new String("");
    char c = current_char();
    for (;;) {
        word->append_char(c);
        c = next();
        if (!c || c == ' ' || c == '\t' || c == '\n' || c == '\r')
            break;
    }
    return word;
}
|
|
1703
|
+
};
|