natalie_parser 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/README.md +11 -4
- data/Rakefile +12 -5
- data/ext/natalie_parser/mri_creator.hpp +25 -7
- data/include/natalie_parser/creator/debug_creator.hpp +13 -3
- data/include/natalie_parser/creator.hpp +4 -2
- data/include/natalie_parser/node/array_pattern_node.hpp +20 -2
- data/include/natalie_parser/node/bignum_node.hpp +5 -1
- data/include/natalie_parser/node/case_in_node.hpp +5 -2
- data/include/natalie_parser/node/complex_node.hpp +49 -0
- data/include/natalie_parser/node/fixnum_node.hpp +5 -1
- data/include/natalie_parser/node/float_node.hpp +4 -0
- data/include/natalie_parser/node/forward_args_node.hpp +26 -0
- data/include/natalie_parser/node/hash_pattern_node.hpp +1 -0
- data/include/natalie_parser/node/infix_op_node.hpp +1 -1
- data/include/natalie_parser/node/iter_node.hpp +1 -1
- data/include/natalie_parser/node/keyword_rest_pattern_node.hpp +43 -0
- data/include/natalie_parser/node/node.hpp +7 -1
- data/include/natalie_parser/node/nth_ref_node.hpp +1 -1
- data/include/natalie_parser/node/rational_node.hpp +45 -0
- data/include/natalie_parser/node.hpp +4 -0
- data/include/natalie_parser/parser.hpp +14 -1
- data/include/natalie_parser/token.hpp +62 -13
- data/lib/natalie_parser/version.rb +1 -1
- data/src/lexer/interpolated_string_lexer.cpp +9 -9
- data/src/lexer/regexp_lexer.cpp +7 -7
- data/src/lexer/word_array_lexer.cpp +13 -13
- data/src/lexer.cpp +210 -181
- data/src/node/begin_rescue_node.cpp +1 -1
- data/src/node/interpolated_regexp_node.cpp +1 -1
- data/src/node/node.cpp +7 -0
- data/src/node/node_with_args.cpp +1 -0
- data/src/parser.cpp +261 -91
- metadata +6 -2
data/src/lexer.cpp
CHANGED
@@ -147,12 +147,12 @@ bool Lexer::skip_whitespace() {
|
|
147
147
|
|
148
148
|
Token Lexer::build_next_token() {
|
149
149
|
if (m_index >= m_size)
|
150
|
-
return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column };
|
150
|
+
return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
|
151
151
|
if (m_start_char && current_char() == m_start_char) {
|
152
152
|
m_pair_depth++;
|
153
153
|
} else if (m_stop_char && current_char() == m_stop_char) {
|
154
154
|
if (m_pair_depth == 0)
|
155
|
-
return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column };
|
155
|
+
return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
|
156
156
|
m_pair_depth--;
|
157
157
|
} else if (m_index == 0 && current_char() == '\xEF') {
|
158
158
|
// UTF-8 BOM
|
@@ -170,18 +170,18 @@ Token Lexer::build_next_token() {
|
|
170
170
|
switch (current_char()) {
|
171
171
|
case '=': {
|
172
172
|
advance();
|
173
|
-
return Token { Token::Type::EqualEqualEqual, m_file, m_token_line, m_token_column };
|
173
|
+
return Token { Token::Type::EqualEqualEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
174
174
|
}
|
175
175
|
default:
|
176
|
-
return Token { Token::Type::EqualEqual, m_file, m_token_line, m_token_column };
|
176
|
+
return Token { Token::Type::EqualEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
177
177
|
}
|
178
178
|
}
|
179
179
|
case '>':
|
180
180
|
advance();
|
181
|
-
return Token { Token::Type::HashRocket, m_file, m_token_line, m_token_column };
|
181
|
+
return Token { Token::Type::HashRocket, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
182
182
|
case '~':
|
183
183
|
advance();
|
184
|
-
return Token { Token::Type::Match, m_file, m_token_line, m_token_column };
|
184
|
+
return Token { Token::Type::Match, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
185
185
|
default:
|
186
186
|
if (m_cursor_column == 1 && match(5, "begin")) {
|
187
187
|
SharedPtr<String> doc = new String("=begin");
|
@@ -191,10 +191,9 @@ Token Lexer::build_next_token() {
|
|
191
191
|
c = next();
|
192
192
|
} while (c && !(m_cursor_column == 0 && match(4, "=end")));
|
193
193
|
doc->append("=end\n");
|
194
|
-
return Token { Token::Type::Doc, doc, m_file, m_token_line, m_token_column };
|
194
|
+
return Token { Token::Type::Doc, doc, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
195
195
|
}
|
196
|
-
auto token = Token { Token::Type::Equal, m_file, m_token_line, m_token_column };
|
197
|
-
token.set_whitespace_precedes(m_whitespace_precedes);
|
196
|
+
auto token = Token { Token::Type::Equal, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
198
197
|
return token;
|
199
198
|
}
|
200
199
|
}
|
@@ -203,37 +202,37 @@ Token Lexer::build_next_token() {
|
|
203
202
|
switch (current_char()) {
|
204
203
|
case '=':
|
205
204
|
advance();
|
206
|
-
return Token { Token::Type::PlusEqual, m_file, m_token_line, m_token_column };
|
205
|
+
return Token { Token::Type::PlusEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
207
206
|
case '@':
|
208
207
|
if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
|
209
208
|
advance();
|
210
209
|
SharedPtr<String> lit = new String("+@");
|
211
|
-
return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
|
210
|
+
return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
212
211
|
} else {
|
213
|
-
return Token { Token::Type::Plus, m_file, m_token_line, m_token_column };
|
212
|
+
return Token { Token::Type::Plus, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
214
213
|
}
|
215
214
|
default:
|
216
|
-
return Token { Token::Type::Plus, m_file, m_token_line, m_token_column };
|
215
|
+
return Token { Token::Type::Plus, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
217
216
|
}
|
218
217
|
case '-':
|
219
218
|
advance();
|
220
219
|
switch (current_char()) {
|
221
220
|
case '>':
|
222
221
|
advance();
|
223
|
-
return Token { Token::Type::Arrow, m_file, m_token_line, m_token_column };
|
222
|
+
return Token { Token::Type::Arrow, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
224
223
|
case '=':
|
225
224
|
advance();
|
226
|
-
return Token { Token::Type::MinusEqual, m_file, m_token_line, m_token_column };
|
225
|
+
return Token { Token::Type::MinusEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
227
226
|
case '@':
|
228
227
|
if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
|
229
228
|
advance();
|
230
229
|
SharedPtr<String> lit = new String("-@");
|
231
|
-
return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
|
230
|
+
return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
232
231
|
} else {
|
233
|
-
return Token { Token::Type::Minus, m_file, m_token_line, m_token_column };
|
232
|
+
return Token { Token::Type::Minus, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
234
233
|
}
|
235
234
|
default:
|
236
|
-
return Token { Token::Type::Minus, m_file, m_token_line, m_token_column };
|
235
|
+
return Token { Token::Type::Minus, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
237
236
|
}
|
238
237
|
case '*':
|
239
238
|
advance();
|
@@ -243,15 +242,15 @@ Token Lexer::build_next_token() {
|
|
243
242
|
switch (current_char()) {
|
244
243
|
case '=':
|
245
244
|
advance();
|
246
|
-
return Token { Token::Type::StarStarEqual, m_file, m_token_line, m_token_column };
|
245
|
+
return Token { Token::Type::StarStarEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
247
246
|
default:
|
248
|
-
return Token { Token::Type::StarStar, m_file, m_token_line, m_token_column };
|
247
|
+
return Token { Token::Type::StarStar, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
249
248
|
}
|
250
249
|
case '=':
|
251
250
|
advance();
|
252
|
-
return Token { Token::Type::StarEqual, m_file, m_token_line, m_token_column };
|
251
|
+
return Token { Token::Type::StarEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
253
252
|
default:
|
254
|
-
return Token { Token::Type::Star, m_file, m_token_line, m_token_column };
|
253
|
+
return Token { Token::Type::Star, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
255
254
|
}
|
256
255
|
case '/': {
|
257
256
|
advance();
|
@@ -267,19 +266,19 @@ Token Lexer::build_next_token() {
|
|
267
266
|
case Token::Type::Newline:
|
268
267
|
return consume_regexp('/', '/');
|
269
268
|
case Token::Type::DefKeyword:
|
270
|
-
return Token { Token::Type::Slash, m_file, m_token_line, m_token_column };
|
269
|
+
return Token { Token::Type::Slash, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
271
270
|
default: {
|
272
271
|
switch (current_char()) {
|
273
272
|
case ' ':
|
274
|
-
return Token { Token::Type::Slash, m_file, m_token_line, m_token_column };
|
273
|
+
return Token { Token::Type::Slash, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
275
274
|
case '=':
|
276
275
|
advance();
|
277
|
-
return Token { Token::Type::SlashEqual, m_file, m_token_line, m_token_column };
|
276
|
+
return Token { Token::Type::SlashEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
278
277
|
default:
|
279
278
|
if (m_whitespace_precedes) {
|
280
279
|
return consume_regexp('/', '/');
|
281
280
|
} else {
|
282
|
-
return Token { Token::Type::Slash, m_file, m_token_line, m_token_column };
|
281
|
+
return Token { Token::Type::Slash, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
283
282
|
}
|
284
283
|
}
|
285
284
|
}
|
@@ -290,7 +289,7 @@ Token Lexer::build_next_token() {
|
|
290
289
|
switch (current_char()) {
|
291
290
|
case '=':
|
292
291
|
advance();
|
293
|
-
return Token { Token::Type::PercentEqual, m_file, m_token_line, m_token_column };
|
292
|
+
return Token { Token::Type::PercentEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
294
293
|
case 'q':
|
295
294
|
switch (peek()) {
|
296
295
|
case '[':
|
@@ -311,7 +310,7 @@ Token Lexer::build_next_token() {
|
|
311
310
|
advance(2);
|
312
311
|
return consume_single_quoted_string(c, c);
|
313
312
|
} else {
|
314
|
-
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
313
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
315
314
|
}
|
316
315
|
}
|
317
316
|
}
|
@@ -335,7 +334,7 @@ Token Lexer::build_next_token() {
|
|
335
334
|
advance(2);
|
336
335
|
return consume_double_quoted_string(c, c);
|
337
336
|
} else {
|
338
|
-
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
337
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
339
338
|
}
|
340
339
|
}
|
341
340
|
}
|
@@ -359,7 +358,7 @@ Token Lexer::build_next_token() {
|
|
359
358
|
advance(2);
|
360
359
|
return consume_regexp(c, c);
|
361
360
|
} else {
|
362
|
-
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
361
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
363
362
|
}
|
364
363
|
}
|
365
364
|
}
|
@@ -382,7 +381,7 @@ Token Lexer::build_next_token() {
|
|
382
381
|
return consume_double_quoted_string('(', ')', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
|
383
382
|
}
|
384
383
|
default:
|
385
|
-
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
384
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
386
385
|
}
|
387
386
|
case 'w':
|
388
387
|
switch (peek()) {
|
@@ -405,7 +404,7 @@ Token Lexer::build_next_token() {
|
|
405
404
|
advance(2);
|
406
405
|
return consume_quoted_array_without_interpolation('(', ')', Token::Type::PercentLowerW);
|
407
406
|
default:
|
408
|
-
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
407
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
409
408
|
}
|
410
409
|
case 'W':
|
411
410
|
switch (peek()) {
|
@@ -428,7 +427,7 @@ Token Lexer::build_next_token() {
|
|
428
427
|
advance(2);
|
429
428
|
return consume_quoted_array_with_interpolation('(', ')', Token::Type::PercentUpperW);
|
430
429
|
default:
|
431
|
-
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
430
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
432
431
|
}
|
433
432
|
case 'i':
|
434
433
|
switch (peek()) {
|
@@ -451,7 +450,7 @@ Token Lexer::build_next_token() {
|
|
451
450
|
advance(2);
|
452
451
|
return consume_quoted_array_without_interpolation('(', ')', Token::Type::PercentLowerI);
|
453
452
|
default:
|
454
|
-
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
453
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
455
454
|
}
|
456
455
|
case 'I':
|
457
456
|
switch (peek()) {
|
@@ -474,7 +473,7 @@ Token Lexer::build_next_token() {
|
|
474
473
|
advance(2);
|
475
474
|
return consume_quoted_array_with_interpolation('(', ')', Token::Type::PercentUpperI);
|
476
475
|
default:
|
477
|
-
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
476
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
478
477
|
}
|
479
478
|
case '[':
|
480
479
|
advance();
|
@@ -501,26 +500,26 @@ Token Lexer::build_next_token() {
|
|
501
500
|
break;
|
502
501
|
}
|
503
502
|
}
|
504
|
-
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
|
503
|
+
return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
505
504
|
case '!':
|
506
505
|
advance();
|
507
506
|
switch (current_char()) {
|
508
507
|
case '=':
|
509
508
|
advance();
|
510
|
-
return Token { Token::Type::NotEqual, m_file, m_token_line, m_token_column };
|
509
|
+
return Token { Token::Type::NotEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
511
510
|
case '~':
|
512
511
|
advance();
|
513
|
-
return Token { Token::Type::NotMatch, m_file, m_token_line, m_token_column };
|
512
|
+
return Token { Token::Type::NotMatch, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
514
513
|
case '@':
|
515
514
|
if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
|
516
515
|
advance();
|
517
516
|
SharedPtr<String> lit = new String("!@");
|
518
|
-
return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
|
517
|
+
return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
519
518
|
} else {
|
520
|
-
return Token { Token::Type::Not, m_file, m_token_line, m_token_column };
|
519
|
+
return Token { Token::Type::Not, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
521
520
|
}
|
522
521
|
default:
|
523
|
-
return Token { Token::Type::Not, m_file, m_token_line, m_token_column };
|
522
|
+
return Token { Token::Type::Not, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
524
523
|
}
|
525
524
|
case '<':
|
526
525
|
advance();
|
@@ -540,12 +539,12 @@ Token Lexer::build_next_token() {
|
|
540
539
|
case '\'':
|
541
540
|
return consume_heredoc();
|
542
541
|
default:
|
543
|
-
return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column };
|
542
|
+
return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
544
543
|
}
|
545
544
|
}
|
546
545
|
case '=':
|
547
546
|
advance();
|
548
|
-
return Token { Token::Type::LeftShiftEqual, m_file, m_token_line, m_token_column };
|
547
|
+
return Token { Token::Type::LeftShiftEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
549
548
|
default:
|
550
549
|
if (!m_whitespace_precedes) {
|
551
550
|
if (token_is_first_on_line())
|
@@ -553,7 +552,7 @@ Token Lexer::build_next_token() {
|
|
553
552
|
else if (m_last_token.can_precede_heredoc_that_looks_like_left_shift_operator())
|
554
553
|
return consume_heredoc();
|
555
554
|
else
|
556
|
-
return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column };
|
555
|
+
return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
557
556
|
}
|
558
557
|
if (isalpha(current_char()))
|
559
558
|
return consume_heredoc();
|
@@ -564,7 +563,7 @@ Token Lexer::build_next_token() {
|
|
564
563
|
case '\'':
|
565
564
|
return consume_heredoc();
|
566
565
|
default:
|
567
|
-
return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column };
|
566
|
+
return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
568
567
|
}
|
569
568
|
}
|
570
569
|
}
|
@@ -573,12 +572,12 @@ Token Lexer::build_next_token() {
|
|
573
572
|
switch (current_char()) {
|
574
573
|
case '>':
|
575
574
|
advance();
|
576
|
-
return Token { Token::Type::Comparison, m_file, m_token_line, m_token_column };
|
575
|
+
return Token { Token::Type::Comparison, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
577
576
|
default:
|
578
|
-
return Token { Token::Type::LessThanOrEqual, m_file, m_token_line, m_token_column };
|
577
|
+
return Token { Token::Type::LessThanOrEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
579
578
|
}
|
580
579
|
default:
|
581
|
-
return Token { Token::Type::LessThan, m_file, m_token_line, m_token_column };
|
580
|
+
return Token { Token::Type::LessThan, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
582
581
|
}
|
583
582
|
case '>':
|
584
583
|
advance();
|
@@ -588,15 +587,15 @@ Token Lexer::build_next_token() {
|
|
588
587
|
switch (current_char()) {
|
589
588
|
case '=':
|
590
589
|
advance();
|
591
|
-
return Token { Token::Type::RightShiftEqual, m_file, m_token_line, m_token_column };
|
590
|
+
return Token { Token::Type::RightShiftEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
592
591
|
default:
|
593
|
-
return Token { Token::Type::RightShift, m_file, m_token_line, m_token_column };
|
592
|
+
return Token { Token::Type::RightShift, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
594
593
|
}
|
595
594
|
case '=':
|
596
595
|
advance();
|
597
|
-
return Token { Token::Type::GreaterThanOrEqual, m_file, m_token_line, m_token_column };
|
596
|
+
return Token { Token::Type::GreaterThanOrEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
598
597
|
default:
|
599
|
-
return Token { Token::Type::GreaterThan, m_file, m_token_line, m_token_column };
|
598
|
+
return Token { Token::Type::GreaterThan, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
600
599
|
}
|
601
600
|
case '&':
|
602
601
|
advance();
|
@@ -606,18 +605,18 @@ Token Lexer::build_next_token() {
|
|
606
605
|
switch (current_char()) {
|
607
606
|
case '=':
|
608
607
|
advance();
|
609
|
-
return Token { Token::Type::AmpersandAmpersandEqual, m_file, m_token_line, m_token_column };
|
608
|
+
return Token { Token::Type::AmpersandAmpersandEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
610
609
|
default:
|
611
|
-
return Token { Token::Type::AmpersandAmpersand, m_file, m_token_line, m_token_column };
|
610
|
+
return Token { Token::Type::AmpersandAmpersand, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
612
611
|
}
|
613
612
|
case '=':
|
614
613
|
advance();
|
615
|
-
return Token { Token::Type::AmpersandEqual, m_file, m_token_line, m_token_column };
|
614
|
+
return Token { Token::Type::AmpersandEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
616
615
|
case '.':
|
617
616
|
advance();
|
618
|
-
return Token { Token::Type::SafeNavigation, m_file, m_token_line, m_token_column };
|
617
|
+
return Token { Token::Type::SafeNavigation, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
619
618
|
default:
|
620
|
-
return Token { Token::Type::Ampersand, m_file, m_token_line, m_token_column };
|
619
|
+
return Token { Token::Type::Ampersand, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
621
620
|
}
|
622
621
|
case '|':
|
623
622
|
advance();
|
@@ -627,24 +626,24 @@ Token Lexer::build_next_token() {
|
|
627
626
|
switch (current_char()) {
|
628
627
|
case '=':
|
629
628
|
advance();
|
630
|
-
return Token { Token::Type::PipePipeEqual, m_file, m_token_line, m_token_column };
|
629
|
+
return Token { Token::Type::PipePipeEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
631
630
|
default:
|
632
|
-
return Token { Token::Type::PipePipe, m_file, m_token_line, m_token_column };
|
631
|
+
return Token { Token::Type::PipePipe, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
633
632
|
}
|
634
633
|
case '=':
|
635
634
|
advance();
|
636
|
-
return Token { Token::Type::PipeEqual, m_file, m_token_line, m_token_column };
|
635
|
+
return Token { Token::Type::PipeEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
637
636
|
default:
|
638
|
-
return Token { Token::Type::Pipe, m_file, m_token_line, m_token_column };
|
637
|
+
return Token { Token::Type::Pipe, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
639
638
|
}
|
640
639
|
case '^':
|
641
640
|
advance();
|
642
641
|
switch (current_char()) {
|
643
642
|
case '=':
|
644
643
|
advance();
|
645
|
-
return Token { Token::Type::CaretEqual, m_file, m_token_line, m_token_column };
|
644
|
+
return Token { Token::Type::CaretEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
646
645
|
default:
|
647
|
-
return Token { Token::Type::Caret, m_file, m_token_line, m_token_column };
|
646
|
+
return Token { Token::Type::Caret, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
648
647
|
}
|
649
648
|
case '~':
|
650
649
|
advance();
|
@@ -653,28 +652,28 @@ Token Lexer::build_next_token() {
|
|
653
652
|
if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
|
654
653
|
advance();
|
655
654
|
SharedPtr<String> lit = new String("~@");
|
656
|
-
return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
|
655
|
+
return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
657
656
|
} else {
|
658
|
-
return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column };
|
657
|
+
return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
659
658
|
}
|
660
659
|
default:
|
661
|
-
return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column };
|
660
|
+
return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
662
661
|
}
|
663
662
|
case '?': {
|
664
663
|
auto c = next();
|
665
664
|
if (isspace(c)) {
|
666
665
|
m_open_ternary = true;
|
667
|
-
return Token { Token::Type::TernaryQuestion, m_file, m_token_line, m_token_column };
|
666
|
+
return Token { Token::Type::TernaryQuestion, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
668
667
|
} else {
|
669
668
|
advance();
|
670
669
|
if (c == '\\') {
|
671
670
|
auto buf = new String();
|
672
671
|
auto result = consume_escaped_byte(*buf);
|
673
672
|
if (!result.first)
|
674
|
-
return Token { result.second, current_char(), m_file, m_token_line, m_token_column };
|
675
|
-
return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column };
|
673
|
+
return Token { result.second, current_char(), m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
674
|
+
return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
676
675
|
} else {
|
677
|
-
return Token { Token::Type::String, c, m_file, m_token_line, m_token_column };
|
676
|
+
return Token { Token::Type::String, c, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
678
677
|
}
|
679
678
|
}
|
680
679
|
}
|
@@ -682,20 +681,19 @@ Token Lexer::build_next_token() {
|
|
682
681
|
auto c = next();
|
683
682
|
if (c == ':') {
|
684
683
|
advance();
|
685
|
-
return Token { Token::Type::ConstantResolution, m_file, m_token_line, m_token_column };
|
684
|
+
return Token { Token::Type::ConstantResolution, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
686
685
|
} else if (m_last_token.type() == Token::Type::InterpolatedStringEnd && !m_whitespace_precedes && !m_open_ternary) {
|
687
|
-
return Token { Token::Type::InterpolatedStringSymbolKey, m_file, m_token_line, m_token_column };
|
686
|
+
return Token { Token::Type::InterpolatedStringSymbolKey, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
688
687
|
} else if (c == '"') {
|
689
688
|
advance();
|
690
689
|
return consume_double_quoted_string('"', '"', Token::Type::InterpolatedSymbolBegin, Token::Type::InterpolatedSymbolEnd);
|
691
690
|
} else if (c == '\'') {
|
692
691
|
advance();
|
693
692
|
auto string = consume_single_quoted_string('\'', '\'');
|
694
|
-
return Token { Token::Type::Symbol, string.literal(), m_file, m_token_line, m_token_column };
|
693
|
+
return Token { Token::Type::Symbol, string.literal(), m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
695
694
|
} else if (isspace(c)) {
|
696
695
|
m_open_ternary = false;
|
697
|
-
auto token = Token { Token::Type::TernaryColon, m_file, m_token_line, m_token_column };
|
698
|
-
token.set_whitespace_precedes(m_whitespace_precedes);
|
696
|
+
auto token = Token { Token::Type::TernaryColon, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
699
697
|
return token;
|
700
698
|
} else {
|
701
699
|
return consume_symbol();
|
@@ -716,7 +714,7 @@ Token Lexer::build_next_token() {
|
|
716
714
|
case '$':
|
717
715
|
if (peek() == '&') {
|
718
716
|
advance(2);
|
719
|
-
return Token { Token::Type::BackRef, '&', m_file, m_token_line, m_token_column };
|
717
|
+
return Token { Token::Type::BackRef, '&', m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
720
718
|
} else if (peek() >= '1' && peek() <= '9') {
|
721
719
|
return consume_nth_ref();
|
722
720
|
} else {
|
@@ -730,16 +728,16 @@ Token Lexer::build_next_token() {
|
|
730
728
|
switch (current_char()) {
|
731
729
|
case '.':
|
732
730
|
advance();
|
733
|
-
return Token { Token::Type::DotDotDot, m_file, m_token_line, m_token_column };
|
731
|
+
return Token { Token::Type::DotDotDot, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
734
732
|
default:
|
735
|
-
return Token { Token::Type::DotDot, m_file, m_token_line, m_token_column };
|
733
|
+
return Token { Token::Type::DotDot, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
736
734
|
}
|
737
735
|
default:
|
738
|
-
return Token { Token::Type::Dot, m_file, m_token_line, m_token_column };
|
736
|
+
return Token { Token::Type::Dot, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
739
737
|
}
|
740
738
|
case '{':
|
741
739
|
advance();
|
742
|
-
return Token { Token::Type::LCurlyBrace, m_file, m_token_line, m_token_column };
|
740
|
+
return Token { Token::Type::LCurlyBrace, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
743
741
|
case '[': {
|
744
742
|
advance();
|
745
743
|
switch (current_char()) {
|
@@ -748,36 +746,33 @@ Token Lexer::build_next_token() {
|
|
748
746
|
switch (current_char()) {
|
749
747
|
case '=':
|
750
748
|
advance();
|
751
|
-
return Token { Token::Type::LBracketRBracketEqual, m_file, m_token_line, m_token_column };
|
749
|
+
return Token { Token::Type::LBracketRBracketEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
752
750
|
default:
|
753
|
-
auto token = Token { Token::Type::LBracketRBracket, m_file, m_token_line, m_token_column };
|
754
|
-
token.set_whitespace_precedes(m_whitespace_precedes);
|
751
|
+
auto token = Token { Token::Type::LBracketRBracket, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
755
752
|
return token;
|
756
753
|
}
|
757
754
|
default:
|
758
|
-
auto token = Token { Token::Type::LBracket, m_file, m_token_line, m_token_column };
|
759
|
-
token.set_whitespace_precedes(m_whitespace_precedes);
|
755
|
+
auto token = Token { Token::Type::LBracket, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
760
756
|
return token;
|
761
757
|
}
|
762
758
|
}
|
763
759
|
case '(': {
|
764
760
|
advance();
|
765
|
-
auto token = Token { Token::Type::LParen, m_file, m_token_line, m_token_column };
|
766
|
-
token.set_whitespace_precedes(m_whitespace_precedes);
|
761
|
+
auto token = Token { Token::Type::LParen, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
767
762
|
return token;
|
768
763
|
}
|
769
764
|
case '}':
|
770
765
|
advance();
|
771
|
-
return Token { Token::Type::RCurlyBrace, m_file, m_token_line, m_token_column };
|
766
|
+
return Token { Token::Type::RCurlyBrace, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
772
767
|
case ']':
|
773
768
|
advance();
|
774
|
-
return Token { Token::Type::RBracket, m_file, m_token_line, m_token_column };
|
769
|
+
return Token { Token::Type::RBracket, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
775
770
|
case ')':
|
776
771
|
advance();
|
777
|
-
return Token { Token::Type::RParen, m_file, m_token_line, m_token_column };
|
772
|
+
return Token { Token::Type::RParen, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
778
773
|
case '\n': {
|
779
774
|
advance();
|
780
|
-
auto token = Token { Token::Type::Newline, m_file, m_token_line, m_token_column };
|
775
|
+
auto token = Token { Token::Type::Newline, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
781
776
|
if (!m_heredoc_stack.is_empty()) {
|
782
777
|
auto new_index = m_heredoc_stack.last();
|
783
778
|
while (m_index < new_index)
|
@@ -788,10 +783,10 @@ Token Lexer::build_next_token() {
|
|
788
783
|
}
|
789
784
|
case ';':
|
790
785
|
advance();
|
791
|
-
return Token { Token::Type::Semicolon, m_file, m_token_line, m_token_column };
|
786
|
+
return Token { Token::Type::Semicolon, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
792
787
|
case ',':
|
793
788
|
advance();
|
794
|
-
return Token { Token::Type::Comma, m_file, m_token_line, m_token_column };
|
789
|
+
return Token { Token::Type::Comma, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
795
790
|
case '"':
|
796
791
|
advance();
|
797
792
|
return consume_double_quoted_string('"', '"');
|
@@ -821,13 +816,13 @@ Token Lexer::build_next_token() {
|
|
821
816
|
doc->append_char(c);
|
822
817
|
c = next();
|
823
818
|
}
|
824
|
-
return Token { Token::Type::Doc, doc, m_file, m_token_line, m_token_column };
|
819
|
+
return Token { Token::Type::Doc, doc, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
825
820
|
} else {
|
826
821
|
char c;
|
827
822
|
do {
|
828
823
|
c = next();
|
829
824
|
} while (c && c != '\n' && c != '\r');
|
830
|
-
return Token { Token::Type::Comment, m_file, m_token_line, m_token_column };
|
825
|
+
return Token { Token::Type::Comment, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
831
826
|
}
|
832
827
|
case '0':
|
833
828
|
case '1':
|
@@ -842,107 +837,124 @@ Token Lexer::build_next_token() {
|
|
842
837
|
auto token = consume_numeric();
|
843
838
|
return token;
|
844
839
|
}
|
840
|
+
case 'i':
|
841
|
+
if (m_last_token.can_be_complex_or_rational() && !isalnum(peek())) {
|
842
|
+
advance();
|
843
|
+
return Token { Token::Type::Complex, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
844
|
+
}
|
845
|
+
break;
|
846
|
+
case 'r':
|
847
|
+
if (m_last_token.can_be_complex_or_rational()) {
|
848
|
+
if (peek() == 'i') {
|
849
|
+
advance(2);
|
850
|
+
return Token { Token::Type::RationalComplex, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
851
|
+
} else if (!isalnum(peek())) {
|
852
|
+
advance();
|
853
|
+
return Token { Token::Type::Rational, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
854
|
+
}
|
855
|
+
}
|
856
|
+
break;
|
845
857
|
};
|
846
858
|
|
847
859
|
Token keyword_token;
|
848
860
|
|
849
861
|
if (!m_last_token.is_dot() && match(4, "self")) {
|
850
862
|
if (current_char() == '.')
|
851
|
-
keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column };
|
863
|
+
keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
852
864
|
else
|
853
865
|
rewind(4);
|
854
866
|
}
|
855
867
|
|
856
868
|
if (!m_last_token.is_dot() && !m_last_token.is_def_keyword()) {
|
857
869
|
if (match(12, "__ENCODING__"))
|
858
|
-
keyword_token = { Token::Type::ENCODINGKeyword, m_file, m_token_line, m_token_column };
|
870
|
+
keyword_token = { Token::Type::ENCODINGKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
859
871
|
else if (match(8, "__LINE__"))
|
860
|
-
keyword_token = { Token::Type::LINEKeyword, m_file, m_token_line, m_token_column };
|
872
|
+
keyword_token = { Token::Type::LINEKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
861
873
|
else if (match(8, "__FILE__"))
|
862
|
-
keyword_token = { Token::Type::FILEKeyword, m_file, m_token_line, m_token_column };
|
874
|
+
keyword_token = { Token::Type::FILEKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
863
875
|
else if (match(5, "BEGIN"))
|
864
|
-
keyword_token = { Token::Type::BEGINKeyword, m_file, m_token_line, m_token_column };
|
876
|
+
keyword_token = { Token::Type::BEGINKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
865
877
|
else if (match(3, "END"))
|
866
|
-
keyword_token = { Token::Type::ENDKeyword, m_file, m_token_line, m_token_column };
|
878
|
+
keyword_token = { Token::Type::ENDKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
867
879
|
else if (match(5, "alias"))
|
868
|
-
keyword_token = { Token::Type::AliasKeyword, m_file, m_token_line, m_token_column };
|
880
|
+
keyword_token = { Token::Type::AliasKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
869
881
|
else if (match(3, "and"))
|
870
|
-
keyword_token = { Token::Type::AndKeyword, m_file, m_token_line, m_token_column };
|
882
|
+
keyword_token = { Token::Type::AndKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
871
883
|
else if (match(5, "begin"))
|
872
|
-
keyword_token = { Token::Type::BeginKeyword, m_file, m_token_line, m_token_column };
|
884
|
+
keyword_token = { Token::Type::BeginKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
873
885
|
else if (match(5, "break"))
|
874
|
-
keyword_token = { Token::Type::BreakKeyword, m_file, m_token_line, m_token_column };
|
886
|
+
keyword_token = { Token::Type::BreakKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
875
887
|
else if (match(4, "case"))
|
876
|
-
keyword_token = { Token::Type::CaseKeyword, m_file, m_token_line, m_token_column };
|
888
|
+
keyword_token = { Token::Type::CaseKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
877
889
|
else if (match(5, "class"))
|
878
|
-
keyword_token = { Token::Type::ClassKeyword, m_file, m_token_line, m_token_column };
|
890
|
+
keyword_token = { Token::Type::ClassKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
879
891
|
else if (match(8, "defined?"))
|
880
|
-
keyword_token = { Token::Type::DefinedKeyword, m_file, m_token_line, m_token_column };
|
892
|
+
keyword_token = { Token::Type::DefinedKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
881
893
|
else if (match(3, "def"))
|
882
|
-
keyword_token = { Token::Type::DefKeyword, m_file, m_token_line, m_token_column };
|
894
|
+
keyword_token = { Token::Type::DefKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
883
895
|
else if (match(2, "do"))
|
884
|
-
keyword_token = { Token::Type::DoKeyword, m_file, m_token_line, m_token_column };
|
896
|
+
keyword_token = { Token::Type::DoKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
885
897
|
else if (match(4, "else"))
|
886
|
-
keyword_token = { Token::Type::ElseKeyword, m_file, m_token_line, m_token_column };
|
898
|
+
keyword_token = { Token::Type::ElseKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
887
899
|
else if (match(5, "elsif"))
|
888
|
-
keyword_token = { Token::Type::ElsifKeyword, m_file, m_token_line, m_token_column };
|
900
|
+
keyword_token = { Token::Type::ElsifKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
889
901
|
else if (match(3, "end"))
|
890
|
-
keyword_token = { Token::Type::EndKeyword, m_file, m_token_line, m_token_column };
|
902
|
+
keyword_token = { Token::Type::EndKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
891
903
|
else if (match(6, "ensure"))
|
892
|
-
keyword_token = { Token::Type::EnsureKeyword, m_file, m_token_line, m_token_column };
|
904
|
+
keyword_token = { Token::Type::EnsureKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
893
905
|
else if (match(5, "false"))
|
894
|
-
keyword_token = { Token::Type::FalseKeyword, m_file, m_token_line, m_token_column };
|
906
|
+
keyword_token = { Token::Type::FalseKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
895
907
|
else if (match(3, "for"))
|
896
|
-
keyword_token = { Token::Type::ForKeyword, m_file, m_token_line, m_token_column };
|
908
|
+
keyword_token = { Token::Type::ForKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
897
909
|
else if (match(2, "if"))
|
898
|
-
keyword_token = { Token::Type::IfKeyword, m_file, m_token_line, m_token_column };
|
910
|
+
keyword_token = { Token::Type::IfKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
899
911
|
else if (match(2, "in"))
|
900
|
-
keyword_token = { Token::Type::InKeyword, m_file, m_token_line, m_token_column };
|
912
|
+
keyword_token = { Token::Type::InKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
901
913
|
else if (match(6, "module"))
|
902
|
-
keyword_token = { Token::Type::ModuleKeyword, m_file, m_token_line, m_token_column };
|
914
|
+
keyword_token = { Token::Type::ModuleKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
903
915
|
else if (match(4, "next"))
|
904
|
-
keyword_token = { Token::Type::NextKeyword, m_file, m_token_line, m_token_column };
|
916
|
+
keyword_token = { Token::Type::NextKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
905
917
|
else if (match(3, "nil"))
|
906
|
-
keyword_token = { Token::Type::NilKeyword, m_file, m_token_line, m_token_column };
|
918
|
+
keyword_token = { Token::Type::NilKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
907
919
|
else if (match(3, "not"))
|
908
|
-
keyword_token = { Token::Type::NotKeyword, m_file, m_token_line, m_token_column };
|
920
|
+
keyword_token = { Token::Type::NotKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
909
921
|
else if (match(2, "or"))
|
910
|
-
keyword_token = { Token::Type::OrKeyword, m_file, m_token_line, m_token_column };
|
922
|
+
keyword_token = { Token::Type::OrKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
911
923
|
else if (match(4, "redo"))
|
912
|
-
keyword_token = { Token::Type::RedoKeyword, m_file, m_token_line, m_token_column };
|
924
|
+
keyword_token = { Token::Type::RedoKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
913
925
|
else if (match(6, "rescue"))
|
914
|
-
keyword_token = { Token::Type::RescueKeyword, m_file, m_token_line, m_token_column };
|
926
|
+
keyword_token = { Token::Type::RescueKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
915
927
|
else if (match(5, "retry"))
|
916
|
-
keyword_token = { Token::Type::RetryKeyword, m_file, m_token_line, m_token_column };
|
928
|
+
keyword_token = { Token::Type::RetryKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
917
929
|
else if (match(6, "return"))
|
918
|
-
keyword_token = { Token::Type::ReturnKeyword, m_file, m_token_line, m_token_column };
|
930
|
+
keyword_token = { Token::Type::ReturnKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
919
931
|
else if (match(4, "self"))
|
920
|
-
keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column };
|
932
|
+
keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
921
933
|
else if (match(5, "super"))
|
922
|
-
keyword_token = { Token::Type::SuperKeyword, m_file, m_token_line, m_token_column };
|
934
|
+
keyword_token = { Token::Type::SuperKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
923
935
|
else if (match(4, "then"))
|
924
|
-
keyword_token = { Token::Type::ThenKeyword, m_file, m_token_line, m_token_column };
|
936
|
+
keyword_token = { Token::Type::ThenKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
925
937
|
else if (match(4, "true"))
|
926
|
-
keyword_token = { Token::Type::TrueKeyword, m_file, m_token_line, m_token_column };
|
938
|
+
keyword_token = { Token::Type::TrueKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
927
939
|
else if (match(5, "undef"))
|
928
|
-
keyword_token = { Token::Type::UndefKeyword, m_file, m_token_line, m_token_column };
|
940
|
+
keyword_token = { Token::Type::UndefKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
929
941
|
else if (match(6, "unless"))
|
930
|
-
keyword_token = { Token::Type::UnlessKeyword, m_file, m_token_line, m_token_column };
|
942
|
+
keyword_token = { Token::Type::UnlessKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
931
943
|
else if (match(5, "until"))
|
932
|
-
keyword_token = { Token::Type::UntilKeyword, m_file, m_token_line, m_token_column };
|
944
|
+
keyword_token = { Token::Type::UntilKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
933
945
|
else if (match(4, "when"))
|
934
|
-
keyword_token = { Token::Type::WhenKeyword, m_file, m_token_line, m_token_column };
|
946
|
+
keyword_token = { Token::Type::WhenKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
935
947
|
else if (match(5, "while"))
|
936
|
-
keyword_token = { Token::Type::WhileKeyword, m_file, m_token_line, m_token_column };
|
948
|
+
keyword_token = { Token::Type::WhileKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
937
949
|
else if (match(5, "yield"))
|
938
|
-
keyword_token = { Token::Type::YieldKeyword, m_file, m_token_line, m_token_column };
|
950
|
+
keyword_token = { Token::Type::YieldKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
939
951
|
}
|
940
952
|
|
941
953
|
// if a colon comes next, it's not a keyword -- it's a symbol!
|
942
954
|
if (keyword_token && current_char() == ':' && peek() != ':' && !m_open_ternary) {
|
943
955
|
advance(); // :
|
944
956
|
auto name = keyword_token.type_value();
|
945
|
-
return Token { Token::Type::SymbolKey, name, m_file, m_token_line, m_token_column };
|
957
|
+
return Token { Token::Type::SymbolKey, name, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
946
958
|
} else if (keyword_token) {
|
947
959
|
return keyword_token;
|
948
960
|
}
|
@@ -954,7 +966,7 @@ Token Lexer::build_next_token() {
|
|
954
966
|
return consume_constant();
|
955
967
|
} else {
|
956
968
|
auto buf = consume_non_whitespace();
|
957
|
-
auto token = Token { Token::Type::Invalid, buf, m_file, m_token_line, m_token_column };
|
969
|
+
auto token = Token { Token::Type::Invalid, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
958
970
|
return token;
|
959
971
|
}
|
960
972
|
|
@@ -1014,7 +1026,7 @@ Token Lexer::consume_symbol() {
|
|
1014
1026
|
gobble(c);
|
1015
1027
|
break;
|
1016
1028
|
default:
|
1017
|
-
return Token { Token::Type::Invalid, c, m_file, m_token_line, m_token_column };
|
1029
|
+
return Token { Token::Type::Invalid, c, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1018
1030
|
}
|
1019
1031
|
break;
|
1020
1032
|
case '!':
|
@@ -1057,7 +1069,7 @@ Token Lexer::consume_symbol() {
|
|
1057
1069
|
c = gobble(c);
|
1058
1070
|
if (c == '=') gobble(c);
|
1059
1071
|
} else {
|
1060
|
-
return Token { Token::Type::Invalid, c, m_file, m_token_line, m_token_column };
|
1072
|
+
return Token { Token::Type::Invalid, c, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1061
1073
|
}
|
1062
1074
|
break;
|
1063
1075
|
default:
|
@@ -1078,7 +1090,7 @@ Token Lexer::consume_symbol() {
|
|
1078
1090
|
break;
|
1079
1091
|
}
|
1080
1092
|
}
|
1081
|
-
return Token { Token::Type::Symbol, buf, m_file, m_token_line, m_token_column };
|
1093
|
+
return Token { Token::Type::Symbol, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1082
1094
|
}
|
1083
1095
|
|
1084
1096
|
Token Lexer::consume_word(Token::Type type) {
|
@@ -1097,7 +1109,7 @@ Token Lexer::consume_word(Token::Type type) {
|
|
1097
1109
|
default:
|
1098
1110
|
break;
|
1099
1111
|
}
|
1100
|
-
return Token { type, buf, m_file, m_token_line, m_token_column };
|
1112
|
+
return Token { type, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1101
1113
|
}
|
1102
1114
|
|
1103
1115
|
Token Lexer::consume_bare_name() {
|
@@ -1147,14 +1159,14 @@ Token Lexer::consume_global_variable() {
|
|
1147
1159
|
SharedPtr<String> buf = new String("$");
|
1148
1160
|
buf->append_char(current_char());
|
1149
1161
|
advance();
|
1150
|
-
return Token { Token::Type::GlobalVariable, buf, m_file, m_token_line, m_token_column };
|
1162
|
+
return Token { Token::Type::GlobalVariable, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1151
1163
|
}
|
1152
1164
|
case '-': {
|
1153
1165
|
SharedPtr<String> buf = new String("$-");
|
1154
1166
|
advance(2);
|
1155
1167
|
buf->append_char(current_char());
|
1156
1168
|
advance();
|
1157
|
-
return Token { Token::Type::GlobalVariable, buf, m_file, m_token_line, m_token_column };
|
1169
|
+
return Token { Token::Type::GlobalVariable, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1158
1170
|
}
|
1159
1171
|
default: {
|
1160
1172
|
return consume_word(Token::Type::GlobalVariable);
|
@@ -1257,7 +1269,7 @@ Token Lexer::consume_heredoc() {
|
|
1257
1269
|
case '\n':
|
1258
1270
|
case '\r':
|
1259
1271
|
case 0:
|
1260
|
-
return Token { Token::Type::UnterminatedString, "heredoc identifier", m_file, m_token_line, m_token_column };
|
1272
|
+
return Token { Token::Type::UnterminatedString, "heredoc identifier", m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1261
1273
|
default:
|
1262
1274
|
heredoc_name.append_char(c);
|
1263
1275
|
c = next();
|
@@ -1276,7 +1288,7 @@ Token Lexer::consume_heredoc() {
|
|
1276
1288
|
// start consuming the heredoc on the next line
|
1277
1289
|
while (get_char() != '\n') {
|
1278
1290
|
if (heredoc_index >= m_size)
|
1279
|
-
return Token { Token::Type::UnterminatedString, "heredoc", m_file, m_token_line, m_token_column };
|
1291
|
+
return Token { Token::Type::UnterminatedString, "heredoc", m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1280
1292
|
heredoc_index++;
|
1281
1293
|
}
|
1282
1294
|
heredoc_index++;
|
@@ -1290,7 +1302,7 @@ Token Lexer::consume_heredoc() {
|
|
1290
1302
|
if (heredoc_index >= m_size) {
|
1291
1303
|
if (is_valid_heredoc(with_dash, doc, heredoc_name))
|
1292
1304
|
break;
|
1293
|
-
return Token { Token::Type::UnterminatedString, doc, m_file, m_token_line, m_token_column };
|
1305
|
+
return Token { Token::Type::UnterminatedString, doc, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1294
1306
|
}
|
1295
1307
|
char c = get_char();
|
1296
1308
|
heredoc_index++;
|
@@ -1310,11 +1322,11 @@ Token Lexer::consume_heredoc() {
|
|
1310
1322
|
// This index is used to jump to the end of the heredoc later.
|
1311
1323
|
m_heredoc_stack.push(heredoc_index);
|
1312
1324
|
|
1313
|
-
auto token = Token { Token::Type::String, doc, m_file, m_token_line, m_token_column };
|
1325
|
+
auto token = Token { Token::Type::String, doc, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1314
1326
|
|
1315
1327
|
if (should_interpolate) {
|
1316
1328
|
m_nested_lexer = new InterpolatedStringLexer { *this, token, end_type };
|
1317
|
-
return Token { begin_type, m_file, m_token_line, m_token_column };
|
1329
|
+
return Token { begin_type, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1318
1330
|
}
|
1319
1331
|
|
1320
1332
|
return token;
|
@@ -1322,21 +1334,40 @@ Token Lexer::consume_heredoc() {
|
|
1322
1334
|
|
1323
1335
|
Token Lexer::consume_numeric() {
|
1324
1336
|
SharedPtr<String> chars = new String;
|
1337
|
+
|
1338
|
+
auto consume_decimal_digits_and_build_token = [&]() {
|
1339
|
+
char c = current_char();
|
1340
|
+
do {
|
1341
|
+
chars->append_char(c);
|
1342
|
+
c = next();
|
1343
|
+
if (c == '_')
|
1344
|
+
c = next();
|
1345
|
+
} while (isdigit(c));
|
1346
|
+
if ((c == '.' && isdigit(peek())) || (c == 'e' || c == 'E'))
|
1347
|
+
return consume_numeric_as_float(chars);
|
1348
|
+
else
|
1349
|
+
return chars_to_fixnum_or_bignum_token(chars, 10, 0);
|
1350
|
+
};
|
1351
|
+
|
1352
|
+
Token token;
|
1353
|
+
|
1325
1354
|
if (current_char() == '0') {
|
1355
|
+
// special-prefixed literals 0d, 0x, etc.
|
1326
1356
|
switch (peek()) {
|
1327
1357
|
case 'd':
|
1328
1358
|
case 'D': {
|
1329
1359
|
advance();
|
1330
1360
|
char c = next();
|
1331
1361
|
if (!isdigit(c))
|
1332
|
-
return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
|
1362
|
+
return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
|
1333
1363
|
do {
|
1334
1364
|
chars->append_char(c);
|
1335
1365
|
c = next();
|
1336
1366
|
if (c == '_')
|
1337
1367
|
c = next();
|
1338
1368
|
} while (isdigit(c));
|
1339
|
-
|
1369
|
+
token = chars_to_fixnum_or_bignum_token(chars, 10, 0);
|
1370
|
+
break;
|
1340
1371
|
}
|
1341
1372
|
case 'o':
|
1342
1373
|
case 'O': {
|
@@ -1345,14 +1376,15 @@ Token Lexer::consume_numeric() {
|
|
1345
1376
|
advance();
|
1346
1377
|
char c = next();
|
1347
1378
|
if (!(c >= '0' && c <= '7'))
|
1348
|
-
return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
|
1379
|
+
return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
|
1349
1380
|
do {
|
1350
1381
|
chars->append_char(c);
|
1351
1382
|
c = next();
|
1352
1383
|
if (c == '_')
|
1353
1384
|
c = next();
|
1354
1385
|
} while (c >= '0' && c <= '7');
|
1355
|
-
|
1386
|
+
token = chars_to_fixnum_or_bignum_token(chars, 8, 2);
|
1387
|
+
break;
|
1356
1388
|
}
|
1357
1389
|
case 'x':
|
1358
1390
|
case 'X': {
|
@@ -1361,14 +1393,15 @@ Token Lexer::consume_numeric() {
|
|
1361
1393
|
advance();
|
1362
1394
|
char c = next();
|
1363
1395
|
if (!isxdigit(c))
|
1364
|
-
return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
|
1396
|
+
return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
|
1365
1397
|
do {
|
1366
1398
|
chars->append_char(c);
|
1367
1399
|
c = next();
|
1368
1400
|
if (c == '_')
|
1369
1401
|
c = next();
|
1370
1402
|
} while (isxdigit(c));
|
1371
|
-
|
1403
|
+
token = chars_to_fixnum_or_bignum_token(chars, 16, 2);
|
1404
|
+
break;
|
1372
1405
|
}
|
1373
1406
|
case 'b':
|
1374
1407
|
case 'B': {
|
@@ -1377,28 +1410,24 @@ Token Lexer::consume_numeric() {
|
|
1377
1410
|
advance();
|
1378
1411
|
char c = next();
|
1379
1412
|
if (c != '0' && c != '1')
|
1380
|
-
return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
|
1413
|
+
return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
|
1381
1414
|
do {
|
1382
1415
|
chars->append_char(c);
|
1383
1416
|
c = next();
|
1384
1417
|
if (c == '_')
|
1385
1418
|
c = next();
|
1386
1419
|
} while (c == '0' || c == '1');
|
1387
|
-
|
1420
|
+
token = chars_to_fixnum_or_bignum_token(chars, 2, 2);
|
1421
|
+
break;
|
1388
1422
|
}
|
1423
|
+
default:
|
1424
|
+
token = consume_decimal_digits_and_build_token();
|
1389
1425
|
}
|
1426
|
+
} else {
|
1427
|
+
token = consume_decimal_digits_and_build_token();
|
1390
1428
|
}
|
1391
|
-
|
1392
|
-
|
1393
|
-
chars->append_char(c);
|
1394
|
-
c = next();
|
1395
|
-
if (c == '_')
|
1396
|
-
c = next();
|
1397
|
-
} while (isdigit(c));
|
1398
|
-
if ((c == '.' && isdigit(peek())) || (c == 'e' || c == 'E'))
|
1399
|
-
return consume_numeric_as_float(chars);
|
1400
|
-
else
|
1401
|
-
return chars_to_fixnum_or_bignum_token(chars, 10, 0);
|
1429
|
+
|
1430
|
+
return token;
|
1402
1431
|
}
|
1403
1432
|
|
1404
1433
|
const long long max_fixnum = std::numeric_limits<long long>::max() / 2; // 63 bits for MRI
|
@@ -1407,9 +1436,9 @@ Token Lexer::chars_to_fixnum_or_bignum_token(SharedPtr<String> chars, int base,
|
|
1407
1436
|
errno = 0;
|
1408
1437
|
auto fixnum = strtoll(chars->c_str() + offset, nullptr, base);
|
1409
1438
|
if (errno != 0 || fixnum > max_fixnum)
|
1410
|
-
return Token { Token::Type::Bignum, chars, m_file, m_token_line, m_token_column };
|
1439
|
+
return Token { Token::Type::Bignum, chars, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1411
1440
|
else
|
1412
|
-
return Token { Token::Type::Fixnum, fixnum, m_file, m_token_line, m_token_column };
|
1441
|
+
return Token { Token::Type::Fixnum, fixnum, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1413
1442
|
}
|
1414
1443
|
|
1415
1444
|
Token Lexer::consume_numeric_as_float(SharedPtr<String> chars) {
|
@@ -1432,7 +1461,7 @@ Token Lexer::consume_numeric_as_float(SharedPtr<String> chars) {
|
|
1432
1461
|
c = next();
|
1433
1462
|
}
|
1434
1463
|
if (!isdigit(c))
|
1435
|
-
return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
|
1464
|
+
return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
|
1436
1465
|
do {
|
1437
1466
|
chars->append_char(c);
|
1438
1467
|
c = next();
|
@@ -1441,7 +1470,7 @@ Token Lexer::consume_numeric_as_float(SharedPtr<String> chars) {
|
|
1441
1470
|
} while (isdigit(c));
|
1442
1471
|
}
|
1443
1472
|
double dbl = atof(chars->c_str());
|
1444
|
-
return Token { Token::Type::Float, dbl, m_file, m_token_line, m_token_column };
|
1473
|
+
return Token { Token::Type::Float, dbl, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1445
1474
|
}
|
1446
1475
|
|
1447
1476
|
Token Lexer::consume_nth_ref() {
|
@@ -1452,7 +1481,7 @@ Token Lexer::consume_nth_ref() {
|
|
1452
1481
|
num += c - '0';
|
1453
1482
|
c = next();
|
1454
1483
|
} while (isdigit(c));
|
1455
|
-
return Token { Token::Type::NthRef, num, m_file, m_token_line, m_token_column };
|
1484
|
+
return Token { Token::Type::NthRef, num, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1456
1485
|
}
|
1457
1486
|
|
1458
1487
|
long long Lexer::consume_hex_number(int max_length, bool allow_underscore) {
|
@@ -1636,7 +1665,7 @@ bool Lexer::token_is_first_on_line() const {
|
|
1636
1665
|
|
1637
1666
|
Token Lexer::consume_double_quoted_string(char start_char, char stop_char, Token::Type begin_type, Token::Type end_type) {
|
1638
1667
|
m_nested_lexer = new InterpolatedStringLexer { *this, start_char, stop_char, end_type };
|
1639
|
-
return Token { begin_type, start_char, m_file, m_token_line, m_token_column };
|
1668
|
+
return Token { begin_type, start_char, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1640
1669
|
}
|
1641
1670
|
|
1642
1671
|
Token Lexer::consume_single_quoted_string(char start_char, char stop_char) {
|
@@ -1663,9 +1692,9 @@ Token Lexer::consume_single_quoted_string(char start_char, char stop_char) {
|
|
1663
1692
|
advance(); // '
|
1664
1693
|
if (current_char() == ':' && !m_open_ternary) {
|
1665
1694
|
advance(); // :
|
1666
|
-
return Token { Token::Type::SymbolKey, buf, m_file, m_token_line, m_token_column };
|
1695
|
+
return Token { Token::Type::SymbolKey, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1667
1696
|
} else {
|
1668
|
-
return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column };
|
1697
|
+
return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1669
1698
|
}
|
1670
1699
|
}
|
1671
1700
|
} else {
|
@@ -1673,22 +1702,22 @@ Token Lexer::consume_single_quoted_string(char start_char, char stop_char) {
|
|
1673
1702
|
}
|
1674
1703
|
c = next();
|
1675
1704
|
}
|
1676
|
-
return Token { Token::Type::UnterminatedString, start_char, m_file, m_token_line, m_token_column };
|
1705
|
+
return Token { Token::Type::UnterminatedString, start_char, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1677
1706
|
}
|
1678
1707
|
|
1679
1708
|
Token Lexer::consume_quoted_array_without_interpolation(char start_char, char stop_char, Token::Type type) {
|
1680
1709
|
m_nested_lexer = new WordArrayLexer { *this, start_char, stop_char, false };
|
1681
|
-
return Token { type, start_char, m_file, m_token_line, m_token_column };
|
1710
|
+
return Token { type, start_char, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1682
1711
|
}
|
1683
1712
|
|
1684
1713
|
Token Lexer::consume_quoted_array_with_interpolation(char start_char, char stop_char, Token::Type type) {
|
1685
1714
|
m_nested_lexer = new WordArrayLexer { *this, start_char, stop_char, true };
|
1686
|
-
return Token { type, start_char, m_file, m_token_line, m_token_column };
|
1715
|
+
return Token { type, start_char, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1687
1716
|
}
|
1688
1717
|
|
1689
1718
|
Token Lexer::consume_regexp(char start_char, char stop_char) {
|
1690
1719
|
m_nested_lexer = new RegexpLexer { *this, start_char, stop_char };
|
1691
|
-
return Token { Token::Type::InterpolatedRegexpBegin, start_char, m_file, m_token_line, m_token_column };
|
1720
|
+
return Token { Token::Type::InterpolatedRegexpBegin, start_char, m_file, m_token_line, m_token_column, m_whitespace_precedes };
|
1692
1721
|
}
|
1693
1722
|
|
1694
1723
|
SharedPtr<String> Lexer::consume_non_whitespace() {
|