natalie_parser 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/README.md +11 -4
  4. data/Rakefile +12 -5
  5. data/ext/natalie_parser/mri_creator.hpp +25 -7
  6. data/include/natalie_parser/creator/debug_creator.hpp +13 -3
  7. data/include/natalie_parser/creator.hpp +4 -2
  8. data/include/natalie_parser/node/array_pattern_node.hpp +20 -2
  9. data/include/natalie_parser/node/bignum_node.hpp +5 -1
  10. data/include/natalie_parser/node/case_in_node.hpp +5 -2
  11. data/include/natalie_parser/node/complex_node.hpp +49 -0
  12. data/include/natalie_parser/node/fixnum_node.hpp +5 -1
  13. data/include/natalie_parser/node/float_node.hpp +4 -0
  14. data/include/natalie_parser/node/forward_args_node.hpp +26 -0
  15. data/include/natalie_parser/node/hash_pattern_node.hpp +1 -0
  16. data/include/natalie_parser/node/infix_op_node.hpp +1 -1
  17. data/include/natalie_parser/node/iter_node.hpp +1 -1
  18. data/include/natalie_parser/node/keyword_rest_pattern_node.hpp +43 -0
  19. data/include/natalie_parser/node/node.hpp +7 -1
  20. data/include/natalie_parser/node/nth_ref_node.hpp +1 -1
  21. data/include/natalie_parser/node/rational_node.hpp +45 -0
  22. data/include/natalie_parser/node.hpp +4 -0
  23. data/include/natalie_parser/parser.hpp +14 -1
  24. data/include/natalie_parser/token.hpp +62 -13
  25. data/lib/natalie_parser/version.rb +1 -1
  26. data/src/lexer/interpolated_string_lexer.cpp +9 -9
  27. data/src/lexer/regexp_lexer.cpp +7 -7
  28. data/src/lexer/word_array_lexer.cpp +13 -13
  29. data/src/lexer.cpp +210 -181
  30. data/src/node/begin_rescue_node.cpp +1 -1
  31. data/src/node/interpolated_regexp_node.cpp +1 -1
  32. data/src/node/node.cpp +7 -0
  33. data/src/node/node_with_args.cpp +1 -0
  34. data/src/parser.cpp +261 -91
  35. metadata +6 -2
data/src/lexer.cpp CHANGED
@@ -147,12 +147,12 @@ bool Lexer::skip_whitespace() {
147
147
 
148
148
  Token Lexer::build_next_token() {
149
149
  if (m_index >= m_size)
150
- return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column };
150
+ return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
151
151
  if (m_start_char && current_char() == m_start_char) {
152
152
  m_pair_depth++;
153
153
  } else if (m_stop_char && current_char() == m_stop_char) {
154
154
  if (m_pair_depth == 0)
155
- return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column };
155
+ return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
156
156
  m_pair_depth--;
157
157
  } else if (m_index == 0 && current_char() == '\xEF') {
158
158
  // UTF-8 BOM
@@ -170,18 +170,18 @@ Token Lexer::build_next_token() {
170
170
  switch (current_char()) {
171
171
  case '=': {
172
172
  advance();
173
- return Token { Token::Type::EqualEqualEqual, m_file, m_token_line, m_token_column };
173
+ return Token { Token::Type::EqualEqualEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
174
174
  }
175
175
  default:
176
- return Token { Token::Type::EqualEqual, m_file, m_token_line, m_token_column };
176
+ return Token { Token::Type::EqualEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
177
177
  }
178
178
  }
179
179
  case '>':
180
180
  advance();
181
- return Token { Token::Type::HashRocket, m_file, m_token_line, m_token_column };
181
+ return Token { Token::Type::HashRocket, m_file, m_token_line, m_token_column, m_whitespace_precedes };
182
182
  case '~':
183
183
  advance();
184
- return Token { Token::Type::Match, m_file, m_token_line, m_token_column };
184
+ return Token { Token::Type::Match, m_file, m_token_line, m_token_column, m_whitespace_precedes };
185
185
  default:
186
186
  if (m_cursor_column == 1 && match(5, "begin")) {
187
187
  SharedPtr<String> doc = new String("=begin");
@@ -191,10 +191,9 @@ Token Lexer::build_next_token() {
191
191
  c = next();
192
192
  } while (c && !(m_cursor_column == 0 && match(4, "=end")));
193
193
  doc->append("=end\n");
194
- return Token { Token::Type::Doc, doc, m_file, m_token_line, m_token_column };
194
+ return Token { Token::Type::Doc, doc, m_file, m_token_line, m_token_column, m_whitespace_precedes };
195
195
  }
196
- auto token = Token { Token::Type::Equal, m_file, m_token_line, m_token_column };
197
- token.set_whitespace_precedes(m_whitespace_precedes);
196
+ auto token = Token { Token::Type::Equal, m_file, m_token_line, m_token_column, m_whitespace_precedes };
198
197
  return token;
199
198
  }
200
199
  }
@@ -203,37 +202,37 @@ Token Lexer::build_next_token() {
203
202
  switch (current_char()) {
204
203
  case '=':
205
204
  advance();
206
- return Token { Token::Type::PlusEqual, m_file, m_token_line, m_token_column };
205
+ return Token { Token::Type::PlusEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
207
206
  case '@':
208
207
  if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
209
208
  advance();
210
209
  SharedPtr<String> lit = new String("+@");
211
- return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
210
+ return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
212
211
  } else {
213
- return Token { Token::Type::Plus, m_file, m_token_line, m_token_column };
212
+ return Token { Token::Type::Plus, m_file, m_token_line, m_token_column, m_whitespace_precedes };
214
213
  }
215
214
  default:
216
- return Token { Token::Type::Plus, m_file, m_token_line, m_token_column };
215
+ return Token { Token::Type::Plus, m_file, m_token_line, m_token_column, m_whitespace_precedes };
217
216
  }
218
217
  case '-':
219
218
  advance();
220
219
  switch (current_char()) {
221
220
  case '>':
222
221
  advance();
223
- return Token { Token::Type::Arrow, m_file, m_token_line, m_token_column };
222
+ return Token { Token::Type::Arrow, m_file, m_token_line, m_token_column, m_whitespace_precedes };
224
223
  case '=':
225
224
  advance();
226
- return Token { Token::Type::MinusEqual, m_file, m_token_line, m_token_column };
225
+ return Token { Token::Type::MinusEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
227
226
  case '@':
228
227
  if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
229
228
  advance();
230
229
  SharedPtr<String> lit = new String("-@");
231
- return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
230
+ return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
232
231
  } else {
233
- return Token { Token::Type::Minus, m_file, m_token_line, m_token_column };
232
+ return Token { Token::Type::Minus, m_file, m_token_line, m_token_column, m_whitespace_precedes };
234
233
  }
235
234
  default:
236
- return Token { Token::Type::Minus, m_file, m_token_line, m_token_column };
235
+ return Token { Token::Type::Minus, m_file, m_token_line, m_token_column, m_whitespace_precedes };
237
236
  }
238
237
  case '*':
239
238
  advance();
@@ -243,15 +242,15 @@ Token Lexer::build_next_token() {
243
242
  switch (current_char()) {
244
243
  case '=':
245
244
  advance();
246
- return Token { Token::Type::StarStarEqual, m_file, m_token_line, m_token_column };
245
+ return Token { Token::Type::StarStarEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
247
246
  default:
248
- return Token { Token::Type::StarStar, m_file, m_token_line, m_token_column };
247
+ return Token { Token::Type::StarStar, m_file, m_token_line, m_token_column, m_whitespace_precedes };
249
248
  }
250
249
  case '=':
251
250
  advance();
252
- return Token { Token::Type::StarEqual, m_file, m_token_line, m_token_column };
251
+ return Token { Token::Type::StarEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
253
252
  default:
254
- return Token { Token::Type::Star, m_file, m_token_line, m_token_column };
253
+ return Token { Token::Type::Star, m_file, m_token_line, m_token_column, m_whitespace_precedes };
255
254
  }
256
255
  case '/': {
257
256
  advance();
@@ -267,19 +266,19 @@ Token Lexer::build_next_token() {
267
266
  case Token::Type::Newline:
268
267
  return consume_regexp('/', '/');
269
268
  case Token::Type::DefKeyword:
270
- return Token { Token::Type::Slash, m_file, m_token_line, m_token_column };
269
+ return Token { Token::Type::Slash, m_file, m_token_line, m_token_column, m_whitespace_precedes };
271
270
  default: {
272
271
  switch (current_char()) {
273
272
  case ' ':
274
- return Token { Token::Type::Slash, m_file, m_token_line, m_token_column };
273
+ return Token { Token::Type::Slash, m_file, m_token_line, m_token_column, m_whitespace_precedes };
275
274
  case '=':
276
275
  advance();
277
- return Token { Token::Type::SlashEqual, m_file, m_token_line, m_token_column };
276
+ return Token { Token::Type::SlashEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
278
277
  default:
279
278
  if (m_whitespace_precedes) {
280
279
  return consume_regexp('/', '/');
281
280
  } else {
282
- return Token { Token::Type::Slash, m_file, m_token_line, m_token_column };
281
+ return Token { Token::Type::Slash, m_file, m_token_line, m_token_column, m_whitespace_precedes };
283
282
  }
284
283
  }
285
284
  }
@@ -290,7 +289,7 @@ Token Lexer::build_next_token() {
290
289
  switch (current_char()) {
291
290
  case '=':
292
291
  advance();
293
- return Token { Token::Type::PercentEqual, m_file, m_token_line, m_token_column };
292
+ return Token { Token::Type::PercentEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
294
293
  case 'q':
295
294
  switch (peek()) {
296
295
  case '[':
@@ -311,7 +310,7 @@ Token Lexer::build_next_token() {
311
310
  advance(2);
312
311
  return consume_single_quoted_string(c, c);
313
312
  } else {
314
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
313
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
315
314
  }
316
315
  }
317
316
  }
@@ -335,7 +334,7 @@ Token Lexer::build_next_token() {
335
334
  advance(2);
336
335
  return consume_double_quoted_string(c, c);
337
336
  } else {
338
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
337
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
339
338
  }
340
339
  }
341
340
  }
@@ -359,7 +358,7 @@ Token Lexer::build_next_token() {
359
358
  advance(2);
360
359
  return consume_regexp(c, c);
361
360
  } else {
362
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
361
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
363
362
  }
364
363
  }
365
364
  }
@@ -382,7 +381,7 @@ Token Lexer::build_next_token() {
382
381
  return consume_double_quoted_string('(', ')', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
383
382
  }
384
383
  default:
385
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
384
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
386
385
  }
387
386
  case 'w':
388
387
  switch (peek()) {
@@ -405,7 +404,7 @@ Token Lexer::build_next_token() {
405
404
  advance(2);
406
405
  return consume_quoted_array_without_interpolation('(', ')', Token::Type::PercentLowerW);
407
406
  default:
408
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
407
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
409
408
  }
410
409
  case 'W':
411
410
  switch (peek()) {
@@ -428,7 +427,7 @@ Token Lexer::build_next_token() {
428
427
  advance(2);
429
428
  return consume_quoted_array_with_interpolation('(', ')', Token::Type::PercentUpperW);
430
429
  default:
431
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
430
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
432
431
  }
433
432
  case 'i':
434
433
  switch (peek()) {
@@ -451,7 +450,7 @@ Token Lexer::build_next_token() {
451
450
  advance(2);
452
451
  return consume_quoted_array_without_interpolation('(', ')', Token::Type::PercentLowerI);
453
452
  default:
454
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
453
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
455
454
  }
456
455
  case 'I':
457
456
  switch (peek()) {
@@ -474,7 +473,7 @@ Token Lexer::build_next_token() {
474
473
  advance(2);
475
474
  return consume_quoted_array_with_interpolation('(', ')', Token::Type::PercentUpperI);
476
475
  default:
477
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
476
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
478
477
  }
479
478
  case '[':
480
479
  advance();
@@ -501,26 +500,26 @@ Token Lexer::build_next_token() {
501
500
  break;
502
501
  }
503
502
  }
504
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
503
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
505
504
  case '!':
506
505
  advance();
507
506
  switch (current_char()) {
508
507
  case '=':
509
508
  advance();
510
- return Token { Token::Type::NotEqual, m_file, m_token_line, m_token_column };
509
+ return Token { Token::Type::NotEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
511
510
  case '~':
512
511
  advance();
513
- return Token { Token::Type::NotMatch, m_file, m_token_line, m_token_column };
512
+ return Token { Token::Type::NotMatch, m_file, m_token_line, m_token_column, m_whitespace_precedes };
514
513
  case '@':
515
514
  if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
516
515
  advance();
517
516
  SharedPtr<String> lit = new String("!@");
518
- return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
517
+ return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
519
518
  } else {
520
- return Token { Token::Type::Not, m_file, m_token_line, m_token_column };
519
+ return Token { Token::Type::Not, m_file, m_token_line, m_token_column, m_whitespace_precedes };
521
520
  }
522
521
  default:
523
- return Token { Token::Type::Not, m_file, m_token_line, m_token_column };
522
+ return Token { Token::Type::Not, m_file, m_token_line, m_token_column, m_whitespace_precedes };
524
523
  }
525
524
  case '<':
526
525
  advance();
@@ -540,12 +539,12 @@ Token Lexer::build_next_token() {
540
539
  case '\'':
541
540
  return consume_heredoc();
542
541
  default:
543
- return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column };
542
+ return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column, m_whitespace_precedes };
544
543
  }
545
544
  }
546
545
  case '=':
547
546
  advance();
548
- return Token { Token::Type::LeftShiftEqual, m_file, m_token_line, m_token_column };
547
+ return Token { Token::Type::LeftShiftEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
549
548
  default:
550
549
  if (!m_whitespace_precedes) {
551
550
  if (token_is_first_on_line())
@@ -553,7 +552,7 @@ Token Lexer::build_next_token() {
553
552
  else if (m_last_token.can_precede_heredoc_that_looks_like_left_shift_operator())
554
553
  return consume_heredoc();
555
554
  else
556
- return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column };
555
+ return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column, m_whitespace_precedes };
557
556
  }
558
557
  if (isalpha(current_char()))
559
558
  return consume_heredoc();
@@ -564,7 +563,7 @@ Token Lexer::build_next_token() {
564
563
  case '\'':
565
564
  return consume_heredoc();
566
565
  default:
567
- return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column };
566
+ return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column, m_whitespace_precedes };
568
567
  }
569
568
  }
570
569
  }
@@ -573,12 +572,12 @@ Token Lexer::build_next_token() {
573
572
  switch (current_char()) {
574
573
  case '>':
575
574
  advance();
576
- return Token { Token::Type::Comparison, m_file, m_token_line, m_token_column };
575
+ return Token { Token::Type::Comparison, m_file, m_token_line, m_token_column, m_whitespace_precedes };
577
576
  default:
578
- return Token { Token::Type::LessThanOrEqual, m_file, m_token_line, m_token_column };
577
+ return Token { Token::Type::LessThanOrEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
579
578
  }
580
579
  default:
581
- return Token { Token::Type::LessThan, m_file, m_token_line, m_token_column };
580
+ return Token { Token::Type::LessThan, m_file, m_token_line, m_token_column, m_whitespace_precedes };
582
581
  }
583
582
  case '>':
584
583
  advance();
@@ -588,15 +587,15 @@ Token Lexer::build_next_token() {
588
587
  switch (current_char()) {
589
588
  case '=':
590
589
  advance();
591
- return Token { Token::Type::RightShiftEqual, m_file, m_token_line, m_token_column };
590
+ return Token { Token::Type::RightShiftEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
592
591
  default:
593
- return Token { Token::Type::RightShift, m_file, m_token_line, m_token_column };
592
+ return Token { Token::Type::RightShift, m_file, m_token_line, m_token_column, m_whitespace_precedes };
594
593
  }
595
594
  case '=':
596
595
  advance();
597
- return Token { Token::Type::GreaterThanOrEqual, m_file, m_token_line, m_token_column };
596
+ return Token { Token::Type::GreaterThanOrEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
598
597
  default:
599
- return Token { Token::Type::GreaterThan, m_file, m_token_line, m_token_column };
598
+ return Token { Token::Type::GreaterThan, m_file, m_token_line, m_token_column, m_whitespace_precedes };
600
599
  }
601
600
  case '&':
602
601
  advance();
@@ -606,18 +605,18 @@ Token Lexer::build_next_token() {
606
605
  switch (current_char()) {
607
606
  case '=':
608
607
  advance();
609
- return Token { Token::Type::AmpersandAmpersandEqual, m_file, m_token_line, m_token_column };
608
+ return Token { Token::Type::AmpersandAmpersandEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
610
609
  default:
611
- return Token { Token::Type::AmpersandAmpersand, m_file, m_token_line, m_token_column };
610
+ return Token { Token::Type::AmpersandAmpersand, m_file, m_token_line, m_token_column, m_whitespace_precedes };
612
611
  }
613
612
  case '=':
614
613
  advance();
615
- return Token { Token::Type::AmpersandEqual, m_file, m_token_line, m_token_column };
614
+ return Token { Token::Type::AmpersandEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
616
615
  case '.':
617
616
  advance();
618
- return Token { Token::Type::SafeNavigation, m_file, m_token_line, m_token_column };
617
+ return Token { Token::Type::SafeNavigation, m_file, m_token_line, m_token_column, m_whitespace_precedes };
619
618
  default:
620
- return Token { Token::Type::Ampersand, m_file, m_token_line, m_token_column };
619
+ return Token { Token::Type::Ampersand, m_file, m_token_line, m_token_column, m_whitespace_precedes };
621
620
  }
622
621
  case '|':
623
622
  advance();
@@ -627,24 +626,24 @@ Token Lexer::build_next_token() {
627
626
  switch (current_char()) {
628
627
  case '=':
629
628
  advance();
630
- return Token { Token::Type::PipePipeEqual, m_file, m_token_line, m_token_column };
629
+ return Token { Token::Type::PipePipeEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
631
630
  default:
632
- return Token { Token::Type::PipePipe, m_file, m_token_line, m_token_column };
631
+ return Token { Token::Type::PipePipe, m_file, m_token_line, m_token_column, m_whitespace_precedes };
633
632
  }
634
633
  case '=':
635
634
  advance();
636
- return Token { Token::Type::PipeEqual, m_file, m_token_line, m_token_column };
635
+ return Token { Token::Type::PipeEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
637
636
  default:
638
- return Token { Token::Type::Pipe, m_file, m_token_line, m_token_column };
637
+ return Token { Token::Type::Pipe, m_file, m_token_line, m_token_column, m_whitespace_precedes };
639
638
  }
640
639
  case '^':
641
640
  advance();
642
641
  switch (current_char()) {
643
642
  case '=':
644
643
  advance();
645
- return Token { Token::Type::CaretEqual, m_file, m_token_line, m_token_column };
644
+ return Token { Token::Type::CaretEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
646
645
  default:
647
- return Token { Token::Type::Caret, m_file, m_token_line, m_token_column };
646
+ return Token { Token::Type::Caret, m_file, m_token_line, m_token_column, m_whitespace_precedes };
648
647
  }
649
648
  case '~':
650
649
  advance();
@@ -653,28 +652,28 @@ Token Lexer::build_next_token() {
653
652
  if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
654
653
  advance();
655
654
  SharedPtr<String> lit = new String("~@");
656
- return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
655
+ return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
657
656
  } else {
658
- return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column };
657
+ return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column, m_whitespace_precedes };
659
658
  }
660
659
  default:
661
- return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column };
660
+ return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column, m_whitespace_precedes };
662
661
  }
663
662
  case '?': {
664
663
  auto c = next();
665
664
  if (isspace(c)) {
666
665
  m_open_ternary = true;
667
- return Token { Token::Type::TernaryQuestion, m_file, m_token_line, m_token_column };
666
+ return Token { Token::Type::TernaryQuestion, m_file, m_token_line, m_token_column, m_whitespace_precedes };
668
667
  } else {
669
668
  advance();
670
669
  if (c == '\\') {
671
670
  auto buf = new String();
672
671
  auto result = consume_escaped_byte(*buf);
673
672
  if (!result.first)
674
- return Token { result.second, current_char(), m_file, m_token_line, m_token_column };
675
- return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column };
673
+ return Token { result.second, current_char(), m_file, m_token_line, m_token_column, m_whitespace_precedes };
674
+ return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
676
675
  } else {
677
- return Token { Token::Type::String, c, m_file, m_token_line, m_token_column };
676
+ return Token { Token::Type::String, c, m_file, m_token_line, m_token_column, m_whitespace_precedes };
678
677
  }
679
678
  }
680
679
  }
@@ -682,20 +681,19 @@ Token Lexer::build_next_token() {
682
681
  auto c = next();
683
682
  if (c == ':') {
684
683
  advance();
685
- return Token { Token::Type::ConstantResolution, m_file, m_token_line, m_token_column };
684
+ return Token { Token::Type::ConstantResolution, m_file, m_token_line, m_token_column, m_whitespace_precedes };
686
685
  } else if (m_last_token.type() == Token::Type::InterpolatedStringEnd && !m_whitespace_precedes && !m_open_ternary) {
687
- return Token { Token::Type::InterpolatedStringSymbolKey, m_file, m_token_line, m_token_column };
686
+ return Token { Token::Type::InterpolatedStringSymbolKey, m_file, m_token_line, m_token_column, m_whitespace_precedes };
688
687
  } else if (c == '"') {
689
688
  advance();
690
689
  return consume_double_quoted_string('"', '"', Token::Type::InterpolatedSymbolBegin, Token::Type::InterpolatedSymbolEnd);
691
690
  } else if (c == '\'') {
692
691
  advance();
693
692
  auto string = consume_single_quoted_string('\'', '\'');
694
- return Token { Token::Type::Symbol, string.literal(), m_file, m_token_line, m_token_column };
693
+ return Token { Token::Type::Symbol, string.literal(), m_file, m_token_line, m_token_column, m_whitespace_precedes };
695
694
  } else if (isspace(c)) {
696
695
  m_open_ternary = false;
697
- auto token = Token { Token::Type::TernaryColon, m_file, m_token_line, m_token_column };
698
- token.set_whitespace_precedes(m_whitespace_precedes);
696
+ auto token = Token { Token::Type::TernaryColon, m_file, m_token_line, m_token_column, m_whitespace_precedes };
699
697
  return token;
700
698
  } else {
701
699
  return consume_symbol();
@@ -716,7 +714,7 @@ Token Lexer::build_next_token() {
716
714
  case '$':
717
715
  if (peek() == '&') {
718
716
  advance(2);
719
- return Token { Token::Type::BackRef, '&', m_file, m_token_line, m_token_column };
717
+ return Token { Token::Type::BackRef, '&', m_file, m_token_line, m_token_column, m_whitespace_precedes };
720
718
  } else if (peek() >= '1' && peek() <= '9') {
721
719
  return consume_nth_ref();
722
720
  } else {
@@ -730,16 +728,16 @@ Token Lexer::build_next_token() {
730
728
  switch (current_char()) {
731
729
  case '.':
732
730
  advance();
733
- return Token { Token::Type::DotDotDot, m_file, m_token_line, m_token_column };
731
+ return Token { Token::Type::DotDotDot, m_file, m_token_line, m_token_column, m_whitespace_precedes };
734
732
  default:
735
- return Token { Token::Type::DotDot, m_file, m_token_line, m_token_column };
733
+ return Token { Token::Type::DotDot, m_file, m_token_line, m_token_column, m_whitespace_precedes };
736
734
  }
737
735
  default:
738
- return Token { Token::Type::Dot, m_file, m_token_line, m_token_column };
736
+ return Token { Token::Type::Dot, m_file, m_token_line, m_token_column, m_whitespace_precedes };
739
737
  }
740
738
  case '{':
741
739
  advance();
742
- return Token { Token::Type::LCurlyBrace, m_file, m_token_line, m_token_column };
740
+ return Token { Token::Type::LCurlyBrace, m_file, m_token_line, m_token_column, m_whitespace_precedes };
743
741
  case '[': {
744
742
  advance();
745
743
  switch (current_char()) {
@@ -748,36 +746,33 @@ Token Lexer::build_next_token() {
748
746
  switch (current_char()) {
749
747
  case '=':
750
748
  advance();
751
- return Token { Token::Type::LBracketRBracketEqual, m_file, m_token_line, m_token_column };
749
+ return Token { Token::Type::LBracketRBracketEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
752
750
  default:
753
- auto token = Token { Token::Type::LBracketRBracket, m_file, m_token_line, m_token_column };
754
- token.set_whitespace_precedes(m_whitespace_precedes);
751
+ auto token = Token { Token::Type::LBracketRBracket, m_file, m_token_line, m_token_column, m_whitespace_precedes };
755
752
  return token;
756
753
  }
757
754
  default:
758
- auto token = Token { Token::Type::LBracket, m_file, m_token_line, m_token_column };
759
- token.set_whitespace_precedes(m_whitespace_precedes);
755
+ auto token = Token { Token::Type::LBracket, m_file, m_token_line, m_token_column, m_whitespace_precedes };
760
756
  return token;
761
757
  }
762
758
  }
763
759
  case '(': {
764
760
  advance();
765
- auto token = Token { Token::Type::LParen, m_file, m_token_line, m_token_column };
766
- token.set_whitespace_precedes(m_whitespace_precedes);
761
+ auto token = Token { Token::Type::LParen, m_file, m_token_line, m_token_column, m_whitespace_precedes };
767
762
  return token;
768
763
  }
769
764
  case '}':
770
765
  advance();
771
- return Token { Token::Type::RCurlyBrace, m_file, m_token_line, m_token_column };
766
+ return Token { Token::Type::RCurlyBrace, m_file, m_token_line, m_token_column, m_whitespace_precedes };
772
767
  case ']':
773
768
  advance();
774
- return Token { Token::Type::RBracket, m_file, m_token_line, m_token_column };
769
+ return Token { Token::Type::RBracket, m_file, m_token_line, m_token_column, m_whitespace_precedes };
775
770
  case ')':
776
771
  advance();
777
- return Token { Token::Type::RParen, m_file, m_token_line, m_token_column };
772
+ return Token { Token::Type::RParen, m_file, m_token_line, m_token_column, m_whitespace_precedes };
778
773
  case '\n': {
779
774
  advance();
780
- auto token = Token { Token::Type::Newline, m_file, m_token_line, m_token_column };
775
+ auto token = Token { Token::Type::Newline, m_file, m_token_line, m_token_column, m_whitespace_precedes };
781
776
  if (!m_heredoc_stack.is_empty()) {
782
777
  auto new_index = m_heredoc_stack.last();
783
778
  while (m_index < new_index)
@@ -788,10 +783,10 @@ Token Lexer::build_next_token() {
788
783
  }
789
784
  case ';':
790
785
  advance();
791
- return Token { Token::Type::Semicolon, m_file, m_token_line, m_token_column };
786
+ return Token { Token::Type::Semicolon, m_file, m_token_line, m_token_column, m_whitespace_precedes };
792
787
  case ',':
793
788
  advance();
794
- return Token { Token::Type::Comma, m_file, m_token_line, m_token_column };
789
+ return Token { Token::Type::Comma, m_file, m_token_line, m_token_column, m_whitespace_precedes };
795
790
  case '"':
796
791
  advance();
797
792
  return consume_double_quoted_string('"', '"');
@@ -821,13 +816,13 @@ Token Lexer::build_next_token() {
821
816
  doc->append_char(c);
822
817
  c = next();
823
818
  }
824
- return Token { Token::Type::Doc, doc, m_file, m_token_line, m_token_column };
819
+ return Token { Token::Type::Doc, doc, m_file, m_token_line, m_token_column, m_whitespace_precedes };
825
820
  } else {
826
821
  char c;
827
822
  do {
828
823
  c = next();
829
824
  } while (c && c != '\n' && c != '\r');
830
- return Token { Token::Type::Comment, m_file, m_token_line, m_token_column };
825
+ return Token { Token::Type::Comment, m_file, m_token_line, m_token_column, m_whitespace_precedes };
831
826
  }
832
827
  case '0':
833
828
  case '1':
@@ -842,107 +837,124 @@ Token Lexer::build_next_token() {
842
837
  auto token = consume_numeric();
843
838
  return token;
844
839
  }
840
+ case 'i':
841
+ if (m_last_token.can_be_complex_or_rational() && !isalnum(peek())) {
842
+ advance();
843
+ return Token { Token::Type::Complex, m_file, m_token_line, m_token_column, m_whitespace_precedes };
844
+ }
845
+ break;
846
+ case 'r':
847
+ if (m_last_token.can_be_complex_or_rational()) {
848
+ if (peek() == 'i') {
849
+ advance(2);
850
+ return Token { Token::Type::RationalComplex, m_file, m_token_line, m_token_column, m_whitespace_precedes };
851
+ } else if (!isalnum(peek())) {
852
+ advance();
853
+ return Token { Token::Type::Rational, m_file, m_token_line, m_token_column, m_whitespace_precedes };
854
+ }
855
+ }
856
+ break;
845
857
  };
846
858
 
847
859
  Token keyword_token;
848
860
 
849
861
  if (!m_last_token.is_dot() && match(4, "self")) {
850
862
  if (current_char() == '.')
851
- keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column };
863
+ keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
852
864
  else
853
865
  rewind(4);
854
866
  }
855
867
 
856
868
  if (!m_last_token.is_dot() && !m_last_token.is_def_keyword()) {
857
869
  if (match(12, "__ENCODING__"))
858
- keyword_token = { Token::Type::ENCODINGKeyword, m_file, m_token_line, m_token_column };
870
+ keyword_token = { Token::Type::ENCODINGKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
859
871
  else if (match(8, "__LINE__"))
860
- keyword_token = { Token::Type::LINEKeyword, m_file, m_token_line, m_token_column };
872
+ keyword_token = { Token::Type::LINEKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
861
873
  else if (match(8, "__FILE__"))
862
- keyword_token = { Token::Type::FILEKeyword, m_file, m_token_line, m_token_column };
874
+ keyword_token = { Token::Type::FILEKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
863
875
  else if (match(5, "BEGIN"))
864
- keyword_token = { Token::Type::BEGINKeyword, m_file, m_token_line, m_token_column };
876
+ keyword_token = { Token::Type::BEGINKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
865
877
  else if (match(3, "END"))
866
- keyword_token = { Token::Type::ENDKeyword, m_file, m_token_line, m_token_column };
878
+ keyword_token = { Token::Type::ENDKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
867
879
  else if (match(5, "alias"))
868
- keyword_token = { Token::Type::AliasKeyword, m_file, m_token_line, m_token_column };
880
+ keyword_token = { Token::Type::AliasKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
869
881
  else if (match(3, "and"))
870
- keyword_token = { Token::Type::AndKeyword, m_file, m_token_line, m_token_column };
882
+ keyword_token = { Token::Type::AndKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
871
883
  else if (match(5, "begin"))
872
- keyword_token = { Token::Type::BeginKeyword, m_file, m_token_line, m_token_column };
884
+ keyword_token = { Token::Type::BeginKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
873
885
  else if (match(5, "break"))
874
- keyword_token = { Token::Type::BreakKeyword, m_file, m_token_line, m_token_column };
886
+ keyword_token = { Token::Type::BreakKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
875
887
  else if (match(4, "case"))
876
- keyword_token = { Token::Type::CaseKeyword, m_file, m_token_line, m_token_column };
888
+ keyword_token = { Token::Type::CaseKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
877
889
  else if (match(5, "class"))
878
- keyword_token = { Token::Type::ClassKeyword, m_file, m_token_line, m_token_column };
890
+ keyword_token = { Token::Type::ClassKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
879
891
  else if (match(8, "defined?"))
880
- keyword_token = { Token::Type::DefinedKeyword, m_file, m_token_line, m_token_column };
892
+ keyword_token = { Token::Type::DefinedKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
881
893
  else if (match(3, "def"))
882
- keyword_token = { Token::Type::DefKeyword, m_file, m_token_line, m_token_column };
894
+ keyword_token = { Token::Type::DefKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
883
895
  else if (match(2, "do"))
884
- keyword_token = { Token::Type::DoKeyword, m_file, m_token_line, m_token_column };
896
+ keyword_token = { Token::Type::DoKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
885
897
  else if (match(4, "else"))
886
- keyword_token = { Token::Type::ElseKeyword, m_file, m_token_line, m_token_column };
898
+ keyword_token = { Token::Type::ElseKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
887
899
  else if (match(5, "elsif"))
888
- keyword_token = { Token::Type::ElsifKeyword, m_file, m_token_line, m_token_column };
900
+ keyword_token = { Token::Type::ElsifKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
889
901
  else if (match(3, "end"))
890
- keyword_token = { Token::Type::EndKeyword, m_file, m_token_line, m_token_column };
902
+ keyword_token = { Token::Type::EndKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
891
903
  else if (match(6, "ensure"))
892
- keyword_token = { Token::Type::EnsureKeyword, m_file, m_token_line, m_token_column };
904
+ keyword_token = { Token::Type::EnsureKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
893
905
  else if (match(5, "false"))
894
- keyword_token = { Token::Type::FalseKeyword, m_file, m_token_line, m_token_column };
906
+ keyword_token = { Token::Type::FalseKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
895
907
  else if (match(3, "for"))
896
- keyword_token = { Token::Type::ForKeyword, m_file, m_token_line, m_token_column };
908
+ keyword_token = { Token::Type::ForKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
897
909
  else if (match(2, "if"))
898
- keyword_token = { Token::Type::IfKeyword, m_file, m_token_line, m_token_column };
910
+ keyword_token = { Token::Type::IfKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
899
911
  else if (match(2, "in"))
900
- keyword_token = { Token::Type::InKeyword, m_file, m_token_line, m_token_column };
912
+ keyword_token = { Token::Type::InKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
901
913
  else if (match(6, "module"))
902
- keyword_token = { Token::Type::ModuleKeyword, m_file, m_token_line, m_token_column };
914
+ keyword_token = { Token::Type::ModuleKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
903
915
  else if (match(4, "next"))
904
- keyword_token = { Token::Type::NextKeyword, m_file, m_token_line, m_token_column };
916
+ keyword_token = { Token::Type::NextKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
905
917
  else if (match(3, "nil"))
906
- keyword_token = { Token::Type::NilKeyword, m_file, m_token_line, m_token_column };
918
+ keyword_token = { Token::Type::NilKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
907
919
  else if (match(3, "not"))
908
- keyword_token = { Token::Type::NotKeyword, m_file, m_token_line, m_token_column };
920
+ keyword_token = { Token::Type::NotKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
909
921
  else if (match(2, "or"))
910
- keyword_token = { Token::Type::OrKeyword, m_file, m_token_line, m_token_column };
922
+ keyword_token = { Token::Type::OrKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
911
923
  else if (match(4, "redo"))
912
- keyword_token = { Token::Type::RedoKeyword, m_file, m_token_line, m_token_column };
924
+ keyword_token = { Token::Type::RedoKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
913
925
  else if (match(6, "rescue"))
914
- keyword_token = { Token::Type::RescueKeyword, m_file, m_token_line, m_token_column };
926
+ keyword_token = { Token::Type::RescueKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
915
927
  else if (match(5, "retry"))
916
- keyword_token = { Token::Type::RetryKeyword, m_file, m_token_line, m_token_column };
928
+ keyword_token = { Token::Type::RetryKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
917
929
  else if (match(6, "return"))
918
- keyword_token = { Token::Type::ReturnKeyword, m_file, m_token_line, m_token_column };
930
+ keyword_token = { Token::Type::ReturnKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
919
931
  else if (match(4, "self"))
920
- keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column };
932
+ keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
921
933
  else if (match(5, "super"))
922
- keyword_token = { Token::Type::SuperKeyword, m_file, m_token_line, m_token_column };
934
+ keyword_token = { Token::Type::SuperKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
923
935
  else if (match(4, "then"))
924
- keyword_token = { Token::Type::ThenKeyword, m_file, m_token_line, m_token_column };
936
+ keyword_token = { Token::Type::ThenKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
925
937
  else if (match(4, "true"))
926
- keyword_token = { Token::Type::TrueKeyword, m_file, m_token_line, m_token_column };
938
+ keyword_token = { Token::Type::TrueKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
927
939
  else if (match(5, "undef"))
928
- keyword_token = { Token::Type::UndefKeyword, m_file, m_token_line, m_token_column };
940
+ keyword_token = { Token::Type::UndefKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
929
941
  else if (match(6, "unless"))
930
- keyword_token = { Token::Type::UnlessKeyword, m_file, m_token_line, m_token_column };
942
+ keyword_token = { Token::Type::UnlessKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
931
943
  else if (match(5, "until"))
932
- keyword_token = { Token::Type::UntilKeyword, m_file, m_token_line, m_token_column };
944
+ keyword_token = { Token::Type::UntilKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
933
945
  else if (match(4, "when"))
934
- keyword_token = { Token::Type::WhenKeyword, m_file, m_token_line, m_token_column };
946
+ keyword_token = { Token::Type::WhenKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
935
947
  else if (match(5, "while"))
936
- keyword_token = { Token::Type::WhileKeyword, m_file, m_token_line, m_token_column };
948
+ keyword_token = { Token::Type::WhileKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
937
949
  else if (match(5, "yield"))
938
- keyword_token = { Token::Type::YieldKeyword, m_file, m_token_line, m_token_column };
950
+ keyword_token = { Token::Type::YieldKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
939
951
  }
940
952
 
941
953
  // if a colon comes next, it's not a keyword -- it's a symbol!
942
954
  if (keyword_token && current_char() == ':' && peek() != ':' && !m_open_ternary) {
943
955
  advance(); // :
944
956
  auto name = keyword_token.type_value();
945
- return Token { Token::Type::SymbolKey, name, m_file, m_token_line, m_token_column };
957
+ return Token { Token::Type::SymbolKey, name, m_file, m_token_line, m_token_column, m_whitespace_precedes };
946
958
  } else if (keyword_token) {
947
959
  return keyword_token;
948
960
  }
@@ -954,7 +966,7 @@ Token Lexer::build_next_token() {
954
966
  return consume_constant();
955
967
  } else {
956
968
  auto buf = consume_non_whitespace();
957
- auto token = Token { Token::Type::Invalid, buf, m_file, m_token_line, m_token_column };
969
+ auto token = Token { Token::Type::Invalid, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
958
970
  return token;
959
971
  }
960
972
 
@@ -1014,7 +1026,7 @@ Token Lexer::consume_symbol() {
1014
1026
  gobble(c);
1015
1027
  break;
1016
1028
  default:
1017
- return Token { Token::Type::Invalid, c, m_file, m_token_line, m_token_column };
1029
+ return Token { Token::Type::Invalid, c, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1018
1030
  }
1019
1031
  break;
1020
1032
  case '!':
@@ -1057,7 +1069,7 @@ Token Lexer::consume_symbol() {
1057
1069
  c = gobble(c);
1058
1070
  if (c == '=') gobble(c);
1059
1071
  } else {
1060
- return Token { Token::Type::Invalid, c, m_file, m_token_line, m_token_column };
1072
+ return Token { Token::Type::Invalid, c, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1061
1073
  }
1062
1074
  break;
1063
1075
  default:
@@ -1078,7 +1090,7 @@ Token Lexer::consume_symbol() {
1078
1090
  break;
1079
1091
  }
1080
1092
  }
1081
- return Token { Token::Type::Symbol, buf, m_file, m_token_line, m_token_column };
1093
+ return Token { Token::Type::Symbol, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1082
1094
  }
1083
1095
 
1084
1096
  Token Lexer::consume_word(Token::Type type) {
@@ -1097,7 +1109,7 @@ Token Lexer::consume_word(Token::Type type) {
1097
1109
  default:
1098
1110
  break;
1099
1111
  }
1100
- return Token { type, buf, m_file, m_token_line, m_token_column };
1112
+ return Token { type, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1101
1113
  }
1102
1114
 
1103
1115
  Token Lexer::consume_bare_name() {
@@ -1147,14 +1159,14 @@ Token Lexer::consume_global_variable() {
1147
1159
  SharedPtr<String> buf = new String("$");
1148
1160
  buf->append_char(current_char());
1149
1161
  advance();
1150
- return Token { Token::Type::GlobalVariable, buf, m_file, m_token_line, m_token_column };
1162
+ return Token { Token::Type::GlobalVariable, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1151
1163
  }
1152
1164
  case '-': {
1153
1165
  SharedPtr<String> buf = new String("$-");
1154
1166
  advance(2);
1155
1167
  buf->append_char(current_char());
1156
1168
  advance();
1157
- return Token { Token::Type::GlobalVariable, buf, m_file, m_token_line, m_token_column };
1169
+ return Token { Token::Type::GlobalVariable, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1158
1170
  }
1159
1171
  default: {
1160
1172
  return consume_word(Token::Type::GlobalVariable);
@@ -1257,7 +1269,7 @@ Token Lexer::consume_heredoc() {
1257
1269
  case '\n':
1258
1270
  case '\r':
1259
1271
  case 0:
1260
- return Token { Token::Type::UnterminatedString, "heredoc identifier", m_file, m_token_line, m_token_column };
1272
+ return Token { Token::Type::UnterminatedString, "heredoc identifier", m_file, m_token_line, m_token_column, m_whitespace_precedes };
1261
1273
  default:
1262
1274
  heredoc_name.append_char(c);
1263
1275
  c = next();
@@ -1276,7 +1288,7 @@ Token Lexer::consume_heredoc() {
1276
1288
  // start consuming the heredoc on the next line
1277
1289
  while (get_char() != '\n') {
1278
1290
  if (heredoc_index >= m_size)
1279
- return Token { Token::Type::UnterminatedString, "heredoc", m_file, m_token_line, m_token_column };
1291
+ return Token { Token::Type::UnterminatedString, "heredoc", m_file, m_token_line, m_token_column, m_whitespace_precedes };
1280
1292
  heredoc_index++;
1281
1293
  }
1282
1294
  heredoc_index++;
@@ -1290,7 +1302,7 @@ Token Lexer::consume_heredoc() {
1290
1302
  if (heredoc_index >= m_size) {
1291
1303
  if (is_valid_heredoc(with_dash, doc, heredoc_name))
1292
1304
  break;
1293
- return Token { Token::Type::UnterminatedString, doc, m_file, m_token_line, m_token_column };
1305
+ return Token { Token::Type::UnterminatedString, doc, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1294
1306
  }
1295
1307
  char c = get_char();
1296
1308
  heredoc_index++;
@@ -1310,11 +1322,11 @@ Token Lexer::consume_heredoc() {
1310
1322
  // This index is used to jump to the end of the heredoc later.
1311
1323
  m_heredoc_stack.push(heredoc_index);
1312
1324
 
1313
- auto token = Token { Token::Type::String, doc, m_file, m_token_line, m_token_column };
1325
+ auto token = Token { Token::Type::String, doc, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1314
1326
 
1315
1327
  if (should_interpolate) {
1316
1328
  m_nested_lexer = new InterpolatedStringLexer { *this, token, end_type };
1317
- return Token { begin_type, m_file, m_token_line, m_token_column };
1329
+ return Token { begin_type, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1318
1330
  }
1319
1331
 
1320
1332
  return token;
@@ -1322,21 +1334,40 @@ Token Lexer::consume_heredoc() {
1322
1334
 
1323
1335
  Token Lexer::consume_numeric() {
1324
1336
  SharedPtr<String> chars = new String;
1337
+
1338
+ auto consume_decimal_digits_and_build_token = [&]() {
1339
+ char c = current_char();
1340
+ do {
1341
+ chars->append_char(c);
1342
+ c = next();
1343
+ if (c == '_')
1344
+ c = next();
1345
+ } while (isdigit(c));
1346
+ if ((c == '.' && isdigit(peek())) || (c == 'e' || c == 'E'))
1347
+ return consume_numeric_as_float(chars);
1348
+ else
1349
+ return chars_to_fixnum_or_bignum_token(chars, 10, 0);
1350
+ };
1351
+
1352
+ Token token;
1353
+
1325
1354
  if (current_char() == '0') {
1355
+ // special-prefixed literals 0d, 0x, etc.
1326
1356
  switch (peek()) {
1327
1357
  case 'd':
1328
1358
  case 'D': {
1329
1359
  advance();
1330
1360
  char c = next();
1331
1361
  if (!isdigit(c))
1332
- return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
1362
+ return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
1333
1363
  do {
1334
1364
  chars->append_char(c);
1335
1365
  c = next();
1336
1366
  if (c == '_')
1337
1367
  c = next();
1338
1368
  } while (isdigit(c));
1339
- return chars_to_fixnum_or_bignum_token(chars, 10, 0);
1369
+ token = chars_to_fixnum_or_bignum_token(chars, 10, 0);
1370
+ break;
1340
1371
  }
1341
1372
  case 'o':
1342
1373
  case 'O': {
@@ -1345,14 +1376,15 @@ Token Lexer::consume_numeric() {
1345
1376
  advance();
1346
1377
  char c = next();
1347
1378
  if (!(c >= '0' && c <= '7'))
1348
- return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
1379
+ return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
1349
1380
  do {
1350
1381
  chars->append_char(c);
1351
1382
  c = next();
1352
1383
  if (c == '_')
1353
1384
  c = next();
1354
1385
  } while (c >= '0' && c <= '7');
1355
- return chars_to_fixnum_or_bignum_token(chars, 8, 2);
1386
+ token = chars_to_fixnum_or_bignum_token(chars, 8, 2);
1387
+ break;
1356
1388
  }
1357
1389
  case 'x':
1358
1390
  case 'X': {
@@ -1361,14 +1393,15 @@ Token Lexer::consume_numeric() {
1361
1393
  advance();
1362
1394
  char c = next();
1363
1395
  if (!isxdigit(c))
1364
- return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
1396
+ return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
1365
1397
  do {
1366
1398
  chars->append_char(c);
1367
1399
  c = next();
1368
1400
  if (c == '_')
1369
1401
  c = next();
1370
1402
  } while (isxdigit(c));
1371
- return chars_to_fixnum_or_bignum_token(chars, 16, 2);
1403
+ token = chars_to_fixnum_or_bignum_token(chars, 16, 2);
1404
+ break;
1372
1405
  }
1373
1406
  case 'b':
1374
1407
  case 'B': {
@@ -1377,28 +1410,24 @@ Token Lexer::consume_numeric() {
1377
1410
  advance();
1378
1411
  char c = next();
1379
1412
  if (c != '0' && c != '1')
1380
- return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
1413
+ return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
1381
1414
  do {
1382
1415
  chars->append_char(c);
1383
1416
  c = next();
1384
1417
  if (c == '_')
1385
1418
  c = next();
1386
1419
  } while (c == '0' || c == '1');
1387
- return chars_to_fixnum_or_bignum_token(chars, 2, 2);
1420
+ token = chars_to_fixnum_or_bignum_token(chars, 2, 2);
1421
+ break;
1388
1422
  }
1423
+ default:
1424
+ token = consume_decimal_digits_and_build_token();
1389
1425
  }
1426
+ } else {
1427
+ token = consume_decimal_digits_and_build_token();
1390
1428
  }
1391
- char c = current_char();
1392
- do {
1393
- chars->append_char(c);
1394
- c = next();
1395
- if (c == '_')
1396
- c = next();
1397
- } while (isdigit(c));
1398
- if ((c == '.' && isdigit(peek())) || (c == 'e' || c == 'E'))
1399
- return consume_numeric_as_float(chars);
1400
- else
1401
- return chars_to_fixnum_or_bignum_token(chars, 10, 0);
1429
+
1430
+ return token;
1402
1431
  }
1403
1432
 
1404
1433
  const long long max_fixnum = std::numeric_limits<long long>::max() / 2; // 63 bits for MRI
@@ -1407,9 +1436,9 @@ Token Lexer::chars_to_fixnum_or_bignum_token(SharedPtr<String> chars, int base,
1407
1436
  errno = 0;
1408
1437
  auto fixnum = strtoll(chars->c_str() + offset, nullptr, base);
1409
1438
  if (errno != 0 || fixnum > max_fixnum)
1410
- return Token { Token::Type::Bignum, chars, m_file, m_token_line, m_token_column };
1439
+ return Token { Token::Type::Bignum, chars, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1411
1440
  else
1412
- return Token { Token::Type::Fixnum, fixnum, m_file, m_token_line, m_token_column };
1441
+ return Token { Token::Type::Fixnum, fixnum, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1413
1442
  }
1414
1443
 
1415
1444
  Token Lexer::consume_numeric_as_float(SharedPtr<String> chars) {
@@ -1432,7 +1461,7 @@ Token Lexer::consume_numeric_as_float(SharedPtr<String> chars) {
1432
1461
  c = next();
1433
1462
  }
1434
1463
  if (!isdigit(c))
1435
- return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
1464
+ return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
1436
1465
  do {
1437
1466
  chars->append_char(c);
1438
1467
  c = next();
@@ -1441,7 +1470,7 @@ Token Lexer::consume_numeric_as_float(SharedPtr<String> chars) {
1441
1470
  } while (isdigit(c));
1442
1471
  }
1443
1472
  double dbl = atof(chars->c_str());
1444
- return Token { Token::Type::Float, dbl, m_file, m_token_line, m_token_column };
1473
+ return Token { Token::Type::Float, dbl, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1445
1474
  }
1446
1475
 
1447
1476
  Token Lexer::consume_nth_ref() {
@@ -1452,7 +1481,7 @@ Token Lexer::consume_nth_ref() {
1452
1481
  num += c - '0';
1453
1482
  c = next();
1454
1483
  } while (isdigit(c));
1455
- return Token { Token::Type::NthRef, num, m_file, m_token_line, m_token_column };
1484
+ return Token { Token::Type::NthRef, num, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1456
1485
  }
1457
1486
 
1458
1487
  long long Lexer::consume_hex_number(int max_length, bool allow_underscore) {
@@ -1636,7 +1665,7 @@ bool Lexer::token_is_first_on_line() const {
1636
1665
 
1637
1666
  Token Lexer::consume_double_quoted_string(char start_char, char stop_char, Token::Type begin_type, Token::Type end_type) {
1638
1667
  m_nested_lexer = new InterpolatedStringLexer { *this, start_char, stop_char, end_type };
1639
- return Token { begin_type, start_char, m_file, m_token_line, m_token_column };
1668
+ return Token { begin_type, start_char, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1640
1669
  }
1641
1670
 
1642
1671
  Token Lexer::consume_single_quoted_string(char start_char, char stop_char) {
@@ -1663,9 +1692,9 @@ Token Lexer::consume_single_quoted_string(char start_char, char stop_char) {
1663
1692
  advance(); // '
1664
1693
  if (current_char() == ':' && !m_open_ternary) {
1665
1694
  advance(); // :
1666
- return Token { Token::Type::SymbolKey, buf, m_file, m_token_line, m_token_column };
1695
+ return Token { Token::Type::SymbolKey, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1667
1696
  } else {
1668
- return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column };
1697
+ return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1669
1698
  }
1670
1699
  }
1671
1700
  } else {
@@ -1673,22 +1702,22 @@ Token Lexer::consume_single_quoted_string(char start_char, char stop_char) {
1673
1702
  }
1674
1703
  c = next();
1675
1704
  }
1676
- return Token { Token::Type::UnterminatedString, start_char, m_file, m_token_line, m_token_column };
1705
+ return Token { Token::Type::UnterminatedString, start_char, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1677
1706
  }
1678
1707
 
1679
1708
  Token Lexer::consume_quoted_array_without_interpolation(char start_char, char stop_char, Token::Type type) {
1680
1709
  m_nested_lexer = new WordArrayLexer { *this, start_char, stop_char, false };
1681
- return Token { type, start_char, m_file, m_token_line, m_token_column };
1710
+ return Token { type, start_char, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1682
1711
  }
1683
1712
 
1684
1713
  Token Lexer::consume_quoted_array_with_interpolation(char start_char, char stop_char, Token::Type type) {
1685
1714
  m_nested_lexer = new WordArrayLexer { *this, start_char, stop_char, true };
1686
- return Token { type, start_char, m_file, m_token_line, m_token_column };
1715
+ return Token { type, start_char, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1687
1716
  }
1688
1717
 
1689
1718
  Token Lexer::consume_regexp(char start_char, char stop_char) {
1690
1719
  m_nested_lexer = new RegexpLexer { *this, start_char, stop_char };
1691
- return Token { Token::Type::InterpolatedRegexpBegin, start_char, m_file, m_token_line, m_token_column };
1720
+ return Token { Token::Type::InterpolatedRegexpBegin, start_char, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1692
1721
  }
1693
1722
 
1694
1723
  SharedPtr<String> Lexer::consume_non_whitespace() {