natalie_parser 1.1.1 → 2.0.0

This diff compares the contents of two publicly released versions of the package as they appear in a supported public registry, and is provided for informational purposes only.
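The change is mechanical but touches nearly every token the lexer builds: in 1.1.1 a few call sites constructed a token and then called token.set_whitespace_precedes(m_whitespace_precedes) as a second step, while most tokens carried no whitespace information at all. In 2.0.0 every Token { ... } initializer in lexer.cpp passes m_whitespace_precedes as a trailing argument, so the flag is set at construction and the construct-then-set sites collapse to one line. The matching token.hpp change is not part of this diff, so the constructor below is only a sketch of the assumed shape (std::string stands in for the project's SharedPtr<String>, and the overloads that carry a literal payload are omitted):

    // Sketch only: token.hpp is not shown in this diff, so this is an assumed
    // shape, not natalie_parser's actual header.
    #include <cstddef>
    #include <string>

    struct Token {
        enum class Type { Eof, Equal, Slash /* ... */ };

        // 2.0.0 pattern: the whitespace flag is part of construction.
        Token(Type type, std::string file, size_t line, size_t column,
            bool whitespace_precedes)
            : m_type { type }
            , m_file { std::move(file) }
            , m_line { line }
            , m_column { column }
            , m_whitespace_precedes { whitespace_precedes } { }

        Type type() const { return m_type; }
        bool whitespace_precedes() const { return m_whitespace_precedes; }

    private:
        Type m_type {};
        std::string m_file {};
        size_t m_line { 0 };
        size_t m_column { 0 };
        bool m_whitespace_precedes { false };
    };

    // Call sites change mechanically:
    //   1.1.1:  auto token = Token { Token::Type::Equal, file, line, col };
    //           token.set_whitespace_precedes(ws);
    //   2.0.0:  auto token = Token { Token::Type::Equal, file, line, col, ws };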
data/src/lexer.cpp CHANGED
@@ -147,12 +147,12 @@ bool Lexer::skip_whitespace() {
 
 Token Lexer::build_next_token() {
     if (m_index >= m_size)
-        return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column };
+        return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
     if (m_start_char && current_char() == m_start_char) {
         m_pair_depth++;
     } else if (m_stop_char && current_char() == m_stop_char) {
         if (m_pair_depth == 0)
-            return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column };
+            return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
         m_pair_depth--;
     } else if (m_index == 0 && current_char() == '\xEF') {
         // UTF-8 BOM
@@ -170,18 +170,18 @@ Token Lexer::build_next_token() {
             switch (current_char()) {
             case '=': {
                 advance();
-                return Token { Token::Type::EqualEqualEqual, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::EqualEqualEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
             default:
-                return Token { Token::Type::EqualEqual, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::EqualEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
         }
         case '>':
             advance();
-            return Token { Token::Type::HashRocket, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::HashRocket, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         case '~':
             advance();
-            return Token { Token::Type::Match, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::Match, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         default:
             if (m_cursor_column == 1 && match(5, "begin")) {
                 SharedPtr<String> doc = new String("=begin");
@@ -191,10 +191,9 @@ Token Lexer::build_next_token() {
                     c = next();
                 } while (c && !(m_cursor_column == 0 && match(4, "=end")));
                 doc->append("=end\n");
-                return Token { Token::Type::Doc, doc, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::Doc, doc, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
-            auto token = Token { Token::Type::Equal, m_file, m_token_line, m_token_column };
-            token.set_whitespace_precedes(m_whitespace_precedes);
+            auto token = Token { Token::Type::Equal, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             return token;
         }
     }
@@ -203,37 +202,37 @@ Token Lexer::build_next_token() {
         switch (current_char()) {
        case '=':
             advance();
-            return Token { Token::Type::PlusEqual, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::PlusEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         case '@':
             if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
                 advance();
                 SharedPtr<String> lit = new String("+@");
-                return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             } else {
-                return Token { Token::Type::Plus, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::Plus, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
         default:
-            return Token { Token::Type::Plus, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::Plus, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         }
     case '-':
         advance();
         switch (current_char()) {
         case '>':
             advance();
-            return Token { Token::Type::Arrow, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::Arrow, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         case '=':
             advance();
-            return Token { Token::Type::MinusEqual, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::MinusEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         case '@':
             if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
                 advance();
                 SharedPtr<String> lit = new String("-@");
-                return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             } else {
-                return Token { Token::Type::Minus, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::Minus, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
         default:
-            return Token { Token::Type::Minus, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::Minus, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         }
     case '*':
         advance();
@@ -243,15 +242,15 @@ Token Lexer::build_next_token() {
             switch (current_char()) {
             case '=':
                 advance();
-                return Token { Token::Type::StarStarEqual, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::StarStarEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             default:
-                return Token { Token::Type::StarStar, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::StarStar, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
         case '=':
             advance();
-            return Token { Token::Type::StarEqual, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::StarEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         default:
-            return Token { Token::Type::Star, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::Star, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         }
     case '/': {
         advance();
@@ -267,19 +266,19 @@ Token Lexer::build_next_token() {
         case Token::Type::Newline:
             return consume_regexp('/', '/');
         case Token::Type::DefKeyword:
-            return Token { Token::Type::Slash, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::Slash, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         default: {
             switch (current_char()) {
             case ' ':
-                return Token { Token::Type::Slash, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::Slash, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             case '=':
                 advance();
-                return Token { Token::Type::SlashEqual, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::SlashEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             default:
                 if (m_whitespace_precedes) {
                     return consume_regexp('/', '/');
                 } else {
-                    return Token { Token::Type::Slash, m_file, m_token_line, m_token_column };
+                    return Token { Token::Type::Slash, m_file, m_token_line, m_token_column, m_whitespace_precedes };
                 }
             }
         }
@@ -290,7 +289,7 @@ Token Lexer::build_next_token() {
         switch (current_char()) {
         case '=':
             advance();
-            return Token { Token::Type::PercentEqual, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::PercentEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         case 'q':
             switch (peek()) {
             case '[':
@@ -311,7 +310,7 @@ Token Lexer::build_next_token() {
                     advance(2);
                     return consume_single_quoted_string(c, c);
                 } else {
-                    return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
+                    return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
                 }
             }
             }
@@ -335,7 +334,7 @@ Token Lexer::build_next_token() {
                     advance(2);
                     return consume_double_quoted_string(c, c);
                 } else {
-                    return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
+                    return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
                 }
             }
             }
@@ -359,7 +358,7 @@ Token Lexer::build_next_token() {
                     advance(2);
                     return consume_regexp(c, c);
                 } else {
-                    return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
+                    return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
                 }
             }
             }
@@ -382,7 +381,7 @@ Token Lexer::build_next_token() {
                 return consume_double_quoted_string('(', ')', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
             }
             default:
-                return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
         case 'w':
             switch (peek()) {
@@ -405,7 +404,7 @@ Token Lexer::build_next_token() {
                 advance(2);
                 return consume_quoted_array_without_interpolation('(', ')', Token::Type::PercentLowerW);
             default:
-                return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
         case 'W':
             switch (peek()) {
@@ -428,7 +427,7 @@ Token Lexer::build_next_token() {
                 advance(2);
                 return consume_quoted_array_with_interpolation('(', ')', Token::Type::PercentUpperW);
             default:
-                return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
         case 'i':
             switch (peek()) {
@@ -451,7 +450,7 @@ Token Lexer::build_next_token() {
                 advance(2);
                 return consume_quoted_array_without_interpolation('(', ')', Token::Type::PercentLowerI);
             default:
-                return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
         case 'I':
             switch (peek()) {
@@ -474,7 +473,7 @@ Token Lexer::build_next_token() {
                 advance(2);
                 return consume_quoted_array_with_interpolation('(', ')', Token::Type::PercentUpperI);
             default:
-                return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
         case '[':
             advance();
@@ -501,26 +500,26 @@ Token Lexer::build_next_token() {
                 break;
             }
         }
-        return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
+        return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
     case '!':
        advance();
         switch (current_char()) {
         case '=':
             advance();
-            return Token { Token::Type::NotEqual, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::NotEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         case '~':
             advance();
-            return Token { Token::Type::NotMatch, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::NotMatch, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         case '@':
             if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
                 advance();
                 SharedPtr<String> lit = new String("!@");
-                return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             } else {
-                return Token { Token::Type::Not, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::Not, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
         default:
-            return Token { Token::Type::Not, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::Not, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         }
     case '<':
         advance();
@@ -540,12 +539,12 @@ Token Lexer::build_next_token() {
            case '\'':
                 return consume_heredoc();
             default:
-                return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
         }
         case '=':
             advance();
-            return Token { Token::Type::LeftShiftEqual, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::LeftShiftEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         default:
             if (!m_whitespace_precedes) {
                 if (token_is_first_on_line())
@@ -553,7 +552,7 @@ Token Lexer::build_next_token() {
                 else if (m_last_token.can_precede_heredoc_that_looks_like_left_shift_operator())
                     return consume_heredoc();
                 else
-                    return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column };
+                    return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
             if (isalpha(current_char()))
                 return consume_heredoc();
@@ -564,7 +563,7 @@ Token Lexer::build_next_token() {
             case '\'':
                 return consume_heredoc();
             default:
-                return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
         }
     }
@@ -573,12 +572,12 @@ Token Lexer::build_next_token() {
             switch (current_char()) {
             case '>':
                 advance();
-                return Token { Token::Type::Comparison, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::Comparison, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             default:
-                return Token { Token::Type::LessThanOrEqual, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::LessThanOrEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
         default:
-            return Token { Token::Type::LessThan, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::LessThan, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         }
     case '>':
         advance();
@@ -588,15 +587,15 @@ Token Lexer::build_next_token() {
             switch (current_char()) {
             case '=':
                 advance();
-                return Token { Token::Type::RightShiftEqual, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::RightShiftEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             default:
-                return Token { Token::Type::RightShift, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::RightShift, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
         case '=':
             advance();
-            return Token { Token::Type::GreaterThanOrEqual, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::GreaterThanOrEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         default:
-            return Token { Token::Type::GreaterThan, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::GreaterThan, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         }
     case '&':
         advance();
@@ -606,18 +605,18 @@ Token Lexer::build_next_token() {
             switch (current_char()) {
             case '=':
                 advance();
-                return Token { Token::Type::AmpersandAmpersandEqual, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::AmpersandAmpersandEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             default:
-                return Token { Token::Type::AmpersandAmpersand, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::AmpersandAmpersand, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
         case '=':
             advance();
-            return Token { Token::Type::AmpersandEqual, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::AmpersandEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         case '.':
            advance();
-            return Token { Token::Type::SafeNavigation, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::SafeNavigation, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         default:
-            return Token { Token::Type::Ampersand, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::Ampersand, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         }
     case '|':
         advance();
@@ -627,24 +626,24 @@ Token Lexer::build_next_token() {
             switch (current_char()) {
            case '=':
                 advance();
-                return Token { Token::Type::PipePipeEqual, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::PipePipeEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             default:
-                return Token { Token::Type::PipePipe, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::PipePipe, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
         case '=':
             advance();
-            return Token { Token::Type::PipeEqual, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::PipeEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         default:
-            return Token { Token::Type::Pipe, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::Pipe, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         }
     case '^':
         advance();
         switch (current_char()) {
        case '=':
            advance();
-            return Token { Token::Type::CaretEqual, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::CaretEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         default:
-            return Token { Token::Type::Caret, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::Caret, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         }
     case '~':
         advance();
@@ -653,28 +652,28 @@ Token Lexer::build_next_token() {
             if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
                 advance();
                 SharedPtr<String> lit = new String("~@");
-                return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             } else {
-                return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
         default:
-            return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         }
     case '?': {
         auto c = next();
         if (isspace(c)) {
            m_open_ternary = true;
-            return Token { Token::Type::TernaryQuestion, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::TernaryQuestion, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         } else {
            advance();
            if (c == '\\') {
                auto buf = new String();
                auto result = consume_escaped_byte(*buf);
                if (!result.first)
-                    return Token { result.second, current_char(), m_file, m_token_line, m_token_column };
-                return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column };
+                    return Token { result.second, current_char(), m_file, m_token_line, m_token_column, m_whitespace_precedes };
+                return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
            } else {
-                return Token { Token::Type::String, c, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::String, c, m_file, m_token_line, m_token_column, m_whitespace_precedes };
            }
         }
     }
@@ -682,20 +681,19 @@ Token Lexer::build_next_token() {
         auto c = next();
         if (c == ':') {
            advance();
-            return Token { Token::Type::ConstantResolution, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::ConstantResolution, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         } else if (m_last_token.type() == Token::Type::InterpolatedStringEnd && !m_whitespace_precedes && !m_open_ternary) {
-            return Token { Token::Type::InterpolatedStringSymbolKey, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::InterpolatedStringSymbolKey, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         } else if (c == '"') {
            advance();
            return consume_double_quoted_string('"', '"', Token::Type::InterpolatedSymbolBegin, Token::Type::InterpolatedSymbolEnd);
         } else if (c == '\'') {
            advance();
            auto string = consume_single_quoted_string('\'', '\'');
-            return Token { Token::Type::Symbol, string.literal(), m_file, m_token_line, m_token_column };
+            return Token { Token::Type::Symbol, string.literal(), m_file, m_token_line, m_token_column, m_whitespace_precedes };
         } else if (isspace(c)) {
            m_open_ternary = false;
-            auto token = Token { Token::Type::TernaryColon, m_file, m_token_line, m_token_column };
-            token.set_whitespace_precedes(m_whitespace_precedes);
+            auto token = Token { Token::Type::TernaryColon, m_file, m_token_line, m_token_column, m_whitespace_precedes };
            return token;
         } else {
            return consume_symbol();
@@ -716,7 +714,7 @@ Token Lexer::build_next_token() {
     case '$':
         if (peek() == '&') {
            advance(2);
-            return Token { Token::Type::BackRef, '&', m_file, m_token_line, m_token_column };
+            return Token { Token::Type::BackRef, '&', m_file, m_token_line, m_token_column, m_whitespace_precedes };
         } else if (peek() >= '1' && peek() <= '9') {
            return consume_nth_ref();
         } else {
@@ -730,16 +728,16 @@ Token Lexer::build_next_token() {
             switch (current_char()) {
            case '.':
                advance();
-                return Token { Token::Type::DotDotDot, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::DotDotDot, m_file, m_token_line, m_token_column, m_whitespace_precedes };
            default:
-                return Token { Token::Type::DotDot, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::DotDot, m_file, m_token_line, m_token_column, m_whitespace_precedes };
            }
         default:
-            return Token { Token::Type::Dot, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::Dot, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         }
     case '{':
         advance();
-        return Token { Token::Type::LCurlyBrace, m_file, m_token_line, m_token_column };
+        return Token { Token::Type::LCurlyBrace, m_file, m_token_line, m_token_column, m_whitespace_precedes };
     case '[': {
         advance();
         switch (current_char()) {
@@ -748,36 +746,33 @@ Token Lexer::build_next_token() {
             switch (current_char()) {
            case '=':
                advance();
-                return Token { Token::Type::LBracketRBracketEqual, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::LBracketRBracketEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
            default:
-                auto token = Token { Token::Type::LBracketRBracket, m_file, m_token_line, m_token_column };
-                token.set_whitespace_precedes(m_whitespace_precedes);
+                auto token = Token { Token::Type::LBracketRBracket, m_file, m_token_line, m_token_column, m_whitespace_precedes };
                return token;
            }
         default:
-            auto token = Token { Token::Type::LBracket, m_file, m_token_line, m_token_column };
-            token.set_whitespace_precedes(m_whitespace_precedes);
+            auto token = Token { Token::Type::LBracket, m_file, m_token_line, m_token_column, m_whitespace_precedes };
            return token;
         }
     }
     case '(': {
         advance();
-        auto token = Token { Token::Type::LParen, m_file, m_token_line, m_token_column };
-        token.set_whitespace_precedes(m_whitespace_precedes);
+        auto token = Token { Token::Type::LParen, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         return token;
     }
     case '}':
         advance();
-        return Token { Token::Type::RCurlyBrace, m_file, m_token_line, m_token_column };
+        return Token { Token::Type::RCurlyBrace, m_file, m_token_line, m_token_column, m_whitespace_precedes };
     case ']':
         advance();
-        return Token { Token::Type::RBracket, m_file, m_token_line, m_token_column };
+        return Token { Token::Type::RBracket, m_file, m_token_line, m_token_column, m_whitespace_precedes };
     case ')':
         advance();
-        return Token { Token::Type::RParen, m_file, m_token_line, m_token_column };
+        return Token { Token::Type::RParen, m_file, m_token_line, m_token_column, m_whitespace_precedes };
     case '\n': {
         advance();
-        auto token = Token { Token::Type::Newline, m_file, m_token_line, m_token_column };
+        auto token = Token { Token::Type::Newline, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         if (!m_heredoc_stack.is_empty()) {
            auto new_index = m_heredoc_stack.last();
            while (m_index < new_index)
@@ -788,10 +783,10 @@ Token Lexer::build_next_token() {
     }
     case ';':
         advance();
-        return Token { Token::Type::Semicolon, m_file, m_token_line, m_token_column };
+        return Token { Token::Type::Semicolon, m_file, m_token_line, m_token_column, m_whitespace_precedes };
     case ',':
         advance();
-        return Token { Token::Type::Comma, m_file, m_token_line, m_token_column };
+        return Token { Token::Type::Comma, m_file, m_token_line, m_token_column, m_whitespace_precedes };
     case '"':
         advance();
         return consume_double_quoted_string('"', '"');
@@ -821,13 +816,13 @@ Token Lexer::build_next_token() {
                 doc->append_char(c);
                 c = next();
             }
-            return Token { Token::Type::Doc, doc, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::Doc, doc, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         } else {
             char c;
             do {
                 c = next();
             } while (c && c != '\n' && c != '\r');
-            return Token { Token::Type::Comment, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::Comment, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         }
     case '0':
     case '1':
@@ -845,17 +840,17 @@ Token Lexer::build_next_token() {
         case 'i':
             if (m_last_token.can_be_complex_or_rational() && !isalnum(peek())) {
                 advance();
-                return Token { Token::Type::Complex, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::Complex, m_file, m_token_line, m_token_column, m_whitespace_precedes };
             }
             break;
         case 'r':
             if (m_last_token.can_be_complex_or_rational()) {
                 if (peek() == 'i') {
                     advance(2);
-                    return Token { Token::Type::RationalComplex, m_file, m_token_line, m_token_column };
+                    return Token { Token::Type::RationalComplex, m_file, m_token_line, m_token_column, m_whitespace_precedes };
                 } else if (!isalnum(peek())) {
                     advance();
-                    return Token { Token::Type::Rational, m_file, m_token_line, m_token_column };
+                    return Token { Token::Type::Rational, m_file, m_token_line, m_token_column, m_whitespace_precedes };
                 }
             }
             break;
@@ -865,101 +860,101 @@ Token Lexer::build_next_token() {
 
     if (!m_last_token.is_dot() && match(4, "self")) {
         if (current_char() == '.')
-            keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else
            rewind(4);
     }
 
     if (!m_last_token.is_dot() && !m_last_token.is_def_keyword()) {
         if (match(12, "__ENCODING__"))
-            keyword_token = { Token::Type::ENCODINGKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::ENCODINGKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(8, "__LINE__"))
-            keyword_token = { Token::Type::LINEKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::LINEKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(8, "__FILE__"))
-            keyword_token = { Token::Type::FILEKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::FILEKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(5, "BEGIN"))
-            keyword_token = { Token::Type::BEGINKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::BEGINKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(3, "END"))
-            keyword_token = { Token::Type::ENDKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::ENDKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(5, "alias"))
-            keyword_token = { Token::Type::AliasKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::AliasKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(3, "and"))
-            keyword_token = { Token::Type::AndKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::AndKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(5, "begin"))
-            keyword_token = { Token::Type::BeginKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::BeginKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(5, "break"))
-            keyword_token = { Token::Type::BreakKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::BreakKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(4, "case"))
-            keyword_token = { Token::Type::CaseKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::CaseKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(5, "class"))
-            keyword_token = { Token::Type::ClassKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::ClassKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(8, "defined?"))
-            keyword_token = { Token::Type::DefinedKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::DefinedKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(3, "def"))
-            keyword_token = { Token::Type::DefKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::DefKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(2, "do"))
-            keyword_token = { Token::Type::DoKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::DoKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(4, "else"))
-            keyword_token = { Token::Type::ElseKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::ElseKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(5, "elsif"))
-            keyword_token = { Token::Type::ElsifKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::ElsifKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(3, "end"))
-            keyword_token = { Token::Type::EndKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::EndKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(6, "ensure"))
-            keyword_token = { Token::Type::EnsureKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::EnsureKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(5, "false"))
-            keyword_token = { Token::Type::FalseKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::FalseKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(3, "for"))
-            keyword_token = { Token::Type::ForKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::ForKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(2, "if"))
-            keyword_token = { Token::Type::IfKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::IfKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(2, "in"))
-            keyword_token = { Token::Type::InKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::InKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(6, "module"))
-            keyword_token = { Token::Type::ModuleKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::ModuleKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(4, "next"))
-            keyword_token = { Token::Type::NextKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::NextKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(3, "nil"))
-            keyword_token = { Token::Type::NilKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::NilKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(3, "not"))
-            keyword_token = { Token::Type::NotKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::NotKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(2, "or"))
-            keyword_token = { Token::Type::OrKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::OrKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(4, "redo"))
-            keyword_token = { Token::Type::RedoKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::RedoKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(6, "rescue"))
-            keyword_token = { Token::Type::RescueKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::RescueKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(5, "retry"))
-            keyword_token = { Token::Type::RetryKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::RetryKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
        else if (match(6, "return"))
-            keyword_token = { Token::Type::ReturnKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::ReturnKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(4, "self"))
-            keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(5, "super"))
-            keyword_token = { Token::Type::SuperKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::SuperKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(4, "then"))
-            keyword_token = { Token::Type::ThenKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::ThenKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(4, "true"))
-            keyword_token = { Token::Type::TrueKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::TrueKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(5, "undef"))
-            keyword_token = { Token::Type::UndefKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::UndefKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(6, "unless"))
-            keyword_token = { Token::Type::UnlessKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::UnlessKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(5, "until"))
-            keyword_token = { Token::Type::UntilKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::UntilKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(4, "when"))
-            keyword_token = { Token::Type::WhenKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::WhenKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(5, "while"))
-            keyword_token = { Token::Type::WhileKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::WhileKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         else if (match(5, "yield"))
-            keyword_token = { Token::Type::YieldKeyword, m_file, m_token_line, m_token_column };
+            keyword_token = { Token::Type::YieldKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
     }
 
     // if a colon comes next, it's not a keyword -- it's a symbol!
     if (keyword_token && current_char() == ':' && peek() != ':' && !m_open_ternary) {
         advance(); // :
         auto name = keyword_token.type_value();
-        return Token { Token::Type::SymbolKey, name, m_file, m_token_line, m_token_column };
+        return Token { Token::Type::SymbolKey, name, m_file, m_token_line, m_token_column, m_whitespace_precedes };
     } else if (keyword_token) {
         return keyword_token;
     }
@@ -971,7 +966,7 @@ Token Lexer::build_next_token() {
         return consume_constant();
     } else {
         auto buf = consume_non_whitespace();
-        auto token = Token { Token::Type::Invalid, buf, m_file, m_token_line, m_token_column };
+        auto token = Token { Token::Type::Invalid, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         return token;
     }
 
@@ -1031,7 +1026,7 @@ Token Lexer::consume_symbol() {
             gobble(c);
             break;
         default:
-            return Token { Token::Type::Invalid, c, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::Invalid, c, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         }
         break;
     case '!':
@@ -1074,7 +1069,7 @@ Token Lexer::consume_symbol() {
             c = gobble(c);
             if (c == '=') gobble(c);
         } else {
-            return Token { Token::Type::Invalid, c, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::Invalid, c, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         }
         break;
     default:
@@ -1095,7 +1090,7 @@ Token Lexer::consume_symbol() {
             break;
         }
     }
-    return Token { Token::Type::Symbol, buf, m_file, m_token_line, m_token_column };
+    return Token { Token::Type::Symbol, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
 }
 
 Token Lexer::consume_word(Token::Type type) {
@@ -1114,7 +1109,7 @@ Token Lexer::consume_word(Token::Type type) {
     default:
         break;
     }
-    return Token { type, buf, m_file, m_token_line, m_token_column };
+    return Token { type, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
 }
 
 Token Lexer::consume_bare_name() {
@@ -1164,14 +1159,14 @@ Token Lexer::consume_global_variable() {
         SharedPtr<String> buf = new String("$");
         buf->append_char(current_char());
         advance();
-        return Token { Token::Type::GlobalVariable, buf, m_file, m_token_line, m_token_column };
+        return Token { Token::Type::GlobalVariable, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
     }
     case '-': {
         SharedPtr<String> buf = new String("$-");
         advance(2);
         buf->append_char(current_char());
         advance();
-        return Token { Token::Type::GlobalVariable, buf, m_file, m_token_line, m_token_column };
+        return Token { Token::Type::GlobalVariable, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
     }
     default: {
         return consume_word(Token::Type::GlobalVariable);
@@ -1274,7 +1269,7 @@ Token Lexer::consume_heredoc() {
         case '\n':
         case '\r':
         case 0:
-            return Token { Token::Type::UnterminatedString, "heredoc identifier", m_file, m_token_line, m_token_column };
+            return Token { Token::Type::UnterminatedString, "heredoc identifier", m_file, m_token_line, m_token_column, m_whitespace_precedes };
         default:
            heredoc_name.append_char(c);
            c = next();
@@ -1293,7 +1288,7 @@ Token Lexer::consume_heredoc() {
     // start consuming the heredoc on the next line
     while (get_char() != '\n') {
         if (heredoc_index >= m_size)
-            return Token { Token::Type::UnterminatedString, "heredoc", m_file, m_token_line, m_token_column };
+            return Token { Token::Type::UnterminatedString, "heredoc", m_file, m_token_line, m_token_column, m_whitespace_precedes };
         heredoc_index++;
     }
     heredoc_index++;
@@ -1307,7 +1302,7 @@ Token Lexer::consume_heredoc() {
         if (heredoc_index >= m_size) {
            if (is_valid_heredoc(with_dash, doc, heredoc_name))
                break;
-            return Token { Token::Type::UnterminatedString, doc, m_file, m_token_line, m_token_column };
+            return Token { Token::Type::UnterminatedString, doc, m_file, m_token_line, m_token_column, m_whitespace_precedes };
         }
         char c = get_char();
         heredoc_index++;
@@ -1327,11 +1322,11 @@ Token Lexer::consume_heredoc() {
     // This index is used to jump to the end of the heredoc later.
    m_heredoc_stack.push(heredoc_index);
 
-    auto token = Token { Token::Type::String, doc, m_file, m_token_line, m_token_column };
+    auto token = Token { Token::Type::String, doc, m_file, m_token_line, m_token_column, m_whitespace_precedes };
 
     if (should_interpolate) {
        m_nested_lexer = new InterpolatedStringLexer { *this, token, end_type };
-        return Token { begin_type, m_file, m_token_line, m_token_column };
+        return Token { begin_type, m_file, m_token_line, m_token_column, m_whitespace_precedes };
     }
 
     return token;
@@ -1364,7 +1359,7 @@ Token Lexer::consume_numeric() {
         advance();
         char c = next();
         if (!isdigit(c))
-            return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
+            return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
         do {
            chars->append_char(c);
            c = next();
@@ -1381,7 +1376,7 @@ Token Lexer::consume_numeric() {
         advance();
         char c = next();
         if (!(c >= '0' && c <= '7'))
-            return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
+            return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
         do {
            chars->append_char(c);
            c = next();
@@ -1398,7 +1393,7 @@ Token Lexer::consume_numeric() {
         advance();
         char c = next();
         if (!isxdigit(c))
-            return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
+            return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
         do {
            chars->append_char(c);
            c = next();
@@ -1415,7 +1410,7 @@ Token Lexer::consume_numeric() {
         advance();
         char c = next();
         if (c != '0' && c != '1')
-            return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
+            return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
         do {
            chars->append_char(c);
            c = next();
@@ -1441,9 +1436,9 @@ Token Lexer::chars_to_fixnum_or_bignum_token(SharedPtr<String> chars, int base,
     errno = 0;
     auto fixnum = strtoll(chars->c_str() + offset, nullptr, base);
     if (errno != 0 || fixnum > max_fixnum)
-        return Token { Token::Type::Bignum, chars, m_file, m_token_line, m_token_column };
+        return Token { Token::Type::Bignum, chars, m_file, m_token_line, m_token_column, m_whitespace_precedes };
     else
-        return Token { Token::Type::Fixnum, fixnum, m_file, m_token_line, m_token_column };
+        return Token { Token::Type::Fixnum, fixnum, m_file, m_token_line, m_token_column, m_whitespace_precedes };
 }
 
 Token Lexer::consume_numeric_as_float(SharedPtr<String> chars) {
@@ -1466,7 +1461,7 @@ Token Lexer::consume_numeric_as_float(SharedPtr<String> chars) {
            c = next();
         }
         if (!isdigit(c))
-            return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
+            return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
         do {
            chars->append_char(c);
            c = next();
@@ -1475,7 +1470,7 @@ Token Lexer::consume_numeric_as_float(SharedPtr<String> chars) {
         } while (isdigit(c));
     }
     double dbl = atof(chars->c_str());
-    return Token { Token::Type::Float, dbl, m_file, m_token_line, m_token_column };
+    return Token { Token::Type::Float, dbl, m_file, m_token_line, m_token_column, m_whitespace_precedes };
 }
 
 Token Lexer::consume_nth_ref() {
@@ -1486,7 +1481,7 @@ Token Lexer::consume_nth_ref() {
         num += c - '0';
         c = next();
     } while (isdigit(c));
-    return Token { Token::Type::NthRef, num, m_file, m_token_line, m_token_column };
+    return Token { Token::Type::NthRef, num, m_file, m_token_line, m_token_column, m_whitespace_precedes };
 }
 
 long long Lexer::consume_hex_number(int max_length, bool allow_underscore) {
@@ -1670,7 +1665,7 @@ bool Lexer::token_is_first_on_line() const {
 
 Token Lexer::consume_double_quoted_string(char start_char, char stop_char, Token::Type begin_type, Token::Type end_type) {
    m_nested_lexer = new InterpolatedStringLexer { *this, start_char, stop_char, end_type };
-    return Token { begin_type, start_char, m_file, m_token_line, m_token_column };
+    return Token { begin_type, start_char, m_file, m_token_line, m_token_column, m_whitespace_precedes };
}
 
 Token Lexer::consume_single_quoted_string(char start_char, char stop_char) {
@@ -1697,9 +1692,9 @@ Token Lexer::consume_single_quoted_string(char start_char, char stop_char) {
            advance(); // '
            if (current_char() == ':' && !m_open_ternary) {
                advance(); // :
-                return Token { Token::Type::SymbolKey, buf, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::SymbolKey, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
            } else {
-                return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column };
+                return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
            }
         }
     } else {
@@ -1707,22 +1702,22 @@ Token Lexer::consume_single_quoted_string(char start_char, char stop_char) {
         }
         c = next();
     }
-    return Token { Token::Type::UnterminatedString, start_char, m_file, m_token_line, m_token_column };
+    return Token { Token::Type::UnterminatedString, start_char, m_file, m_token_line, m_token_column, m_whitespace_precedes };
 }
 
 Token Lexer::consume_quoted_array_without_interpolation(char start_char, char stop_char, Token::Type type) {
    m_nested_lexer = new WordArrayLexer { *this, start_char, stop_char, false };
-    return Token { type, start_char, m_file, m_token_line, m_token_column };
+    return Token { type, start_char, m_file, m_token_line, m_token_column, m_whitespace_precedes };
 }
 
 Token Lexer::consume_quoted_array_with_interpolation(char start_char, char stop_char, Token::Type type) {
    m_nested_lexer = new WordArrayLexer { *this, start_char, stop_char, true };
-    return Token { type, start_char, m_file, m_token_line, m_token_column };
+    return Token { type, start_char, m_file, m_token_line, m_token_column, m_whitespace_precedes };
 }
 
 Token Lexer::consume_regexp(char start_char, char stop_char) {
    m_nested_lexer = new RegexpLexer { *this, start_char, stop_char };
-    return Token { Token::Type::InterpolatedRegexpBegin, start_char, m_file, m_token_line, m_token_column };
+    return Token { Token::Type::InterpolatedRegexpBegin, start_char, m_file, m_token_line, m_token_column, m_whitespace_precedes };
 }
 
 SharedPtr<String> Lexer::consume_non_whitespace() {
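Why thread the flag everywhere: the lexer already branches on m_whitespace_precedes when deciding between tokens (see the '/' case above, where "foo /bar/" lexes as the start of a regexp while "foo/bar" stays a Slash for division, and the '[', '(', and heredoc cases make similar checks). Stamping the flag on every token presumably lets the parser make the same whitespace-sensitive calls, such as distinguishing an index from an array argument, without re-inspecting the source. A sketch of the kind of check this enables (a hypothetical helper, not something that appears in this diff):

    // Assumed parser-side use of the flag: `foo[1]` indexes foo, while
    // `foo [1]` passes an array literal as an argument, so a left bracket
    // preceded by whitespace is not an index.
    bool looks_like_index(const Token &token) {
        return token.type() == Token::Type::LBracket && !token.whitespace_precedes();
    }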