yarp 0.8.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +48 -1
  3. data/Makefile +5 -1
  4. data/README.md +4 -3
  5. data/config.yml +461 -150
  6. data/docs/configuration.md +1 -0
  7. data/docs/encoding.md +5 -5
  8. data/docs/ruby_api.md +2 -0
  9. data/docs/serialization.md +3 -3
  10. data/docs/testing.md +2 -2
  11. data/ext/yarp/api_node.c +810 -199
  12. data/ext/yarp/extension.c +94 -31
  13. data/ext/yarp/extension.h +2 -2
  14. data/include/yarp/ast.h +653 -150
  15. data/include/yarp/defines.h +2 -1
  16. data/include/yarp/diagnostic.h +3 -3
  17. data/include/yarp/enc/yp_encoding.h +10 -10
  18. data/include/yarp/node.h +10 -0
  19. data/include/yarp/parser.h +19 -19
  20. data/include/yarp/regexp.h +1 -1
  21. data/include/yarp/unescape.h +7 -5
  22. data/include/yarp/util/yp_buffer.h +3 -0
  23. data/include/yarp/util/yp_char.h +16 -16
  24. data/include/yarp/util/yp_constant_pool.h +2 -2
  25. data/include/yarp/util/yp_newline_list.h +7 -4
  26. data/include/yarp/util/yp_string.h +4 -4
  27. data/include/yarp/util/yp_string_list.h +0 -3
  28. data/include/yarp/util/yp_strpbrk.h +1 -1
  29. data/include/yarp/version.h +2 -2
  30. data/include/yarp.h +14 -3
  31. data/lib/yarp/desugar_visitor.rb +204 -0
  32. data/lib/yarp/ffi.rb +27 -1
  33. data/lib/yarp/lex_compat.rb +93 -25
  34. data/lib/yarp/mutation_visitor.rb +683 -0
  35. data/lib/yarp/node.rb +3121 -597
  36. data/lib/yarp/serialize.rb +198 -126
  37. data/lib/yarp.rb +53 -7
  38. data/src/diagnostic.c +1 -1
  39. data/src/enc/yp_big5.c +15 -42
  40. data/src/enc/yp_euc_jp.c +16 -43
  41. data/src/enc/yp_gbk.c +19 -46
  42. data/src/enc/yp_shift_jis.c +16 -43
  43. data/src/enc/yp_tables.c +36 -38
  44. data/src/enc/yp_unicode.c +20 -25
  45. data/src/enc/yp_windows_31j.c +16 -43
  46. data/src/node.c +1444 -836
  47. data/src/prettyprint.c +324 -103
  48. data/src/regexp.c +21 -21
  49. data/src/serialize.c +429 -276
  50. data/src/token_type.c +2 -2
  51. data/src/unescape.c +184 -136
  52. data/src/util/yp_buffer.c +7 -2
  53. data/src/util/yp_char.c +34 -34
  54. data/src/util/yp_constant_pool.c +4 -4
  55. data/src/util/yp_memchr.c +1 -1
  56. data/src/util/yp_newline_list.c +14 -3
  57. data/src/util/yp_string.c +22 -20
  58. data/src/util/yp_string_list.c +0 -6
  59. data/src/util/yp_strncasecmp.c +3 -6
  60. data/src/util/yp_strpbrk.c +8 -8
  61. data/src/yarp.c +1504 -615
  62. data/yarp.gemspec +3 -1
  63. metadata +4 -2
data/src/yarp.c CHANGED
@@ -1,5 +1,4 @@
1
1
  #include "yarp.h"
2
- #include "yarp/version.h"
3
2
 
4
3
  // The YARP version and the serialization format.
5
4
  const char *
@@ -162,14 +161,18 @@ debug_token(yp_token_t * token) {
162
161
 
163
162
  #endif
164
163
 
164
+ /* Macros for min/max. */
165
+ #define MIN(a,b) (((a)<(b))?(a):(b))
166
+ #define MAX(a,b) (((a)>(b))?(a):(b))
167
+
165
168
  /******************************************************************************/
166
169
  /* Lex mode manipulations */
167
170
  /******************************************************************************/
168
171
 
169
172
  // Returns the incrementor character that should be used to increment the
170
173
  // nesting count if one is possible.
171
- static inline char
172
- lex_mode_incrementor(const char start) {
174
+ static inline uint8_t
175
+ lex_mode_incrementor(const uint8_t start) {
173
176
  switch (start) {
174
177
  case '(':
175
178
  case '[':
@@ -183,8 +186,8 @@ lex_mode_incrementor(const char start) {
183
186
 
184
187
  // Returns the matching character that should be used to terminate a list
185
188
  // beginning with the given character.
186
- static inline char
187
- lex_mode_terminator(const char start) {
189
+ static inline uint8_t
190
+ lex_mode_terminator(const uint8_t start) {
188
191
  switch (start) {
189
192
  case '(':
190
193
  return ')';
@@ -222,9 +225,9 @@ lex_mode_push(yp_parser_t *parser, yp_lex_mode_t lex_mode) {
222
225
 
223
226
  // Push on a new list lex mode.
224
227
  static inline bool
225
- lex_mode_push_list(yp_parser_t *parser, bool interpolation, char delimiter) {
226
- char incrementor = lex_mode_incrementor(delimiter);
227
- char terminator = lex_mode_terminator(delimiter);
228
+ lex_mode_push_list(yp_parser_t *parser, bool interpolation, uint8_t delimiter) {
229
+ uint8_t incrementor = lex_mode_incrementor(delimiter);
230
+ uint8_t terminator = lex_mode_terminator(delimiter);
228
231
 
229
232
  yp_lex_mode_t lex_mode = {
230
233
  .mode = YP_LEX_LIST,
@@ -238,7 +241,7 @@ lex_mode_push_list(yp_parser_t *parser, bool interpolation, char delimiter) {
238
241
 
239
242
  // These are the places where we need to split up the content of the list.
240
243
  // We'll use strpbrk to find the first of these characters.
241
- char *breakpoints = lex_mode.as.list.breakpoints;
244
+ uint8_t *breakpoints = lex_mode.as.list.breakpoints;
242
245
  memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
243
246
 
244
247
  // Now we'll add the terminator to the list of breakpoints.
@@ -261,7 +264,7 @@ lex_mode_push_list(yp_parser_t *parser, bool interpolation, char delimiter) {
261
264
 
262
265
  // Push on a new regexp lex mode.
263
266
  static inline bool
264
- lex_mode_push_regexp(yp_parser_t *parser, char incrementor, char terminator) {
267
+ lex_mode_push_regexp(yp_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
265
268
  yp_lex_mode_t lex_mode = {
266
269
  .mode = YP_LEX_REGEXP,
267
270
  .as.regexp = {
@@ -274,7 +277,7 @@ lex_mode_push_regexp(yp_parser_t *parser, char incrementor, char terminator) {
274
277
  // These are the places where we need to split up the content of the
275
278
  // regular expression. We'll use strpbrk to find the first of these
276
279
  // characters.
277
- char *breakpoints = lex_mode.as.regexp.breakpoints;
280
+ uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
278
281
  memcpy(breakpoints, "\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
279
282
 
280
283
  // First we'll add the terminator.
@@ -290,7 +293,7 @@ lex_mode_push_regexp(yp_parser_t *parser, char incrementor, char terminator) {
290
293
 
291
294
  // Push on a new string lex mode.
292
295
  static inline bool
293
- lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed, char incrementor, char terminator) {
296
+ lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
294
297
  yp_lex_mode_t lex_mode = {
295
298
  .mode = YP_LEX_STRING,
296
299
  .as.string = {
@@ -304,7 +307,7 @@ lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed
304
307
 
305
308
  // These are the places where we need to split up the content of the
306
309
  // string. We'll use strpbrk to find the first of these characters.
307
- char *breakpoints = lex_mode.as.string.breakpoints;
310
+ uint8_t *breakpoints = lex_mode.as.string.breakpoints;
308
311
  memcpy(breakpoints, "\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
309
312
 
310
313
  // Now add in the terminator.
@@ -362,7 +365,7 @@ lex_state_ignored_p(yp_parser_t *parser) {
362
365
 
363
366
  if (ignored) {
364
367
  return YP_IGNORED_NEWLINE_ALL;
365
- } else if (parser->lex_state == (YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED)) {
368
+ } else if ((parser->lex_state & ~((unsigned int) YP_LEX_STATE_LABEL)) == (YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED)) {
366
369
  return YP_IGNORED_NEWLINE_PATTERN;
367
370
  } else {
368
371
  return YP_IGNORED_NEWLINE_NONE;
@@ -381,6 +384,9 @@ lex_state_arg_p(yp_parser_t *parser) {
381
384
 
382
385
  static inline bool
383
386
  lex_state_spcarg_p(yp_parser_t *parser, bool space_seen) {
387
+ if (parser->current.end >= parser->end) {
388
+ return false;
389
+ }
384
390
  return lex_state_arg_p(parser) && space_seen && !yp_char_is_whitespace(*parser->current.end);
385
391
  }
386
392
 
@@ -421,7 +427,7 @@ debug_lex_state_set(yp_parser_t *parser, yp_lex_state_t state, char const * call
421
427
 
422
428
  // Retrieve the constant pool id for the given location.
423
429
  static inline yp_constant_id_t
424
- yp_parser_constant_id_location(yp_parser_t *parser, const char *start, const char *end) {
430
+ yp_parser_constant_id_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
425
431
  return yp_constant_pool_insert(&parser->constant_pool, start, (size_t) (end - start));
426
432
  }
427
433
 
@@ -536,17 +542,116 @@ yp_arguments_validate(yp_parser_t *parser, yp_arguments_t *arguments) {
536
542
  }
537
543
  }
538
544
 
545
+ /******************************************************************************/
546
+ /* Scope node functions */
547
+ /******************************************************************************/
548
+
549
+ // Generate a scope node from the given node.
550
+ void
551
+ yp_scope_node_init(yp_node_t *node, yp_scope_node_t *scope) {
552
+ scope->base.type = YP_NODE_SCOPE_NODE;
553
+ scope->base.location.start = node->location.start;
554
+ scope->base.location.end = node->location.end;
555
+
556
+ scope->parameters = NULL;
557
+ scope->body = NULL;
558
+ yp_constant_id_list_init(&scope->locals);
559
+
560
+ switch (YP_NODE_TYPE(node)) {
561
+ case YP_NODE_BLOCK_NODE: {
562
+ yp_block_node_t *cast = (yp_block_node_t *) node;
563
+ if (cast->parameters) scope->parameters = cast->parameters->parameters;
564
+ scope->body = cast->body;
565
+ scope->locals = cast->locals;
566
+ break;
567
+ }
568
+ case YP_NODE_CLASS_NODE: {
569
+ yp_class_node_t *cast = (yp_class_node_t *) node;
570
+ scope->body = cast->body;
571
+ scope->locals = cast->locals;
572
+ break;
573
+ }
574
+ case YP_NODE_DEF_NODE: {
575
+ yp_def_node_t *cast = (yp_def_node_t *) node;
576
+ scope->parameters = cast->parameters;
577
+ scope->body = cast->body;
578
+ scope->locals = cast->locals;
579
+ break;
580
+ }
581
+ case YP_NODE_LAMBDA_NODE: {
582
+ yp_lambda_node_t *cast = (yp_lambda_node_t *) node;
583
+ if (cast->parameters) scope->parameters = cast->parameters->parameters;
584
+ scope->body = cast->body;
585
+ scope->locals = cast->locals;
586
+ break;
587
+ }
588
+ case YP_NODE_MODULE_NODE: {
589
+ yp_module_node_t *cast = (yp_module_node_t *) node;
590
+ scope->body = cast->body;
591
+ scope->locals = cast->locals;
592
+ break;
593
+ }
594
+ case YP_NODE_PROGRAM_NODE: {
595
+ yp_program_node_t *cast = (yp_program_node_t *) node;
596
+ scope->body = (yp_node_t *) cast->statements;
597
+ scope->locals = cast->locals;
598
+ break;
599
+ }
600
+ case YP_NODE_SINGLETON_CLASS_NODE: {
601
+ yp_singleton_class_node_t *cast = (yp_singleton_class_node_t *) node;
602
+ scope->body = cast->body;
603
+ scope->locals = cast->locals;
604
+ break;
605
+ }
606
+ default:
607
+ assert(false && "unreachable");
608
+ break;
609
+ }
610
+ }
611
+
539
612
  /******************************************************************************/
540
613
  /* Node creation functions */
541
614
  /******************************************************************************/
542
615
 
616
+ // Parse the decimal number represented by the range of bytes. Returns
617
+ // UINT32_MAX if the number fails to parse. This function assumes that the range
618
+ // of bytes has already been validated to contain only decimal digits.
619
+ static uint32_t
620
+ parse_decimal_number(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
621
+ ptrdiff_t diff = end - start;
622
+ assert(diff > 0 && ((unsigned long) diff < SIZE_MAX));
623
+ size_t length = (size_t) diff;
624
+
625
+ char *digits = calloc(length + 1, sizeof(char));
626
+ memcpy(digits, start, length);
627
+ digits[length] = '\0';
628
+
629
+ char *endptr;
630
+ errno = 0;
631
+ unsigned long value = strtoul(digits, &endptr, 10);
632
+
633
+ if ((digits == endptr) || (*endptr != '\0') || (errno == ERANGE)) {
634
+ yp_diagnostic_list_append(&parser->error_list, start, end, "invalid decimal number");
635
+ value = UINT32_MAX;
636
+ }
637
+
638
+ free(digits);
639
+
640
+ if (value > UINT32_MAX) {
641
+ yp_diagnostic_list_append(&parser->error_list, start, end, "invalid decimal number");
642
+ value = UINT32_MAX;
643
+ }
644
+
645
+ return (uint32_t) value;
646
+ }
647
+
543
648
  // Parse out the options for a regular expression.
544
649
  static inline yp_node_flags_t
545
650
  yp_regular_expression_flags_create(const yp_token_t *closing) {
546
651
  yp_node_flags_t flags = 0;
547
652
 
548
653
  if (closing->type == YP_TOKEN_REGEXP_END) {
549
- for (const char *flag = closing->start + 1; flag < closing->end; flag++) {
654
+ for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
550
655
  switch (*flag) {
551
656
  case 'i': flags |= YP_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
552
657
  case 'm': flags |= YP_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
@@ -588,7 +693,7 @@ yp_alloc_node(YP_ATTRIBUTE_UNUSED yp_parser_t *parser, size_t size) {
588
693
 
589
694
  // Allocate a new MissingNode node.
590
695
  static yp_missing_node_t *
591
- yp_missing_node_create(yp_parser_t *parser, const char *start, const char *end) {
696
+ yp_missing_node_create(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
592
697
  yp_missing_node_t *node = YP_ALLOC_NODE(parser, yp_missing_node_t);
593
698
  *node = (yp_missing_node_t) {{ .type = YP_NODE_MISSING_NODE, .location = { .start = start, .end = end } }};
594
699
  return node;
@@ -658,27 +763,6 @@ yp_and_node_create(yp_parser_t *parser, yp_node_t *left, const yp_token_t *opera
658
763
  return node;
659
764
  }
660
765
 
661
- // Allocate and initialize a new AndWriteNode.
662
- static yp_and_write_node_t *
663
- yp_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
664
- yp_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_and_write_node_t);
665
-
666
- *node = (yp_and_write_node_t) {
667
- {
668
- .type = YP_NODE_AND_WRITE_NODE,
669
- .location = {
670
- .start = target->location.start,
671
- .end = value->location.end
672
- },
673
- },
674
- .target = target,
675
- .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
676
- .value = value
677
- };
678
-
679
- return node;
680
- }
681
-
682
766
  // Allocate and initialize a new arguments node.
683
767
  static yp_arguments_node_t *
684
768
  yp_arguments_node_create(yp_parser_t *parser) {
@@ -878,7 +962,7 @@ yp_array_pattern_node_requireds_append(yp_array_pattern_node_t *node, yp_node_t
878
962
  static yp_assoc_node_t *
879
963
  yp_assoc_node_create(yp_parser_t *parser, yp_node_t *key, const yp_token_t *operator, yp_node_t *value) {
880
964
  yp_assoc_node_t *node = YP_ALLOC_NODE(parser, yp_assoc_node_t);
881
- const char *end;
965
+ const uint8_t *end;
882
966
 
883
967
  if (value != NULL) {
884
968
  end = value->location.end;
@@ -1062,7 +1146,7 @@ static yp_block_parameters_node_t *
1062
1146
  yp_block_parameters_node_create(yp_parser_t *parser, yp_parameters_node_t *parameters, const yp_token_t *opening) {
1063
1147
  yp_block_parameters_node_t *node = YP_ALLOC_NODE(parser, yp_block_parameters_node_t);
1064
1148
 
1065
- const char *start;
1149
+ const uint8_t *start;
1066
1150
  if (opening->type != YP_TOKEN_NOT_PROVIDED) {
1067
1151
  start = opening->start;
1068
1152
  } else if (parameters != NULL) {
@@ -1071,7 +1155,7 @@ yp_block_parameters_node_create(yp_parser_t *parser, yp_parameters_node_t *param
1071
1155
  start = NULL;
1072
1156
  }
1073
1157
 
1074
- const char *end;
1158
+ const uint8_t *end;
1075
1159
  if (parameters != NULL) {
1076
1160
  end = parameters->base.location.end;
1077
1161
  } else if (opening->type != YP_TOKEN_NOT_PROVIDED) {
@@ -1151,7 +1235,7 @@ yp_call_node_create(yp_parser_t *parser) {
1151
1235
  },
1152
1236
  .receiver = NULL,
1153
1237
  .operator_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1154
- .message_loc = YP_LOCATION_NULL_VALUE(parser),
1238
+ .message_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1155
1239
  .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1156
1240
  .arguments = NULL,
1157
1241
  .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
@@ -1192,8 +1276,8 @@ static yp_call_node_t *
1192
1276
  yp_call_node_binary_create(yp_parser_t *parser, yp_node_t *receiver, yp_token_t *operator, yp_node_t *argument) {
1193
1277
  yp_call_node_t *node = yp_call_node_create(parser);
1194
1278
 
1195
- node->base.location.start = receiver->location.start;
1196
- node->base.location.end = argument->location.end;
1279
+ node->base.location.start = MIN(receiver->location.start, argument->location.start);
1280
+ node->base.location.end = MAX(receiver->location.end, argument->location.end);
1197
1281
 
1198
1282
  node->receiver = receiver;
1199
1283
  node->message_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
@@ -1389,7 +1473,7 @@ yp_call_operator_write_node_create(yp_parser_t *parser, yp_call_node_t *target,
1389
1473
  .target = target,
1390
1474
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1391
1475
  .value = value,
1392
- .operator_id = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
1476
+ .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
1393
1477
  };
1394
1478
 
1395
1479
  return node;
@@ -1486,7 +1570,7 @@ yp_case_node_end_keyword_loc_set(yp_case_node_t *node, const yp_token_t *end_key
1486
1570
 
1487
1571
  // Allocate a new ClassNode node.
1488
1572
  static yp_class_node_t *
1489
- yp_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *class_keyword, yp_node_t *constant_path, const yp_token_t *inheritance_operator, yp_node_t *superclass, yp_node_t *body, const yp_token_t *end_keyword) {
1573
+ yp_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *class_keyword, yp_node_t *constant_path, const yp_token_t *name, const yp_token_t *inheritance_operator, yp_node_t *superclass, yp_node_t *body, const yp_token_t *end_keyword) {
1490
1574
  yp_class_node_t *node = YP_ALLOC_NODE(parser, yp_class_node_t);
1491
1575
 
1492
1576
  *node = (yp_class_node_t) {
@@ -1500,7 +1584,78 @@ yp_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const y
1500
1584
  .inheritance_operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
1501
1585
  .superclass = superclass,
1502
1586
  .body = body,
1503
- .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword)
1587
+ .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword),
1588
+ .name = YP_EMPTY_STRING
1589
+ };
1590
+
1591
+ yp_string_shared_init(&node->name, name->start, name->end);
1592
+ return node;
1593
+ }
1594
+
1595
+ // Allocate and initialize a new ClassVariableAndWriteNode node.
1596
+ static yp_class_variable_and_write_node_t *
1597
+ yp_class_variable_and_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1598
+ assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
1599
+ yp_class_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_and_write_node_t);
1600
+
1601
+ *node = (yp_class_variable_and_write_node_t) {
1602
+ {
1603
+ .type = YP_NODE_CLASS_VARIABLE_AND_WRITE_NODE,
1604
+ .location = {
1605
+ .start = target->base.location.start,
1606
+ .end = value->location.end
1607
+ }
1608
+ },
1609
+ .name = target->name,
1610
+ .name_loc = target->base.location,
1611
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1612
+ .value = value
1613
+ };
1614
+
1615
+ return node;
1616
+ }
1617
+
1618
+ // Allocate and initialize a new ClassVariableOperatorWriteNode node.
1619
+ static yp_class_variable_operator_write_node_t *
1620
+ yp_class_variable_operator_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1621
+ yp_class_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_operator_write_node_t);
1622
+
1623
+ *node = (yp_class_variable_operator_write_node_t) {
1624
+ {
1625
+ .type = YP_NODE_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
1626
+ .location = {
1627
+ .start = target->base.location.start,
1628
+ .end = value->location.end
1629
+ }
1630
+ },
1631
+ .name = target->name,
1632
+ .name_loc = target->base.location,
1633
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1634
+ .value = value,
1635
+ .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
1636
+ };
1637
+
1638
+ return node;
1639
+ }
1640
+
1641
+ // Allocate and initialize a new ClassVariableOrWriteNode node.
1642
+ static yp_class_variable_or_write_node_t *
1643
+ yp_class_variable_or_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1644
+ assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
1645
+ yp_class_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_or_write_node_t);
1646
+
1647
+ *node = (yp_class_variable_or_write_node_t) {
1648
+ {
1649
+ .type = YP_NODE_CLASS_VARIABLE_OR_WRITE_NODE,
1650
+ .location = {
1651
+ .start = target->base.location.start,
1652
+ .end = value->location.end
1653
+ }
1654
+ },
1655
+ .name = target->name,
1656
+ .name_loc = target->base.location,
1657
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1658
+ .value = value
1504
1659
  };
1505
1660
 
1506
1661
  return node;
@@ -1511,13 +1666,21 @@ static yp_class_variable_read_node_t *
1511
1666
  yp_class_variable_read_node_create(yp_parser_t *parser, const yp_token_t *token) {
1512
1667
  assert(token->type == YP_TOKEN_CLASS_VARIABLE);
1513
1668
  yp_class_variable_read_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_read_node_t);
1514
- *node = (yp_class_variable_read_node_t) {{ .type = YP_NODE_CLASS_VARIABLE_READ_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }};
1669
+
1670
+ *node = (yp_class_variable_read_node_t) {
1671
+ {
1672
+ .type = YP_NODE_CLASS_VARIABLE_READ_NODE,
1673
+ .location = YP_LOCATION_TOKEN_VALUE(token)
1674
+ },
1675
+ .name = yp_parser_constant_id_location(parser, token->start, token->end)
1676
+ };
1677
+
1515
1678
  return node;
1516
1679
  }
1517
1680
 
1518
1681
  // Initialize a new ClassVariableWriteNode node from a ClassVariableReadNode node.
1519
1682
  static yp_class_variable_write_node_t *
1520
- yp_class_variable_read_node_to_class_variable_write_node(yp_parser_t *parser, yp_class_variable_read_node_t *read_node, yp_token_t *operator, yp_node_t *value) {
1683
+ yp_class_variable_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *read_node, yp_token_t *operator, yp_node_t *value) {
1521
1684
  yp_class_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_write_node_t);
1522
1685
 
1523
1686
  *node = (yp_class_variable_write_node_t) {
@@ -1525,10 +1688,11 @@ yp_class_variable_read_node_to_class_variable_write_node(yp_parser_t *parser, yp
1525
1688
  .type = YP_NODE_CLASS_VARIABLE_WRITE_NODE,
1526
1689
  .location = {
1527
1690
  .start = read_node->base.location.start,
1528
- .end = value != NULL ? value->location.end : read_node->base.location.end
1691
+ .end = value->location.end
1529
1692
  },
1530
1693
  },
1531
- .name_loc = YP_LOCATION_NODE_VALUE((yp_node_t *)read_node),
1694
+ .name = read_node->name,
1695
+ .name_loc = YP_LOCATION_NODE_VALUE((yp_node_t *) read_node),
1532
1696
  .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
1533
1697
  .value = value
1534
1698
  };
@@ -1536,6 +1700,72 @@ yp_class_variable_read_node_to_class_variable_write_node(yp_parser_t *parser, yp
1536
1700
  return node;
1537
1701
  }
1538
1702
 
1703
+ // Allocate and initialize a new ConstantPathAndWriteNode node.
1704
+ static yp_constant_path_and_write_node_t *
1705
+ yp_constant_path_and_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1706
+ assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
1707
+ yp_constant_path_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_and_write_node_t);
1708
+
1709
+ *node = (yp_constant_path_and_write_node_t) {
1710
+ {
1711
+ .type = YP_NODE_CONSTANT_PATH_AND_WRITE_NODE,
1712
+ .location = {
1713
+ .start = target->base.location.start,
1714
+ .end = value->location.end
1715
+ }
1716
+ },
1717
+ .target = target,
1718
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1719
+ .value = value
1720
+ };
1721
+
1722
+ return node;
1723
+ }
1724
+
1725
+ // Allocate and initialize a new ConstantPathOperatorWriteNode node.
1726
+ static yp_constant_path_operator_write_node_t *
1727
+ yp_constant_path_operator_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1728
+ yp_constant_path_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_operator_write_node_t);
1729
+
1730
+ *node = (yp_constant_path_operator_write_node_t) {
1731
+ {
1732
+ .type = YP_NODE_CONSTANT_PATH_OPERATOR_WRITE_NODE,
1733
+ .location = {
1734
+ .start = target->base.location.start,
1735
+ .end = value->location.end
1736
+ }
1737
+ },
1738
+ .target = target,
1739
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1740
+ .value = value,
1741
+ .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
1742
+ };
1743
+
1744
+ return node;
1745
+ }
1746
+
1747
+ // Allocate and initialize a new ConstantPathOrWriteNode node.
1748
+ static yp_constant_path_or_write_node_t *
1749
+ yp_constant_path_or_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1750
+ assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
1751
+ yp_constant_path_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_or_write_node_t);
1752
+
1753
+ *node = (yp_constant_path_or_write_node_t) {
1754
+ {
1755
+ .type = YP_NODE_CONSTANT_PATH_OR_WRITE_NODE,
1756
+ .location = {
1757
+ .start = target->base.location.start,
1758
+ .end = value->location.end
1759
+ }
1760
+ },
1761
+ .target = target,
1762
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1763
+ .value = value
1764
+ };
1765
+
1766
+ return node;
1767
+ }
1768
+
1539
1769
  // Allocate and initialize a new ConstantPathNode node.
1540
1770
  static yp_constant_path_node_t *
1541
1771
  yp_constant_path_node_create(yp_parser_t *parser, yp_node_t *parent, const yp_token_t *delimiter, yp_node_t *child) {
@@ -1567,7 +1797,7 @@ yp_constant_path_write_node_create(yp_parser_t *parser, yp_constant_path_node_t
1567
1797
  .type = YP_NODE_CONSTANT_PATH_WRITE_NODE,
1568
1798
  .location = {
1569
1799
  .start = target->base.location.start,
1570
- .end = (value == NULL ? target->base.location.end : value->location.end)
1800
+ .end = value->location.end
1571
1801
  },
1572
1802
  },
1573
1803
  .target = target,
@@ -1578,6 +1808,74 @@ yp_constant_path_write_node_create(yp_parser_t *parser, yp_constant_path_node_t
1578
1808
  return node;
1579
1809
  }
1580
1810
 
1811
+ // Allocate and initialize a new ConstantAndWriteNode node.
1812
+ static yp_constant_and_write_node_t *
1813
+ yp_constant_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1814
+ assert(YP_NODE_TYPE_P(target, YP_NODE_CONSTANT_READ_NODE));
1815
+ assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
1816
+ yp_constant_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_and_write_node_t);
1817
+
1818
+ *node = (yp_constant_and_write_node_t) {
1819
+ {
1820
+ .type = YP_NODE_CONSTANT_AND_WRITE_NODE,
1821
+ .location = {
1822
+ .start = target->location.start,
1823
+ .end = value->location.end
1824
+ }
1825
+ },
1826
+ .name_loc = target->location,
1827
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1828
+ .value = value
1829
+ };
1830
+
1831
+ return node;
1832
+ }
1833
+
1834
+ // Allocate and initialize a new ConstantOperatorWriteNode node.
1835
+ static yp_constant_operator_write_node_t *
1836
+ yp_constant_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1837
+ yp_constant_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_operator_write_node_t);
1838
+
1839
+ *node = (yp_constant_operator_write_node_t) {
1840
+ {
1841
+ .type = YP_NODE_CONSTANT_OPERATOR_WRITE_NODE,
1842
+ .location = {
1843
+ .start = target->location.start,
1844
+ .end = value->location.end
1845
+ }
1846
+ },
1847
+ .name_loc = target->location,
1848
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1849
+ .value = value,
1850
+ .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
1851
+ };
1852
+
1853
+ return node;
1854
+ }
1855
+
1856
+ // Allocate and initialize a new ConstantOrWriteNode node.
1857
+ static yp_constant_or_write_node_t *
1858
+ yp_constant_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1859
+ assert(YP_NODE_TYPE_P(target, YP_NODE_CONSTANT_READ_NODE));
1860
+ assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
1861
+ yp_constant_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_or_write_node_t);
1862
+
1863
+ *node = (yp_constant_or_write_node_t) {
1864
+ {
1865
+ .type = YP_NODE_CONSTANT_OR_WRITE_NODE,
1866
+ .location = {
1867
+ .start = target->location.start,
1868
+ .end = value->location.end
1869
+ }
1870
+ },
1871
+ .name_loc = target->location,
1872
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1873
+ .value = value
1874
+ };
1875
+
1876
+ return node;
1877
+ }
1878
+
1581
1879
  // Allocate and initialize a new ConstantReadNode node.
1582
1880
  static yp_constant_read_node_t *
1583
1881
  yp_constant_read_node_create(yp_parser_t *parser, const yp_token_t *name) {
@@ -1598,7 +1896,7 @@ yp_constant_write_node_create(yp_parser_t *parser, yp_location_t *name_loc, cons
1598
1896
  .type = YP_NODE_CONSTANT_WRITE_NODE,
1599
1897
  .location = {
1600
1898
  .start = name_loc->start,
1601
- .end = value != NULL ? value->location.end : name_loc->end
1899
+ .end = value->location.end
1602
1900
  },
1603
1901
  },
1604
1902
  .name_loc = *name_loc,
@@ -1626,7 +1924,7 @@ yp_def_node_create(
1626
1924
  const yp_token_t *end_keyword
1627
1925
  ) {
1628
1926
  yp_def_node_t *node = YP_ALLOC_NODE(parser, yp_def_node_t);
1629
- const char *end;
1927
+ const uint8_t *end;
1630
1928
 
1631
1929
  if (end_keyword->type == YP_TOKEN_NOT_PROVIDED) {
1632
1930
  end = body->location.end;
@@ -1681,7 +1979,7 @@ yp_defined_node_create(yp_parser_t *parser, const yp_token_t *lparen, yp_node_t
1681
1979
  static yp_else_node_t *
1682
1980
  yp_else_node_create(yp_parser_t *parser, const yp_token_t *else_keyword, yp_statements_node_t *statements, const yp_token_t *end_keyword) {
1683
1981
  yp_else_node_t *node = YP_ALLOC_NODE(parser, yp_else_node_t);
1684
- const char *end = NULL;
1982
+ const uint8_t *end = NULL;
1685
1983
  if ((end_keyword->type == YP_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
1686
1984
  end = statements->base.location.end;
1687
1985
  } else {
@@ -2012,6 +2310,74 @@ yp_hash_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *assoc
2012
2310
  return node;
2013
2311
  }
2014
2312
 
2313
+ // Allocate and initialize a new GlobalVariableAndWriteNode node.
2314
+ static yp_global_variable_and_write_node_t *
2315
+ yp_global_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2316
+ assert(YP_NODE_TYPE_P(target, YP_NODE_GLOBAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_BACK_REFERENCE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_NUMBERED_REFERENCE_READ_NODE));
2317
+ assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2318
+ yp_global_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_and_write_node_t);
2319
+
2320
+ *node = (yp_global_variable_and_write_node_t) {
2321
+ {
2322
+ .type = YP_NODE_GLOBAL_VARIABLE_AND_WRITE_NODE,
2323
+ .location = {
2324
+ .start = target->location.start,
2325
+ .end = value->location.end
2326
+ }
2327
+ },
2328
+ .name_loc = target->location,
2329
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2330
+ .value = value
2331
+ };
2332
+
2333
+ return node;
2334
+ }
2335
+
2336
+ // Allocate and initialize a new GlobalVariableOperatorWriteNode node.
2337
+ static yp_global_variable_operator_write_node_t *
2338
+ yp_global_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2339
+ yp_global_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_operator_write_node_t);
2340
+
2341
+ *node = (yp_global_variable_operator_write_node_t) {
2342
+ {
2343
+ .type = YP_NODE_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
2344
+ .location = {
2345
+ .start = target->location.start,
2346
+ .end = value->location.end
2347
+ }
2348
+ },
2349
+ .name_loc = target->location,
2350
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2351
+ .value = value,
2352
+ .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
2353
+ };
2354
+
2355
+ return node;
2356
+ }
2357
+
2358
+ // Allocate and initialize a new GlobalVariableOrWriteNode node.
2359
+ static yp_global_variable_or_write_node_t *
2360
+ yp_global_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2361
+ assert(YP_NODE_TYPE_P(target, YP_NODE_GLOBAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_BACK_REFERENCE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_NUMBERED_REFERENCE_READ_NODE));
2362
+ assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
2363
+ yp_global_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_or_write_node_t);
2364
+
2365
+ *node = (yp_global_variable_or_write_node_t) {
2366
+ {
2367
+ .type = YP_NODE_GLOBAL_VARIABLE_OR_WRITE_NODE,
2368
+ .location = {
2369
+ .start = target->location.start,
2370
+ .end = value->location.end
2371
+ }
2372
+ },
2373
+ .name_loc = target->location,
2374
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2375
+ .value = value
2376
+ };
2377
+
2378
+ return node;
2379
+ }
2380
+
2015
2381
  // Allocate a new GlobalVariableReadNode node.
2016
2382
  static yp_global_variable_read_node_t *
2017
2383
  yp_global_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name) {
@@ -2037,7 +2403,7 @@ yp_global_variable_write_node_create(yp_parser_t *parser, const yp_location_t *n
2037
2403
  .type = YP_NODE_GLOBAL_VARIABLE_WRITE_NODE,
2038
2404
  .location = {
2039
2405
  .start = name_loc->start,
2040
- .end = (value == NULL ? name_loc->end : value->location.end)
2406
+ .end = value->location.end
2041
2407
  },
2042
2408
  },
2043
2409
  .name_loc = *name_loc,
@@ -2093,7 +2459,7 @@ yp_if_node_create(yp_parser_t *parser,
2093
2459
  yp_flip_flop(predicate);
2094
2460
  yp_if_node_t *node = YP_ALLOC_NODE(parser, yp_if_node_t);
2095
2461
 
2096
- const char *end;
2462
+ const uint8_t *end;
2097
2463
  if (end_keyword->type != YP_TOKEN_NOT_PROVIDED) {
2098
2464
  end = end_keyword->end;
2099
2465
  } else if (consequent != NULL) {
@@ -2276,7 +2642,7 @@ static yp_in_node_t *
2276
2642
  yp_in_node_create(yp_parser_t *parser, yp_node_t *pattern, yp_statements_node_t *statements, const yp_token_t *in_keyword, const yp_token_t *then_keyword) {
2277
2643
  yp_in_node_t *node = YP_ALLOC_NODE(parser, yp_in_node_t);
2278
2644
 
2279
- const char *end;
2645
+ const uint8_t *end;
2280
2646
  if (statements != NULL) {
2281
2647
  end = statements->base.location.end;
2282
2648
  } else if (then_keyword->type != YP_TOKEN_NOT_PROVIDED) {
@@ -2302,15 +2668,88 @@ yp_in_node_create(yp_parser_t *parser, yp_node_t *pattern, yp_statements_node_t
2302
2668
  return node;
2303
2669
  }
2304
2670
 
2671
+ // Allocate and initialize a new InstanceVariableAndWriteNode node.
2672
+ static yp_instance_variable_and_write_node_t *
2673
+ yp_instance_variable_and_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2674
+ assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2675
+ yp_instance_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_and_write_node_t);
2676
+
2677
+ *node = (yp_instance_variable_and_write_node_t) {
2678
+ {
2679
+ .type = YP_NODE_INSTANCE_VARIABLE_AND_WRITE_NODE,
2680
+ .location = {
2681
+ .start = target->base.location.start,
2682
+ .end = value->location.end
2683
+ }
2684
+ },
2685
+ .name = target->name,
2686
+ .name_loc = target->base.location,
2687
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2688
+ .value = value
2689
+ };
2690
+
2691
+ return node;
2692
+ }
2693
+
2694
+ // Allocate and initialize a new InstanceVariableOperatorWriteNode node.
2695
+ static yp_instance_variable_operator_write_node_t *
2696
+ yp_instance_variable_operator_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2697
+ yp_instance_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_operator_write_node_t);
2698
+
2699
+ *node = (yp_instance_variable_operator_write_node_t) {
2700
+ {
2701
+ .type = YP_NODE_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
2702
+ .location = {
2703
+ .start = target->base.location.start,
2704
+ .end = value->location.end
2705
+ }
2706
+ },
2707
+ .name = target->name,
2708
+ .name_loc = target->base.location,
2709
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2710
+ .value = value,
2711
+ .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
2712
+ };
2713
+
2714
+ return node;
2715
+ }
2716
+
2717
+ // Allocate and initialize a new InstanceVariableOrWriteNode node.
2718
+ static yp_instance_variable_or_write_node_t *
2719
+ yp_instance_variable_or_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2720
+ assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
2721
+ yp_instance_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_or_write_node_t);
2722
+
2723
+ *node = (yp_instance_variable_or_write_node_t) {
2724
+ {
2725
+ .type = YP_NODE_INSTANCE_VARIABLE_OR_WRITE_NODE,
2726
+ .location = {
2727
+ .start = target->base.location.start,
2728
+ .end = value->location.end
2729
+ }
2730
+ },
2731
+ .name = target->name,
2732
+ .name_loc = target->base.location,
2733
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2734
+ .value = value
2735
+ };
2736
+
2737
+ return node;
2738
+ }
2739
+
2305
2740
  // Allocate and initialize a new InstanceVariableReadNode node.
2306
2741
  static yp_instance_variable_read_node_t *
2307
2742
  yp_instance_variable_read_node_create(yp_parser_t *parser, const yp_token_t *token) {
2308
2743
  assert(token->type == YP_TOKEN_INSTANCE_VARIABLE);
2309
2744
  yp_instance_variable_read_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_read_node_t);
2310
2745
 
2311
- *node = (yp_instance_variable_read_node_t) {{
2312
- .type = YP_NODE_INSTANCE_VARIABLE_READ_NODE, .location = YP_LOCATION_TOKEN_VALUE(token)
2313
- }};
2746
+ *node = (yp_instance_variable_read_node_t) {
2747
+ {
2748
+ .type = YP_NODE_INSTANCE_VARIABLE_READ_NODE,
2749
+ .location = YP_LOCATION_TOKEN_VALUE(token)
2750
+ },
2751
+ .name = yp_parser_constant_id_location(parser, token->start, token->end)
2752
+ };
2314
2753
 
2315
2754
  return node;
2316
2755
  }
@@ -2324,9 +2763,10 @@ yp_instance_variable_write_node_create(yp_parser_t *parser, yp_instance_variable
2324
2763
  .type = YP_NODE_INSTANCE_VARIABLE_WRITE_NODE,
2325
2764
  .location = {
2326
2765
  .start = read_node->base.location.start,
2327
- .end = value == NULL ? read_node->base.location.end : value->location.end
2766
+ .end = value->location.end
2328
2767
  }
2329
2768
  },
2769
+ .name = read_node->name,
2330
2770
  .name_loc = YP_LOCATION_NODE_BASE_VALUE(read_node),
2331
2771
  .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2332
2772
  .value = value
@@ -2358,8 +2798,13 @@ yp_interpolated_regular_expression_node_create(yp_parser_t *parser, const yp_tok
2358
2798
 
2359
2799
  static inline void
2360
2800
  yp_interpolated_regular_expression_node_append(yp_interpolated_regular_expression_node_t *node, yp_node_t *part) {
2801
+ if (node->base.location.start > part->location.start) {
2802
+ node->base.location.start = part->location.start;
2803
+ }
2804
+ if (node->base.location.end < part->location.end) {
2805
+ node->base.location.end = part->location.end;
2806
+ }
2361
2807
  yp_node_list_append(&node->parts, part);
2362
- node->base.location.end = part->location.end;
2363
2808
  }
2364
2809
 
2365
2810
  static inline void
@@ -2431,17 +2876,12 @@ yp_interpolated_symbol_node_create(yp_parser_t *parser, const yp_token_t *openin
2431
2876
 
2432
2877
  static inline void
2433
2878
  yp_interpolated_symbol_node_append(yp_interpolated_symbol_node_t *node, yp_node_t *part) {
2434
- yp_node_list_append(&node->parts, part);
2435
- if (!node->base.location.start) {
2879
+ if (node->parts.size == 0 && node->opening_loc.start == NULL) {
2436
2880
  node->base.location.start = part->location.start;
2437
2881
  }
2438
- node->base.location.end = part->location.end;
2439
- }
2440
2882
 
2441
- static inline void
2442
- yp_interpolated_symbol_node_closing_set(yp_interpolated_symbol_node_t *node, const yp_token_t *closing) {
2443
- node->closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
2444
- node->base.location.end = closing->end;
2883
+ yp_node_list_append(&node->parts, part);
2884
+ node->base.location.end = part->location.end;
2445
2885
  }
2446
2886
 
2447
2887
  // Allocate a new InterpolatedXStringNode node.
@@ -2551,10 +2991,11 @@ static yp_lambda_node_t *
2551
2991
  yp_lambda_node_create(
2552
2992
  yp_parser_t *parser,
2553
2993
  yp_constant_id_list_t *locals,
2994
+ const yp_token_t *operator,
2554
2995
  const yp_token_t *opening,
2996
+ const yp_token_t *closing,
2555
2997
  yp_block_parameters_node_t *parameters,
2556
- yp_node_t *body,
2557
- const yp_token_t *closing
2998
+ yp_node_t *body
2558
2999
  ) {
2559
3000
  yp_lambda_node_t *node = YP_ALLOC_NODE(parser, yp_lambda_node_t);
2560
3001
 
@@ -2562,12 +3003,14 @@ yp_lambda_node_create(
2562
3003
  {
2563
3004
  .type = YP_NODE_LAMBDA_NODE,
2564
3005
  .location = {
2565
- .start = opening->start,
3006
+ .start = operator->start,
2566
3007
  .end = closing->end
2567
3008
  },
2568
3009
  },
2569
3010
  .locals = *locals,
3011
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2570
3012
  .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
3013
+ .closing_loc = YP_LOCATION_TOKEN_VALUE(closing),
2571
3014
  .parameters = parameters,
2572
3015
  .body = body
2573
3016
  };
@@ -2575,6 +3018,80 @@ yp_lambda_node_create(
2575
3018
  return node;
2576
3019
  }
2577
3020
 
3021
+ // Allocate and initialize a new LocalVariableAndWriteNode node.
3022
+ static yp_local_variable_and_write_node_t *
3023
+ yp_local_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) {
3024
+ assert(YP_NODE_TYPE_P(target, YP_NODE_LOCAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_CALL_NODE));
3025
+ assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3026
+ yp_local_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_and_write_node_t);
3027
+
3028
+ *node = (yp_local_variable_and_write_node_t) {
3029
+ {
3030
+ .type = YP_NODE_LOCAL_VARIABLE_AND_WRITE_NODE,
3031
+ .location = {
3032
+ .start = target->location.start,
3033
+ .end = value->location.end
3034
+ }
3035
+ },
3036
+ .name_loc = target->location,
3037
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
3038
+ .value = value,
3039
+ .name = name,
3040
+ .depth = depth
3041
+ };
3042
+
3043
+ return node;
3044
+ }
3045
+
3046
+ // Allocate and initialize a new LocalVariableOperatorWriteNode node.
3047
+ static yp_local_variable_operator_write_node_t *
3048
+ yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) {
3049
+ yp_local_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_operator_write_node_t);
3050
+
3051
+ *node = (yp_local_variable_operator_write_node_t) {
3052
+ {
3053
+ .type = YP_NODE_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
3054
+ .location = {
3055
+ .start = target->location.start,
3056
+ .end = value->location.end
3057
+ }
3058
+ },
3059
+ .name_loc = target->location,
3060
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
3061
+ .value = value,
3062
+ .name = name,
3063
+ .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1),
3064
+ .depth = depth
3065
+ };
3066
+
3067
+ return node;
3068
+ }
3069
+
3070
+ // Allocate and initialize a new LocalVariableOrWriteNode node.
3071
+ static yp_local_variable_or_write_node_t *
3072
+ yp_local_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) {
3073
+ assert(YP_NODE_TYPE_P(target, YP_NODE_LOCAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_CALL_NODE));
3074
+ assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
3075
+ yp_local_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_or_write_node_t);
3076
+
3077
+ *node = (yp_local_variable_or_write_node_t) {
3078
+ {
3079
+ .type = YP_NODE_LOCAL_VARIABLE_OR_WRITE_NODE,
3080
+ .location = {
3081
+ .start = target->location.start,
3082
+ .end = value->location.end
3083
+ }
3084
+ },
3085
+ .name_loc = target->location,
3086
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
3087
+ .value = value,
3088
+ .name = name,
3089
+ .depth = depth
3090
+ };
3091
+
3092
+ return node;
3093
+ }
3094
+
2578
3095
  // Allocate a new LocalVariableReadNode node.
2579
3096
  static yp_local_variable_read_node_t *
2580
3097
  yp_local_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name, uint32_t depth) {
@@ -2585,7 +3102,7 @@ yp_local_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name,
2585
3102
  .type = YP_NODE_LOCAL_VARIABLE_READ_NODE,
2586
3103
  .location = YP_LOCATION_TOKEN_VALUE(name)
2587
3104
  },
2588
- .constant_id = yp_parser_constant_id_token(parser, name),
3105
+ .name = yp_parser_constant_id_token(parser, name),
2589
3106
  .depth = depth
2590
3107
  };
2591
3108
 
@@ -2594,7 +3111,7 @@ yp_local_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name,
2594
3111
 
2595
3112
  // Allocate and initialize a new LocalVariableWriteNode node.
2596
3113
  static yp_local_variable_write_node_t *
2597
- yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t constant_id, uint32_t depth, yp_node_t *value, const yp_location_t *name_loc, const yp_token_t *operator) {
3114
+ yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t name, uint32_t depth, yp_node_t *value, const yp_location_t *name_loc, const yp_token_t *operator) {
2598
3115
  yp_local_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_write_node_t);
2599
3116
 
2600
3117
  *node = (yp_local_variable_write_node_t) {
@@ -2602,10 +3119,10 @@ yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t consta
2602
3119
  .type = YP_NODE_LOCAL_VARIABLE_WRITE_NODE,
2603
3120
  .location = {
2604
3121
  .start = name_loc->start,
2605
- .end = value == NULL ? name_loc->end : value->location.end
3122
+ .end = value->location.end
2606
3123
  }
2607
3124
  },
2608
- .constant_id = constant_id,
3125
+ .name = name,
2609
3126
  .depth = depth,
2610
3127
  .value = value,
2611
3128
  .name_loc = *name_loc,
@@ -2615,21 +3132,18 @@ yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t consta
2615
3132
  return node;
2616
3133
  }
2617
3134
 
2618
- // Allocate and initialize a new LocalVariableWriteNode node without an operator or target.
2619
- static yp_local_variable_write_node_t *
3135
+ // Allocate and initialize a new LocalVariableTargetNode node.
3136
+ static yp_local_variable_target_node_t *
2620
3137
  yp_local_variable_target_node_create(yp_parser_t *parser, const yp_token_t *name) {
2621
- yp_local_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_write_node_t);
3138
+ yp_local_variable_target_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_target_node_t);
2622
3139
 
2623
- *node = (yp_local_variable_write_node_t) {
3140
+ *node = (yp_local_variable_target_node_t) {
2624
3141
  {
2625
- .type = YP_NODE_LOCAL_VARIABLE_WRITE_NODE,
3142
+ .type = YP_NODE_LOCAL_VARIABLE_TARGET_NODE,
2626
3143
  .location = YP_LOCATION_TOKEN_VALUE(name)
2627
3144
  },
2628
- .constant_id = yp_parser_constant_id_token(parser, name),
2629
- .depth = 0,
2630
- .value = NULL,
2631
- .name_loc = YP_LOCATION_TOKEN_VALUE(name),
2632
- .operator_loc = { .start = NULL, .end = NULL }
3145
+ .name = yp_parser_constant_id_token(parser, name),
3146
+ .depth = 0
2633
3147
  };
2634
3148
 
2635
3149
  return node;
@@ -2679,7 +3193,7 @@ yp_match_required_node_create(yp_parser_t *parser, yp_node_t *value, yp_node_t *
2679
3193
 
2680
3194
  // Allocate a new ModuleNode node.
2681
3195
  static yp_module_node_t *
2682
- yp_module_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *module_keyword, yp_node_t *constant_path, yp_node_t *body, const yp_token_t *end_keyword) {
3196
+ yp_module_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *module_keyword, yp_node_t *constant_path, const yp_token_t *name, yp_node_t *body, const yp_token_t *end_keyword) {
2683
3197
  yp_module_node_t *node = YP_ALLOC_NODE(parser, yp_module_node_t);
2684
3198
 
2685
3199
  *node = (yp_module_node_t) {
@@ -2694,9 +3208,11 @@ yp_module_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const
2694
3208
  .module_keyword_loc = YP_LOCATION_TOKEN_VALUE(module_keyword),
2695
3209
  .constant_path = constant_path,
2696
3210
  .body = body,
2697
- .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword)
3211
+ .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword),
3212
+ .name = YP_EMPTY_STRING
2698
3213
  };
2699
3214
 
3215
+ yp_string_shared_init(&node->name, name->start, name->end);
2700
3216
  return node;
2701
3217
  }
2702
3218
 
@@ -2708,7 +3224,10 @@ yp_multi_write_node_create(yp_parser_t *parser, const yp_token_t *operator, yp_n
2708
3224
  *node = (yp_multi_write_node_t) {
2709
3225
  {
2710
3226
  .type = YP_NODE_MULTI_WRITE_NODE,
2711
- .location = { .start = NULL, .end = NULL },
3227
+ .location = {
3228
+ .start = lparen_loc->start,
3229
+ .end = value == NULL ? rparen_loc->end : value->location.end
3230
+ },
2712
3231
  },
2713
3232
  .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2714
3233
  .value = value,
@@ -2802,29 +3321,8 @@ yp_numbered_reference_read_node_create(yp_parser_t *parser, const yp_token_t *na
2802
3321
  {
2803
3322
  .type = YP_NODE_NUMBERED_REFERENCE_READ_NODE,
2804
3323
  .location = YP_LOCATION_TOKEN_VALUE(name),
2805
- }
2806
- };
2807
-
2808
- return node;
2809
- }
2810
-
2811
- // Allocate and initialize a new OperatorWriteNode.
2812
- static yp_operator_write_node_t *
2813
- yp_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2814
- yp_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_operator_write_node_t);
2815
-
2816
- *node = (yp_operator_write_node_t) {
2817
- {
2818
- .type = YP_NODE_OPERATOR_WRITE_NODE,
2819
- .location = {
2820
- .start = target->location.start,
2821
- .end = value->location.end
2822
- },
2823
3324
  },
2824
- .target = target,
2825
- .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2826
- .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1),
2827
- .value = value
3325
+ .number = parse_decimal_number(parser, name->start + 1, name->end)
2828
3326
  };
2829
3327
 
2830
3328
  return node;
@@ -2843,7 +3341,7 @@ yp_optional_parameter_node_create(yp_parser_t *parser, const yp_token_t *name, c
2843
3341
  .end = value->location.end
2844
3342
  }
2845
3343
  },
2846
- .constant_id = yp_parser_constant_id_token(parser, name),
3344
+ .name = yp_parser_constant_id_token(parser, name),
2847
3345
  .name_loc = YP_LOCATION_TOKEN_VALUE(name),
2848
3346
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2849
3347
  .value = value
@@ -2863,32 +3361,11 @@ yp_or_node_create(yp_parser_t *parser, yp_node_t *left, const yp_token_t *operat
2863
3361
  .location = {
2864
3362
  .start = left->location.start,
2865
3363
  .end = right->location.end
2866
- }
2867
- },
2868
- .left = left,
2869
- .right = right,
2870
- .operator_loc = YP_LOCATION_TOKEN_VALUE(operator)
2871
- };
2872
-
2873
- return node;
2874
- }
2875
-
2876
- // Allocate and initialize a new OrWriteNode.
2877
- static yp_or_write_node_t *
2878
- yp_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2879
- yp_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_or_write_node_t);
2880
-
2881
- *node = (yp_or_write_node_t) {
2882
- {
2883
- .type = YP_NODE_OR_WRITE_NODE,
2884
- .location = {
2885
- .start = target->location.start,
2886
- .end = value->location.end
2887
- },
3364
+ }
2888
3365
  },
2889
- .target = target,
2890
- .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2891
- .value = value
3366
+ .left = left,
3367
+ .right = right,
3368
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator)
2892
3369
  };
2893
3370
 
2894
3371
  return node;
@@ -3161,8 +3638,8 @@ yp_regular_expression_node_create(yp_parser_t *parser, const yp_token_t *opening
3161
3638
  .type = YP_NODE_REGULAR_EXPRESSION_NODE,
3162
3639
  .flags = yp_regular_expression_flags_create(closing),
3163
3640
  .location = {
3164
- .start = opening->start,
3165
- .end = closing->end
3641
+ .start = MIN(opening->start, closing->start),
3642
+ .end = MAX(opening->end, closing->end)
3166
3643
  }
3167
3644
  },
3168
3645
  .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
@@ -3215,7 +3692,7 @@ yp_required_parameter_node_create(yp_parser_t *parser, const yp_token_t *token)
3215
3692
  .type = YP_NODE_REQUIRED_PARAMETER_NODE,
3216
3693
  .location = YP_LOCATION_TOKEN_VALUE(token)
3217
3694
  },
3218
- .constant_id = yp_parser_constant_id_token(parser, token)
3695
+ .name = yp_parser_constant_id_token(parser, token)
3219
3696
  };
3220
3697
 
3221
3698
  return node;
@@ -3466,19 +3943,21 @@ yp_statements_node_body_length(yp_statements_node_t *node) {
3466
3943
 
3467
3944
  // Set the location of the given StatementsNode.
3468
3945
  static void
3469
- yp_statements_node_location_set(yp_statements_node_t *node, const char *start, const char *end) {
3946
+ yp_statements_node_location_set(yp_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
3470
3947
  node->base.location = (yp_location_t) { .start = start, .end = end };
3471
3948
  }
3472
3949
 
3473
3950
  // Append a new node to the given StatementsNode node's body.
3474
3951
  static void
3475
3952
  yp_statements_node_body_append(yp_statements_node_t *node, yp_node_t *statement) {
3476
- if (yp_statements_node_body_length(node) == 0) {
3953
+ if (yp_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
3477
3954
  node->base.location.start = statement->location.start;
3478
3955
  }
3956
+ if (statement->location.end > node->base.location.end) {
3957
+ node->base.location.end = statement->location.end;
3958
+ }
3479
3959
 
3480
3960
  yp_node_list_append(&node->body, statement);
3481
- node->base.location.end = statement->location.end;
3482
3961
 
3483
3962
  // Every statement gets marked as a place where a newline can occur.
3484
3963
  statement->flags |= YP_NODE_FLAG_NEWLINE;
@@ -3532,7 +4011,7 @@ yp_super_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_argument
3532
4011
  assert(keyword->type == YP_TOKEN_KEYWORD_SUPER);
3533
4012
  yp_super_node_t *node = YP_ALLOC_NODE(parser, yp_super_node_t);
3534
4013
 
3535
- const char *end;
4014
+ const uint8_t *end;
3536
4015
  if (arguments->block != NULL) {
3537
4016
  end = arguments->block->base.location.end;
3538
4017
  } else if (arguments->closing_loc.start != NULL) {
@@ -3600,7 +4079,7 @@ yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) {
3600
4079
  assert((label.end - label.start) >= 0);
3601
4080
  yp_string_shared_init(&node->unescaped, label.start, label.end);
3602
4081
 
3603
- yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
4082
+ yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL);
3604
4083
  break;
3605
4084
  }
3606
4085
  case YP_TOKEN_MISSING: {
@@ -3623,7 +4102,7 @@ yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) {
3623
4102
  // Check if the given node is a label in a hash.
3624
4103
  static bool
3625
4104
  yp_symbol_node_label_p(yp_node_t *node) {
3626
- const char *end = NULL;
4105
+ const uint8_t *end = NULL;
3627
4106
 
3628
4107
  switch (YP_NODE_TYPE(node)) {
3629
4108
  case YP_NODE_SYMBOL_NODE:
@@ -3641,20 +4120,20 @@ yp_symbol_node_label_p(yp_node_t *node) {
3641
4120
 
3642
4121
  // Convert the given StringNode node to a SymbolNode node.
3643
4122
  static yp_symbol_node_t *
3644
- yp_string_node_to_symbol_node(yp_parser_t *parser, yp_string_node_t *node) {
4123
+ yp_string_node_to_symbol_node(yp_parser_t *parser, yp_string_node_t *node, const yp_token_t *opening, const yp_token_t *closing) {
3645
4124
  yp_symbol_node_t *new_node = YP_ALLOC_NODE(parser, yp_symbol_node_t);
3646
4125
 
3647
4126
  *new_node = (yp_symbol_node_t) {
3648
4127
  {
3649
4128
  .type = YP_NODE_SYMBOL_NODE,
3650
4129
  .location = {
3651
- .start = node->base.location.start - 2,
3652
- .end = node->base.location.end + 1
4130
+ .start = opening->start,
4131
+ .end = closing->end
3653
4132
  }
3654
4133
  },
3655
- .opening_loc = node->opening_loc,
4134
+ .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
3656
4135
  .value_loc = node->content_loc,
3657
- .closing_loc = node->closing_loc,
4136
+ .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
3658
4137
  .unescaped = node->unescaped
3659
4138
  };
3660
4139
 
@@ -3731,7 +4210,7 @@ yp_unless_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t
3731
4210
  yp_flip_flop(predicate);
3732
4211
  yp_unless_node_t *node = YP_ALLOC_NODE(parser, yp_unless_node_t);
3733
4212
 
3734
- const char *end;
4213
+ const uint8_t *end;
3735
4214
  if (statements != NULL) {
3736
4215
  end = statements->base.location.end;
3737
4216
  } else {
@@ -3793,34 +4272,43 @@ yp_unless_node_end_keyword_loc_set(yp_unless_node_t *node, const yp_token_t *end
3793
4272
 
3794
4273
  // Allocate a new UntilNode node.
3795
4274
  static yp_until_node_t *
3796
- yp_until_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4275
+ yp_until_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_token_t *closing, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
3797
4276
  yp_until_node_t *node = YP_ALLOC_NODE(parser, yp_until_node_t);
3798
- bool has_statements = (statements != NULL) && (statements->body.size != 0);
3799
4277
 
3800
- const char *start = NULL;
3801
- if (has_statements && (keyword->start > statements->base.location.start)) {
3802
- start = statements->base.location.start;
3803
- } else {
3804
- start = keyword->start;
3805
- }
4278
+ *node = (yp_until_node_t) {
4279
+ {
4280
+ .type = YP_NODE_UNTIL_NODE,
4281
+ .flags = flags,
4282
+ .location = {
4283
+ .start = keyword->start,
4284
+ .end = closing->end,
4285
+ },
4286
+ },
4287
+ .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
4288
+ .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4289
+ .predicate = predicate,
4290
+ .statements = statements
4291
+ };
3806
4292
 
3807
- const char *end = NULL;
3808
- if (has_statements && (predicate->location.end < statements->base.location.end)) {
3809
- end = statements->base.location.end;
3810
- } else {
3811
- end = predicate->location.end;
3812
- }
4293
+ return node;
4294
+ }
4295
+
4296
+ // Allocate a new UntilNode node.
4297
+ static yp_until_node_t *
4298
+ yp_until_node_modifier_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4299
+ yp_until_node_t *node = YP_ALLOC_NODE(parser, yp_until_node_t);
3813
4300
 
3814
4301
  *node = (yp_until_node_t) {
3815
4302
  {
3816
4303
  .type = YP_NODE_UNTIL_NODE,
3817
4304
  .flags = flags,
3818
4305
  .location = {
3819
- .start = start,
3820
- .end = end,
4306
+ .start = statements->base.location.start,
4307
+ .end = predicate->location.end,
3821
4308
  },
3822
4309
  },
3823
4310
  .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
4311
+ .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
3824
4312
  .predicate = predicate,
3825
4313
  .statements = statements
3826
4314
  };
@@ -3868,34 +4356,43 @@ yp_when_node_statements_set(yp_when_node_t *node, yp_statements_node_t *statemen
3868
4356
 
3869
4357
  // Allocate a new WhileNode node.
3870
4358
  static yp_while_node_t *
3871
- yp_while_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4359
+ yp_while_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_token_t *closing, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
3872
4360
  yp_while_node_t *node = YP_ALLOC_NODE(parser, yp_while_node_t);
3873
4361
 
3874
- const char *start = NULL;
3875
- bool has_statements = (statements != NULL) && (statements->body.size != 0);
3876
- if (has_statements && (keyword->start > statements->base.location.start)) {
3877
- start = statements->base.location.start;
3878
- } else {
3879
- start = keyword->start;
3880
- }
4362
+ *node = (yp_while_node_t) {
4363
+ {
4364
+ .type = YP_NODE_WHILE_NODE,
4365
+ .flags = flags,
4366
+ .location = {
4367
+ .start = keyword->start,
4368
+ .end = closing->end
4369
+ },
4370
+ },
4371
+ .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
4372
+ .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4373
+ .predicate = predicate,
4374
+ .statements = statements
4375
+ };
3881
4376
 
3882
- const char *end = NULL;
3883
- if (has_statements && (predicate->location.end < statements->base.location.end)) {
3884
- end = statements->base.location.end;
3885
- } else {
3886
- end = predicate->location.end;
3887
- }
4377
+ return node;
4378
+ }
4379
+
4380
+ // Allocate a new WhileNode node.
4381
+ static yp_while_node_t *
4382
+ yp_while_node_modifier_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4383
+ yp_while_node_t *node = YP_ALLOC_NODE(parser, yp_while_node_t);
3888
4384
 
3889
4385
  *node = (yp_while_node_t) {
3890
4386
  {
3891
4387
  .type = YP_NODE_WHILE_NODE,
3892
4388
  .flags = flags,
3893
4389
  .location = {
3894
- .start = start,
3895
- .end = end,
4390
+ .start = statements->base.location.start,
4391
+ .end = predicate->location.end
3896
4392
  },
3897
4393
  },
3898
4394
  .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
4395
+ .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
3899
4396
  .predicate = predicate,
3900
4397
  .statements = statements
3901
4398
  };
@@ -3930,7 +4427,7 @@ static yp_yield_node_t *
3930
4427
  yp_yield_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_location_t *lparen_loc, yp_arguments_node_t *arguments, const yp_location_t *rparen_loc) {
3931
4428
  yp_yield_node_t *node = YP_ALLOC_NODE(parser, yp_yield_node_t);
3932
4429
 
3933
- const char *end;
4430
+ const uint8_t *end;
3934
4431
  if (rparen_loc->start != NULL) {
3935
4432
  end = rparen_loc->end;
3936
4433
  } else if (arguments != NULL) {
@@ -4003,13 +4500,15 @@ yp_parser_local_depth(yp_parser_t *parser, yp_token_t *token) {
4003
4500
  }
4004
4501
 
4005
4502
  // Add a local variable from a location to the current scope.
4006
- static void
4007
- yp_parser_local_add_location(yp_parser_t *parser, const char *start, const char *end) {
4503
+ static yp_constant_id_t
4504
+ yp_parser_local_add_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
4008
4505
  yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, start, end);
4009
4506
 
4010
4507
  if (!yp_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
4011
4508
  yp_constant_id_list_append(&parser->current_scope->locals, constant_id);
4012
4509
  }
4510
+
4511
+ return constant_id;
4013
4512
  }
4014
4513
 
4015
4514
  // Add a local variable from a token to the current scope.
@@ -4051,15 +4550,13 @@ yp_parser_scope_pop(yp_parser_t *parser) {
4051
4550
  // reason we have the encoding_changed boolean to check if we need to go through
4052
4551
  // the function pointer or can just directly use the UTF-8 functions.
4053
4552
  static inline size_t
4054
- char_is_identifier_start(yp_parser_t *parser, const char *c) {
4055
- const unsigned char uc = (unsigned char) *c;
4056
-
4553
+ char_is_identifier_start(yp_parser_t *parser, const uint8_t *b) {
4057
4554
  if (parser->encoding_changed) {
4058
- return parser->encoding.alpha_char(c, parser->end - c) || (uc == '_') || (uc >= 0x80);
4059
- } else if (uc < 0x80) {
4060
- return (yp_encoding_unicode_table[uc] & YP_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (uc == '_');
4555
+ return parser->encoding.alpha_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
4556
+ } else if (*b < 0x80) {
4557
+ return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
4061
4558
  } else {
4062
- return (size_t) (yp_encoding_utf_8_alpha_char(c, parser->end - c) || 1u);
4559
+ return (size_t) (yp_encoding_utf_8_alpha_char(b, parser->end - b) || 1u);
4063
4560
  }
4064
4561
  }
4065
4562
 
@@ -4067,15 +4564,13 @@ char_is_identifier_start(yp_parser_t *parser, const char *c) {
4067
4564
  // the identifiers in a source file once the first character has been found. So
4068
4565
  // it's important that it be as fast as possible.
4069
4566
  static inline size_t
4070
- char_is_identifier(yp_parser_t *parser, const char *c) {
4071
- const unsigned char uc = (unsigned char) *c;
4072
-
4567
+ char_is_identifier(yp_parser_t *parser, const uint8_t *b) {
4073
4568
  if (parser->encoding_changed) {
4074
- return parser->encoding.alnum_char(c, parser->end - c) || (uc == '_') || (uc >= 0x80);
4075
- } else if (uc < 0x80) {
4076
- return (yp_encoding_unicode_table[uc] & YP_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (uc == '_');
4569
+ return parser->encoding.alnum_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
4570
+ } else if (*b < 0x80) {
4571
+ return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (*b == '_');
4077
4572
  } else {
4078
- return (size_t) (yp_encoding_utf_8_alnum_char(c, parser->end - c) || 1u);
4573
+ return (size_t) (yp_encoding_utf_8_alnum_char(b, parser->end - b) || 1u);
4079
4574
  }
4080
4575
  }
4081
4576
 
@@ -4097,15 +4592,15 @@ const unsigned int yp_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
4097
4592
  #undef PUNCT
4098
4593
 
4099
4594
  static inline bool
4100
- char_is_global_name_punctuation(const char c) {
4101
- const unsigned int i = (const unsigned int) c;
4595
+ char_is_global_name_punctuation(const uint8_t b) {
4596
+ const unsigned int i = (const unsigned int) b;
4102
4597
  if (i <= 0x20 || 0x7e < i) return false;
4103
4598
 
4104
- return (yp_global_name_punctuation_hash[(i - 0x20) / 32] >> (c % 32)) & 1;
4599
+ return (yp_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
4105
4600
  }
4106
4601
 
4107
4602
  static inline bool
4108
- token_is_numbered_parameter(const char *start, const char *end) {
4603
+ token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
4109
4604
  return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (yp_char_is_decimal_digit(start[1]));
4110
4605
  }
4111
4606
 
@@ -4157,44 +4652,47 @@ yp_do_loop_stack_p(yp_parser_t *parser) {
4157
4652
  /* Lexer check helpers */
4158
4653
  /******************************************************************************/
4159
4654
 
4160
- // Get the next character in the source starting from parser->current.end and
4161
- // adding the given offset. If that position is beyond the end of the source
4162
- // then return '\0'.
4163
- static inline char
4164
- peek_at(yp_parser_t *parser, size_t offset) {
4165
- if (parser->current.end + offset < parser->end) {
4166
- return parser->current.end[offset];
4655
+ // Get the next character in the source starting from +cursor+. If that position
4656
+ // is beyond the end of the source then return '\0'.
4657
+ static inline uint8_t
4658
+ peek_at(yp_parser_t *parser, const uint8_t *cursor) {
4659
+ if (cursor < parser->end) {
4660
+ return *cursor;
4167
4661
  } else {
4168
4662
  return '\0';
4169
4663
  }
4170
4664
  }
4171
4665
 
4666
+ // Get the next character in the source starting from parser->current.end and
4667
+ // adding the given offset. If that position is beyond the end of the source
4668
+ // then return '\0'.
4669
+ static inline uint8_t
4670
+ peek_offset(yp_parser_t *parser, ptrdiff_t offset) {
4671
+ return peek_at(parser, parser->current.end + offset);
4672
+ }
4673
+
4172
4674
  // Get the next character in the source starting from parser->current.end. If
4173
4675
  // that position is beyond the end of the source then return '\0'.
4174
- static inline char
4676
+ static inline uint8_t
4175
4677
  peek(yp_parser_t *parser) {
4176
- if (parser->current.end < parser->end) {
4177
- return *parser->current.end;
4178
- } else {
4179
- return '\0';
4180
- }
4678
+ return peek_at(parser, parser->current.end);
4181
4679
  }
4182
4680
 
4183
4681
  // Get the next string of length len in the source starting from parser->current.end.
4184
4682
  // If the string extends beyond the end of the source, return the empty string ""
4185
- static inline const char*
4683
+ static inline const uint8_t *
4186
4684
  peek_string(yp_parser_t *parser, size_t len) {
4187
4685
  if (parser->current.end + len <= parser->end) {
4188
4686
  return parser->current.end;
4189
4687
  } else {
4190
- return "";
4688
+ return (const uint8_t *) "";
4191
4689
  }
4192
4690
  }
4193
4691
 
4194
4692
  // If the character to be read matches the given value, then returns true and
4195
4693
  // advanced the current pointer.
4196
4694
  static inline bool
4197
- match(yp_parser_t *parser, char value) {
4695
+ match(yp_parser_t *parser, uint8_t value) {
4198
4696
  if (peek(parser) == value) {
4199
4697
  parser->current.end++;
4200
4698
  return true;
@@ -4202,9 +4700,38 @@ match(yp_parser_t *parser, char value) {
4202
4700
  return false;
4203
4701
  }
4204
4702
 
4703
+ // Return the length of the line ending string starting at +cursor+, or 0 if it
4704
+ // is not a line ending. This function is intended to be CRLF/LF agnostic.
4705
+ static inline size_t
4706
+ match_eol_at(yp_parser_t *parser, const uint8_t *cursor) {
4707
+ if (peek_at(parser, cursor) == '\n') {
4708
+ return 1;
4709
+ }
4710
+ if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
4711
+ return 2;
4712
+ }
4713
+ return 0;
4714
+ }
4715
+
4716
+ // Return the length of the line ending string starting at
4717
+ // parser->current.end + offset, or 0 if it is not a line ending. This function
4718
+ // is intended to be CRLF/LF agnostic.
4719
+ static inline size_t
4720
+ match_eol_offset(yp_parser_t *parser, ptrdiff_t offset) {
4721
+ return match_eol_at(parser, parser->current.end + offset);
4722
+ }
4723
+
4724
+ // Return the length of the line ending string starting at parser->current.end,
4725
+ // or 0 if it is not a line ending. This function is intended to be CRLF/LF
4726
+ // agnostic.
4727
+ static inline size_t
4728
+ match_eol(yp_parser_t *parser) {
4729
+ return match_eol_at(parser, parser->current.end);
4730
+ }
4731
+
4205
4732
  // Skip to the next newline character or NUL byte.
4206
- static inline const char *
4207
- next_newline(const char *cursor, ptrdiff_t length) {
4733
+ static inline const uint8_t *
4734
+ next_newline(const uint8_t *cursor, ptrdiff_t length) {
4208
4735
  assert(length >= 0);
4209
4736
 
4210
4737
  // Note that it's okay for us to use memchr here to look for \n because none
@@ -4215,21 +4742,23 @@ next_newline(const char *cursor, ptrdiff_t length) {
4215
4742
 
4216
4743
  // Find the start of the encoding comment. This is effectively an inlined
4217
4744
  // version of strnstr with some modifications.
4218
- static inline const char *
4219
- parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdiff_t remaining) {
4745
+ static inline const uint8_t *
4746
+ parser_lex_encoding_comment_start(yp_parser_t *parser, const uint8_t *cursor, ptrdiff_t remaining) {
4220
4747
  assert(remaining >= 0);
4221
4748
  size_t length = (size_t) remaining;
4222
4749
 
4223
4750
  size_t key_length = strlen("coding:");
4224
4751
  if (key_length > length) return NULL;
4225
4752
 
4226
- const char *cursor_limit = cursor + length - key_length + 1;
4753
+ const uint8_t *cursor_limit = cursor + length - key_length + 1;
4227
4754
  while ((cursor = yp_memchr(cursor, 'c', (size_t) (cursor_limit - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
4228
- if (
4229
- (strncmp(cursor, "coding", key_length - 1) == 0) &&
4230
- (cursor[key_length - 1] == ':' || cursor[key_length - 1] == '=')
4231
- ) {
4232
- return cursor + key_length;
4755
+ if (memcmp(cursor, "coding", key_length - 1) == 0) {
4756
+ size_t whitespace_after_coding = yp_strspn_inline_whitespace(cursor + key_length - 1, parser->end - (cursor + key_length - 1));
4757
+ size_t cur_pos = key_length + whitespace_after_coding;
4758
+
4759
+ if (cursor[cur_pos - 1] == ':' || cursor[cur_pos - 1] == '=') {
4760
+ return cursor + cur_pos;
4761
+ }
4233
4762
  }
4234
4763
 
4235
4764
  cursor++;
@@ -4242,13 +4771,13 @@ parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdi
4242
4771
  // actions are necessary for it here.
4243
4772
  static void
4244
4773
  parser_lex_encoding_comment(yp_parser_t *parser) {
4245
- const char *start = parser->current.start + 1;
4246
- const char *end = next_newline(start, parser->end - start);
4774
+ const uint8_t *start = parser->current.start + 1;
4775
+ const uint8_t *end = next_newline(start, parser->end - start);
4247
4776
  if (end == NULL) end = parser->end;
4248
4777
 
4249
4778
  // These are the patterns we're going to match to find the encoding comment.
4250
4779
  // This is definitely not complete or even really correct.
4251
- const char *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);
4780
+ const uint8_t *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);
4252
4781
 
4253
4782
  // If we didn't find anything that matched our patterns, then return. Note
4254
4783
  // that this does a _very_ poor job of actually finding the encoding, and
@@ -4261,7 +4790,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
4261
4790
 
4262
4791
  // Now determine the end of the encoding string. This is either the end of
4263
4792
  // the line, the first whitespace character, or a punctuation mark.
4264
- const char *encoding_end = yp_strpbrk(parser, encoding_start, " \t\f\r\v\n;,", end - encoding_start);
4793
+ const uint8_t *encoding_end = yp_strpbrk(parser, encoding_start, (const uint8_t *) " \t\f\r\v\n;,", end - encoding_start);
4265
4794
  encoding_end = encoding_end == NULL ? end : encoding_end;
4266
4795
 
4267
4796
  // Finally, we can determine the width of the encoding string.
@@ -4283,7 +4812,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
4283
4812
  // Extensions like utf-8 can contain extra encoding details like,
4284
4813
  // utf-8-dos, utf-8-linux, utf-8-mac. We treat these all as utf-8 should
4285
4814
  // treat any encoding starting utf-8 as utf-8.
4286
- if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, "utf-8", 5) == 0)) {
4815
+ if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, (const uint8_t *) "utf-8", 5) == 0)) {
4287
4816
  // We don't need to do anything here because the default encoding is
4288
4817
  // already UTF-8. We'll just return.
4289
4818
  return;
@@ -4292,7 +4821,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
4292
4821
  // Next, we're going to loop through each of the encodings that we handle
4293
4822
  // explicitly. If we found one that we understand, we'll use that value.
4294
4823
  #define ENCODING(value, prebuilt) \
4295
- if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, value, width) == 0) { \
4824
+ if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, (const uint8_t *) value, width) == 0) { \
4296
4825
  parser->encoding = prebuilt; \
4297
4826
  parser->encoding_changed |= true; \
4298
4827
  if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
@@ -4432,14 +4961,9 @@ context_push(yp_parser_t *parser, yp_context_t context) {
4432
4961
 
4433
4962
  static void
4434
4963
  context_pop(yp_parser_t *parser) {
4435
- if (parser->current_context->prev == NULL) {
4436
- free(parser->current_context);
4437
- parser->current_context = NULL;
4438
- } else {
4439
- yp_context_node_t *prev = parser->current_context->prev;
4440
- free(parser->current_context);
4441
- parser->current_context = prev;
4442
- }
4964
+ yp_context_node_t *prev = parser->current_context->prev;
4965
+ free(parser->current_context);
4966
+ parser->current_context = prev;
4443
4967
  }
4444
4968
 
4445
4969
  static bool
@@ -4485,7 +5009,7 @@ lex_optional_float_suffix(yp_parser_t *parser) {
4485
5009
  // Here we're going to attempt to parse the optional decimal portion of a
4486
5010
  // float. If it's not there, then it's okay and we'll just continue on.
4487
5011
  if (peek(parser) == '.') {
4488
- if (yp_char_is_decimal_digit(peek_at(parser, 1))) {
5012
+ if (yp_char_is_decimal_digit(peek_offset(parser, 1))) {
4489
5013
  parser->current.end += 2;
4490
5014
  parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
4491
5015
  type = YP_TOKEN_FLOAT;
@@ -4518,12 +5042,13 @@ static yp_token_type_t
4518
5042
  lex_numeric_prefix(yp_parser_t *parser) {
4519
5043
  yp_token_type_t type = YP_TOKEN_INTEGER;
4520
5044
 
4521
- if (parser->current.end[-1] == '0') {
5045
+ if (peek_offset(parser, -1) == '0') {
4522
5046
  switch (*parser->current.end) {
4523
5047
  // 0d1111 is a decimal number
4524
5048
  case 'd':
4525
5049
  case 'D':
4526
- if (yp_char_is_decimal_digit(*++parser->current.end)) {
5050
+ parser->current.end++;
5051
+ if (yp_char_is_decimal_digit(peek(parser))) {
4527
5052
  parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
4528
5053
  } else {
4529
5054
  yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid decimal number.");
@@ -4534,7 +5059,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
4534
5059
  // 0b1111 is a binary number
4535
5060
  case 'b':
4536
5061
  case 'B':
4537
- if (yp_char_is_binary_digit(*++parser->current.end)) {
5062
+ parser->current.end++;
5063
+ if (yp_char_is_binary_digit(peek(parser))) {
4538
5064
  parser->current.end += yp_strspn_binary_number(parser->current.end, parser->end - parser->current.end);
4539
5065
  } else {
4540
5066
  yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid binary number.");
@@ -4545,7 +5071,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
4545
5071
  // 0o1111 is an octal number
4546
5072
  case 'o':
4547
5073
  case 'O':
4548
- if (yp_char_is_octal_digit(*++parser->current.end)) {
5074
+ parser->current.end++;
5075
+ if (yp_char_is_octal_digit(peek(parser))) {
4549
5076
  parser->current.end += yp_strspn_octal_number(parser->current.end, parser->end - parser->current.end);
4550
5077
  } else {
4551
5078
  yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid octal number.");
@@ -4569,7 +5096,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
4569
5096
  // 0x1111 is a hexadecimal number
4570
5097
  case 'x':
4571
5098
  case 'X':
4572
- if (yp_char_is_hexadecimal_digit(*++parser->current.end)) {
5099
+ parser->current.end++;
5100
+ if (yp_char_is_hexadecimal_digit(peek(parser))) {
4573
5101
  parser->current.end += yp_strspn_hexadecimal_number(parser->current.end, parser->end - parser->current.end);
4574
5102
  } else {
4575
5103
  yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid hexadecimal number.");
@@ -4601,7 +5129,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
4601
5129
 
4602
5130
  // If the last character that we consumed was an underscore, then this is
4603
5131
  // actually an invalid integer value, and we should return an invalid token.
4604
- if (parser->current.end[-1] == '_') {
5132
+ if (peek_offset(parser, -1) == '_') {
4605
5133
  yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Number literal cannot end with a `_`.");
4606
5134
  }
4607
5135
 
@@ -4615,7 +5143,7 @@ lex_numeric(yp_parser_t *parser) {
4615
5143
  if (parser->current.end < parser->end) {
4616
5144
  type = lex_numeric_prefix(parser);
4617
5145
 
4618
- const char *end = parser->current.end;
5146
+ const uint8_t *end = parser->current.end;
4619
5147
  yp_token_type_t suffix_type = type;
4620
5148
 
4621
5149
  if (type == YP_TOKEN_INTEGER) {
@@ -4640,8 +5168,8 @@ lex_numeric(yp_parser_t *parser) {
4640
5168
  }
4641
5169
  }
4642
5170
 
4643
- const unsigned char uc = (const unsigned char) peek(parser);
4644
- if (uc != '\0' && (uc >= 0x80 || ((uc >= 'a' && uc <= 'z') || (uc >= 'A' && uc <= 'Z')) || uc == '_')) {
5171
+ const uint8_t b = peek(parser);
5172
+ if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
4645
5173
  parser->current.end = end;
4646
5174
  } else {
4647
5175
  type = suffix_type;
@@ -4653,6 +5181,11 @@ lex_numeric(yp_parser_t *parser) {
4653
5181
 
4654
5182
  static yp_token_type_t
4655
5183
  lex_global_variable(yp_parser_t *parser) {
5184
+ if (parser->current.end >= parser->end) {
5185
+ yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid global variable.");
5186
+ return YP_TOKEN_GLOBAL_VARIABLE;
5187
+ }
5188
+
4656
5189
  switch (*parser->current.end) {
4657
5190
  case '~': // $~: match-data
4658
5191
  case '*': // $*: argv
@@ -4741,7 +5274,7 @@ lex_keyword(yp_parser_t *parser, const char *value, yp_lex_state_t state, yp_tok
4741
5274
  yp_lex_state_t last_state = parser->lex_state;
4742
5275
 
4743
5276
  const size_t vlen = strlen(value);
4744
- if (parser->current.start + vlen <= parser->end && strncmp(parser->current.start, value, vlen) == 0) {
5277
+ if (parser->current.start + vlen <= parser->end && memcmp(parser->current.start, value, vlen) == 0) {
4745
5278
  if (parser->lex_state & YP_LEX_STATE_FNAME) {
4746
5279
  lex_state_set(parser, YP_LEX_STATE_ENDFN);
4747
5280
  } else {
@@ -4782,7 +5315,7 @@ lex_identifier(yp_parser_t *parser, bool previous_command_start) {
4782
5315
 
4783
5316
  if (
4784
5317
  ((lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
4785
- (peek(parser) == ':') && (peek_at(parser, 1) != ':')
5318
+ (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
4786
5319
  ) {
4787
5320
  // If we're in a position where we can accept a : at the end of an
4788
5321
  // identifier, then we'll optionally accept it.
@@ -4798,7 +5331,7 @@ lex_identifier(yp_parser_t *parser, bool previous_command_start) {
4798
5331
  }
4799
5332
 
4800
5333
  return YP_TOKEN_IDENTIFIER;
4801
- } else if (lex_state_p(parser, YP_LEX_STATE_FNAME) && peek_at(parser, 1) != '~' && peek_at(parser, 1) != '>' && (peek_at(parser, 1) != '=' || peek_at(parser, 2) == '>') && match(parser, '=')) {
5334
+ } else if (lex_state_p(parser, YP_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
4802
5335
  // If we're in a position where we can accept a = at the end of an
4803
5336
  // identifier, then we'll optionally accept it.
4804
5337
  return YP_TOKEN_IDENTIFIER;
@@ -4806,7 +5339,7 @@ lex_identifier(yp_parser_t *parser, bool previous_command_start) {
4806
5339
 
4807
5340
  if (
4808
5341
  ((lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
4809
- peek(parser) == ':' && peek_at(parser, 1) != ':'
5342
+ peek(parser) == ':' && peek_offset(parser, 1) != ':'
4810
5343
  ) {
4811
5344
  // If we're in a position where we can accept a : at the end of an
4812
5345
  // identifier, then we'll optionally accept it.
@@ -4907,7 +5440,7 @@ current_token_starts_line(yp_parser_t *parser) {
4907
5440
  // this token type.
4908
5441
  //
4909
5442
  static yp_token_type_t
4910
- lex_interpolation(yp_parser_t *parser, const char *pound) {
5443
+ lex_interpolation(yp_parser_t *parser, const uint8_t *pound) {
4911
5444
  // If there is no content following this #, then we're at the end of
4912
5445
  // the string and we can safely return string content.
4913
5446
  if (pound + 1 >= parser->end) {
@@ -4928,7 +5461,7 @@ lex_interpolation(yp_parser_t *parser, const char *pound) {
4928
5461
 
4929
5462
  // If we're looking at a @ and there's another @, then we'll skip past the
4930
5463
  // second @.
4931
- const char *variable = pound + 2;
5464
+ const uint8_t *variable = pound + 2;
4932
5465
  if (*variable == '@' && pound + 3 < parser->end) variable++;
4933
5466
 
4934
5467
  if (char_is_identifier_start(parser, variable)) {
@@ -4964,7 +5497,7 @@ lex_interpolation(yp_parser_t *parser, const char *pound) {
4964
5497
  // This is the character that we're going to check to see if it is the
4965
5498
  // start of an identifier that would indicate that this is a global
4966
5499
  // variable.
4967
- const char *check = pound + 2;
5500
+ const uint8_t *check = pound + 2;
4968
5501
 
4969
5502
  if (pound[2] == '-') {
4970
5503
  if (pound + 3 >= parser->end) {
@@ -5074,7 +5607,7 @@ lex_question_mark(yp_parser_t *parser) {
5074
5607
 
5075
5608
  if (parser->current.start[1] == '\\') {
5076
5609
  lex_state_set(parser, YP_LEX_STATE_END);
5077
- parser->current.end += yp_unescape_calculate_difference(parser->current.start + 1, parser->end, YP_UNESCAPE_ALL, true, &parser->error_list);
5610
+ parser->current.end += yp_unescape_calculate_difference(parser, parser->current.start + 1, YP_UNESCAPE_ALL, true);
5078
5611
  return YP_TOKEN_CHARACTER_LITERAL;
5079
5612
  } else {
5080
5613
  size_t encoding_width = parser->encoding.char_width(parser->current.end, parser->end - parser->current.end);
@@ -5083,7 +5616,7 @@ lex_question_mark(yp_parser_t *parser) {
5083
5616
  // an underscore. We check for this case
5084
5617
  if (
5085
5618
  !(parser->encoding.alnum_char(parser->current.end, parser->end - parser->current.end) ||
5086
- *parser->current.end == '_') ||
5619
+ peek(parser) == '_') ||
5087
5620
  (
5088
5621
  (parser->current.end + encoding_width >= parser->end) ||
5089
5622
  !char_is_identifier(parser, parser->current.end + encoding_width)
@@ -5155,7 +5688,7 @@ parser_comment(yp_parser_t *parser, yp_comment_type_t type) {
5155
5688
  static yp_token_type_t
5156
5689
  lex_embdoc(yp_parser_t *parser) {
5157
5690
  // First, lex out the EMBDOC_BEGIN token.
5158
- const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5691
+ const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5159
5692
 
5160
5693
  if (newline == NULL) {
5161
5694
  parser->current.end = parser->end;
@@ -5178,9 +5711,9 @@ lex_embdoc(yp_parser_t *parser) {
5178
5711
 
5179
5712
  // If we've hit the end of the embedded documentation then we'll return that
5180
5713
  // token here.
5181
- if (strncmp(parser->current.end, "=end", 4) == 0 &&
5714
+ if (memcmp(parser->current.end, "=end", 4) == 0 &&
5182
5715
  (parser->current.end + 4 == parser->end || yp_char_is_whitespace(parser->current.end[4]))) {
5183
- const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5716
+ const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5184
5717
 
5185
5718
  if (newline == NULL) {
5186
5719
  parser->current.end = parser->end;
@@ -5200,7 +5733,7 @@ lex_embdoc(yp_parser_t *parser) {
5200
5733
 
5201
5734
  // Otherwise, we'll parse until the end of the line and return a line of
5202
5735
  // embedded documentation.
5203
- const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5736
+ const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5204
5737
 
5205
5738
  if (newline == NULL) {
5206
5739
  parser->current.end = parser->end;
@@ -5299,30 +5832,22 @@ parser_lex(yp_parser_t *parser) {
5299
5832
  space_seen = true;
5300
5833
  break;
5301
5834
  case '\r':
5302
- if (peek_at(parser, 1) == '\n') {
5835
+ if (match_eol_offset(parser, 1)) {
5303
5836
  chomping = false;
5304
5837
  } else {
5305
5838
  parser->current.end++;
5306
5839
  space_seen = true;
5307
5840
  }
5308
5841
  break;
5309
- case '\\':
5310
- if (peek_at(parser, 1) == '\n') {
5311
- if (parser->heredoc_end) {
5312
- parser->current.end = parser->heredoc_end;
5313
- parser->heredoc_end = NULL;
5314
- } else {
5315
- yp_newline_list_append(&parser->newline_list, parser->current.end + 1);
5316
- parser->current.end += 2;
5317
- space_seen = true;
5318
- }
5319
- } else if (peek_at(parser, 1) == '\r' && peek_at(parser, 2) == '\n') {
5842
+ case '\\': {
5843
+ size_t eol_length = match_eol_offset(parser, 1);
5844
+ if (eol_length) {
5320
5845
  if (parser->heredoc_end) {
5321
5846
  parser->current.end = parser->heredoc_end;
5322
5847
  parser->heredoc_end = NULL;
5323
5848
  } else {
5324
- yp_newline_list_append(&parser->newline_list, parser->current.end + 2);
5325
- parser->current.end += 3;
5849
+ parser->current.end += eol_length + 1;
5850
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
5326
5851
  space_seen = true;
5327
5852
  }
5328
5853
  } else if (yp_char_is_inline_whitespace(*parser->current.end)) {
@@ -5330,7 +5855,9 @@ parser_lex(yp_parser_t *parser) {
5330
5855
  } else {
5331
5856
  chomping = false;
5332
5857
  }
5858
+
5333
5859
  break;
5860
+ }
5334
5861
  default:
5335
5862
  chomping = false;
5336
5863
  break;
@@ -5340,13 +5867,14 @@ parser_lex(yp_parser_t *parser) {
5340
5867
  // Next, we'll set to start of this token to be the current end.
5341
5868
  parser->current.start = parser->current.end;
5342
5869
 
5343
- // We'll check if we're at the end of the file. If we are, then we need to
5344
- // return the EOF token.
5870
+ // We'll check if we're at the end of the file. If we are, then we
5871
+ // need to return the EOF token.
5345
5872
  if (parser->current.end >= parser->end) {
5346
5873
  LEX(YP_TOKEN_EOF);
5347
5874
  }
5348
5875
 
5349
- // Finally, we'll check the current character to determine the next token.
5876
+ // Finally, we'll check the current character to determine the next
5877
+ // token.
5350
5878
  switch (*parser->current.end++) {
5351
5879
  case '\0': // NUL or end of script
5352
5880
  case '\004': // ^D
@@ -5355,17 +5883,15 @@ parser_lex(yp_parser_t *parser) {
5355
5883
  LEX(YP_TOKEN_EOF);
5356
5884
 
5357
5885
  case '#': { // comments
5358
- const char *ending = next_newline(parser->current.end, parser->end - parser->current.end);
5359
- while (ending && ending < parser->end && *ending != '\n') {
5360
- ending = next_newline(ending + 1, parser->end - ending);
5361
- }
5886
+ const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
5362
5887
 
5363
5888
  parser->current.end = ending == NULL ? parser->end : ending + 1;
5364
5889
  parser->current.type = YP_TOKEN_COMMENT;
5365
5890
  parser_lex_callback(parser);
5366
5891
 
5367
- // If we found a comment while lexing, then we're going to add it to the
5368
- // list of comments in the file and keep lexing.
5892
+ // If we found a comment while lexing, then we're going to
5893
+ // add it to the list of comments in the file and keep
5894
+ // lexing.
5369
5895
  yp_comment_t *comment = parser_comment(parser, YP_COMMENT_INLINE);
5370
5896
  yp_list_append(&parser->comment_list, (yp_list_node_t *) comment);
5371
5897
 
@@ -5376,21 +5902,29 @@ parser_lex(yp_parser_t *parser) {
5376
5902
  lexed_comment = true;
5377
5903
  }
5378
5904
  /* fallthrough */
5379
- case '\r': {
5380
- // The only way you can have carriage returns in this particular loop
5381
- // is if you have a carriage return followed by a newline. In that
5382
- // case we'll just skip over the carriage return and continue lexing,
5383
- // in order to make it so that the newline token encapsulates both the
5384
- // carriage return and the newline. Note that we need to check that
5385
- // we haven't already lexed a comment here because that falls through
5386
- // into here as well.
5387
- if (!lexed_comment) parser->current.end++;
5388
- }
5389
- /* fallthrough */
5905
+ case '\r':
5390
5906
  case '\n': {
5391
- if (parser->heredoc_end == NULL) {
5392
- yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
5393
- } else {
5907
+ size_t eol_length = match_eol_at(parser, parser->current.end - 1);
5908
+ if (eol_length) {
5909
+ // The only way you can have carriage returns in this
5910
+ // particular loop is if you have a carriage return
5911
+ // followed by a newline. In that case we'll just skip
5912
+ // over the carriage return and continue lexing, in
5913
+ // order to make it so that the newline token
5914
+ // encapsulates both the carriage return and the
5915
+ // newline. Note that we need to check that we haven't
5916
+ // already lexed a comment here because that falls
5917
+ // through into here as well.
5918
+ if (!lexed_comment) {
5919
+ parser->current.end += eol_length - 1; // skip CR
5920
+ }
5921
+
5922
+ if (parser->heredoc_end == NULL) {
5923
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
5924
+ }
5925
+ }
5926
+
5927
+ if (parser->heredoc_end) {
5394
5928
  parser_flush_heredoc_end(parser);
5395
5929
  }
5396
5930
 
@@ -5418,7 +5952,7 @@ parser_lex(yp_parser_t *parser) {
5418
5952
  // (either . or &.) that starts the next line. If there is, then this
5419
5953
  // is going to become an ignored newline and we're going to instead
5420
5954
  // return the call operator.
5421
- const char *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
5955
+ const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
5422
5956
  next_content += yp_strspn_inline_whitespace(next_content, parser->end - next_content);
5423
5957
 
5424
5958
  if (next_content < parser->end) {
@@ -5429,15 +5963,15 @@ parser_lex(yp_parser_t *parser) {
5429
5963
  // Otherwise we'll return a regular newline.
5430
5964
  if (next_content[0] == '#') {
5431
5965
  // Here we look for a "." or "&." following a "\n".
5432
- const char *following = next_newline(next_content, parser->end - next_content);
5966
+ const uint8_t *following = next_newline(next_content, parser->end - next_content);
5433
5967
 
5434
- while (following && (following < parser->end)) {
5968
+ while (following && (following + 1 < parser->end)) {
5435
5969
  following++;
5436
5970
  following += yp_strspn_inline_whitespace(following, parser->end - following);
5437
5971
 
5438
5972
  // If this is not followed by a comment, then we can break out
5439
5973
  // of this loop.
5440
- if (*following != '#') break;
5974
+ if (peek_at(parser, following) != '#') break;
5441
5975
 
5442
5976
  // If there is a comment, then we need to find the end of the
5443
5977
  // comment and continue searching from there.
@@ -5446,7 +5980,13 @@ parser_lex(yp_parser_t *parser) {
5446
5980
 
5447
5981
  // If the lex state was ignored, or we hit a '.' or a '&.',
5448
5982
  // we will lex the ignored newline
5449
- if (lex_state_ignored_p(parser) || (following && ((following[0] == '.') || (following + 1 < parser->end && following[0] == '&' && following[1] == '.')))) {
5983
+ if (
5984
+ lex_state_ignored_p(parser) ||
5985
+ (following && (
5986
+ (peek_at(parser, following) == '.') ||
5987
+ (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
5988
+ ))
5989
+ ) {
5450
5990
  if (!lexed_comment) parser_lex_ignored_newline(parser);
5451
5991
  lexed_comment = false;
5452
5992
  goto lex_next_token;
@@ -5459,7 +5999,7 @@ parser_lex(yp_parser_t *parser) {
5459
5999
  // To match ripper, we need to emit an ignored newline even though
5460
6000
  // its a real newline in the case that we have a beginless range
5461
6001
  // on a subsequent line.
5462
- if ((next_content + 1 < parser->end) && (next_content[1] == '.')) {
6002
+ if (peek_at(parser, next_content + 1) == '.') {
5463
6003
  if (!lexed_comment) parser_lex_ignored_newline(parser);
5464
6004
  lex_state_set(parser, YP_LEX_STATE_BEG);
5465
6005
  parser->command_start = true;
@@ -5477,7 +6017,7 @@ parser_lex(yp_parser_t *parser) {
5477
6017
 
5478
6018
  // If we hit a &. after a newline, then we're in a call chain and
5479
6019
  // we need to return the call operator.
5480
- if (next_content + 1 < parser->end && next_content[0] == '&' && next_content[1] == '.') {
6020
+ if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
5481
6021
  if (!lexed_comment) parser_lex_ignored_newline(parser);
5482
6022
  lex_state_set(parser, YP_LEX_STATE_DOT);
5483
6023
  parser->current.start = next_content;
@@ -5674,7 +6214,7 @@ parser_lex(yp_parser_t *parser) {
5674
6214
 
5675
6215
  // = => =~ == === =begin
5676
6216
  case '=':
5677
- if (current_token_starts_line(parser) && strncmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_at(parser, 5))) {
6217
+ if (current_token_starts_line(parser) && memcmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_offset(parser, 5))) {
5678
6218
  yp_token_type_t type = lex_embdoc(parser);
5679
6219
 
5680
6220
  if (type == YP_TOKEN_EOF) {
@@ -5712,7 +6252,7 @@ parser_lex(yp_parser_t *parser) {
5712
6252
  !lex_state_end_p(parser) &&
5713
6253
  (!lex_state_p(parser, YP_LEX_STATE_ARG_ANY) || lex_state_p(parser, YP_LEX_STATE_LABELED) || space_seen)
5714
6254
  ) {
5715
- const char *end = parser->current.end;
6255
+ const uint8_t *end = parser->current.end;
5716
6256
 
5717
6257
  yp_heredoc_quote_t quote = YP_HEREDOC_QUOTE_NONE;
5718
6258
  yp_heredoc_indent_t indent = YP_HEREDOC_INDENT_NONE;
@@ -5734,7 +6274,7 @@ parser_lex(yp_parser_t *parser) {
5734
6274
  quote = YP_HEREDOC_QUOTE_SINGLE;
5735
6275
  }
5736
6276
 
5737
- const char *ident_start = parser->current.end;
6277
+ const uint8_t *ident_start = parser->current.end;
5738
6278
  size_t width = 0;
5739
6279
 
5740
6280
  if (parser->current.end >= parser->end) {
@@ -5757,7 +6297,7 @@ parser_lex(yp_parser_t *parser) {
5757
6297
  }
5758
6298
 
5759
6299
  size_t ident_length = (size_t) (parser->current.end - ident_start);
5760
- if (quote != YP_HEREDOC_QUOTE_NONE && !match(parser, (char) quote)) {
6300
+ if (quote != YP_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
5761
6301
  // TODO: handle unterminated heredoc
5762
6302
  }
5763
6303
 
@@ -5773,7 +6313,7 @@ parser_lex(yp_parser_t *parser) {
5773
6313
  });
5774
6314
 
5775
6315
  if (parser->heredoc_end == NULL) {
5776
- const char *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
6316
+ const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
5777
6317
 
5778
6318
  if (body_start == NULL) {
5779
6319
  // If there is no newline after the heredoc identifier, then
@@ -6098,13 +6638,13 @@ parser_lex(yp_parser_t *parser) {
6098
6638
  LEX(YP_TOKEN_COLON_COLON);
6099
6639
  }
6100
6640
 
6101
- if (lex_state_end_p(parser) || yp_char_is_whitespace(*parser->current.end) || (*parser->current.end == '#')) {
6641
+ if (lex_state_end_p(parser) || yp_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
6102
6642
  lex_state_set(parser, YP_LEX_STATE_BEG);
6103
6643
  LEX(YP_TOKEN_COLON);
6104
6644
  }
6105
6645
 
6106
- if ((*parser->current.end == '"') || (*parser->current.end == '\'')) {
6107
- lex_mode_push_string(parser, *parser->current.end == '"', false, '\0', *parser->current.end);
6646
+ if (peek(parser) == '"' || peek(parser) == '\'') {
6647
+ lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
6108
6648
  parser->current.end++;
6109
6649
  }
6110
6650
 
@@ -6173,25 +6713,26 @@ parser_lex(yp_parser_t *parser) {
6173
6713
  }
6174
6714
  else if(
6175
6715
  lex_state_beg_p(parser) ||
6176
- (lex_state_p(parser, YP_LEX_STATE_FITEM) && (*parser->current.end == 's')) ||
6716
+ (lex_state_p(parser, YP_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
6177
6717
  lex_state_spcarg_p(parser, space_seen)
6178
6718
  ) {
6179
6719
  if (!parser->encoding.alnum_char(parser->current.end, parser->end - parser->current.end)) {
6180
6720
  lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
6181
6721
 
6182
- if (*parser->current.end == '\r') {
6722
+ size_t eol_length = match_eol(parser);
6723
+ if (eol_length) {
6724
+ parser->current.end += eol_length;
6725
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
6726
+ } else {
6183
6727
  parser->current.end++;
6184
6728
  }
6185
6729
 
6186
- if (*parser->current.end == '\n') {
6187
- yp_newline_list_append(&parser->newline_list, parser->current.end);
6730
+ if (parser->current.end < parser->end) {
6731
+ LEX(YP_TOKEN_STRING_BEGIN);
6188
6732
  }
6189
-
6190
- parser->current.end++;
6191
- LEX(YP_TOKEN_STRING_BEGIN);
6192
6733
  }
6193
6734
 
6194
- switch (*parser->current.end) {
6735
+ switch (peek(parser)) {
6195
6736
  case 'i': {
6196
6737
  parser->current.end++;
6197
6738
 
@@ -6215,6 +6756,7 @@ parser_lex(yp_parser_t *parser) {
6215
6756
 
6216
6757
  if (parser->current.end < parser->end) {
6217
6758
  lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
6759
+ yp_newline_list_check_append(&parser->newline_list, parser->current.end);
6218
6760
  parser->current.end++;
6219
6761
  }
6220
6762
 
@@ -6225,6 +6767,7 @@ parser_lex(yp_parser_t *parser) {
6225
6767
 
6226
6768
  if (parser->current.end < parser->end) {
6227
6769
  lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
6770
+ yp_newline_list_check_append(&parser->newline_list, parser->current.end);
6228
6771
  parser->current.end++;
6229
6772
  }
6230
6773
 
@@ -6235,6 +6778,7 @@ parser_lex(yp_parser_t *parser) {
6235
6778
 
6236
6779
  if (parser->current.end < parser->end) {
6237
6780
  lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
6781
+ yp_newline_list_check_append(&parser->newline_list, parser->current.end);
6238
6782
  parser->current.end++;
6239
6783
  }
6240
6784
 
@@ -6284,7 +6828,7 @@ parser_lex(yp_parser_t *parser) {
6284
6828
  // unparseable. In this case we'll just drop it from the parser
6285
6829
  // and skip past it and hope that the next token is something
6286
6830
  // that we can parse.
6287
- yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "invalid %% token");
6831
+ yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid %% token");
6288
6832
  goto lex_next_token;
6289
6833
  }
6290
6834
  }
@@ -6335,9 +6879,10 @@ parser_lex(yp_parser_t *parser) {
6335
6879
  if (
6336
6880
  ((parser->current.end - parser->current.start) == 7) &&
6337
6881
  current_token_starts_line(parser) &&
6338
- (strncmp(parser->current.start, "__END__", 7) == 0) &&
6339
- (parser->current.end == parser->end || *parser->current.end == '\n' || (*parser->current.end == '\r' && parser->current.end[1] == '\n'))
6340
- ) {
6882
+ (memcmp(parser->current.start, "__END__", 7) == 0) &&
6883
+ (parser->current.end == parser->end || match_eol(parser))
6884
+ )
6885
+ {
6341
6886
  parser->current.end = parser->end;
6342
6887
  parser->current.type = YP_TOKEN___END__;
6343
6888
  parser_lex_callback(parser);
@@ -6394,7 +6939,7 @@ parser_lex(yp_parser_t *parser) {
6394
6939
 
6395
6940
  if ((whitespace = yp_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list, should_stop)) > 0) {
6396
6941
  parser->current.end += whitespace;
6397
- if (parser->current.end[-1] == '\n') {
6942
+ if (peek_offset(parser, -1) == '\n') {
6398
6943
  // mutates next_start
6399
6944
  parser_flush_heredoc_end(parser);
6400
6945
  }
@@ -6410,8 +6955,8 @@ parser_lex(yp_parser_t *parser) {
6410
6955
  // Here we'll get a list of the places where strpbrk should break,
6411
6956
  // and then find the first one.
6412
6957
  yp_lex_mode_t *lex_mode = parser->lex_modes.current;
6413
- const char *breakpoints = lex_mode->as.list.breakpoints;
6414
- const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
6958
+ const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
6959
+ const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
6415
6960
 
6416
6961
  while (breakpoint != NULL) {
6417
6962
  // If we hit a null byte, skip directly past it.
@@ -6458,12 +7003,25 @@ parser_lex(yp_parser_t *parser) {
6458
7003
  // and find the next breakpoint.
6459
7004
  if (*breakpoint == '\\') {
6460
7005
  yp_unescape_type_t unescape_type = lex_mode->as.list.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
6461
- size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
7006
+ size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
7007
+ if (difference == 0) {
7008
+ // we're at the end of the file
7009
+ breakpoint = NULL;
7010
+ continue;
7011
+ }
6462
7012
 
6463
- // If the result is an escaped newline, then we need to
6464
- // track that newline.
7013
+ // If the result is an escaped newline ...
6465
7014
  if (breakpoint[difference - 1] == '\n') {
6466
- yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7015
+ if (parser->heredoc_end) {
7016
+ // ... if we are on the same line as a heredoc, flush the heredoc and
7017
+ // continue parsing after heredoc_end.
7018
+ parser->current.end = breakpoint + difference;
7019
+ parser_flush_heredoc_end(parser);
7020
+ LEX(YP_TOKEN_STRING_CONTENT);
7021
+ } else {
7022
+ // ... else track the newline.
7023
+ yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7024
+ }
6467
7025
  }
6468
7026
 
6469
7027
  breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
@@ -6499,7 +7057,13 @@ parser_lex(yp_parser_t *parser) {
6499
7057
 
6500
7058
  case YP_LEX_REGEXP: {
6501
7059
  // First, we'll set to start of this token to be the current end.
6502
- parser->current.start = parser->current.end;
7060
+ if (parser->next_start == NULL) {
7061
+ parser->current.start = parser->current.end;
7062
+ } else {
7063
+ parser->current.start = parser->next_start;
7064
+ parser->current.end = parser->next_start;
7065
+ parser->next_start = NULL;
7066
+ }
6503
7067
 
6504
7068
  // We'll check if we're at the end of the file. If we are, then we need to
6505
7069
  // return the EOF token.
@@ -6513,8 +7077,8 @@ parser_lex(yp_parser_t *parser) {
6513
7077
  // These are the places where we need to split up the content of the
6514
7078
  // regular expression. We'll use strpbrk to find the first of these
6515
7079
  // characters.
6516
- const char *breakpoints = lex_mode->as.regexp.breakpoints;
6517
- const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
7080
+ const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
7081
+ const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
6518
7082
 
6519
7083
  while (breakpoint != NULL) {
6520
7084
  // If we hit a null byte, skip directly past it.
@@ -6526,7 +7090,16 @@ parser_lex(yp_parser_t *parser) {
6526
7090
  // If we've hit a newline, then we need to track that in the
6527
7091
  // list of newlines.
6528
7092
  if (*breakpoint == '\n') {
6529
- yp_newline_list_append(&parser->newline_list, breakpoint);
7093
+ // For the special case of a newline-terminated regular expression, we will pass
7094
+ // through this branch twice -- once with YP_TOKEN_REGEXP_BEGIN and then again
7095
+ // with YP_TOKEN_STRING_CONTENT. Let's avoid tracking the newline twice, by
7096
+ // tracking it only in the REGEXP_BEGIN case.
7097
+ if (
7098
+ !(lex_mode->as.regexp.terminator == '\n' && parser->current.type != YP_TOKEN_REGEXP_BEGIN)
7099
+ && parser->heredoc_end == NULL
7100
+ ) {
7101
+ yp_newline_list_append(&parser->newline_list, breakpoint);
7102
+ }
6530
7103
 
6531
7104
  if (lex_mode->as.regexp.terminator != '\n') {
6532
7105
  // If the terminator is not a newline, then we can set
@@ -6567,12 +7140,25 @@ parser_lex(yp_parser_t *parser) {
6567
7140
  // literally. In this case we'll skip past the next character
6568
7141
  // and find the next breakpoint.
6569
7142
  if (*breakpoint == '\\') {
6570
- size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, YP_UNESCAPE_ALL, false, &parser->error_list);
7143
+ size_t difference = yp_unescape_calculate_difference(parser, breakpoint, YP_UNESCAPE_ALL, false);
7144
+ if (difference == 0) {
7145
+ // we're at the end of the file
7146
+ breakpoint = NULL;
7147
+ continue;
7148
+ }
6571
7149
 
6572
- // If the result is an escaped newline, then we need to
6573
- // track that newline.
7150
+ // If the result is an escaped newline ...
6574
7151
  if (breakpoint[difference - 1] == '\n') {
6575
- yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7152
+ if (parser->heredoc_end) {
7153
+ // ... if we are on the same line as a heredoc, flush the heredoc and
7154
+ // continue parsing after heredoc_end.
7155
+ parser->current.end = breakpoint + difference;
7156
+ parser_flush_heredoc_end(parser);
7157
+ LEX(YP_TOKEN_STRING_CONTENT);
7158
+ } else {
7159
+ // ... else track the newline.
7160
+ yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7161
+ }
6576
7162
  }
6577
7163
 
6578
7164
  breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
@@ -6624,8 +7210,8 @@ parser_lex(yp_parser_t *parser) {
6624
7210
 
6625
7211
  // These are the places where we need to split up the content of the
6626
7212
  // string. We'll use strpbrk to find the first of these characters.
6627
- const char *breakpoints = parser->lex_modes.current->as.string.breakpoints;
6628
- const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
7213
+ const uint8_t *breakpoints = parser->lex_modes.current->as.string.breakpoints;
7214
+ const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
6629
7215
 
6630
7216
  while (breakpoint != NULL) {
6631
7217
  // If we hit the incrementor, then we'll increment then nesting and
@@ -6660,21 +7246,18 @@ parser_lex(yp_parser_t *parser) {
6660
7246
 
6661
7247
  // Otherwise we need to switch back to the parent lex mode and
6662
7248
  // return the end of the string.
6663
- if (*parser->current.end == '\r' && parser->current.end + 1 < parser->end && parser->current.end[1] == '\n') {
6664
- parser->current.end = breakpoint + 2;
6665
- yp_newline_list_append(&parser->newline_list, breakpoint + 1);
7249
+ size_t eol_length = match_eol_at(parser, breakpoint);
7250
+ if (eol_length) {
7251
+ parser->current.end = breakpoint + eol_length;
7252
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
6666
7253
  } else {
6667
- if (*parser->current.end == '\n') {
6668
- yp_newline_list_append(&parser->newline_list, parser->current.end);
6669
- }
6670
-
6671
7254
  parser->current.end = breakpoint + 1;
6672
7255
  }
6673
7256
 
6674
7257
  if (
6675
7258
  parser->lex_modes.current->as.string.label_allowed &&
6676
7259
  (peek(parser) == ':') &&
6677
- (peek_at(parser, 1) != ':')
7260
+ (peek_offset(parser, 1) != ':')
6678
7261
  ) {
6679
7262
  parser->current.end++;
6680
7263
  lex_state_set(parser, YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED);
@@ -6712,12 +7295,25 @@ parser_lex(yp_parser_t *parser) {
6712
7295
  // literally. In this case we'll skip past the next character and
6713
7296
  // find the next breakpoint.
6714
7297
  yp_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
6715
- size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
7298
+ size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
7299
+ if (difference == 0) {
7300
+ // we're at the end of the file
7301
+ breakpoint = NULL;
7302
+ break;
7303
+ }
6716
7304
 
6717
- // If the result is an escaped newline, then we need to
6718
- // track that newline.
7305
+ // If the result is an escaped newline ...
6719
7306
  if (breakpoint[difference - 1] == '\n') {
6720
- yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7307
+ if (parser->heredoc_end) {
7308
+ // ... if we are on the same line as a heredoc, flush the heredoc and
7309
+ // continue parsing after heredoc_end.
7310
+ parser->current.end = breakpoint + difference;
7311
+ parser_flush_heredoc_end(parser);
7312
+ LEX(YP_TOKEN_STRING_CONTENT);
7313
+ } else {
7314
+ // ... else track the newline.
7315
+ yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7316
+ }
6721
7317
  }
6722
7318
 
6723
7319
  breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
@@ -6765,27 +7361,25 @@ parser_lex(yp_parser_t *parser) {
6765
7361
 
6766
7362
  // Now let's grab the information about the identifier off of the current
6767
7363
  // lex mode.
6768
- const char *ident_start = parser->lex_modes.current->as.heredoc.ident_start;
7364
+ const uint8_t *ident_start = parser->lex_modes.current->as.heredoc.ident_start;
6769
7365
  size_t ident_length = parser->lex_modes.current->as.heredoc.ident_length;
6770
7366
 
6771
7367
  // If we are immediately following a newline and we have hit the
6772
7368
  // terminator, then we need to return the ending of the heredoc.
6773
- if (parser->current.start[-1] == '\n') {
6774
- const char *start = parser->current.start;
7369
+ if (current_token_starts_line(parser)) {
7370
+ const uint8_t *start = parser->current.start;
6775
7371
  if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) {
6776
7372
  start += yp_strspn_inline_whitespace(start, parser->end - start);
6777
7373
  }
6778
7374
 
6779
- if ((start + ident_length <= parser->end) && (strncmp(start, ident_start, ident_length) == 0)) {
7375
+ if ((start + ident_length <= parser->end) && (memcmp(start, ident_start, ident_length) == 0)) {
6780
7376
  bool matched = true;
6781
7377
  bool at_end = false;
6782
7378
 
6783
- if ((start + ident_length < parser->end) && (start[ident_length] == '\n')) {
6784
- parser->current.end = start + ident_length + 1;
6785
- yp_newline_list_append(&parser->newline_list, start + ident_length);
6786
- } else if ((start + ident_length + 1 < parser->end) && (start[ident_length] == '\r') && (start[ident_length + 1] == '\n')) {
6787
- parser->current.end = start + ident_length + 2;
6788
- yp_newline_list_append(&parser->newline_list, start + ident_length + 1);
7379
+ size_t eol_length = match_eol_at(parser, start + ident_length);
7380
+ if (eol_length) {
7381
+ parser->current.end = start + ident_length + eol_length;
7382
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
6789
7383
  } else if (parser->end == (start + ident_length)) {
6790
7384
  parser->current.end = start + ident_length;
6791
7385
  at_end = true;
@@ -6813,14 +7407,14 @@ parser_lex(yp_parser_t *parser) {
6813
7407
  // Otherwise we'll be parsing string content. These are the places where
6814
7408
  // we need to split up the content of the heredoc. We'll use strpbrk to
6815
7409
  // find the first of these characters.
6816
- char breakpoints[] = "\n\\#";
7410
+ uint8_t breakpoints[] = "\n\\#";
6817
7411
 
6818
7412
  yp_heredoc_quote_t quote = parser->lex_modes.current->as.heredoc.quote;
6819
7413
  if (quote == YP_HEREDOC_QUOTE_SINGLE) {
6820
7414
  breakpoints[2] = '\0';
6821
7415
  }
6822
7416
 
6823
- const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
7417
+ const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
6824
7418
 
6825
7419
  while (breakpoint != NULL) {
6826
7420
  switch (*breakpoint) {
@@ -6837,7 +7431,7 @@ parser_lex(yp_parser_t *parser) {
6837
7431
 
6838
7432
  yp_newline_list_append(&parser->newline_list, breakpoint);
6839
7433
 
6840
- const char *start = breakpoint + 1;
7434
+ const uint8_t *start = breakpoint + 1;
6841
7435
  if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) {
6842
7436
  start += yp_strspn_inline_whitespace(start, parser->end - start);
6843
7437
  }
@@ -6848,21 +7442,12 @@ parser_lex(yp_parser_t *parser) {
6848
7442
  // again and return the end of the heredoc.
6849
7443
  if (
6850
7444
  (start + ident_length <= parser->end) &&
6851
- (strncmp(start, ident_start, ident_length) == 0)
7445
+ (memcmp(start, ident_start, ident_length) == 0)
6852
7446
  ) {
6853
- // Heredoc terminators must be followed by a newline or EOF to be valid.
6854
- if (start + ident_length == parser->end || start[ident_length] == '\n') {
6855
- parser->current.end = breakpoint + 1;
6856
- LEX(YP_TOKEN_STRING_CONTENT);
6857
- }
6858
-
6859
- // They can also be followed by a carriage return and then a
6860
- // newline. Be sure here that we don't accidentally read off the
6861
- // end.
7447
+ // Heredoc terminators must be followed by a newline, CRLF, or EOF to be valid.
6862
7448
  if (
6863
- (start + ident_length + 1 < parser->end) &&
6864
- (start[ident_length] == '\r') &&
6865
- (start[ident_length + 1] == '\n')
7449
+ start + ident_length == parser->end ||
7450
+ match_eol_at(parser, start + ident_length)
6866
7451
  ) {
6867
7452
  parser->current.end = breakpoint + 1;
6868
7453
  LEX(YP_TOKEN_STRING_CONTENT);
@@ -6881,18 +7466,20 @@ parser_lex(yp_parser_t *parser) {
6881
7466
  // stop looping before the newline and not after the
6882
7467
  // newline so that we can still potentially find the
6883
7468
  // terminator of the heredoc.
6884
- if (breakpoint + 1 < parser->end && breakpoint[1] == '\n') {
6885
- breakpoint++;
6886
- } else if (breakpoint + 2 < parser->end && breakpoint[1] == '\r' && breakpoint[2] == '\n') {
6887
- breakpoint += 2;
7469
+ size_t eol_length = match_eol_at(parser, breakpoint + 1);
7470
+ if (eol_length) {
7471
+ breakpoint += eol_length;
6888
7472
  } else {
6889
7473
  yp_unescape_type_t unescape_type = (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL;
6890
- size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
6891
-
6892
- if (breakpoint[difference - 1] == '\n') {
6893
- yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7474
+ size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
7475
+ if (difference == 0) {
7476
+ // we're at the end of the file
7477
+ breakpoint = NULL;
7478
+ break;
6894
7479
  }
6895
7480
 
7481
+ yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
7482
+
6896
7483
  breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
6897
7484
  }
6898
7485
 
@@ -6945,7 +7532,7 @@ yp_regular_expression_node_create_and_unescape(yp_parser_t *parser, const yp_tok
6945
7532
  assert((content->end - content->start) >= 0);
6946
7533
  yp_string_shared_init(&node->unescaped, content->start, content->end);
6947
7534
 
6948
- yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type, &parser->error_list);
7535
+ yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
6949
7536
  return node;
6950
7537
  }
6951
7538
 
@@ -6956,7 +7543,18 @@ yp_symbol_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
6956
7543
  assert((content->end - content->start) >= 0);
6957
7544
  yp_string_shared_init(&node->unescaped, content->start, content->end);
6958
7545
 
6959
- yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type, &parser->error_list);
7546
+ yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
7547
+ return node;
7548
+ }
7549
+
7550
+ static yp_string_node_t *
7551
+ yp_char_literal_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) {
7552
+ yp_string_node_t *node = yp_string_node_create(parser, opening, content, closing);
7553
+
7554
+ assert((content->end - content->start) >= 0);
7555
+ yp_string_shared_init(&node->unescaped, content->start, content->end);
7556
+
7557
+ yp_unescape_manipulate_char_literal(parser, &node->unescaped, unescape_type);
6960
7558
  return node;
6961
7559
  }
6962
7560
 
@@ -6967,7 +7565,7 @@ yp_string_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
6967
7565
  assert((content->end - content->start) >= 0);
6968
7566
  yp_string_shared_init(&node->unescaped, content->start, content->end);
6969
7567
 
6970
- yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type, &parser->error_list);
7568
+ yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
6971
7569
  return node;
6972
7570
  }
6973
7571
 
@@ -6978,7 +7576,7 @@ yp_xstring_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openi
6978
7576
  assert((content->end - content->start) >= 0);
6979
7577
  yp_string_shared_init(&node->unescaped, content->start, content->end);
6980
7578
 
6981
- yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
7579
+ yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL);
6982
7580
  return node;
6983
7581
  }
6984
7582
 
@@ -7315,27 +7913,162 @@ token_begins_expression_p(yp_token_type_t type) {
7315
7913
  }
7316
7914
  }
7317
7915
 
7318
- // Parse an expression with the given binding power that may be optionally
7319
- // prefixed by the * operator.
7320
- static yp_node_t *
7321
- parse_starred_expression(yp_parser_t *parser, yp_binding_power_t binding_power, const char *message) {
7322
- if (accept(parser, YP_TOKEN_USTAR)) {
7323
- yp_token_t operator = parser->previous;
7324
- yp_node_t *expression = parse_expression(parser, binding_power, "Expected expression after `*'.");
7325
- return (yp_node_t *) yp_splat_node_create(parser, &operator, expression);
7326
- }
7916
+ // Parse an expression with the given binding power that may be optionally
7917
+ // prefixed by the * operator.
7918
+ static yp_node_t *
7919
+ parse_starred_expression(yp_parser_t *parser, yp_binding_power_t binding_power, const char *message) {
7920
+ if (accept(parser, YP_TOKEN_USTAR)) {
7921
+ yp_token_t operator = parser->previous;
7922
+ yp_node_t *expression = parse_expression(parser, binding_power, "Expected expression after `*'.");
7923
+ return (yp_node_t *) yp_splat_node_create(parser, &operator, expression);
7924
+ }
7925
+
7926
+ return parse_expression(parser, binding_power, message);
7927
+ }
7928
+
7929
+ // Convert the given node into a valid target node.
7930
+ static yp_node_t *
7931
+ parse_target(yp_parser_t *parser, yp_node_t *target) {
7932
+ switch (YP_NODE_TYPE(target)) {
7933
+ case YP_NODE_MISSING_NODE:
7934
+ return target;
7935
+ case YP_NODE_CLASS_VARIABLE_READ_NODE:
7936
+ assert(sizeof(yp_class_variable_target_node_t) == sizeof(yp_class_variable_read_node_t));
7937
+ target->type = YP_NODE_CLASS_VARIABLE_TARGET_NODE;
7938
+ return target;
7939
+ case YP_NODE_CONSTANT_PATH_NODE:
7940
+ assert(sizeof(yp_constant_path_target_node_t) == sizeof(yp_constant_path_node_t));
7941
+ target->type = YP_NODE_CONSTANT_PATH_TARGET_NODE;
7942
+ return target;
7943
+ case YP_NODE_CONSTANT_READ_NODE:
7944
+ assert(sizeof(yp_constant_target_node_t) == sizeof(yp_constant_read_node_t));
7945
+ target->type = YP_NODE_CONSTANT_TARGET_NODE;
7946
+ return target;
7947
+ case YP_NODE_BACK_REFERENCE_READ_NODE:
7948
+ assert(sizeof(yp_global_variable_target_node_t) == sizeof(yp_back_reference_read_node_t));
7949
+ /* fallthrough */
7950
+ case YP_NODE_NUMBERED_REFERENCE_READ_NODE:
7951
+ assert(sizeof(yp_global_variable_target_node_t) == sizeof(yp_numbered_reference_read_node_t));
7952
+ yp_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, "Can't set variable");
7953
+ /* fallthrough */
7954
+ case YP_NODE_GLOBAL_VARIABLE_READ_NODE:
7955
+ assert(sizeof(yp_global_variable_target_node_t) == sizeof(yp_global_variable_read_node_t));
7956
+ target->type = YP_NODE_GLOBAL_VARIABLE_TARGET_NODE;
7957
+ return target;
7958
+ case YP_NODE_LOCAL_VARIABLE_READ_NODE:
7959
+ assert(sizeof(yp_local_variable_target_node_t) == sizeof(yp_local_variable_read_node_t));
7960
+ target->type = YP_NODE_LOCAL_VARIABLE_TARGET_NODE;
7961
+ return target;
7962
+ case YP_NODE_INSTANCE_VARIABLE_READ_NODE:
7963
+ assert(sizeof(yp_instance_variable_target_node_t) == sizeof(yp_instance_variable_read_node_t));
7964
+ target->type = YP_NODE_INSTANCE_VARIABLE_TARGET_NODE;
7965
+ return target;
7966
+ case YP_NODE_MULTI_WRITE_NODE:
7967
+ return target;
7968
+ case YP_NODE_SPLAT_NODE: {
7969
+ yp_splat_node_t *splat = (yp_splat_node_t *) target;
7970
+
7971
+ if (splat->expression != NULL) {
7972
+ splat->expression = parse_target(parser, splat->expression);
7973
+ }
7974
+
7975
+ yp_token_t operator = not_provided(parser);
7976
+ yp_location_t location = { .start = NULL, .end = NULL };
7977
+
7978
+ yp_multi_write_node_t *multi_write = yp_multi_write_node_create(parser, &operator, NULL, &location, &location);
7979
+ yp_multi_write_node_targets_append(multi_write, (yp_node_t *) splat);
7980
+
7981
+ return (yp_node_t *) multi_write;
7982
+ }
7983
+ case YP_NODE_CALL_NODE: {
7984
+ yp_call_node_t *call = (yp_call_node_t *) target;
7985
+
7986
+ // If we have no arguments to the call node and we need this to be a
7987
+ // target then this is either a method call or a local variable write.
7988
+ if (
7989
+ (call->opening_loc.start == NULL) &&
7990
+ (call->arguments == NULL) &&
7991
+ (call->block == NULL)
7992
+ ) {
7993
+ if (call->receiver == NULL) {
7994
+ // When we get here, we have a local variable write, because it
7995
+ // was previously marked as a method call but now we have an =.
7996
+ // This looks like:
7997
+ //
7998
+ // foo = 1
7999
+ //
8000
+ // When it was parsed in the prefix position, foo was seen as a
8001
+ // method call with no receiver and no arguments. Now we have an
8002
+ // =, so we know it's a local variable write.
8003
+ const yp_location_t message = call->message_loc;
8004
+
8005
+ yp_parser_local_add_location(parser, message.start, message.end);
8006
+ yp_node_destroy(parser, target);
7327
8007
 
7328
- return parse_expression(parser, binding_power, message);
8008
+ const yp_token_t name = { .type = YP_TOKEN_IDENTIFIER, .start = message.start, .end = message.end };
8009
+ target = (yp_node_t *) yp_local_variable_read_node_create(parser, &name, 0);
8010
+
8011
+ assert(sizeof(yp_local_variable_target_node_t) == sizeof(yp_local_variable_read_node_t));
8012
+ target->type = YP_NODE_LOCAL_VARIABLE_TARGET_NODE;
8013
+
8014
+ if (token_is_numbered_parameter(message.start, message.end)) {
8015
+ yp_diagnostic_list_append(&parser->error_list, message.start, message.end, "reserved for numbered parameter");
8016
+ }
8017
+
8018
+ return target;
8019
+ }
8020
+
8021
+ // The method name needs to change. If we previously had foo, we now
8022
+ // need foo=. In this case we'll allocate a new owned string, copy
8023
+ // the previous method name in, and append an =.
8024
+ size_t length = yp_string_length(&call->name);
8025
+
8026
+ uint8_t *name = calloc(length + 1, sizeof(uint8_t));
8027
+ if (name == NULL) return NULL;
8028
+
8029
+ memcpy(name, yp_string_source(&call->name), length);
8030
+ name[length] = '=';
8031
+
8032
+ // Now switch the name to the new string.
8033
+ yp_string_free(&call->name);
8034
+ yp_string_owned_init(&call->name, name, length + 1);
8035
+
8036
+ return target;
8037
+ }
8038
+
8039
+ // If there is no call operator and the message is "[]" then this is
8040
+ // an aref expression, and we can transform it into an aset
8041
+ // expression.
8042
+ if (
8043
+ (call->operator_loc.start == NULL) &&
8044
+ (call->message_loc.start[0] == '[') &&
8045
+ (call->message_loc.end[-1] == ']') &&
8046
+ (call->block == NULL)
8047
+ ) {
8048
+ // Free the previous name and replace it with "[]=".
8049
+ yp_string_free(&call->name);
8050
+ yp_string_constant_init(&call->name, "[]=", 3);
8051
+ return target;
8052
+ }
8053
+ }
8054
+ /* fallthrough */
8055
+ default:
8056
+ // In this case we have a node that we don't know how to convert
8057
+ // into a target. We need to treat it as an error. For now, we'll
8058
+ // mark it as an error and just skip right past it.
8059
+ yp_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, "Unexpected write target.");
8060
+ return target;
8061
+ }
7329
8062
  }
7330
8063
 
7331
- // Convert the given node into a valid target node.
8064
+ // Convert the given node into a valid write node.
7332
8065
  static yp_node_t *
7333
- parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_node_t *value) {
8066
+ parse_write(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_node_t *value) {
7334
8067
  switch (YP_NODE_TYPE(target)) {
7335
8068
  case YP_NODE_MISSING_NODE:
7336
8069
  return target;
7337
8070
  case YP_NODE_CLASS_VARIABLE_READ_NODE: {
7338
- yp_class_variable_write_node_t *write_node = yp_class_variable_read_node_to_class_variable_write_node(parser, (yp_class_variable_read_node_t *) target, operator, value);
8071
+ yp_class_variable_write_node_t *write_node = yp_class_variable_write_node_create(parser, (yp_class_variable_read_node_t *) target, operator, value);
7339
8072
  yp_node_destroy(parser, target);
7340
8073
  return (yp_node_t *) write_node;
7341
8074
  }
@@ -7360,7 +8093,7 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7360
8093
  case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
7361
8094
  yp_local_variable_read_node_t *local_read = (yp_local_variable_read_node_t *) target;
7362
8095
 
7363
- yp_constant_id_t constant_id = local_read->constant_id;
8096
+ yp_constant_id_t constant_id = local_read->name;
7364
8097
  uint32_t depth = local_read->depth;
7365
8098
 
7366
8099
  yp_location_t name_loc = target->location;
@@ -7377,18 +8110,15 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7377
8110
  yp_multi_write_node_t *multi_write = (yp_multi_write_node_t *) target;
7378
8111
  yp_multi_write_node_operator_loc_set(multi_write, operator);
7379
8112
 
7380
- if (value != NULL) {
7381
- multi_write->value = value;
7382
- multi_write->base.location.end = value->location.end;
7383
- }
7384
-
8113
+ multi_write->value = value;
8114
+ multi_write->base.location.end = value->location.end;
7385
8115
  return (yp_node_t *) multi_write;
7386
8116
  }
7387
8117
  case YP_NODE_SPLAT_NODE: {
7388
8118
  yp_splat_node_t *splat = (yp_splat_node_t *) target;
7389
8119
 
7390
8120
  if (splat->expression != NULL) {
7391
- splat->expression = parse_target(parser, splat->expression, operator, value);
8121
+ splat->expression = parse_write(parser, splat->expression, operator, value);
7392
8122
  }
7393
8123
 
7394
8124
  yp_location_t location = { .start = NULL, .end = NULL };
@@ -7441,22 +8171,21 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7441
8171
  // method call with no arguments. Now we have an =, so we know it's
7442
8172
  // a method call with an argument. In this case we will create the
7443
8173
  // arguments node, parse the argument, and add it to the list.
7444
- if (value) {
7445
- yp_arguments_node_t *arguments = yp_arguments_node_create(parser);
7446
- call->arguments = arguments;
7447
- yp_arguments_node_arguments_append(arguments, value);
7448
- target->location.end = arguments->base.location.end;
7449
- }
8174
+ yp_arguments_node_t *arguments = yp_arguments_node_create(parser);
8175
+ call->arguments = arguments;
8176
+ yp_arguments_node_arguments_append(arguments, value);
8177
+ target->location.end = arguments->base.location.end;
7450
8178
 
7451
8179
  // The method name needs to change. If we previously had foo, we now
7452
8180
  // need foo=. In this case we'll allocate a new owned string, copy
7453
8181
  // the previous method name in, and append an =.
7454
8182
  size_t length = yp_string_length(&call->name);
7455
8183
 
7456
- char *name = calloc(length + 2, sizeof(char));
8184
+ uint8_t *name = calloc(length + 1, sizeof(uint8_t));
7457
8185
  if (name == NULL) return NULL;
7458
8186
 
7459
- snprintf(name, length + 2, "%.*s=", (int) length, yp_string_source(&call->name));
8187
+ memcpy(name, yp_string_source(&call->name), length);
8188
+ name[length] = '=';
7460
8189
 
7461
8190
  // Now switch the name to the new string.
7462
8191
  yp_string_free(&call->name);
@@ -7474,15 +8203,13 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7474
8203
  (call->message_loc.end[-1] == ']') &&
7475
8204
  (call->block == NULL)
7476
8205
  ) {
7477
- if (value != NULL) {
7478
- if (call->arguments == NULL) {
7479
- call->arguments = yp_arguments_node_create(parser);
7480
- }
7481
-
7482
- yp_arguments_node_arguments_append(call->arguments, value);
7483
- target->location.end = value->location.end;
8206
+ if (call->arguments == NULL) {
8207
+ call->arguments = yp_arguments_node_create(parser);
7484
8208
  }
7485
8209
 
8210
+ yp_arguments_node_arguments_append(call->arguments, value);
8211
+ target->location.end = value->location.end;
8212
+
7486
8213
  // Free the previous name and replace it with "[]=".
7487
8214
  yp_string_free(&call->name);
7488
8215
  yp_string_constant_init(&call->name, "[]=", 3);
@@ -7494,9 +8221,7 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7494
8221
  // syntax error. In this case we'll fall through to our default
7495
8222
  // handling. We need to free the value that we parsed because there
7496
8223
  // is no way for us to attach it to the tree at this point.
7497
- if (value != NULL) {
7498
- yp_node_destroy(parser, value);
7499
- }
8224
+ yp_node_destroy(parser, value);
7500
8225
  }
7501
8226
  /* fallthrough */
7502
8227
  default:
@@ -7524,7 +8249,7 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7524
8249
  // location that we know requires a multi write, as in the case of a for loop.
7525
8250
  // In this case we will set up the parsing loop slightly differently.
7526
8251
  if (first_target != NULL) {
7527
- first_target = parse_target(parser, first_target, &operator, NULL);
8252
+ first_target = parse_target(parser, first_target);
7528
8253
 
7529
8254
  if (!match_type_p(parser, YP_TOKEN_COMMA)) {
7530
8255
  return first_target;
@@ -7555,9 +8280,8 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7555
8280
  yp_node_t *name = NULL;
7556
8281
 
7557
8282
  if (token_begins_expression_p(parser->current.type)) {
7558
- yp_token_t operator = not_provided(parser);
7559
8283
  name = parse_expression(parser, binding_power, "Expected an expression after '*'.");
7560
- name = parse_target(parser, name, &operator, NULL);
8284
+ name = parse_target(parser, name);
7561
8285
  }
7562
8286
 
7563
8287
  yp_node_t *splat = (yp_node_t *) yp_splat_node_create(parser, &star_operator, name);
@@ -7587,6 +8311,8 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7587
8311
 
7588
8312
  if (YP_NODE_TYPE_P(child_target, YP_NODE_MULTI_WRITE_NODE)) {
7589
8313
  target = (yp_multi_write_node_t *) child_target;
8314
+ target->base.location.start = lparen.start;
8315
+ target->base.location.end = rparen.end;
7590
8316
  target->lparen_loc = (yp_location_t) { .start = lparen.start, .end = lparen.end };
7591
8317
  target->rparen_loc = (yp_location_t) { .start = rparen.start, .end = rparen.end };
7592
8318
  } else {
@@ -7603,6 +8329,7 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7603
8329
  yp_multi_write_node_targets_append(target, child_target);
7604
8330
  }
7605
8331
 
8332
+ target->base.location.start = lparen.start;
7606
8333
  target->base.location.end = rparen.end;
7607
8334
  yp_multi_write_node_targets_append(result, (yp_node_t *) target);
7608
8335
  }
@@ -7625,7 +8352,7 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7625
8352
  }
7626
8353
 
7627
8354
  yp_node_t *target = parse_expression(parser, binding_power, "Expected another expression after ','.");
7628
- target = parse_target(parser, target, &operator, NULL);
8355
+ target = parse_target(parser, target);
7629
8356
 
7630
8357
  yp_multi_write_node_targets_append(result, target);
7631
8358
  }
@@ -8085,7 +8812,6 @@ parse_parameters(
8085
8812
  bool looping = true;
8086
8813
 
8087
8814
  yp_do_loop_stack_push(parser, false);
8088
-
8089
8815
  yp_parameters_order_t order = YP_PARAMETERS_ORDER_NONE;
8090
8816
 
8091
8817
  do {
@@ -8377,8 +9103,7 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
8377
9103
  yp_rescue_node_operator_set(rescue, &parser->previous);
8378
9104
 
8379
9105
  yp_node_t *reference = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected an exception variable after `=>` in rescue statement.");
8380
- yp_token_t operator = not_provided(parser);
8381
- reference = parse_target(parser, reference, &operator, NULL);
9106
+ reference = parse_target(parser, reference);
8382
9107
 
8383
9108
  yp_rescue_node_reference_set(rescue, reference);
8384
9109
  break;
@@ -8408,8 +9133,7 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
8408
9133
  yp_rescue_node_operator_set(rescue, &parser->previous);
8409
9134
 
8410
9135
  yp_node_t *reference = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected an exception variable after `=>` in rescue statement.");
8411
- yp_token_t operator = not_provided(parser);
8412
- reference = parse_target(parser, reference, &operator, NULL);
9136
+ reference = parse_target(parser, reference);
8413
9137
 
8414
9138
  yp_rescue_node_reference_set(rescue, reference);
8415
9139
  break;
@@ -8426,10 +9150,12 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
8426
9150
  }
8427
9151
 
8428
9152
  if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
9153
+ yp_accepts_block_stack_push(parser, true);
8429
9154
  yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_RESCUE);
8430
9155
  if (statements) {
8431
9156
  yp_rescue_node_statements_set(rescue, statements);
8432
9157
  }
9158
+ yp_accepts_block_stack_pop(parser);
8433
9159
  accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
8434
9160
  }
8435
9161
 
@@ -8446,7 +9172,7 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
8446
9172
  // since we won't know the end until we've found all consequent
8447
9173
  // clauses. This sets the end location on all rescues once we know it
8448
9174
  if (current) {
8449
- const char *end_to_set = current->base.location.end;
9175
+ const uint8_t *end_to_set = current->base.location.end;
8450
9176
  current = parent_node->rescue_clause;
8451
9177
  while (current) {
8452
9178
  current->base.location.end = end_to_set;
@@ -8460,7 +9186,9 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
8460
9186
 
8461
9187
  yp_statements_node_t *else_statements = NULL;
8462
9188
  if (!match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_END, YP_TOKEN_KEYWORD_ENSURE)) {
9189
+ yp_accepts_block_stack_push(parser, true);
8463
9190
  else_statements = parse_statements(parser, YP_CONTEXT_RESCUE_ELSE);
9191
+ yp_accepts_block_stack_pop(parser);
8464
9192
  accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
8465
9193
  }
8466
9194
 
@@ -8474,7 +9202,9 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
8474
9202
 
8475
9203
  yp_statements_node_t *ensure_statements = NULL;
8476
9204
  if (!match_type_p(parser, YP_TOKEN_KEYWORD_END)) {
9205
+ yp_accepts_block_stack_push(parser, true);
8477
9206
  ensure_statements = parse_statements(parser, YP_CONTEXT_ENSURE);
9207
+ yp_accepts_block_stack_pop(parser);
8478
9208
  accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
8479
9209
  }
8480
9210
 
@@ -8499,7 +9229,7 @@ parse_rescues_as_begin(yp_parser_t *parser, yp_statements_node_t *statements) {
8499
9229
  // All nodes within a begin node are optional, so we look
8500
9230
  // for the earliest possible node that we can use to set
8501
9231
  // the BeginNode's start location
8502
- const char * start = begin_node->base.location.start;
9232
+ const uint8_t *start = begin_node->base.location.start;
8503
9233
  if (begin_node->statements) {
8504
9234
  start = begin_node->statements->base.location.start;
8505
9235
  } else if (begin_node->rescue_clause) {
@@ -8584,7 +9314,9 @@ parse_block(yp_parser_t *parser) {
8584
9314
  } else {
8585
9315
  if (!match_type_p(parser, YP_TOKEN_KEYWORD_END)) {
8586
9316
  if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_ENSURE)) {
9317
+ yp_accepts_block_stack_push(parser, true);
8587
9318
  statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_BLOCK_KEYWORDS);
9319
+ yp_accepts_block_stack_pop(parser);
8588
9320
  }
8589
9321
 
8590
9322
  if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
@@ -8961,14 +9693,10 @@ parse_string_part(yp_parser_t *parser) {
8961
9693
 
8962
9694
  static yp_node_t *
8963
9695
  parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_state) {
8964
- bool lex_string = lex_mode->mode == YP_LEX_STRING;
8965
- bool can_be_interpolated = lex_string && lex_mode->as.string.interpolation;
8966
9696
  yp_token_t opening = parser->previous;
8967
9697
 
8968
- if (!lex_string) {
8969
- if (next_state != YP_LEX_STATE_NONE) {
8970
- lex_state_set(parser, next_state);
8971
- }
9698
+ if (lex_mode->mode != YP_LEX_STRING) {
9699
+ if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state);
8972
9700
  yp_token_t symbol;
8973
9701
 
8974
9702
  switch (parser->current.type) {
@@ -8998,37 +9726,44 @@ parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_s
8998
9726
  return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &symbol, &closing, YP_UNESCAPE_ALL);
8999
9727
  }
9000
9728
 
9001
- if (can_be_interpolated) {
9002
- // Create a node_list first. We'll use this to check if it should be an InterpolatedSymbolNode
9003
- // or a SymbolNode
9729
+ if (lex_mode->as.string.interpolation) {
9730
+ // If we have the end of the symbol, then we can return an empty symbol.
9731
+ if (match_type_p(parser, YP_TOKEN_STRING_END)) {
9732
+ if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state);
9733
+ parser_lex(parser);
9734
+
9735
+ yp_token_t content = not_provided(parser);
9736
+ yp_token_t closing = parser->previous;
9737
+ return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &closing, YP_UNESCAPE_NONE);
9738
+ }
9739
+
9740
+ // Now we can parse the first part of the symbol.
9741
+ yp_node_t *part = parse_string_part(parser);
9742
+
9743
+ // If we got a string part, then it's possible that we could transform
9744
+ // what looks like an interpolated symbol into a regular symbol.
9745
+ if (part && YP_NODE_TYPE_P(part, YP_NODE_STRING_NODE) && match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
9746
+ if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state);
9747
+ parser_lex(parser);
9748
+
9749
+ return (yp_node_t *) yp_string_node_to_symbol_node(parser, (yp_string_node_t *) part, &opening, &parser->previous);
9750
+ }
9751
+
9752
+ // Create a node_list first. We'll use this to check if it should be an
9753
+ // InterpolatedSymbolNode or a SymbolNode.
9004
9754
  yp_node_list_t node_list = YP_EMPTY_NODE_LIST;
9755
+ if (part) yp_node_list_append(&node_list, part);
9005
9756
 
9006
9757
  while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
9007
- yp_node_t *part = parse_string_part(parser);
9008
- if (part != NULL) {
9758
+ if ((part = parse_string_part(parser)) != NULL) {
9009
9759
  yp_node_list_append(&node_list, part);
9010
9760
  }
9011
9761
  }
9012
9762
 
9013
- yp_node_t *res;
9014
- // If the only element on the node_list is a StringNode, we know this is a SymbolNode
9015
- // and not an InterpolatedSymbolNode
9016
- if (node_list.size == 1 && YP_NODE_TYPE_P(node_list.nodes[0], YP_NODE_STRING_NODE)) {
9017
- res = (yp_node_t *)yp_string_node_to_symbol_node(parser, (yp_string_node_t *)node_list.nodes[0]);
9018
- free(node_list.nodes);
9019
- }
9020
- else {
9021
- yp_interpolated_symbol_node_t *interpolated = yp_interpolated_symbol_node_create(parser, &opening, &node_list, &opening);
9022
- yp_interpolated_symbol_node_closing_set(interpolated, &parser->current);
9023
- res = (yp_node_t *) interpolated;
9024
- }
9025
-
9026
- if (next_state != YP_LEX_STATE_NONE) {
9027
- lex_state_set(parser, next_state);
9028
- }
9763
+ if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state);
9029
9764
  expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated symbol.");
9030
9765
 
9031
- return res;
9766
+ return (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &node_list, &parser->previous);
9032
9767
  }
9033
9768
 
9034
9769
  yp_token_t content;
@@ -9162,19 +9897,22 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
9162
9897
  yp_node_t *node = nodes->nodes[index];
9163
9898
 
9164
9899
  if (!YP_NODE_TYPE_P(node, YP_NODE_STRING_NODE)) continue;
9165
- yp_location_t *content_loc = &((yp_string_node_t *) node)->content_loc;
9900
+ const yp_location_t *content_loc = &((yp_string_node_t *) node)->content_loc;
9166
9901
 
9167
9902
  // If the previous node wasn't a string node, we don't want to trim
9168
9903
  // whitespace. This could happen after an interpolated expression or
9169
9904
  // variable.
9170
9905
  if (index == 0 || YP_NODE_TYPE_P(nodes->nodes[index - 1], YP_NODE_STRING_NODE)) {
9171
9906
  int cur_whitespace;
9172
- const char *cur_char = content_loc->start;
9907
+ const uint8_t *cur_char = content_loc->start;
9173
9908
 
9174
9909
  while (cur_char && cur_char < content_loc->end) {
9175
- // Any empty newlines aren't included in the minimum whitespace calculation
9176
- while (cur_char < content_loc->end && *cur_char == '\n') cur_char++;
9177
- while (cur_char + 1 < content_loc->end && *cur_char == '\r' && cur_char[1] == '\n') cur_char += 2;
9910
+ // Any empty newlines aren't included in the minimum whitespace
9911
+ // calculation.
9912
+ size_t eol_length;
9913
+ while ((eol_length = match_eol_at(parser, cur_char))) {
9914
+ cur_char += eol_length;
9915
+ }
9178
9916
 
9179
9917
  if (cur_char == content_loc->end) break;
9180
9918
 
@@ -9189,11 +9927,12 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
9189
9927
  cur_char++;
9190
9928
  }
9191
9929
 
9192
- // If we hit a newline, then we have encountered a line that contains
9193
- // only whitespace, and it shouldn't be considered in the calculation of
9194
- // common leading whitespace.
9195
- if (*cur_char == '\n') {
9196
- cur_char++;
9930
+ // If we hit a newline, then we have encountered a line that
9931
+ // contains only whitespace, and it shouldn't be considered in
9932
+ // the calculation of common leading whitespace.
9933
+ eol_length = match_eol_at(parser, cur_char);
9934
+ if (eol_length) {
9935
+ cur_char += eol_length;
9197
9936
  continue;
9198
9937
  }
9199
9938
 
@@ -9256,15 +9995,15 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
9256
9995
  // destination to move bytes into. We'll also use it for bounds checking
9257
9996
  // since we don't require that these strings be null terminated.
9258
9997
  size_t dest_length = yp_string_length(string);
9259
- char *source_start = string->source;
9998
+ uint8_t *source_start = (uint8_t *) string->source;
9260
9999
 
9261
- const char *source_cursor = source_start;
9262
- const char *source_end = source_cursor + dest_length;
10000
+ const uint8_t *source_cursor = source_start;
10001
+ const uint8_t *source_end = source_cursor + dest_length;
9263
10002
 
9264
10003
  // We're going to move bytes backward in the string when we get leading
9265
10004
  // whitespace, so we'll maintain a pointer to the current position in the
9266
10005
  // string that we're writing to.
9267
- char *dest_cursor = source_start;
10006
+ uint8_t *dest_cursor = source_start;
9268
10007
 
9269
10008
  while (source_cursor < source_end) {
9270
10009
  // If we need to dedent the next element within the heredoc or the next
@@ -9291,7 +10030,7 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
9291
10030
 
9292
10031
  // At this point we have dedented all that we need to, so we need to find
9293
10032
  // the next newline.
9294
- const char *breakpoint = next_newline(source_cursor, source_end - source_cursor);
10033
+ const uint8_t *breakpoint = next_newline(source_cursor, source_end - source_cursor);
9295
10034
 
9296
10035
  if (breakpoint == NULL) {
9297
10036
  // If there isn't another newline, then we can just move the rest of the
@@ -9314,7 +10053,7 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
9314
10053
  yp_node_destroy(parser, node);
9315
10054
  } else {
9316
10055
  string->length = dest_length;
9317
- yp_unescape_manipulate_string(parser, string, (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL, &parser->error_list);
10056
+ yp_unescape_manipulate_string(parser, string, (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL);
9318
10057
  nodes->nodes[write_index++] = node;
9319
10058
  }
9320
10059
 
@@ -9503,7 +10242,7 @@ parse_pattern_hash(yp_parser_t *parser, yp_node_t *first_assoc) {
9503
10242
  yp_node_t *key = ((yp_assoc_node_t *) first_assoc)->key;
9504
10243
 
9505
10244
  if (YP_NODE_TYPE_P(key, YP_NODE_SYMBOL_NODE)) {
9506
- yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
10245
+ const yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
9507
10246
  yp_parser_local_add_location(parser, value_loc->start, value_loc->end);
9508
10247
  }
9509
10248
  }
@@ -9531,7 +10270,7 @@ parse_pattern_hash(yp_parser_t *parser, yp_node_t *first_assoc) {
9531
10270
  if (!match_any_type_p(parser, 7, YP_TOKEN_COMMA, YP_TOKEN_KEYWORD_THEN, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_BRACKET_RIGHT, YP_TOKEN_PARENTHESIS_RIGHT, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) {
9532
10271
  value = parse_pattern(parser, false, "Expected a pattern expression after the key.");
9533
10272
  } else {
9534
- yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
10273
+ const yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
9535
10274
  yp_parser_local_add_location(parser, value_loc->start, value_loc->end);
9536
10275
  }
9537
10276
 
@@ -10071,10 +10810,8 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10071
10810
  }
10072
10811
  case YP_TOKEN_PARENTHESIS_LEFT:
10073
10812
  case YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
10074
- yp_token_type_t current_token_type = parser->current.type;
10813
+ yp_token_t opening = parser->current;
10075
10814
  parser_lex(parser);
10076
-
10077
- yp_token_t opening = parser->previous;
10078
10815
  while (accept_any(parser, 2, YP_TOKEN_SEMICOLON, YP_TOKEN_NEWLINE));
10079
10816
 
10080
10817
  // If this is the end of the file or we match a right parenthesis, then
@@ -10093,7 +10830,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10093
10830
  // If we hit a right parenthesis, then we're done parsing the parentheses
10094
10831
  // node, and we can check which kind of node we should return.
10095
10832
  if (match_type_p(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
10096
- if (current_token_type == YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
10833
+ if (opening.type == YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
10097
10834
  lex_state_set(parser, YP_LEX_STATE_ENDARG);
10098
10835
  }
10099
10836
  parser_lex(parser);
@@ -10111,6 +10848,8 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10111
10848
 
10112
10849
  if (multi_statement->lparen_loc.start == NULL) {
10113
10850
  multi_write = (yp_multi_write_node_t *) statement;
10851
+ multi_write->base.location.start = lparen_loc.start;
10852
+ multi_write->base.location.end = rparen_loc.end;
10114
10853
  multi_write->lparen_loc = lparen_loc;
10115
10854
  multi_write->rparen_loc = rparen_loc;
10116
10855
  } else {
@@ -10193,7 +10932,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10193
10932
 
10194
10933
  yp_token_t closing = not_provided(parser);
10195
10934
 
10196
- return (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &closing, YP_UNESCAPE_ALL);
10935
+ return (yp_node_t *) yp_char_literal_node_create_and_unescape(parser, &opening, &content, &closing, YP_UNESCAPE_ALL);
10197
10936
  }
10198
10937
  case YP_TOKEN_CLASS_VARIABLE: {
10199
10938
  parser_lex(parser);
@@ -10213,7 +10952,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10213
10952
  // fact a method call, not a constant read.
10214
10953
  if (
10215
10954
  match_type_p(parser, YP_TOKEN_PARENTHESIS_LEFT) ||
10216
- (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 2, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
10955
+ (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 3, YP_TOKEN_UAMPERSAND, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
10217
10956
  (yp_accepts_block_stack_p(parser) && match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_DO, YP_TOKEN_BRACE_LEFT))
10218
10957
  ) {
10219
10958
  yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
@@ -10336,7 +11075,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10336
11075
  // can still be a method call if it is followed by arguments or
10337
11076
  // a block, so we need to check for that here.
10338
11077
  if (
10339
- (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 2, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
11078
+ (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 3, YP_TOKEN_UAMPERSAND, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
10340
11079
  (yp_accepts_block_stack_p(parser) && match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_DO, YP_TOKEN_BRACE_LEFT))
10341
11080
  ) {
10342
11081
  yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
@@ -10738,7 +11477,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10738
11477
 
10739
11478
  yp_node_t *statements = NULL;
10740
11479
  if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
11480
+ yp_accepts_block_stack_push(parser, true);
10741
11481
  statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_SCLASS);
11482
+ yp_accepts_block_stack_pop(parser);
10742
11483
  }
10743
11484
 
10744
11485
  if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
@@ -10754,7 +11495,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10754
11495
  return (yp_node_t *) yp_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
10755
11496
  }
10756
11497
 
10757
- yp_node_t *name = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a class name after `class`.");
11498
+ yp_node_t *constant_path = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a class name after `class`.");
11499
+ yp_token_t name = parser->previous;
11500
+ if (name.type != YP_TOKEN_CONSTANT) {
11501
+ yp_diagnostic_list_append(&parser->error_list, name.start, name.end, "Expected a constant name after `class`.");
11502
+ }
11503
+
10758
11504
  yp_token_t inheritance_operator;
10759
11505
  yp_node_t *superclass;
10760
11506
 
@@ -10795,7 +11541,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10795
11541
  yp_constant_id_list_t locals = parser->current_scope->locals;
10796
11542
  yp_parser_scope_pop(parser);
10797
11543
  yp_do_loop_stack_pop(parser);
10798
- return (yp_node_t *) yp_class_node_create(parser, &locals, &class_keyword, name, &inheritance_operator, superclass, statements, &parser->previous);
11544
+ return (yp_node_t *) yp_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
10799
11545
  }
10800
11546
  case YP_TOKEN_KEYWORD_DEF: {
10801
11547
  yp_token_t def_keyword = parser->current;
@@ -10954,6 +11700,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10954
11700
  break;
10955
11701
  }
10956
11702
  case YP_CASE_PARAMETER: {
11703
+ // If we're about to lex a label, we need to add the label
11704
+ // state to make sure the next newline is ignored.
11705
+ if (parser->current.type == YP_TOKEN_LABEL) {
11706
+ lex_state_set(parser, parser->lex_state | YP_LEX_STATE_LABEL);
11707
+ }
11708
+
10957
11709
  lparen = not_provided(parser);
10958
11710
  rparen = not_provided(parser);
10959
11711
  params = parse_parameters(parser, YP_BINDING_POWER_DEFINED, false, false, true);
@@ -11008,7 +11760,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11008
11760
  yp_do_loop_stack_push(parser, false);
11009
11761
 
11010
11762
  if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
11763
+ yp_accepts_block_stack_push(parser, true);
11011
11764
  statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_DEF);
11765
+ yp_accepts_block_stack_pop(parser);
11012
11766
  }
11013
11767
 
11014
11768
  if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
@@ -11183,13 +11937,14 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11183
11937
  parser_lex(parser);
11184
11938
 
11185
11939
  yp_token_t module_keyword = parser->previous;
11186
- yp_node_t *name = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a module name after `module`.");
11940
+ yp_node_t *constant_path = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a module name after `module`.");
11941
+ yp_token_t name;
11187
11942
 
11188
- // If we can recover from a syntax error that occurred while parsing the
11189
- // name of the module, then we'll handle that here.
11190
- if (YP_NODE_TYPE_P(name, YP_NODE_MISSING_NODE)) {
11191
- yp_token_t end_keyword = (yp_token_t) { .type = YP_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
11192
- return (yp_node_t *) yp_module_node_create(parser, NULL, &module_keyword, name, NULL, &end_keyword);
11943
+ // If we can recover from a syntax error that occurred while parsing
11944
+ // the name of the module, then we'll handle that here.
11945
+ if (YP_NODE_TYPE_P(constant_path, YP_NODE_MISSING_NODE)) {
11946
+ yp_token_t missing = (yp_token_t) { .type = YP_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
11947
+ return (yp_node_t *) yp_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
11193
11948
  }
11194
11949
 
11195
11950
  while (accept(parser, YP_TOKEN_COLON_COLON)) {
@@ -11198,7 +11953,15 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11198
11953
  expect(parser, YP_TOKEN_CONSTANT, "Expected to find a module name after `::`.");
11199
11954
  yp_node_t *constant = (yp_node_t *) yp_constant_read_node_create(parser, &parser->previous);
11200
11955
 
11201
- name = (yp_node_t *)yp_constant_path_node_create(parser, name, &double_colon, constant);
11956
+ constant_path = (yp_node_t *) yp_constant_path_node_create(parser, constant_path, &double_colon, constant);
11957
+ }
11958
+
11959
+ // Here we retrieve the name of the module. If it wasn't a constant,
11960
+ // then it's possible that `module foo` was passed, which is a
11961
+ // syntax error. We handle that here as well.
11962
+ name = parser->previous;
11963
+ if (name.type != YP_TOKEN_CONSTANT) {
11964
+ yp_diagnostic_list_append(&parser->error_list, name.start, name.end, "Expected to find a module name after `module`.");
11202
11965
  }
11203
11966
 
11204
11967
  yp_parser_scope_push(parser, true);
@@ -11225,7 +11988,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11225
11988
  yp_diagnostic_list_append(&parser->error_list, module_keyword.start, module_keyword.end, "Module definition in method body");
11226
11989
  }
11227
11990
 
11228
- return (yp_node_t *) yp_module_node_create(parser, &locals, &module_keyword, name, statements, &parser->previous);
11991
+ return (yp_node_t *) yp_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
11229
11992
  }
11230
11993
  case YP_TOKEN_KEYWORD_NIL:
11231
11994
  parser_lex(parser);
@@ -11261,12 +12024,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11261
12024
  expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `until` statement.");
11262
12025
  }
11263
12026
 
11264
- yp_until_node_t *until_node = yp_until_node_create(parser, &keyword, predicate, statements, 0);
11265
- if (parser->previous.type == YP_TOKEN_KEYWORD_END) {
11266
- until_node->base.location.end = parser->previous.end;
11267
- }
11268
-
11269
- return (yp_node_t *) until_node;
12027
+ return (yp_node_t *) yp_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
11270
12028
  }
11271
12029
  case YP_TOKEN_KEYWORD_WHILE: {
11272
12030
  yp_do_loop_stack_push(parser, true);
@@ -11287,25 +12045,16 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11287
12045
  expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `while` statement.");
11288
12046
  }
11289
12047
 
11290
- yp_while_node_t *while_node = yp_while_node_create(parser, &keyword, predicate, statements, 0);
11291
- if (parser->previous.type == YP_TOKEN_KEYWORD_END) {
11292
- while_node->base.location.end = parser->previous.end;
11293
- }
11294
- return (yp_node_t *) while_node;
12048
+ return (yp_node_t *) yp_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
11295
12049
  }
11296
12050
  case YP_TOKEN_PERCENT_LOWER_I: {
11297
12051
  parser_lex(parser);
11298
12052
  yp_array_node_t *array = yp_array_node_create(parser, &parser->previous);
11299
12053
 
11300
12054
  while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
11301
- if (yp_array_node_size(array) == 0) {
11302
- accept(parser, YP_TOKEN_WORDS_SEP);
11303
- } else {
11304
- expect(parser, YP_TOKEN_WORDS_SEP, "Expected a separator for the symbols in a `%i` list.");
11305
- if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
11306
- }
11307
-
12055
+ accept(parser, YP_TOKEN_WORDS_SEP);
11308
12056
  if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
12057
+
11309
12058
  expect(parser, YP_TOKEN_STRING_CONTENT, "Expected a symbol in a `%i` list.");
11310
12059
 
11311
12060
  yp_token_t opening = not_provided(parser);
@@ -11360,6 +12109,19 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11360
12109
  // to the list of child nodes.
11361
12110
  yp_node_t *part = parse_string_part(parser);
11362
12111
  yp_interpolated_symbol_node_append((yp_interpolated_symbol_node_t *) current, part);
12112
+ } else if (YP_NODE_TYPE_P(current, YP_NODE_SYMBOL_NODE)) {
12113
+ // If we hit string content and the current node is a string node,
12114
+ // then we need to convert the current node into an interpolated
12115
+ // string and add the string content to the list of child nodes.
12116
+ yp_token_t opening = not_provided(parser);
12117
+ yp_token_t closing = not_provided(parser);
12118
+ yp_interpolated_symbol_node_t *interpolated =
12119
+ yp_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
12120
+ yp_interpolated_symbol_node_append(interpolated, current);
12121
+
12122
+ yp_node_t *part = parse_string_part(parser);
12123
+ yp_interpolated_symbol_node_append(interpolated, part);
12124
+ current = (yp_node_t *) interpolated;
11363
12125
  } else {
11364
12126
  assert(false && "unreachable");
11365
12127
  }
@@ -11462,12 +12224,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11462
12224
  accept(parser, YP_TOKEN_WORDS_SEP);
11463
12225
 
11464
12226
  while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
11465
- if (yp_array_node_size(array) == 0) {
11466
- accept(parser, YP_TOKEN_WORDS_SEP);
11467
- } else {
11468
- expect(parser, YP_TOKEN_WORDS_SEP, "Expected a separator for the strings in a `%w` list.");
11469
- if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
11470
- }
12227
+ accept(parser, YP_TOKEN_WORDS_SEP);
12228
+ if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
12229
+
11471
12230
  expect(parser, YP_TOKEN_STRING_CONTENT, "Expected a string in a `%w` list.");
11472
12231
 
11473
12232
  yp_token_t opening = not_provided(parser);
@@ -11517,6 +12276,19 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11517
12276
  // to the list of child nodes.
11518
12277
  yp_node_t *part = parse_string_part(parser);
11519
12278
  yp_interpolated_string_node_append((yp_interpolated_string_node_t *) current, part);
12279
+ } else if (YP_NODE_TYPE_P(current, YP_NODE_STRING_NODE)) {
12280
+ // If we hit string content and the current node is a string node,
12281
+ // then we need to convert the current node into an interpolated
12282
+ // string and add the string content to the list of child nodes.
12283
+ yp_token_t opening = not_provided(parser);
12284
+ yp_token_t closing = not_provided(parser);
12285
+ yp_interpolated_string_node_t *interpolated =
12286
+ yp_interpolated_string_node_create(parser, &opening, NULL, &closing);
12287
+ yp_interpolated_string_node_append(interpolated, current);
12288
+
12289
+ yp_node_t *part = parse_string_part(parser);
12290
+ yp_interpolated_string_node_append(interpolated, part);
12291
+ current = (yp_node_t *) interpolated;
11520
12292
  } else {
11521
12293
  assert(false && "unreachable");
11522
12294
  }
@@ -11797,30 +12569,32 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11797
12569
  yp_accepts_block_stack_push(parser, true);
11798
12570
  parser_lex(parser);
11799
12571
 
11800
- yp_token_t opening = parser->previous;
12572
+ yp_token_t operator = parser->previous;
11801
12573
  yp_parser_scope_push(parser, false);
11802
12574
  yp_block_parameters_node_t *params;
11803
12575
 
11804
12576
  switch (parser->current.type) {
11805
12577
  case YP_TOKEN_PARENTHESIS_LEFT: {
11806
- yp_token_t block_parameters_opening = parser->current;
12578
+ yp_token_t opening = parser->current;
11807
12579
  parser_lex(parser);
11808
12580
 
11809
12581
  if (match_type_p(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
11810
- params = yp_block_parameters_node_create(parser, NULL, &block_parameters_opening);
12582
+ params = yp_block_parameters_node_create(parser, NULL, &opening);
11811
12583
  } else {
11812
- params = parse_block_parameters(parser, false, &block_parameters_opening, true);
12584
+ params = parse_block_parameters(parser, false, &opening, true);
11813
12585
  }
11814
12586
 
11815
12587
  accept(parser, YP_TOKEN_NEWLINE);
11816
12588
  expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected ')' after left parenthesis.");
11817
- yp_block_parameters_node_closing_set(params, &parser->previous);
11818
12589
 
12590
+ yp_block_parameters_node_closing_set(params, &parser->previous);
11819
12591
  break;
11820
12592
  }
11821
12593
  case YP_CASE_PARAMETER: {
12594
+ yp_accepts_block_stack_push(parser, false);
11822
12595
  yp_token_t opening = not_provided(parser);
11823
12596
  params = parse_block_parameters(parser, false, &opening, true);
12597
+ yp_accepts_block_stack_pop(parser);
11824
12598
  break;
11825
12599
  }
11826
12600
  default: {
@@ -11829,19 +12603,25 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11829
12603
  }
11830
12604
  }
11831
12605
 
12606
+ yp_token_t opening;
11832
12607
  yp_node_t *body = NULL;
11833
12608
  parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
11834
12609
 
11835
12610
  if (accept(parser, YP_TOKEN_LAMBDA_BEGIN)) {
12611
+ opening = parser->previous;
12612
+
11836
12613
  if (!accept(parser, YP_TOKEN_BRACE_RIGHT)) {
11837
12614
  body = (yp_node_t *) parse_statements(parser, YP_CONTEXT_LAMBDA_BRACES);
11838
12615
  expect(parser, YP_TOKEN_BRACE_RIGHT, "Expecting '}' to close lambda block.");
11839
12616
  }
11840
12617
  } else {
11841
12618
  expect(parser, YP_TOKEN_KEYWORD_DO, "Expected a 'do' keyword or a '{' to open lambda block.");
12619
+ opening = parser->previous;
11842
12620
 
11843
12621
  if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_END, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
12622
+ yp_accepts_block_stack_push(parser, true);
11844
12623
  body = (yp_node_t *) parse_statements(parser, YP_CONTEXT_LAMBDA_DO_END);
12624
+ yp_accepts_block_stack_pop(parser);
11845
12625
  }
11846
12626
 
11847
12627
  if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
@@ -11855,7 +12635,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11855
12635
  yp_constant_id_list_t locals = parser->current_scope->locals;
11856
12636
  yp_parser_scope_pop(parser);
11857
12637
  yp_accepts_block_stack_pop(parser);
11858
- return (yp_node_t *) yp_lambda_node_create(parser, &locals, &opening, params, body, &parser->previous);
12638
+ return (yp_node_t *) yp_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, params, body);
11859
12639
  }
11860
12640
  case YP_TOKEN_UPLUS: {
11861
12641
  parser_lex(parser);
@@ -12074,7 +12854,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12074
12854
  case YP_CASE_WRITABLE: {
12075
12855
  parser_lex(parser);
12076
12856
  yp_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, "Expected a value after =.");
12077
- return parse_target(parser, node, &token, value);
12857
+ return parse_write(parser, node, &token, value);
12078
12858
  }
12079
12859
  case YP_NODE_SPLAT_NODE: {
12080
12860
  yp_splat_node_t *splat_node = (yp_splat_node_t *) node;
@@ -12083,7 +12863,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12083
12863
  case YP_CASE_WRITABLE:
12084
12864
  parser_lex(parser);
12085
12865
  yp_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, "Expected a value after =.");
12086
- return parse_target(parser, (yp_node_t *) splat_node, &token, value);
12866
+ return parse_write(parser, (yp_node_t *) splat_node, &token, value);
12087
12867
  default:
12088
12868
  break;
12089
12869
  }
@@ -12105,19 +12885,57 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12105
12885
  case YP_NODE_NUMBERED_REFERENCE_READ_NODE:
12106
12886
  yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, "Can't set variable");
12107
12887
  /* fallthrough */
12108
- case YP_NODE_CLASS_VARIABLE_READ_NODE:
12109
- case YP_NODE_CONSTANT_PATH_NODE:
12110
- case YP_NODE_CONSTANT_READ_NODE:
12111
- case YP_NODE_GLOBAL_VARIABLE_READ_NODE:
12112
- case YP_NODE_INSTANCE_VARIABLE_READ_NODE:
12113
- case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
12888
+ case YP_NODE_GLOBAL_VARIABLE_READ_NODE: {
12889
+ parser_lex(parser);
12890
+
12891
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12892
+ yp_node_t *result = (yp_node_t *) yp_global_variable_and_write_node_create(parser, node, &token, value);
12893
+
12894
+ yp_node_destroy(parser, node);
12895
+ return result;
12896
+ }
12897
+ case YP_NODE_CLASS_VARIABLE_READ_NODE: {
12898
+ parser_lex(parser);
12899
+
12900
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12901
+ yp_node_t *result = (yp_node_t *) yp_class_variable_and_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value);
12902
+
12903
+ yp_node_destroy(parser, node);
12904
+ return result;
12905
+ }
12906
+ case YP_NODE_CONSTANT_PATH_NODE: {
12907
+ parser_lex(parser);
12908
+
12909
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12910
+ return (yp_node_t *) yp_constant_path_and_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value);
12911
+ }
12912
+ case YP_NODE_CONSTANT_READ_NODE: {
12913
+ parser_lex(parser);
12914
+
12915
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12916
+ yp_node_t *result = (yp_node_t *) yp_constant_and_write_node_create(parser, node, &token, value);
12917
+
12918
+ yp_node_destroy(parser, node);
12919
+ return result;
12920
+ }
12921
+ case YP_NODE_INSTANCE_VARIABLE_READ_NODE: {
12114
12922
  parser_lex(parser);
12115
12923
 
12116
- yp_token_t operator = not_provided(parser);
12117
- node = parse_target(parser, node, &operator, NULL);
12924
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12925
+ yp_node_t *result = (yp_node_t *) yp_instance_variable_and_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value);
12926
+
12927
+ yp_node_destroy(parser, node);
12928
+ return result;
12929
+ }
12930
+ case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
12931
+ yp_local_variable_read_node_t *cast = (yp_local_variable_read_node_t *) node;
12932
+ parser_lex(parser);
12118
12933
 
12119
12934
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12120
- return (yp_node_t *) yp_and_write_node_create(parser, node, &token, value);
12935
+ yp_node_t *result = (yp_node_t *) yp_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
12936
+
12937
+ yp_node_destroy(parser, node);
12938
+ return result;
12121
12939
  }
12122
12940
  case YP_NODE_CALL_NODE: {
12123
12941
  yp_call_node_t *call_node = (yp_call_node_t *) node;
@@ -12127,25 +12945,22 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12127
12945
  // will transform it into a local variable write.
12128
12946
  if (yp_call_node_variable_call_p(call_node)) {
12129
12947
  yp_location_t message_loc = call_node->message_loc;
12130
- yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12948
+ yp_constant_id_t constant_id = yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12131
12949
 
12132
12950
  if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
12133
12951
  yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
12134
12952
  }
12135
12953
 
12136
12954
  parser_lex(parser);
12137
-
12138
- yp_token_t operator = not_provided(parser);
12139
- node = parse_target(parser, node, &operator, NULL);
12140
-
12141
12955
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12142
- return (yp_node_t *) yp_and_write_node_create(parser, node, &token, value);
12956
+ yp_node_t *result = (yp_node_t *) yp_local_variable_and_write_node_create(parser, node, &token, value, constant_id, 0);
12957
+
12958
+ yp_node_destroy(parser, node);
12959
+ return result;
12143
12960
  }
12144
12961
 
12145
12962
  parser_lex(parser);
12146
-
12147
- yp_token_t operator = not_provided(parser);
12148
- node = parse_target(parser, node, &operator, NULL);
12963
+ node = parse_target(parser, node);
12149
12964
 
12150
12965
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12151
12966
  return (yp_node_t *) yp_call_operator_and_write_node_create(parser, (yp_call_node_t *) node, &token, value);
@@ -12171,19 +12986,57 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12171
12986
  case YP_NODE_NUMBERED_REFERENCE_READ_NODE:
12172
12987
  yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, "Can't set variable");
12173
12988
  /* fallthrough */
12174
- case YP_NODE_CLASS_VARIABLE_READ_NODE:
12175
- case YP_NODE_CONSTANT_PATH_NODE:
12176
- case YP_NODE_CONSTANT_READ_NODE:
12177
- case YP_NODE_GLOBAL_VARIABLE_READ_NODE:
12178
- case YP_NODE_INSTANCE_VARIABLE_READ_NODE:
12179
- case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
12989
+ case YP_NODE_GLOBAL_VARIABLE_READ_NODE: {
12990
+ parser_lex(parser);
12991
+
12992
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12993
+ yp_node_t *result = (yp_node_t *) yp_global_variable_or_write_node_create(parser, node, &token, value);
12994
+
12995
+ yp_node_destroy(parser, node);
12996
+ return result;
12997
+ }
12998
+ case YP_NODE_CLASS_VARIABLE_READ_NODE: {
12999
+ parser_lex(parser);
13000
+
13001
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
13002
+ yp_node_t *result = (yp_node_t *) yp_class_variable_or_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value);
13003
+
13004
+ yp_node_destroy(parser, node);
13005
+ return result;
13006
+ }
13007
+ case YP_NODE_CONSTANT_PATH_NODE: {
13008
+ parser_lex(parser);
13009
+
13010
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
13011
+ return (yp_node_t *) yp_constant_path_or_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value);
13012
+ }
13013
+ case YP_NODE_CONSTANT_READ_NODE: {
13014
+ parser_lex(parser);
13015
+
13016
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
13017
+ yp_node_t *result = (yp_node_t *) yp_constant_or_write_node_create(parser, node, &token, value);
13018
+
13019
+ yp_node_destroy(parser, node);
13020
+ return result;
13021
+ }
13022
+ case YP_NODE_INSTANCE_VARIABLE_READ_NODE: {
12180
13023
  parser_lex(parser);
12181
13024
 
12182
- yp_token_t operator = not_provided(parser);
12183
- node = parse_target(parser, node, &operator, NULL);
13025
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
13026
+ yp_node_t *result = (yp_node_t *) yp_instance_variable_or_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value);
13027
+
13028
+ yp_node_destroy(parser, node);
13029
+ return result;
13030
+ }
13031
+ case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
13032
+ yp_local_variable_read_node_t *cast = (yp_local_variable_read_node_t *) node;
13033
+ parser_lex(parser);
12184
13034
 
12185
13035
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12186
- return (yp_node_t *) yp_or_write_node_create(parser, node, &token, value);
13036
+ yp_node_t *result = (yp_node_t *) yp_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
13037
+
13038
+ yp_node_destroy(parser, node);
13039
+ return result;
12187
13040
  }
12188
13041
  case YP_NODE_CALL_NODE: {
12189
13042
  yp_call_node_t *call_node = (yp_call_node_t *) node;
@@ -12193,25 +13046,22 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12193
13046
  // will transform it into a local variable write.
12194
13047
  if (yp_call_node_variable_call_p(call_node)) {
12195
13048
  yp_location_t message_loc = call_node->message_loc;
12196
- yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
13049
+ yp_constant_id_t constant_id = yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12197
13050
 
12198
13051
  if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
12199
13052
  yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
12200
13053
  }
12201
13054
 
12202
13055
  parser_lex(parser);
12203
-
12204
- yp_token_t operator = not_provided(parser);
12205
- node = parse_target(parser, node, &operator, NULL);
12206
-
12207
13056
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12208
- return (yp_node_t *) yp_or_write_node_create(parser, node, &token, value);
13057
+ yp_node_t *result = (yp_node_t *) yp_local_variable_or_write_node_create(parser, node, &token, value, constant_id, 0);
13058
+
13059
+ yp_node_destroy(parser, node);
13060
+ return result;
12209
13061
  }
12210
13062
 
12211
13063
  parser_lex(parser);
12212
-
12213
- yp_token_t operator = not_provided(parser);
12214
- node = parse_target(parser, node, &operator, NULL);
13064
+ node = parse_target(parser, node);
12215
13065
 
12216
13066
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12217
13067
  return (yp_node_t *) yp_call_operator_or_write_node_create(parser, (yp_call_node_t *) node, &token, value);
@@ -12247,19 +13097,57 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12247
13097
  case YP_NODE_NUMBERED_REFERENCE_READ_NODE:
12248
13098
  yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, "Can't set variable");
12249
13099
  /* fallthrough */
12250
- case YP_NODE_CLASS_VARIABLE_READ_NODE:
12251
- case YP_NODE_CONSTANT_PATH_NODE:
12252
- case YP_NODE_CONSTANT_READ_NODE:
12253
- case YP_NODE_GLOBAL_VARIABLE_READ_NODE:
12254
- case YP_NODE_INSTANCE_VARIABLE_READ_NODE:
13100
+ case YP_NODE_GLOBAL_VARIABLE_READ_NODE: {
13101
+ parser_lex(parser);
13102
+
13103
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13104
+ yp_node_t *result = (yp_node_t *) yp_global_variable_operator_write_node_create(parser, node, &token, value);
13105
+
13106
+ yp_node_destroy(parser, node);
13107
+ return result;
13108
+ }
13109
+ case YP_NODE_CLASS_VARIABLE_READ_NODE: {
13110
+ parser_lex(parser);
13111
+
13112
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13113
+ yp_node_t *result = (yp_node_t *) yp_class_variable_operator_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value);
13114
+
13115
+ yp_node_destroy(parser, node);
13116
+ return result;
13117
+ }
13118
+ case YP_NODE_CONSTANT_PATH_NODE: {
13119
+ parser_lex(parser);
13120
+
13121
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13122
+ return (yp_node_t *) yp_constant_path_operator_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value);
13123
+ }
13124
+ case YP_NODE_CONSTANT_READ_NODE: {
13125
+ parser_lex(parser);
13126
+
13127
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13128
+ yp_node_t *result = (yp_node_t *) yp_constant_operator_write_node_create(parser, node, &token, value);
13129
+
13130
+ yp_node_destroy(parser, node);
13131
+ return result;
13132
+ }
13133
+ case YP_NODE_INSTANCE_VARIABLE_READ_NODE: {
13134
+ parser_lex(parser);
13135
+
13136
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13137
+ yp_node_t *result = (yp_node_t *) yp_instance_variable_operator_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value);
13138
+
13139
+ yp_node_destroy(parser, node);
13140
+ return result;
13141
+ }
12255
13142
  case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
13143
+ yp_local_variable_read_node_t *cast = (yp_local_variable_read_node_t *) node;
12256
13144
  parser_lex(parser);
12257
13145
 
12258
- yp_token_t operator = not_provided(parser);
12259
- node = parse_target(parser, node, &operator, NULL);
13146
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13147
+ yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
12260
13148
 
12261
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator");
12262
- return (yp_node_t *) yp_operator_write_node_create(parser, node, &token, value);
13149
+ yp_node_destroy(parser, node);
13150
+ return result;
12263
13151
  }
12264
13152
  case YP_NODE_CALL_NODE: {
12265
13153
  yp_call_node_t *call_node = (yp_call_node_t *) node;
@@ -12269,25 +13157,23 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12269
13157
  // will transform it into a local variable write.
12270
13158
  if (yp_call_node_variable_call_p(call_node)) {
12271
13159
  yp_location_t message_loc = call_node->message_loc;
12272
- yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
13160
+ yp_constant_id_t constant_id = yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12273
13161
 
12274
13162
  if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
12275
13163
  yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
12276
13164
  }
12277
13165
 
12278
13166
  parser_lex(parser);
13167
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13168
+ yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, constant_id, 0);
12279
13169
 
12280
- yp_token_t operator = not_provided(parser);
12281
- node = parse_target(parser, node, &operator, NULL);
12282
-
12283
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12284
- return (yp_node_t *) yp_operator_write_node_create(parser, node, &token, value);
13170
+ yp_node_destroy(parser, node);
13171
+ return result;
12285
13172
  }
12286
13173
 
12287
- yp_token_t operator = not_provided(parser);
12288
- node = parse_target(parser, node, &operator, NULL);
12289
-
13174
+ node = parse_target(parser, node);
12290
13175
  parser_lex(parser);
13176
+
12291
13177
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
12292
13178
  return (yp_node_t *) yp_call_operator_write_node_create(parser, (yp_call_node_t *) node, &token, value);
12293
13179
  }
@@ -12336,7 +13222,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12336
13222
  yp_string_list_t named_captures;
12337
13223
  yp_string_list_init(&named_captures);
12338
13224
 
12339
- yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc;
13225
+ const yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc;
12340
13226
 
12341
13227
  if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures, parser->encoding_changed, &parser->encoding)) {
12342
13228
  for (size_t index = 0; index < named_captures.length; index++) {
@@ -12456,7 +13342,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12456
13342
  yp_statements_node_body_append(statements, node);
12457
13343
 
12458
13344
  yp_node_t *predicate = parse_expression(parser, binding_power, "Expected a predicate after 'until'");
12459
- return (yp_node_t *) yp_until_node_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_NODE_BEGIN_NODE) ? YP_LOOP_FLAGS_BEGIN_MODIFIER : 0);
13345
+ return (yp_node_t *) yp_until_node_modifier_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_NODE_BEGIN_NODE) ? YP_LOOP_FLAGS_BEGIN_MODIFIER : 0);
12460
13346
  }
12461
13347
  case YP_TOKEN_KEYWORD_WHILE_MODIFIER: {
12462
13348
  parser_lex(parser);
@@ -12464,7 +13350,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12464
13350
  yp_statements_node_body_append(statements, node);
12465
13351
 
12466
13352
  yp_node_t *predicate = parse_expression(parser, binding_power, "Expected a predicate after 'while'");
12467
- return (yp_node_t *) yp_while_node_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_NODE_BEGIN_NODE) ? YP_LOOP_FLAGS_BEGIN_MODIFIER : 0);
13353
+ return (yp_node_t *) yp_while_node_modifier_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_NODE_BEGIN_NODE) ? YP_LOOP_FLAGS_BEGIN_MODIFIER : 0);
12468
13354
  }
12469
13355
  case YP_TOKEN_QUESTION_MARK: {
12470
13356
  parser_lex(parser);
@@ -12502,7 +13388,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12502
13388
 
12503
13389
  if (
12504
13390
  (parser->current.type == YP_TOKEN_PARENTHESIS_LEFT) ||
12505
- (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 2, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))
13391
+ (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 3, YP_TOKEN_UAMPERSAND, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))
12506
13392
  ) {
12507
13393
  // If we have a constant immediately following a '::' operator, then
12508
13394
  // this can either be a constant path or a method call, depending on
@@ -12734,7 +13620,7 @@ yp_metadata_read_u32(const char *ptr) {
12734
13620
  // ]*
12735
13621
  // ]
12736
13622
  // ```
12737
- static void
13623
+ void
12738
13624
  yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
12739
13625
  uint32_t filepath_size = yp_metadata_read_u32(metadata);
12740
13626
  metadata += 4;
@@ -12760,7 +13646,7 @@ yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
12760
13646
  uint32_t local_size = yp_metadata_read_u32(metadata);
12761
13647
  metadata += 4;
12762
13648
 
12763
- yp_parser_local_add_location(parser, metadata, metadata + local_size);
13649
+ yp_parser_local_add_location(parser, (const uint8_t *) metadata, (const uint8_t *) (metadata + local_size));
12764
13650
  metadata += local_size;
12765
13651
  }
12766
13652
  }
@@ -12772,7 +13658,9 @@ yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
12772
13658
 
12773
13659
  // Initialize a parser with the given start and end pointers.
12774
13660
  YP_EXPORTED_FUNCTION void
12775
- yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char *filepath) {
13661
+ yp_parser_init(yp_parser_t *parser, const uint8_t *source, size_t size, const char *filepath) {
13662
+ assert(source != NULL);
13663
+
12776
13664
  // Set filepath to the file that was passed
12777
13665
  if (!filepath) filepath = "";
12778
13666
  yp_string_t filepath_string;
@@ -12841,15 +13729,16 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
12841
13729
  size_t newline_size = size / 22;
12842
13730
  yp_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
12843
13731
 
12844
- assert(source != NULL);
12845
- if (size >= 3 && (unsigned char) source[0] == 0xef && (unsigned char) source[1] == 0xbb && (unsigned char) source[2] == 0xbf) {
12846
- // If the first three bytes of the source are the UTF-8 BOM, then we'll skip
12847
- // over them.
13732
+ // Skip past the UTF-8 BOM if it exists.
13733
+ if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
12848
13734
  parser->current.end += 3;
12849
- } else if (size >= 2 && source[0] == '#' && source[1] == '!') {
12850
- // If the first two bytes of the source are a shebang, then we'll indicate
12851
- // that the encoding comment is at the end of the shebang.
12852
- const char *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
13735
+ parser->encoding_comment_start += 3;
13736
+ }
13737
+
13738
+ // If the first two bytes of the source are a shebang, then we'll indicate
13739
+ // that the encoding comment is at the end of the shebang.
13740
+ if (peek(parser) == '#' && peek_offset(parser, 1) == '!') {
13741
+ const uint8_t *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
12853
13742
  if (encoding_comment_start) {
12854
13743
  parser->encoding_comment_start = encoding_comment_start + 1;
12855
13744
  }
@@ -12921,7 +13810,7 @@ yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
12921
13810
  // Parse and serialize the AST represented by the given source to the given
12922
13811
  // buffer.
12923
13812
  YP_EXPORTED_FUNCTION void
12924
- yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *metadata) {
13813
+ yp_parse_serialize(const uint8_t *source, size_t size, yp_buffer_t *buffer, const char *metadata) {
12925
13814
  yp_parser_t parser;
12926
13815
  yp_parser_init(&parser, source, size, NULL);
12927
13816
  if (metadata) yp_parser_metadata(&parser, metadata);