yarp 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +48 -1
  3. data/Makefile +5 -1
  4. data/README.md +4 -3
  5. data/config.yml +461 -150
  6. data/docs/configuration.md +1 -0
  7. data/docs/encoding.md +5 -5
  8. data/docs/ruby_api.md +2 -0
  9. data/docs/serialization.md +3 -3
  10. data/docs/testing.md +2 -2
  11. data/ext/yarp/api_node.c +810 -199
  12. data/ext/yarp/extension.c +94 -31
  13. data/ext/yarp/extension.h +2 -2
  14. data/include/yarp/ast.h +653 -150
  15. data/include/yarp/defines.h +2 -1
  16. data/include/yarp/diagnostic.h +3 -3
  17. data/include/yarp/enc/yp_encoding.h +10 -10
  18. data/include/yarp/node.h +10 -0
  19. data/include/yarp/parser.h +19 -19
  20. data/include/yarp/regexp.h +1 -1
  21. data/include/yarp/unescape.h +7 -5
  22. data/include/yarp/util/yp_buffer.h +3 -0
  23. data/include/yarp/util/yp_char.h +16 -16
  24. data/include/yarp/util/yp_constant_pool.h +2 -2
  25. data/include/yarp/util/yp_newline_list.h +7 -4
  26. data/include/yarp/util/yp_string.h +4 -4
  27. data/include/yarp/util/yp_string_list.h +0 -3
  28. data/include/yarp/util/yp_strpbrk.h +1 -1
  29. data/include/yarp/version.h +2 -2
  30. data/include/yarp.h +14 -3
  31. data/lib/yarp/desugar_visitor.rb +204 -0
  32. data/lib/yarp/ffi.rb +27 -1
  33. data/lib/yarp/lex_compat.rb +93 -25
  34. data/lib/yarp/mutation_visitor.rb +683 -0
  35. data/lib/yarp/node.rb +3121 -597
  36. data/lib/yarp/serialize.rb +198 -126
  37. data/lib/yarp.rb +53 -7
  38. data/src/diagnostic.c +1 -1
  39. data/src/enc/yp_big5.c +15 -42
  40. data/src/enc/yp_euc_jp.c +16 -43
  41. data/src/enc/yp_gbk.c +19 -46
  42. data/src/enc/yp_shift_jis.c +16 -43
  43. data/src/enc/yp_tables.c +36 -38
  44. data/src/enc/yp_unicode.c +20 -25
  45. data/src/enc/yp_windows_31j.c +16 -43
  46. data/src/node.c +1444 -836
  47. data/src/prettyprint.c +324 -103
  48. data/src/regexp.c +21 -21
  49. data/src/serialize.c +429 -276
  50. data/src/token_type.c +2 -2
  51. data/src/unescape.c +184 -136
  52. data/src/util/yp_buffer.c +7 -2
  53. data/src/util/yp_char.c +34 -34
  54. data/src/util/yp_constant_pool.c +4 -4
  55. data/src/util/yp_memchr.c +1 -1
  56. data/src/util/yp_newline_list.c +14 -3
  57. data/src/util/yp_string.c +22 -20
  58. data/src/util/yp_string_list.c +0 -6
  59. data/src/util/yp_strncasecmp.c +3 -6
  60. data/src/util/yp_strpbrk.c +8 -8
  61. data/src/yarp.c +1504 -615
  62. data/yarp.gemspec +3 -1
  63. metadata +4 -2
data/src/yarp.c CHANGED
@@ -1,5 +1,4 @@
1
1
  #include "yarp.h"
2
- #include "yarp/version.h"
3
2
 
4
3
  // The YARP version and the serialization format.
5
4
  const char *
@@ -162,14 +161,18 @@ debug_token(yp_token_t * token) {
162
161
 
163
162
  #endif
164
163
 
164
+ /* Macros for min/max. */
165
+ #define MIN(a,b) (((a)<(b))?(a):(b))
166
+ #define MAX(a,b) (((a)>(b))?(a):(b))
167
+
165
168
  /******************************************************************************/
166
169
  /* Lex mode manipulations */
167
170
  /******************************************************************************/
168
171
 
169
172
  // Returns the incrementor character that should be used to increment the
170
173
  // nesting count if one is possible.
171
- static inline char
172
- lex_mode_incrementor(const char start) {
174
+ static inline uint8_t
175
+ lex_mode_incrementor(const uint8_t start) {
173
176
  switch (start) {
174
177
  case '(':
175
178
  case '[':
@@ -183,8 +186,8 @@ lex_mode_incrementor(const char start) {
183
186
 
184
187
  // Returns the matching character that should be used to terminate a list
185
188
  // beginning with the given character.
186
- static inline char
187
- lex_mode_terminator(const char start) {
189
+ static inline uint8_t
190
+ lex_mode_terminator(const uint8_t start) {
188
191
  switch (start) {
189
192
  case '(':
190
193
  return ')';
@@ -222,9 +225,9 @@ lex_mode_push(yp_parser_t *parser, yp_lex_mode_t lex_mode) {
222
225
 
223
226
  // Push on a new list lex mode.
224
227
  static inline bool
225
- lex_mode_push_list(yp_parser_t *parser, bool interpolation, char delimiter) {
226
- char incrementor = lex_mode_incrementor(delimiter);
227
- char terminator = lex_mode_terminator(delimiter);
228
+ lex_mode_push_list(yp_parser_t *parser, bool interpolation, uint8_t delimiter) {
229
+ uint8_t incrementor = lex_mode_incrementor(delimiter);
230
+ uint8_t terminator = lex_mode_terminator(delimiter);
228
231
 
229
232
  yp_lex_mode_t lex_mode = {
230
233
  .mode = YP_LEX_LIST,
@@ -238,7 +241,7 @@ lex_mode_push_list(yp_parser_t *parser, bool interpolation, char delimiter) {
238
241
 
239
242
  // These are the places where we need to split up the content of the list.
240
243
  // We'll use strpbrk to find the first of these characters.
241
- char *breakpoints = lex_mode.as.list.breakpoints;
244
+ uint8_t *breakpoints = lex_mode.as.list.breakpoints;
242
245
  memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
243
246
 
244
247
  // Now we'll add the terminator to the list of breakpoints.
@@ -261,7 +264,7 @@ lex_mode_push_list(yp_parser_t *parser, bool interpolation, char delimiter) {
261
264
 
262
265
  // Push on a new regexp lex mode.
263
266
  static inline bool
264
- lex_mode_push_regexp(yp_parser_t *parser, char incrementor, char terminator) {
267
+ lex_mode_push_regexp(yp_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
265
268
  yp_lex_mode_t lex_mode = {
266
269
  .mode = YP_LEX_REGEXP,
267
270
  .as.regexp = {
@@ -274,7 +277,7 @@ lex_mode_push_regexp(yp_parser_t *parser, char incrementor, char terminator) {
274
277
  // These are the places where we need to split up the content of the
275
278
  // regular expression. We'll use strpbrk to find the first of these
276
279
  // characters.
277
- char *breakpoints = lex_mode.as.regexp.breakpoints;
280
+ uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
278
281
  memcpy(breakpoints, "\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
279
282
 
280
283
  // First we'll add the terminator.
@@ -290,7 +293,7 @@ lex_mode_push_regexp(yp_parser_t *parser, char incrementor, char terminator) {
290
293
 
291
294
  // Push on a new string lex mode.
292
295
  static inline bool
293
- lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed, char incrementor, char terminator) {
296
+ lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
294
297
  yp_lex_mode_t lex_mode = {
295
298
  .mode = YP_LEX_STRING,
296
299
  .as.string = {
@@ -304,7 +307,7 @@ lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed
304
307
 
305
308
  // These are the places where we need to split up the content of the
306
309
  // string. We'll use strpbrk to find the first of these characters.
307
- char *breakpoints = lex_mode.as.string.breakpoints;
310
+ uint8_t *breakpoints = lex_mode.as.string.breakpoints;
308
311
  memcpy(breakpoints, "\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
309
312
 
310
313
  // Now add in the terminator.
@@ -362,7 +365,7 @@ lex_state_ignored_p(yp_parser_t *parser) {
362
365
 
363
366
  if (ignored) {
364
367
  return YP_IGNORED_NEWLINE_ALL;
365
- } else if (parser->lex_state == (YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED)) {
368
+ } else if ((parser->lex_state & ~((unsigned int) YP_LEX_STATE_LABEL)) == (YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED)) {
366
369
  return YP_IGNORED_NEWLINE_PATTERN;
367
370
  } else {
368
371
  return YP_IGNORED_NEWLINE_NONE;
@@ -381,6 +384,9 @@ lex_state_arg_p(yp_parser_t *parser) {
381
384
 
382
385
  static inline bool
383
386
  lex_state_spcarg_p(yp_parser_t *parser, bool space_seen) {
387
+ if (parser->current.end >= parser->end) {
388
+ return false;
389
+ }
384
390
  return lex_state_arg_p(parser) && space_seen && !yp_char_is_whitespace(*parser->current.end);
385
391
  }
386
392
 
@@ -421,7 +427,7 @@ debug_lex_state_set(yp_parser_t *parser, yp_lex_state_t state, char const * call
421
427
 
422
428
  // Retrieve the constant pool id for the given location.
423
429
  static inline yp_constant_id_t
424
- yp_parser_constant_id_location(yp_parser_t *parser, const char *start, const char *end) {
430
+ yp_parser_constant_id_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
425
431
  return yp_constant_pool_insert(&parser->constant_pool, start, (size_t) (end - start));
426
432
  }
427
433
 
@@ -536,17 +542,116 @@ yp_arguments_validate(yp_parser_t *parser, yp_arguments_t *arguments) {
536
542
  }
537
543
  }
538
544
 
545
+ /******************************************************************************/
546
+ /* Scope node functions */
547
+ /******************************************************************************/
548
+
549
+ // Generate a scope node from the given node.
550
+ void
551
+ yp_scope_node_init(yp_node_t *node, yp_scope_node_t *scope) {
552
+ scope->base.type = YP_NODE_SCOPE_NODE;
553
+ scope->base.location.start = node->location.start;
554
+ scope->base.location.end = node->location.end;
555
+
556
+ scope->parameters = NULL;
557
+ scope->body = NULL;
558
+ yp_constant_id_list_init(&scope->locals);
559
+
560
+ switch (YP_NODE_TYPE(node)) {
561
+ case YP_NODE_BLOCK_NODE: {
562
+ yp_block_node_t *cast = (yp_block_node_t *) node;
563
+ if (cast->parameters) scope->parameters = cast->parameters->parameters;
564
+ scope->body = cast->body;
565
+ scope->locals = cast->locals;
566
+ break;
567
+ }
568
+ case YP_NODE_CLASS_NODE: {
569
+ yp_class_node_t *cast = (yp_class_node_t *) node;
570
+ scope->body = cast->body;
571
+ scope->locals = cast->locals;
572
+ break;
573
+ }
574
+ case YP_NODE_DEF_NODE: {
575
+ yp_def_node_t *cast = (yp_def_node_t *) node;
576
+ scope->parameters = cast->parameters;
577
+ scope->body = cast->body;
578
+ scope->locals = cast->locals;
579
+ break;
580
+ }
581
+ case YP_NODE_LAMBDA_NODE: {
582
+ yp_lambda_node_t *cast = (yp_lambda_node_t *) node;
583
+ if (cast->parameters) scope->parameters = cast->parameters->parameters;
584
+ scope->body = cast->body;
585
+ scope->locals = cast->locals;
586
+ break;
587
+ }
588
+ case YP_NODE_MODULE_NODE: {
589
+ yp_module_node_t *cast = (yp_module_node_t *) node;
590
+ scope->body = cast->body;
591
+ scope->locals = cast->locals;
592
+ break;
593
+ }
594
+ case YP_NODE_PROGRAM_NODE: {
595
+ yp_program_node_t *cast = (yp_program_node_t *) node;
596
+ scope->body = (yp_node_t *) cast->statements;
597
+ scope->locals = cast->locals;
598
+ break;
599
+ }
600
+ case YP_NODE_SINGLETON_CLASS_NODE: {
601
+ yp_singleton_class_node_t *cast = (yp_singleton_class_node_t *) node;
602
+ scope->body = cast->body;
603
+ scope->locals = cast->locals;
604
+ break;
605
+ }
606
+ default:
607
+ assert(false && "unreachable");
608
+ break;
609
+ }
610
+ }
611
+
539
612
  /******************************************************************************/
540
613
  /* Node creation functions */
541
614
  /******************************************************************************/
542
615
 
616
+ // Parse the decimal number represented by the range of bytes. Returns
617
+ // UINT32_MAX if the number fails to parse. This function assumes that the range
618
+ // of bytes has already been validated to contain only decimal digits.
619
+ static uint32_t
620
+ parse_decimal_number(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
621
+ ptrdiff_t diff = end - start;
622
+ assert(diff > 0 && ((unsigned long) diff < SIZE_MAX));
623
+ size_t length = (size_t) diff;
624
+
625
+ char *digits = calloc(length + 1, sizeof(char));
626
+ memcpy(digits, start, length);
627
+ digits[length] = '\0';
628
+
629
+ char *endptr;
630
+ errno = 0;
631
+ unsigned long value = strtoul(digits, &endptr, 10);
632
+
633
+ if ((digits == endptr) || (*endptr != '\0') || (errno == ERANGE)) {
634
+ yp_diagnostic_list_append(&parser->error_list, start, end, "invalid decimal number");
635
+ value = UINT32_MAX;
636
+ }
637
+
638
+ free(digits);
639
+
640
+ if (value > UINT32_MAX) {
641
+ yp_diagnostic_list_append(&parser->error_list, start, end, "invalid decimal number");
642
+ value = UINT32_MAX;
643
+ }
644
+
645
+ return (uint32_t) value;
646
+ }
647
+
543
648
  // Parse out the options for a regular expression.
544
649
  static inline yp_node_flags_t
545
650
  yp_regular_expression_flags_create(const yp_token_t *closing) {
546
651
  yp_node_flags_t flags = 0;
547
652
 
548
653
  if (closing->type == YP_TOKEN_REGEXP_END) {
549
- for (const char *flag = closing->start + 1; flag < closing->end; flag++) {
654
+ for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
550
655
  switch (*flag) {
551
656
  case 'i': flags |= YP_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
552
657
  case 'm': flags |= YP_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
@@ -588,7 +693,7 @@ yp_alloc_node(YP_ATTRIBUTE_UNUSED yp_parser_t *parser, size_t size) {
588
693
 
589
694
  // Allocate a new MissingNode node.
590
695
  static yp_missing_node_t *
591
- yp_missing_node_create(yp_parser_t *parser, const char *start, const char *end) {
696
+ yp_missing_node_create(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
592
697
  yp_missing_node_t *node = YP_ALLOC_NODE(parser, yp_missing_node_t);
593
698
  *node = (yp_missing_node_t) {{ .type = YP_NODE_MISSING_NODE, .location = { .start = start, .end = end } }};
594
699
  return node;
@@ -658,27 +763,6 @@ yp_and_node_create(yp_parser_t *parser, yp_node_t *left, const yp_token_t *opera
658
763
  return node;
659
764
  }
660
765
 
661
- // Allocate and initialize a new AndWriteNode.
662
- static yp_and_write_node_t *
663
- yp_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
664
- yp_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_and_write_node_t);
665
-
666
- *node = (yp_and_write_node_t) {
667
- {
668
- .type = YP_NODE_AND_WRITE_NODE,
669
- .location = {
670
- .start = target->location.start,
671
- .end = value->location.end
672
- },
673
- },
674
- .target = target,
675
- .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
676
- .value = value
677
- };
678
-
679
- return node;
680
- }
681
-
682
766
  // Allocate and initialize a new arguments node.
683
767
  static yp_arguments_node_t *
684
768
  yp_arguments_node_create(yp_parser_t *parser) {
@@ -878,7 +962,7 @@ yp_array_pattern_node_requireds_append(yp_array_pattern_node_t *node, yp_node_t
878
962
  static yp_assoc_node_t *
879
963
  yp_assoc_node_create(yp_parser_t *parser, yp_node_t *key, const yp_token_t *operator, yp_node_t *value) {
880
964
  yp_assoc_node_t *node = YP_ALLOC_NODE(parser, yp_assoc_node_t);
881
- const char *end;
965
+ const uint8_t *end;
882
966
 
883
967
  if (value != NULL) {
884
968
  end = value->location.end;
@@ -1062,7 +1146,7 @@ static yp_block_parameters_node_t *
1062
1146
  yp_block_parameters_node_create(yp_parser_t *parser, yp_parameters_node_t *parameters, const yp_token_t *opening) {
1063
1147
  yp_block_parameters_node_t *node = YP_ALLOC_NODE(parser, yp_block_parameters_node_t);
1064
1148
 
1065
- const char *start;
1149
+ const uint8_t *start;
1066
1150
  if (opening->type != YP_TOKEN_NOT_PROVIDED) {
1067
1151
  start = opening->start;
1068
1152
  } else if (parameters != NULL) {
@@ -1071,7 +1155,7 @@ yp_block_parameters_node_create(yp_parser_t *parser, yp_parameters_node_t *param
1071
1155
  start = NULL;
1072
1156
  }
1073
1157
 
1074
- const char *end;
1158
+ const uint8_t *end;
1075
1159
  if (parameters != NULL) {
1076
1160
  end = parameters->base.location.end;
1077
1161
  } else if (opening->type != YP_TOKEN_NOT_PROVIDED) {
@@ -1151,7 +1235,7 @@ yp_call_node_create(yp_parser_t *parser) {
1151
1235
  },
1152
1236
  .receiver = NULL,
1153
1237
  .operator_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1154
- .message_loc = YP_LOCATION_NULL_VALUE(parser),
1238
+ .message_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1155
1239
  .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1156
1240
  .arguments = NULL,
1157
1241
  .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
@@ -1192,8 +1276,8 @@ static yp_call_node_t *
1192
1276
  yp_call_node_binary_create(yp_parser_t *parser, yp_node_t *receiver, yp_token_t *operator, yp_node_t *argument) {
1193
1277
  yp_call_node_t *node = yp_call_node_create(parser);
1194
1278
 
1195
- node->base.location.start = receiver->location.start;
1196
- node->base.location.end = argument->location.end;
1279
+ node->base.location.start = MIN(receiver->location.start, argument->location.start);
1280
+ node->base.location.end = MAX(receiver->location.end, argument->location.end);
1197
1281
 
1198
1282
  node->receiver = receiver;
1199
1283
  node->message_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
@@ -1389,7 +1473,7 @@ yp_call_operator_write_node_create(yp_parser_t *parser, yp_call_node_t *target,
1389
1473
  .target = target,
1390
1474
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1391
1475
  .value = value,
1392
- .operator_id = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
1476
+ .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
1393
1477
  };
1394
1478
 
1395
1479
  return node;
@@ -1486,7 +1570,7 @@ yp_case_node_end_keyword_loc_set(yp_case_node_t *node, const yp_token_t *end_key
1486
1570
 
1487
1571
  // Allocate a new ClassNode node.
1488
1572
  static yp_class_node_t *
1489
- yp_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *class_keyword, yp_node_t *constant_path, const yp_token_t *inheritance_operator, yp_node_t *superclass, yp_node_t *body, const yp_token_t *end_keyword) {
1573
+ yp_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *class_keyword, yp_node_t *constant_path, const yp_token_t *name, const yp_token_t *inheritance_operator, yp_node_t *superclass, yp_node_t *body, const yp_token_t *end_keyword) {
1490
1574
  yp_class_node_t *node = YP_ALLOC_NODE(parser, yp_class_node_t);
1491
1575
 
1492
1576
  *node = (yp_class_node_t) {
@@ -1500,7 +1584,78 @@ yp_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const y
1500
1584
  .inheritance_operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
1501
1585
  .superclass = superclass,
1502
1586
  .body = body,
1503
- .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword)
1587
+ .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword),
1588
+ .name = YP_EMPTY_STRING
1589
+ };
1590
+
1591
+ yp_string_shared_init(&node->name, name->start, name->end);
1592
+ return node;
1593
+ }
1594
+
1595
+ // Allocate and initialize a new ClassVariableAndWriteNode node.
1596
+ static yp_class_variable_and_write_node_t *
1597
+ yp_class_variable_and_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1598
+ assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
1599
+ yp_class_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_and_write_node_t);
1600
+
1601
+ *node = (yp_class_variable_and_write_node_t) {
1602
+ {
1603
+ .type = YP_NODE_CLASS_VARIABLE_AND_WRITE_NODE,
1604
+ .location = {
1605
+ .start = target->base.location.start,
1606
+ .end = value->location.end
1607
+ }
1608
+ },
1609
+ .name = target->name,
1610
+ .name_loc = target->base.location,
1611
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1612
+ .value = value
1613
+ };
1614
+
1615
+ return node;
1616
+ }
1617
+
1618
+ // Allocate and initialize a new ClassVariableOperatorWriteNode node.
1619
+ static yp_class_variable_operator_write_node_t *
1620
+ yp_class_variable_operator_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1621
+ yp_class_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_operator_write_node_t);
1622
+
1623
+ *node = (yp_class_variable_operator_write_node_t) {
1624
+ {
1625
+ .type = YP_NODE_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
1626
+ .location = {
1627
+ .start = target->base.location.start,
1628
+ .end = value->location.end
1629
+ }
1630
+ },
1631
+ .name = target->name,
1632
+ .name_loc = target->base.location,
1633
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1634
+ .value = value,
1635
+ .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
1636
+ };
1637
+
1638
+ return node;
1639
+ }
1640
+
1641
+ // Allocate and initialize a new ClassVariableOrWriteNode node.
1642
+ static yp_class_variable_or_write_node_t *
1643
+ yp_class_variable_or_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1644
+ assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
1645
+ yp_class_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_or_write_node_t);
1646
+
1647
+ *node = (yp_class_variable_or_write_node_t) {
1648
+ {
1649
+ .type = YP_NODE_CLASS_VARIABLE_OR_WRITE_NODE,
1650
+ .location = {
1651
+ .start = target->base.location.start,
1652
+ .end = value->location.end
1653
+ }
1654
+ },
1655
+ .name = target->name,
1656
+ .name_loc = target->base.location,
1657
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1658
+ .value = value
1504
1659
  };
1505
1660
 
1506
1661
  return node;
@@ -1511,13 +1666,21 @@ static yp_class_variable_read_node_t *
1511
1666
  yp_class_variable_read_node_create(yp_parser_t *parser, const yp_token_t *token) {
1512
1667
  assert(token->type == YP_TOKEN_CLASS_VARIABLE);
1513
1668
  yp_class_variable_read_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_read_node_t);
1514
- *node = (yp_class_variable_read_node_t) {{ .type = YP_NODE_CLASS_VARIABLE_READ_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }};
1669
+
1670
+ *node = (yp_class_variable_read_node_t) {
1671
+ {
1672
+ .type = YP_NODE_CLASS_VARIABLE_READ_NODE,
1673
+ .location = YP_LOCATION_TOKEN_VALUE(token)
1674
+ },
1675
+ .name = yp_parser_constant_id_location(parser, token->start, token->end)
1676
+ };
1677
+
1515
1678
  return node;
1516
1679
  }
1517
1680
 
1518
1681
  // Initialize a new ClassVariableWriteNode node from a ClassVariableReadNode node.
1519
1682
  static yp_class_variable_write_node_t *
1520
- yp_class_variable_read_node_to_class_variable_write_node(yp_parser_t *parser, yp_class_variable_read_node_t *read_node, yp_token_t *operator, yp_node_t *value) {
1683
+ yp_class_variable_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *read_node, yp_token_t *operator, yp_node_t *value) {
1521
1684
  yp_class_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_write_node_t);
1522
1685
 
1523
1686
  *node = (yp_class_variable_write_node_t) {
@@ -1525,10 +1688,11 @@ yp_class_variable_read_node_to_class_variable_write_node(yp_parser_t *parser, yp
1525
1688
  .type = YP_NODE_CLASS_VARIABLE_WRITE_NODE,
1526
1689
  .location = {
1527
1690
  .start = read_node->base.location.start,
1528
- .end = value != NULL ? value->location.end : read_node->base.location.end
1691
+ .end = value->location.end
1529
1692
  },
1530
1693
  },
1531
- .name_loc = YP_LOCATION_NODE_VALUE((yp_node_t *)read_node),
1694
+ .name = read_node->name,
1695
+ .name_loc = YP_LOCATION_NODE_VALUE((yp_node_t *) read_node),
1532
1696
  .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
1533
1697
  .value = value
1534
1698
  };
@@ -1536,6 +1700,72 @@ yp_class_variable_read_node_to_class_variable_write_node(yp_parser_t *parser, yp
1536
1700
  return node;
1537
1701
  }
1538
1702
 
1703
+ // Allocate and initialize a new ConstantPathAndWriteNode node.
1704
+ static yp_constant_path_and_write_node_t *
1705
+ yp_constant_path_and_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1706
+ assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
1707
+ yp_constant_path_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_and_write_node_t);
1708
+
1709
+ *node = (yp_constant_path_and_write_node_t) {
1710
+ {
1711
+ .type = YP_NODE_CONSTANT_PATH_AND_WRITE_NODE,
1712
+ .location = {
1713
+ .start = target->base.location.start,
1714
+ .end = value->location.end
1715
+ }
1716
+ },
1717
+ .target = target,
1718
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1719
+ .value = value
1720
+ };
1721
+
1722
+ return node;
1723
+ }
1724
+
1725
+ // Allocate and initialize a new ConstantPathOperatorWriteNode node.
1726
+ static yp_constant_path_operator_write_node_t *
1727
+ yp_constant_path_operator_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1728
+ yp_constant_path_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_operator_write_node_t);
1729
+
1730
+ *node = (yp_constant_path_operator_write_node_t) {
1731
+ {
1732
+ .type = YP_NODE_CONSTANT_PATH_OPERATOR_WRITE_NODE,
1733
+ .location = {
1734
+ .start = target->base.location.start,
1735
+ .end = value->location.end
1736
+ }
1737
+ },
1738
+ .target = target,
1739
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1740
+ .value = value,
1741
+ .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
1742
+ };
1743
+
1744
+ return node;
1745
+ }
1746
+
1747
+ // Allocate and initialize a new ConstantPathOrWriteNode node.
1748
+ static yp_constant_path_or_write_node_t *
1749
+ yp_constant_path_or_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1750
+ assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
1751
+ yp_constant_path_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_or_write_node_t);
1752
+
1753
+ *node = (yp_constant_path_or_write_node_t) {
1754
+ {
1755
+ .type = YP_NODE_CONSTANT_PATH_OR_WRITE_NODE,
1756
+ .location = {
1757
+ .start = target->base.location.start,
1758
+ .end = value->location.end
1759
+ }
1760
+ },
1761
+ .target = target,
1762
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1763
+ .value = value
1764
+ };
1765
+
1766
+ return node;
1767
+ }
1768
+
1539
1769
  // Allocate and initialize a new ConstantPathNode node.
1540
1770
  static yp_constant_path_node_t *
1541
1771
  yp_constant_path_node_create(yp_parser_t *parser, yp_node_t *parent, const yp_token_t *delimiter, yp_node_t *child) {
@@ -1567,7 +1797,7 @@ yp_constant_path_write_node_create(yp_parser_t *parser, yp_constant_path_node_t
1567
1797
  .type = YP_NODE_CONSTANT_PATH_WRITE_NODE,
1568
1798
  .location = {
1569
1799
  .start = target->base.location.start,
1570
- .end = (value == NULL ? target->base.location.end : value->location.end)
1800
+ .end = value->location.end
1571
1801
  },
1572
1802
  },
1573
1803
  .target = target,
@@ -1578,6 +1808,74 @@ yp_constant_path_write_node_create(yp_parser_t *parser, yp_constant_path_node_t
1578
1808
  return node;
1579
1809
  }
1580
1810
 
1811
+ // Allocate and initialize a new ConstantAndWriteNode node.
1812
+ static yp_constant_and_write_node_t *
1813
+ yp_constant_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1814
+ assert(YP_NODE_TYPE_P(target, YP_NODE_CONSTANT_READ_NODE));
1815
+ assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
1816
+ yp_constant_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_and_write_node_t);
1817
+
1818
+ *node = (yp_constant_and_write_node_t) {
1819
+ {
1820
+ .type = YP_NODE_CONSTANT_AND_WRITE_NODE,
1821
+ .location = {
1822
+ .start = target->location.start,
1823
+ .end = value->location.end
1824
+ }
1825
+ },
1826
+ .name_loc = target->location,
1827
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1828
+ .value = value
1829
+ };
1830
+
1831
+ return node;
1832
+ }
1833
+
1834
+ // Allocate and initialize a new ConstantOperatorWriteNode node.
1835
+ static yp_constant_operator_write_node_t *
1836
+ yp_constant_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1837
+ yp_constant_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_operator_write_node_t);
1838
+
1839
+ *node = (yp_constant_operator_write_node_t) {
1840
+ {
1841
+ .type = YP_NODE_CONSTANT_OPERATOR_WRITE_NODE,
1842
+ .location = {
1843
+ .start = target->location.start,
1844
+ .end = value->location.end
1845
+ }
1846
+ },
1847
+ .name_loc = target->location,
1848
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1849
+ .value = value,
1850
+ .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
1851
+ };
1852
+
1853
+ return node;
1854
+ }
1855
+
1856
+ // Allocate and initialize a new ConstantOrWriteNode node.
1857
+ static yp_constant_or_write_node_t *
1858
+ yp_constant_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1859
+ assert(YP_NODE_TYPE_P(target, YP_NODE_CONSTANT_READ_NODE));
1860
+ assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
1861
+ yp_constant_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_or_write_node_t);
1862
+
1863
+ *node = (yp_constant_or_write_node_t) {
1864
+ {
1865
+ .type = YP_NODE_CONSTANT_OR_WRITE_NODE,
1866
+ .location = {
1867
+ .start = target->location.start,
1868
+ .end = value->location.end
1869
+ }
1870
+ },
1871
+ .name_loc = target->location,
1872
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1873
+ .value = value
1874
+ };
1875
+
1876
+ return node;
1877
+ }
1878
+
1581
1879
  // Allocate and initialize a new ConstantReadNode node.
1582
1880
  static yp_constant_read_node_t *
1583
1881
  yp_constant_read_node_create(yp_parser_t *parser, const yp_token_t *name) {
@@ -1598,7 +1896,7 @@ yp_constant_write_node_create(yp_parser_t *parser, yp_location_t *name_loc, cons
1598
1896
  .type = YP_NODE_CONSTANT_WRITE_NODE,
1599
1897
  .location = {
1600
1898
  .start = name_loc->start,
1601
- .end = value != NULL ? value->location.end : name_loc->end
1899
+ .end = value->location.end
1602
1900
  },
1603
1901
  },
1604
1902
  .name_loc = *name_loc,
@@ -1626,7 +1924,7 @@ yp_def_node_create(
1626
1924
  const yp_token_t *end_keyword
1627
1925
  ) {
1628
1926
  yp_def_node_t *node = YP_ALLOC_NODE(parser, yp_def_node_t);
1629
- const char *end;
1927
+ const uint8_t *end;
1630
1928
 
1631
1929
  if (end_keyword->type == YP_TOKEN_NOT_PROVIDED) {
1632
1930
  end = body->location.end;
@@ -1681,7 +1979,7 @@ yp_defined_node_create(yp_parser_t *parser, const yp_token_t *lparen, yp_node_t
1681
1979
  static yp_else_node_t *
1682
1980
  yp_else_node_create(yp_parser_t *parser, const yp_token_t *else_keyword, yp_statements_node_t *statements, const yp_token_t *end_keyword) {
1683
1981
  yp_else_node_t *node = YP_ALLOC_NODE(parser, yp_else_node_t);
1684
- const char *end = NULL;
1982
+ const uint8_t *end = NULL;
1685
1983
  if ((end_keyword->type == YP_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
1686
1984
  end = statements->base.location.end;
1687
1985
  } else {
@@ -2012,6 +2310,74 @@ yp_hash_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *assoc
2012
2310
  return node;
2013
2311
  }
2014
2312
 
2313
+ // Allocate and initialize a new GlobalVariableAndWriteNode node.
2314
+ static yp_global_variable_and_write_node_t *
2315
+ yp_global_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2316
+ assert(YP_NODE_TYPE_P(target, YP_NODE_GLOBAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_BACK_REFERENCE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_NUMBERED_REFERENCE_READ_NODE));
2317
+ assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2318
+ yp_global_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_and_write_node_t);
2319
+
2320
+ *node = (yp_global_variable_and_write_node_t) {
2321
+ {
2322
+ .type = YP_NODE_GLOBAL_VARIABLE_AND_WRITE_NODE,
2323
+ .location = {
2324
+ .start = target->location.start,
2325
+ .end = value->location.end
2326
+ }
2327
+ },
2328
+ .name_loc = target->location,
2329
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2330
+ .value = value
2331
+ };
2332
+
2333
+ return node;
2334
+ }
2335
+
2336
+ // Allocate and initialize a new GlobalVariableOperatorWriteNode node.
2337
+ static yp_global_variable_operator_write_node_t *
2338
+ yp_global_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2339
+ yp_global_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_operator_write_node_t);
2340
+
2341
+ *node = (yp_global_variable_operator_write_node_t) {
2342
+ {
2343
+ .type = YP_NODE_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
2344
+ .location = {
2345
+ .start = target->location.start,
2346
+ .end = value->location.end
2347
+ }
2348
+ },
2349
+ .name_loc = target->location,
2350
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2351
+ .value = value,
2352
+ .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
2353
+ };
2354
+
2355
+ return node;
2356
+ }
2357
+
2358
+ // Allocate and initialize a new GlobalVariableOrWriteNode node.
2359
+ static yp_global_variable_or_write_node_t *
2360
+ yp_global_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2361
+ assert(YP_NODE_TYPE_P(target, YP_NODE_GLOBAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_BACK_REFERENCE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_NUMBERED_REFERENCE_READ_NODE));
2362
+ assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
2363
+ yp_global_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_or_write_node_t);
2364
+
2365
+ *node = (yp_global_variable_or_write_node_t) {
2366
+ {
2367
+ .type = YP_NODE_GLOBAL_VARIABLE_OR_WRITE_NODE,
2368
+ .location = {
2369
+ .start = target->location.start,
2370
+ .end = value->location.end
2371
+ }
2372
+ },
2373
+ .name_loc = target->location,
2374
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2375
+ .value = value
2376
+ };
2377
+
2378
+ return node;
2379
+ }
2380
+
2015
2381
  // Allocate a new GlobalVariableReadNode node.
2016
2382
  static yp_global_variable_read_node_t *
2017
2383
  yp_global_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name) {
@@ -2037,7 +2403,7 @@ yp_global_variable_write_node_create(yp_parser_t *parser, const yp_location_t *n
2037
2403
  .type = YP_NODE_GLOBAL_VARIABLE_WRITE_NODE,
2038
2404
  .location = {
2039
2405
  .start = name_loc->start,
2040
- .end = (value == NULL ? name_loc->end : value->location.end)
2406
+ .end = value->location.end
2041
2407
  },
2042
2408
  },
2043
2409
  .name_loc = *name_loc,
@@ -2093,7 +2459,7 @@ yp_if_node_create(yp_parser_t *parser,
2093
2459
  yp_flip_flop(predicate);
2094
2460
  yp_if_node_t *node = YP_ALLOC_NODE(parser, yp_if_node_t);
2095
2461
 
2096
- const char *end;
2462
+ const uint8_t *end;
2097
2463
  if (end_keyword->type != YP_TOKEN_NOT_PROVIDED) {
2098
2464
  end = end_keyword->end;
2099
2465
  } else if (consequent != NULL) {
@@ -2276,7 +2642,7 @@ static yp_in_node_t *
2276
2642
  yp_in_node_create(yp_parser_t *parser, yp_node_t *pattern, yp_statements_node_t *statements, const yp_token_t *in_keyword, const yp_token_t *then_keyword) {
2277
2643
  yp_in_node_t *node = YP_ALLOC_NODE(parser, yp_in_node_t);
2278
2644
 
2279
- const char *end;
2645
+ const uint8_t *end;
2280
2646
  if (statements != NULL) {
2281
2647
  end = statements->base.location.end;
2282
2648
  } else if (then_keyword->type != YP_TOKEN_NOT_PROVIDED) {
@@ -2302,15 +2668,88 @@ yp_in_node_create(yp_parser_t *parser, yp_node_t *pattern, yp_statements_node_t
2302
2668
  return node;
2303
2669
  }
2304
2670
 
2671
+ // Allocate and initialize a new InstanceVariableAndWriteNode node.
2672
+ static yp_instance_variable_and_write_node_t *
2673
+ yp_instance_variable_and_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2674
+ assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2675
+ yp_instance_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_and_write_node_t);
2676
+
2677
+ *node = (yp_instance_variable_and_write_node_t) {
2678
+ {
2679
+ .type = YP_NODE_INSTANCE_VARIABLE_AND_WRITE_NODE,
2680
+ .location = {
2681
+ .start = target->base.location.start,
2682
+ .end = value->location.end
2683
+ }
2684
+ },
2685
+ .name = target->name,
2686
+ .name_loc = target->base.location,
2687
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2688
+ .value = value
2689
+ };
2690
+
2691
+ return node;
2692
+ }
2693
+
2694
+ // Allocate and initialize a new InstanceVariableOperatorWriteNode node.
2695
+ static yp_instance_variable_operator_write_node_t *
2696
+ yp_instance_variable_operator_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2697
+ yp_instance_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_operator_write_node_t);
2698
+
2699
+ *node = (yp_instance_variable_operator_write_node_t) {
2700
+ {
2701
+ .type = YP_NODE_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
2702
+ .location = {
2703
+ .start = target->base.location.start,
2704
+ .end = value->location.end
2705
+ }
2706
+ },
2707
+ .name = target->name,
2708
+ .name_loc = target->base.location,
2709
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2710
+ .value = value,
2711
+ .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
2712
+ };
2713
+
2714
+ return node;
2715
+ }
2716
+
2717
+ // Allocate and initialize a new InstanceVariableOrWriteNode node.
2718
+ static yp_instance_variable_or_write_node_t *
2719
+ yp_instance_variable_or_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2720
+ assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
2721
+ yp_instance_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_or_write_node_t);
2722
+
2723
+ *node = (yp_instance_variable_or_write_node_t) {
2724
+ {
2725
+ .type = YP_NODE_INSTANCE_VARIABLE_OR_WRITE_NODE,
2726
+ .location = {
2727
+ .start = target->base.location.start,
2728
+ .end = value->location.end
2729
+ }
2730
+ },
2731
+ .name = target->name,
2732
+ .name_loc = target->base.location,
2733
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2734
+ .value = value
2735
+ };
2736
+
2737
+ return node;
2738
+ }
2739
+
2305
2740
  // Allocate and initialize a new InstanceVariableReadNode node.
2306
2741
  static yp_instance_variable_read_node_t *
2307
2742
  yp_instance_variable_read_node_create(yp_parser_t *parser, const yp_token_t *token) {
2308
2743
  assert(token->type == YP_TOKEN_INSTANCE_VARIABLE);
2309
2744
  yp_instance_variable_read_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_read_node_t);
2310
2745
 
2311
- *node = (yp_instance_variable_read_node_t) {{
2312
- .type = YP_NODE_INSTANCE_VARIABLE_READ_NODE, .location = YP_LOCATION_TOKEN_VALUE(token)
2313
- }};
2746
+ *node = (yp_instance_variable_read_node_t) {
2747
+ {
2748
+ .type = YP_NODE_INSTANCE_VARIABLE_READ_NODE,
2749
+ .location = YP_LOCATION_TOKEN_VALUE(token)
2750
+ },
2751
+ .name = yp_parser_constant_id_location(parser, token->start, token->end)
2752
+ };
2314
2753
 
2315
2754
  return node;
2316
2755
  }
@@ -2324,9 +2763,10 @@ yp_instance_variable_write_node_create(yp_parser_t *parser, yp_instance_variable
2324
2763
  .type = YP_NODE_INSTANCE_VARIABLE_WRITE_NODE,
2325
2764
  .location = {
2326
2765
  .start = read_node->base.location.start,
2327
- .end = value == NULL ? read_node->base.location.end : value->location.end
2766
+ .end = value->location.end
2328
2767
  }
2329
2768
  },
2769
+ .name = read_node->name,
2330
2770
  .name_loc = YP_LOCATION_NODE_BASE_VALUE(read_node),
2331
2771
  .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2332
2772
  .value = value
@@ -2358,8 +2798,13 @@ yp_interpolated_regular_expression_node_create(yp_parser_t *parser, const yp_tok
2358
2798
 
2359
2799
  static inline void
2360
2800
  yp_interpolated_regular_expression_node_append(yp_interpolated_regular_expression_node_t *node, yp_node_t *part) {
2801
+ if (node->base.location.start > part->location.start) {
2802
+ node->base.location.start = part->location.start;
2803
+ }
2804
+ if (node->base.location.end < part->location.end) {
2805
+ node->base.location.end = part->location.end;
2806
+ }
2361
2807
  yp_node_list_append(&node->parts, part);
2362
- node->base.location.end = part->location.end;
2363
2808
  }
2364
2809
 
2365
2810
  static inline void
@@ -2431,17 +2876,12 @@ yp_interpolated_symbol_node_create(yp_parser_t *parser, const yp_token_t *openin
2431
2876
 
2432
2877
  static inline void
2433
2878
  yp_interpolated_symbol_node_append(yp_interpolated_symbol_node_t *node, yp_node_t *part) {
2434
- yp_node_list_append(&node->parts, part);
2435
- if (!node->base.location.start) {
2879
+ if (node->parts.size == 0 && node->opening_loc.start == NULL) {
2436
2880
  node->base.location.start = part->location.start;
2437
2881
  }
2438
- node->base.location.end = part->location.end;
2439
- }
2440
2882
 
2441
- static inline void
2442
- yp_interpolated_symbol_node_closing_set(yp_interpolated_symbol_node_t *node, const yp_token_t *closing) {
2443
- node->closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
2444
- node->base.location.end = closing->end;
2883
+ yp_node_list_append(&node->parts, part);
2884
+ node->base.location.end = part->location.end;
2445
2885
  }
2446
2886
 
2447
2887
  // Allocate a new InterpolatedXStringNode node.
@@ -2551,10 +2991,11 @@ static yp_lambda_node_t *
2551
2991
  yp_lambda_node_create(
2552
2992
  yp_parser_t *parser,
2553
2993
  yp_constant_id_list_t *locals,
2994
+ const yp_token_t *operator,
2554
2995
  const yp_token_t *opening,
2996
+ const yp_token_t *closing,
2555
2997
  yp_block_parameters_node_t *parameters,
2556
- yp_node_t *body,
2557
- const yp_token_t *closing
2998
+ yp_node_t *body
2558
2999
  ) {
2559
3000
  yp_lambda_node_t *node = YP_ALLOC_NODE(parser, yp_lambda_node_t);
2560
3001
 
@@ -2562,12 +3003,14 @@ yp_lambda_node_create(
2562
3003
  {
2563
3004
  .type = YP_NODE_LAMBDA_NODE,
2564
3005
  .location = {
2565
- .start = opening->start,
3006
+ .start = operator->start,
2566
3007
  .end = closing->end
2567
3008
  },
2568
3009
  },
2569
3010
  .locals = *locals,
3011
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2570
3012
  .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
3013
+ .closing_loc = YP_LOCATION_TOKEN_VALUE(closing),
2571
3014
  .parameters = parameters,
2572
3015
  .body = body
2573
3016
  };
@@ -2575,6 +3018,80 @@ yp_lambda_node_create(
2575
3018
  return node;
2576
3019
  }
2577
3020
 
3021
+ // Allocate and initialize a new LocalVariableAndWriteNode node.
3022
+ static yp_local_variable_and_write_node_t *
3023
+ yp_local_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) {
3024
+ assert(YP_NODE_TYPE_P(target, YP_NODE_LOCAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_CALL_NODE));
3025
+ assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3026
+ yp_local_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_and_write_node_t);
3027
+
3028
+ *node = (yp_local_variable_and_write_node_t) {
3029
+ {
3030
+ .type = YP_NODE_LOCAL_VARIABLE_AND_WRITE_NODE,
3031
+ .location = {
3032
+ .start = target->location.start,
3033
+ .end = value->location.end
3034
+ }
3035
+ },
3036
+ .name_loc = target->location,
3037
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
3038
+ .value = value,
3039
+ .name = name,
3040
+ .depth = depth
3041
+ };
3042
+
3043
+ return node;
3044
+ }
3045
+
3046
+ // Allocate and initialize a new LocalVariableOperatorWriteNode node.
3047
+ static yp_local_variable_operator_write_node_t *
3048
+ yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) {
3049
+ yp_local_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_operator_write_node_t);
3050
+
3051
+ *node = (yp_local_variable_operator_write_node_t) {
3052
+ {
3053
+ .type = YP_NODE_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
3054
+ .location = {
3055
+ .start = target->location.start,
3056
+ .end = value->location.end
3057
+ }
3058
+ },
3059
+ .name_loc = target->location,
3060
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
3061
+ .value = value,
3062
+ .name = name,
3063
+ .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1),
3064
+ .depth = depth
3065
+ };
3066
+
3067
+ return node;
3068
+ }
3069
+
3070
+ // Allocate and initialize a new LocalVariableOrWriteNode node.
3071
+ static yp_local_variable_or_write_node_t *
3072
+ yp_local_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) {
3073
+ assert(YP_NODE_TYPE_P(target, YP_NODE_LOCAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_CALL_NODE));
3074
+ assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
3075
+ yp_local_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_or_write_node_t);
3076
+
3077
+ *node = (yp_local_variable_or_write_node_t) {
3078
+ {
3079
+ .type = YP_NODE_LOCAL_VARIABLE_OR_WRITE_NODE,
3080
+ .location = {
3081
+ .start = target->location.start,
3082
+ .end = value->location.end
3083
+ }
3084
+ },
3085
+ .name_loc = target->location,
3086
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
3087
+ .value = value,
3088
+ .name = name,
3089
+ .depth = depth
3090
+ };
3091
+
3092
+ return node;
3093
+ }
3094
+
2578
3095
  // Allocate a new LocalVariableReadNode node.
2579
3096
  static yp_local_variable_read_node_t *
2580
3097
  yp_local_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name, uint32_t depth) {
@@ -2585,7 +3102,7 @@ yp_local_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name,
2585
3102
  .type = YP_NODE_LOCAL_VARIABLE_READ_NODE,
2586
3103
  .location = YP_LOCATION_TOKEN_VALUE(name)
2587
3104
  },
2588
- .constant_id = yp_parser_constant_id_token(parser, name),
3105
+ .name = yp_parser_constant_id_token(parser, name),
2589
3106
  .depth = depth
2590
3107
  };
2591
3108
 
@@ -2594,7 +3111,7 @@ yp_local_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name,
2594
3111
 
2595
3112
  // Allocate and initialize a new LocalVariableWriteNode node.
2596
3113
  static yp_local_variable_write_node_t *
2597
- yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t constant_id, uint32_t depth, yp_node_t *value, const yp_location_t *name_loc, const yp_token_t *operator) {
3114
+ yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t name, uint32_t depth, yp_node_t *value, const yp_location_t *name_loc, const yp_token_t *operator) {
2598
3115
  yp_local_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_write_node_t);
2599
3116
 
2600
3117
  *node = (yp_local_variable_write_node_t) {
@@ -2602,10 +3119,10 @@ yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t consta
2602
3119
  .type = YP_NODE_LOCAL_VARIABLE_WRITE_NODE,
2603
3120
  .location = {
2604
3121
  .start = name_loc->start,
2605
- .end = value == NULL ? name_loc->end : value->location.end
3122
+ .end = value->location.end
2606
3123
  }
2607
3124
  },
2608
- .constant_id = constant_id,
3125
+ .name = name,
2609
3126
  .depth = depth,
2610
3127
  .value = value,
2611
3128
  .name_loc = *name_loc,
@@ -2615,21 +3132,18 @@ yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t consta
2615
3132
  return node;
2616
3133
  }
2617
3134
 
2618
- // Allocate and initialize a new LocalVariableWriteNode node without an operator or target.
2619
- static yp_local_variable_write_node_t *
3135
+ // Allocate and initialize a new LocalVariableTargetNode node.
3136
+ static yp_local_variable_target_node_t *
2620
3137
  yp_local_variable_target_node_create(yp_parser_t *parser, const yp_token_t *name) {
2621
- yp_local_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_write_node_t);
3138
+ yp_local_variable_target_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_target_node_t);
2622
3139
 
2623
- *node = (yp_local_variable_write_node_t) {
3140
+ *node = (yp_local_variable_target_node_t) {
2624
3141
  {
2625
- .type = YP_NODE_LOCAL_VARIABLE_WRITE_NODE,
3142
+ .type = YP_NODE_LOCAL_VARIABLE_TARGET_NODE,
2626
3143
  .location = YP_LOCATION_TOKEN_VALUE(name)
2627
3144
  },
2628
- .constant_id = yp_parser_constant_id_token(parser, name),
2629
- .depth = 0,
2630
- .value = NULL,
2631
- .name_loc = YP_LOCATION_TOKEN_VALUE(name),
2632
- .operator_loc = { .start = NULL, .end = NULL }
3145
+ .name = yp_parser_constant_id_token(parser, name),
3146
+ .depth = 0
2633
3147
  };
2634
3148
 
2635
3149
  return node;
@@ -2679,7 +3193,7 @@ yp_match_required_node_create(yp_parser_t *parser, yp_node_t *value, yp_node_t *
2679
3193
 
2680
3194
  // Allocate a new ModuleNode node.
2681
3195
  static yp_module_node_t *
2682
- yp_module_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *module_keyword, yp_node_t *constant_path, yp_node_t *body, const yp_token_t *end_keyword) {
3196
+ yp_module_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *module_keyword, yp_node_t *constant_path, const yp_token_t *name, yp_node_t *body, const yp_token_t *end_keyword) {
2683
3197
  yp_module_node_t *node = YP_ALLOC_NODE(parser, yp_module_node_t);
2684
3198
 
2685
3199
  *node = (yp_module_node_t) {
@@ -2694,9 +3208,11 @@ yp_module_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const
2694
3208
  .module_keyword_loc = YP_LOCATION_TOKEN_VALUE(module_keyword),
2695
3209
  .constant_path = constant_path,
2696
3210
  .body = body,
2697
- .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword)
3211
+ .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword),
3212
+ .name = YP_EMPTY_STRING
2698
3213
  };
2699
3214
 
3215
+ yp_string_shared_init(&node->name, name->start, name->end);
2700
3216
  return node;
2701
3217
  }
2702
3218
 
@@ -2708,7 +3224,10 @@ yp_multi_write_node_create(yp_parser_t *parser, const yp_token_t *operator, yp_n
2708
3224
  *node = (yp_multi_write_node_t) {
2709
3225
  {
2710
3226
  .type = YP_NODE_MULTI_WRITE_NODE,
2711
- .location = { .start = NULL, .end = NULL },
3227
+ .location = {
3228
+ .start = lparen_loc->start,
3229
+ .end = value == NULL ? rparen_loc->end : value->location.end
3230
+ },
2712
3231
  },
2713
3232
  .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2714
3233
  .value = value,
@@ -2802,29 +3321,8 @@ yp_numbered_reference_read_node_create(yp_parser_t *parser, const yp_token_t *na
2802
3321
  {
2803
3322
  .type = YP_NODE_NUMBERED_REFERENCE_READ_NODE,
2804
3323
  .location = YP_LOCATION_TOKEN_VALUE(name),
2805
- }
2806
- };
2807
-
2808
- return node;
2809
- }
2810
-
2811
- // Allocate and initialize a new OperatorWriteNode.
2812
- static yp_operator_write_node_t *
2813
- yp_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2814
- yp_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_operator_write_node_t);
2815
-
2816
- *node = (yp_operator_write_node_t) {
2817
- {
2818
- .type = YP_NODE_OPERATOR_WRITE_NODE,
2819
- .location = {
2820
- .start = target->location.start,
2821
- .end = value->location.end
2822
- },
2823
3324
  },
2824
- .target = target,
2825
- .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2826
- .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1),
2827
- .value = value
3325
+ .number = parse_decimal_number(parser, name->start + 1, name->end)
2828
3326
  };
2829
3327
 
2830
3328
  return node;
@@ -2843,7 +3341,7 @@ yp_optional_parameter_node_create(yp_parser_t *parser, const yp_token_t *name, c
2843
3341
  .end = value->location.end
2844
3342
  }
2845
3343
  },
2846
- .constant_id = yp_parser_constant_id_token(parser, name),
3344
+ .name = yp_parser_constant_id_token(parser, name),
2847
3345
  .name_loc = YP_LOCATION_TOKEN_VALUE(name),
2848
3346
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2849
3347
  .value = value
@@ -2863,32 +3361,11 @@ yp_or_node_create(yp_parser_t *parser, yp_node_t *left, const yp_token_t *operat
2863
3361
  .location = {
2864
3362
  .start = left->location.start,
2865
3363
  .end = right->location.end
2866
- }
2867
- },
2868
- .left = left,
2869
- .right = right,
2870
- .operator_loc = YP_LOCATION_TOKEN_VALUE(operator)
2871
- };
2872
-
2873
- return node;
2874
- }
2875
-
2876
- // Allocate and initialize a new OrWriteNode.
2877
- static yp_or_write_node_t *
2878
- yp_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2879
- yp_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_or_write_node_t);
2880
-
2881
- *node = (yp_or_write_node_t) {
2882
- {
2883
- .type = YP_NODE_OR_WRITE_NODE,
2884
- .location = {
2885
- .start = target->location.start,
2886
- .end = value->location.end
2887
- },
3364
+ }
2888
3365
  },
2889
- .target = target,
2890
- .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2891
- .value = value
3366
+ .left = left,
3367
+ .right = right,
3368
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator)
2892
3369
  };
2893
3370
 
2894
3371
  return node;
@@ -3161,8 +3638,8 @@ yp_regular_expression_node_create(yp_parser_t *parser, const yp_token_t *opening
3161
3638
  .type = YP_NODE_REGULAR_EXPRESSION_NODE,
3162
3639
  .flags = yp_regular_expression_flags_create(closing),
3163
3640
  .location = {
3164
- .start = opening->start,
3165
- .end = closing->end
3641
+ .start = MIN(opening->start, closing->start),
3642
+ .end = MAX(opening->end, closing->end)
3166
3643
  }
3167
3644
  },
3168
3645
  .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
@@ -3215,7 +3692,7 @@ yp_required_parameter_node_create(yp_parser_t *parser, const yp_token_t *token)
3215
3692
  .type = YP_NODE_REQUIRED_PARAMETER_NODE,
3216
3693
  .location = YP_LOCATION_TOKEN_VALUE(token)
3217
3694
  },
3218
- .constant_id = yp_parser_constant_id_token(parser, token)
3695
+ .name = yp_parser_constant_id_token(parser, token)
3219
3696
  };
3220
3697
 
3221
3698
  return node;
@@ -3466,19 +3943,21 @@ yp_statements_node_body_length(yp_statements_node_t *node) {
3466
3943
 
3467
3944
  // Set the location of the given StatementsNode.
3468
3945
  static void
3469
- yp_statements_node_location_set(yp_statements_node_t *node, const char *start, const char *end) {
3946
+ yp_statements_node_location_set(yp_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
3470
3947
  node->base.location = (yp_location_t) { .start = start, .end = end };
3471
3948
  }
3472
3949
 
3473
3950
  // Append a new node to the given StatementsNode node's body.
3474
3951
  static void
3475
3952
  yp_statements_node_body_append(yp_statements_node_t *node, yp_node_t *statement) {
3476
- if (yp_statements_node_body_length(node) == 0) {
3953
+ if (yp_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
3477
3954
  node->base.location.start = statement->location.start;
3478
3955
  }
3956
+ if (statement->location.end > node->base.location.end) {
3957
+ node->base.location.end = statement->location.end;
3958
+ }
3479
3959
 
3480
3960
  yp_node_list_append(&node->body, statement);
3481
- node->base.location.end = statement->location.end;
3482
3961
 
3483
3962
  // Every statement gets marked as a place where a newline can occur.
3484
3963
  statement->flags |= YP_NODE_FLAG_NEWLINE;
@@ -3532,7 +4011,7 @@ yp_super_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_argument
3532
4011
  assert(keyword->type == YP_TOKEN_KEYWORD_SUPER);
3533
4012
  yp_super_node_t *node = YP_ALLOC_NODE(parser, yp_super_node_t);
3534
4013
 
3535
- const char *end;
4014
+ const uint8_t *end;
3536
4015
  if (arguments->block != NULL) {
3537
4016
  end = arguments->block->base.location.end;
3538
4017
  } else if (arguments->closing_loc.start != NULL) {
@@ -3600,7 +4079,7 @@ yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) {
3600
4079
  assert((label.end - label.start) >= 0);
3601
4080
  yp_string_shared_init(&node->unescaped, label.start, label.end);
3602
4081
 
3603
- yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
4082
+ yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL);
3604
4083
  break;
3605
4084
  }
3606
4085
  case YP_TOKEN_MISSING: {
@@ -3623,7 +4102,7 @@ yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) {
3623
4102
  // Check if the given node is a label in a hash.
3624
4103
  static bool
3625
4104
  yp_symbol_node_label_p(yp_node_t *node) {
3626
- const char *end = NULL;
4105
+ const uint8_t *end = NULL;
3627
4106
 
3628
4107
  switch (YP_NODE_TYPE(node)) {
3629
4108
  case YP_NODE_SYMBOL_NODE:
@@ -3641,20 +4120,20 @@ yp_symbol_node_label_p(yp_node_t *node) {
3641
4120
 
3642
4121
  // Convert the given StringNode node to a SymbolNode node.
3643
4122
  static yp_symbol_node_t *
3644
- yp_string_node_to_symbol_node(yp_parser_t *parser, yp_string_node_t *node) {
4123
+ yp_string_node_to_symbol_node(yp_parser_t *parser, yp_string_node_t *node, const yp_token_t *opening, const yp_token_t *closing) {
3645
4124
  yp_symbol_node_t *new_node = YP_ALLOC_NODE(parser, yp_symbol_node_t);
3646
4125
 
3647
4126
  *new_node = (yp_symbol_node_t) {
3648
4127
  {
3649
4128
  .type = YP_NODE_SYMBOL_NODE,
3650
4129
  .location = {
3651
- .start = node->base.location.start - 2,
3652
- .end = node->base.location.end + 1
4130
+ .start = opening->start,
4131
+ .end = closing->end
3653
4132
  }
3654
4133
  },
3655
- .opening_loc = node->opening_loc,
4134
+ .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
3656
4135
  .value_loc = node->content_loc,
3657
- .closing_loc = node->closing_loc,
4136
+ .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
3658
4137
  .unescaped = node->unescaped
3659
4138
  };
3660
4139
 
@@ -3731,7 +4210,7 @@ yp_unless_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t
3731
4210
  yp_flip_flop(predicate);
3732
4211
  yp_unless_node_t *node = YP_ALLOC_NODE(parser, yp_unless_node_t);
3733
4212
 
3734
- const char *end;
4213
+ const uint8_t *end;
3735
4214
  if (statements != NULL) {
3736
4215
  end = statements->base.location.end;
3737
4216
  } else {
@@ -3793,34 +4272,43 @@ yp_unless_node_end_keyword_loc_set(yp_unless_node_t *node, const yp_token_t *end
3793
4272
 
3794
4273
  // Allocate a new UntilNode node.
3795
4274
  static yp_until_node_t *
3796
- yp_until_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4275
+ yp_until_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_token_t *closing, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
3797
4276
  yp_until_node_t *node = YP_ALLOC_NODE(parser, yp_until_node_t);
3798
- bool has_statements = (statements != NULL) && (statements->body.size != 0);
3799
4277
 
3800
- const char *start = NULL;
3801
- if (has_statements && (keyword->start > statements->base.location.start)) {
3802
- start = statements->base.location.start;
3803
- } else {
3804
- start = keyword->start;
3805
- }
4278
+ *node = (yp_until_node_t) {
4279
+ {
4280
+ .type = YP_NODE_UNTIL_NODE,
4281
+ .flags = flags,
4282
+ .location = {
4283
+ .start = keyword->start,
4284
+ .end = closing->end,
4285
+ },
4286
+ },
4287
+ .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
4288
+ .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4289
+ .predicate = predicate,
4290
+ .statements = statements
4291
+ };
3806
4292
 
3807
- const char *end = NULL;
3808
- if (has_statements && (predicate->location.end < statements->base.location.end)) {
3809
- end = statements->base.location.end;
3810
- } else {
3811
- end = predicate->location.end;
3812
- }
4293
+ return node;
4294
+ }
4295
+
4296
+ // Allocate a new UntilNode node.
4297
+ static yp_until_node_t *
4298
+ yp_until_node_modifier_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4299
+ yp_until_node_t *node = YP_ALLOC_NODE(parser, yp_until_node_t);
3813
4300
 
3814
4301
  *node = (yp_until_node_t) {
3815
4302
  {
3816
4303
  .type = YP_NODE_UNTIL_NODE,
3817
4304
  .flags = flags,
3818
4305
  .location = {
3819
- .start = start,
3820
- .end = end,
4306
+ .start = statements->base.location.start,
4307
+ .end = predicate->location.end,
3821
4308
  },
3822
4309
  },
3823
4310
  .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
4311
+ .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
3824
4312
  .predicate = predicate,
3825
4313
  .statements = statements
3826
4314
  };
@@ -3868,34 +4356,43 @@ yp_when_node_statements_set(yp_when_node_t *node, yp_statements_node_t *statemen
3868
4356
 
3869
4357
  // Allocate a new WhileNode node.
3870
4358
  static yp_while_node_t *
3871
- yp_while_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4359
+ yp_while_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_token_t *closing, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
3872
4360
  yp_while_node_t *node = YP_ALLOC_NODE(parser, yp_while_node_t);
3873
4361
 
3874
- const char *start = NULL;
3875
- bool has_statements = (statements != NULL) && (statements->body.size != 0);
3876
- if (has_statements && (keyword->start > statements->base.location.start)) {
3877
- start = statements->base.location.start;
3878
- } else {
3879
- start = keyword->start;
3880
- }
4362
+ *node = (yp_while_node_t) {
4363
+ {
4364
+ .type = YP_NODE_WHILE_NODE,
4365
+ .flags = flags,
4366
+ .location = {
4367
+ .start = keyword->start,
4368
+ .end = closing->end
4369
+ },
4370
+ },
4371
+ .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
4372
+ .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4373
+ .predicate = predicate,
4374
+ .statements = statements
4375
+ };
3881
4376
 
3882
- const char *end = NULL;
3883
- if (has_statements && (predicate->location.end < statements->base.location.end)) {
3884
- end = statements->base.location.end;
3885
- } else {
3886
- end = predicate->location.end;
3887
- }
4377
+ return node;
4378
+ }
4379
+
4380
+ // Allocate a new WhileNode node.
4381
+ static yp_while_node_t *
4382
+ yp_while_node_modifier_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4383
+ yp_while_node_t *node = YP_ALLOC_NODE(parser, yp_while_node_t);
3888
4384
 
3889
4385
  *node = (yp_while_node_t) {
3890
4386
  {
3891
4387
  .type = YP_NODE_WHILE_NODE,
3892
4388
  .flags = flags,
3893
4389
  .location = {
3894
- .start = start,
3895
- .end = end,
4390
+ .start = statements->base.location.start,
4391
+ .end = predicate->location.end
3896
4392
  },
3897
4393
  },
3898
4394
  .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
4395
+ .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
3899
4396
  .predicate = predicate,
3900
4397
  .statements = statements
3901
4398
  };
@@ -3930,7 +4427,7 @@ static yp_yield_node_t *
3930
4427
  yp_yield_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_location_t *lparen_loc, yp_arguments_node_t *arguments, const yp_location_t *rparen_loc) {
3931
4428
  yp_yield_node_t *node = YP_ALLOC_NODE(parser, yp_yield_node_t);
3932
4429
 
3933
- const char *end;
4430
+ const uint8_t *end;
3934
4431
  if (rparen_loc->start != NULL) {
3935
4432
  end = rparen_loc->end;
3936
4433
  } else if (arguments != NULL) {
@@ -4003,13 +4500,15 @@ yp_parser_local_depth(yp_parser_t *parser, yp_token_t *token) {
4003
4500
  }
4004
4501
 
4005
4502
  // Add a local variable from a location to the current scope.
4006
- static void
4007
- yp_parser_local_add_location(yp_parser_t *parser, const char *start, const char *end) {
4503
+ static yp_constant_id_t
4504
+ yp_parser_local_add_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
4008
4505
  yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, start, end);
4009
4506
 
4010
4507
  if (!yp_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
4011
4508
  yp_constant_id_list_append(&parser->current_scope->locals, constant_id);
4012
4509
  }
4510
+
4511
+ return constant_id;
4013
4512
  }
4014
4513
 
4015
4514
  // Add a local variable from a token to the current scope.
@@ -4051,15 +4550,13 @@ yp_parser_scope_pop(yp_parser_t *parser) {
4051
4550
  // reason we have the encoding_changed boolean to check if we need to go through
4052
4551
  // the function pointer or can just directly use the UTF-8 functions.
4053
4552
  static inline size_t
4054
- char_is_identifier_start(yp_parser_t *parser, const char *c) {
4055
- const unsigned char uc = (unsigned char) *c;
4056
-
4553
+ char_is_identifier_start(yp_parser_t *parser, const uint8_t *b) {
4057
4554
  if (parser->encoding_changed) {
4058
- return parser->encoding.alpha_char(c, parser->end - c) || (uc == '_') || (uc >= 0x80);
4059
- } else if (uc < 0x80) {
4060
- return (yp_encoding_unicode_table[uc] & YP_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (uc == '_');
4555
+ return parser->encoding.alpha_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
4556
+ } else if (*b < 0x80) {
4557
+ return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
4061
4558
  } else {
4062
- return (size_t) (yp_encoding_utf_8_alpha_char(c, parser->end - c) || 1u);
4559
+ return (size_t) (yp_encoding_utf_8_alpha_char(b, parser->end - b) || 1u);
4063
4560
  }
4064
4561
  }
4065
4562
 
@@ -4067,15 +4564,13 @@ char_is_identifier_start(yp_parser_t *parser, const char *c) {
4067
4564
  // the identifiers in a source file once the first character has been found. So
4068
4565
  // it's important that it be as fast as possible.
4069
4566
  static inline size_t
4070
- char_is_identifier(yp_parser_t *parser, const char *c) {
4071
- const unsigned char uc = (unsigned char) *c;
4072
-
4567
+ char_is_identifier(yp_parser_t *parser, const uint8_t *b) {
4073
4568
  if (parser->encoding_changed) {
4074
- return parser->encoding.alnum_char(c, parser->end - c) || (uc == '_') || (uc >= 0x80);
4075
- } else if (uc < 0x80) {
4076
- return (yp_encoding_unicode_table[uc] & YP_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (uc == '_');
4569
+ return parser->encoding.alnum_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
4570
+ } else if (*b < 0x80) {
4571
+ return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (*b == '_');
4077
4572
  } else {
4078
- return (size_t) (yp_encoding_utf_8_alnum_char(c, parser->end - c) || 1u);
4573
+ return (size_t) (yp_encoding_utf_8_alnum_char(b, parser->end - b) || 1u);
4079
4574
  }
4080
4575
  }
4081
4576
 
@@ -4097,15 +4592,15 @@ const unsigned int yp_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
4097
4592
  #undef PUNCT
4098
4593
 
4099
4594
  static inline bool
4100
- char_is_global_name_punctuation(const char c) {
4101
- const unsigned int i = (const unsigned int) c;
4595
+ char_is_global_name_punctuation(const uint8_t b) {
4596
+ const unsigned int i = (const unsigned int) b;
4102
4597
  if (i <= 0x20 || 0x7e < i) return false;
4103
4598
 
4104
- return (yp_global_name_punctuation_hash[(i - 0x20) / 32] >> (c % 32)) & 1;
4599
+ return (yp_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
4105
4600
  }
4106
4601
 
4107
4602
  static inline bool
4108
- token_is_numbered_parameter(const char *start, const char *end) {
4603
+ token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
4109
4604
  return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (yp_char_is_decimal_digit(start[1]));
4110
4605
  }
4111
4606
 
@@ -4157,44 +4652,47 @@ yp_do_loop_stack_p(yp_parser_t *parser) {
4157
4652
  /* Lexer check helpers */
4158
4653
  /******************************************************************************/
4159
4654
 
4160
- // Get the next character in the source starting from parser->current.end and
4161
- // adding the given offset. If that position is beyond the end of the source
4162
- // then return '\0'.
4163
- static inline char
4164
- peek_at(yp_parser_t *parser, size_t offset) {
4165
- if (parser->current.end + offset < parser->end) {
4166
- return parser->current.end[offset];
4655
+ // Get the next character in the source starting from +cursor+. If that position
4656
+ // is beyond the end of the source then return '\0'.
4657
+ static inline uint8_t
4658
+ peek_at(yp_parser_t *parser, const uint8_t *cursor) {
4659
+ if (cursor < parser->end) {
4660
+ return *cursor;
4167
4661
  } else {
4168
4662
  return '\0';
4169
4663
  }
4170
4664
  }
4171
4665
 
4666
+ // Get the next character in the source starting from parser->current.end and
4667
+ // adding the given offset. If that position is beyond the end of the source
4668
+ // then return '\0'.
4669
+ static inline uint8_t
4670
+ peek_offset(yp_parser_t *parser, ptrdiff_t offset) {
4671
+ return peek_at(parser, parser->current.end + offset);
4672
+ }
4673
+
4172
4674
  // Get the next character in the source starting from parser->current.end. If
4173
4675
  // that position is beyond the end of the source then return '\0'.
4174
- static inline char
4676
+ static inline uint8_t
4175
4677
  peek(yp_parser_t *parser) {
4176
- if (parser->current.end < parser->end) {
4177
- return *parser->current.end;
4178
- } else {
4179
- return '\0';
4180
- }
4678
+ return peek_at(parser, parser->current.end);
4181
4679
  }
4182
4680
 
4183
4681
  // Get the next string of length len in the source starting from parser->current.end.
4184
4682
  // If the string extends beyond the end of the source, return the empty string ""
4185
- static inline const char*
4683
+ static inline const uint8_t *
4186
4684
  peek_string(yp_parser_t *parser, size_t len) {
4187
4685
  if (parser->current.end + len <= parser->end) {
4188
4686
  return parser->current.end;
4189
4687
  } else {
4190
- return "";
4688
+ return (const uint8_t *) "";
4191
4689
  }
4192
4690
  }
4193
4691
 
4194
4692
  // If the character to be read matches the given value, then returns true and
4195
4693
  // advanced the current pointer.
4196
4694
  static inline bool
4197
- match(yp_parser_t *parser, char value) {
4695
+ match(yp_parser_t *parser, uint8_t value) {
4198
4696
  if (peek(parser) == value) {
4199
4697
  parser->current.end++;
4200
4698
  return true;
@@ -4202,9 +4700,38 @@ match(yp_parser_t *parser, char value) {
4202
4700
  return false;
4203
4701
  }
4204
4702
 
4703
+ // Return the length of the line ending string starting at +cursor+, or 0 if it
4704
+ // is not a line ending. This function is intended to be CRLF/LF agnostic.
4705
+ static inline size_t
4706
+ match_eol_at(yp_parser_t *parser, const uint8_t *cursor) {
4707
+ if (peek_at(parser, cursor) == '\n') {
4708
+ return 1;
4709
+ }
4710
+ if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
4711
+ return 2;
4712
+ }
4713
+ return 0;
4714
+ }
4715
+
4716
+ // Return the length of the line ending string starting at
4717
+ // parser->current.end + offset, or 0 if it is not a line ending. This function
4718
+ // is intended to be CRLF/LF agnostic.
4719
+ static inline size_t
4720
+ match_eol_offset(yp_parser_t *parser, ptrdiff_t offset) {
4721
+ return match_eol_at(parser, parser->current.end + offset);
4722
+ }
4723
+
4724
+ // Return the length of the line ending string starting at parser->current.end,
4725
+ // or 0 if it is not a line ending. This function is intended to be CRLF/LF
4726
+ // agnostic.
4727
+ static inline size_t
4728
+ match_eol(yp_parser_t *parser) {
4729
+ return match_eol_at(parser, parser->current.end);
4730
+ }
4731
+
4205
4732
  // Skip to the next newline character or NUL byte.
4206
- static inline const char *
4207
- next_newline(const char *cursor, ptrdiff_t length) {
4733
+ static inline const uint8_t *
4734
+ next_newline(const uint8_t *cursor, ptrdiff_t length) {
4208
4735
  assert(length >= 0);
4209
4736
 
4210
4737
  // Note that it's okay for us to use memchr here to look for \n because none
@@ -4215,21 +4742,23 @@ next_newline(const char *cursor, ptrdiff_t length) {
4215
4742
 
4216
4743
  // Find the start of the encoding comment. This is effectively an inlined
4217
4744
  // version of strnstr with some modifications.
4218
- static inline const char *
4219
- parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdiff_t remaining) {
4745
+ static inline const uint8_t *
4746
+ parser_lex_encoding_comment_start(yp_parser_t *parser, const uint8_t *cursor, ptrdiff_t remaining) {
4220
4747
  assert(remaining >= 0);
4221
4748
  size_t length = (size_t) remaining;
4222
4749
 
4223
4750
  size_t key_length = strlen("coding:");
4224
4751
  if (key_length > length) return NULL;
4225
4752
 
4226
- const char *cursor_limit = cursor + length - key_length + 1;
4753
+ const uint8_t *cursor_limit = cursor + length - key_length + 1;
4227
4754
  while ((cursor = yp_memchr(cursor, 'c', (size_t) (cursor_limit - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
4228
- if (
4229
- (strncmp(cursor, "coding", key_length - 1) == 0) &&
4230
- (cursor[key_length - 1] == ':' || cursor[key_length - 1] == '=')
4231
- ) {
4232
- return cursor + key_length;
4755
+ if (memcmp(cursor, "coding", key_length - 1) == 0) {
4756
+ size_t whitespace_after_coding = yp_strspn_inline_whitespace(cursor + key_length - 1, parser->end - (cursor + key_length - 1));
4757
+ size_t cur_pos = key_length + whitespace_after_coding;
4758
+
4759
+ if (cursor[cur_pos - 1] == ':' || cursor[cur_pos - 1] == '=') {
4760
+ return cursor + cur_pos;
4761
+ }
4233
4762
  }
4234
4763
 
4235
4764
  cursor++;
@@ -4242,13 +4771,13 @@ parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdi
4242
4771
  // actions are necessary for it here.
4243
4772
  static void
4244
4773
  parser_lex_encoding_comment(yp_parser_t *parser) {
4245
- const char *start = parser->current.start + 1;
4246
- const char *end = next_newline(start, parser->end - start);
4774
+ const uint8_t *start = parser->current.start + 1;
4775
+ const uint8_t *end = next_newline(start, parser->end - start);
4247
4776
  if (end == NULL) end = parser->end;
4248
4777
 
4249
4778
  // These are the patterns we're going to match to find the encoding comment.
4250
4779
  // This is definitely not complete or even really correct.
4251
- const char *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);
4780
+ const uint8_t *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);
4252
4781
 
4253
4782
  // If we didn't find anything that matched our patterns, then return. Note
4254
4783
  // that this does a _very_ poor job of actually finding the encoding, and
@@ -4261,7 +4790,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
4261
4790
 
4262
4791
  // Now determine the end of the encoding string. This is either the end of
4263
4792
  // the line, the first whitespace character, or a punctuation mark.
4264
- const char *encoding_end = yp_strpbrk(parser, encoding_start, " \t\f\r\v\n;,", end - encoding_start);
4793
+ const uint8_t *encoding_end = yp_strpbrk(parser, encoding_start, (const uint8_t *) " \t\f\r\v\n;,", end - encoding_start);
4265
4794
  encoding_end = encoding_end == NULL ? end : encoding_end;
4266
4795
 
4267
4796
  // Finally, we can determine the width of the encoding string.
@@ -4283,7 +4812,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
4283
4812
  // Extensions like utf-8 can contain extra encoding details like,
4284
4813
  // utf-8-dos, utf-8-linux, utf-8-mac. We treat these all as utf-8 should
4285
4814
  // treat any encoding starting utf-8 as utf-8.
4286
- if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, "utf-8", 5) == 0)) {
4815
+ if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, (const uint8_t *) "utf-8", 5) == 0)) {
4287
4816
  // We don't need to do anything here because the default encoding is
4288
4817
  // already UTF-8. We'll just return.
4289
4818
  return;
@@ -4292,7 +4821,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
4292
4821
  // Next, we're going to loop through each of the encodings that we handle
4293
4822
  // explicitly. If we found one that we understand, we'll use that value.
4294
4823
  #define ENCODING(value, prebuilt) \
4295
- if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, value, width) == 0) { \
4824
+ if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, (const uint8_t *) value, width) == 0) { \
4296
4825
  parser->encoding = prebuilt; \
4297
4826
  parser->encoding_changed |= true; \
4298
4827
  if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
@@ -4432,14 +4961,9 @@ context_push(yp_parser_t *parser, yp_context_t context) {
4432
4961
 
4433
4962
  static void
4434
4963
  context_pop(yp_parser_t *parser) {
4435
- if (parser->current_context->prev == NULL) {
4436
- free(parser->current_context);
4437
- parser->current_context = NULL;
4438
- } else {
4439
- yp_context_node_t *prev = parser->current_context->prev;
4440
- free(parser->current_context);
4441
- parser->current_context = prev;
4442
- }
4964
+ yp_context_node_t *prev = parser->current_context->prev;
4965
+ free(parser->current_context);
4966
+ parser->current_context = prev;
4443
4967
  }
4444
4968
 
4445
4969
  static bool
@@ -4485,7 +5009,7 @@ lex_optional_float_suffix(yp_parser_t *parser) {
4485
5009
  // Here we're going to attempt to parse the optional decimal portion of a
4486
5010
  // float. If it's not there, then it's okay and we'll just continue on.
4487
5011
  if (peek(parser) == '.') {
4488
- if (yp_char_is_decimal_digit(peek_at(parser, 1))) {
5012
+ if (yp_char_is_decimal_digit(peek_offset(parser, 1))) {
4489
5013
  parser->current.end += 2;
4490
5014
  parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
4491
5015
  type = YP_TOKEN_FLOAT;
@@ -4518,12 +5042,13 @@ static yp_token_type_t
4518
5042
  lex_numeric_prefix(yp_parser_t *parser) {
4519
5043
  yp_token_type_t type = YP_TOKEN_INTEGER;
4520
5044
 
4521
- if (parser->current.end[-1] == '0') {
5045
+ if (peek_offset(parser, -1) == '0') {
4522
5046
  switch (*parser->current.end) {
4523
5047
  // 0d1111 is a decimal number
4524
5048
  case 'd':
4525
5049
  case 'D':
4526
- if (yp_char_is_decimal_digit(*++parser->current.end)) {
5050
+ parser->current.end++;
5051
+ if (yp_char_is_decimal_digit(peek(parser))) {
4527
5052
  parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
4528
5053
  } else {
4529
5054
  yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid decimal number.");
@@ -4534,7 +5059,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
4534
5059
  // 0b1111 is a binary number
4535
5060
  case 'b':
4536
5061
  case 'B':
4537
- if (yp_char_is_binary_digit(*++parser->current.end)) {
5062
+ parser->current.end++;
5063
+ if (yp_char_is_binary_digit(peek(parser))) {
4538
5064
  parser->current.end += yp_strspn_binary_number(parser->current.end, parser->end - parser->current.end);
4539
5065
  } else {
4540
5066
  yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid binary number.");
@@ -4545,7 +5071,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
4545
5071
  // 0o1111 is an octal number
4546
5072
  case 'o':
4547
5073
  case 'O':
4548
- if (yp_char_is_octal_digit(*++parser->current.end)) {
5074
+ parser->current.end++;
5075
+ if (yp_char_is_octal_digit(peek(parser))) {
4549
5076
  parser->current.end += yp_strspn_octal_number(parser->current.end, parser->end - parser->current.end);
4550
5077
  } else {
4551
5078
  yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid octal number.");
@@ -4569,7 +5096,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
4569
5096
  // 0x1111 is a hexadecimal number
4570
5097
  case 'x':
4571
5098
  case 'X':
4572
- if (yp_char_is_hexadecimal_digit(*++parser->current.end)) {
5099
+ parser->current.end++;
5100
+ if (yp_char_is_hexadecimal_digit(peek(parser))) {
4573
5101
  parser->current.end += yp_strspn_hexadecimal_number(parser->current.end, parser->end - parser->current.end);
4574
5102
  } else {
4575
5103
  yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid hexadecimal number.");
@@ -4601,7 +5129,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
4601
5129
 
4602
5130
  // If the last character that we consumed was an underscore, then this is
4603
5131
  // actually an invalid integer value, and we should return an invalid token.
4604
- if (parser->current.end[-1] == '_') {
5132
+ if (peek_offset(parser, -1) == '_') {
4605
5133
  yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Number literal cannot end with a `_`.");
4606
5134
  }
4607
5135
 
@@ -4615,7 +5143,7 @@ lex_numeric(yp_parser_t *parser) {
4615
5143
  if (parser->current.end < parser->end) {
4616
5144
  type = lex_numeric_prefix(parser);
4617
5145
 
4618
- const char *end = parser->current.end;
5146
+ const uint8_t *end = parser->current.end;
4619
5147
  yp_token_type_t suffix_type = type;
4620
5148
 
4621
5149
  if (type == YP_TOKEN_INTEGER) {
@@ -4640,8 +5168,8 @@ lex_numeric(yp_parser_t *parser) {
4640
5168
  }
4641
5169
  }
4642
5170
 
4643
- const unsigned char uc = (const unsigned char) peek(parser);
4644
- if (uc != '\0' && (uc >= 0x80 || ((uc >= 'a' && uc <= 'z') || (uc >= 'A' && uc <= 'Z')) || uc == '_')) {
5171
+ const uint8_t b = peek(parser);
5172
+ if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
4645
5173
  parser->current.end = end;
4646
5174
  } else {
4647
5175
  type = suffix_type;
@@ -4653,6 +5181,11 @@ lex_numeric(yp_parser_t *parser) {
4653
5181
 
4654
5182
  static yp_token_type_t
4655
5183
  lex_global_variable(yp_parser_t *parser) {
5184
+ if (parser->current.end >= parser->end) {
5185
+ yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid global variable.");
5186
+ return YP_TOKEN_GLOBAL_VARIABLE;
5187
+ }
5188
+
4656
5189
  switch (*parser->current.end) {
4657
5190
  case '~': // $~: match-data
4658
5191
  case '*': // $*: argv
@@ -4741,7 +5274,7 @@ lex_keyword(yp_parser_t *parser, const char *value, yp_lex_state_t state, yp_tok
4741
5274
  yp_lex_state_t last_state = parser->lex_state;
4742
5275
 
4743
5276
  const size_t vlen = strlen(value);
4744
- if (parser->current.start + vlen <= parser->end && strncmp(parser->current.start, value, vlen) == 0) {
5277
+ if (parser->current.start + vlen <= parser->end && memcmp(parser->current.start, value, vlen) == 0) {
4745
5278
  if (parser->lex_state & YP_LEX_STATE_FNAME) {
4746
5279
  lex_state_set(parser, YP_LEX_STATE_ENDFN);
4747
5280
  } else {
@@ -4782,7 +5315,7 @@ lex_identifier(yp_parser_t *parser, bool previous_command_start) {
4782
5315
 
4783
5316
  if (
4784
5317
  ((lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
4785
- (peek(parser) == ':') && (peek_at(parser, 1) != ':')
5318
+ (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
4786
5319
  ) {
4787
5320
  // If we're in a position where we can accept a : at the end of an
4788
5321
  // identifier, then we'll optionally accept it.
@@ -4798,7 +5331,7 @@ lex_identifier(yp_parser_t *parser, bool previous_command_start) {
4798
5331
  }
4799
5332
 
4800
5333
  return YP_TOKEN_IDENTIFIER;
4801
- } else if (lex_state_p(parser, YP_LEX_STATE_FNAME) && peek_at(parser, 1) != '~' && peek_at(parser, 1) != '>' && (peek_at(parser, 1) != '=' || peek_at(parser, 2) == '>') && match(parser, '=')) {
5334
+ } else if (lex_state_p(parser, YP_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
4802
5335
  // If we're in a position where we can accept a = at the end of an
4803
5336
  // identifier, then we'll optionally accept it.
4804
5337
  return YP_TOKEN_IDENTIFIER;
@@ -4806,7 +5339,7 @@ lex_identifier(yp_parser_t *parser, bool previous_command_start) {
4806
5339
 
4807
5340
  if (
4808
5341
  ((lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
4809
- peek(parser) == ':' && peek_at(parser, 1) != ':'
5342
+ peek(parser) == ':' && peek_offset(parser, 1) != ':'
4810
5343
  ) {
4811
5344
  // If we're in a position where we can accept a : at the end of an
4812
5345
  // identifier, then we'll optionally accept it.
@@ -4907,7 +5440,7 @@ current_token_starts_line(yp_parser_t *parser) {
4907
5440
  // this token type.
4908
5441
  //
4909
5442
  static yp_token_type_t
4910
- lex_interpolation(yp_parser_t *parser, const char *pound) {
5443
+ lex_interpolation(yp_parser_t *parser, const uint8_t *pound) {
4911
5444
  // If there is no content following this #, then we're at the end of
4912
5445
  // the string and we can safely return string content.
4913
5446
  if (pound + 1 >= parser->end) {
@@ -4928,7 +5461,7 @@ lex_interpolation(yp_parser_t *parser, const char *pound) {
4928
5461
 
4929
5462
  // If we're looking at a @ and there's another @, then we'll skip past the
4930
5463
  // second @.
4931
- const char *variable = pound + 2;
5464
+ const uint8_t *variable = pound + 2;
4932
5465
  if (*variable == '@' && pound + 3 < parser->end) variable++;
4933
5466
 
4934
5467
  if (char_is_identifier_start(parser, variable)) {
@@ -4964,7 +5497,7 @@ lex_interpolation(yp_parser_t *parser, const char *pound) {
4964
5497
  // This is the character that we're going to check to see if it is the
4965
5498
  // start of an identifier that would indicate that this is a global
4966
5499
  // variable.
4967
- const char *check = pound + 2;
5500
+ const uint8_t *check = pound + 2;
4968
5501
 
4969
5502
  if (pound[2] == '-') {
4970
5503
  if (pound + 3 >= parser->end) {
@@ -5074,7 +5607,7 @@ lex_question_mark(yp_parser_t *parser) {
5074
5607
 
5075
5608
  if (parser->current.start[1] == '\\') {
5076
5609
  lex_state_set(parser, YP_LEX_STATE_END);
5077
- parser->current.end += yp_unescape_calculate_difference(parser->current.start + 1, parser->end, YP_UNESCAPE_ALL, true, &parser->error_list);
5610
+ parser->current.end += yp_unescape_calculate_difference(parser, parser->current.start + 1, YP_UNESCAPE_ALL, true);
5078
5611
  return YP_TOKEN_CHARACTER_LITERAL;
5079
5612
  } else {
5080
5613
  size_t encoding_width = parser->encoding.char_width(parser->current.end, parser->end - parser->current.end);
@@ -5083,7 +5616,7 @@ lex_question_mark(yp_parser_t *parser) {
5083
5616
  // an underscore. We check for this case
5084
5617
  if (
5085
5618
  !(parser->encoding.alnum_char(parser->current.end, parser->end - parser->current.end) ||
5086
- *parser->current.end == '_') ||
5619
+ peek(parser) == '_') ||
5087
5620
  (
5088
5621
  (parser->current.end + encoding_width >= parser->end) ||
5089
5622
  !char_is_identifier(parser, parser->current.end + encoding_width)
@@ -5155,7 +5688,7 @@ parser_comment(yp_parser_t *parser, yp_comment_type_t type) {
5155
5688
  static yp_token_type_t
5156
5689
  lex_embdoc(yp_parser_t *parser) {
5157
5690
  // First, lex out the EMBDOC_BEGIN token.
5158
- const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5691
+ const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5159
5692
 
5160
5693
  if (newline == NULL) {
5161
5694
  parser->current.end = parser->end;
@@ -5178,9 +5711,9 @@ lex_embdoc(yp_parser_t *parser) {
5178
5711
 
5179
5712
  // If we've hit the end of the embedded documentation then we'll return that
5180
5713
  // token here.
5181
- if (strncmp(parser->current.end, "=end", 4) == 0 &&
5714
+ if (memcmp(parser->current.end, "=end", 4) == 0 &&
5182
5715
  (parser->current.end + 4 == parser->end || yp_char_is_whitespace(parser->current.end[4]))) {
5183
- const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5716
+ const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5184
5717
 
5185
5718
  if (newline == NULL) {
5186
5719
  parser->current.end = parser->end;
@@ -5200,7 +5733,7 @@ lex_embdoc(yp_parser_t *parser) {
5200
5733
 
5201
5734
  // Otherwise, we'll parse until the end of the line and return a line of
5202
5735
  // embedded documentation.
5203
- const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5736
+ const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5204
5737
 
5205
5738
  if (newline == NULL) {
5206
5739
  parser->current.end = parser->end;
@@ -5299,30 +5832,22 @@ parser_lex(yp_parser_t *parser) {
5299
5832
  space_seen = true;
5300
5833
  break;
5301
5834
  case '\r':
5302
- if (peek_at(parser, 1) == '\n') {
5835
+ if (match_eol_offset(parser, 1)) {
5303
5836
  chomping = false;
5304
5837
  } else {
5305
5838
  parser->current.end++;
5306
5839
  space_seen = true;
5307
5840
  }
5308
5841
  break;
5309
- case '\\':
5310
- if (peek_at(parser, 1) == '\n') {
5311
- if (parser->heredoc_end) {
5312
- parser->current.end = parser->heredoc_end;
5313
- parser->heredoc_end = NULL;
5314
- } else {
5315
- yp_newline_list_append(&parser->newline_list, parser->current.end + 1);
5316
- parser->current.end += 2;
5317
- space_seen = true;
5318
- }
5319
- } else if (peek_at(parser, 1) == '\r' && peek_at(parser, 2) == '\n') {
5842
+ case '\\': {
5843
+ size_t eol_length = match_eol_offset(parser, 1);
5844
+ if (eol_length) {
5320
5845
  if (parser->heredoc_end) {
5321
5846
  parser->current.end = parser->heredoc_end;
5322
5847
  parser->heredoc_end = NULL;
5323
5848
  } else {
5324
- yp_newline_list_append(&parser->newline_list, parser->current.end + 2);
5325
- parser->current.end += 3;
5849
+ parser->current.end += eol_length + 1;
5850
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
5326
5851
  space_seen = true;
5327
5852
  }
5328
5853
  } else if (yp_char_is_inline_whitespace(*parser->current.end)) {
@@ -5330,7 +5855,9 @@ parser_lex(yp_parser_t *parser) {
5330
5855
  } else {
5331
5856
  chomping = false;
5332
5857
  }
5858
+
5333
5859
  break;
5860
+ }
5334
5861
  default:
5335
5862
  chomping = false;
5336
5863
  break;
@@ -5340,13 +5867,14 @@ parser_lex(yp_parser_t *parser) {
5340
5867
  // Next, we'll set to start of this token to be the current end.
5341
5868
  parser->current.start = parser->current.end;
5342
5869
 
5343
- // We'll check if we're at the end of the file. If we are, then we need to
5344
- // return the EOF token.
5870
+ // We'll check if we're at the end of the file. If we are, then we
5871
+ // need to return the EOF token.
5345
5872
  if (parser->current.end >= parser->end) {
5346
5873
  LEX(YP_TOKEN_EOF);
5347
5874
  }
5348
5875
 
5349
- // Finally, we'll check the current character to determine the next token.
5876
+ // Finally, we'll check the current character to determine the next
5877
+ // token.
5350
5878
  switch (*parser->current.end++) {
5351
5879
  case '\0': // NUL or end of script
5352
5880
  case '\004': // ^D
@@ -5355,17 +5883,15 @@ parser_lex(yp_parser_t *parser) {
5355
5883
  LEX(YP_TOKEN_EOF);
5356
5884
 
5357
5885
  case '#': { // comments
5358
- const char *ending = next_newline(parser->current.end, parser->end - parser->current.end);
5359
- while (ending && ending < parser->end && *ending != '\n') {
5360
- ending = next_newline(ending + 1, parser->end - ending);
5361
- }
5886
+ const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
5362
5887
 
5363
5888
  parser->current.end = ending == NULL ? parser->end : ending + 1;
5364
5889
  parser->current.type = YP_TOKEN_COMMENT;
5365
5890
  parser_lex_callback(parser);
5366
5891
 
5367
- // If we found a comment while lexing, then we're going to add it to the
5368
- // list of comments in the file and keep lexing.
5892
+ // If we found a comment while lexing, then we're going to
5893
+ // add it to the list of comments in the file and keep
5894
+ // lexing.
5369
5895
  yp_comment_t *comment = parser_comment(parser, YP_COMMENT_INLINE);
5370
5896
  yp_list_append(&parser->comment_list, (yp_list_node_t *) comment);
5371
5897
 
@@ -5376,21 +5902,29 @@ parser_lex(yp_parser_t *parser) {
5376
5902
  lexed_comment = true;
5377
5903
  }
5378
5904
  /* fallthrough */
5379
- case '\r': {
5380
- // The only way you can have carriage returns in this particular loop
5381
- // is if you have a carriage return followed by a newline. In that
5382
- // case we'll just skip over the carriage return and continue lexing,
5383
- // in order to make it so that the newline token encapsulates both the
5384
- // carriage return and the newline. Note that we need to check that
5385
- // we haven't already lexed a comment here because that falls through
5386
- // into here as well.
5387
- if (!lexed_comment) parser->current.end++;
5388
- }
5389
- /* fallthrough */
5905
+ case '\r':
5390
5906
  case '\n': {
5391
- if (parser->heredoc_end == NULL) {
5392
- yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
5393
- } else {
5907
+ size_t eol_length = match_eol_at(parser, parser->current.end - 1);
5908
+ if (eol_length) {
5909
+ // The only way you can have carriage returns in this
5910
+ // particular loop is if you have a carriage return
5911
+ // followed by a newline. In that case we'll just skip
5912
+ // over the carriage return and continue lexing, in
5913
+ // order to make it so that the newline token
5914
+ // encapsulates both the carriage return and the
5915
+ // newline. Note that we need to check that we haven't
5916
+ // already lexed a comment here because that falls
5917
+ // through into here as well.
5918
+ if (!lexed_comment) {
5919
+ parser->current.end += eol_length - 1; // skip CR
5920
+ }
5921
+
5922
+ if (parser->heredoc_end == NULL) {
5923
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
5924
+ }
5925
+ }
5926
+
5927
+ if (parser->heredoc_end) {
5394
5928
  parser_flush_heredoc_end(parser);
5395
5929
  }
5396
5930
 
@@ -5418,7 +5952,7 @@ parser_lex(yp_parser_t *parser) {
5418
5952
  // (either . or &.) that starts the next line. If there is, then this
5419
5953
  // is going to become an ignored newline and we're going to instead
5420
5954
  // return the call operator.
5421
- const char *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
5955
+ const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
5422
5956
  next_content += yp_strspn_inline_whitespace(next_content, parser->end - next_content);
5423
5957
 
5424
5958
  if (next_content < parser->end) {
@@ -5429,15 +5963,15 @@ parser_lex(yp_parser_t *parser) {
5429
5963
  // Otherwise we'll return a regular newline.
5430
5964
  if (next_content[0] == '#') {
5431
5965
  // Here we look for a "." or "&." following a "\n".
5432
- const char *following = next_newline(next_content, parser->end - next_content);
5966
+ const uint8_t *following = next_newline(next_content, parser->end - next_content);
5433
5967
 
5434
- while (following && (following < parser->end)) {
5968
+ while (following && (following + 1 < parser->end)) {
5435
5969
  following++;
5436
5970
  following += yp_strspn_inline_whitespace(following, parser->end - following);
5437
5971
 
5438
5972
  // If this is not followed by a comment, then we can break out
5439
5973
  // of this loop.
5440
- if (*following != '#') break;
5974
+ if (peek_at(parser, following) != '#') break;
5441
5975
 
5442
5976
  // If there is a comment, then we need to find the end of the
5443
5977
  // comment and continue searching from there.
@@ -5446,7 +5980,13 @@ parser_lex(yp_parser_t *parser) {
5446
5980
 
5447
5981
  // If the lex state was ignored, or we hit a '.' or a '&.',
5448
5982
  // we will lex the ignored newline
5449
- if (lex_state_ignored_p(parser) || (following && ((following[0] == '.') || (following + 1 < parser->end && following[0] == '&' && following[1] == '.')))) {
5983
+ if (
5984
+ lex_state_ignored_p(parser) ||
5985
+ (following && (
5986
+ (peek_at(parser, following) == '.') ||
5987
+ (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
5988
+ ))
5989
+ ) {
5450
5990
  if (!lexed_comment) parser_lex_ignored_newline(parser);
5451
5991
  lexed_comment = false;
5452
5992
  goto lex_next_token;
@@ -5459,7 +5999,7 @@ parser_lex(yp_parser_t *parser) {
5459
5999
  // To match ripper, we need to emit an ignored newline even though
5460
6000
  // its a real newline in the case that we have a beginless range
5461
6001
  // on a subsequent line.
5462
- if ((next_content + 1 < parser->end) && (next_content[1] == '.')) {
6002
+ if (peek_at(parser, next_content + 1) == '.') {
5463
6003
  if (!lexed_comment) parser_lex_ignored_newline(parser);
5464
6004
  lex_state_set(parser, YP_LEX_STATE_BEG);
5465
6005
  parser->command_start = true;
@@ -5477,7 +6017,7 @@ parser_lex(yp_parser_t *parser) {
5477
6017
 
5478
6018
  // If we hit a &. after a newline, then we're in a call chain and
5479
6019
  // we need to return the call operator.
5480
- if (next_content + 1 < parser->end && next_content[0] == '&' && next_content[1] == '.') {
6020
+ if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
5481
6021
  if (!lexed_comment) parser_lex_ignored_newline(parser);
5482
6022
  lex_state_set(parser, YP_LEX_STATE_DOT);
5483
6023
  parser->current.start = next_content;
@@ -5674,7 +6214,7 @@ parser_lex(yp_parser_t *parser) {
5674
6214
 
5675
6215
  // = => =~ == === =begin
5676
6216
  case '=':
5677
- if (current_token_starts_line(parser) && strncmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_at(parser, 5))) {
6217
+ if (current_token_starts_line(parser) && memcmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_offset(parser, 5))) {
5678
6218
  yp_token_type_t type = lex_embdoc(parser);
5679
6219
 
5680
6220
  if (type == YP_TOKEN_EOF) {
@@ -5712,7 +6252,7 @@ parser_lex(yp_parser_t *parser) {
5712
6252
  !lex_state_end_p(parser) &&
5713
6253
  (!lex_state_p(parser, YP_LEX_STATE_ARG_ANY) || lex_state_p(parser, YP_LEX_STATE_LABELED) || space_seen)
5714
6254
  ) {
5715
- const char *end = parser->current.end;
6255
+ const uint8_t *end = parser->current.end;
5716
6256
 
5717
6257
  yp_heredoc_quote_t quote = YP_HEREDOC_QUOTE_NONE;
5718
6258
  yp_heredoc_indent_t indent = YP_HEREDOC_INDENT_NONE;
@@ -5734,7 +6274,7 @@ parser_lex(yp_parser_t *parser) {
5734
6274
  quote = YP_HEREDOC_QUOTE_SINGLE;
5735
6275
  }
5736
6276
 
5737
- const char *ident_start = parser->current.end;
6277
+ const uint8_t *ident_start = parser->current.end;
5738
6278
  size_t width = 0;
5739
6279
 
5740
6280
  if (parser->current.end >= parser->end) {
@@ -5757,7 +6297,7 @@ parser_lex(yp_parser_t *parser) {
5757
6297
  }
5758
6298
 
5759
6299
  size_t ident_length = (size_t) (parser->current.end - ident_start);
5760
- if (quote != YP_HEREDOC_QUOTE_NONE && !match(parser, (char) quote)) {
6300
+ if (quote != YP_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
5761
6301
  // TODO: handle unterminated heredoc
5762
6302
  }
5763
6303
 
@@ -5773,7 +6313,7 @@ parser_lex(yp_parser_t *parser) {
5773
6313
  });
5774
6314
 
5775
6315
  if (parser->heredoc_end == NULL) {
5776
- const char *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
6316
+ const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
5777
6317
 
5778
6318
  if (body_start == NULL) {
5779
6319
  // If there is no newline after the heredoc identifier, then
@@ -6098,13 +6638,13 @@ parser_lex(yp_parser_t *parser) {
6098
6638
  LEX(YP_TOKEN_COLON_COLON);
6099
6639
  }
6100
6640
 
6101
- if (lex_state_end_p(parser) || yp_char_is_whitespace(*parser->current.end) || (*parser->current.end == '#')) {
6641
+ if (lex_state_end_p(parser) || yp_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
6102
6642
  lex_state_set(parser, YP_LEX_STATE_BEG);
6103
6643
  LEX(YP_TOKEN_COLON);
6104
6644
  }
6105
6645
 
6106
- if ((*parser->current.end == '"') || (*parser->current.end == '\'')) {
6107
- lex_mode_push_string(parser, *parser->current.end == '"', false, '\0', *parser->current.end);
6646
+ if (peek(parser) == '"' || peek(parser) == '\'') {
6647
+ lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
6108
6648
  parser->current.end++;
6109
6649
  }
6110
6650
 
@@ -6173,25 +6713,26 @@ parser_lex(yp_parser_t *parser) {
6173
6713
  }
6174
6714
  else if(
6175
6715
  lex_state_beg_p(parser) ||
6176
- (lex_state_p(parser, YP_LEX_STATE_FITEM) && (*parser->current.end == 's')) ||
6716
+ (lex_state_p(parser, YP_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
6177
6717
  lex_state_spcarg_p(parser, space_seen)
6178
6718
  ) {
6179
6719
  if (!parser->encoding.alnum_char(parser->current.end, parser->end - parser->current.end)) {
6180
6720
  lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
6181
6721
 
6182
- if (*parser->current.end == '\r') {
6722
+ size_t eol_length = match_eol(parser);
6723
+ if (eol_length) {
6724
+ parser->current.end += eol_length;
6725
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
6726
+ } else {
6183
6727
  parser->current.end++;
6184
6728
  }
6185
6729
 
6186
- if (*parser->current.end == '\n') {
6187
- yp_newline_list_append(&parser->newline_list, parser->current.end);
6730
+ if (parser->current.end < parser->end) {
6731
+ LEX(YP_TOKEN_STRING_BEGIN);
6188
6732
  }
6189
-
6190
- parser->current.end++;
6191
- LEX(YP_TOKEN_STRING_BEGIN);
6192
6733
  }
6193
6734
 
6194
- switch (*parser->current.end) {
6735
+ switch (peek(parser)) {
6195
6736
  case 'i': {
6196
6737
  parser->current.end++;
6197
6738
 
@@ -6215,6 +6756,7 @@ parser_lex(yp_parser_t *parser) {
6215
6756
 
6216
6757
  if (parser->current.end < parser->end) {
6217
6758
  lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
6759
+ yp_newline_list_check_append(&parser->newline_list, parser->current.end);
6218
6760
  parser->current.end++;
6219
6761
  }
6220
6762
 
@@ -6225,6 +6767,7 @@ parser_lex(yp_parser_t *parser) {
6225
6767
 
6226
6768
  if (parser->current.end < parser->end) {
6227
6769
  lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
6770
+ yp_newline_list_check_append(&parser->newline_list, parser->current.end);
6228
6771
  parser->current.end++;
6229
6772
  }
6230
6773
 
@@ -6235,6 +6778,7 @@ parser_lex(yp_parser_t *parser) {
6235
6778
 
6236
6779
  if (parser->current.end < parser->end) {
6237
6780
  lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
6781
+ yp_newline_list_check_append(&parser->newline_list, parser->current.end);
6238
6782
  parser->current.end++;
6239
6783
  }
6240
6784
 
@@ -6284,7 +6828,7 @@ parser_lex(yp_parser_t *parser) {
6284
6828
  // unparseable. In this case we'll just drop it from the parser
6285
6829
  // and skip past it and hope that the next token is something
6286
6830
  // that we can parse.
6287
- yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "invalid %% token");
6831
+ yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid %% token");
6288
6832
  goto lex_next_token;
6289
6833
  }
6290
6834
  }
@@ -6335,9 +6879,10 @@ parser_lex(yp_parser_t *parser) {
6335
6879
  if (
6336
6880
  ((parser->current.end - parser->current.start) == 7) &&
6337
6881
  current_token_starts_line(parser) &&
6338
- (strncmp(parser->current.start, "__END__", 7) == 0) &&
6339
- (parser->current.end == parser->end || *parser->current.end == '\n' || (*parser->current.end == '\r' && parser->current.end[1] == '\n'))
6340
- ) {
6882
+ (memcmp(parser->current.start, "__END__", 7) == 0) &&
6883
+ (parser->current.end == parser->end || match_eol(parser))
6884
+ )
6885
+ {
6341
6886
  parser->current.end = parser->end;
6342
6887
  parser->current.type = YP_TOKEN___END__;
6343
6888
  parser_lex_callback(parser);
@@ -6394,7 +6939,7 @@ parser_lex(yp_parser_t *parser) {
6394
6939
 
6395
6940
  if ((whitespace = yp_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list, should_stop)) > 0) {
6396
6941
  parser->current.end += whitespace;
6397
- if (parser->current.end[-1] == '\n') {
6942
+ if (peek_offset(parser, -1) == '\n') {
6398
6943
  // mutates next_start
6399
6944
  parser_flush_heredoc_end(parser);
6400
6945
  }
@@ -6410,8 +6955,8 @@ parser_lex(yp_parser_t *parser) {
6410
6955
  // Here we'll get a list of the places where strpbrk should break,
6411
6956
  // and then find the first one.
6412
6957
  yp_lex_mode_t *lex_mode = parser->lex_modes.current;
6413
- const char *breakpoints = lex_mode->as.list.breakpoints;
6414
- const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
6958
+ const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
6959
+ const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
6415
6960
 
6416
6961
  while (breakpoint != NULL) {
6417
6962
  // If we hit a null byte, skip directly past it.
@@ -6458,12 +7003,25 @@ parser_lex(yp_parser_t *parser) {
6458
7003
  // and find the next breakpoint.
6459
7004
  if (*breakpoint == '\\') {
6460
7005
  yp_unescape_type_t unescape_type = lex_mode->as.list.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
6461
- size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
7006
+ size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
7007
+ if (difference == 0) {
7008
+ // we're at the end of the file
7009
+ breakpoint = NULL;
7010
+ continue;
7011
+ }
6462
7012
 
6463
- // If the result is an escaped newline, then we need to
6464
- // track that newline.
7013
+ // If the result is an escaped newline ...
6465
7014
  if (breakpoint[difference - 1] == '\n') {
6466
- yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7015
+ if (parser->heredoc_end) {
7016
+ // ... if we are on the same line as a heredoc, flush the heredoc and
7017
+ // continue parsing after heredoc_end.
7018
+ parser->current.end = breakpoint + difference;
7019
+ parser_flush_heredoc_end(parser);
7020
+ LEX(YP_TOKEN_STRING_CONTENT);
7021
+ } else {
7022
+ // ... else track the newline.
7023
+ yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7024
+ }
6467
7025
  }
6468
7026
 
6469
7027
  breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
@@ -6499,7 +7057,13 @@ parser_lex(yp_parser_t *parser) {
6499
7057
 
6500
7058
  case YP_LEX_REGEXP: {
6501
7059
  // First, we'll set to start of this token to be the current end.
6502
- parser->current.start = parser->current.end;
7060
+ if (parser->next_start == NULL) {
7061
+ parser->current.start = parser->current.end;
7062
+ } else {
7063
+ parser->current.start = parser->next_start;
7064
+ parser->current.end = parser->next_start;
7065
+ parser->next_start = NULL;
7066
+ }
6503
7067
 
6504
7068
  // We'll check if we're at the end of the file. If we are, then we need to
6505
7069
  // return the EOF token.
@@ -6513,8 +7077,8 @@ parser_lex(yp_parser_t *parser) {
6513
7077
  // These are the places where we need to split up the content of the
6514
7078
  // regular expression. We'll use strpbrk to find the first of these
6515
7079
  // characters.
6516
- const char *breakpoints = lex_mode->as.regexp.breakpoints;
6517
- const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
7080
+ const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
7081
+ const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
6518
7082
 
6519
7083
  while (breakpoint != NULL) {
6520
7084
  // If we hit a null byte, skip directly past it.
@@ -6526,7 +7090,16 @@ parser_lex(yp_parser_t *parser) {
6526
7090
  // If we've hit a newline, then we need to track that in the
6527
7091
  // list of newlines.
6528
7092
  if (*breakpoint == '\n') {
6529
- yp_newline_list_append(&parser->newline_list, breakpoint);
7093
+ // For the special case of a newline-terminated regular expression, we will pass
7094
+ // through this branch twice -- once with YP_TOKEN_REGEXP_BEGIN and then again
7095
+ // with YP_TOKEN_STRING_CONTENT. Let's avoid tracking the newline twice, by
7096
+ // tracking it only in the REGEXP_BEGIN case.
7097
+ if (
7098
+ !(lex_mode->as.regexp.terminator == '\n' && parser->current.type != YP_TOKEN_REGEXP_BEGIN)
7099
+ && parser->heredoc_end == NULL
7100
+ ) {
7101
+ yp_newline_list_append(&parser->newline_list, breakpoint);
7102
+ }
6530
7103
 
6531
7104
  if (lex_mode->as.regexp.terminator != '\n') {
6532
7105
  // If the terminator is not a newline, then we can set
@@ -6567,12 +7140,25 @@ parser_lex(yp_parser_t *parser) {
6567
7140
  // literally. In this case we'll skip past the next character
6568
7141
  // and find the next breakpoint.
6569
7142
  if (*breakpoint == '\\') {
6570
- size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, YP_UNESCAPE_ALL, false, &parser->error_list);
7143
+ size_t difference = yp_unescape_calculate_difference(parser, breakpoint, YP_UNESCAPE_ALL, false);
7144
+ if (difference == 0) {
7145
+ // we're at the end of the file
7146
+ breakpoint = NULL;
7147
+ continue;
7148
+ }
6571
7149
 
6572
- // If the result is an escaped newline, then we need to
6573
- // track that newline.
7150
+ // If the result is an escaped newline ...
6574
7151
  if (breakpoint[difference - 1] == '\n') {
6575
- yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7152
+ if (parser->heredoc_end) {
7153
+ // ... if we are on the same line as a heredoc, flush the heredoc and
7154
+ // continue parsing after heredoc_end.
7155
+ parser->current.end = breakpoint + difference;
7156
+ parser_flush_heredoc_end(parser);
7157
+ LEX(YP_TOKEN_STRING_CONTENT);
7158
+ } else {
7159
+ // ... else track the newline.
7160
+ yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7161
+ }
6576
7162
  }
6577
7163
 
6578
7164
  breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
@@ -6624,8 +7210,8 @@ parser_lex(yp_parser_t *parser) {
6624
7210
 
6625
7211
  // These are the places where we need to split up the content of the
6626
7212
  // string. We'll use strpbrk to find the first of these characters.
6627
- const char *breakpoints = parser->lex_modes.current->as.string.breakpoints;
6628
- const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
7213
+ const uint8_t *breakpoints = parser->lex_modes.current->as.string.breakpoints;
7214
+ const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
6629
7215
 
6630
7216
  while (breakpoint != NULL) {
6631
7217
  // If we hit the incrementor, then we'll increment then nesting and
@@ -6660,21 +7246,18 @@ parser_lex(yp_parser_t *parser) {
6660
7246
 
6661
7247
  // Otherwise we need to switch back to the parent lex mode and
6662
7248
  // return the end of the string.
6663
- if (*parser->current.end == '\r' && parser->current.end + 1 < parser->end && parser->current.end[1] == '\n') {
6664
- parser->current.end = breakpoint + 2;
6665
- yp_newline_list_append(&parser->newline_list, breakpoint + 1);
7249
+ size_t eol_length = match_eol_at(parser, breakpoint);
7250
+ if (eol_length) {
7251
+ parser->current.end = breakpoint + eol_length;
7252
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
6666
7253
  } else {
6667
- if (*parser->current.end == '\n') {
6668
- yp_newline_list_append(&parser->newline_list, parser->current.end);
6669
- }
6670
-
6671
7254
  parser->current.end = breakpoint + 1;
6672
7255
  }
6673
7256
 
6674
7257
  if (
6675
7258
  parser->lex_modes.current->as.string.label_allowed &&
6676
7259
  (peek(parser) == ':') &&
6677
- (peek_at(parser, 1) != ':')
7260
+ (peek_offset(parser, 1) != ':')
6678
7261
  ) {
6679
7262
  parser->current.end++;
6680
7263
  lex_state_set(parser, YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED);
@@ -6712,12 +7295,25 @@ parser_lex(yp_parser_t *parser) {
6712
7295
  // literally. In this case we'll skip past the next character and
6713
7296
  // find the next breakpoint.
6714
7297
  yp_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
6715
- size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
7298
+ size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
7299
+ if (difference == 0) {
7300
+ // we're at the end of the file
7301
+ breakpoint = NULL;
7302
+ break;
7303
+ }
6716
7304
 
6717
- // If the result is an escaped newline, then we need to
6718
- // track that newline.
7305
+ // If the result is an escaped newline ...
6719
7306
  if (breakpoint[difference - 1] == '\n') {
6720
- yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7307
+ if (parser->heredoc_end) {
7308
+ // ... if we are on the same line as a heredoc, flush the heredoc and
7309
+ // continue parsing after heredoc_end.
7310
+ parser->current.end = breakpoint + difference;
7311
+ parser_flush_heredoc_end(parser);
7312
+ LEX(YP_TOKEN_STRING_CONTENT);
7313
+ } else {
7314
+ // ... else track the newline.
7315
+ yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7316
+ }
6721
7317
  }
6722
7318
 
6723
7319
  breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
@@ -6765,27 +7361,25 @@ parser_lex(yp_parser_t *parser) {
6765
7361
 
6766
7362
  // Now let's grab the information about the identifier off of the current
6767
7363
  // lex mode.
6768
- const char *ident_start = parser->lex_modes.current->as.heredoc.ident_start;
7364
+ const uint8_t *ident_start = parser->lex_modes.current->as.heredoc.ident_start;
6769
7365
  size_t ident_length = parser->lex_modes.current->as.heredoc.ident_length;
6770
7366
 
6771
7367
  // If we are immediately following a newline and we have hit the
6772
7368
  // terminator, then we need to return the ending of the heredoc.
6773
- if (parser->current.start[-1] == '\n') {
6774
- const char *start = parser->current.start;
7369
+ if (current_token_starts_line(parser)) {
7370
+ const uint8_t *start = parser->current.start;
6775
7371
  if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) {
6776
7372
  start += yp_strspn_inline_whitespace(start, parser->end - start);
6777
7373
  }
6778
7374
 
6779
- if ((start + ident_length <= parser->end) && (strncmp(start, ident_start, ident_length) == 0)) {
7375
+ if ((start + ident_length <= parser->end) && (memcmp(start, ident_start, ident_length) == 0)) {
6780
7376
  bool matched = true;
6781
7377
  bool at_end = false;
6782
7378
 
6783
- if ((start + ident_length < parser->end) && (start[ident_length] == '\n')) {
6784
- parser->current.end = start + ident_length + 1;
6785
- yp_newline_list_append(&parser->newline_list, start + ident_length);
6786
- } else if ((start + ident_length + 1 < parser->end) && (start[ident_length] == '\r') && (start[ident_length + 1] == '\n')) {
6787
- parser->current.end = start + ident_length + 2;
6788
- yp_newline_list_append(&parser->newline_list, start + ident_length + 1);
7379
+ size_t eol_length = match_eol_at(parser, start + ident_length);
7380
+ if (eol_length) {
7381
+ parser->current.end = start + ident_length + eol_length;
7382
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
6789
7383
  } else if (parser->end == (start + ident_length)) {
6790
7384
  parser->current.end = start + ident_length;
6791
7385
  at_end = true;
@@ -6813,14 +7407,14 @@ parser_lex(yp_parser_t *parser) {
6813
7407
  // Otherwise we'll be parsing string content. These are the places where
6814
7408
  // we need to split up the content of the heredoc. We'll use strpbrk to
6815
7409
  // find the first of these characters.
6816
- char breakpoints[] = "\n\\#";
7410
+ uint8_t breakpoints[] = "\n\\#";
6817
7411
 
6818
7412
  yp_heredoc_quote_t quote = parser->lex_modes.current->as.heredoc.quote;
6819
7413
  if (quote == YP_HEREDOC_QUOTE_SINGLE) {
6820
7414
  breakpoints[2] = '\0';
6821
7415
  }
6822
7416
 
6823
- const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
7417
+ const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
6824
7418
 
6825
7419
  while (breakpoint != NULL) {
6826
7420
  switch (*breakpoint) {
@@ -6837,7 +7431,7 @@ parser_lex(yp_parser_t *parser) {
6837
7431
 
6838
7432
  yp_newline_list_append(&parser->newline_list, breakpoint);
6839
7433
 
6840
- const char *start = breakpoint + 1;
7434
+ const uint8_t *start = breakpoint + 1;
6841
7435
  if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) {
6842
7436
  start += yp_strspn_inline_whitespace(start, parser->end - start);
6843
7437
  }
@@ -6848,21 +7442,12 @@ parser_lex(yp_parser_t *parser) {
6848
7442
  // again and return the end of the heredoc.
6849
7443
  if (
6850
7444
  (start + ident_length <= parser->end) &&
6851
- (strncmp(start, ident_start, ident_length) == 0)
7445
+ (memcmp(start, ident_start, ident_length) == 0)
6852
7446
  ) {
6853
- // Heredoc terminators must be followed by a newline or EOF to be valid.
6854
- if (start + ident_length == parser->end || start[ident_length] == '\n') {
6855
- parser->current.end = breakpoint + 1;
6856
- LEX(YP_TOKEN_STRING_CONTENT);
6857
- }
6858
-
6859
- // They can also be followed by a carriage return and then a
6860
- // newline. Be sure here that we don't accidentally read off the
6861
- // end.
7447
+ // Heredoc terminators must be followed by a newline, CRLF, or EOF to be valid.
6862
7448
  if (
6863
- (start + ident_length + 1 < parser->end) &&
6864
- (start[ident_length] == '\r') &&
6865
- (start[ident_length + 1] == '\n')
7449
+ start + ident_length == parser->end ||
7450
+ match_eol_at(parser, start + ident_length)
6866
7451
  ) {
6867
7452
  parser->current.end = breakpoint + 1;
6868
7453
  LEX(YP_TOKEN_STRING_CONTENT);
@@ -6881,18 +7466,20 @@ parser_lex(yp_parser_t *parser) {
6881
7466
  // stop looping before the newline and not after the
6882
7467
  // newline so that we can still potentially find the
6883
7468
  // terminator of the heredoc.
6884
- if (breakpoint + 1 < parser->end && breakpoint[1] == '\n') {
6885
- breakpoint++;
6886
- } else if (breakpoint + 2 < parser->end && breakpoint[1] == '\r' && breakpoint[2] == '\n') {
6887
- breakpoint += 2;
7469
+ size_t eol_length = match_eol_at(parser, breakpoint + 1);
7470
+ if (eol_length) {
7471
+ breakpoint += eol_length;
6888
7472
  } else {
6889
7473
  yp_unescape_type_t unescape_type = (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL;
6890
- size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
6891
-
6892
- if (breakpoint[difference - 1] == '\n') {
6893
- yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7474
+ size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
7475
+ if (difference == 0) {
7476
+ // we're at the end of the file
7477
+ breakpoint = NULL;
7478
+ break;
6894
7479
  }
6895
7480
 
7481
+ yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
7482
+
6896
7483
  breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
6897
7484
  }
6898
7485
 
@@ -6945,7 +7532,7 @@ yp_regular_expression_node_create_and_unescape(yp_parser_t *parser, const yp_tok
6945
7532
  assert((content->end - content->start) >= 0);
6946
7533
  yp_string_shared_init(&node->unescaped, content->start, content->end);
6947
7534
 
6948
- yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type, &parser->error_list);
7535
+ yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
6949
7536
  return node;
6950
7537
  }
6951
7538
 
@@ -6956,7 +7543,18 @@ yp_symbol_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
6956
7543
  assert((content->end - content->start) >= 0);
6957
7544
  yp_string_shared_init(&node->unescaped, content->start, content->end);
6958
7545
 
6959
- yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type, &parser->error_list);
7546
+ yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
7547
+ return node;
7548
+ }
7549
+
7550
+ static yp_string_node_t *
7551
+ yp_char_literal_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) {
7552
+ yp_string_node_t *node = yp_string_node_create(parser, opening, content, closing);
7553
+
7554
+ assert((content->end - content->start) >= 0);
7555
+ yp_string_shared_init(&node->unescaped, content->start, content->end);
7556
+
7557
+ yp_unescape_manipulate_char_literal(parser, &node->unescaped, unescape_type);
6960
7558
  return node;
6961
7559
  }
6962
7560
 
@@ -6967,7 +7565,7 @@ yp_string_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
6967
7565
  assert((content->end - content->start) >= 0);
6968
7566
  yp_string_shared_init(&node->unescaped, content->start, content->end);
6969
7567
 
6970
- yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type, &parser->error_list);
7568
+ yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
6971
7569
  return node;
6972
7570
  }
6973
7571
 
@@ -6978,7 +7576,7 @@ yp_xstring_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openi
6978
7576
  assert((content->end - content->start) >= 0);
6979
7577
  yp_string_shared_init(&node->unescaped, content->start, content->end);
6980
7578
 
6981
- yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
7579
+ yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL);
6982
7580
  return node;
6983
7581
  }
6984
7582
 
@@ -7315,27 +7913,162 @@ token_begins_expression_p(yp_token_type_t type) {
7315
7913
  }
7316
7914
  }
7317
7915
 
7318
- // Parse an expression with the given binding power that may be optionally
7319
- // prefixed by the * operator.
7320
- static yp_node_t *
7321
- parse_starred_expression(yp_parser_t *parser, yp_binding_power_t binding_power, const char *message) {
7322
- if (accept(parser, YP_TOKEN_USTAR)) {
7323
- yp_token_t operator = parser->previous;
7324
- yp_node_t *expression = parse_expression(parser, binding_power, "Expected expression after `*'.");
7325
- return (yp_node_t *) yp_splat_node_create(parser, &operator, expression);
7326
- }
7916
+ // Parse an expression with the given binding power that may be optionally
7917
+ // prefixed by the * operator.
7918
+ static yp_node_t *
7919
+ parse_starred_expression(yp_parser_t *parser, yp_binding_power_t binding_power, const char *message) {
7920
+ if (accept(parser, YP_TOKEN_USTAR)) {
7921
+ yp_token_t operator = parser->previous;
7922
+ yp_node_t *expression = parse_expression(parser, binding_power, "Expected expression after `*'.");
7923
+ return (yp_node_t *) yp_splat_node_create(parser, &operator, expression);
7924
+ }
7925
+
7926
+ return parse_expression(parser, binding_power, message);
7927
+ }
7928
+
7929
+ // Convert the given node into a valid target node.
7930
+ static yp_node_t *
7931
+ parse_target(yp_parser_t *parser, yp_node_t *target) {
7932
+ switch (YP_NODE_TYPE(target)) {
7933
+ case YP_NODE_MISSING_NODE:
7934
+ return target;
7935
+ case YP_NODE_CLASS_VARIABLE_READ_NODE:
7936
+ assert(sizeof(yp_class_variable_target_node_t) == sizeof(yp_class_variable_read_node_t));
7937
+ target->type = YP_NODE_CLASS_VARIABLE_TARGET_NODE;
7938
+ return target;
7939
+ case YP_NODE_CONSTANT_PATH_NODE:
7940
+ assert(sizeof(yp_constant_path_target_node_t) == sizeof(yp_constant_path_node_t));
7941
+ target->type = YP_NODE_CONSTANT_PATH_TARGET_NODE;
7942
+ return target;
7943
+ case YP_NODE_CONSTANT_READ_NODE:
7944
+ assert(sizeof(yp_constant_target_node_t) == sizeof(yp_constant_read_node_t));
7945
+ target->type = YP_NODE_CONSTANT_TARGET_NODE;
7946
+ return target;
7947
+ case YP_NODE_BACK_REFERENCE_READ_NODE:
7948
+ assert(sizeof(yp_global_variable_target_node_t) == sizeof(yp_back_reference_read_node_t));
7949
+ /* fallthrough */
7950
+ case YP_NODE_NUMBERED_REFERENCE_READ_NODE:
7951
+ assert(sizeof(yp_global_variable_target_node_t) == sizeof(yp_numbered_reference_read_node_t));
7952
+ yp_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, "Can't set variable");
7953
+ /* fallthrough */
7954
+ case YP_NODE_GLOBAL_VARIABLE_READ_NODE:
7955
+ assert(sizeof(yp_global_variable_target_node_t) == sizeof(yp_global_variable_read_node_t));
7956
+ target->type = YP_NODE_GLOBAL_VARIABLE_TARGET_NODE;
7957
+ return target;
7958
+ case YP_NODE_LOCAL_VARIABLE_READ_NODE:
7959
+ assert(sizeof(yp_local_variable_target_node_t) == sizeof(yp_local_variable_read_node_t));
7960
+ target->type = YP_NODE_LOCAL_VARIABLE_TARGET_NODE;
7961
+ return target;
7962
+ case YP_NODE_INSTANCE_VARIABLE_READ_NODE:
7963
+ assert(sizeof(yp_instance_variable_target_node_t) == sizeof(yp_instance_variable_read_node_t));
7964
+ target->type = YP_NODE_INSTANCE_VARIABLE_TARGET_NODE;
7965
+ return target;
7966
+ case YP_NODE_MULTI_WRITE_NODE:
7967
+ return target;
7968
+ case YP_NODE_SPLAT_NODE: {
7969
+ yp_splat_node_t *splat = (yp_splat_node_t *) target;
7970
+
7971
+ if (splat->expression != NULL) {
7972
+ splat->expression = parse_target(parser, splat->expression);
7973
+ }
7974
+
7975
+ yp_token_t operator = not_provided(parser);
7976
+ yp_location_t location = { .start = NULL, .end = NULL };
7977
+
7978
+ yp_multi_write_node_t *multi_write = yp_multi_write_node_create(parser, &operator, NULL, &location, &location);
7979
+ yp_multi_write_node_targets_append(multi_write, (yp_node_t *) splat);
7980
+
7981
+ return (yp_node_t *) multi_write;
7982
+ }
7983
+ case YP_NODE_CALL_NODE: {
7984
+ yp_call_node_t *call = (yp_call_node_t *) target;
7985
+
7986
+ // If we have no arguments to the call node and we need this to be a
7987
+ // target then this is either a method call or a local variable write.
7988
+ if (
7989
+ (call->opening_loc.start == NULL) &&
7990
+ (call->arguments == NULL) &&
7991
+ (call->block == NULL)
7992
+ ) {
7993
+ if (call->receiver == NULL) {
7994
+ // When we get here, we have a local variable write, because it
7995
+ // was previously marked as a method call but now we have an =.
7996
+ // This looks like:
7997
+ //
7998
+ // foo = 1
7999
+ //
8000
+ // When it was parsed in the prefix position, foo was seen as a
8001
+ // method call with no receiver and no arguments. Now we have an
8002
+ // =, so we know it's a local variable write.
8003
+ const yp_location_t message = call->message_loc;
8004
+
8005
+ yp_parser_local_add_location(parser, message.start, message.end);
8006
+ yp_node_destroy(parser, target);
7327
8007
 
7328
- return parse_expression(parser, binding_power, message);
8008
+ const yp_token_t name = { .type = YP_TOKEN_IDENTIFIER, .start = message.start, .end = message.end };
8009
+ target = (yp_node_t *) yp_local_variable_read_node_create(parser, &name, 0);
8010
+
8011
+ assert(sizeof(yp_local_variable_target_node_t) == sizeof(yp_local_variable_read_node_t));
8012
+ target->type = YP_NODE_LOCAL_VARIABLE_TARGET_NODE;
8013
+
8014
+ if (token_is_numbered_parameter(message.start, message.end)) {
8015
+ yp_diagnostic_list_append(&parser->error_list, message.start, message.end, "reserved for numbered parameter");
8016
+ }
8017
+
8018
+ return target;
8019
+ }
8020
+
8021
+ // The method name needs to change. If we previously had foo, we now
8022
+ // need foo=. In this case we'll allocate a new owned string, copy
8023
+ // the previous method name in, and append an =.
8024
+ size_t length = yp_string_length(&call->name);
8025
+
8026
+ uint8_t *name = calloc(length + 1, sizeof(uint8_t));
8027
+ if (name == NULL) return NULL;
8028
+
8029
+ memcpy(name, yp_string_source(&call->name), length);
8030
+ name[length] = '=';
8031
+
8032
+ // Now switch the name to the new string.
8033
+ yp_string_free(&call->name);
8034
+ yp_string_owned_init(&call->name, name, length + 1);
8035
+
8036
+ return target;
8037
+ }
8038
+
8039
+ // If there is no call operator and the message is "[]" then this is
8040
+ // an aref expression, and we can transform it into an aset
8041
+ // expression.
8042
+ if (
8043
+ (call->operator_loc.start == NULL) &&
8044
+ (call->message_loc.start[0] == '[') &&
8045
+ (call->message_loc.end[-1] == ']') &&
8046
+ (call->block == NULL)
8047
+ ) {
8048
+ // Free the previous name and replace it with "[]=".
8049
+ yp_string_free(&call->name);
8050
+ yp_string_constant_init(&call->name, "[]=", 3);
8051
+ return target;
8052
+ }
8053
+ }
8054
+ /* fallthrough */
8055
+ default:
8056
+ // In this case we have a node that we don't know how to convert
8057
+ // into a target. We need to treat it as an error. For now, we'll
8058
+ // mark it as an error and just skip right past it.
8059
+ yp_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, "Unexpected write target.");
8060
+ return target;
8061
+ }
7329
8062
  }
7330
8063
 
7331
- // Convert the given node into a valid target node.
8064
+ // Convert the given node into a valid write node.
7332
8065
  static yp_node_t *
7333
- parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_node_t *value) {
8066
+ parse_write(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_node_t *value) {
7334
8067
  switch (YP_NODE_TYPE(target)) {
7335
8068
  case YP_NODE_MISSING_NODE:
7336
8069
  return target;
7337
8070
  case YP_NODE_CLASS_VARIABLE_READ_NODE: {
7338
- yp_class_variable_write_node_t *write_node = yp_class_variable_read_node_to_class_variable_write_node(parser, (yp_class_variable_read_node_t *) target, operator, value);
8071
+ yp_class_variable_write_node_t *write_node = yp_class_variable_write_node_create(parser, (yp_class_variable_read_node_t *) target, operator, value);
7339
8072
  yp_node_destroy(parser, target);
7340
8073
  return (yp_node_t *) write_node;
7341
8074
  }
@@ -7360,7 +8093,7 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7360
8093
  case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
7361
8094
  yp_local_variable_read_node_t *local_read = (yp_local_variable_read_node_t *) target;
7362
8095
 
7363
- yp_constant_id_t constant_id = local_read->constant_id;
8096
+ yp_constant_id_t constant_id = local_read->name;
7364
8097
  uint32_t depth = local_read->depth;
7365
8098
 
7366
8099
  yp_location_t name_loc = target->location;
@@ -7377,18 +8110,15 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7377
8110
  yp_multi_write_node_t *multi_write = (yp_multi_write_node_t *) target;
7378
8111
  yp_multi_write_node_operator_loc_set(multi_write, operator);
7379
8112
 
7380
- if (value != NULL) {
7381
- multi_write->value = value;
7382
- multi_write->base.location.end = value->location.end;
7383
- }
7384
-
8113
+ multi_write->value = value;
8114
+ multi_write->base.location.end = value->location.end;
7385
8115
  return (yp_node_t *) multi_write;
7386
8116
  }
7387
8117
  case YP_NODE_SPLAT_NODE: {
7388
8118
  yp_splat_node_t *splat = (yp_splat_node_t *) target;
7389
8119
 
7390
8120
  if (splat->expression != NULL) {
7391
- splat->expression = parse_target(parser, splat->expression, operator, value);
8121
+ splat->expression = parse_write(parser, splat->expression, operator, value);
7392
8122
  }
7393
8123
 
7394
8124
  yp_location_t location = { .start = NULL, .end = NULL };
@@ -7441,22 +8171,21 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7441
8171
  // method call with no arguments. Now we have an =, so we know it's
7442
8172
  // a method call with an argument. In this case we will create the
7443
8173
  // arguments node, parse the argument, and add it to the list.
7444
- if (value) {
7445
- yp_arguments_node_t *arguments = yp_arguments_node_create(parser);
7446
- call->arguments = arguments;
7447
- yp_arguments_node_arguments_append(arguments, value);
7448
- target->location.end = arguments->base.location.end;
7449
- }
8174
+ yp_arguments_node_t *arguments = yp_arguments_node_create(parser);
8175
+ call->arguments = arguments;
8176
+ yp_arguments_node_arguments_append(arguments, value);
8177
+ target->location.end = arguments->base.location.end;
7450
8178
 
7451
8179
  // The method name needs to change. If we previously had foo, we now
7452
8180
  // need foo=. In this case we'll allocate a new owned string, copy
7453
8181
  // the previous method name in, and append an =.
7454
8182
  size_t length = yp_string_length(&call->name);
7455
8183
 
7456
- char *name = calloc(length + 2, sizeof(char));
8184
+ uint8_t *name = calloc(length + 1, sizeof(uint8_t));
7457
8185
  if (name == NULL) return NULL;
7458
8186
 
7459
- snprintf(name, length + 2, "%.*s=", (int) length, yp_string_source(&call->name));
8187
+ memcpy(name, yp_string_source(&call->name), length);
8188
+ name[length] = '=';
7460
8189
 
7461
8190
  // Now switch the name to the new string.
7462
8191
  yp_string_free(&call->name);
@@ -7474,15 +8203,13 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7474
8203
  (call->message_loc.end[-1] == ']') &&
7475
8204
  (call->block == NULL)
7476
8205
  ) {
7477
- if (value != NULL) {
7478
- if (call->arguments == NULL) {
7479
- call->arguments = yp_arguments_node_create(parser);
7480
- }
7481
-
7482
- yp_arguments_node_arguments_append(call->arguments, value);
7483
- target->location.end = value->location.end;
8206
+ if (call->arguments == NULL) {
8207
+ call->arguments = yp_arguments_node_create(parser);
7484
8208
  }
7485
8209
 
8210
+ yp_arguments_node_arguments_append(call->arguments, value);
8211
+ target->location.end = value->location.end;
8212
+
7486
8213
  // Free the previous name and replace it with "[]=".
7487
8214
  yp_string_free(&call->name);
7488
8215
  yp_string_constant_init(&call->name, "[]=", 3);
@@ -7494,9 +8221,7 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7494
8221
  // syntax error. In this case we'll fall through to our default
7495
8222
  // handling. We need to free the value that we parsed because there
7496
8223
  // is no way for us to attach it to the tree at this point.
7497
- if (value != NULL) {
7498
- yp_node_destroy(parser, value);
7499
- }
8224
+ yp_node_destroy(parser, value);
7500
8225
  }
7501
8226
  /* fallthrough */
7502
8227
  default:
@@ -7524,7 +8249,7 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7524
8249
  // location that we know requires a multi write, as in the case of a for loop.
7525
8250
  // In this case we will set up the parsing loop slightly differently.
7526
8251
  if (first_target != NULL) {
7527
- first_target = parse_target(parser, first_target, &operator, NULL);
8252
+ first_target = parse_target(parser, first_target);
7528
8253
 
7529
8254
  if (!match_type_p(parser, YP_TOKEN_COMMA)) {
7530
8255
  return first_target;
@@ -7555,9 +8280,8 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7555
8280
  yp_node_t *name = NULL;
7556
8281
 
7557
8282
  if (token_begins_expression_p(parser->current.type)) {
7558
- yp_token_t operator = not_provided(parser);
7559
8283
  name = parse_expression(parser, binding_power, "Expected an expression after '*'.");
7560
- name = parse_target(parser, name, &operator, NULL);
8284
+ name = parse_target(parser, name);
7561
8285
  }
7562
8286
 
7563
8287
  yp_node_t *splat = (yp_node_t *) yp_splat_node_create(parser, &star_operator, name);
@@ -7587,6 +8311,8 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7587
8311
 
7588
8312
  if (YP_NODE_TYPE_P(child_target, YP_NODE_MULTI_WRITE_NODE)) {
7589
8313
  target = (yp_multi_write_node_t *) child_target;
8314
+ target->base.location.start = lparen.start;
8315
+ target->base.location.end = rparen.end;
7590
8316
  target->lparen_loc = (yp_location_t) { .start = lparen.start, .end = lparen.end };
7591
8317
  target->rparen_loc = (yp_location_t) { .start = rparen.start, .end = rparen.end };
7592
8318
  } else {
@@ -7603,6 +8329,7 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7603
8329
  yp_multi_write_node_targets_append(target, child_target);
7604
8330
  }
7605
8331
 
8332
+ target->base.location.start = lparen.start;
7606
8333
  target->base.location.end = rparen.end;
7607
8334
  yp_multi_write_node_targets_append(result, (yp_node_t *) target);
7608
8335
  }
@@ -7625,7 +8352,7 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7625
8352
  }
7626
8353
 
7627
8354
  yp_node_t *target = parse_expression(parser, binding_power, "Expected another expression after ','.");
7628
- target = parse_target(parser, target, &operator, NULL);
8355
+ target = parse_target(parser, target);
7629
8356
 
7630
8357
  yp_multi_write_node_targets_append(result, target);
7631
8358
  }
@@ -8085,7 +8812,6 @@ parse_parameters(
8085
8812
  bool looping = true;
8086
8813
 
8087
8814
  yp_do_loop_stack_push(parser, false);
8088
-
8089
8815
  yp_parameters_order_t order = YP_PARAMETERS_ORDER_NONE;
8090
8816
 
8091
8817
  do {
@@ -8377,8 +9103,7 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
8377
9103
  yp_rescue_node_operator_set(rescue, &parser->previous);
8378
9104
 
8379
9105
  yp_node_t *reference = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected an exception variable after `=>` in rescue statement.");
8380
- yp_token_t operator = not_provided(parser);
8381
- reference = parse_target(parser, reference, &operator, NULL);
9106
+ reference = parse_target(parser, reference);
8382
9107
 
8383
9108
  yp_rescue_node_reference_set(rescue, reference);
8384
9109
  break;
@@ -8408,8 +9133,7 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
8408
9133
  yp_rescue_node_operator_set(rescue, &parser->previous);
8409
9134
 
8410
9135
  yp_node_t *reference = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected an exception variable after `=>` in rescue statement.");
8411
- yp_token_t operator = not_provided(parser);
8412
- reference = parse_target(parser, reference, &operator, NULL);
9136
+ reference = parse_target(parser, reference);
8413
9137
 
8414
9138
  yp_rescue_node_reference_set(rescue, reference);
8415
9139
  break;
@@ -8426,10 +9150,12 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
8426
9150
  }
8427
9151
 
8428
9152
  if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
9153
+ yp_accepts_block_stack_push(parser, true);
8429
9154
  yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_RESCUE);
8430
9155
  if (statements) {
8431
9156
  yp_rescue_node_statements_set(rescue, statements);
8432
9157
  }
9158
+ yp_accepts_block_stack_pop(parser);
8433
9159
  accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
8434
9160
  }
8435
9161
 
@@ -8446,7 +9172,7 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
8446
9172
  // since we won't know the end until we've found all consequent
8447
9173
  // clauses. This sets the end location on all rescues once we know it
8448
9174
  if (current) {
8449
- const char *end_to_set = current->base.location.end;
9175
+ const uint8_t *end_to_set = current->base.location.end;
8450
9176
  current = parent_node->rescue_clause;
8451
9177
  while (current) {
8452
9178
  current->base.location.end = end_to_set;
@@ -8460,7 +9186,9 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
8460
9186
 
8461
9187
  yp_statements_node_t *else_statements = NULL;
8462
9188
  if (!match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_END, YP_TOKEN_KEYWORD_ENSURE)) {
9189
+ yp_accepts_block_stack_push(parser, true);
8463
9190
  else_statements = parse_statements(parser, YP_CONTEXT_RESCUE_ELSE);
9191
+ yp_accepts_block_stack_pop(parser);
8464
9192
  accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
8465
9193
  }
8466
9194
 
@@ -8474,7 +9202,9 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
8474
9202
 
8475
9203
  yp_statements_node_t *ensure_statements = NULL;
8476
9204
  if (!match_type_p(parser, YP_TOKEN_KEYWORD_END)) {
9205
+ yp_accepts_block_stack_push(parser, true);
8477
9206
  ensure_statements = parse_statements(parser, YP_CONTEXT_ENSURE);
9207
+ yp_accepts_block_stack_pop(parser);
8478
9208
  accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
8479
9209
  }
8480
9210
 
@@ -8499,7 +9229,7 @@ parse_rescues_as_begin(yp_parser_t *parser, yp_statements_node_t *statements) {
8499
9229
  // All nodes within a begin node are optional, so we look
8500
9230
  // for the earliest possible node that we can use to set
8501
9231
  // the BeginNode's start location
8502
- const char * start = begin_node->base.location.start;
9232
+ const uint8_t *start = begin_node->base.location.start;
8503
9233
  if (begin_node->statements) {
8504
9234
  start = begin_node->statements->base.location.start;
8505
9235
  } else if (begin_node->rescue_clause) {
@@ -8584,7 +9314,9 @@ parse_block(yp_parser_t *parser) {
8584
9314
  } else {
8585
9315
  if (!match_type_p(parser, YP_TOKEN_KEYWORD_END)) {
8586
9316
  if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_ENSURE)) {
9317
+ yp_accepts_block_stack_push(parser, true);
8587
9318
  statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_BLOCK_KEYWORDS);
9319
+ yp_accepts_block_stack_pop(parser);
8588
9320
  }
8589
9321
 
8590
9322
  if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
@@ -8961,14 +9693,10 @@ parse_string_part(yp_parser_t *parser) {
8961
9693
 
8962
9694
  static yp_node_t *
8963
9695
  parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_state) {
8964
- bool lex_string = lex_mode->mode == YP_LEX_STRING;
8965
- bool can_be_interpolated = lex_string && lex_mode->as.string.interpolation;
8966
9696
  yp_token_t opening = parser->previous;
8967
9697
 
8968
- if (!lex_string) {
8969
- if (next_state != YP_LEX_STATE_NONE) {
8970
- lex_state_set(parser, next_state);
8971
- }
9698
+ if (lex_mode->mode != YP_LEX_STRING) {
9699
+ if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state);
8972
9700
  yp_token_t symbol;
8973
9701
 
8974
9702
  switch (parser->current.type) {
@@ -8998,37 +9726,44 @@ parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_s
8998
9726
  return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &symbol, &closing, YP_UNESCAPE_ALL);
8999
9727
  }
9000
9728
 
9001
- if (can_be_interpolated) {
9002
- // Create a node_list first. We'll use this to check if it should be an InterpolatedSymbolNode
9003
- // or a SymbolNode
9729
+ if (lex_mode->as.string.interpolation) {
9730
+ // If we have the end of the symbol, then we can return an empty symbol.
9731
+ if (match_type_p(parser, YP_TOKEN_STRING_END)) {
9732
+ if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state);
9733
+ parser_lex(parser);
9734
+
9735
+ yp_token_t content = not_provided(parser);
9736
+ yp_token_t closing = parser->previous;
9737
+ return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &closing, YP_UNESCAPE_NONE);
9738
+ }
9739
+
9740
+ // Now we can parse the first part of the symbol.
9741
+ yp_node_t *part = parse_string_part(parser);
9742
+
9743
+ // If we got a string part, then it's possible that we could transform
9744
+ // what looks like an interpolated symbol into a regular symbol.
9745
+ if (part && YP_NODE_TYPE_P(part, YP_NODE_STRING_NODE) && match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
9746
+ if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state);
9747
+ parser_lex(parser);
9748
+
9749
+ return (yp_node_t *) yp_string_node_to_symbol_node(parser, (yp_string_node_t *) part, &opening, &parser->previous);
9750
+ }
9751
+
9752
+ // Create a node_list first. We'll use this to check if it should be an
9753
+ // InterpolatedSymbolNode or a SymbolNode.
9004
9754
  yp_node_list_t node_list = YP_EMPTY_NODE_LIST;
9755
+ if (part) yp_node_list_append(&node_list, part);
9005
9756
 
9006
9757
  while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
9007
- yp_node_t *part = parse_string_part(parser);
9008
- if (part != NULL) {
9758
+ if ((part = parse_string_part(parser)) != NULL) {
9009
9759
  yp_node_list_append(&node_list, part);
9010
9760
  }
9011
9761
  }
9012
9762
 
9013
- yp_node_t *res;
9014
- // If the only element on the node_list is a StringNode, we know this is a SymbolNode
9015
- // and not an InterpolatedSymbolNode
9016
- if (node_list.size == 1 && YP_NODE_TYPE_P(node_list.nodes[0], YP_NODE_STRING_NODE)) {
9017
- res = (yp_node_t *)yp_string_node_to_symbol_node(parser, (yp_string_node_t *)node_list.nodes[0]);
9018
- free(node_list.nodes);
9019
- }
9020
- else {
9021
- yp_interpolated_symbol_node_t *interpolated = yp_interpolated_symbol_node_create(parser, &opening, &node_list, &opening);
9022
- yp_interpolated_symbol_node_closing_set(interpolated, &parser->current);
9023
- res = (yp_node_t *) interpolated;
9024
- }
9025
-
9026
- if (next_state != YP_LEX_STATE_NONE) {
9027
- lex_state_set(parser, next_state);
9028
- }
9763
+ if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state);
9029
9764
  expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated symbol.");
9030
9765
 
9031
- return res;
9766
+ return (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &node_list, &parser->previous);
9032
9767
  }
9033
9768
 
9034
9769
  yp_token_t content;
@@ -9162,19 +9897,22 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
9162
9897
  yp_node_t *node = nodes->nodes[index];
9163
9898
 
9164
9899
  if (!YP_NODE_TYPE_P(node, YP_NODE_STRING_NODE)) continue;
9165
- yp_location_t *content_loc = &((yp_string_node_t *) node)->content_loc;
9900
+ const yp_location_t *content_loc = &((yp_string_node_t *) node)->content_loc;
9166
9901
 
9167
9902
  // If the previous node wasn't a string node, we don't want to trim
9168
9903
  // whitespace. This could happen after an interpolated expression or
9169
9904
  // variable.
9170
9905
  if (index == 0 || YP_NODE_TYPE_P(nodes->nodes[index - 1], YP_NODE_STRING_NODE)) {
9171
9906
  int cur_whitespace;
9172
- const char *cur_char = content_loc->start;
9907
+ const uint8_t *cur_char = content_loc->start;
9173
9908
 
9174
9909
  while (cur_char && cur_char < content_loc->end) {
9175
- // Any empty newlines aren't included in the minimum whitespace calculation
9176
- while (cur_char < content_loc->end && *cur_char == '\n') cur_char++;
9177
- while (cur_char + 1 < content_loc->end && *cur_char == '\r' && cur_char[1] == '\n') cur_char += 2;
9910
+ // Any empty newlines aren't included in the minimum whitespace
9911
+ // calculation.
9912
+ size_t eol_length;
9913
+ while ((eol_length = match_eol_at(parser, cur_char))) {
9914
+ cur_char += eol_length;
9915
+ }
9178
9916
 
9179
9917
  if (cur_char == content_loc->end) break;
9180
9918
 
@@ -9189,11 +9927,12 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
9189
9927
  cur_char++;
9190
9928
  }
9191
9929
 
9192
- // If we hit a newline, then we have encountered a line that contains
9193
- // only whitespace, and it shouldn't be considered in the calculation of
9194
- // common leading whitespace.
9195
- if (*cur_char == '\n') {
9196
- cur_char++;
9930
+ // If we hit a newline, then we have encountered a line that
9931
+ // contains only whitespace, and it shouldn't be considered in
9932
+ // the calculation of common leading whitespace.
9933
+ eol_length = match_eol_at(parser, cur_char);
9934
+ if (eol_length) {
9935
+ cur_char += eol_length;
9197
9936
  continue;
9198
9937
  }
9199
9938
 
@@ -9256,15 +9995,15 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
9256
9995
  // destination to move bytes into. We'll also use it for bounds checking
9257
9996
  // since we don't require that these strings be null terminated.
9258
9997
  size_t dest_length = yp_string_length(string);
9259
- char *source_start = string->source;
9998
+ uint8_t *source_start = (uint8_t *) string->source;
9260
9999
 
9261
- const char *source_cursor = source_start;
9262
- const char *source_end = source_cursor + dest_length;
10000
+ const uint8_t *source_cursor = source_start;
10001
+ const uint8_t *source_end = source_cursor + dest_length;
9263
10002
 
9264
10003
  // We're going to move bytes backward in the string when we get leading
9265
10004
  // whitespace, so we'll maintain a pointer to the current position in the
9266
10005
  // string that we're writing to.
9267
- char *dest_cursor = source_start;
10006
+ uint8_t *dest_cursor = source_start;
9268
10007
 
9269
10008
  while (source_cursor < source_end) {
9270
10009
  // If we need to dedent the next element within the heredoc or the next
@@ -9291,7 +10030,7 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
9291
10030
 
9292
10031
  // At this point we have dedented all that we need to, so we need to find
9293
10032
  // the next newline.
9294
- const char *breakpoint = next_newline(source_cursor, source_end - source_cursor);
10033
+ const uint8_t *breakpoint = next_newline(source_cursor, source_end - source_cursor);
9295
10034
 
9296
10035
  if (breakpoint == NULL) {
9297
10036
  // If there isn't another newline, then we can just move the rest of the
@@ -9314,7 +10053,7 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
9314
10053
  yp_node_destroy(parser, node);
9315
10054
  } else {
9316
10055
  string->length = dest_length;
9317
- yp_unescape_manipulate_string(parser, string, (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL, &parser->error_list);
10056
+ yp_unescape_manipulate_string(parser, string, (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL);
9318
10057
  nodes->nodes[write_index++] = node;
9319
10058
  }
9320
10059
 
@@ -9503,7 +10242,7 @@ parse_pattern_hash(yp_parser_t *parser, yp_node_t *first_assoc) {
9503
10242
  yp_node_t *key = ((yp_assoc_node_t *) first_assoc)->key;
9504
10243
 
9505
10244
  if (YP_NODE_TYPE_P(key, YP_NODE_SYMBOL_NODE)) {
9506
- yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
10245
+ const yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
9507
10246
  yp_parser_local_add_location(parser, value_loc->start, value_loc->end);
9508
10247
  }
9509
10248
  }
@@ -9531,7 +10270,7 @@ parse_pattern_hash(yp_parser_t *parser, yp_node_t *first_assoc) {
9531
10270
  if (!match_any_type_p(parser, 7, YP_TOKEN_COMMA, YP_TOKEN_KEYWORD_THEN, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_BRACKET_RIGHT, YP_TOKEN_PARENTHESIS_RIGHT, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) {
9532
10271
  value = parse_pattern(parser, false, "Expected a pattern expression after the key.");
9533
10272
  } else {
9534
- yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
10273
+ const yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
9535
10274
  yp_parser_local_add_location(parser, value_loc->start, value_loc->end);
9536
10275
  }
9537
10276
 
@@ -10071,10 +10810,8 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10071
10810
  }
10072
10811
  case YP_TOKEN_PARENTHESIS_LEFT:
10073
10812
  case YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
10074
- yp_token_type_t current_token_type = parser->current.type;
10813
+ yp_token_t opening = parser->current;
10075
10814
  parser_lex(parser);
10076
-
10077
- yp_token_t opening = parser->previous;
10078
10815
  while (accept_any(parser, 2, YP_TOKEN_SEMICOLON, YP_TOKEN_NEWLINE));
10079
10816
 
10080
10817
  // If this is the end of the file or we match a right parenthesis, then
@@ -10093,7 +10830,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10093
10830
  // If we hit a right parenthesis, then we're done parsing the parentheses
10094
10831
  // node, and we can check which kind of node we should return.
10095
10832
  if (match_type_p(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
10096
- if (current_token_type == YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
10833
+ if (opening.type == YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
10097
10834
  lex_state_set(parser, YP_LEX_STATE_ENDARG);
10098
10835
  }
10099
10836
  parser_lex(parser);
@@ -10111,6 +10848,8 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10111
10848
 
10112
10849
  if (multi_statement->lparen_loc.start == NULL) {
10113
10850
  multi_write = (yp_multi_write_node_t *) statement;
10851
+ multi_write->base.location.start = lparen_loc.start;
10852
+ multi_write->base.location.end = rparen_loc.end;
10114
10853
  multi_write->lparen_loc = lparen_loc;
10115
10854
  multi_write->rparen_loc = rparen_loc;
10116
10855
  } else {
@@ -10193,7 +10932,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10193
10932
 
10194
10933
  yp_token_t closing = not_provided(parser);
10195
10934
 
10196
- return (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &closing, YP_UNESCAPE_ALL);
10935
+ return (yp_node_t *) yp_char_literal_node_create_and_unescape(parser, &opening, &content, &closing, YP_UNESCAPE_ALL);
10197
10936
  }
10198
10937
  case YP_TOKEN_CLASS_VARIABLE: {
10199
10938
  parser_lex(parser);
@@ -10213,7 +10952,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10213
10952
  // fact a method call, not a constant read.
10214
10953
  if (
10215
10954
  match_type_p(parser, YP_TOKEN_PARENTHESIS_LEFT) ||
10216
- (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 2, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
10955
+ (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 3, YP_TOKEN_UAMPERSAND, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
10217
10956
  (yp_accepts_block_stack_p(parser) && match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_DO, YP_TOKEN_BRACE_LEFT))
10218
10957
  ) {
10219
10958
  yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
@@ -10336,7 +11075,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10336
11075
  // can still be a method call if it is followed by arguments or
10337
11076
  // a block, so we need to check for that here.
10338
11077
  if (
10339
- (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 2, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
11078
+ (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 3, YP_TOKEN_UAMPERSAND, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
10340
11079
  (yp_accepts_block_stack_p(parser) && match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_DO, YP_TOKEN_BRACE_LEFT))
10341
11080
  ) {
10342
11081
  yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
@@ -10738,7 +11477,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10738
11477
 
10739
11478
  yp_node_t *statements = NULL;
10740
11479
  if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
11480
+ yp_accepts_block_stack_push(parser, true);
10741
11481
  statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_SCLASS);
11482
+ yp_accepts_block_stack_pop(parser);
10742
11483
  }
10743
11484
 
10744
11485
  if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
@@ -10754,7 +11495,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10754
11495
  return (yp_node_t *) yp_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
10755
11496
  }
10756
11497
 
10757
- yp_node_t *name = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a class name after `class`.");
11498
+ yp_node_t *constant_path = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a class name after `class`.");
11499
+ yp_token_t name = parser->previous;
11500
+ if (name.type != YP_TOKEN_CONSTANT) {
11501
+ yp_diagnostic_list_append(&parser->error_list, name.start, name.end, "Expected a constant name after `class`.");
11502
+ }
11503
+
10758
11504
  yp_token_t inheritance_operator;
10759
11505
  yp_node_t *superclass;
10760
11506
 
@@ -10795,7 +11541,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10795
11541
  yp_constant_id_list_t locals = parser->current_scope->locals;
10796
11542
  yp_parser_scope_pop(parser);
10797
11543
  yp_do_loop_stack_pop(parser);
10798
- return (yp_node_t *) yp_class_node_create(parser, &locals, &class_keyword, name, &inheritance_operator, superclass, statements, &parser->previous);
11544
+ return (yp_node_t *) yp_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
10799
11545
  }
10800
11546
  case YP_TOKEN_KEYWORD_DEF: {
10801
11547
  yp_token_t def_keyword = parser->current;
@@ -10954,6 +11700,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10954
11700
  break;
10955
11701
  }
10956
11702
  case YP_CASE_PARAMETER: {
11703
+ // If we're about to lex a label, we need to add the label
11704
+ // state to make sure the next newline is ignored.
11705
+ if (parser->current.type == YP_TOKEN_LABEL) {
11706
+ lex_state_set(parser, parser->lex_state | YP_LEX_STATE_LABEL);
11707
+ }
11708
+
10957
11709
  lparen = not_provided(parser);
10958
11710
  rparen = not_provided(parser);
10959
11711
  params = parse_parameters(parser, YP_BINDING_POWER_DEFINED, false, false, true);
@@ -11008,7 +11760,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11008
11760
  yp_do_loop_stack_push(parser, false);
11009
11761
 
11010
11762
  if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
11763
+ yp_accepts_block_stack_push(parser, true);
11011
11764
  statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_DEF);
11765
+ yp_accepts_block_stack_pop(parser);
11012
11766
  }
11013
11767
 
11014
11768
  if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
@@ -11183,13 +11937,14 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11183
11937
  parser_lex(parser);
11184
11938
 
11185
11939
  yp_token_t module_keyword = parser->previous;
11186
- yp_node_t *name = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a module name after `module`.");
11940
+ yp_node_t *constant_path = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a module name after `module`.");
11941
+ yp_token_t name;
11187
11942
 
11188
- // If we can recover from a syntax error that occurred while parsing the
11189
- // name of the module, then we'll handle that here.
11190
- if (YP_NODE_TYPE_P(name, YP_NODE_MISSING_NODE)) {
11191
- yp_token_t end_keyword = (yp_token_t) { .type = YP_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
11192
- return (yp_node_t *) yp_module_node_create(parser, NULL, &module_keyword, name, NULL, &end_keyword);
11943
+ // If we can recover from a syntax error that occurred while parsing
11944
+ // the name of the module, then we'll handle that here.
11945
+ if (YP_NODE_TYPE_P(constant_path, YP_NODE_MISSING_NODE)) {
11946
+ yp_token_t missing = (yp_token_t) { .type = YP_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
11947
+ return (yp_node_t *) yp_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
11193
11948
  }
11194
11949
 
11195
11950
  while (accept(parser, YP_TOKEN_COLON_COLON)) {
@@ -11198,7 +11953,15 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11198
11953
  expect(parser, YP_TOKEN_CONSTANT, "Expected to find a module name after `::`.");
11199
11954
  yp_node_t *constant = (yp_node_t *) yp_constant_read_node_create(parser, &parser->previous);
11200
11955
 
11201
- name = (yp_node_t *)yp_constant_path_node_create(parser, name, &double_colon, constant);
11956
+ constant_path = (yp_node_t *) yp_constant_path_node_create(parser, constant_path, &double_colon, constant);
11957
+ }
11958
+
11959
+ // Here we retrieve the name of the module. If it wasn't a constant,
11960
+ // then it's possible that `module foo` was passed, which is a
11961
+ // syntax error. We handle that here as well.
11962
+ name = parser->previous;
11963
+ if (name.type != YP_TOKEN_CONSTANT) {
11964
+ yp_diagnostic_list_append(&parser->error_list, name.start, name.end, "Expected to find a module name after `module`.");
11202
11965
  }
11203
11966
 
11204
11967
  yp_parser_scope_push(parser, true);
@@ -11225,7 +11988,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11225
11988
  yp_diagnostic_list_append(&parser->error_list, module_keyword.start, module_keyword.end, "Module definition in method body");
11226
11989
  }
11227
11990
 
11228
- return (yp_node_t *) yp_module_node_create(parser, &locals, &module_keyword, name, statements, &parser->previous);
11991
+ return (yp_node_t *) yp_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
11229
11992
  }
11230
11993
  case YP_TOKEN_KEYWORD_NIL:
11231
11994
  parser_lex(parser);
@@ -11261,12 +12024,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11261
12024
  expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `until` statement.");
11262
12025
  }
11263
12026
 
11264
- yp_until_node_t *until_node = yp_until_node_create(parser, &keyword, predicate, statements, 0);
11265
- if (parser->previous.type == YP_TOKEN_KEYWORD_END) {
11266
- until_node->base.location.end = parser->previous.end;
11267
- }
11268
-
11269
- return (yp_node_t *) until_node;
12027
+ return (yp_node_t *) yp_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
11270
12028
  }
11271
12029
  case YP_TOKEN_KEYWORD_WHILE: {
11272
12030
  yp_do_loop_stack_push(parser, true);
@@ -11287,25 +12045,16 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11287
12045
  expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `while` statement.");
11288
12046
  }
11289
12047
 
11290
- yp_while_node_t *while_node = yp_while_node_create(parser, &keyword, predicate, statements, 0);
11291
- if (parser->previous.type == YP_TOKEN_KEYWORD_END) {
11292
- while_node->base.location.end = parser->previous.end;
11293
- }
11294
- return (yp_node_t *) while_node;
12048
+ return (yp_node_t *) yp_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
11295
12049
  }
11296
12050
  case YP_TOKEN_PERCENT_LOWER_I: {
11297
12051
  parser_lex(parser);
11298
12052
  yp_array_node_t *array = yp_array_node_create(parser, &parser->previous);
11299
12053
 
11300
12054
  while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
11301
- if (yp_array_node_size(array) == 0) {
11302
- accept(parser, YP_TOKEN_WORDS_SEP);
11303
- } else {
11304
- expect(parser, YP_TOKEN_WORDS_SEP, "Expected a separator for the symbols in a `%i` list.");
11305
- if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
11306
- }
11307
-
12055
+ accept(parser, YP_TOKEN_WORDS_SEP);
11308
12056
  if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
12057
+
11309
12058
  expect(parser, YP_TOKEN_STRING_CONTENT, "Expected a symbol in a `%i` list.");
11310
12059
 
11311
12060
  yp_token_t opening = not_provided(parser);
@@ -11360,6 +12109,19 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11360
12109
  // to the list of child nodes.
11361
12110
  yp_node_t *part = parse_string_part(parser);
11362
12111
  yp_interpolated_symbol_node_append((yp_interpolated_symbol_node_t *) current, part);
12112
+ } else if (YP_NODE_TYPE_P(current, YP_NODE_SYMBOL_NODE)) {
12113
+ // If we hit string content and the current node is a string node,
12114
+ // then we need to convert the current node into an interpolated
12115
+ // string and add the string content to the list of child nodes.
12116
+ yp_token_t opening = not_provided(parser);
12117
+ yp_token_t closing = not_provided(parser);
12118
+ yp_interpolated_symbol_node_t *interpolated =
12119
+ yp_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
12120
+ yp_interpolated_symbol_node_append(interpolated, current);
12121
+
12122
+ yp_node_t *part = parse_string_part(parser);
12123
+ yp_interpolated_symbol_node_append(interpolated, part);
12124
+ current = (yp_node_t *) interpolated;
11363
12125
  } else {
11364
12126
  assert(false && "unreachable");
11365
12127
  }
@@ -11462,12 +12224,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11462
12224
  accept(parser, YP_TOKEN_WORDS_SEP);
11463
12225
 
11464
12226
  while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
11465
- if (yp_array_node_size(array) == 0) {
11466
- accept(parser, YP_TOKEN_WORDS_SEP);
11467
- } else {
11468
- expect(parser, YP_TOKEN_WORDS_SEP, "Expected a separator for the strings in a `%w` list.");
11469
- if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
11470
- }
12227
+ accept(parser, YP_TOKEN_WORDS_SEP);
12228
+ if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
12229
+
11471
12230
  expect(parser, YP_TOKEN_STRING_CONTENT, "Expected a string in a `%w` list.");
11472
12231
 
11473
12232
  yp_token_t opening = not_provided(parser);
@@ -11517,6 +12276,19 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11517
12276
  // to the list of child nodes.
11518
12277
  yp_node_t *part = parse_string_part(parser);
11519
12278
  yp_interpolated_string_node_append((yp_interpolated_string_node_t *) current, part);
12279
+ } else if (YP_NODE_TYPE_P(current, YP_NODE_STRING_NODE)) {
12280
+ // If we hit string content and the current node is a string node,
12281
+ // then we need to convert the current node into an interpolated
12282
+ // string and add the string content to the list of child nodes.
12283
+ yp_token_t opening = not_provided(parser);
12284
+ yp_token_t closing = not_provided(parser);
12285
+ yp_interpolated_string_node_t *interpolated =
12286
+ yp_interpolated_string_node_create(parser, &opening, NULL, &closing);
12287
+ yp_interpolated_string_node_append(interpolated, current);
12288
+
12289
+ yp_node_t *part = parse_string_part(parser);
12290
+ yp_interpolated_string_node_append(interpolated, part);
12291
+ current = (yp_node_t *) interpolated;
11520
12292
  } else {
11521
12293
  assert(false && "unreachable");
11522
12294
  }
@@ -11797,30 +12569,32 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11797
12569
  yp_accepts_block_stack_push(parser, true);
11798
12570
  parser_lex(parser);
11799
12571
 
11800
- yp_token_t opening = parser->previous;
12572
+ yp_token_t operator = parser->previous;
11801
12573
  yp_parser_scope_push(parser, false);
11802
12574
  yp_block_parameters_node_t *params;
11803
12575
 
11804
12576
  switch (parser->current.type) {
11805
12577
  case YP_TOKEN_PARENTHESIS_LEFT: {
11806
- yp_token_t block_parameters_opening = parser->current;
12578
+ yp_token_t opening = parser->current;
11807
12579
  parser_lex(parser);
11808
12580
 
11809
12581
  if (match_type_p(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
11810
- params = yp_block_parameters_node_create(parser, NULL, &block_parameters_opening);
12582
+ params = yp_block_parameters_node_create(parser, NULL, &opening);
11811
12583
  } else {
11812
- params = parse_block_parameters(parser, false, &block_parameters_opening, true);
12584
+ params = parse_block_parameters(parser, false, &opening, true);
11813
12585
  }
11814
12586
 
11815
12587
  accept(parser, YP_TOKEN_NEWLINE);
11816
12588
  expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected ')' after left parenthesis.");
11817
- yp_block_parameters_node_closing_set(params, &parser->previous);
11818
12589
 
12590
+ yp_block_parameters_node_closing_set(params, &parser->previous);
11819
12591
  break;
11820
12592
  }
11821
12593
  case YP_CASE_PARAMETER: {
12594
+ yp_accepts_block_stack_push(parser, false);
11822
12595
  yp_token_t opening = not_provided(parser);
11823
12596
  params = parse_block_parameters(parser, false, &opening, true);
12597
+ yp_accepts_block_stack_pop(parser);
11824
12598
  break;
11825
12599
  }
11826
12600
  default: {
@@ -11829,19 +12603,25 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11829
12603
  }
11830
12604
  }
11831
12605
 
12606
+ yp_token_t opening;
11832
12607
  yp_node_t *body = NULL;
11833
12608
  parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
11834
12609
 
11835
12610
  if (accept(parser, YP_TOKEN_LAMBDA_BEGIN)) {
12611
+ opening = parser->previous;
12612
+
11836
12613
  if (!accept(parser, YP_TOKEN_BRACE_RIGHT)) {
11837
12614
  body = (yp_node_t *) parse_statements(parser, YP_CONTEXT_LAMBDA_BRACES);
11838
12615
  expect(parser, YP_TOKEN_BRACE_RIGHT, "Expecting '}' to close lambda block.");
11839
12616
  }
11840
12617
  } else {
11841
12618
  expect(parser, YP_TOKEN_KEYWORD_DO, "Expected a 'do' keyword or a '{' to open lambda block.");
12619
+ opening = parser->previous;
11842
12620
 
11843
12621
  if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_END, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
12622
+ yp_accepts_block_stack_push(parser, true);
11844
12623
  body = (yp_node_t *) parse_statements(parser, YP_CONTEXT_LAMBDA_DO_END);
12624
+ yp_accepts_block_stack_pop(parser);
11845
12625
  }
11846
12626
 
11847
12627
  if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
@@ -11855,7 +12635,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11855
12635
  yp_constant_id_list_t locals = parser->current_scope->locals;
11856
12636
  yp_parser_scope_pop(parser);
11857
12637
  yp_accepts_block_stack_pop(parser);
11858
- return (yp_node_t *) yp_lambda_node_create(parser, &locals, &opening, params, body, &parser->previous);
12638
+ return (yp_node_t *) yp_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, params, body);
11859
12639
  }
11860
12640
  case YP_TOKEN_UPLUS: {
11861
12641
  parser_lex(parser);
@@ -12074,7 +12854,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12074
12854
  case YP_CASE_WRITABLE: {
12075
12855
  parser_lex(parser);
12076
12856
  yp_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, "Expected a value after =.");
12077
- return parse_target(parser, node, &token, value);
12857
+ return parse_write(parser, node, &token, value);
12078
12858
  }
12079
12859
  case YP_NODE_SPLAT_NODE: {
12080
12860
  yp_splat_node_t *splat_node = (yp_splat_node_t *) node;
@@ -12083,7 +12863,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12083
12863
  case YP_CASE_WRITABLE:
12084
12864
  parser_lex(parser);
12085
12865
  yp_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, "Expected a value after =.");
12086
- return parse_target(parser, (yp_node_t *) splat_node, &token, value);
12866
+ return parse_write(parser, (yp_node_t *) splat_node, &token, value);
12087
12867
  default:
12088
12868
  break;
12089
12869
  }
@@ -12105,19 +12885,57 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12105
12885
  case YP_NODE_NUMBERED_REFERENCE_READ_NODE:
12106
12886
  yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, "Can't set variable");
12107
12887
  /* fallthrough */
12108
- case YP_NODE_CLASS_VARIABLE_READ_NODE:
12109
- case YP_NODE_CONSTANT_PATH_NODE:
12110
- case YP_NODE_CONSTANT_READ_NODE:
12111
- case YP_NODE_GLOBAL_VARIABLE_READ_NODE:
12112
- case YP_NODE_INSTANCE_VARIABLE_READ_NODE:
12113
- case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
12888
+ case YP_NODE_GLOBAL_VARIABLE_READ_NODE: {
12889
+ parser_lex(parser);
12890
+
12891
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12892
+ yp_node_t *result = (yp_node_t *) yp_global_variable_and_write_node_create(parser, node, &token, value);
12893
+
12894
+ yp_node_destroy(parser, node);
12895
+ return result;
12896
+ }
12897
+ case YP_NODE_CLASS_VARIABLE_READ_NODE: {
12898
+ parser_lex(parser);
12899
+
12900
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12901
+ yp_node_t *result = (yp_node_t *) yp_class_variable_and_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value);
12902
+
12903
+ yp_node_destroy(parser, node);
12904
+ return result;
12905
+ }
12906
+ case YP_NODE_CONSTANT_PATH_NODE: {
12907
+ parser_lex(parser);
12908
+
12909
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12910
+ return (yp_node_t *) yp_constant_path_and_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value);
12911
+ }
12912
+ case YP_NODE_CONSTANT_READ_NODE: {
12913
+ parser_lex(parser);
12914
+
12915
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12916
+ yp_node_t *result = (yp_node_t *) yp_constant_and_write_node_create(parser, node, &token, value);
12917
+
12918
+ yp_node_destroy(parser, node);
12919
+ return result;
12920
+ }
12921
+ case YP_NODE_INSTANCE_VARIABLE_READ_NODE: {
12114
12922
  parser_lex(parser);
12115
12923
 
12116
- yp_token_t operator = not_provided(parser);
12117
- node = parse_target(parser, node, &operator, NULL);
12924
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12925
+ yp_node_t *result = (yp_node_t *) yp_instance_variable_and_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value);
12926
+
12927
+ yp_node_destroy(parser, node);
12928
+ return result;
12929
+ }
12930
+ case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
12931
+ yp_local_variable_read_node_t *cast = (yp_local_variable_read_node_t *) node;
12932
+ parser_lex(parser);
12118
12933
 
12119
12934
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12120
- return (yp_node_t *) yp_and_write_node_create(parser, node, &token, value);
12935
+ yp_node_t *result = (yp_node_t *) yp_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
12936
+
12937
+ yp_node_destroy(parser, node);
12938
+ return result;
12121
12939
  }
12122
12940
  case YP_NODE_CALL_NODE: {
12123
12941
  yp_call_node_t *call_node = (yp_call_node_t *) node;
@@ -12127,25 +12945,22 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12127
12945
  // will transform it into a local variable write.
12128
12946
  if (yp_call_node_variable_call_p(call_node)) {
12129
12947
  yp_location_t message_loc = call_node->message_loc;
12130
- yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12948
+ yp_constant_id_t constant_id = yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12131
12949
 
12132
12950
  if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
12133
12951
  yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
12134
12952
  }
12135
12953
 
12136
12954
  parser_lex(parser);
12137
-
12138
- yp_token_t operator = not_provided(parser);
12139
- node = parse_target(parser, node, &operator, NULL);
12140
-
12141
12955
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12142
- return (yp_node_t *) yp_and_write_node_create(parser, node, &token, value);
12956
+ yp_node_t *result = (yp_node_t *) yp_local_variable_and_write_node_create(parser, node, &token, value, constant_id, 0);
12957
+
12958
+ yp_node_destroy(parser, node);
12959
+ return result;
12143
12960
  }
12144
12961
 
12145
12962
  parser_lex(parser);
12146
-
12147
- yp_token_t operator = not_provided(parser);
12148
- node = parse_target(parser, node, &operator, NULL);
12963
+ node = parse_target(parser, node);
12149
12964
 
12150
12965
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12151
12966
  return (yp_node_t *) yp_call_operator_and_write_node_create(parser, (yp_call_node_t *) node, &token, value);
@@ -12171,19 +12986,57 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12171
12986
  case YP_NODE_NUMBERED_REFERENCE_READ_NODE:
12172
12987
  yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, "Can't set variable");
12173
12988
  /* fallthrough */
12174
- case YP_NODE_CLASS_VARIABLE_READ_NODE:
12175
- case YP_NODE_CONSTANT_PATH_NODE:
12176
- case YP_NODE_CONSTANT_READ_NODE:
12177
- case YP_NODE_GLOBAL_VARIABLE_READ_NODE:
12178
- case YP_NODE_INSTANCE_VARIABLE_READ_NODE:
12179
- case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
12989
+ case YP_NODE_GLOBAL_VARIABLE_READ_NODE: {
12990
+ parser_lex(parser);
12991
+
12992
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12993
+ yp_node_t *result = (yp_node_t *) yp_global_variable_or_write_node_create(parser, node, &token, value);
12994
+
12995
+ yp_node_destroy(parser, node);
12996
+ return result;
12997
+ }
12998
+ case YP_NODE_CLASS_VARIABLE_READ_NODE: {
12999
+ parser_lex(parser);
13000
+
13001
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
13002
+ yp_node_t *result = (yp_node_t *) yp_class_variable_or_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value);
13003
+
13004
+ yp_node_destroy(parser, node);
13005
+ return result;
13006
+ }
13007
+ case YP_NODE_CONSTANT_PATH_NODE: {
13008
+ parser_lex(parser);
13009
+
13010
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
13011
+ return (yp_node_t *) yp_constant_path_or_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value);
13012
+ }
13013
+ case YP_NODE_CONSTANT_READ_NODE: {
13014
+ parser_lex(parser);
13015
+
13016
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
13017
+ yp_node_t *result = (yp_node_t *) yp_constant_or_write_node_create(parser, node, &token, value);
13018
+
13019
+ yp_node_destroy(parser, node);
13020
+ return result;
13021
+ }
13022
+ case YP_NODE_INSTANCE_VARIABLE_READ_NODE: {
12180
13023
  parser_lex(parser);
12181
13024
 
12182
- yp_token_t operator = not_provided(parser);
12183
- node = parse_target(parser, node, &operator, NULL);
13025
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
13026
+ yp_node_t *result = (yp_node_t *) yp_instance_variable_or_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value);
13027
+
13028
+ yp_node_destroy(parser, node);
13029
+ return result;
13030
+ }
13031
+ case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
13032
+ yp_local_variable_read_node_t *cast = (yp_local_variable_read_node_t *) node;
13033
+ parser_lex(parser);
12184
13034
 
12185
13035
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12186
- return (yp_node_t *) yp_or_write_node_create(parser, node, &token, value);
13036
+ yp_node_t *result = (yp_node_t *) yp_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
13037
+
13038
+ yp_node_destroy(parser, node);
13039
+ return result;
12187
13040
  }
12188
13041
  case YP_NODE_CALL_NODE: {
12189
13042
  yp_call_node_t *call_node = (yp_call_node_t *) node;
@@ -12193,25 +13046,22 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12193
13046
  // will transform it into a local variable write.
12194
13047
  if (yp_call_node_variable_call_p(call_node)) {
12195
13048
  yp_location_t message_loc = call_node->message_loc;
12196
- yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
13049
+ yp_constant_id_t constant_id = yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12197
13050
 
12198
13051
  if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
12199
13052
  yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
12200
13053
  }
12201
13054
 
12202
13055
  parser_lex(parser);
12203
-
12204
- yp_token_t operator = not_provided(parser);
12205
- node = parse_target(parser, node, &operator, NULL);
12206
-
12207
13056
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12208
- return (yp_node_t *) yp_or_write_node_create(parser, node, &token, value);
13057
+ yp_node_t *result = (yp_node_t *) yp_local_variable_or_write_node_create(parser, node, &token, value, constant_id, 0);
13058
+
13059
+ yp_node_destroy(parser, node);
13060
+ return result;
12209
13061
  }
12210
13062
 
12211
13063
  parser_lex(parser);
12212
-
12213
- yp_token_t operator = not_provided(parser);
12214
- node = parse_target(parser, node, &operator, NULL);
13064
+ node = parse_target(parser, node);
12215
13065
 
12216
13066
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12217
13067
  return (yp_node_t *) yp_call_operator_or_write_node_create(parser, (yp_call_node_t *) node, &token, value);
@@ -12247,19 +13097,57 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12247
13097
  case YP_NODE_NUMBERED_REFERENCE_READ_NODE:
12248
13098
  yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, "Can't set variable");
12249
13099
  /* fallthrough */
12250
- case YP_NODE_CLASS_VARIABLE_READ_NODE:
12251
- case YP_NODE_CONSTANT_PATH_NODE:
12252
- case YP_NODE_CONSTANT_READ_NODE:
12253
- case YP_NODE_GLOBAL_VARIABLE_READ_NODE:
12254
- case YP_NODE_INSTANCE_VARIABLE_READ_NODE:
13100
+ case YP_NODE_GLOBAL_VARIABLE_READ_NODE: {
13101
+ parser_lex(parser);
13102
+
13103
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13104
+ yp_node_t *result = (yp_node_t *) yp_global_variable_operator_write_node_create(parser, node, &token, value);
13105
+
13106
+ yp_node_destroy(parser, node);
13107
+ return result;
13108
+ }
13109
+ case YP_NODE_CLASS_VARIABLE_READ_NODE: {
13110
+ parser_lex(parser);
13111
+
13112
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13113
+ yp_node_t *result = (yp_node_t *) yp_class_variable_operator_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value);
13114
+
13115
+ yp_node_destroy(parser, node);
13116
+ return result;
13117
+ }
13118
+ case YP_NODE_CONSTANT_PATH_NODE: {
13119
+ parser_lex(parser);
13120
+
13121
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13122
+ return (yp_node_t *) yp_constant_path_operator_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value);
13123
+ }
13124
+ case YP_NODE_CONSTANT_READ_NODE: {
13125
+ parser_lex(parser);
13126
+
13127
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13128
+ yp_node_t *result = (yp_node_t *) yp_constant_operator_write_node_create(parser, node, &token, value);
13129
+
13130
+ yp_node_destroy(parser, node);
13131
+ return result;
13132
+ }
13133
+ case YP_NODE_INSTANCE_VARIABLE_READ_NODE: {
13134
+ parser_lex(parser);
13135
+
13136
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13137
+ yp_node_t *result = (yp_node_t *) yp_instance_variable_operator_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value);
13138
+
13139
+ yp_node_destroy(parser, node);
13140
+ return result;
13141
+ }
12255
13142
  case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
13143
+ yp_local_variable_read_node_t *cast = (yp_local_variable_read_node_t *) node;
12256
13144
  parser_lex(parser);
12257
13145
 
12258
- yp_token_t operator = not_provided(parser);
12259
- node = parse_target(parser, node, &operator, NULL);
13146
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13147
+ yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
12260
13148
 
12261
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator");
12262
- return (yp_node_t *) yp_operator_write_node_create(parser, node, &token, value);
13149
+ yp_node_destroy(parser, node);
13150
+ return result;
12263
13151
  }
12264
13152
  case YP_NODE_CALL_NODE: {
12265
13153
  yp_call_node_t *call_node = (yp_call_node_t *) node;
@@ -12269,25 +13157,23 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12269
13157
  // will transform it into a local variable write.
12270
13158
  if (yp_call_node_variable_call_p(call_node)) {
12271
13159
  yp_location_t message_loc = call_node->message_loc;
12272
- yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
13160
+ yp_constant_id_t constant_id = yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12273
13161
 
12274
13162
  if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
12275
13163
  yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
12276
13164
  }
12277
13165
 
12278
13166
  parser_lex(parser);
13167
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13168
+ yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, constant_id, 0);
12279
13169
 
12280
- yp_token_t operator = not_provided(parser);
12281
- node = parse_target(parser, node, &operator, NULL);
12282
-
12283
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12284
- return (yp_node_t *) yp_operator_write_node_create(parser, node, &token, value);
13170
+ yp_node_destroy(parser, node);
13171
+ return result;
12285
13172
  }
12286
13173
 
12287
- yp_token_t operator = not_provided(parser);
12288
- node = parse_target(parser, node, &operator, NULL);
12289
-
13174
+ node = parse_target(parser, node);
12290
13175
  parser_lex(parser);
13176
+
12291
13177
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
12292
13178
  return (yp_node_t *) yp_call_operator_write_node_create(parser, (yp_call_node_t *) node, &token, value);
12293
13179
  }
@@ -12336,7 +13222,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12336
13222
  yp_string_list_t named_captures;
12337
13223
  yp_string_list_init(&named_captures);
12338
13224
 
12339
- yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc;
13225
+ const yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc;
12340
13226
 
12341
13227
  if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures, parser->encoding_changed, &parser->encoding)) {
12342
13228
  for (size_t index = 0; index < named_captures.length; index++) {
@@ -12456,7 +13342,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12456
13342
  yp_statements_node_body_append(statements, node);
12457
13343
 
12458
13344
  yp_node_t *predicate = parse_expression(parser, binding_power, "Expected a predicate after 'until'");
12459
- return (yp_node_t *) yp_until_node_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_NODE_BEGIN_NODE) ? YP_LOOP_FLAGS_BEGIN_MODIFIER : 0);
13345
+ return (yp_node_t *) yp_until_node_modifier_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_NODE_BEGIN_NODE) ? YP_LOOP_FLAGS_BEGIN_MODIFIER : 0);
12460
13346
  }
12461
13347
  case YP_TOKEN_KEYWORD_WHILE_MODIFIER: {
12462
13348
  parser_lex(parser);
@@ -12464,7 +13350,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12464
13350
  yp_statements_node_body_append(statements, node);
12465
13351
 
12466
13352
  yp_node_t *predicate = parse_expression(parser, binding_power, "Expected a predicate after 'while'");
12467
- return (yp_node_t *) yp_while_node_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_NODE_BEGIN_NODE) ? YP_LOOP_FLAGS_BEGIN_MODIFIER : 0);
13353
+ return (yp_node_t *) yp_while_node_modifier_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_NODE_BEGIN_NODE) ? YP_LOOP_FLAGS_BEGIN_MODIFIER : 0);
12468
13354
  }
12469
13355
  case YP_TOKEN_QUESTION_MARK: {
12470
13356
  parser_lex(parser);
@@ -12502,7 +13388,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12502
13388
 
12503
13389
  if (
12504
13390
  (parser->current.type == YP_TOKEN_PARENTHESIS_LEFT) ||
12505
- (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 2, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))
13391
+ (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 3, YP_TOKEN_UAMPERSAND, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))
12506
13392
  ) {
12507
13393
  // If we have a constant immediately following a '::' operator, then
12508
13394
  // this can either be a constant path or a method call, depending on
@@ -12734,7 +13620,7 @@ yp_metadata_read_u32(const char *ptr) {
12734
13620
  // ]*
12735
13621
  // ]
12736
13622
  // ```
12737
- static void
13623
+ void
12738
13624
  yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
12739
13625
  uint32_t filepath_size = yp_metadata_read_u32(metadata);
12740
13626
  metadata += 4;
@@ -12760,7 +13646,7 @@ yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
12760
13646
  uint32_t local_size = yp_metadata_read_u32(metadata);
12761
13647
  metadata += 4;
12762
13648
 
12763
- yp_parser_local_add_location(parser, metadata, metadata + local_size);
13649
+ yp_parser_local_add_location(parser, (const uint8_t *) metadata, (const uint8_t *) (metadata + local_size));
12764
13650
  metadata += local_size;
12765
13651
  }
12766
13652
  }
@@ -12772,7 +13658,9 @@ yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
12772
13658
 
12773
13659
  // Initialize a parser with the given start and end pointers.
12774
13660
  YP_EXPORTED_FUNCTION void
12775
- yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char *filepath) {
13661
+ yp_parser_init(yp_parser_t *parser, const uint8_t *source, size_t size, const char *filepath) {
13662
+ assert(source != NULL);
13663
+
12776
13664
  // Set filepath to the file that was passed
12777
13665
  if (!filepath) filepath = "";
12778
13666
  yp_string_t filepath_string;
@@ -12841,15 +13729,16 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
12841
13729
  size_t newline_size = size / 22;
12842
13730
  yp_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
12843
13731
 
12844
- assert(source != NULL);
12845
- if (size >= 3 && (unsigned char) source[0] == 0xef && (unsigned char) source[1] == 0xbb && (unsigned char) source[2] == 0xbf) {
12846
- // If the first three bytes of the source are the UTF-8 BOM, then we'll skip
12847
- // over them.
13732
+ // Skip past the UTF-8 BOM if it exists.
13733
+ if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
12848
13734
  parser->current.end += 3;
12849
- } else if (size >= 2 && source[0] == '#' && source[1] == '!') {
12850
- // If the first two bytes of the source are a shebang, then we'll indicate
12851
- // that the encoding comment is at the end of the shebang.
12852
- const char *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
13735
+ parser->encoding_comment_start += 3;
13736
+ }
13737
+
13738
+ // If the first two bytes of the source are a shebang, then we'll indicate
13739
+ // that the encoding comment is at the end of the shebang.
13740
+ if (peek(parser) == '#' && peek_offset(parser, 1) == '!') {
13741
+ const uint8_t *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
12853
13742
  if (encoding_comment_start) {
12854
13743
  parser->encoding_comment_start = encoding_comment_start + 1;
12855
13744
  }
@@ -12921,7 +13810,7 @@ yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
12921
13810
  // Parse and serialize the AST represented by the given source to the given
12922
13811
  // buffer.
12923
13812
  YP_EXPORTED_FUNCTION void
12924
- yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *metadata) {
13813
+ yp_parse_serialize(const uint8_t *source, size_t size, yp_buffer_t *buffer, const char *metadata) {
12925
13814
  yp_parser_t parser;
12926
13815
  yp_parser_init(&parser, source, size, NULL);
12927
13816
  if (metadata) yp_parser_metadata(&parser, metadata);