yarp 0.9.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -1
  3. data/Makefile +5 -1
  4. data/config.yml +156 -125
  5. data/docs/encoding.md +5 -5
  6. data/docs/serialization.md +2 -2
  7. data/ext/yarp/api_node.c +142 -98
  8. data/ext/yarp/extension.c +21 -7
  9. data/ext/yarp/extension.h +1 -1
  10. data/include/yarp/ast.h +327 -18
  11. data/include/yarp/defines.h +2 -1
  12. data/include/yarp/diagnostic.h +3 -3
  13. data/include/yarp/enc/yp_encoding.h +10 -10
  14. data/include/yarp/parser.h +19 -19
  15. data/include/yarp/regexp.h +1 -1
  16. data/include/yarp/unescape.h +4 -4
  17. data/include/yarp/util/yp_buffer.h +3 -0
  18. data/include/yarp/util/yp_char.h +16 -16
  19. data/include/yarp/util/yp_constant_pool.h +2 -2
  20. data/include/yarp/util/yp_newline_list.h +5 -5
  21. data/include/yarp/util/yp_string.h +4 -4
  22. data/include/yarp/util/yp_string_list.h +0 -3
  23. data/include/yarp/util/yp_strpbrk.h +1 -1
  24. data/include/yarp/version.h +2 -2
  25. data/include/yarp.h +5 -4
  26. data/lib/yarp/desugar_visitor.rb +59 -122
  27. data/lib/yarp/node.rb +230 -240
  28. data/lib/yarp/serialize.rb +16 -16
  29. data/lib/yarp.rb +5 -5
  30. data/src/diagnostic.c +1 -1
  31. data/src/enc/yp_big5.c +15 -42
  32. data/src/enc/yp_euc_jp.c +16 -43
  33. data/src/enc/yp_gbk.c +19 -46
  34. data/src/enc/yp_shift_jis.c +16 -43
  35. data/src/enc/yp_tables.c +36 -38
  36. data/src/enc/yp_unicode.c +20 -25
  37. data/src/enc/yp_windows_31j.c +16 -43
  38. data/src/node.c +1271 -899
  39. data/src/prettyprint.c +87 -48
  40. data/src/regexp.c +21 -21
  41. data/src/serialize.c +28 -15
  42. data/src/unescape.c +151 -121
  43. data/src/util/yp_buffer.c +7 -2
  44. data/src/util/yp_char.c +34 -34
  45. data/src/util/yp_constant_pool.c +4 -4
  46. data/src/util/yp_memchr.c +1 -1
  47. data/src/util/yp_newline_list.c +5 -4
  48. data/src/util/yp_string.c +22 -20
  49. data/src/util/yp_string_list.c +0 -6
  50. data/src/util/yp_strncasecmp.c +3 -6
  51. data/src/util/yp_strpbrk.c +8 -8
  52. data/src/yarp.c +355 -216
  53. data/yarp.gemspec +1 -1
  54. metadata +2 -2
data/src/yarp.c CHANGED
@@ -161,14 +161,18 @@ debug_token(yp_token_t * token) {
161
161
 
162
162
  #endif
163
163
 
164
+ /* Macros for min/max. */
165
+ #define MIN(a,b) (((a)<(b))?(a):(b))
166
+ #define MAX(a,b) (((a)>(b))?(a):(b))
167
+
164
168
  /******************************************************************************/
165
169
  /* Lex mode manipulations */
166
170
  /******************************************************************************/
167
171
 
168
172
  // Returns the incrementor character that should be used to increment the
169
173
  // nesting count if one is possible.
170
- static inline char
171
- lex_mode_incrementor(const char start) {
174
+ static inline uint8_t
175
+ lex_mode_incrementor(const uint8_t start) {
172
176
  switch (start) {
173
177
  case '(':
174
178
  case '[':
@@ -182,8 +186,8 @@ lex_mode_incrementor(const char start) {
182
186
 
183
187
  // Returns the matching character that should be used to terminate a list
184
188
  // beginning with the given character.
185
- static inline char
186
- lex_mode_terminator(const char start) {
189
+ static inline uint8_t
190
+ lex_mode_terminator(const uint8_t start) {
187
191
  switch (start) {
188
192
  case '(':
189
193
  return ')';
@@ -221,9 +225,9 @@ lex_mode_push(yp_parser_t *parser, yp_lex_mode_t lex_mode) {
221
225
 
222
226
  // Push on a new list lex mode.
223
227
  static inline bool
224
- lex_mode_push_list(yp_parser_t *parser, bool interpolation, char delimiter) {
225
- char incrementor = lex_mode_incrementor(delimiter);
226
- char terminator = lex_mode_terminator(delimiter);
228
+ lex_mode_push_list(yp_parser_t *parser, bool interpolation, uint8_t delimiter) {
229
+ uint8_t incrementor = lex_mode_incrementor(delimiter);
230
+ uint8_t terminator = lex_mode_terminator(delimiter);
227
231
 
228
232
  yp_lex_mode_t lex_mode = {
229
233
  .mode = YP_LEX_LIST,
@@ -237,7 +241,7 @@ lex_mode_push_list(yp_parser_t *parser, bool interpolation, char delimiter) {
237
241
 
238
242
  // These are the places where we need to split up the content of the list.
239
243
  // We'll use strpbrk to find the first of these characters.
240
- char *breakpoints = lex_mode.as.list.breakpoints;
244
+ uint8_t *breakpoints = lex_mode.as.list.breakpoints;
241
245
  memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
242
246
 
243
247
  // Now we'll add the terminator to the list of breakpoints.
@@ -260,7 +264,7 @@ lex_mode_push_list(yp_parser_t *parser, bool interpolation, char delimiter) {
260
264
 
261
265
  // Push on a new regexp lex mode.
262
266
  static inline bool
263
- lex_mode_push_regexp(yp_parser_t *parser, char incrementor, char terminator) {
267
+ lex_mode_push_regexp(yp_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
264
268
  yp_lex_mode_t lex_mode = {
265
269
  .mode = YP_LEX_REGEXP,
266
270
  .as.regexp = {
@@ -273,7 +277,7 @@ lex_mode_push_regexp(yp_parser_t *parser, char incrementor, char terminator) {
273
277
  // These are the places where we need to split up the content of the
274
278
  // regular expression. We'll use strpbrk to find the first of these
275
279
  // characters.
276
- char *breakpoints = lex_mode.as.regexp.breakpoints;
280
+ uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
277
281
  memcpy(breakpoints, "\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
278
282
 
279
283
  // First we'll add the terminator.
@@ -289,7 +293,7 @@ lex_mode_push_regexp(yp_parser_t *parser, char incrementor, char terminator) {
289
293
 
290
294
  // Push on a new string lex mode.
291
295
  static inline bool
292
- lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed, char incrementor, char terminator) {
296
+ lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
293
297
  yp_lex_mode_t lex_mode = {
294
298
  .mode = YP_LEX_STRING,
295
299
  .as.string = {
@@ -303,7 +307,7 @@ lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed
303
307
 
304
308
  // These are the places where we need to split up the content of the
305
309
  // string. We'll use strpbrk to find the first of these characters.
306
- char *breakpoints = lex_mode.as.string.breakpoints;
310
+ uint8_t *breakpoints = lex_mode.as.string.breakpoints;
307
311
  memcpy(breakpoints, "\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
308
312
 
309
313
  // Now add in the terminator.
@@ -380,6 +384,9 @@ lex_state_arg_p(yp_parser_t *parser) {
380
384
 
381
385
  static inline bool
382
386
  lex_state_spcarg_p(yp_parser_t *parser, bool space_seen) {
387
+ if (parser->current.end >= parser->end) {
388
+ return false;
389
+ }
383
390
  return lex_state_arg_p(parser) && space_seen && !yp_char_is_whitespace(*parser->current.end);
384
391
  }
385
392
 
@@ -420,7 +427,7 @@ debug_lex_state_set(yp_parser_t *parser, yp_lex_state_t state, char const * call
420
427
 
421
428
  // Retrieve the constant pool id for the given location.
422
429
  static inline yp_constant_id_t
423
- yp_parser_constant_id_location(yp_parser_t *parser, const char *start, const char *end) {
430
+ yp_parser_constant_id_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
424
431
  return yp_constant_pool_insert(&parser->constant_pool, start, (size_t) (end - start));
425
432
  }
426
433
 
@@ -606,13 +613,45 @@ yp_scope_node_init(yp_node_t *node, yp_scope_node_t *scope) {
606
613
  /* Node creation functions */
607
614
  /******************************************************************************/
608
615
 
616
+ // Parse the decimal number represented by the range of bytes. Returns
617
+ // UINT32_MAX if the number fails to parse. This function assumes that the range
618
+ // of bytes has already been validated to contain only decimal digits.
619
+ static uint32_t
620
+ parse_decimal_number(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
621
+ ptrdiff_t diff = end - start;
622
+ assert(diff > 0 && ((unsigned long) diff < SIZE_MAX));
623
+ size_t length = (size_t) diff;
624
+
625
+ char *digits = calloc(length + 1, sizeof(char));
626
+ memcpy(digits, start, length);
627
+ digits[length] = '\0';
628
+
629
+ char *endptr;
630
+ errno = 0;
631
+ unsigned long value = strtoul(digits, &endptr, 10);
632
+
633
+ if ((digits == endptr) || (*endptr != '\0') || (errno == ERANGE)) {
634
+ yp_diagnostic_list_append(&parser->error_list, start, end, "invalid decimal number");
635
+ value = UINT32_MAX;
636
+ }
637
+
638
+ free(digits);
639
+
640
+ if (value > UINT32_MAX) {
641
+ yp_diagnostic_list_append(&parser->error_list, start, end, "invalid decimal number");
642
+ value = UINT32_MAX;
643
+ }
644
+
645
+ return (uint32_t) value;
646
+ }
647
+
609
648
  // Parse out the options for a regular expression.
610
649
  static inline yp_node_flags_t
611
650
  yp_regular_expression_flags_create(const yp_token_t *closing) {
612
651
  yp_node_flags_t flags = 0;
613
652
 
614
653
  if (closing->type == YP_TOKEN_REGEXP_END) {
615
- for (const char *flag = closing->start + 1; flag < closing->end; flag++) {
654
+ for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
616
655
  switch (*flag) {
617
656
  case 'i': flags |= YP_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
618
657
  case 'm': flags |= YP_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
@@ -654,7 +693,7 @@ yp_alloc_node(YP_ATTRIBUTE_UNUSED yp_parser_t *parser, size_t size) {
654
693
 
655
694
  // Allocate a new MissingNode node.
656
695
  static yp_missing_node_t *
657
- yp_missing_node_create(yp_parser_t *parser, const char *start, const char *end) {
696
+ yp_missing_node_create(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
658
697
  yp_missing_node_t *node = YP_ALLOC_NODE(parser, yp_missing_node_t);
659
698
  *node = (yp_missing_node_t) {{ .type = YP_NODE_MISSING_NODE, .location = { .start = start, .end = end } }};
660
699
  return node;
@@ -923,7 +962,7 @@ yp_array_pattern_node_requireds_append(yp_array_pattern_node_t *node, yp_node_t
923
962
  static yp_assoc_node_t *
924
963
  yp_assoc_node_create(yp_parser_t *parser, yp_node_t *key, const yp_token_t *operator, yp_node_t *value) {
925
964
  yp_assoc_node_t *node = YP_ALLOC_NODE(parser, yp_assoc_node_t);
926
- const char *end;
965
+ const uint8_t *end;
927
966
 
928
967
  if (value != NULL) {
929
968
  end = value->location.end;
@@ -1107,7 +1146,7 @@ static yp_block_parameters_node_t *
1107
1146
  yp_block_parameters_node_create(yp_parser_t *parser, yp_parameters_node_t *parameters, const yp_token_t *opening) {
1108
1147
  yp_block_parameters_node_t *node = YP_ALLOC_NODE(parser, yp_block_parameters_node_t);
1109
1148
 
1110
- const char *start;
1149
+ const uint8_t *start;
1111
1150
  if (opening->type != YP_TOKEN_NOT_PROVIDED) {
1112
1151
  start = opening->start;
1113
1152
  } else if (parameters != NULL) {
@@ -1116,7 +1155,7 @@ yp_block_parameters_node_create(yp_parser_t *parser, yp_parameters_node_t *param
1116
1155
  start = NULL;
1117
1156
  }
1118
1157
 
1119
- const char *end;
1158
+ const uint8_t *end;
1120
1159
  if (parameters != NULL) {
1121
1160
  end = parameters->base.location.end;
1122
1161
  } else if (opening->type != YP_TOKEN_NOT_PROVIDED) {
@@ -1237,8 +1276,8 @@ static yp_call_node_t *
1237
1276
  yp_call_node_binary_create(yp_parser_t *parser, yp_node_t *receiver, yp_token_t *operator, yp_node_t *argument) {
1238
1277
  yp_call_node_t *node = yp_call_node_create(parser);
1239
1278
 
1240
- node->base.location.start = receiver->location.start;
1241
- node->base.location.end = argument->location.end;
1279
+ node->base.location.start = MIN(receiver->location.start, argument->location.start);
1280
+ node->base.location.end = MAX(receiver->location.end, argument->location.end);
1242
1281
 
1243
1282
  node->receiver = receiver;
1244
1283
  node->message_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
@@ -1434,7 +1473,7 @@ yp_call_operator_write_node_create(yp_parser_t *parser, yp_call_node_t *target,
1434
1473
  .target = target,
1435
1474
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1436
1475
  .value = value,
1437
- .operator_id = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
1476
+ .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
1438
1477
  };
1439
1478
 
1440
1479
  return node;
@@ -1555,8 +1594,7 @@ yp_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const y
1555
1594
 
1556
1595
  // Allocate and initialize a new ClassVariableAndWriteNode node.
1557
1596
  static yp_class_variable_and_write_node_t *
1558
- yp_class_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1559
- assert(YP_NODE_TYPE_P(target, YP_NODE_CLASS_VARIABLE_READ_NODE));
1597
+ yp_class_variable_and_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1560
1598
  assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
1561
1599
  yp_class_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_and_write_node_t);
1562
1600
 
@@ -1564,11 +1602,12 @@ yp_class_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target,
1564
1602
  {
1565
1603
  .type = YP_NODE_CLASS_VARIABLE_AND_WRITE_NODE,
1566
1604
  .location = {
1567
- .start = target->location.start,
1605
+ .start = target->base.location.start,
1568
1606
  .end = value->location.end
1569
1607
  }
1570
1608
  },
1571
- .name_loc = target->location,
1609
+ .name = target->name,
1610
+ .name_loc = target->base.location,
1572
1611
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1573
1612
  .value = value
1574
1613
  };
@@ -1578,18 +1617,19 @@ yp_class_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target,
1578
1617
 
1579
1618
  // Allocate and initialize a new ClassVariableOperatorWriteNode node.
1580
1619
  static yp_class_variable_operator_write_node_t *
1581
- yp_class_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1620
+ yp_class_variable_operator_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1582
1621
  yp_class_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_operator_write_node_t);
1583
1622
 
1584
1623
  *node = (yp_class_variable_operator_write_node_t) {
1585
1624
  {
1586
1625
  .type = YP_NODE_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
1587
1626
  .location = {
1588
- .start = target->location.start,
1627
+ .start = target->base.location.start,
1589
1628
  .end = value->location.end
1590
1629
  }
1591
1630
  },
1592
- .name_loc = target->location,
1631
+ .name = target->name,
1632
+ .name_loc = target->base.location,
1593
1633
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1594
1634
  .value = value,
1595
1635
  .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
@@ -1600,8 +1640,7 @@ yp_class_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *tar
1600
1640
 
1601
1641
  // Allocate and initialize a new ClassVariableOrWriteNode node.
1602
1642
  static yp_class_variable_or_write_node_t *
1603
- yp_class_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1604
- assert(YP_NODE_TYPE_P(target, YP_NODE_CLASS_VARIABLE_READ_NODE));
1643
+ yp_class_variable_or_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1605
1644
  assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
1606
1645
  yp_class_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_or_write_node_t);
1607
1646
 
@@ -1609,11 +1648,12 @@ yp_class_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, c
1609
1648
  {
1610
1649
  .type = YP_NODE_CLASS_VARIABLE_OR_WRITE_NODE,
1611
1650
  .location = {
1612
- .start = target->location.start,
1651
+ .start = target->base.location.start,
1613
1652
  .end = value->location.end
1614
1653
  }
1615
1654
  },
1616
- .name_loc = target->location,
1655
+ .name = target->name,
1656
+ .name_loc = target->base.location,
1617
1657
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
1618
1658
  .value = value
1619
1659
  };
@@ -1626,13 +1666,21 @@ static yp_class_variable_read_node_t *
1626
1666
  yp_class_variable_read_node_create(yp_parser_t *parser, const yp_token_t *token) {
1627
1667
  assert(token->type == YP_TOKEN_CLASS_VARIABLE);
1628
1668
  yp_class_variable_read_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_read_node_t);
1629
- *node = (yp_class_variable_read_node_t) {{ .type = YP_NODE_CLASS_VARIABLE_READ_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }};
1669
+
1670
+ *node = (yp_class_variable_read_node_t) {
1671
+ {
1672
+ .type = YP_NODE_CLASS_VARIABLE_READ_NODE,
1673
+ .location = YP_LOCATION_TOKEN_VALUE(token)
1674
+ },
1675
+ .name = yp_parser_constant_id_location(parser, token->start, token->end)
1676
+ };
1677
+
1630
1678
  return node;
1631
1679
  }
1632
1680
 
1633
1681
  // Initialize a new ClassVariableWriteNode node from a ClassVariableRead node.
1634
1682
  static yp_class_variable_write_node_t *
1635
- yp_class_variable_read_node_to_class_variable_write_node(yp_parser_t *parser, yp_class_variable_read_node_t *read_node, yp_token_t *operator, yp_node_t *value) {
1683
+ yp_class_variable_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *read_node, yp_token_t *operator, yp_node_t *value) {
1636
1684
  yp_class_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_write_node_t);
1637
1685
 
1638
1686
  *node = (yp_class_variable_write_node_t) {
@@ -1643,6 +1691,7 @@ yp_class_variable_read_node_to_class_variable_write_node(yp_parser_t *parser, yp
1643
1691
  .end = value->location.end
1644
1692
  },
1645
1693
  },
1694
+ .name = read_node->name,
1646
1695
  .name_loc = YP_LOCATION_NODE_VALUE((yp_node_t *) read_node),
1647
1696
  .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
1648
1697
  .value = value
@@ -1875,7 +1924,7 @@ yp_def_node_create(
1875
1924
  const yp_token_t *end_keyword
1876
1925
  ) {
1877
1926
  yp_def_node_t *node = YP_ALLOC_NODE(parser, yp_def_node_t);
1878
- const char *end;
1927
+ const uint8_t *end;
1879
1928
 
1880
1929
  if (end_keyword->type == YP_TOKEN_NOT_PROVIDED) {
1881
1930
  end = body->location.end;
@@ -1930,7 +1979,7 @@ yp_defined_node_create(yp_parser_t *parser, const yp_token_t *lparen, yp_node_t
1930
1979
  static yp_else_node_t *
1931
1980
  yp_else_node_create(yp_parser_t *parser, const yp_token_t *else_keyword, yp_statements_node_t *statements, const yp_token_t *end_keyword) {
1932
1981
  yp_else_node_t *node = YP_ALLOC_NODE(parser, yp_else_node_t);
1933
- const char *end = NULL;
1982
+ const uint8_t *end = NULL;
1934
1983
  if ((end_keyword->type == YP_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
1935
1984
  end = statements->base.location.end;
1936
1985
  } else {
@@ -2410,7 +2459,7 @@ yp_if_node_create(yp_parser_t *parser,
2410
2459
  yp_flip_flop(predicate);
2411
2460
  yp_if_node_t *node = YP_ALLOC_NODE(parser, yp_if_node_t);
2412
2461
 
2413
- const char *end;
2462
+ const uint8_t *end;
2414
2463
  if (end_keyword->type != YP_TOKEN_NOT_PROVIDED) {
2415
2464
  end = end_keyword->end;
2416
2465
  } else if (consequent != NULL) {
@@ -2593,7 +2642,7 @@ static yp_in_node_t *
2593
2642
  yp_in_node_create(yp_parser_t *parser, yp_node_t *pattern, yp_statements_node_t *statements, const yp_token_t *in_keyword, const yp_token_t *then_keyword) {
2594
2643
  yp_in_node_t *node = YP_ALLOC_NODE(parser, yp_in_node_t);
2595
2644
 
2596
- const char *end;
2645
+ const uint8_t *end;
2597
2646
  if (statements != NULL) {
2598
2647
  end = statements->base.location.end;
2599
2648
  } else if (then_keyword->type != YP_TOKEN_NOT_PROVIDED) {
@@ -2621,8 +2670,7 @@ yp_in_node_create(yp_parser_t *parser, yp_node_t *pattern, yp_statements_node_t
2621
2670
 
2622
2671
  // Allocate and initialize a new InstanceVariableAndWriteNode node.
2623
2672
  static yp_instance_variable_and_write_node_t *
2624
- yp_instance_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2625
- assert(YP_NODE_TYPE_P(target, YP_NODE_INSTANCE_VARIABLE_READ_NODE));
2673
+ yp_instance_variable_and_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2626
2674
  assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2627
2675
  yp_instance_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_and_write_node_t);
2628
2676
 
@@ -2630,11 +2678,12 @@ yp_instance_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *targe
2630
2678
  {
2631
2679
  .type = YP_NODE_INSTANCE_VARIABLE_AND_WRITE_NODE,
2632
2680
  .location = {
2633
- .start = target->location.start,
2681
+ .start = target->base.location.start,
2634
2682
  .end = value->location.end
2635
2683
  }
2636
2684
  },
2637
- .name_loc = target->location,
2685
+ .name = target->name,
2686
+ .name_loc = target->base.location,
2638
2687
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2639
2688
  .value = value
2640
2689
  };
@@ -2644,18 +2693,19 @@ yp_instance_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *targe
2644
2693
 
2645
2694
  // Allocate and initialize a new InstanceVariableOperatorWriteNode node.
2646
2695
  static yp_instance_variable_operator_write_node_t *
2647
- yp_instance_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2696
+ yp_instance_variable_operator_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2648
2697
  yp_instance_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_operator_write_node_t);
2649
2698
 
2650
2699
  *node = (yp_instance_variable_operator_write_node_t) {
2651
2700
  {
2652
2701
  .type = YP_NODE_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
2653
2702
  .location = {
2654
- .start = target->location.start,
2703
+ .start = target->base.location.start,
2655
2704
  .end = value->location.end
2656
2705
  }
2657
2706
  },
2658
- .name_loc = target->location,
2707
+ .name = target->name,
2708
+ .name_loc = target->base.location,
2659
2709
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2660
2710
  .value = value,
2661
2711
  .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
@@ -2666,8 +2716,7 @@ yp_instance_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *
2666
2716
 
2667
2717
  // Allocate and initialize a new InstanceVariableOrWriteNode node.
2668
2718
  static yp_instance_variable_or_write_node_t *
2669
- yp_instance_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2670
- assert(YP_NODE_TYPE_P(target, YP_NODE_INSTANCE_VARIABLE_READ_NODE));
2719
+ yp_instance_variable_or_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2671
2720
  assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
2672
2721
  yp_instance_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_or_write_node_t);
2673
2722
 
@@ -2675,11 +2724,12 @@ yp_instance_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target
2675
2724
  {
2676
2725
  .type = YP_NODE_INSTANCE_VARIABLE_OR_WRITE_NODE,
2677
2726
  .location = {
2678
- .start = target->location.start,
2727
+ .start = target->base.location.start,
2679
2728
  .end = value->location.end
2680
2729
  }
2681
2730
  },
2682
- .name_loc = target->location,
2731
+ .name = target->name,
2732
+ .name_loc = target->base.location,
2683
2733
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2684
2734
  .value = value
2685
2735
  };
@@ -2693,9 +2743,13 @@ yp_instance_variable_read_node_create(yp_parser_t *parser, const yp_token_t *tok
2693
2743
  assert(token->type == YP_TOKEN_INSTANCE_VARIABLE);
2694
2744
  yp_instance_variable_read_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_read_node_t);
2695
2745
 
2696
- *node = (yp_instance_variable_read_node_t) {{
2697
- .type = YP_NODE_INSTANCE_VARIABLE_READ_NODE, .location = YP_LOCATION_TOKEN_VALUE(token)
2698
- }};
2746
+ *node = (yp_instance_variable_read_node_t) {
2747
+ {
2748
+ .type = YP_NODE_INSTANCE_VARIABLE_READ_NODE,
2749
+ .location = YP_LOCATION_TOKEN_VALUE(token)
2750
+ },
2751
+ .name = yp_parser_constant_id_location(parser, token->start, token->end)
2752
+ };
2699
2753
 
2700
2754
  return node;
2701
2755
  }
@@ -2712,6 +2766,7 @@ yp_instance_variable_write_node_create(yp_parser_t *parser, yp_instance_variable
2712
2766
  .end = value->location.end
2713
2767
  }
2714
2768
  },
2769
+ .name = read_node->name,
2715
2770
  .name_loc = YP_LOCATION_NODE_BASE_VALUE(read_node),
2716
2771
  .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2717
2772
  .value = value
@@ -2743,8 +2798,13 @@ yp_interpolated_regular_expression_node_create(yp_parser_t *parser, const yp_tok
2743
2798
 
2744
2799
  static inline void
2745
2800
  yp_interpolated_regular_expression_node_append(yp_interpolated_regular_expression_node_t *node, yp_node_t *part) {
2801
+ if (node->base.location.start > part->location.start) {
2802
+ node->base.location.start = part->location.start;
2803
+ }
2804
+ if (node->base.location.end < part->location.end) {
2805
+ node->base.location.end = part->location.end;
2806
+ }
2746
2807
  yp_node_list_append(&node->parts, part);
2747
- node->base.location.end = part->location.end;
2748
2808
  }
2749
2809
 
2750
2810
  static inline void
@@ -2816,10 +2876,11 @@ yp_interpolated_symbol_node_create(yp_parser_t *parser, const yp_token_t *openin
2816
2876
 
2817
2877
  static inline void
2818
2878
  yp_interpolated_symbol_node_append(yp_interpolated_symbol_node_t *node, yp_node_t *part) {
2819
- yp_node_list_append(&node->parts, part);
2820
- if (!node->base.location.start) {
2879
+ if (node->parts.size == 0 && node->opening_loc.start == NULL) {
2821
2880
  node->base.location.start = part->location.start;
2822
2881
  }
2882
+
2883
+ yp_node_list_append(&node->parts, part);
2823
2884
  node->base.location.end = part->location.end;
2824
2885
  }
2825
2886
 
@@ -2959,7 +3020,7 @@ yp_lambda_node_create(
2959
3020
 
2960
3021
  // Allocate and initialize a new LocalVariableAndWriteNode node.
2961
3022
  static yp_local_variable_and_write_node_t *
2962
- yp_local_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id, uint32_t depth) {
3023
+ yp_local_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) {
2963
3024
  assert(YP_NODE_TYPE_P(target, YP_NODE_LOCAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_CALL_NODE));
2964
3025
  assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2965
3026
  yp_local_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_and_write_node_t);
@@ -2975,7 +3036,7 @@ yp_local_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target,
2975
3036
  .name_loc = target->location,
2976
3037
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2977
3038
  .value = value,
2978
- .constant_id = constant_id,
3039
+ .name = name,
2979
3040
  .depth = depth
2980
3041
  };
2981
3042
 
@@ -2984,7 +3045,7 @@ yp_local_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target,
2984
3045
 
2985
3046
  // Allocate and initialize a new LocalVariableOperatorWriteNode node.
2986
3047
  static yp_local_variable_operator_write_node_t *
2987
- yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id, uint32_t depth) {
3048
+ yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) {
2988
3049
  yp_local_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_operator_write_node_t);
2989
3050
 
2990
3051
  *node = (yp_local_variable_operator_write_node_t) {
@@ -2998,8 +3059,8 @@ yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *tar
2998
3059
  .name_loc = target->location,
2999
3060
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
3000
3061
  .value = value,
3001
- .constant_id = constant_id,
3002
- .operator_id = yp_parser_constant_id_location(parser, operator->start, operator->end - 1),
3062
+ .name = name,
3063
+ .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1),
3003
3064
  .depth = depth
3004
3065
  };
3005
3066
 
@@ -3008,7 +3069,7 @@ yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *tar
3008
3069
 
3009
3070
  // Allocate and initialize a new LocalVariableOrWriteNode node.
3010
3071
  static yp_local_variable_or_write_node_t *
3011
- yp_local_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id, uint32_t depth) {
3072
+ yp_local_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) {
3012
3073
  assert(YP_NODE_TYPE_P(target, YP_NODE_LOCAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_CALL_NODE));
3013
3074
  assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
3014
3075
  yp_local_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_or_write_node_t);
@@ -3024,7 +3085,7 @@ yp_local_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, c
3024
3085
  .name_loc = target->location,
3025
3086
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
3026
3087
  .value = value,
3027
- .constant_id = constant_id,
3088
+ .name = name,
3028
3089
  .depth = depth
3029
3090
  };
3030
3091
 
@@ -3041,7 +3102,7 @@ yp_local_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name,
3041
3102
  .type = YP_NODE_LOCAL_VARIABLE_READ_NODE,
3042
3103
  .location = YP_LOCATION_TOKEN_VALUE(name)
3043
3104
  },
3044
- .constant_id = yp_parser_constant_id_token(parser, name),
3105
+ .name = yp_parser_constant_id_token(parser, name),
3045
3106
  .depth = depth
3046
3107
  };
3047
3108
 
@@ -3050,7 +3111,7 @@ yp_local_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name,
3050
3111
 
3051
3112
  // Allocate and initialize a new LocalVariableWriteNode node.
3052
3113
  static yp_local_variable_write_node_t *
3053
- yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t constant_id, uint32_t depth, yp_node_t *value, const yp_location_t *name_loc, const yp_token_t *operator) {
3114
+ yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t name, uint32_t depth, yp_node_t *value, const yp_location_t *name_loc, const yp_token_t *operator) {
3054
3115
  yp_local_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_write_node_t);
3055
3116
 
3056
3117
  *node = (yp_local_variable_write_node_t) {
@@ -3061,7 +3122,7 @@ yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t consta
3061
3122
  .end = value->location.end
3062
3123
  }
3063
3124
  },
3064
- .constant_id = constant_id,
3125
+ .name = name,
3065
3126
  .depth = depth,
3066
3127
  .value = value,
3067
3128
  .name_loc = *name_loc,
@@ -3081,7 +3142,7 @@ yp_local_variable_target_node_create(yp_parser_t *parser, const yp_token_t *name
3081
3142
  .type = YP_NODE_LOCAL_VARIABLE_TARGET_NODE,
3082
3143
  .location = YP_LOCATION_TOKEN_VALUE(name)
3083
3144
  },
3084
- .constant_id = yp_parser_constant_id_token(parser, name),
3145
+ .name = yp_parser_constant_id_token(parser, name),
3085
3146
  .depth = 0
3086
3147
  };
3087
3148
 
@@ -3260,7 +3321,8 @@ yp_numbered_reference_read_node_create(yp_parser_t *parser, const yp_token_t *na
3260
3321
  {
3261
3322
  .type = YP_NODE_NUMBERED_REFERENCE_READ_NODE,
3262
3323
  .location = YP_LOCATION_TOKEN_VALUE(name),
3263
- }
3324
+ },
3325
+ .number = parse_decimal_number(parser, name->start + 1, name->end)
3264
3326
  };
3265
3327
 
3266
3328
  return node;
@@ -3279,7 +3341,7 @@ yp_optional_parameter_node_create(yp_parser_t *parser, const yp_token_t *name, c
3279
3341
  .end = value->location.end
3280
3342
  }
3281
3343
  },
3282
- .constant_id = yp_parser_constant_id_token(parser, name),
3344
+ .name = yp_parser_constant_id_token(parser, name),
3283
3345
  .name_loc = YP_LOCATION_TOKEN_VALUE(name),
3284
3346
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
3285
3347
  .value = value
@@ -3576,8 +3638,8 @@ yp_regular_expression_node_create(yp_parser_t *parser, const yp_token_t *opening
3576
3638
  .type = YP_NODE_REGULAR_EXPRESSION_NODE,
3577
3639
  .flags = yp_regular_expression_flags_create(closing),
3578
3640
  .location = {
3579
- .start = opening->start,
3580
- .end = closing->end
3641
+ .start = MIN(opening->start, closing->start),
3642
+ .end = MAX(opening->end, closing->end)
3581
3643
  }
3582
3644
  },
3583
3645
  .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
@@ -3630,7 +3692,7 @@ yp_required_parameter_node_create(yp_parser_t *parser, const yp_token_t *token)
3630
3692
  .type = YP_NODE_REQUIRED_PARAMETER_NODE,
3631
3693
  .location = YP_LOCATION_TOKEN_VALUE(token)
3632
3694
  },
3633
- .constant_id = yp_parser_constant_id_token(parser, token)
3695
+ .name = yp_parser_constant_id_token(parser, token)
3634
3696
  };
3635
3697
 
3636
3698
  return node;
@@ -3881,19 +3943,21 @@ yp_statements_node_body_length(yp_statements_node_t *node) {
3881
3943
 
3882
3944
  // Set the location of the given StatementsNode.
3883
3945
  static void
3884
- yp_statements_node_location_set(yp_statements_node_t *node, const char *start, const char *end) {
3946
+ yp_statements_node_location_set(yp_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
3885
3947
  node->base.location = (yp_location_t) { .start = start, .end = end };
3886
3948
  }
3887
3949
 
3888
3950
  // Append a new node to the given StatementsNode node's body.
3889
3951
  static void
3890
3952
  yp_statements_node_body_append(yp_statements_node_t *node, yp_node_t *statement) {
3891
- if (yp_statements_node_body_length(node) == 0) {
3953
+ if (yp_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
3892
3954
  node->base.location.start = statement->location.start;
3893
3955
  }
3956
+ if (statement->location.end > node->base.location.end) {
3957
+ node->base.location.end = statement->location.end;
3958
+ }
3894
3959
 
3895
3960
  yp_node_list_append(&node->body, statement);
3896
- node->base.location.end = statement->location.end;
3897
3961
 
3898
3962
  // Every statement gets marked as a place where a newline can occur.
3899
3963
  statement->flags |= YP_NODE_FLAG_NEWLINE;
@@ -3947,7 +4011,7 @@ yp_super_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_argument
3947
4011
  assert(keyword->type == YP_TOKEN_KEYWORD_SUPER);
3948
4012
  yp_super_node_t *node = YP_ALLOC_NODE(parser, yp_super_node_t);
3949
4013
 
3950
- const char *end;
4014
+ const uint8_t *end;
3951
4015
  if (arguments->block != NULL) {
3952
4016
  end = arguments->block->base.location.end;
3953
4017
  } else if (arguments->closing_loc.start != NULL) {
@@ -4038,7 +4102,7 @@ yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) {
4038
4102
  // Check if the given node is a label in a hash.
4039
4103
  static bool
4040
4104
  yp_symbol_node_label_p(yp_node_t *node) {
4041
- const char *end = NULL;
4105
+ const uint8_t *end = NULL;
4042
4106
 
4043
4107
  switch (YP_NODE_TYPE(node)) {
4044
4108
  case YP_NODE_SYMBOL_NODE:
@@ -4146,7 +4210,7 @@ yp_unless_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t
4146
4210
  yp_flip_flop(predicate);
4147
4211
  yp_unless_node_t *node = YP_ALLOC_NODE(parser, yp_unless_node_t);
4148
4212
 
4149
- const char *end;
4213
+ const uint8_t *end;
4150
4214
  if (statements != NULL) {
4151
4215
  end = statements->base.location.end;
4152
4216
  } else {
@@ -4363,7 +4427,7 @@ static yp_yield_node_t *
4363
4427
  yp_yield_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_location_t *lparen_loc, yp_arguments_node_t *arguments, const yp_location_t *rparen_loc) {
4364
4428
  yp_yield_node_t *node = YP_ALLOC_NODE(parser, yp_yield_node_t);
4365
4429
 
4366
- const char *end;
4430
+ const uint8_t *end;
4367
4431
  if (rparen_loc->start != NULL) {
4368
4432
  end = rparen_loc->end;
4369
4433
  } else if (arguments != NULL) {
@@ -4437,7 +4501,7 @@ yp_parser_local_depth(yp_parser_t *parser, yp_token_t *token) {
4437
4501
 
4438
4502
  // Add a local variable from a location to the current scope.
4439
4503
  static yp_constant_id_t
4440
- yp_parser_local_add_location(yp_parser_t *parser, const char *start, const char *end) {
4504
+ yp_parser_local_add_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
4441
4505
  yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, start, end);
4442
4506
 
4443
4507
  if (!yp_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
@@ -4486,15 +4550,13 @@ yp_parser_scope_pop(yp_parser_t *parser) {
4486
4550
  // reason we have the encoding_changed boolean to check if we need to go through
4487
4551
  // the function pointer or can just directly use the UTF-8 functions.
4488
4552
  static inline size_t
4489
- char_is_identifier_start(yp_parser_t *parser, const char *c) {
4490
- const unsigned char uc = (unsigned char) *c;
4491
-
4553
+ char_is_identifier_start(yp_parser_t *parser, const uint8_t *b) {
4492
4554
  if (parser->encoding_changed) {
4493
- return parser->encoding.alpha_char(c, parser->end - c) || (uc == '_') || (uc >= 0x80);
4494
- } else if (uc < 0x80) {
4495
- return (yp_encoding_unicode_table[uc] & YP_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (uc == '_');
4555
+ return parser->encoding.alpha_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
4556
+ } else if (*b < 0x80) {
4557
+ return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
4496
4558
  } else {
4497
- return (size_t) (yp_encoding_utf_8_alpha_char(c, parser->end - c) || 1u);
4559
+ return (size_t) (yp_encoding_utf_8_alpha_char(b, parser->end - b) || 1u);
4498
4560
  }
4499
4561
  }
4500
4562
 
@@ -4502,15 +4564,13 @@ char_is_identifier_start(yp_parser_t *parser, const char *c) {
4502
4564
  // the identifiers in a source file once the first character has been found. So
4503
4565
  // it's important that it be as fast as possible.
4504
4566
  static inline size_t
4505
- char_is_identifier(yp_parser_t *parser, const char *c) {
4506
- const unsigned char uc = (unsigned char) *c;
4507
-
4567
+ char_is_identifier(yp_parser_t *parser, const uint8_t *b) {
4508
4568
  if (parser->encoding_changed) {
4509
- return parser->encoding.alnum_char(c, parser->end - c) || (uc == '_') || (uc >= 0x80);
4510
- } else if (uc < 0x80) {
4511
- return (yp_encoding_unicode_table[uc] & YP_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (uc == '_');
4569
+ return parser->encoding.alnum_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
4570
+ } else if (*b < 0x80) {
4571
+ return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (*b == '_');
4512
4572
  } else {
4513
- return (size_t) (yp_encoding_utf_8_alnum_char(c, parser->end - c) || 1u);
4573
+ return (size_t) (yp_encoding_utf_8_alnum_char(b, parser->end - b) || 1u);
4514
4574
  }
4515
4575
  }
4516
4576
 
@@ -4532,15 +4592,15 @@ const unsigned int yp_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
4532
4592
  #undef PUNCT
4533
4593
 
4534
4594
  static inline bool
4535
- char_is_global_name_punctuation(const char c) {
4536
- const unsigned int i = (const unsigned int) c;
4595
+ char_is_global_name_punctuation(const uint8_t b) {
4596
+ const unsigned int i = (const unsigned int) b;
4537
4597
  if (i <= 0x20 || 0x7e < i) return false;
4538
4598
 
4539
- return (yp_global_name_punctuation_hash[(i - 0x20) / 32] >> (c % 32)) & 1;
4599
+ return (yp_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
4540
4600
  }
4541
4601
 
4542
4602
  static inline bool
4543
- token_is_numbered_parameter(const char *start, const char *end) {
4603
+ token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
4544
4604
  return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (yp_char_is_decimal_digit(start[1]));
4545
4605
  }
4546
4606
 
@@ -4594,8 +4654,8 @@ yp_do_loop_stack_p(yp_parser_t *parser) {
4594
4654
 
4595
4655
  // Get the next character in the source starting from +cursor+. If that position
4596
4656
  // is beyond the end of the source then return '\0'.
4597
- static inline char
4598
- peek_at(yp_parser_t *parser, const char *cursor) {
4657
+ static inline uint8_t
4658
+ peek_at(yp_parser_t *parser, const uint8_t *cursor) {
4599
4659
  if (cursor < parser->end) {
4600
4660
  return *cursor;
4601
4661
  } else {
@@ -4606,33 +4666,33 @@ peek_at(yp_parser_t *parser, const char *cursor) {
4606
4666
  // Get the next character in the source starting from parser->current.end and
4607
4667
  // adding the given offset. If that position is beyond the end of the source
4608
4668
  // then return '\0'.
4609
- static inline char
4669
+ static inline uint8_t
4610
4670
  peek_offset(yp_parser_t *parser, ptrdiff_t offset) {
4611
4671
  return peek_at(parser, parser->current.end + offset);
4612
4672
  }
4613
4673
 
4614
4674
  // Get the next character in the source starting from parser->current.end. If
4615
4675
  // that position is beyond the end of the source then return '\0'.
4616
- static inline char
4676
+ static inline uint8_t
4617
4677
  peek(yp_parser_t *parser) {
4618
4678
  return peek_at(parser, parser->current.end);
4619
4679
  }
4620
4680
 
4621
4681
  // Get the next string of length len in the source starting from parser->current.end.
4622
4682
  // If the string extends beyond the end of the source, return the empty string ""
4623
- static inline const char*
4683
+ static inline const uint8_t *
4624
4684
  peek_string(yp_parser_t *parser, size_t len) {
4625
4685
  if (parser->current.end + len <= parser->end) {
4626
4686
  return parser->current.end;
4627
4687
  } else {
4628
- return "";
4688
+ return (const uint8_t *) "";
4629
4689
  }
4630
4690
  }
4631
4691
 
4632
4692
  // If the character to be read matches the given value, then returns true and
4633
4693
  // advanced the current pointer.
4634
4694
  static inline bool
4635
- match(yp_parser_t *parser, char value) {
4695
+ match(yp_parser_t *parser, uint8_t value) {
4636
4696
  if (peek(parser) == value) {
4637
4697
  parser->current.end++;
4638
4698
  return true;
@@ -4643,7 +4703,7 @@ match(yp_parser_t *parser, char value) {
4643
4703
  // Return the length of the line ending string starting at +cursor+, or 0 if it
4644
4704
  // is not a line ending. This function is intended to be CRLF/LF agnostic.
4645
4705
  static inline size_t
4646
- match_eol_at(yp_parser_t *parser, const char *cursor) {
4706
+ match_eol_at(yp_parser_t *parser, const uint8_t *cursor) {
4647
4707
  if (peek_at(parser, cursor) == '\n') {
4648
4708
  return 1;
4649
4709
  }
@@ -4670,8 +4730,8 @@ match_eol(yp_parser_t *parser) {
4670
4730
  }
4671
4731
 
4672
4732
  // Skip to the next newline character or NUL byte.
4673
- static inline const char *
4674
- next_newline(const char *cursor, ptrdiff_t length) {
4733
+ static inline const uint8_t *
4734
+ next_newline(const uint8_t *cursor, ptrdiff_t length) {
4675
4735
  assert(length >= 0);
4676
4736
 
4677
4737
  // Note that it's okay for us to use memchr here to look for \n because none
@@ -4682,17 +4742,17 @@ next_newline(const char *cursor, ptrdiff_t length) {
4682
4742
 
4683
4743
  // Find the start of the encoding comment. This is effectively an inlined
4684
4744
  // version of strnstr with some modifications.
4685
- static inline const char *
4686
- parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdiff_t remaining) {
4745
+ static inline const uint8_t *
4746
+ parser_lex_encoding_comment_start(yp_parser_t *parser, const uint8_t *cursor, ptrdiff_t remaining) {
4687
4747
  assert(remaining >= 0);
4688
4748
  size_t length = (size_t) remaining;
4689
4749
 
4690
4750
  size_t key_length = strlen("coding:");
4691
4751
  if (key_length > length) return NULL;
4692
4752
 
4693
- const char *cursor_limit = cursor + length - key_length + 1;
4753
+ const uint8_t *cursor_limit = cursor + length - key_length + 1;
4694
4754
  while ((cursor = yp_memchr(cursor, 'c', (size_t) (cursor_limit - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
4695
- if (strncmp(cursor, "coding", key_length - 1) == 0) {
4755
+ if (memcmp(cursor, "coding", key_length - 1) == 0) {
4696
4756
  size_t whitespace_after_coding = yp_strspn_inline_whitespace(cursor + key_length - 1, parser->end - (cursor + key_length - 1));
4697
4757
  size_t cur_pos = key_length + whitespace_after_coding;
4698
4758
 
@@ -4711,13 +4771,13 @@ parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdi
4711
4771
  // actions are necessary for it here.
4712
4772
  static void
4713
4773
  parser_lex_encoding_comment(yp_parser_t *parser) {
4714
- const char *start = parser->current.start + 1;
4715
- const char *end = next_newline(start, parser->end - start);
4774
+ const uint8_t *start = parser->current.start + 1;
4775
+ const uint8_t *end = next_newline(start, parser->end - start);
4716
4776
  if (end == NULL) end = parser->end;
4717
4777
 
4718
4778
  // These are the patterns we're going to match to find the encoding comment.
4719
4779
  // This is definitely not complete or even really correct.
4720
- const char *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);
4780
+ const uint8_t *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);
4721
4781
 
4722
4782
  // If we didn't find anything that matched our patterns, then return. Note
4723
4783
  // that this does a _very_ poor job of actually finding the encoding, and
@@ -4730,7 +4790,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
4730
4790
 
4731
4791
  // Now determine the end of the encoding string. This is either the end of
4732
4792
  // the line, the first whitespace character, or a punctuation mark.
4733
- const char *encoding_end = yp_strpbrk(parser, encoding_start, " \t\f\r\v\n;,", end - encoding_start);
4793
+ const uint8_t *encoding_end = yp_strpbrk(parser, encoding_start, (const uint8_t *) " \t\f\r\v\n;,", end - encoding_start);
4734
4794
  encoding_end = encoding_end == NULL ? end : encoding_end;
4735
4795
 
4736
4796
  // Finally, we can determine the width of the encoding string.
@@ -4752,7 +4812,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
4752
4812
  // Extensions like utf-8 can contain extra encoding details like,
4753
4813
  // utf-8-dos, utf-8-linux, utf-8-mac. We treat these all as utf-8 should
4754
4814
  // treat any encoding starting utf-8 as utf-8.
4755
- if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, "utf-8", 5) == 0)) {
4815
+ if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, (const uint8_t *) "utf-8", 5) == 0)) {
4756
4816
  // We don't need to do anything here because the default encoding is
4757
4817
  // already UTF-8. We'll just return.
4758
4818
  return;
@@ -4761,7 +4821,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
4761
4821
  // Next, we're going to loop through each of the encodings that we handle
4762
4822
  // explicitly. If we found one that we understand, we'll use that value.
4763
4823
  #define ENCODING(value, prebuilt) \
4764
- if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, value, width) == 0) { \
4824
+ if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, (const uint8_t *) value, width) == 0) { \
4765
4825
  parser->encoding = prebuilt; \
4766
4826
  parser->encoding_changed |= true; \
4767
4827
  if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
@@ -4901,14 +4961,9 @@ context_push(yp_parser_t *parser, yp_context_t context) {
4901
4961
 
4902
4962
  static void
4903
4963
  context_pop(yp_parser_t *parser) {
4904
- if (parser->current_context->prev == NULL) {
4905
- free(parser->current_context);
4906
- parser->current_context = NULL;
4907
- } else {
4908
- yp_context_node_t *prev = parser->current_context->prev;
4909
- free(parser->current_context);
4910
- parser->current_context = prev;
4911
- }
4964
+ yp_context_node_t *prev = parser->current_context->prev;
4965
+ free(parser->current_context);
4966
+ parser->current_context = prev;
4912
4967
  }
4913
4968
 
4914
4969
  static bool
@@ -4992,7 +5047,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
4992
5047
  // 0d1111 is a decimal number
4993
5048
  case 'd':
4994
5049
  case 'D':
4995
- if (yp_char_is_decimal_digit(*++parser->current.end)) {
5050
+ parser->current.end++;
5051
+ if (yp_char_is_decimal_digit(peek(parser))) {
4996
5052
  parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
4997
5053
  } else {
4998
5054
  yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid decimal number.");
@@ -5003,7 +5059,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
5003
5059
  // 0b1111 is a binary number
5004
5060
  case 'b':
5005
5061
  case 'B':
5006
- if (yp_char_is_binary_digit(*++parser->current.end)) {
5062
+ parser->current.end++;
5063
+ if (yp_char_is_binary_digit(peek(parser))) {
5007
5064
  parser->current.end += yp_strspn_binary_number(parser->current.end, parser->end - parser->current.end);
5008
5065
  } else {
5009
5066
  yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid binary number.");
@@ -5014,7 +5071,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
5014
5071
  // 0o1111 is an octal number
5015
5072
  case 'o':
5016
5073
  case 'O':
5017
- if (yp_char_is_octal_digit(*++parser->current.end)) {
5074
+ parser->current.end++;
5075
+ if (yp_char_is_octal_digit(peek(parser))) {
5018
5076
  parser->current.end += yp_strspn_octal_number(parser->current.end, parser->end - parser->current.end);
5019
5077
  } else {
5020
5078
  yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid octal number.");
@@ -5038,7 +5096,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
5038
5096
  // 0x1111 is a hexadecimal number
5039
5097
  case 'x':
5040
5098
  case 'X':
5041
- if (yp_char_is_hexadecimal_digit(*++parser->current.end)) {
5099
+ parser->current.end++;
5100
+ if (yp_char_is_hexadecimal_digit(peek(parser))) {
5042
5101
  parser->current.end += yp_strspn_hexadecimal_number(parser->current.end, parser->end - parser->current.end);
5043
5102
  } else {
5044
5103
  yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid hexadecimal number.");
@@ -5084,7 +5143,7 @@ lex_numeric(yp_parser_t *parser) {
5084
5143
  if (parser->current.end < parser->end) {
5085
5144
  type = lex_numeric_prefix(parser);
5086
5145
 
5087
- const char *end = parser->current.end;
5146
+ const uint8_t *end = parser->current.end;
5088
5147
  yp_token_type_t suffix_type = type;
5089
5148
 
5090
5149
  if (type == YP_TOKEN_INTEGER) {
@@ -5109,8 +5168,8 @@ lex_numeric(yp_parser_t *parser) {
5109
5168
  }
5110
5169
  }
5111
5170
 
5112
- const unsigned char uc = (const unsigned char) peek(parser);
5113
- if (uc != '\0' && (uc >= 0x80 || ((uc >= 'a' && uc <= 'z') || (uc >= 'A' && uc <= 'Z')) || uc == '_')) {
5171
+ const uint8_t b = peek(parser);
5172
+ if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
5114
5173
  parser->current.end = end;
5115
5174
  } else {
5116
5175
  type = suffix_type;
@@ -5122,6 +5181,11 @@ lex_numeric(yp_parser_t *parser) {
5122
5181
 
5123
5182
  static yp_token_type_t
5124
5183
  lex_global_variable(yp_parser_t *parser) {
5184
+ if (parser->current.end >= parser->end) {
5185
+ yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid global variable.");
5186
+ return YP_TOKEN_GLOBAL_VARIABLE;
5187
+ }
5188
+
5125
5189
  switch (*parser->current.end) {
5126
5190
  case '~': // $~: match-data
5127
5191
  case '*': // $*: argv
@@ -5210,7 +5274,7 @@ lex_keyword(yp_parser_t *parser, const char *value, yp_lex_state_t state, yp_tok
5210
5274
  yp_lex_state_t last_state = parser->lex_state;
5211
5275
 
5212
5276
  const size_t vlen = strlen(value);
5213
- if (parser->current.start + vlen <= parser->end && strncmp(parser->current.start, value, vlen) == 0) {
5277
+ if (parser->current.start + vlen <= parser->end && memcmp(parser->current.start, value, vlen) == 0) {
5214
5278
  if (parser->lex_state & YP_LEX_STATE_FNAME) {
5215
5279
  lex_state_set(parser, YP_LEX_STATE_ENDFN);
5216
5280
  } else {
@@ -5376,7 +5440,7 @@ current_token_starts_line(yp_parser_t *parser) {
5376
5440
  // this token type.
5377
5441
  //
5378
5442
  static yp_token_type_t
5379
- lex_interpolation(yp_parser_t *parser, const char *pound) {
5443
+ lex_interpolation(yp_parser_t *parser, const uint8_t *pound) {
5380
5444
  // If there is no content following this #, then we're at the end of
5381
5445
  // the string and we can safely return string content.
5382
5446
  if (pound + 1 >= parser->end) {
@@ -5397,7 +5461,7 @@ lex_interpolation(yp_parser_t *parser, const char *pound) {
5397
5461
 
5398
5462
  // If we're looking at a @ and there's another @, then we'll skip past the
5399
5463
  // second @.
5400
- const char *variable = pound + 2;
5464
+ const uint8_t *variable = pound + 2;
5401
5465
  if (*variable == '@' && pound + 3 < parser->end) variable++;
5402
5466
 
5403
5467
  if (char_is_identifier_start(parser, variable)) {
@@ -5433,7 +5497,7 @@ lex_interpolation(yp_parser_t *parser, const char *pound) {
5433
5497
  // This is the character that we're going to check to see if it is the
5434
5498
  // start of an identifier that would indicate that this is a global
5435
5499
  // variable.
5436
- const char *check = pound + 2;
5500
+ const uint8_t *check = pound + 2;
5437
5501
 
5438
5502
  if (pound[2] == '-') {
5439
5503
  if (pound + 3 >= parser->end) {
@@ -5624,7 +5688,7 @@ parser_comment(yp_parser_t *parser, yp_comment_type_t type) {
5624
5688
  static yp_token_type_t
5625
5689
  lex_embdoc(yp_parser_t *parser) {
5626
5690
  // First, lex out the EMBDOC_BEGIN token.
5627
- const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5691
+ const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5628
5692
 
5629
5693
  if (newline == NULL) {
5630
5694
  parser->current.end = parser->end;
@@ -5647,9 +5711,9 @@ lex_embdoc(yp_parser_t *parser) {
5647
5711
 
5648
5712
  // If we've hit the end of the embedded documentation then we'll return that
5649
5713
  // token here.
5650
- if (strncmp(parser->current.end, "=end", 4) == 0 &&
5714
+ if (memcmp(parser->current.end, "=end", 4) == 0 &&
5651
5715
  (parser->current.end + 4 == parser->end || yp_char_is_whitespace(parser->current.end[4]))) {
5652
- const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5716
+ const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5653
5717
 
5654
5718
  if (newline == NULL) {
5655
5719
  parser->current.end = parser->end;
@@ -5669,7 +5733,7 @@ lex_embdoc(yp_parser_t *parser) {
5669
5733
 
5670
5734
  // Otherwise, we'll parse until the end of the line and return a line of
5671
5735
  // embedded documentation.
5672
- const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5736
+ const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
5673
5737
 
5674
5738
  if (newline == NULL) {
5675
5739
  parser->current.end = parser->end;
@@ -5819,7 +5883,7 @@ parser_lex(yp_parser_t *parser) {
5819
5883
  LEX(YP_TOKEN_EOF);
5820
5884
 
5821
5885
  case '#': { // comments
5822
- const char *ending = next_newline(parser->current.end, parser->end - parser->current.end);
5886
+ const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
5823
5887
 
5824
5888
  parser->current.end = ending == NULL ? parser->end : ending + 1;
5825
5889
  parser->current.type = YP_TOKEN_COMMENT;
@@ -5888,7 +5952,7 @@ parser_lex(yp_parser_t *parser) {
5888
5952
  // (either . or &.) that starts the next line. If there is, then this
5889
5953
  // is going to become an ignored newline and we're going to instead
5890
5954
  // return the call operator.
5891
- const char *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
5955
+ const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
5892
5956
  next_content += yp_strspn_inline_whitespace(next_content, parser->end - next_content);
5893
5957
 
5894
5958
  if (next_content < parser->end) {
@@ -5899,15 +5963,15 @@ parser_lex(yp_parser_t *parser) {
5899
5963
  // Otherwise we'll return a regular newline.
5900
5964
  if (next_content[0] == '#') {
5901
5965
  // Here we look for a "." or "&." following a "\n".
5902
- const char *following = next_newline(next_content, parser->end - next_content);
5966
+ const uint8_t *following = next_newline(next_content, parser->end - next_content);
5903
5967
 
5904
- while (following && (following < parser->end)) {
5968
+ while (following && (following + 1 < parser->end)) {
5905
5969
  following++;
5906
5970
  following += yp_strspn_inline_whitespace(following, parser->end - following);
5907
5971
 
5908
5972
  // If this is not followed by a comment, then we can break out
5909
5973
  // of this loop.
5910
- if (*following != '#') break;
5974
+ if (peek_at(parser, following) != '#') break;
5911
5975
 
5912
5976
  // If there is a comment, then we need to find the end of the
5913
5977
  // comment and continue searching from there.
@@ -6150,7 +6214,7 @@ parser_lex(yp_parser_t *parser) {
6150
6214
 
6151
6215
  // = => =~ == === =begin
6152
6216
  case '=':
6153
- if (current_token_starts_line(parser) && strncmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_offset(parser, 5))) {
6217
+ if (current_token_starts_line(parser) && memcmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_offset(parser, 5))) {
6154
6218
  yp_token_type_t type = lex_embdoc(parser);
6155
6219
 
6156
6220
  if (type == YP_TOKEN_EOF) {
@@ -6188,7 +6252,7 @@ parser_lex(yp_parser_t *parser) {
6188
6252
  !lex_state_end_p(parser) &&
6189
6253
  (!lex_state_p(parser, YP_LEX_STATE_ARG_ANY) || lex_state_p(parser, YP_LEX_STATE_LABELED) || space_seen)
6190
6254
  ) {
6191
- const char *end = parser->current.end;
6255
+ const uint8_t *end = parser->current.end;
6192
6256
 
6193
6257
  yp_heredoc_quote_t quote = YP_HEREDOC_QUOTE_NONE;
6194
6258
  yp_heredoc_indent_t indent = YP_HEREDOC_INDENT_NONE;
@@ -6210,7 +6274,7 @@ parser_lex(yp_parser_t *parser) {
6210
6274
  quote = YP_HEREDOC_QUOTE_SINGLE;
6211
6275
  }
6212
6276
 
6213
- const char *ident_start = parser->current.end;
6277
+ const uint8_t *ident_start = parser->current.end;
6214
6278
  size_t width = 0;
6215
6279
 
6216
6280
  if (parser->current.end >= parser->end) {
@@ -6233,7 +6297,7 @@ parser_lex(yp_parser_t *parser) {
6233
6297
  }
6234
6298
 
6235
6299
  size_t ident_length = (size_t) (parser->current.end - ident_start);
6236
- if (quote != YP_HEREDOC_QUOTE_NONE && !match(parser, (char) quote)) {
6300
+ if (quote != YP_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
6237
6301
  // TODO: handle unterminated heredoc
6238
6302
  }
6239
6303
 
@@ -6249,7 +6313,7 @@ parser_lex(yp_parser_t *parser) {
6249
6313
  });
6250
6314
 
6251
6315
  if (parser->heredoc_end == NULL) {
6252
- const char *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
6316
+ const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
6253
6317
 
6254
6318
  if (body_start == NULL) {
6255
6319
  // If there is no newline after the heredoc identifier, then
@@ -6574,7 +6638,7 @@ parser_lex(yp_parser_t *parser) {
6574
6638
  LEX(YP_TOKEN_COLON_COLON);
6575
6639
  }
6576
6640
 
6577
- if (lex_state_end_p(parser) || yp_char_is_whitespace(*parser->current.end) || peek(parser) == '#') {
6641
+ if (lex_state_end_p(parser) || yp_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
6578
6642
  lex_state_set(parser, YP_LEX_STATE_BEG);
6579
6643
  LEX(YP_TOKEN_COLON);
6580
6644
  }
@@ -6815,7 +6879,7 @@ parser_lex(yp_parser_t *parser) {
6815
6879
  if (
6816
6880
  ((parser->current.end - parser->current.start) == 7) &&
6817
6881
  current_token_starts_line(parser) &&
6818
- (strncmp(parser->current.start, "__END__", 7) == 0) &&
6882
+ (memcmp(parser->current.start, "__END__", 7) == 0) &&
6819
6883
  (parser->current.end == parser->end || match_eol(parser))
6820
6884
  )
6821
6885
  {
@@ -6891,8 +6955,8 @@ parser_lex(yp_parser_t *parser) {
6891
6955
  // Here we'll get a list of the places where strpbrk should break,
6892
6956
  // and then find the first one.
6893
6957
  yp_lex_mode_t *lex_mode = parser->lex_modes.current;
6894
- const char *breakpoints = lex_mode->as.list.breakpoints;
6895
- const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
6958
+ const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
6959
+ const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
6896
6960
 
6897
6961
  while (breakpoint != NULL) {
6898
6962
  // If we hit a null byte, skip directly past it.
@@ -6940,10 +7004,25 @@ parser_lex(yp_parser_t *parser) {
6940
7004
  if (*breakpoint == '\\') {
6941
7005
  yp_unescape_type_t unescape_type = lex_mode->as.list.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
6942
7006
  size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
7007
+ if (difference == 0) {
7008
+ // we're at the end of the file
7009
+ breakpoint = NULL;
7010
+ continue;
7011
+ }
6943
7012
 
6944
- // If the result is an escaped newline, then we need to
6945
- // track that newline.
6946
- yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
7013
+ // If the result is an escaped newline ...
7014
+ if (breakpoint[difference - 1] == '\n') {
7015
+ if (parser->heredoc_end) {
7016
+ // ... if we are on the same line as a heredoc, flush the heredoc and
7017
+ // continue parsing after heredoc_end.
7018
+ parser->current.end = breakpoint + difference;
7019
+ parser_flush_heredoc_end(parser);
7020
+ LEX(YP_TOKEN_STRING_CONTENT);
7021
+ } else {
7022
+ // ... else track the newline.
7023
+ yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7024
+ }
7025
+ }
6947
7026
 
6948
7027
  breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
6949
7028
  continue;
@@ -6998,8 +7077,8 @@ parser_lex(yp_parser_t *parser) {
6998
7077
  // These are the places where we need to split up the content of the
6999
7078
  // regular expression. We'll use strpbrk to find the first of these
7000
7079
  // characters.
7001
- const char *breakpoints = lex_mode->as.regexp.breakpoints;
7002
- const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
7080
+ const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
7081
+ const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
7003
7082
 
7004
7083
  while (breakpoint != NULL) {
7005
7084
  // If we hit a null byte, skip directly past it.
@@ -7062,9 +7141,14 @@ parser_lex(yp_parser_t *parser) {
7062
7141
  // and find the next breakpoint.
7063
7142
  if (*breakpoint == '\\') {
7064
7143
  size_t difference = yp_unescape_calculate_difference(parser, breakpoint, YP_UNESCAPE_ALL, false);
7144
+ if (difference == 0) {
7145
+ // we're at the end of the file
7146
+ breakpoint = NULL;
7147
+ continue;
7148
+ }
7065
7149
 
7066
7150
  // If the result is an escaped newline ...
7067
- if (*(breakpoint + difference - 1) == '\n') {
7151
+ if (breakpoint[difference - 1] == '\n') {
7068
7152
  if (parser->heredoc_end) {
7069
7153
  // ... if we are on the same line as a heredoc, flush the heredoc and
7070
7154
  // continue parsing after heredoc_end.
@@ -7126,8 +7210,8 @@ parser_lex(yp_parser_t *parser) {
7126
7210
 
7127
7211
  // These are the places where we need to split up the content of the
7128
7212
  // string. We'll use strpbrk to find the first of these characters.
7129
- const char *breakpoints = parser->lex_modes.current->as.string.breakpoints;
7130
- const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
7213
+ const uint8_t *breakpoints = parser->lex_modes.current->as.string.breakpoints;
7214
+ const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
7131
7215
 
7132
7216
  while (breakpoint != NULL) {
7133
7217
  // If we hit the incrementor, then we'll increment then nesting and
@@ -7212,9 +7296,14 @@ parser_lex(yp_parser_t *parser) {
7212
7296
  // find the next breakpoint.
7213
7297
  yp_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
7214
7298
  size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
7299
+ if (difference == 0) {
7300
+ // we're at the end of the file
7301
+ breakpoint = NULL;
7302
+ break;
7303
+ }
7215
7304
 
7216
7305
  // If the result is an escaped newline ...
7217
- if (*(breakpoint + difference - 1) == '\n') {
7306
+ if (breakpoint[difference - 1] == '\n') {
7218
7307
  if (parser->heredoc_end) {
7219
7308
  // ... if we are on the same line as a heredoc, flush the heredoc and
7220
7309
  // continue parsing after heredoc_end.
@@ -7272,18 +7361,18 @@ parser_lex(yp_parser_t *parser) {
7272
7361
 
7273
7362
  // Now let's grab the information about the identifier off of the current
7274
7363
  // lex mode.
7275
- const char *ident_start = parser->lex_modes.current->as.heredoc.ident_start;
7364
+ const uint8_t *ident_start = parser->lex_modes.current->as.heredoc.ident_start;
7276
7365
  size_t ident_length = parser->lex_modes.current->as.heredoc.ident_length;
7277
7366
 
7278
7367
  // If we are immediately following a newline and we have hit the
7279
7368
  // terminator, then we need to return the ending of the heredoc.
7280
7369
  if (current_token_starts_line(parser)) {
7281
- const char *start = parser->current.start;
7370
+ const uint8_t *start = parser->current.start;
7282
7371
  if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) {
7283
7372
  start += yp_strspn_inline_whitespace(start, parser->end - start);
7284
7373
  }
7285
7374
 
7286
- if ((start + ident_length <= parser->end) && (strncmp(start, ident_start, ident_length) == 0)) {
7375
+ if ((start + ident_length <= parser->end) && (memcmp(start, ident_start, ident_length) == 0)) {
7287
7376
  bool matched = true;
7288
7377
  bool at_end = false;
7289
7378
 
@@ -7318,14 +7407,14 @@ parser_lex(yp_parser_t *parser) {
7318
7407
  // Otherwise we'll be parsing string content. These are the places where
7319
7408
  // we need to split up the content of the heredoc. We'll use strpbrk to
7320
7409
  // find the first of these characters.
7321
- char breakpoints[] = "\n\\#";
7410
+ uint8_t breakpoints[] = "\n\\#";
7322
7411
 
7323
7412
  yp_heredoc_quote_t quote = parser->lex_modes.current->as.heredoc.quote;
7324
7413
  if (quote == YP_HEREDOC_QUOTE_SINGLE) {
7325
7414
  breakpoints[2] = '\0';
7326
7415
  }
7327
7416
 
7328
- const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
7417
+ const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
7329
7418
 
7330
7419
  while (breakpoint != NULL) {
7331
7420
  switch (*breakpoint) {
@@ -7342,7 +7431,7 @@ parser_lex(yp_parser_t *parser) {
7342
7431
 
7343
7432
  yp_newline_list_append(&parser->newline_list, breakpoint);
7344
7433
 
7345
- const char *start = breakpoint + 1;
7434
+ const uint8_t *start = breakpoint + 1;
7346
7435
  if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) {
7347
7436
  start += yp_strspn_inline_whitespace(start, parser->end - start);
7348
7437
  }
@@ -7353,7 +7442,7 @@ parser_lex(yp_parser_t *parser) {
7353
7442
  // again and return the end of the heredoc.
7354
7443
  if (
7355
7444
  (start + ident_length <= parser->end) &&
7356
- (strncmp(start, ident_start, ident_length) == 0)
7445
+ (memcmp(start, ident_start, ident_length) == 0)
7357
7446
  ) {
7358
7447
  // Heredoc terminators must be followed by a newline, CRLF, or EOF to be valid.
7359
7448
  if (
@@ -7383,6 +7472,11 @@ parser_lex(yp_parser_t *parser) {
7383
7472
  } else {
7384
7473
  yp_unescape_type_t unescape_type = (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL;
7385
7474
  size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
7475
+ if (difference == 0) {
7476
+ // we're at the end of the file
7477
+ breakpoint = NULL;
7478
+ break;
7479
+ }
7386
7480
 
7387
7481
  yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
7388
7482
 
@@ -7453,6 +7547,17 @@ yp_symbol_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
7453
7547
  return node;
7454
7548
  }
7455
7549
 
7550
+ static yp_string_node_t *
7551
+ yp_char_literal_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) {
7552
+ yp_string_node_t *node = yp_string_node_create(parser, opening, content, closing);
7553
+
7554
+ assert((content->end - content->start) >= 0);
7555
+ yp_string_shared_init(&node->unescaped, content->start, content->end);
7556
+
7557
+ yp_unescape_manipulate_char_literal(parser, &node->unescaped, unescape_type);
7558
+ return node;
7559
+ }
7560
+
7456
7561
  static yp_string_node_t *
7457
7562
  yp_string_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) {
7458
7563
  yp_string_node_t *node = yp_string_node_create(parser, opening, content, closing);
@@ -7918,10 +8023,11 @@ parse_target(yp_parser_t *parser, yp_node_t *target) {
7918
8023
  // the previous method name in, and append an =.
7919
8024
  size_t length = yp_string_length(&call->name);
7920
8025
 
7921
- char *name = calloc(length + 2, sizeof(char));
8026
+ uint8_t *name = calloc(length + 1, sizeof(uint8_t));
7922
8027
  if (name == NULL) return NULL;
7923
8028
 
7924
- snprintf(name, length + 2, "%.*s=", (int) length, yp_string_source(&call->name));
8029
+ memcpy(name, yp_string_source(&call->name), length);
8030
+ name[length] = '=';
7925
8031
 
7926
8032
  // Now switch the name to the new string.
7927
8033
  yp_string_free(&call->name);
@@ -7962,7 +8068,7 @@ parse_write(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_nod
7962
8068
  case YP_NODE_MISSING_NODE:
7963
8069
  return target;
7964
8070
  case YP_NODE_CLASS_VARIABLE_READ_NODE: {
7965
- yp_class_variable_write_node_t *write_node = yp_class_variable_read_node_to_class_variable_write_node(parser, (yp_class_variable_read_node_t *) target, operator, value);
8071
+ yp_class_variable_write_node_t *write_node = yp_class_variable_write_node_create(parser, (yp_class_variable_read_node_t *) target, operator, value);
7966
8072
  yp_node_destroy(parser, target);
7967
8073
  return (yp_node_t *) write_node;
7968
8074
  }
@@ -7987,7 +8093,7 @@ parse_write(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_nod
7987
8093
  case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
7988
8094
  yp_local_variable_read_node_t *local_read = (yp_local_variable_read_node_t *) target;
7989
8095
 
7990
- yp_constant_id_t constant_id = local_read->constant_id;
8096
+ yp_constant_id_t constant_id = local_read->name;
7991
8097
  uint32_t depth = local_read->depth;
7992
8098
 
7993
8099
  yp_location_t name_loc = target->location;
@@ -8075,10 +8181,11 @@ parse_write(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_nod
8075
8181
  // the previous method name in, and append an =.
8076
8182
  size_t length = yp_string_length(&call->name);
8077
8183
 
8078
- char *name = calloc(length + 2, sizeof(char));
8184
+ uint8_t *name = calloc(length + 1, sizeof(uint8_t));
8079
8185
  if (name == NULL) return NULL;
8080
8186
 
8081
- snprintf(name, length + 2, "%.*s=", (int) length, yp_string_source(&call->name));
8187
+ memcpy(name, yp_string_source(&call->name), length);
8188
+ name[length] = '=';
8082
8189
 
8083
8190
  // Now switch the name to the new string.
8084
8191
  yp_string_free(&call->name);
@@ -9043,10 +9150,12 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
9043
9150
  }
9044
9151
 
9045
9152
  if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
9153
+ yp_accepts_block_stack_push(parser, true);
9046
9154
  yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_RESCUE);
9047
9155
  if (statements) {
9048
9156
  yp_rescue_node_statements_set(rescue, statements);
9049
9157
  }
9158
+ yp_accepts_block_stack_pop(parser);
9050
9159
  accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
9051
9160
  }
9052
9161
 
@@ -9063,7 +9172,7 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
9063
9172
  // since we won't know the end until we've found all consequent
9064
9173
  // clauses. This sets the end location on all rescues once we know it
9065
9174
  if (current) {
9066
- const char *end_to_set = current->base.location.end;
9175
+ const uint8_t *end_to_set = current->base.location.end;
9067
9176
  current = parent_node->rescue_clause;
9068
9177
  while (current) {
9069
9178
  current->base.location.end = end_to_set;
@@ -9077,7 +9186,9 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
9077
9186
 
9078
9187
  yp_statements_node_t *else_statements = NULL;
9079
9188
  if (!match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_END, YP_TOKEN_KEYWORD_ENSURE)) {
9189
+ yp_accepts_block_stack_push(parser, true);
9080
9190
  else_statements = parse_statements(parser, YP_CONTEXT_RESCUE_ELSE);
9191
+ yp_accepts_block_stack_pop(parser);
9081
9192
  accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
9082
9193
  }
9083
9194
 
@@ -9091,7 +9202,9 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
9091
9202
 
9092
9203
  yp_statements_node_t *ensure_statements = NULL;
9093
9204
  if (!match_type_p(parser, YP_TOKEN_KEYWORD_END)) {
9205
+ yp_accepts_block_stack_push(parser, true);
9094
9206
  ensure_statements = parse_statements(parser, YP_CONTEXT_ENSURE);
9207
+ yp_accepts_block_stack_pop(parser);
9095
9208
  accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
9096
9209
  }
9097
9210
 
@@ -9116,7 +9229,7 @@ parse_rescues_as_begin(yp_parser_t *parser, yp_statements_node_t *statements) {
9116
9229
  // All nodes within a begin node are optional, so we look
9117
9230
  // for the earliest possible node that we can use to set
9118
9231
  // the BeginNode's start location
9119
- const char * start = begin_node->base.location.start;
9232
+ const uint8_t *start = begin_node->base.location.start;
9120
9233
  if (begin_node->statements) {
9121
9234
  start = begin_node->statements->base.location.start;
9122
9235
  } else if (begin_node->rescue_clause) {
@@ -9201,7 +9314,9 @@ parse_block(yp_parser_t *parser) {
9201
9314
  } else {
9202
9315
  if (!match_type_p(parser, YP_TOKEN_KEYWORD_END)) {
9203
9316
  if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_ENSURE)) {
9317
+ yp_accepts_block_stack_push(parser, true);
9204
9318
  statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_BLOCK_KEYWORDS);
9319
+ yp_accepts_block_stack_pop(parser);
9205
9320
  }
9206
9321
 
9207
9322
  if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
@@ -9782,14 +9897,14 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
9782
9897
  yp_node_t *node = nodes->nodes[index];
9783
9898
 
9784
9899
  if (!YP_NODE_TYPE_P(node, YP_NODE_STRING_NODE)) continue;
9785
- yp_location_t *content_loc = &((yp_string_node_t *) node)->content_loc;
9900
+ const yp_location_t *content_loc = &((yp_string_node_t *) node)->content_loc;
9786
9901
 
9787
9902
  // If the previous node wasn't a string node, we don't want to trim
9788
9903
  // whitespace. This could happen after an interpolated expression or
9789
9904
  // variable.
9790
9905
  if (index == 0 || YP_NODE_TYPE_P(nodes->nodes[index - 1], YP_NODE_STRING_NODE)) {
9791
9906
  int cur_whitespace;
9792
- const char *cur_char = content_loc->start;
9907
+ const uint8_t *cur_char = content_loc->start;
9793
9908
 
9794
9909
  while (cur_char && cur_char < content_loc->end) {
9795
9910
  // Any empty newlines aren't included in the minimum whitespace
@@ -9880,15 +9995,15 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
9880
9995
  // destination to move bytes into. We'll also use it for bounds checking
9881
9996
  // since we don't require that these strings be null terminated.
9882
9997
  size_t dest_length = yp_string_length(string);
9883
- char *source_start = string->source;
9998
+ uint8_t *source_start = (uint8_t *) string->source;
9884
9999
 
9885
- const char *source_cursor = source_start;
9886
- const char *source_end = source_cursor + dest_length;
10000
+ const uint8_t *source_cursor = source_start;
10001
+ const uint8_t *source_end = source_cursor + dest_length;
9887
10002
 
9888
10003
  // We're going to move bytes backward in the string when we get leading
9889
10004
  // whitespace, so we'll maintain a pointer to the current position in the
9890
10005
  // string that we're writing to.
9891
- char *dest_cursor = source_start;
10006
+ uint8_t *dest_cursor = source_start;
9892
10007
 
9893
10008
  while (source_cursor < source_end) {
9894
10009
  // If we need to dedent the next element within the heredoc or the next
@@ -9915,7 +10030,7 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
9915
10030
 
9916
10031
  // At this point we have dedented all that we need to, so we need to find
9917
10032
  // the next newline.
9918
- const char *breakpoint = next_newline(source_cursor, source_end - source_cursor);
10033
+ const uint8_t *breakpoint = next_newline(source_cursor, source_end - source_cursor);
9919
10034
 
9920
10035
  if (breakpoint == NULL) {
9921
10036
  // If there isn't another newline, then we can just move the rest of the
@@ -10127,7 +10242,7 @@ parse_pattern_hash(yp_parser_t *parser, yp_node_t *first_assoc) {
10127
10242
  yp_node_t *key = ((yp_assoc_node_t *) first_assoc)->key;
10128
10243
 
10129
10244
  if (YP_NODE_TYPE_P(key, YP_NODE_SYMBOL_NODE)) {
10130
- yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
10245
+ const yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
10131
10246
  yp_parser_local_add_location(parser, value_loc->start, value_loc->end);
10132
10247
  }
10133
10248
  }
@@ -10155,7 +10270,7 @@ parse_pattern_hash(yp_parser_t *parser, yp_node_t *first_assoc) {
10155
10270
  if (!match_any_type_p(parser, 7, YP_TOKEN_COMMA, YP_TOKEN_KEYWORD_THEN, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_BRACKET_RIGHT, YP_TOKEN_PARENTHESIS_RIGHT, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) {
10156
10271
  value = parse_pattern(parser, false, "Expected a pattern expression after the key.");
10157
10272
  } else {
10158
- yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
10273
+ const yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
10159
10274
  yp_parser_local_add_location(parser, value_loc->start, value_loc->end);
10160
10275
  }
10161
10276
 
@@ -10817,7 +10932,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10817
10932
 
10818
10933
  yp_token_t closing = not_provided(parser);
10819
10934
 
10820
- return (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &closing, YP_UNESCAPE_ALL);
10935
+ return (yp_node_t *) yp_char_literal_node_create_and_unescape(parser, &opening, &content, &closing, YP_UNESCAPE_ALL);
10821
10936
  }
10822
10937
  case YP_TOKEN_CLASS_VARIABLE: {
10823
10938
  parser_lex(parser);
@@ -11362,7 +11477,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11362
11477
 
11363
11478
  yp_node_t *statements = NULL;
11364
11479
  if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
11480
+ yp_accepts_block_stack_push(parser, true);
11365
11481
  statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_SCLASS);
11482
+ yp_accepts_block_stack_pop(parser);
11366
11483
  }
11367
11484
 
11368
11485
  if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
@@ -11643,7 +11760,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11643
11760
  yp_do_loop_stack_push(parser, false);
11644
11761
 
11645
11762
  if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
11763
+ yp_accepts_block_stack_push(parser, true);
11646
11764
  statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_DEF);
11765
+ yp_accepts_block_stack_pop(parser);
11647
11766
  }
11648
11767
 
11649
11768
  if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
@@ -11933,14 +12052,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11933
12052
  yp_array_node_t *array = yp_array_node_create(parser, &parser->previous);
11934
12053
 
11935
12054
  while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
11936
- if (yp_array_node_size(array) == 0) {
11937
- accept(parser, YP_TOKEN_WORDS_SEP);
11938
- } else {
11939
- expect(parser, YP_TOKEN_WORDS_SEP, "Expected a separator for the symbols in a `%i` list.");
11940
- if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
11941
- }
11942
-
12055
+ accept(parser, YP_TOKEN_WORDS_SEP);
11943
12056
  if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
12057
+
11944
12058
  expect(parser, YP_TOKEN_STRING_CONTENT, "Expected a symbol in a `%i` list.");
11945
12059
 
11946
12060
  yp_token_t opening = not_provided(parser);
@@ -11995,6 +12109,19 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11995
12109
  // to the list of child nodes.
11996
12110
  yp_node_t *part = parse_string_part(parser);
11997
12111
  yp_interpolated_symbol_node_append((yp_interpolated_symbol_node_t *) current, part);
12112
+ } else if (YP_NODE_TYPE_P(current, YP_NODE_SYMBOL_NODE)) {
12113
+ // If we hit string content and the current node is a string node,
12114
+ // then we need to convert the current node into an interpolated
12115
+ // string and add the string content to the list of child nodes.
12116
+ yp_token_t opening = not_provided(parser);
12117
+ yp_token_t closing = not_provided(parser);
12118
+ yp_interpolated_symbol_node_t *interpolated =
12119
+ yp_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
12120
+ yp_interpolated_symbol_node_append(interpolated, current);
12121
+
12122
+ yp_node_t *part = parse_string_part(parser);
12123
+ yp_interpolated_symbol_node_append(interpolated, part);
12124
+ current = (yp_node_t *) interpolated;
11998
12125
  } else {
11999
12126
  assert(false && "unreachable");
12000
12127
  }
@@ -12097,12 +12224,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
12097
12224
  accept(parser, YP_TOKEN_WORDS_SEP);
12098
12225
 
12099
12226
  while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
12100
- if (yp_array_node_size(array) == 0) {
12101
- accept(parser, YP_TOKEN_WORDS_SEP);
12102
- } else {
12103
- expect(parser, YP_TOKEN_WORDS_SEP, "Expected a separator for the strings in a `%w` list.");
12104
- if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
12105
- }
12227
+ accept(parser, YP_TOKEN_WORDS_SEP);
12228
+ if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
12229
+
12106
12230
  expect(parser, YP_TOKEN_STRING_CONTENT, "Expected a string in a `%w` list.");
12107
12231
 
12108
12232
  yp_token_t opening = not_provided(parser);
@@ -12152,6 +12276,19 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
12152
12276
  // to the list of child nodes.
12153
12277
  yp_node_t *part = parse_string_part(parser);
12154
12278
  yp_interpolated_string_node_append((yp_interpolated_string_node_t *) current, part);
12279
+ } else if (YP_NODE_TYPE_P(current, YP_NODE_STRING_NODE)) {
12280
+ // If we hit string content and the current node is a string node,
12281
+ // then we need to convert the current node into an interpolated
12282
+ // string and add the string content to the list of child nodes.
12283
+ yp_token_t opening = not_provided(parser);
12284
+ yp_token_t closing = not_provided(parser);
12285
+ yp_interpolated_string_node_t *interpolated =
12286
+ yp_interpolated_string_node_create(parser, &opening, NULL, &closing);
12287
+ yp_interpolated_string_node_append(interpolated, current);
12288
+
12289
+ yp_node_t *part = parse_string_part(parser);
12290
+ yp_interpolated_string_node_append(interpolated, part);
12291
+ current = (yp_node_t *) interpolated;
12155
12292
  } else {
12156
12293
  assert(false && "unreachable");
12157
12294
  }
@@ -12482,7 +12619,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
12482
12619
  opening = parser->previous;
12483
12620
 
12484
12621
  if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_END, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
12622
+ yp_accepts_block_stack_push(parser, true);
12485
12623
  body = (yp_node_t *) parse_statements(parser, YP_CONTEXT_LAMBDA_DO_END);
12624
+ yp_accepts_block_stack_pop(parser);
12486
12625
  }
12487
12626
 
12488
12627
  if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
@@ -12759,7 +12898,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12759
12898
  parser_lex(parser);
12760
12899
 
12761
12900
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12762
- yp_node_t *result = (yp_node_t *) yp_class_variable_and_write_node_create(parser, node, &token, value);
12901
+ yp_node_t *result = (yp_node_t *) yp_class_variable_and_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value);
12763
12902
 
12764
12903
  yp_node_destroy(parser, node);
12765
12904
  return result;
@@ -12783,7 +12922,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12783
12922
  parser_lex(parser);
12784
12923
 
12785
12924
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12786
- yp_node_t *result = (yp_node_t *) yp_instance_variable_and_write_node_create(parser, node, &token, value);
12925
+ yp_node_t *result = (yp_node_t *) yp_instance_variable_and_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value);
12787
12926
 
12788
12927
  yp_node_destroy(parser, node);
12789
12928
  return result;
@@ -12793,7 +12932,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12793
12932
  parser_lex(parser);
12794
12933
 
12795
12934
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12796
- yp_node_t *result = (yp_node_t *) yp_local_variable_and_write_node_create(parser, node, &token, value, cast->constant_id, cast->depth);
12935
+ yp_node_t *result = (yp_node_t *) yp_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
12797
12936
 
12798
12937
  yp_node_destroy(parser, node);
12799
12938
  return result;
@@ -12860,7 +12999,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12860
12999
  parser_lex(parser);
12861
13000
 
12862
13001
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12863
- yp_node_t *result = (yp_node_t *) yp_class_variable_or_write_node_create(parser, node, &token, value);
13002
+ yp_node_t *result = (yp_node_t *) yp_class_variable_or_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value);
12864
13003
 
12865
13004
  yp_node_destroy(parser, node);
12866
13005
  return result;
@@ -12884,7 +13023,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12884
13023
  parser_lex(parser);
12885
13024
 
12886
13025
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12887
- yp_node_t *result = (yp_node_t *) yp_instance_variable_or_write_node_create(parser, node, &token, value);
13026
+ yp_node_t *result = (yp_node_t *) yp_instance_variable_or_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value);
12888
13027
 
12889
13028
  yp_node_destroy(parser, node);
12890
13029
  return result;
@@ -12894,7 +13033,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12894
13033
  parser_lex(parser);
12895
13034
 
12896
13035
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12897
- yp_node_t *result = (yp_node_t *) yp_local_variable_or_write_node_create(parser, node, &token, value, cast->constant_id, cast->depth);
13036
+ yp_node_t *result = (yp_node_t *) yp_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
12898
13037
 
12899
13038
  yp_node_destroy(parser, node);
12900
13039
  return result;
@@ -12971,7 +13110,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12971
13110
  parser_lex(parser);
12972
13111
 
12973
13112
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
12974
- yp_node_t *result = (yp_node_t *) yp_class_variable_operator_write_node_create(parser, node, &token, value);
13113
+ yp_node_t *result = (yp_node_t *) yp_class_variable_operator_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value);
12975
13114
 
12976
13115
  yp_node_destroy(parser, node);
12977
13116
  return result;
@@ -12995,7 +13134,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12995
13134
  parser_lex(parser);
12996
13135
 
12997
13136
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
12998
- yp_node_t *result = (yp_node_t *) yp_instance_variable_operator_write_node_create(parser, node, &token, value);
13137
+ yp_node_t *result = (yp_node_t *) yp_instance_variable_operator_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value);
12999
13138
 
13000
13139
  yp_node_destroy(parser, node);
13001
13140
  return result;
@@ -13005,7 +13144,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
13005
13144
  parser_lex(parser);
13006
13145
 
13007
13146
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13008
- yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, cast->constant_id, cast->depth);
13147
+ yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
13009
13148
 
13010
13149
  yp_node_destroy(parser, node);
13011
13150
  return result;
@@ -13083,7 +13222,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
13083
13222
  yp_string_list_t named_captures;
13084
13223
  yp_string_list_init(&named_captures);
13085
13224
 
13086
- yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc;
13225
+ const yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc;
13087
13226
 
13088
13227
  if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures, parser->encoding_changed, &parser->encoding)) {
13089
13228
  for (size_t index = 0; index < named_captures.length; index++) {
@@ -13507,7 +13646,7 @@ yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
13507
13646
  uint32_t local_size = yp_metadata_read_u32(metadata);
13508
13647
  metadata += 4;
13509
13648
 
13510
- yp_parser_local_add_location(parser, metadata, metadata + local_size);
13649
+ yp_parser_local_add_location(parser, (const uint8_t *) metadata, (const uint8_t *) (metadata + local_size));
13511
13650
  metadata += local_size;
13512
13651
  }
13513
13652
  }
@@ -13519,7 +13658,7 @@ yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
13519
13658
 
13520
13659
  // Initialize a parser with the given start and end pointers.
13521
13660
  YP_EXPORTED_FUNCTION void
13522
- yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char *filepath) {
13661
+ yp_parser_init(yp_parser_t *parser, const uint8_t *source, size_t size, const char *filepath) {
13523
13662
  assert(source != NULL);
13524
13663
 
13525
13664
  // Set filepath to the file that was passed
@@ -13591,7 +13730,7 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
13591
13730
  yp_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
13592
13731
 
13593
13732
  // Skip past the UTF-8 BOM if it exists.
13594
- if (size >= 3 && (unsigned char) source[0] == 0xef && (unsigned char) source[1] == 0xbb && (unsigned char) source[2] == 0xbf) {
13733
+ if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
13595
13734
  parser->current.end += 3;
13596
13735
  parser->encoding_comment_start += 3;
13597
13736
  }
@@ -13599,7 +13738,7 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
13599
13738
  // If the first two bytes of the source are a shebang, then we'll indicate
13600
13739
  // that the encoding comment is at the end of the shebang.
13601
13740
  if (peek(parser) == '#' && peek_offset(parser, 1) == '!') {
13602
- const char *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
13741
+ const uint8_t *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
13603
13742
  if (encoding_comment_start) {
13604
13743
  parser->encoding_comment_start = encoding_comment_start + 1;
13605
13744
  }
@@ -13671,7 +13810,7 @@ yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
13671
13810
  // Parse and serialize the AST represented by the given source to the given
13672
13811
  // buffer.
13673
13812
  YP_EXPORTED_FUNCTION void
13674
- yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *metadata) {
13813
+ yp_parse_serialize(const uint8_t *source, size_t size, yp_buffer_t *buffer, const char *metadata) {
13675
13814
  yp_parser_t parser;
13676
13815
  yp_parser_init(&parser, source, size, NULL);
13677
13816
  if (metadata) yp_parser_metadata(&parser, metadata);