yarp 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/src/yarp.c CHANGED
@@ -1,5 +1,4 @@
1
1
  #include "yarp.h"
2
- #include "yarp/version.h"
3
2
 
4
3
  // The YARP version and the serialization format.
5
4
  const char *
@@ -362,7 +361,7 @@ lex_state_ignored_p(yp_parser_t *parser) {
362
361
 
363
362
  if (ignored) {
364
363
  return YP_IGNORED_NEWLINE_ALL;
365
- } else if (parser->lex_state == (YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED)) {
364
+ } else if ((parser->lex_state & ~((unsigned int) YP_LEX_STATE_LABEL)) == (YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED)) {
366
365
  return YP_IGNORED_NEWLINE_PATTERN;
367
366
  } else {
368
367
  return YP_IGNORED_NEWLINE_NONE;
@@ -450,8 +449,8 @@ yp_flip_flop(yp_node_t *node) {
450
449
  case YP_NODE_PARENTHESES_NODE: {
451
450
  yp_parentheses_node_t *cast = (yp_parentheses_node_t *) node;
452
451
 
453
- if ((cast->statements != NULL) && YP_NODE_TYPE_P(cast->statements, YP_NODE_STATEMENTS_NODE)) {
454
- yp_statements_node_t *statements = (yp_statements_node_t *) cast->statements;
452
+ if ((cast->body != NULL) && YP_NODE_TYPE_P(cast->body, YP_NODE_STATEMENTS_NODE)) {
453
+ yp_statements_node_t *statements = (yp_statements_node_t *) cast->body;
455
454
  if (statements->body.size == 1) yp_flip_flop(statements->body.nodes[0]);
456
455
  }
457
456
 
@@ -459,8 +458,12 @@ yp_flip_flop(yp_node_t *node) {
459
458
  }
460
459
  case YP_NODE_RANGE_NODE: {
461
460
  yp_range_node_t *cast = (yp_range_node_t *) node;
462
- yp_flip_flop(cast->left);
463
- yp_flip_flop(cast->right);
461
+ if (cast->left) {
462
+ yp_flip_flop(cast->left);
463
+ }
464
+ if (cast->right) {
465
+ yp_flip_flop(cast->right);
466
+ }
464
467
 
465
468
  // Here we change the range node into a flip flop node. We can do
466
469
  // this since the nodes are exactly the same except for the type.
@@ -532,6 +535,73 @@ yp_arguments_validate(yp_parser_t *parser, yp_arguments_t *arguments) {
532
535
  }
533
536
  }
534
537
 
538
+ /******************************************************************************/
539
+ /* Scope node functions */
540
+ /******************************************************************************/
541
+
542
+ // Fill in the caller-provided scope node from the given node: tag it as a scope node, copy the node's location, and pull parameters/body/locals from the node according to its type.
543
+ void
544
+ yp_scope_node_init(yp_node_t *node, yp_scope_node_t *scope) {
545
+ scope->base.type = YP_NODE_SCOPE_NODE;
546
+ scope->base.location.start = node->location.start; // the scope spans exactly the source range of the node it is built from
547
+ scope->base.location.end = node->location.end;
548
+
549
+ scope->parameters = NULL; // stays NULL for node types whose case below assigns no parameters
550
+ scope->body = NULL;
551
+ yp_constant_id_list_init(&scope->locals); // initialized here; every handled case below then assigns the node's own locals over it
552
+
553
+ switch (YP_NODE_TYPE(node)) {
554
+ case YP_NODE_BLOCK_NODE: {
555
+ yp_block_node_t *cast = (yp_block_node_t *) node;
556
+ if (cast->parameters) scope->parameters = cast->parameters->parameters; // block parameters are wrapped; take the inner parameters only when the wrapper exists
557
+ scope->body = cast->body;
558
+ scope->locals = cast->locals;
559
+ break;
560
+ }
561
+ case YP_NODE_CLASS_NODE: {
562
+ yp_class_node_t *cast = (yp_class_node_t *) node;
563
+ scope->body = cast->body;
564
+ scope->locals = cast->locals;
565
+ break;
566
+ }
567
+ case YP_NODE_DEF_NODE: {
568
+ yp_def_node_t *cast = (yp_def_node_t *) node;
569
+ scope->parameters = cast->parameters; // def nodes hold their parameters directly, so no unwrapping is needed
570
+ scope->body = cast->body;
571
+ scope->locals = cast->locals;
572
+ break;
573
+ }
574
+ case YP_NODE_LAMBDA_NODE: {
575
+ yp_lambda_node_t *cast = (yp_lambda_node_t *) node;
576
+ if (cast->parameters) scope->parameters = cast->parameters->parameters; // same wrapped-parameters handling as the block case
577
+ scope->body = cast->body;
578
+ scope->locals = cast->locals;
579
+ break;
580
+ }
581
+ case YP_NODE_MODULE_NODE: {
582
+ yp_module_node_t *cast = (yp_module_node_t *) node;
583
+ scope->body = cast->body;
584
+ scope->locals = cast->locals;
585
+ break;
586
+ }
587
+ case YP_NODE_PROGRAM_NODE: {
588
+ yp_program_node_t *cast = (yp_program_node_t *) node;
589
+ scope->body = (yp_node_t *) cast->statements; // the program node keeps its body in `statements`; cast it to the generic node pointer
590
+ scope->locals = cast->locals;
591
+ break;
592
+ }
593
+ case YP_NODE_SINGLETON_CLASS_NODE: {
594
+ yp_singleton_class_node_t *cast = (yp_singleton_class_node_t *) node;
595
+ scope->body = cast->body;
596
+ scope->locals = cast->locals;
597
+ break;
598
+ }
599
+ default:
600
+ assert(false && "unreachable"); // any other node type is unsupported here; with assertions disabled the scope keeps the defaults set above
601
+ break;
602
+ }
603
+ }
604
+
535
605
  /******************************************************************************/
536
606
  /* Node creation functions */
537
607
  /******************************************************************************/
@@ -993,7 +1063,7 @@ yp_block_argument_node_create(yp_parser_t *parser, const yp_token_t *operator, y
993
1063
 
994
1064
  // Allocate and initialize a new BlockNode node.
995
1065
  static yp_block_node_t *
996
- yp_block_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *opening, yp_block_parameters_node_t *parameters, yp_node_t *statements, const yp_token_t *closing) {
1066
+ yp_block_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *opening, yp_block_parameters_node_t *parameters, yp_node_t *body, const yp_token_t *closing) {
997
1067
  yp_block_node_t *node = YP_ALLOC_NODE(parser, yp_block_node_t);
998
1068
 
999
1069
  *node = (yp_block_node_t) {
@@ -1003,7 +1073,7 @@ yp_block_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const y
1003
1073
  },
1004
1074
  .locals = *locals,
1005
1075
  .parameters = parameters,
1006
- .statements = statements,
1076
+ .body = body,
1007
1077
  .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
1008
1078
  .closing_loc = YP_LOCATION_TOKEN_VALUE(closing)
1009
1079
  };
@@ -1126,7 +1196,7 @@ yp_call_node_create(yp_parser_t *parser) {
1126
1196
  },
1127
1197
  .receiver = NULL,
1128
1198
  .operator_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1129
- .message_loc = YP_LOCATION_NULL_VALUE(parser),
1199
+ .message_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1130
1200
  .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1131
1201
  .arguments = NULL,
1132
1202
  .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
@@ -1461,7 +1531,7 @@ yp_case_node_end_keyword_loc_set(yp_case_node_t *node, const yp_token_t *end_key
1461
1531
 
1462
1532
  // Allocate a new ClassNode node.
1463
1533
  static yp_class_node_t *
1464
- yp_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *class_keyword, yp_node_t *constant_path, const yp_token_t *inheritance_operator, yp_node_t *superclass, yp_node_t *statements, const yp_token_t *end_keyword) {
1534
+ yp_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *class_keyword, yp_node_t *constant_path, const yp_token_t *name, const yp_token_t *inheritance_operator, yp_node_t *superclass, yp_node_t *body, const yp_token_t *end_keyword) {
1465
1535
  yp_class_node_t *node = YP_ALLOC_NODE(parser, yp_class_node_t);
1466
1536
 
1467
1537
  *node = (yp_class_node_t) {
@@ -1474,23 +1544,25 @@ yp_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const y
1474
1544
  .constant_path = constant_path,
1475
1545
  .inheritance_operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
1476
1546
  .superclass = superclass,
1477
- .statements = statements,
1478
- .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword)
1547
+ .body = body,
1548
+ .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword),
1549
+ .name = YP_EMPTY_STRING
1479
1550
  };
1480
1551
 
1552
+ yp_string_shared_init(&node->name, name->start, name->end);
1481
1553
  return node;
1482
1554
  }
1483
1555
 
1484
- // Allocate and initialize a new ClassVariableOperatorAndWriteNode node.
1485
- static yp_class_variable_operator_and_write_node_t *
1486
- yp_class_variable_operator_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1556
+ // Allocate and initialize a new ClassVariableAndWriteNode node.
1557
+ static yp_class_variable_and_write_node_t *
1558
+ yp_class_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1487
1559
  assert(YP_NODE_TYPE_P(target, YP_NODE_CLASS_VARIABLE_READ_NODE));
1488
1560
  assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
1489
- yp_class_variable_operator_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_operator_and_write_node_t);
1561
+ yp_class_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_and_write_node_t);
1490
1562
 
1491
- *node = (yp_class_variable_operator_and_write_node_t) {
1563
+ *node = (yp_class_variable_and_write_node_t) {
1492
1564
  {
1493
- .type = YP_NODE_CLASS_VARIABLE_OPERATOR_AND_WRITE_NODE,
1565
+ .type = YP_NODE_CLASS_VARIABLE_AND_WRITE_NODE,
1494
1566
  .location = {
1495
1567
  .start = target->location.start,
1496
1568
  .end = value->location.end
@@ -1526,16 +1598,16 @@ yp_class_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *tar
1526
1598
  return node;
1527
1599
  }
1528
1600
 
1529
- // Allocate and initialize a new ClassVariableOperatorOrWriteNode node.
1530
- static yp_class_variable_operator_or_write_node_t *
1531
- yp_class_variable_operator_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1601
+ // Allocate and initialize a new ClassVariableOrWriteNode node.
1602
+ static yp_class_variable_or_write_node_t *
1603
+ yp_class_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1532
1604
  assert(YP_NODE_TYPE_P(target, YP_NODE_CLASS_VARIABLE_READ_NODE));
1533
1605
  assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
1534
- yp_class_variable_operator_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_operator_or_write_node_t);
1606
+ yp_class_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_or_write_node_t);
1535
1607
 
1536
- *node = (yp_class_variable_operator_or_write_node_t) {
1608
+ *node = (yp_class_variable_or_write_node_t) {
1537
1609
  {
1538
- .type = YP_NODE_CLASS_VARIABLE_OPERATOR_OR_WRITE_NODE,
1610
+ .type = YP_NODE_CLASS_VARIABLE_OR_WRITE_NODE,
1539
1611
  .location = {
1540
1612
  .start = target->location.start,
1541
1613
  .end = value->location.end
@@ -1568,10 +1640,10 @@ yp_class_variable_read_node_to_class_variable_write_node(yp_parser_t *parser, yp
1568
1640
  .type = YP_NODE_CLASS_VARIABLE_WRITE_NODE,
1569
1641
  .location = {
1570
1642
  .start = read_node->base.location.start,
1571
- .end = value != NULL ? value->location.end : read_node->base.location.end
1643
+ .end = value->location.end
1572
1644
  },
1573
1645
  },
1574
- .name_loc = YP_LOCATION_NODE_VALUE((yp_node_t *)read_node),
1646
+ .name_loc = YP_LOCATION_NODE_VALUE((yp_node_t *) read_node),
1575
1647
  .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
1576
1648
  .value = value
1577
1649
  };
@@ -1579,15 +1651,15 @@ yp_class_variable_read_node_to_class_variable_write_node(yp_parser_t *parser, yp
1579
1651
  return node;
1580
1652
  }
1581
1653
 
1582
- // Allocate and initialize a new ConstantPathOperatorAndWriteNode node.
1583
- static yp_constant_path_operator_and_write_node_t *
1584
- yp_constant_path_operator_and_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1654
+ // Allocate and initialize a new ConstantPathAndWriteNode node.
1655
+ static yp_constant_path_and_write_node_t *
1656
+ yp_constant_path_and_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1585
1657
  assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
1586
- yp_constant_path_operator_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_operator_and_write_node_t);
1658
+ yp_constant_path_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_and_write_node_t);
1587
1659
 
1588
- *node = (yp_constant_path_operator_and_write_node_t) {
1660
+ *node = (yp_constant_path_and_write_node_t) {
1589
1661
  {
1590
- .type = YP_NODE_CONSTANT_PATH_OPERATOR_AND_WRITE_NODE,
1662
+ .type = YP_NODE_CONSTANT_PATH_AND_WRITE_NODE,
1591
1663
  .location = {
1592
1664
  .start = target->base.location.start,
1593
1665
  .end = value->location.end
@@ -1623,15 +1695,15 @@ yp_constant_path_operator_write_node_create(yp_parser_t *parser, yp_constant_pat
1623
1695
  return node;
1624
1696
  }
1625
1697
 
1626
- // Allocate and initialize a new ConstantPathOperatorOrWriteNode node.
1627
- static yp_constant_path_operator_or_write_node_t *
1628
- yp_constant_path_operator_or_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1698
+ // Allocate and initialize a new ConstantPathOrWriteNode node.
1699
+ static yp_constant_path_or_write_node_t *
1700
+ yp_constant_path_or_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1629
1701
  assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
1630
- yp_constant_path_operator_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_operator_or_write_node_t);
1702
+ yp_constant_path_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_or_write_node_t);
1631
1703
 
1632
- *node = (yp_constant_path_operator_or_write_node_t) {
1704
+ *node = (yp_constant_path_or_write_node_t) {
1633
1705
  {
1634
- .type = YP_NODE_CONSTANT_PATH_OPERATOR_OR_WRITE_NODE,
1706
+ .type = YP_NODE_CONSTANT_PATH_OR_WRITE_NODE,
1635
1707
  .location = {
1636
1708
  .start = target->base.location.start,
1637
1709
  .end = value->location.end
@@ -1676,7 +1748,7 @@ yp_constant_path_write_node_create(yp_parser_t *parser, yp_constant_path_node_t
1676
1748
  .type = YP_NODE_CONSTANT_PATH_WRITE_NODE,
1677
1749
  .location = {
1678
1750
  .start = target->base.location.start,
1679
- .end = (value == NULL ? target->base.location.end : value->location.end)
1751
+ .end = value->location.end
1680
1752
  },
1681
1753
  },
1682
1754
  .target = target,
@@ -1687,16 +1759,16 @@ yp_constant_path_write_node_create(yp_parser_t *parser, yp_constant_path_node_t
1687
1759
  return node;
1688
1760
  }
1689
1761
 
1690
- // Allocate and initialize a new ConstantOperatorAndWriteNode node.
1691
- static yp_constant_operator_and_write_node_t *
1692
- yp_constant_operator_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1762
+ // Allocate and initialize a new ConstantAndWriteNode node.
1763
+ static yp_constant_and_write_node_t *
1764
+ yp_constant_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1693
1765
  assert(YP_NODE_TYPE_P(target, YP_NODE_CONSTANT_READ_NODE));
1694
1766
  assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
1695
- yp_constant_operator_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_operator_and_write_node_t);
1767
+ yp_constant_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_and_write_node_t);
1696
1768
 
1697
- *node = (yp_constant_operator_and_write_node_t) {
1769
+ *node = (yp_constant_and_write_node_t) {
1698
1770
  {
1699
- .type = YP_NODE_CONSTANT_OPERATOR_AND_WRITE_NODE,
1771
+ .type = YP_NODE_CONSTANT_AND_WRITE_NODE,
1700
1772
  .location = {
1701
1773
  .start = target->location.start,
1702
1774
  .end = value->location.end
@@ -1732,16 +1804,16 @@ yp_constant_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, c
1732
1804
  return node;
1733
1805
  }
1734
1806
 
1735
- // Allocate and initialize a new ConstantOperatorOrWriteNode node.
1736
- static yp_constant_operator_or_write_node_t *
1737
- yp_constant_operator_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1807
+ // Allocate and initialize a new ConstantOrWriteNode node.
1808
+ static yp_constant_or_write_node_t *
1809
+ yp_constant_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1738
1810
  assert(YP_NODE_TYPE_P(target, YP_NODE_CONSTANT_READ_NODE));
1739
1811
  assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
1740
- yp_constant_operator_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_operator_or_write_node_t);
1812
+ yp_constant_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_or_write_node_t);
1741
1813
 
1742
- *node = (yp_constant_operator_or_write_node_t) {
1814
+ *node = (yp_constant_or_write_node_t) {
1743
1815
  {
1744
- .type = YP_NODE_CONSTANT_OPERATOR_OR_WRITE_NODE,
1816
+ .type = YP_NODE_CONSTANT_OR_WRITE_NODE,
1745
1817
  .location = {
1746
1818
  .start = target->location.start,
1747
1819
  .end = value->location.end
@@ -1775,7 +1847,7 @@ yp_constant_write_node_create(yp_parser_t *parser, yp_location_t *name_loc, cons
1775
1847
  .type = YP_NODE_CONSTANT_WRITE_NODE,
1776
1848
  .location = {
1777
1849
  .start = name_loc->start,
1778
- .end = value != NULL ? value->location.end : name_loc->end
1850
+ .end = value->location.end
1779
1851
  },
1780
1852
  },
1781
1853
  .name_loc = *name_loc,
@@ -1793,7 +1865,7 @@ yp_def_node_create(
1793
1865
  const yp_token_t *name,
1794
1866
  yp_node_t *receiver,
1795
1867
  yp_parameters_node_t *parameters,
1796
- yp_node_t *statements,
1868
+ yp_node_t *body,
1797
1869
  yp_constant_id_list_t *locals,
1798
1870
  const yp_token_t *def_keyword,
1799
1871
  const yp_token_t *operator,
@@ -1806,7 +1878,7 @@ yp_def_node_create(
1806
1878
  const char *end;
1807
1879
 
1808
1880
  if (end_keyword->type == YP_TOKEN_NOT_PROVIDED) {
1809
- end = statements->location.end;
1881
+ end = body->location.end;
1810
1882
  } else {
1811
1883
  end = end_keyword->end;
1812
1884
  }
@@ -1819,7 +1891,7 @@ yp_def_node_create(
1819
1891
  .name_loc = YP_LOCATION_TOKEN_VALUE(name),
1820
1892
  .receiver = receiver,
1821
1893
  .parameters = parameters,
1822
- .statements = statements,
1894
+ .body = body,
1823
1895
  .locals = *locals,
1824
1896
  .def_keyword_loc = YP_LOCATION_TOKEN_VALUE(def_keyword),
1825
1897
  .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
@@ -2189,16 +2261,16 @@ yp_hash_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *assoc
2189
2261
  return node;
2190
2262
  }
2191
2263
 
2192
- // Allocate and initialize a new GlobalVariableOperatorAndWriteNode node.
2193
- static yp_global_variable_operator_and_write_node_t *
2194
- yp_global_variable_operator_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2264
+ // Allocate and initialize a new GlobalVariableAndWriteNode node.
2265
+ static yp_global_variable_and_write_node_t *
2266
+ yp_global_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2195
2267
  assert(YP_NODE_TYPE_P(target, YP_NODE_GLOBAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_BACK_REFERENCE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_NUMBERED_REFERENCE_READ_NODE));
2196
2268
  assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2197
- yp_global_variable_operator_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_operator_and_write_node_t);
2269
+ yp_global_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_and_write_node_t);
2198
2270
 
2199
- *node = (yp_global_variable_operator_and_write_node_t) {
2271
+ *node = (yp_global_variable_and_write_node_t) {
2200
2272
  {
2201
- .type = YP_NODE_GLOBAL_VARIABLE_OPERATOR_AND_WRITE_NODE,
2273
+ .type = YP_NODE_GLOBAL_VARIABLE_AND_WRITE_NODE,
2202
2274
  .location = {
2203
2275
  .start = target->location.start,
2204
2276
  .end = value->location.end
@@ -2234,16 +2306,16 @@ yp_global_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *ta
2234
2306
  return node;
2235
2307
  }
2236
2308
 
2237
- // Allocate and initialize a new GlobalVariableOperatorOrWriteNode node.
2238
- static yp_global_variable_operator_or_write_node_t *
2239
- yp_global_variable_operator_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2309
+ // Allocate and initialize a new GlobalVariableOrWriteNode node.
2310
+ static yp_global_variable_or_write_node_t *
2311
+ yp_global_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2240
2312
  assert(YP_NODE_TYPE_P(target, YP_NODE_GLOBAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_BACK_REFERENCE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_NUMBERED_REFERENCE_READ_NODE));
2241
2313
  assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
2242
- yp_global_variable_operator_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_operator_or_write_node_t);
2314
+ yp_global_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_or_write_node_t);
2243
2315
 
2244
- *node = (yp_global_variable_operator_or_write_node_t) {
2316
+ *node = (yp_global_variable_or_write_node_t) {
2245
2317
  {
2246
- .type = YP_NODE_GLOBAL_VARIABLE_OPERATOR_OR_WRITE_NODE,
2318
+ .type = YP_NODE_GLOBAL_VARIABLE_OR_WRITE_NODE,
2247
2319
  .location = {
2248
2320
  .start = target->location.start,
2249
2321
  .end = value->location.end
@@ -2282,7 +2354,7 @@ yp_global_variable_write_node_create(yp_parser_t *parser, const yp_location_t *n
2282
2354
  .type = YP_NODE_GLOBAL_VARIABLE_WRITE_NODE,
2283
2355
  .location = {
2284
2356
  .start = name_loc->start,
2285
- .end = (value == NULL ? name_loc->end : value->location.end)
2357
+ .end = value->location.end
2286
2358
  },
2287
2359
  },
2288
2360
  .name_loc = *name_loc,
@@ -2547,16 +2619,16 @@ yp_in_node_create(yp_parser_t *parser, yp_node_t *pattern, yp_statements_node_t
2547
2619
  return node;
2548
2620
  }
2549
2621
 
2550
- // Allocate and initialize a new InstanceVariableOperatorAndWriteNode node.
2551
- static yp_instance_variable_operator_and_write_node_t *
2552
- yp_instance_variable_operator_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2622
+ // Allocate and initialize a new InstanceVariableAndWriteNode node.
2623
+ static yp_instance_variable_and_write_node_t *
2624
+ yp_instance_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2553
2625
  assert(YP_NODE_TYPE_P(target, YP_NODE_INSTANCE_VARIABLE_READ_NODE));
2554
2626
  assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2555
- yp_instance_variable_operator_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_operator_and_write_node_t);
2627
+ yp_instance_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_and_write_node_t);
2556
2628
 
2557
- *node = (yp_instance_variable_operator_and_write_node_t) {
2629
+ *node = (yp_instance_variable_and_write_node_t) {
2558
2630
  {
2559
- .type = YP_NODE_INSTANCE_VARIABLE_OPERATOR_AND_WRITE_NODE,
2631
+ .type = YP_NODE_INSTANCE_VARIABLE_AND_WRITE_NODE,
2560
2632
  .location = {
2561
2633
  .start = target->location.start,
2562
2634
  .end = value->location.end
@@ -2592,16 +2664,16 @@ yp_instance_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *
2592
2664
  return node;
2593
2665
  }
2594
2666
 
2595
- // Allocate and initialize a new InstanceVariableOperatorOrWriteNode node.
2596
- static yp_instance_variable_operator_or_write_node_t *
2597
- yp_instance_variable_operator_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2667
+ // Allocate and initialize a new InstanceVariableOrWriteNode node.
2668
+ static yp_instance_variable_or_write_node_t *
2669
+ yp_instance_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2598
2670
  assert(YP_NODE_TYPE_P(target, YP_NODE_INSTANCE_VARIABLE_READ_NODE));
2599
2671
  assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
2600
- yp_instance_variable_operator_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_operator_or_write_node_t);
2672
+ yp_instance_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_or_write_node_t);
2601
2673
 
2602
- *node = (yp_instance_variable_operator_or_write_node_t) {
2674
+ *node = (yp_instance_variable_or_write_node_t) {
2603
2675
  {
2604
- .type = YP_NODE_INSTANCE_VARIABLE_OPERATOR_OR_WRITE_NODE,
2676
+ .type = YP_NODE_INSTANCE_VARIABLE_OR_WRITE_NODE,
2605
2677
  .location = {
2606
2678
  .start = target->location.start,
2607
2679
  .end = value->location.end
@@ -2637,7 +2709,7 @@ yp_instance_variable_write_node_create(yp_parser_t *parser, yp_instance_variable
2637
2709
  .type = YP_NODE_INSTANCE_VARIABLE_WRITE_NODE,
2638
2710
  .location = {
2639
2711
  .start = read_node->base.location.start,
2640
- .end = value == NULL ? read_node->base.location.end : value->location.end
2712
+ .end = value->location.end
2641
2713
  }
2642
2714
  },
2643
2715
  .name_loc = YP_LOCATION_NODE_BASE_VALUE(read_node),
@@ -2706,6 +2778,10 @@ yp_interpolated_string_node_create(yp_parser_t *parser, const yp_token_t *openin
2706
2778
  // Append a part to an InterpolatedStringNode node, growing the node's location so that it covers the appended part.
2707
2779
  static inline void
2708
2780
  yp_interpolated_string_node_append(yp_interpolated_string_node_t *node, yp_node_t *part) {
2781
+ if (node->parts.size == 0 && node->opening_loc.start == NULL) { // first part of a node with no recorded opening location
2782
+ node->base.location.start = part->location.start; // so the node begins where its first part begins
2783
+ }
2784
+
2709
2785
  yp_node_list_append(&node->parts, part);
2710
2786
  node->base.location.end = part->location.end; // the node now ends at the end of the most recently appended part
2711
2787
  }
@@ -2747,12 +2823,6 @@ yp_interpolated_symbol_node_append(yp_interpolated_symbol_node_t *node, yp_node_
2747
2823
  node->base.location.end = part->location.end;
2748
2824
  }
2749
2825
 
2750
- static inline void
2751
- yp_interpolated_symbol_node_closing_set(yp_interpolated_symbol_node_t *node, const yp_token_t *closing) {
2752
- node->closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
2753
- node->base.location.end = closing->end;
2754
- }
2755
-
2756
2826
  // Allocate a new InterpolatedXStringNode node.
2757
2827
  static yp_interpolated_x_string_node_t *
2758
2828
  yp_interpolated_xstring_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *closing) {
@@ -2860,10 +2930,11 @@ static yp_lambda_node_t *
2860
2930
  yp_lambda_node_create(
2861
2931
  yp_parser_t *parser,
2862
2932
  yp_constant_id_list_t *locals,
2933
+ const yp_token_t *operator,
2863
2934
  const yp_token_t *opening,
2935
+ const yp_token_t *closing,
2864
2936
  yp_block_parameters_node_t *parameters,
2865
- yp_node_t *statements,
2866
- const yp_token_t *closing
2937
+ yp_node_t *body
2867
2938
  ) {
2868
2939
  yp_lambda_node_t *node = YP_ALLOC_NODE(parser, yp_lambda_node_t);
2869
2940
 
@@ -2871,29 +2942,31 @@ yp_lambda_node_create(
2871
2942
  {
2872
2943
  .type = YP_NODE_LAMBDA_NODE,
2873
2944
  .location = {
2874
- .start = opening->start,
2945
+ .start = operator->start,
2875
2946
  .end = closing->end
2876
2947
  },
2877
2948
  },
2878
2949
  .locals = *locals,
2950
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2879
2951
  .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
2952
+ .closing_loc = YP_LOCATION_TOKEN_VALUE(closing),
2880
2953
  .parameters = parameters,
2881
- .statements = statements
2954
+ .body = body
2882
2955
  };
2883
2956
 
2884
2957
  return node;
2885
2958
  }
2886
2959
 
2887
- // Allocate and initialize a new LocalVariableOperatorAndWriteNode node.
2888
- static yp_local_variable_operator_and_write_node_t *
2889
- yp_local_variable_operator_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id) {
2960
+ // Allocate and initialize a new LocalVariableAndWriteNode node.
2961
+ static yp_local_variable_and_write_node_t *
2962
+ yp_local_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id, uint32_t depth) {
2890
2963
  assert(YP_NODE_TYPE_P(target, YP_NODE_LOCAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_CALL_NODE));
2891
2964
  assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2892
- yp_local_variable_operator_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_operator_and_write_node_t);
2965
+ yp_local_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_and_write_node_t);
2893
2966
 
2894
- *node = (yp_local_variable_operator_and_write_node_t) {
2967
+ *node = (yp_local_variable_and_write_node_t) {
2895
2968
  {
2896
- .type = YP_NODE_LOCAL_VARIABLE_OPERATOR_AND_WRITE_NODE,
2969
+ .type = YP_NODE_LOCAL_VARIABLE_AND_WRITE_NODE,
2897
2970
  .location = {
2898
2971
  .start = target->location.start,
2899
2972
  .end = value->location.end
@@ -2902,7 +2975,8 @@ yp_local_variable_operator_and_write_node_create(yp_parser_t *parser, yp_node_t
2902
2975
  .name_loc = target->location,
2903
2976
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2904
2977
  .value = value,
2905
- .constant_id = constant_id
2978
+ .constant_id = constant_id,
2979
+ .depth = depth
2906
2980
  };
2907
2981
 
2908
2982
  return node;
@@ -2910,7 +2984,7 @@ yp_local_variable_operator_and_write_node_create(yp_parser_t *parser, yp_node_t
2910
2984
 
2911
2985
  // Allocate and initialize a new LocalVariableOperatorWriteNode node.
2912
2986
  static yp_local_variable_operator_write_node_t *
2913
- yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id) {
2987
+ yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id, uint32_t depth) {
2914
2988
  yp_local_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_operator_write_node_t);
2915
2989
 
2916
2990
  *node = (yp_local_variable_operator_write_node_t) {
@@ -2925,22 +2999,23 @@ yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *tar
2925
2999
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2926
3000
  .value = value,
2927
3001
  .constant_id = constant_id,
2928
- .operator_id = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
3002
+ .operator_id = yp_parser_constant_id_location(parser, operator->start, operator->end - 1),
3003
+ .depth = depth
2929
3004
  };
2930
3005
 
2931
3006
  return node;
2932
3007
  }
2933
3008
 
2934
- // Allocate and initialize a new LocalVariableOperatorOrWriteNode node.
2935
- static yp_local_variable_operator_or_write_node_t *
2936
- yp_local_variable_operator_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id) {
3009
+ // Allocate and initialize a new LocalVariableOrWriteNode node.
3010
+ static yp_local_variable_or_write_node_t *
3011
+ yp_local_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id, uint32_t depth) {
2937
3012
  assert(YP_NODE_TYPE_P(target, YP_NODE_LOCAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_CALL_NODE));
2938
3013
  assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
2939
- yp_local_variable_operator_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_operator_or_write_node_t);
3014
+ yp_local_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_or_write_node_t);
2940
3015
 
2941
- *node = (yp_local_variable_operator_or_write_node_t) {
3016
+ *node = (yp_local_variable_or_write_node_t) {
2942
3017
  {
2943
- .type = YP_NODE_LOCAL_VARIABLE_OPERATOR_OR_WRITE_NODE,
3018
+ .type = YP_NODE_LOCAL_VARIABLE_OR_WRITE_NODE,
2944
3019
  .location = {
2945
3020
  .start = target->location.start,
2946
3021
  .end = value->location.end
@@ -2949,7 +3024,8 @@ yp_local_variable_operator_or_write_node_create(yp_parser_t *parser, yp_node_t *
2949
3024
  .name_loc = target->location,
2950
3025
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2951
3026
  .value = value,
2952
- .constant_id = constant_id
3027
+ .constant_id = constant_id,
3028
+ .depth = depth
2953
3029
  };
2954
3030
 
2955
3031
  return node;
@@ -2982,7 +3058,7 @@ yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t consta
2982
3058
  .type = YP_NODE_LOCAL_VARIABLE_WRITE_NODE,
2983
3059
  .location = {
2984
3060
  .start = name_loc->start,
2985
- .end = value == NULL ? name_loc->end : value->location.end
3061
+ .end = value->location.end
2986
3062
  }
2987
3063
  },
2988
3064
  .constant_id = constant_id,
@@ -2995,21 +3071,18 @@ yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t consta
2995
3071
  return node;
2996
3072
  }
2997
3073
 
2998
- // Allocate and initialize a new LocalVariableWriteNode node without an operator or target.
2999
- static yp_local_variable_write_node_t *
3074
+ // Allocate and initialize a new LocalVariableTargetNode node.
3075
+ static yp_local_variable_target_node_t *
3000
3076
  yp_local_variable_target_node_create(yp_parser_t *parser, const yp_token_t *name) {
3001
- yp_local_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_write_node_t);
3077
+ yp_local_variable_target_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_target_node_t);
3002
3078
 
3003
- *node = (yp_local_variable_write_node_t) {
3079
+ *node = (yp_local_variable_target_node_t) {
3004
3080
  {
3005
- .type = YP_NODE_LOCAL_VARIABLE_WRITE_NODE,
3081
+ .type = YP_NODE_LOCAL_VARIABLE_TARGET_NODE,
3006
3082
  .location = YP_LOCATION_TOKEN_VALUE(name)
3007
3083
  },
3008
3084
  .constant_id = yp_parser_constant_id_token(parser, name),
3009
- .depth = 0,
3010
- .value = NULL,
3011
- .name_loc = YP_LOCATION_TOKEN_VALUE(name),
3012
- .operator_loc = { .start = NULL, .end = NULL }
3085
+ .depth = 0
3013
3086
  };
3014
3087
 
3015
3088
  return node;
@@ -3059,7 +3132,7 @@ yp_match_required_node_create(yp_parser_t *parser, yp_node_t *value, yp_node_t *
3059
3132
 
3060
3133
  // Allocate a new ModuleNode node.
3061
3134
  static yp_module_node_t *
3062
- yp_module_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *module_keyword, yp_node_t *constant_path, yp_node_t *statements, const yp_token_t *end_keyword) {
3135
+ yp_module_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *module_keyword, yp_node_t *constant_path, const yp_token_t *name, yp_node_t *body, const yp_token_t *end_keyword) {
3063
3136
  yp_module_node_t *node = YP_ALLOC_NODE(parser, yp_module_node_t);
3064
3137
 
3065
3138
  *node = (yp_module_node_t) {
@@ -3073,10 +3146,12 @@ yp_module_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const
3073
3146
  .locals = (locals == NULL ? ((yp_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
3074
3147
  .module_keyword_loc = YP_LOCATION_TOKEN_VALUE(module_keyword),
3075
3148
  .constant_path = constant_path,
3076
- .statements = statements,
3077
- .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword)
3149
+ .body = body,
3150
+ .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword),
3151
+ .name = YP_EMPTY_STRING
3078
3152
  };
3079
3153
 
3154
+ yp_string_shared_init(&node->name, name->start, name->end);
3080
3155
  return node;
3081
3156
  }
3082
3157
 
@@ -3088,7 +3163,10 @@ yp_multi_write_node_create(yp_parser_t *parser, const yp_token_t *operator, yp_n
3088
3163
  *node = (yp_multi_write_node_t) {
3089
3164
  {
3090
3165
  .type = YP_NODE_MULTI_WRITE_NODE,
3091
- .location = { .start = NULL, .end = NULL },
3166
+ .location = {
3167
+ .start = lparen_loc->start,
3168
+ .end = value == NULL ? rparen_loc->end : value->location.end
3169
+ },
3092
3170
  },
3093
3171
  .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3094
3172
  .value = value,
@@ -3343,7 +3421,7 @@ yp_program_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, yp_st
3343
3421
 
3344
3422
  // Allocate and initialize new ParenthesesNode node.
3345
3423
  static yp_parentheses_node_t *
3346
- yp_parentheses_node_create(yp_parser_t *parser, const yp_token_t *opening, yp_node_t *statements, const yp_token_t *closing) {
3424
+ yp_parentheses_node_create(yp_parser_t *parser, const yp_token_t *opening, yp_node_t *body, const yp_token_t *closing) {
3347
3425
  yp_parentheses_node_t *node = YP_ALLOC_NODE(parser, yp_parentheses_node_t);
3348
3426
 
3349
3427
  *node = (yp_parentheses_node_t) {
@@ -3354,7 +3432,7 @@ yp_parentheses_node_create(yp_parser_t *parser, const yp_token_t *opening, yp_no
3354
3432
  .end = closing->end
3355
3433
  }
3356
3434
  },
3357
- .statements = statements,
3435
+ .body = body,
3358
3436
  .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
3359
3437
  .closing_loc = YP_LOCATION_TOKEN_VALUE(closing)
3360
3438
  };
@@ -3700,7 +3778,7 @@ yp_self_node_create(yp_parser_t *parser, const yp_token_t *token) {
3700
3778
 
3701
3779
  // Allocate a new SingletonClassNode node.
3702
3780
  static yp_singleton_class_node_t *
3703
- yp_singleton_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *class_keyword, const yp_token_t *operator, yp_node_t *expression, yp_node_t *statements, const yp_token_t *end_keyword) {
3781
+ yp_singleton_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *class_keyword, const yp_token_t *operator, yp_node_t *expression, yp_node_t *body, const yp_token_t *end_keyword) {
3704
3782
  yp_singleton_class_node_t *node = YP_ALLOC_NODE(parser, yp_singleton_class_node_t);
3705
3783
 
3706
3784
  *node = (yp_singleton_class_node_t) {
@@ -3715,7 +3793,7 @@ yp_singleton_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *local
3715
3793
  .class_keyword_loc = YP_LOCATION_TOKEN_VALUE(class_keyword),
3716
3794
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
3717
3795
  .expression = expression,
3718
- .statements = statements,
3796
+ .body = body,
3719
3797
  .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword)
3720
3798
  };
3721
3799
 
@@ -3934,10 +4012,10 @@ yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) {
3934
4012
  yp_token_t label = { .type = YP_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
3935
4013
  node = yp_symbol_node_create(parser, &opening, &label, &closing);
3936
4014
 
3937
- ptrdiff_t length = label.end - label.start;
3938
- assert(length >= 0);
4015
+ assert((label.end - label.start) >= 0);
4016
+ yp_string_shared_init(&node->unescaped, label.start, label.end);
3939
4017
 
3940
- yp_unescape_manipulate_string(parser, label.start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
4018
+ yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL);
3941
4019
  break;
3942
4020
  }
3943
4021
  case YP_TOKEN_MISSING: {
@@ -3978,20 +4056,20 @@ yp_symbol_node_label_p(yp_node_t *node) {
3978
4056
 
3979
4057
  // Convert the given StringNode node to a SymbolNode node.
3980
4058
  static yp_symbol_node_t *
3981
- yp_string_node_to_symbol_node(yp_parser_t *parser, yp_string_node_t *node) {
4059
+ yp_string_node_to_symbol_node(yp_parser_t *parser, yp_string_node_t *node, const yp_token_t *opening, const yp_token_t *closing) {
3982
4060
  yp_symbol_node_t *new_node = YP_ALLOC_NODE(parser, yp_symbol_node_t);
3983
4061
 
3984
4062
  *new_node = (yp_symbol_node_t) {
3985
4063
  {
3986
4064
  .type = YP_NODE_SYMBOL_NODE,
3987
4065
  .location = {
3988
- .start = node->base.location.start - 2,
3989
- .end = node->base.location.end + 1
4066
+ .start = opening->start,
4067
+ .end = closing->end
3990
4068
  }
3991
4069
  },
3992
- .opening_loc = node->opening_loc,
4070
+ .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
3993
4071
  .value_loc = node->content_loc,
3994
- .closing_loc = node->closing_loc,
4072
+ .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
3995
4073
  .unescaped = node->unescaped
3996
4074
  };
3997
4075
 
@@ -4130,34 +4208,43 @@ yp_unless_node_end_keyword_loc_set(yp_unless_node_t *node, const yp_token_t *end
4130
4208
 
4131
4209
  // Allocate a new UntilNode node.
4132
4210
  static yp_until_node_t *
4133
- yp_until_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4211
+ yp_until_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_token_t *closing, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4134
4212
  yp_until_node_t *node = YP_ALLOC_NODE(parser, yp_until_node_t);
4135
- bool has_statements = (statements != NULL) && (statements->body.size != 0);
4136
4213
 
4137
- const char *start = NULL;
4138
- if (has_statements && (keyword->start > statements->base.location.start)) {
4139
- start = statements->base.location.start;
4140
- } else {
4141
- start = keyword->start;
4142
- }
4214
+ *node = (yp_until_node_t) {
4215
+ {
4216
+ .type = YP_NODE_UNTIL_NODE,
4217
+ .flags = flags,
4218
+ .location = {
4219
+ .start = keyword->start,
4220
+ .end = closing->end,
4221
+ },
4222
+ },
4223
+ .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
4224
+ .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4225
+ .predicate = predicate,
4226
+ .statements = statements
4227
+ };
4143
4228
 
4144
- const char *end = NULL;
4145
- if (has_statements && (predicate->location.end < statements->base.location.end)) {
4146
- end = statements->base.location.end;
4147
- } else {
4148
- end = predicate->location.end;
4149
- }
4229
+ return node;
4230
+ }
4231
+
4232
+ // Allocate a new UntilNode node.
4233
+ static yp_until_node_t *
4234
+ yp_until_node_modifier_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4235
+ yp_until_node_t *node = YP_ALLOC_NODE(parser, yp_until_node_t);
4150
4236
 
4151
4237
  *node = (yp_until_node_t) {
4152
4238
  {
4153
4239
  .type = YP_NODE_UNTIL_NODE,
4154
4240
  .flags = flags,
4155
4241
  .location = {
4156
- .start = start,
4157
- .end = end,
4242
+ .start = statements->base.location.start,
4243
+ .end = predicate->location.end,
4158
4244
  },
4159
4245
  },
4160
4246
  .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
4247
+ .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4161
4248
  .predicate = predicate,
4162
4249
  .statements = statements
4163
4250
  };
@@ -4205,34 +4292,43 @@ yp_when_node_statements_set(yp_when_node_t *node, yp_statements_node_t *statemen
4205
4292
 
4206
4293
  // Allocate a new WhileNode node.
4207
4294
  static yp_while_node_t *
4208
- yp_while_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4295
+ yp_while_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_token_t *closing, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4209
4296
  yp_while_node_t *node = YP_ALLOC_NODE(parser, yp_while_node_t);
4210
4297
 
4211
- const char *start = NULL;
4212
- bool has_statements = (statements != NULL) && (statements->body.size != 0);
4213
- if (has_statements && (keyword->start > statements->base.location.start)) {
4214
- start = statements->base.location.start;
4215
- } else {
4216
- start = keyword->start;
4217
- }
4298
+ *node = (yp_while_node_t) {
4299
+ {
4300
+ .type = YP_NODE_WHILE_NODE,
4301
+ .flags = flags,
4302
+ .location = {
4303
+ .start = keyword->start,
4304
+ .end = closing->end
4305
+ },
4306
+ },
4307
+ .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
4308
+ .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4309
+ .predicate = predicate,
4310
+ .statements = statements
4311
+ };
4218
4312
 
4219
- const char *end = NULL;
4220
- if (has_statements && (predicate->location.end < statements->base.location.end)) {
4221
- end = statements->base.location.end;
4222
- } else {
4223
- end = predicate->location.end;
4224
- }
4313
+ return node;
4314
+ }
4315
+
4316
+ // Allocate a new WhileNode node.
4317
+ static yp_while_node_t *
4318
+ yp_while_node_modifier_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4319
+ yp_while_node_t *node = YP_ALLOC_NODE(parser, yp_while_node_t);
4225
4320
 
4226
4321
  *node = (yp_while_node_t) {
4227
4322
  {
4228
4323
  .type = YP_NODE_WHILE_NODE,
4229
4324
  .flags = flags,
4230
4325
  .location = {
4231
- .start = start,
4232
- .end = end,
4326
+ .start = statements->base.location.start,
4327
+ .end = predicate->location.end
4233
4328
  },
4234
4329
  },
4235
4330
  .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
4331
+ .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4236
4332
  .predicate = predicate,
4237
4333
  .statements = statements
4238
4334
  };
@@ -4340,13 +4436,15 @@ yp_parser_local_depth(yp_parser_t *parser, yp_token_t *token) {
4340
4436
  }
4341
4437
 
4342
4438
  // Add a local variable from a location to the current scope.
4343
- static void
4439
+ static yp_constant_id_t
4344
4440
  yp_parser_local_add_location(yp_parser_t *parser, const char *start, const char *end) {
4345
4441
  yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, start, end);
4346
4442
 
4347
4443
  if (!yp_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
4348
4444
  yp_constant_id_list_append(&parser->current_scope->locals, constant_id);
4349
4445
  }
4446
+
4447
+ return constant_id;
4350
4448
  }
4351
4449
 
4352
4450
  // Add a local variable from a token to the current scope.
@@ -4494,27 +4592,30 @@ yp_do_loop_stack_p(yp_parser_t *parser) {
4494
4592
  /* Lexer check helpers */
4495
4593
  /******************************************************************************/
4496
4594
 
4497
- // Get the next character in the source starting from parser->current.end and
4498
- // adding the given offset. If that position is beyond the end of the source
4499
- // then return '\0'.
4595
+ // Get the next character in the source starting from +cursor+. If that position
4596
+ // is beyond the end of the source then return '\0'.
4500
4597
  static inline char
4501
- peek_at(yp_parser_t *parser, size_t offset) {
4502
- if (parser->current.end + offset < parser->end) {
4503
- return parser->current.end[offset];
4598
+ peek_at(yp_parser_t *parser, const char *cursor) {
4599
+ if (cursor < parser->end) {
4600
+ return *cursor;
4504
4601
  } else {
4505
4602
  return '\0';
4506
4603
  }
4507
4604
  }
4508
4605
 
4606
+ // Get the next character in the source starting from parser->current.end and
4607
+ // adding the given offset. If that position is beyond the end of the source
4608
+ // then return '\0'.
4609
+ static inline char
4610
+ peek_offset(yp_parser_t *parser, ptrdiff_t offset) {
4611
+ return peek_at(parser, parser->current.end + offset);
4612
+ }
4613
+
4509
4614
  // Get the next character in the source starting from parser->current.end. If
4510
4615
  // that position is beyond the end of the source then return '\0'.
4511
4616
  static inline char
4512
4617
  peek(yp_parser_t *parser) {
4513
- if (parser->current.end < parser->end) {
4514
- return *parser->current.end;
4515
- } else {
4516
- return '\0';
4517
- }
4618
+ return peek_at(parser, parser->current.end);
4518
4619
  }
4519
4620
 
4520
4621
  // Get the next string of length len in the source starting from parser->current.end.
@@ -4539,6 +4640,35 @@ match(yp_parser_t *parser, char value) {
4539
4640
  return false;
4540
4641
  }
4541
4642
 
4643
+ // Return the length of the line ending string starting at +cursor+, or 0 if it
4644
+ // is not a line ending. This function is intended to be CRLF/LF agnostic.
4645
+ static inline size_t
4646
+ match_eol_at(yp_parser_t *parser, const char *cursor) {
4647
+ if (peek_at(parser, cursor) == '\n') {
4648
+ return 1;
4649
+ }
4650
+ if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
4651
+ return 2;
4652
+ }
4653
+ return 0;
4654
+ }
4655
+
4656
+ // Return the length of the line ending string starting at
4657
+ // parser->current.end + offset, or 0 if it is not a line ending. This function
4658
+ // is intended to be CRLF/LF agnostic.
4659
+ static inline size_t
4660
+ match_eol_offset(yp_parser_t *parser, ptrdiff_t offset) {
4661
+ return match_eol_at(parser, parser->current.end + offset);
4662
+ }
4663
+
4664
+ // Return the length of the line ending string starting at parser->current.end,
4665
+ // or 0 if it is not a line ending. This function is intended to be CRLF/LF
4666
+ // agnostic.
4667
+ static inline size_t
4668
+ match_eol(yp_parser_t *parser) {
4669
+ return match_eol_at(parser, parser->current.end);
4670
+ }
4671
+
4542
4672
  // Skip to the next newline character or NUL byte.
4543
4673
  static inline const char *
4544
4674
  next_newline(const char *cursor, ptrdiff_t length) {
@@ -4562,11 +4692,13 @@ parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdi
4562
4692
 
4563
4693
  const char *cursor_limit = cursor + length - key_length + 1;
4564
4694
  while ((cursor = yp_memchr(cursor, 'c', (size_t) (cursor_limit - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
4565
- if (
4566
- (strncmp(cursor, "coding", key_length - 1) == 0) &&
4567
- (cursor[key_length - 1] == ':' || cursor[key_length - 1] == '=')
4568
- ) {
4569
- return cursor + key_length;
4695
+ if (strncmp(cursor, "coding", key_length - 1) == 0) {
4696
+ size_t whitespace_after_coding = yp_strspn_inline_whitespace(cursor + key_length - 1, parser->end - (cursor + key_length - 1));
4697
+ size_t cur_pos = key_length + whitespace_after_coding;
4698
+
4699
+ if (cursor[cur_pos - 1] == ':' || cursor[cur_pos - 1] == '=') {
4700
+ return cursor + cur_pos;
4701
+ }
4570
4702
  }
4571
4703
 
4572
4704
  cursor++;
@@ -4822,7 +4954,7 @@ lex_optional_float_suffix(yp_parser_t *parser) {
4822
4954
  // Here we're going to attempt to parse the optional decimal portion of a
4823
4955
  // float. If it's not there, then it's okay and we'll just continue on.
4824
4956
  if (peek(parser) == '.') {
4825
- if (yp_char_is_decimal_digit(peek_at(parser, 1))) {
4957
+ if (yp_char_is_decimal_digit(peek_offset(parser, 1))) {
4826
4958
  parser->current.end += 2;
4827
4959
  parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
4828
4960
  type = YP_TOKEN_FLOAT;
@@ -4855,7 +4987,7 @@ static yp_token_type_t
4855
4987
  lex_numeric_prefix(yp_parser_t *parser) {
4856
4988
  yp_token_type_t type = YP_TOKEN_INTEGER;
4857
4989
 
4858
- if (parser->current.end[-1] == '0') {
4990
+ if (peek_offset(parser, -1) == '0') {
4859
4991
  switch (*parser->current.end) {
4860
4992
  // 0d1111 is a decimal number
4861
4993
  case 'd':
@@ -4938,7 +5070,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
4938
5070
 
4939
5071
  // If the last character that we consumed was an underscore, then this is
4940
5072
  // actually an invalid integer value, and we should return an invalid token.
4941
- if (parser->current.end[-1] == '_') {
5073
+ if (peek_offset(parser, -1) == '_') {
4942
5074
  yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Number literal cannot end with a `_`.");
4943
5075
  }
4944
5076
 
@@ -5119,7 +5251,7 @@ lex_identifier(yp_parser_t *parser, bool previous_command_start) {
5119
5251
 
5120
5252
  if (
5121
5253
  ((lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
5122
- (peek(parser) == ':') && (peek_at(parser, 1) != ':')
5254
+ (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
5123
5255
  ) {
5124
5256
  // If we're in a position where we can accept a : at the end of an
5125
5257
  // identifier, then we'll optionally accept it.
@@ -5135,7 +5267,7 @@ lex_identifier(yp_parser_t *parser, bool previous_command_start) {
5135
5267
  }
5136
5268
 
5137
5269
  return YP_TOKEN_IDENTIFIER;
5138
- } else if (lex_state_p(parser, YP_LEX_STATE_FNAME) && peek_at(parser, 1) != '~' && peek_at(parser, 1) != '>' && (peek_at(parser, 1) != '=' || peek_at(parser, 2) == '>') && match(parser, '=')) {
5270
+ } else if (lex_state_p(parser, YP_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
5139
5271
  // If we're in a position where we can accept a = at the end of an
5140
5272
  // identifier, then we'll optionally accept it.
5141
5273
  return YP_TOKEN_IDENTIFIER;
@@ -5143,7 +5275,7 @@ lex_identifier(yp_parser_t *parser, bool previous_command_start) {
5143
5275
 
5144
5276
  if (
5145
5277
  ((lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
5146
- peek(parser) == ':' && peek_at(parser, 1) != ':'
5278
+ peek(parser) == ':' && peek_offset(parser, 1) != ':'
5147
5279
  ) {
5148
5280
  // If we're in a position where we can accept a : at the end of an
5149
5281
  // identifier, then we'll optionally accept it.
@@ -5411,7 +5543,7 @@ lex_question_mark(yp_parser_t *parser) {
5411
5543
 
5412
5544
  if (parser->current.start[1] == '\\') {
5413
5545
  lex_state_set(parser, YP_LEX_STATE_END);
5414
- parser->current.end += yp_unescape_calculate_difference(parser->current.start + 1, parser->end, YP_UNESCAPE_ALL, true, &parser->error_list);
5546
+ parser->current.end += yp_unescape_calculate_difference(parser, parser->current.start + 1, YP_UNESCAPE_ALL, true);
5415
5547
  return YP_TOKEN_CHARACTER_LITERAL;
5416
5548
  } else {
5417
5549
  size_t encoding_width = parser->encoding.char_width(parser->current.end, parser->end - parser->current.end);
@@ -5420,7 +5552,7 @@ lex_question_mark(yp_parser_t *parser) {
5420
5552
  // an underscore. We check for this case
5421
5553
  if (
5422
5554
  !(parser->encoding.alnum_char(parser->current.end, parser->end - parser->current.end) ||
5423
- *parser->current.end == '_') ||
5555
+ peek(parser) == '_') ||
5424
5556
  (
5425
5557
  (parser->current.end + encoding_width >= parser->end) ||
5426
5558
  !char_is_identifier(parser, parser->current.end + encoding_width)
@@ -5636,28 +5768,32 @@ parser_lex(yp_parser_t *parser) {
5636
5768
  space_seen = true;
5637
5769
  break;
5638
5770
  case '\r':
5639
- if (peek_at(parser, 1) == '\n') {
5771
+ if (match_eol_offset(parser, 1)) {
5640
5772
  chomping = false;
5641
5773
  } else {
5642
5774
  parser->current.end++;
5643
5775
  space_seen = true;
5644
5776
  }
5645
5777
  break;
5646
- case '\\':
5647
- if (peek_at(parser, 1) == '\n') {
5648
- yp_newline_list_append(&parser->newline_list, parser->current.end + 1);
5649
- parser->current.end += 2;
5650
- space_seen = true;
5651
- } else if (parser->current.end + 2 < parser->end && peek_at(parser, 1) == '\r' && peek_at(parser, 2) == '\n') {
5652
- yp_newline_list_append(&parser->newline_list, parser->current.end + 2);
5653
- parser->current.end += 3;
5654
- space_seen = true;
5778
+ case '\\': {
5779
+ size_t eol_length = match_eol_offset(parser, 1);
5780
+ if (eol_length) {
5781
+ if (parser->heredoc_end) {
5782
+ parser->current.end = parser->heredoc_end;
5783
+ parser->heredoc_end = NULL;
5784
+ } else {
5785
+ parser->current.end += eol_length + 1;
5786
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
5787
+ space_seen = true;
5788
+ }
5655
5789
  } else if (yp_char_is_inline_whitespace(*parser->current.end)) {
5656
5790
  parser->current.end += 2;
5657
5791
  } else {
5658
5792
  chomping = false;
5659
5793
  }
5794
+
5660
5795
  break;
5796
+ }
5661
5797
  default:
5662
5798
  chomping = false;
5663
5799
  break;
@@ -5667,13 +5803,14 @@ parser_lex(yp_parser_t *parser) {
5667
5803
  // Next, we'll set to start of this token to be the current end.
5668
5804
  parser->current.start = parser->current.end;
5669
5805
 
5670
- // We'll check if we're at the end of the file. If we are, then we need to
5671
- // return the EOF token.
5806
+ // We'll check if we're at the end of the file. If we are, then we
5807
+ // need to return the EOF token.
5672
5808
  if (parser->current.end >= parser->end) {
5673
5809
  LEX(YP_TOKEN_EOF);
5674
5810
  }
5675
5811
 
5676
- // Finally, we'll check the current character to determine the next token.
5812
+ // Finally, we'll check the current character to determine the next
5813
+ // token.
5677
5814
  switch (*parser->current.end++) {
5678
5815
  case '\0': // NUL or end of script
5679
5816
  case '\004': // ^D
@@ -5683,16 +5820,14 @@ parser_lex(yp_parser_t *parser) {
5683
5820
 
5684
5821
  case '#': { // comments
5685
5822
  const char *ending = next_newline(parser->current.end, parser->end - parser->current.end);
5686
- while (ending && ending < parser->end && *ending != '\n') {
5687
- ending = next_newline(ending + 1, parser->end - ending);
5688
- }
5689
5823
 
5690
5824
  parser->current.end = ending == NULL ? parser->end : ending + 1;
5691
5825
  parser->current.type = YP_TOKEN_COMMENT;
5692
5826
  parser_lex_callback(parser);
5693
5827
 
5694
- // If we found a comment while lexing, then we're going to add it to the
5695
- // list of comments in the file and keep lexing.
5828
+ // If we found a comment while lexing, then we're going to
5829
+ // add it to the list of comments in the file and keep
5830
+ // lexing.
5696
5831
  yp_comment_t *comment = parser_comment(parser, YP_COMMENT_INLINE);
5697
5832
  yp_list_append(&parser->comment_list, (yp_list_node_t *) comment);
5698
5833
 
@@ -5703,21 +5838,29 @@ parser_lex(yp_parser_t *parser) {
5703
5838
  lexed_comment = true;
5704
5839
  }
5705
5840
  /* fallthrough */
5706
- case '\r': {
5707
- // The only way you can have carriage returns in this particular loop
5708
- // is if you have a carriage return followed by a newline. In that
5709
- // case we'll just skip over the carriage return and continue lexing,
5710
- // in order to make it so that the newline token encapsulates both the
5711
- // carriage return and the newline. Note that we need to check that
5712
- // we haven't already lexed a comment here because that falls through
5713
- // into here as well.
5714
- if (!lexed_comment) parser->current.end++;
5715
- }
5716
- /* fallthrough */
5841
+ case '\r':
5717
5842
  case '\n': {
5718
- if (parser->heredoc_end == NULL) {
5719
- yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
5720
- } else {
5843
+ size_t eol_length = match_eol_at(parser, parser->current.end - 1);
5844
+ if (eol_length) {
5845
+ // The only way you can have carriage returns in this
5846
+ // particular loop is if you have a carriage return
5847
+ // followed by a newline. In that case we'll just skip
5848
+ // over the carriage return and continue lexing, in
5849
+ // order to make it so that the newline token
5850
+ // encapsulates both the carriage return and the
5851
+ // newline. Note that we need to check that we haven't
5852
+ // already lexed a comment here because that falls
5853
+ // through into here as well.
5854
+ if (!lexed_comment) {
5855
+ parser->current.end += eol_length - 1; // skip CR
5856
+ }
5857
+
5858
+ if (parser->heredoc_end == NULL) {
5859
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
5860
+ }
5861
+ }
5862
+
5863
+ if (parser->heredoc_end) {
5721
5864
  parser_flush_heredoc_end(parser);
5722
5865
  }
5723
5866
 
@@ -5773,7 +5916,13 @@ parser_lex(yp_parser_t *parser) {
5773
5916
 
5774
5917
  // If the lex state was ignored, or we hit a '.' or a '&.',
5775
5918
  // we will lex the ignored newline
5776
- if (lex_state_ignored_p(parser) || (following && ((following[0] == '.') || (following + 1 < parser->end && following[0] == '&' && following[1] == '.')))) {
5919
+ if (
5920
+ lex_state_ignored_p(parser) ||
5921
+ (following && (
5922
+ (peek_at(parser, following) == '.') ||
5923
+ (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
5924
+ ))
5925
+ ) {
5777
5926
  if (!lexed_comment) parser_lex_ignored_newline(parser);
5778
5927
  lexed_comment = false;
5779
5928
  goto lex_next_token;
@@ -5786,7 +5935,7 @@ parser_lex(yp_parser_t *parser) {
5786
5935
  // To match ripper, we need to emit an ignored newline even though
5787
5936
  // its a real newline in the case that we have a beginless range
5788
5937
  // on a subsequent line.
5789
- if ((next_content + 1 < parser->end) && (next_content[1] == '.')) {
5938
+ if (peek_at(parser, next_content + 1) == '.') {
5790
5939
  if (!lexed_comment) parser_lex_ignored_newline(parser);
5791
5940
  lex_state_set(parser, YP_LEX_STATE_BEG);
5792
5941
  parser->command_start = true;
@@ -5804,7 +5953,7 @@ parser_lex(yp_parser_t *parser) {
5804
5953
 
5805
5954
  // If we hit a &. after a newline, then we're in a call chain and
5806
5955
  // we need to return the call operator.
5807
- if (next_content + 1 < parser->end && next_content[0] == '&' && next_content[1] == '.') {
5956
+ if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
5808
5957
  if (!lexed_comment) parser_lex_ignored_newline(parser);
5809
5958
  lex_state_set(parser, YP_LEX_STATE_DOT);
5810
5959
  parser->current.start = next_content;
@@ -6001,7 +6150,7 @@ parser_lex(yp_parser_t *parser) {
6001
6150
 
6002
6151
  // = => =~ == === =begin
6003
6152
  case '=':
6004
- if (current_token_starts_line(parser) && strncmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_at(parser, 5))) {
6153
+ if (current_token_starts_line(parser) && strncmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_offset(parser, 5))) {
6005
6154
  yp_token_type_t type = lex_embdoc(parser);
6006
6155
 
6007
6156
  if (type == YP_TOKEN_EOF) {
@@ -6425,13 +6574,13 @@ parser_lex(yp_parser_t *parser) {
6425
6574
  LEX(YP_TOKEN_COLON_COLON);
6426
6575
  }
6427
6576
 
6428
- if (lex_state_end_p(parser) || yp_char_is_whitespace(*parser->current.end) || (*parser->current.end == '#')) {
6577
+ if (lex_state_end_p(parser) || yp_char_is_whitespace(*parser->current.end) || peek(parser) == '#') {
6429
6578
  lex_state_set(parser, YP_LEX_STATE_BEG);
6430
6579
  LEX(YP_TOKEN_COLON);
6431
6580
  }
6432
6581
 
6433
- if ((*parser->current.end == '"') || (*parser->current.end == '\'')) {
6434
- lex_mode_push_string(parser, *parser->current.end == '"', false, '\0', *parser->current.end);
6582
+ if (peek(parser) == '"' || peek(parser) == '\'') {
6583
+ lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
6435
6584
  parser->current.end++;
6436
6585
  }
6437
6586
 
@@ -6486,13 +6635,11 @@ parser_lex(yp_parser_t *parser) {
6486
6635
 
6487
6636
  // % %= %i %I %q %Q %w %W
6488
6637
  case '%': {
6489
- // In a BEG state, if you encounter a % then you must be
6490
- // starting something. In this case if there is no
6491
- // subsequent character then we have an invalid token. We're
6492
- // going to say it's the percent operator because we don't
6493
- // want to move into the string lex mode unnecessarily.
6494
- if (lex_state_beg_p(parser) && (parser->current.end >= parser->end)) {
6495
- yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "unexpected end of input");
6638
+ // If there is no subsequent character then we have an invalid token. We're
6639
+ // going to say it's the percent operator because we don't want to move into the
6640
+ // string lex mode unnecessarily.
6641
+ if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
6642
+ yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Unexpected end of input");
6496
6643
  LEX(YP_TOKEN_PERCENT);
6497
6644
  }
6498
6645
 
@@ -6502,25 +6649,26 @@ parser_lex(yp_parser_t *parser) {
6502
6649
  }
6503
6650
  else if(
6504
6651
  lex_state_beg_p(parser) ||
6505
- (lex_state_p(parser, YP_LEX_STATE_FITEM) && (*parser->current.end == 's')) ||
6652
+ (lex_state_p(parser, YP_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
6506
6653
  lex_state_spcarg_p(parser, space_seen)
6507
6654
  ) {
6508
6655
  if (!parser->encoding.alnum_char(parser->current.end, parser->end - parser->current.end)) {
6509
6656
  lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
6510
6657
 
6511
- if (*parser->current.end == '\r') {
6658
+ size_t eol_length = match_eol(parser);
6659
+ if (eol_length) {
6660
+ parser->current.end += eol_length;
6661
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
6662
+ } else {
6512
6663
  parser->current.end++;
6513
6664
  }
6514
6665
 
6515
- if (*parser->current.end == '\n') {
6516
- yp_newline_list_append(&parser->newline_list, parser->current.end);
6666
+ if (parser->current.end < parser->end) {
6667
+ LEX(YP_TOKEN_STRING_BEGIN);
6517
6668
  }
6518
-
6519
- parser->current.end++;
6520
- LEX(YP_TOKEN_STRING_BEGIN);
6521
6669
  }
6522
6670
 
6523
- switch (*parser->current.end) {
6671
+ switch (peek(parser)) {
6524
6672
  case 'i': {
6525
6673
  parser->current.end++;
6526
6674
 
@@ -6544,6 +6692,7 @@ parser_lex(yp_parser_t *parser) {
6544
6692
 
6545
6693
  if (parser->current.end < parser->end) {
6546
6694
  lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
6695
+ yp_newline_list_check_append(&parser->newline_list, parser->current.end);
6547
6696
  parser->current.end++;
6548
6697
  }
6549
6698
 
@@ -6554,6 +6703,7 @@ parser_lex(yp_parser_t *parser) {
6554
6703
 
6555
6704
  if (parser->current.end < parser->end) {
6556
6705
  lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
6706
+ yp_newline_list_check_append(&parser->newline_list, parser->current.end);
6557
6707
  parser->current.end++;
6558
6708
  }
6559
6709
 
@@ -6564,6 +6714,7 @@ parser_lex(yp_parser_t *parser) {
6564
6714
 
6565
6715
  if (parser->current.end < parser->end) {
6566
6716
  lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
6717
+ yp_newline_list_check_append(&parser->newline_list, parser->current.end);
6567
6718
  parser->current.end++;
6568
6719
  }
6569
6720
 
@@ -6613,7 +6764,7 @@ parser_lex(yp_parser_t *parser) {
6613
6764
  // unparseable. In this case we'll just drop it from the parser
6614
6765
  // and skip past it and hope that the next token is something
6615
6766
  // that we can parse.
6616
- yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "invalid %% token");
6767
+ yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid %% token");
6617
6768
  goto lex_next_token;
6618
6769
  }
6619
6770
  }
@@ -6665,8 +6816,9 @@ parser_lex(yp_parser_t *parser) {
6665
6816
  ((parser->current.end - parser->current.start) == 7) &&
6666
6817
  current_token_starts_line(parser) &&
6667
6818
  (strncmp(parser->current.start, "__END__", 7) == 0) &&
6668
- (*parser->current.end == '\n' || (*parser->current.end == '\r' && parser->current.end[1] == '\n'))
6669
- ) {
6819
+ (parser->current.end == parser->end || match_eol(parser))
6820
+ )
6821
+ {
6670
6822
  parser->current.end = parser->end;
6671
6823
  parser->current.type = YP_TOKEN___END__;
6672
6824
  parser_lex_callback(parser);
@@ -6723,7 +6875,7 @@ parser_lex(yp_parser_t *parser) {
6723
6875
 
6724
6876
  if ((whitespace = yp_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list, should_stop)) > 0) {
6725
6877
  parser->current.end += whitespace;
6726
- if (parser->current.end[-1] == '\n') {
6878
+ if (peek_offset(parser, -1) == '\n') {
6727
6879
  // mutates next_start
6728
6880
  parser_flush_heredoc_end(parser);
6729
6881
  }
@@ -6787,13 +6939,11 @@ parser_lex(yp_parser_t *parser) {
6787
6939
  // and find the next breakpoint.
6788
6940
  if (*breakpoint == '\\') {
6789
6941
  yp_unescape_type_t unescape_type = lex_mode->as.list.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
6790
- size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
6942
+ size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
6791
6943
 
6792
6944
  // If the result is an escaped newline, then we need to
6793
6945
  // track that newline.
6794
- if (breakpoint[difference - 1] == '\n') {
6795
- yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
6796
- }
6946
+ yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
6797
6947
 
6798
6948
  breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
6799
6949
  continue;
@@ -6828,7 +6978,13 @@ parser_lex(yp_parser_t *parser) {
6828
6978
 
6829
6979
  case YP_LEX_REGEXP: {
6830
6980
  // First, we'll set the start of this token to be the current end.
6831
- parser->current.start = parser->current.end;
6981
+ if (parser->next_start == NULL) {
6982
+ parser->current.start = parser->current.end;
6983
+ } else {
6984
+ parser->current.start = parser->next_start;
6985
+ parser->current.end = parser->next_start;
6986
+ parser->next_start = NULL;
6987
+ }
6832
6988
 
6833
6989
  // We'll check if we're at the end of the file. If we are, then we need to
6834
6990
  // return the EOF token.
@@ -6855,7 +7011,16 @@ parser_lex(yp_parser_t *parser) {
6855
7011
  // If we've hit a newline, then we need to track that in the
6856
7012
  // list of newlines.
6857
7013
  if (*breakpoint == '\n') {
6858
- yp_newline_list_append(&parser->newline_list, breakpoint);
7014
+ // For the special case of a newline-terminated regular expression, we will pass
7015
+ // through this branch twice -- once with YP_TOKEN_REGEXP_BEGIN and then again
7016
+ // with YP_TOKEN_STRING_CONTENT. Let's avoid tracking the newline twice, by
7017
+ // tracking it only in the REGEXP_BEGIN case.
7018
+ if (
7019
+ !(lex_mode->as.regexp.terminator == '\n' && parser->current.type != YP_TOKEN_REGEXP_BEGIN)
7020
+ && parser->heredoc_end == NULL
7021
+ ) {
7022
+ yp_newline_list_append(&parser->newline_list, breakpoint);
7023
+ }
6859
7024
 
6860
7025
  if (lex_mode->as.regexp.terminator != '\n') {
6861
7026
  // If the terminator is not a newline, then we can set
@@ -6896,12 +7061,20 @@ parser_lex(yp_parser_t *parser) {
6896
7061
  // literally. In this case we'll skip past the next character
6897
7062
  // and find the next breakpoint.
6898
7063
  if (*breakpoint == '\\') {
6899
- size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, YP_UNESCAPE_ALL, false, &parser->error_list);
6900
-
6901
- // If the result is an escaped newline, then we need to
6902
- // track that newline.
6903
- if (breakpoint[difference - 1] == '\n') {
6904
- yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7064
+ size_t difference = yp_unescape_calculate_difference(parser, breakpoint, YP_UNESCAPE_ALL, false);
7065
+
7066
+ // If the result is an escaped newline ...
7067
+ if (*(breakpoint + difference - 1) == '\n') {
7068
+ if (parser->heredoc_end) {
7069
+ // ... if we are on the same line as a heredoc, flush the heredoc and
7070
+ // continue parsing after heredoc_end.
7071
+ parser->current.end = breakpoint + difference;
7072
+ parser_flush_heredoc_end(parser);
7073
+ LEX(YP_TOKEN_STRING_CONTENT);
7074
+ } else {
7075
+ // ... else track the newline.
7076
+ yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7077
+ }
6905
7078
  }
6906
7079
 
6907
7080
  breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
@@ -6989,21 +7162,18 @@ parser_lex(yp_parser_t *parser) {
6989
7162
 
6990
7163
  // Otherwise we need to switch back to the parent lex mode and
6991
7164
  // return the end of the string.
6992
- if (*parser->current.end == '\r' && parser->current.end + 1 < parser->end && parser->current.end[1] == '\n') {
6993
- parser->current.end = breakpoint + 2;
6994
- yp_newline_list_append(&parser->newline_list, breakpoint + 1);
7165
+ size_t eol_length = match_eol_at(parser, breakpoint);
7166
+ if (eol_length) {
7167
+ parser->current.end = breakpoint + eol_length;
7168
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
6995
7169
  } else {
6996
- if (*parser->current.end == '\n') {
6997
- yp_newline_list_append(&parser->newline_list, parser->current.end);
6998
- }
6999
-
7000
7170
  parser->current.end = breakpoint + 1;
7001
7171
  }
7002
7172
 
7003
7173
  if (
7004
7174
  parser->lex_modes.current->as.string.label_allowed &&
7005
7175
  (peek(parser) == ':') &&
7006
- (peek_at(parser, 1) != ':')
7176
+ (peek_offset(parser, 1) != ':')
7007
7177
  ) {
7008
7178
  parser->current.end++;
7009
7179
  lex_state_set(parser, YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED);
@@ -7041,12 +7211,20 @@ parser_lex(yp_parser_t *parser) {
7041
7211
  // literally. In this case we'll skip past the next character and
7042
7212
  // find the next breakpoint.
7043
7213
  yp_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
7044
- size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
7045
-
7046
- // If the result is an escaped newline, then we need to
7047
- // track that newline.
7048
- if (breakpoint[difference - 1] == '\n') {
7049
- yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7214
+ size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
7215
+
7216
+ // If the result is an escaped newline ...
7217
+ if (*(breakpoint + difference - 1) == '\n') {
7218
+ if (parser->heredoc_end) {
7219
+ // ... if we are on the same line as a heredoc, flush the heredoc and
7220
+ // continue parsing after heredoc_end.
7221
+ parser->current.end = breakpoint + difference;
7222
+ parser_flush_heredoc_end(parser);
7223
+ LEX(YP_TOKEN_STRING_CONTENT);
7224
+ } else {
7225
+ // ... else track the newline.
7226
+ yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7227
+ }
7050
7228
  }
7051
7229
 
7052
7230
  breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
@@ -7082,6 +7260,7 @@ parser_lex(yp_parser_t *parser) {
7082
7260
  } else {
7083
7261
  parser->current.start = parser->next_start;
7084
7262
  parser->current.end = parser->next_start;
7263
+ parser->heredoc_end = NULL;
7085
7264
  parser->next_start = NULL;
7086
7265
  }
7087
7266
 
@@ -7098,7 +7277,7 @@ parser_lex(yp_parser_t *parser) {
7098
7277
 
7099
7278
  // If we are immediately following a newline and we have hit the
7100
7279
  // terminator, then we need to return the ending of the heredoc.
7101
- if (parser->current.start[-1] == '\n') {
7280
+ if (current_token_starts_line(parser)) {
7102
7281
  const char *start = parser->current.start;
7103
7282
  if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) {
7104
7283
  start += yp_strspn_inline_whitespace(start, parser->end - start);
@@ -7108,12 +7287,10 @@ parser_lex(yp_parser_t *parser) {
7108
7287
  bool matched = true;
7109
7288
  bool at_end = false;
7110
7289
 
7111
- if ((start + ident_length < parser->end) && (start[ident_length] == '\n')) {
7112
- parser->current.end = start + ident_length + 1;
7113
- yp_newline_list_append(&parser->newline_list, start + ident_length);
7114
- } else if ((start + ident_length + 1 < parser->end) && (start[ident_length] == '\r') && (start[ident_length + 1] == '\n')) {
7115
- parser->current.end = start + ident_length + 2;
7116
- yp_newline_list_append(&parser->newline_list, start + ident_length + 1);
7290
+ size_t eol_length = match_eol_at(parser, start + ident_length);
7291
+ if (eol_length) {
7292
+ parser->current.end = start + ident_length + eol_length;
7293
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
7117
7294
  } else if (parser->end == (start + ident_length)) {
7118
7295
  parser->current.end = start + ident_length;
7119
7296
  at_end = true;
@@ -7178,19 +7355,10 @@ parser_lex(yp_parser_t *parser) {
7178
7355
  (start + ident_length <= parser->end) &&
7179
7356
  (strncmp(start, ident_start, ident_length) == 0)
7180
7357
  ) {
7181
- // Heredoc terminators must be followed by a newline or EOF to be valid.
7182
- if (start + ident_length == parser->end || start[ident_length] == '\n') {
7183
- parser->current.end = breakpoint + 1;
7184
- LEX(YP_TOKEN_STRING_CONTENT);
7185
- }
7186
-
7187
- // They can also be followed by a carriage return and then a
7188
- // newline. Be sure here that we don't accidentally read off the
7189
- // end.
7358
+ // Heredoc terminators must be followed by a newline, CRLF, or EOF to be valid.
7190
7359
  if (
7191
- (start + ident_length + 1 < parser->end) &&
7192
- (start[ident_length] == '\r') &&
7193
- (start[ident_length + 1] == '\n')
7360
+ start + ident_length == parser->end ||
7361
+ match_eol_at(parser, start + ident_length)
7194
7362
  ) {
7195
7363
  parser->current.end = breakpoint + 1;
7196
7364
  LEX(YP_TOKEN_STRING_CONTENT);
@@ -7203,21 +7371,24 @@ parser_lex(yp_parser_t *parser) {
7203
7371
  break;
7204
7372
  }
7205
7373
  case '\\': {
7206
- // If we hit escapes, then we need to treat the next token
7207
- // literally. In this case we'll skip past the next character and
7208
- // find the next breakpoint.
7209
- if (breakpoint[1] == '\n') {
7210
- breakpoint++;
7374
+ // If we hit an escape, then we need to skip past
7375
+ // however many characters the escape takes up. However
7376
+ // it's important that if \n or \r\n are escaped that we
7377
+ // stop looping before the newline and not after the
7378
+ // newline so that we can still potentially find the
7379
+ // terminator of the heredoc.
7380
+ size_t eol_length = match_eol_at(parser, breakpoint + 1);
7381
+ if (eol_length) {
7382
+ breakpoint += eol_length;
7211
7383
  } else {
7212
7384
  yp_unescape_type_t unescape_type = (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL;
7213
- size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
7385
+ size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
7214
7386
 
7215
- if (breakpoint[difference - 1] == '\n') {
7216
- yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7217
- }
7387
+ yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
7218
7388
 
7219
7389
  breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
7220
7390
  }
7391
+
7221
7392
  break;
7222
7393
  }
7223
7394
  case '#': {
@@ -7264,10 +7435,10 @@ static yp_regular_expression_node_t *
7264
7435
  yp_regular_expression_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) {
7265
7436
  yp_regular_expression_node_t *node = yp_regular_expression_node_create(parser, opening, content, closing);
7266
7437
 
7267
- ptrdiff_t length = content->end - content->start;
7268
- assert(length >= 0);
7438
+ assert((content->end - content->start) >= 0);
7439
+ yp_string_shared_init(&node->unescaped, content->start, content->end);
7269
7440
 
7270
- yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
7441
+ yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
7271
7442
  return node;
7272
7443
  }
7273
7444
 
@@ -7275,10 +7446,10 @@ static yp_symbol_node_t *
7275
7446
  yp_symbol_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) {
7276
7447
  yp_symbol_node_t *node = yp_symbol_node_create(parser, opening, content, closing);
7277
7448
 
7278
- ptrdiff_t length = content->end - content->start;
7279
- assert(length >= 0);
7449
+ assert((content->end - content->start) >= 0);
7450
+ yp_string_shared_init(&node->unescaped, content->start, content->end);
7280
7451
 
7281
- yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
7452
+ yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
7282
7453
  return node;
7283
7454
  }
7284
7455
 
@@ -7286,10 +7457,10 @@ static yp_string_node_t *
7286
7457
  yp_string_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) {
7287
7458
  yp_string_node_t *node = yp_string_node_create(parser, opening, content, closing);
7288
7459
 
7289
- ptrdiff_t length = content->end - content->start;
7290
- assert(length >= 0);
7460
+ assert((content->end - content->start) >= 0);
7461
+ yp_string_shared_init(&node->unescaped, content->start, content->end);
7291
7462
 
7292
- yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
7463
+ yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
7293
7464
  return node;
7294
7465
  }
7295
7466
 
@@ -7297,10 +7468,10 @@ static yp_x_string_node_t *
7297
7468
  yp_xstring_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing) {
7298
7469
  yp_x_string_node_t *node = yp_xstring_node_create(parser, opening, content, closing);
7299
7470
 
7300
- ptrdiff_t length = content->end - content->start;
7301
- assert(length >= 0);
7471
+ assert((content->end - content->start) >= 0);
7472
+ yp_string_shared_init(&node->unescaped, content->start, content->end);
7302
7473
 
7303
- yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
7474
+ yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL);
7304
7475
  return node;
7305
7476
  }
7306
7477
 
@@ -7652,19 +7823,153 @@ parse_starred_expression(yp_parser_t *parser, yp_binding_power_t binding_power,
7652
7823
 
7653
7824
  // Convert the given node into a valid target node.
7654
7825
  static yp_node_t *
7655
- parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_node_t *value) {
7826
+ parse_target(yp_parser_t *parser, yp_node_t *target) {
7656
7827
  switch (YP_NODE_TYPE(target)) {
7657
7828
  case YP_NODE_MISSING_NODE:
7658
7829
  return target;
7659
- case YP_NODE_CLASS_VARIABLE_READ_NODE: {
7660
- yp_class_variable_write_node_t *write_node = yp_class_variable_read_node_to_class_variable_write_node(parser, (yp_class_variable_read_node_t *) target, operator, value);
7661
- yp_node_destroy(parser, target);
7662
- return (yp_node_t *) write_node;
7663
- }
7830
+ case YP_NODE_CLASS_VARIABLE_READ_NODE:
7831
+ assert(sizeof(yp_class_variable_target_node_t) == sizeof(yp_class_variable_read_node_t));
7832
+ target->type = YP_NODE_CLASS_VARIABLE_TARGET_NODE;
7833
+ return target;
7664
7834
  case YP_NODE_CONSTANT_PATH_NODE:
7665
- return (yp_node_t *) yp_constant_path_write_node_create(parser, (yp_constant_path_node_t *) target, operator, value);
7666
- case YP_NODE_CONSTANT_READ_NODE: {
7667
- yp_constant_write_node_t *node = yp_constant_write_node_create(parser, &target->location, operator, value);
7835
+ assert(sizeof(yp_constant_path_target_node_t) == sizeof(yp_constant_path_node_t));
7836
+ target->type = YP_NODE_CONSTANT_PATH_TARGET_NODE;
7837
+ return target;
7838
+ case YP_NODE_CONSTANT_READ_NODE:
7839
+ assert(sizeof(yp_constant_target_node_t) == sizeof(yp_constant_read_node_t));
7840
+ target->type = YP_NODE_CONSTANT_TARGET_NODE;
7841
+ return target;
7842
+ case YP_NODE_BACK_REFERENCE_READ_NODE:
7843
+ assert(sizeof(yp_global_variable_target_node_t) == sizeof(yp_back_reference_read_node_t));
7844
+ /* fallthrough */
7845
+ case YP_NODE_NUMBERED_REFERENCE_READ_NODE:
7846
+ assert(sizeof(yp_global_variable_target_node_t) == sizeof(yp_numbered_reference_read_node_t));
7847
+ yp_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, "Can't set variable");
7848
+ /* fallthrough */
7849
+ case YP_NODE_GLOBAL_VARIABLE_READ_NODE:
7850
+ assert(sizeof(yp_global_variable_target_node_t) == sizeof(yp_global_variable_read_node_t));
7851
+ target->type = YP_NODE_GLOBAL_VARIABLE_TARGET_NODE;
7852
+ return target;
7853
+ case YP_NODE_LOCAL_VARIABLE_READ_NODE:
7854
+ assert(sizeof(yp_local_variable_target_node_t) == sizeof(yp_local_variable_read_node_t));
7855
+ target->type = YP_NODE_LOCAL_VARIABLE_TARGET_NODE;
7856
+ return target;
7857
+ case YP_NODE_INSTANCE_VARIABLE_READ_NODE:
7858
+ assert(sizeof(yp_instance_variable_target_node_t) == sizeof(yp_instance_variable_read_node_t));
7859
+ target->type = YP_NODE_INSTANCE_VARIABLE_TARGET_NODE;
7860
+ return target;
7861
+ case YP_NODE_MULTI_WRITE_NODE:
7862
+ return target;
7863
+ case YP_NODE_SPLAT_NODE: {
7864
+ yp_splat_node_t *splat = (yp_splat_node_t *) target;
7865
+
7866
+ if (splat->expression != NULL) {
7867
+ splat->expression = parse_target(parser, splat->expression);
7868
+ }
7869
+
7870
+ yp_token_t operator = not_provided(parser);
7871
+ yp_location_t location = { .start = NULL, .end = NULL };
7872
+
7873
+ yp_multi_write_node_t *multi_write = yp_multi_write_node_create(parser, &operator, NULL, &location, &location);
7874
+ yp_multi_write_node_targets_append(multi_write, (yp_node_t *) splat);
7875
+
7876
+ return (yp_node_t *) multi_write;
7877
+ }
7878
+ case YP_NODE_CALL_NODE: {
7879
+ yp_call_node_t *call = (yp_call_node_t *) target;
7880
+
7881
+ // If we have no arguments to the call node and we need this to be a
7882
+ // target then this is either a method call or a local variable write.
7883
+ if (
7884
+ (call->opening_loc.start == NULL) &&
7885
+ (call->arguments == NULL) &&
7886
+ (call->block == NULL)
7887
+ ) {
7888
+ if (call->receiver == NULL) {
7889
+ // When we get here, we have a local variable write, because it
7890
+ // was previously marked as a method call but now we have an =.
7891
+ // This looks like:
7892
+ //
7893
+ // foo = 1
7894
+ //
7895
+ // When it was parsed in the prefix position, foo was seen as a
7896
+ // method call with no receiver and no arguments. Now we have an
7897
+ // =, so we know it's a local variable write.
7898
+ const yp_location_t message = call->message_loc;
7899
+
7900
+ yp_parser_local_add_location(parser, message.start, message.end);
7901
+ yp_node_destroy(parser, target);
7902
+
7903
+ const yp_token_t name = { .type = YP_TOKEN_IDENTIFIER, .start = message.start, .end = message.end };
7904
+ target = (yp_node_t *) yp_local_variable_read_node_create(parser, &name, 0);
7905
+
7906
+ assert(sizeof(yp_local_variable_target_node_t) == sizeof(yp_local_variable_read_node_t));
7907
+ target->type = YP_NODE_LOCAL_VARIABLE_TARGET_NODE;
7908
+
7909
+ if (token_is_numbered_parameter(message.start, message.end)) {
7910
+ yp_diagnostic_list_append(&parser->error_list, message.start, message.end, "reserved for numbered parameter");
7911
+ }
7912
+
7913
+ return target;
7914
+ }
7915
+
7916
+ // The method name needs to change. If we previously had foo, we now
7917
+ // need foo=. In this case we'll allocate a new owned string, copy
7918
+ // the previous method name in, and append an =.
7919
+ size_t length = yp_string_length(&call->name);
7920
+
7921
+ char *name = calloc(length + 2, sizeof(char));
7922
+ if (name == NULL) return NULL;
7923
+
7924
+ snprintf(name, length + 2, "%.*s=", (int) length, yp_string_source(&call->name));
7925
+
7926
+ // Now switch the name to the new string.
7927
+ yp_string_free(&call->name);
7928
+ yp_string_owned_init(&call->name, name, length + 1);
7929
+
7930
+ return target;
7931
+ }
7932
+
7933
+ // If there is no call operator and the message is "[]" then this is
7934
+ // an aref expression, and we can transform it into an aset
7935
+ // expression.
7936
+ if (
7937
+ (call->operator_loc.start == NULL) &&
7938
+ (call->message_loc.start[0] == '[') &&
7939
+ (call->message_loc.end[-1] == ']') &&
7940
+ (call->block == NULL)
7941
+ ) {
7942
+ // Free the previous name and replace it with "[]=".
7943
+ yp_string_free(&call->name);
7944
+ yp_string_constant_init(&call->name, "[]=", 3);
7945
+ return target;
7946
+ }
7947
+ }
7948
+ /* fallthrough */
7949
+ default:
7950
+ // In this case we have a node that we don't know how to convert
7951
+ // into a target. We need to treat it as an error. For now, we'll
7952
+ // mark it as an error and just skip right past it.
7953
+ yp_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, "Unexpected write target.");
7954
+ return target;
7955
+ }
7956
+ }
7957
+
7958
+ // Convert the given node into a valid write node.
7959
+ static yp_node_t *
7960
+ parse_write(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_node_t *value) {
7961
+ switch (YP_NODE_TYPE(target)) {
7962
+ case YP_NODE_MISSING_NODE:
7963
+ return target;
7964
+ case YP_NODE_CLASS_VARIABLE_READ_NODE: {
7965
+ yp_class_variable_write_node_t *write_node = yp_class_variable_read_node_to_class_variable_write_node(parser, (yp_class_variable_read_node_t *) target, operator, value);
7966
+ yp_node_destroy(parser, target);
7967
+ return (yp_node_t *) write_node;
7968
+ }
7969
+ case YP_NODE_CONSTANT_PATH_NODE:
7970
+ return (yp_node_t *) yp_constant_path_write_node_create(parser, (yp_constant_path_node_t *) target, operator, value);
7971
+ case YP_NODE_CONSTANT_READ_NODE: {
7972
+ yp_constant_write_node_t *node = yp_constant_write_node_create(parser, &target->location, operator, value);
7668
7973
  yp_node_destroy(parser, target);
7669
7974
 
7670
7975
  return (yp_node_t *) node;
@@ -7699,18 +8004,15 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7699
8004
  yp_multi_write_node_t *multi_write = (yp_multi_write_node_t *) target;
7700
8005
  yp_multi_write_node_operator_loc_set(multi_write, operator);
7701
8006
 
7702
- if (value != NULL) {
7703
- multi_write->value = value;
7704
- multi_write->base.location.end = value->location.end;
7705
- }
7706
-
8007
+ multi_write->value = value;
8008
+ multi_write->base.location.end = value->location.end;
7707
8009
  return (yp_node_t *) multi_write;
7708
8010
  }
7709
8011
  case YP_NODE_SPLAT_NODE: {
7710
8012
  yp_splat_node_t *splat = (yp_splat_node_t *) target;
7711
8013
 
7712
8014
  if (splat->expression != NULL) {
7713
- splat->expression = parse_target(parser, splat->expression, operator, value);
8015
+ splat->expression = parse_write(parser, splat->expression, operator, value);
7714
8016
  }
7715
8017
 
7716
8018
  yp_location_t location = { .start = NULL, .end = NULL };
@@ -7763,12 +8065,10 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7763
8065
  // method call with no arguments. Now we have an =, so we know it's
7764
8066
  // a method call with an argument. In this case we will create the
7765
8067
  // arguments node, parse the argument, and add it to the list.
7766
- if (value) {
7767
- yp_arguments_node_t *arguments = yp_arguments_node_create(parser);
7768
- call->arguments = arguments;
7769
- yp_arguments_node_arguments_append(arguments, value);
7770
- target->location.end = arguments->base.location.end;
7771
- }
8068
+ yp_arguments_node_t *arguments = yp_arguments_node_create(parser);
8069
+ call->arguments = arguments;
8070
+ yp_arguments_node_arguments_append(arguments, value);
8071
+ target->location.end = arguments->base.location.end;
7772
8072
 
7773
8073
  // The method name needs to change. If we previously had foo, we now
7774
8074
  // need foo=. In this case we'll allocate a new owned string, copy
@@ -7796,15 +8096,13 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7796
8096
  (call->message_loc.end[-1] == ']') &&
7797
8097
  (call->block == NULL)
7798
8098
  ) {
7799
- if (value != NULL) {
7800
- if (call->arguments == NULL) {
7801
- call->arguments = yp_arguments_node_create(parser);
7802
- }
7803
-
7804
- yp_arguments_node_arguments_append(call->arguments, value);
7805
- target->location.end = value->location.end;
8099
+ if (call->arguments == NULL) {
8100
+ call->arguments = yp_arguments_node_create(parser);
7806
8101
  }
7807
8102
 
8103
+ yp_arguments_node_arguments_append(call->arguments, value);
8104
+ target->location.end = value->location.end;
8105
+
7808
8106
  // Free the previous name and replace it with "[]=".
7809
8107
  yp_string_free(&call->name);
7810
8108
  yp_string_constant_init(&call->name, "[]=", 3);
@@ -7816,9 +8114,7 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7816
8114
  // syntax error. In this case we'll fall through to our default
7817
8115
  // handling. We need to free the value that we parsed because there
7818
8116
  // is no way for us to attach it to the tree at this point.
7819
- if (value != NULL) {
7820
- yp_node_destroy(parser, value);
7821
- }
8117
+ yp_node_destroy(parser, value);
7822
8118
  }
7823
8119
  /* fallthrough */
7824
8120
  default:
@@ -7846,7 +8142,7 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7846
8142
  // location that we know requires a multi write, as in the case of a for loop.
7847
8143
  // In this case we will set up the parsing loop slightly differently.
7848
8144
  if (first_target != NULL) {
7849
- first_target = parse_target(parser, first_target, &operator, NULL);
8145
+ first_target = parse_target(parser, first_target);
7850
8146
 
7851
8147
  if (!match_type_p(parser, YP_TOKEN_COMMA)) {
7852
8148
  return first_target;
@@ -7877,9 +8173,8 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7877
8173
  yp_node_t *name = NULL;
7878
8174
 
7879
8175
  if (token_begins_expression_p(parser->current.type)) {
7880
- yp_token_t operator = not_provided(parser);
7881
8176
  name = parse_expression(parser, binding_power, "Expected an expression after '*'.");
7882
- name = parse_target(parser, name, &operator, NULL);
8177
+ name = parse_target(parser, name);
7883
8178
  }
7884
8179
 
7885
8180
  yp_node_t *splat = (yp_node_t *) yp_splat_node_create(parser, &star_operator, name);
@@ -7909,6 +8204,8 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7909
8204
 
7910
8205
  if (YP_NODE_TYPE_P(child_target, YP_NODE_MULTI_WRITE_NODE)) {
7911
8206
  target = (yp_multi_write_node_t *) child_target;
8207
+ target->base.location.start = lparen.start;
8208
+ target->base.location.end = rparen.end;
7912
8209
  target->lparen_loc = (yp_location_t) { .start = lparen.start, .end = lparen.end };
7913
8210
  target->rparen_loc = (yp_location_t) { .start = rparen.start, .end = rparen.end };
7914
8211
  } else {
@@ -7925,6 +8222,7 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7925
8222
  yp_multi_write_node_targets_append(target, child_target);
7926
8223
  }
7927
8224
 
8225
+ target->base.location.start = lparen.start;
7928
8226
  target->base.location.end = rparen.end;
7929
8227
  yp_multi_write_node_targets_append(result, (yp_node_t *) target);
7930
8228
  }
@@ -7947,7 +8245,7 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7947
8245
  }
7948
8246
 
7949
8247
  yp_node_t *target = parse_expression(parser, binding_power, "Expected another expression after ','.");
7950
- target = parse_target(parser, target, &operator, NULL);
8248
+ target = parse_target(parser, target);
7951
8249
 
7952
8250
  yp_multi_write_node_targets_append(result, target);
7953
8251
  }
@@ -8407,7 +8705,6 @@ parse_parameters(
8407
8705
  bool looping = true;
8408
8706
 
8409
8707
  yp_do_loop_stack_push(parser, false);
8410
-
8411
8708
  yp_parameters_order_t order = YP_PARAMETERS_ORDER_NONE;
8412
8709
 
8413
8710
  do {
@@ -8699,8 +8996,7 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
8699
8996
  yp_rescue_node_operator_set(rescue, &parser->previous);
8700
8997
 
8701
8998
  yp_node_t *reference = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected an exception variable after `=>` in rescue statement.");
8702
- yp_token_t operator = not_provided(parser);
8703
- reference = parse_target(parser, reference, &operator, NULL);
8999
+ reference = parse_target(parser, reference);
8704
9000
 
8705
9001
  yp_rescue_node_reference_set(rescue, reference);
8706
9002
  break;
@@ -8730,8 +9026,7 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
8730
9026
  yp_rescue_node_operator_set(rescue, &parser->previous);
8731
9027
 
8732
9028
  yp_node_t *reference = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected an exception variable after `=>` in rescue statement.");
8733
- yp_token_t operator = not_provided(parser);
8734
- reference = parse_target(parser, reference, &operator, NULL);
9029
+ reference = parse_target(parser, reference);
8735
9030
 
8736
9031
  yp_rescue_node_reference_set(rescue, reference);
8737
9032
  break;
@@ -8999,7 +9294,7 @@ parse_conditional(yp_parser_t *parser, yp_context_t context) {
8999
9294
  }
9000
9295
 
9001
9296
  yp_token_t end_keyword = not_provided(parser);
9002
- yp_node_t *parent;
9297
+ yp_node_t *parent = NULL;
9003
9298
 
9004
9299
  switch (context) {
9005
9300
  case YP_CONTEXT_IF:
@@ -9009,7 +9304,6 @@ parse_conditional(yp_parser_t *parser, yp_context_t context) {
9009
9304
  parent = (yp_node_t *) yp_unless_node_create(parser, &keyword, predicate, statements);
9010
9305
  break;
9011
9306
  default:
9012
- parent = NULL;
9013
9307
  assert(false && "unreachable");
9014
9308
  break;
9015
9309
  }
@@ -9055,50 +9349,49 @@ parse_conditional(yp_parser_t *parser, yp_context_t context) {
9055
9349
  switch (context) {
9056
9350
  case YP_CONTEXT_IF:
9057
9351
  ((yp_if_node_t *) current)->consequent = (yp_node_t *) else_node;
9058
- // Recurse down if nodes setting the appropriate end location in
9059
- // all cases.
9060
- yp_node_t *recursing_node = parent;
9061
- bool recursing = true;
9062
-
9063
- while (recursing) {
9064
- switch (YP_NODE_TYPE(recursing_node)) {
9065
- case YP_NODE_IF_NODE:
9066
- yp_if_node_end_keyword_loc_set((yp_if_node_t *) recursing_node, &parser->previous);
9067
- recursing_node = ((yp_if_node_t *) recursing_node)->consequent;
9068
- break;
9069
- case YP_NODE_ELSE_NODE:
9070
- yp_else_node_end_keyword_loc_set((yp_else_node_t *) recursing_node, &parser->previous);
9071
- recursing = false;
9072
- break;
9073
- default: {
9074
- recursing = false;
9075
- break;
9076
- }
9077
- }
9078
- }
9079
9352
  break;
9080
9353
  case YP_CONTEXT_UNLESS:
9081
9354
  ((yp_unless_node_t *) parent)->consequent = else_node;
9082
- yp_unless_node_end_keyword_loc_set((yp_unless_node_t *) parent, &parser->previous);
9083
9355
  break;
9084
9356
  default:
9085
9357
  assert(false && "unreachable");
9086
9358
  break;
9087
9359
  }
9088
9360
  } else {
9089
- expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `if` statement.");
9361
+ expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close conditional statement.");
9362
+ }
9090
9363
 
9091
- switch (context) {
9092
- case YP_CONTEXT_IF:
9093
- yp_if_node_end_keyword_loc_set((yp_if_node_t *) parent, &parser->previous);
9094
- break;
9095
- case YP_CONTEXT_UNLESS:
9096
- yp_unless_node_end_keyword_loc_set((yp_unless_node_t *) parent, &parser->previous);
9097
- break;
9098
- default:
9099
- assert(false && "unreachable");
9100
- break;
9364
+ // Set the appropriate end location for all of the nodes in the subtree.
9365
+ switch (context) {
9366
+ case YP_CONTEXT_IF: {
9367
+ yp_node_t *current = parent;
9368
+ bool recursing = true;
9369
+
9370
+ while (recursing) {
9371
+ switch (YP_NODE_TYPE(current)) {
9372
+ case YP_NODE_IF_NODE:
9373
+ yp_if_node_end_keyword_loc_set((yp_if_node_t *) current, &parser->previous);
9374
+ current = ((yp_if_node_t *) current)->consequent;
9375
+ recursing = current != NULL;
9376
+ break;
9377
+ case YP_NODE_ELSE_NODE:
9378
+ yp_else_node_end_keyword_loc_set((yp_else_node_t *) current, &parser->previous);
9379
+ recursing = false;
9380
+ break;
9381
+ default: {
9382
+ recursing = false;
9383
+ break;
9384
+ }
9385
+ }
9386
+ }
9387
+ break;
9101
9388
  }
9389
+ case YP_CONTEXT_UNLESS:
9390
+ yp_unless_node_end_keyword_loc_set((yp_unless_node_t *) parent, &parser->previous);
9391
+ break;
9392
+ default:
9393
+ assert(false && "unreachable");
9394
+ break;
9102
9395
  }
9103
9396
 
9104
9397
  return parent;
@@ -9172,7 +9465,12 @@ parse_string_part(yp_parser_t *parser) {
9172
9465
  yp_unescape_type_t unescape_type = YP_UNESCAPE_ALL;
9173
9466
 
9174
9467
  if (parser->lex_modes.current->mode == YP_LEX_HEREDOC) {
9175
- if (parser->lex_modes.current->as.heredoc.quote == YP_HEREDOC_QUOTE_SINGLE) {
9468
+ if (parser->lex_modes.current->as.heredoc.indent == YP_HEREDOC_INDENT_TILDE) {
9469
+ // If we're in a tilde heredoc, we want to unescape it later
9470
+ // because we don't want unescaped newlines to disappear
9471
+ // before we handle them in the dedent.
9472
+ unescape_type = YP_UNESCAPE_NONE;
9473
+ } else if (parser->lex_modes.current->as.heredoc.quote == YP_HEREDOC_QUOTE_SINGLE) {
9176
9474
  unescape_type = YP_UNESCAPE_MINIMAL;
9177
9475
  }
9178
9476
  }
@@ -9280,14 +9578,10 @@ parse_string_part(yp_parser_t *parser) {
9280
9578
 
9281
9579
  static yp_node_t *
9282
9580
  parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_state) {
9283
- bool lex_string = lex_mode->mode == YP_LEX_STRING;
9284
- bool can_be_interpolated = lex_string && lex_mode->as.string.interpolation;
9285
9581
  yp_token_t opening = parser->previous;
9286
9582
 
9287
- if (!lex_string) {
9288
- if (next_state != YP_LEX_STATE_NONE) {
9289
- lex_state_set(parser, next_state);
9290
- }
9583
+ if (lex_mode->mode != YP_LEX_STRING) {
9584
+ if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state);
9291
9585
  yp_token_t symbol;
9292
9586
 
9293
9587
  switch (parser->current.type) {
@@ -9317,37 +9611,44 @@ parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_s
9317
9611
  return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &symbol, &closing, YP_UNESCAPE_ALL);
9318
9612
  }
9319
9613
 
9320
- if (can_be_interpolated) {
9321
- // Create a node_list first. We'll use this to check if it should be an InterpolatedSymbolNode
9322
- // or a SymbolNode
9614
+ if (lex_mode->as.string.interpolation) {
9615
+ // If we have the end of the symbol, then we can return an empty symbol.
9616
+ if (match_type_p(parser, YP_TOKEN_STRING_END)) {
9617
+ if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state);
9618
+ parser_lex(parser);
9619
+
9620
+ yp_token_t content = not_provided(parser);
9621
+ yp_token_t closing = parser->previous;
9622
+ return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &closing, YP_UNESCAPE_NONE);
9623
+ }
9624
+
9625
+ // Now we can parse the first part of the symbol.
9626
+ yp_node_t *part = parse_string_part(parser);
9627
+
9628
+ // If we got a string part, then it's possible that we could transform
9629
+ // what looks like an interpolated symbol into a regular symbol.
9630
+ if (part && YP_NODE_TYPE_P(part, YP_NODE_STRING_NODE) && match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
9631
+ if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state);
9632
+ parser_lex(parser);
9633
+
9634
+ return (yp_node_t *) yp_string_node_to_symbol_node(parser, (yp_string_node_t *) part, &opening, &parser->previous);
9635
+ }
9636
+
9637
+ // Create a node_list first. We'll use this to check if it should be an
9638
+ // InterpolatedSymbolNode or a SymbolNode.
9323
9639
  yp_node_list_t node_list = YP_EMPTY_NODE_LIST;
9640
+ if (part) yp_node_list_append(&node_list, part);
9324
9641
 
9325
9642
  while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
9326
- yp_node_t *part = parse_string_part(parser);
9327
- if (part != NULL) {
9643
+ if ((part = parse_string_part(parser)) != NULL) {
9328
9644
  yp_node_list_append(&node_list, part);
9329
9645
  }
9330
9646
  }
9331
9647
 
9332
- yp_node_t *res;
9333
- // If the only element on the node_list is a StringNode, we know this is a SymbolNode
9334
- // and not an InterpolatedSymbolNode
9335
- if (node_list.size == 1 && YP_NODE_TYPE_P(node_list.nodes[0], YP_NODE_STRING_NODE)) {
9336
- res = (yp_node_t *)yp_string_node_to_symbol_node(parser, (yp_string_node_t *)node_list.nodes[0]);
9337
- free(node_list.nodes);
9338
- }
9339
- else {
9340
- yp_interpolated_symbol_node_t *interpolated = yp_interpolated_symbol_node_create(parser, &opening, &node_list, &opening);
9341
- yp_interpolated_symbol_node_closing_set(interpolated, &parser->current);
9342
- res = (yp_node_t *) interpolated;
9343
- }
9344
-
9345
- if (next_state != YP_LEX_STATE_NONE) {
9346
- lex_state_set(parser, next_state);
9347
- }
9648
+ if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state);
9348
9649
  expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated symbol.");
9349
9650
 
9350
- return res;
9651
+ return (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &node_list, &parser->previous);
9351
9652
  }
9352
9653
 
9353
9654
  yp_token_t content;
@@ -9491,9 +9792,12 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
9491
9792
  const char *cur_char = content_loc->start;
9492
9793
 
9493
9794
  while (cur_char && cur_char < content_loc->end) {
9494
- // Any empty newlines aren't included in the minimum whitespace calculation
9495
- while (cur_char < content_loc->end && *cur_char == '\n') cur_char++;
9496
- while (cur_char + 1 < content_loc->end && *cur_char == '\r' && cur_char[1] == '\n') cur_char += 2;
9795
+ // Any empty newlines aren't included in the minimum whitespace
9796
+ // calculation.
9797
+ size_t eol_length;
9798
+ while ((eol_length = match_eol_at(parser, cur_char))) {
9799
+ cur_char += eol_length;
9800
+ }
9497
9801
 
9498
9802
  if (cur_char == content_loc->end) break;
9499
9803
 
@@ -9508,11 +9812,12 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
9508
9812
  cur_char++;
9509
9813
  }
9510
9814
 
9511
- // If we hit a newline, then we have encountered a line that contains
9512
- // only whitespace, and it shouldn't be considered in the calculation of
9513
- // common leading whitespace.
9514
- if (*cur_char == '\n') {
9515
- cur_char++;
9815
+ // If we hit a newline, then we have encountered a line that
9816
+ // contains only whitespace, and it shouldn't be considered in
9817
+ // the calculation of common leading whitespace.
9818
+ eol_length = match_eol_at(parser, cur_char);
9819
+ if (eol_length) {
9820
+ cur_char += eol_length;
9516
9821
  continue;
9517
9822
  }
9518
9823
 
@@ -9545,14 +9850,30 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
9545
9850
  int common_whitespace;
9546
9851
  if ((common_whitespace = parse_heredoc_common_whitespace(parser, nodes)) <= 0) return;
9547
9852
 
9548
- // Iterate over all nodes, and trim whitespace accordingly.
9549
- for (size_t index = 0; index < nodes->size; index++) {
9550
- yp_node_t *node = nodes->nodes[index];
9551
- if (!YP_NODE_TYPE_P(node, YP_NODE_STRING_NODE)) continue;
9853
+ // The next node should be dedented if it's the first node in the list or if
9854
+ // it follows a string node.
9855
+ bool dedent_next = true;
9856
+
9857
+ // Iterate over all nodes, and trim whitespace accordingly. We're going to
9858
+ // keep around two indices: a read and a write. If we end up trimming all of
9859
+ // the whitespace from a node, then we'll drop it from the list entirely.
9860
+ size_t write_index = 0;
9861
+
9862
+ for (size_t read_index = 0; read_index < nodes->size; read_index++) {
9863
+ yp_node_t *node = nodes->nodes[read_index];
9864
+
9865
+ // We're not manipulating child nodes that aren't strings. In this case
9866
+ // we'll skip past it and indicate that the subsequent node should not
9867
+ // be dedented.
9868
+ if (!YP_NODE_TYPE_P(node, YP_NODE_STRING_NODE)) {
9869
+ nodes->nodes[write_index++] = node;
9870
+ dedent_next = false;
9871
+ continue;
9872
+ }
9552
9873
 
9553
9874
  // Get a reference to the string struct that is being held by the string
9554
9875
  // node. This is the value we're going to actually manipulate.
9555
- yp_string_t *string = &((yp_string_node_t *) node)->unescaped;
9876
+ yp_string_t *string = &(((yp_string_node_t *) node)->unescaped);
9556
9877
  yp_string_ensure_owned(string);
9557
9878
 
9558
9879
  // Now get the bounds of the existing string. We'll use this as a
@@ -9568,7 +9889,6 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
9568
9889
  // whitespace, so we'll maintain a pointer to the current position in the
9569
9890
  // string that we're writing to.
9570
9891
  char *dest_cursor = source_start;
9571
- bool dedent_next = (index == 0) || YP_NODE_TYPE_P(nodes->nodes[index - 1], YP_NODE_STRING_NODE);
9572
9892
 
9573
9893
  while (source_cursor < source_end) {
9574
9894
  // If we need to dedent the next element within the heredoc or the next
@@ -9613,8 +9933,20 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
9613
9933
  dedent_next = true;
9614
9934
  }
9615
9935
 
9616
- string->length = dest_length;
9936
+ // We only want to write this node into the list if it has any content.
9937
+ if (dest_length == 0) {
9938
+ yp_node_destroy(parser, node);
9939
+ } else {
9940
+ string->length = dest_length;
9941
+ yp_unescape_manipulate_string(parser, string, (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL);
9942
+ nodes->nodes[write_index++] = node;
9943
+ }
9944
+
9945
+ // We always dedent the next node if it follows a string node.
9946
+ dedent_next = true;
9617
9947
  }
9948
+
9949
+ nodes->size = write_index;
9618
9950
  }
9619
9951
 
9620
9952
  static yp_node_t *
@@ -10363,10 +10695,8 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10363
10695
  }
10364
10696
  case YP_TOKEN_PARENTHESIS_LEFT:
10365
10697
  case YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
10366
- yp_token_type_t current_token_type = parser->current.type;
10698
+ yp_token_t opening = parser->current;
10367
10699
  parser_lex(parser);
10368
-
10369
- yp_token_t opening = parser->previous;
10370
10700
  while (accept_any(parser, 2, YP_TOKEN_SEMICOLON, YP_TOKEN_NEWLINE));
10371
10701
 
10372
10702
  // If this is the end of the file or we match a right parenthesis, then
@@ -10385,7 +10715,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10385
10715
  // If we hit a right parenthesis, then we're done parsing the parentheses
10386
10716
  // node, and we can check which kind of node we should return.
10387
10717
  if (match_type_p(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
10388
- if (current_token_type == YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
10718
+ if (opening.type == YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
10389
10719
  lex_state_set(parser, YP_LEX_STATE_ENDARG);
10390
10720
  }
10391
10721
  parser_lex(parser);
@@ -10403,6 +10733,8 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10403
10733
 
10404
10734
  if (multi_statement->lparen_loc.start == NULL) {
10405
10735
  multi_write = (yp_multi_write_node_t *) statement;
10736
+ multi_write->base.location.start = lparen_loc.start;
10737
+ multi_write->base.location.end = rparen_loc.end;
10406
10738
  multi_write->lparen_loc = lparen_loc;
10407
10739
  multi_write->rparen_loc = rparen_loc;
10408
10740
  } else {
@@ -10505,7 +10837,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10505
10837
  // fact a method call, not a constant read.
10506
10838
  if (
10507
10839
  match_type_p(parser, YP_TOKEN_PARENTHESIS_LEFT) ||
10508
- (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 2, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
10840
+ (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 3, YP_TOKEN_UAMPERSAND, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
10509
10841
  (yp_accepts_block_stack_p(parser) && match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_DO, YP_TOKEN_BRACE_LEFT))
10510
10842
  ) {
10511
10843
  yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
@@ -10628,7 +10960,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10628
10960
  // can still be a method call if it is followed by arguments or
10629
10961
  // a block, so we need to check for that here.
10630
10962
  if (
10631
- (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 2, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
10963
+ (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 3, YP_TOKEN_UAMPERSAND, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
10632
10964
  (yp_accepts_block_stack_p(parser) && match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_DO, YP_TOKEN_BRACE_LEFT))
10633
10965
  ) {
10634
10966
  yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
@@ -10673,12 +11005,15 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10673
11005
 
10674
11006
  lex_state_set(parser, YP_LEX_STATE_END);
10675
11007
  expect(parser, YP_TOKEN_HEREDOC_END, "Expected a closing delimiter for heredoc.");
11008
+
10676
11009
  if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
10677
11010
  assert(YP_NODE_TYPE_P(node, YP_NODE_INTERPOLATED_X_STRING_NODE));
10678
11011
  yp_interpolated_xstring_node_closing_set(((yp_interpolated_x_string_node_t *) node), &parser->previous);
11012
+ node->location = ((yp_interpolated_x_string_node_t *) node)->opening_loc;
10679
11013
  } else {
10680
11014
  assert(YP_NODE_TYPE_P(node, YP_NODE_INTERPOLATED_STRING_NODE));
10681
11015
  yp_interpolated_string_node_closing_set((yp_interpolated_string_node_t *) node, &parser->previous);
11016
+ node->location = ((yp_interpolated_string_node_t *) node)->opening_loc;
10682
11017
  }
10683
11018
 
10684
11019
  // If this is a heredoc that is indented with a ~, then we need to dedent
@@ -11043,7 +11378,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11043
11378
  return (yp_node_t *) yp_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
11044
11379
  }
11045
11380
 
11046
- yp_node_t *name = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a class name after `class`.");
11381
+ yp_node_t *constant_path = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a class name after `class`.");
11382
+ yp_token_t name = parser->previous;
11383
+ if (name.type != YP_TOKEN_CONSTANT) {
11384
+ yp_diagnostic_list_append(&parser->error_list, name.start, name.end, "Expected a constant name after `class`.");
11385
+ }
11386
+
11047
11387
  yp_token_t inheritance_operator;
11048
11388
  yp_node_t *superclass;
11049
11389
 
@@ -11084,7 +11424,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11084
11424
  yp_constant_id_list_t locals = parser->current_scope->locals;
11085
11425
  yp_parser_scope_pop(parser);
11086
11426
  yp_do_loop_stack_pop(parser);
11087
- return (yp_node_t *) yp_class_node_create(parser, &locals, &class_keyword, name, &inheritance_operator, superclass, statements, &parser->previous);
11427
+ return (yp_node_t *) yp_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
11088
11428
  }
11089
11429
  case YP_TOKEN_KEYWORD_DEF: {
11090
11430
  yp_token_t def_keyword = parser->current;
@@ -11243,6 +11583,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11243
11583
  break;
11244
11584
  }
11245
11585
  case YP_CASE_PARAMETER: {
11586
+ // If we're about to lex a label, we need to add the label
11587
+ // state to make sure the next newline is ignored.
11588
+ if (parser->current.type == YP_TOKEN_LABEL) {
11589
+ lex_state_set(parser, parser->lex_state | YP_LEX_STATE_LABEL);
11590
+ }
11591
+
11246
11592
  lparen = not_provided(parser);
11247
11593
  rparen = not_provided(parser);
11248
11594
  params = parse_parameters(parser, YP_BINDING_POWER_DEFINED, false, false, true);
@@ -11472,13 +11818,14 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11472
11818
  parser_lex(parser);
11473
11819
 
11474
11820
  yp_token_t module_keyword = parser->previous;
11475
- yp_node_t *name = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a module name after `module`.");
11821
+ yp_node_t *constant_path = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a module name after `module`.");
11822
+ yp_token_t name;
11476
11823
 
11477
- // If we can recover from a syntax error that occurred while parsing the
11478
- // name of the module, then we'll handle that here.
11479
- if (YP_NODE_TYPE_P(name, YP_NODE_MISSING_NODE)) {
11480
- yp_token_t end_keyword = (yp_token_t) { .type = YP_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
11481
- return (yp_node_t *) yp_module_node_create(parser, NULL, &module_keyword, name, NULL, &end_keyword);
11824
+ // If we can recover from a syntax error that occurred while parsing
11825
+ // the name of the module, then we'll handle that here.
11826
+ if (YP_NODE_TYPE_P(constant_path, YP_NODE_MISSING_NODE)) {
11827
+ yp_token_t missing = (yp_token_t) { .type = YP_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
11828
+ return (yp_node_t *) yp_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
11482
11829
  }
11483
11830
 
11484
11831
  while (accept(parser, YP_TOKEN_COLON_COLON)) {
@@ -11487,7 +11834,15 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11487
11834
  expect(parser, YP_TOKEN_CONSTANT, "Expected to find a module name after `::`.");
11488
11835
  yp_node_t *constant = (yp_node_t *) yp_constant_read_node_create(parser, &parser->previous);
11489
11836
 
11490
- name = (yp_node_t *)yp_constant_path_node_create(parser, name, &double_colon, constant);
11837
+ constant_path = (yp_node_t *) yp_constant_path_node_create(parser, constant_path, &double_colon, constant);
11838
+ }
11839
+
11840
+ // Here we retrieve the name of the module. If it wasn't a constant,
11841
+ // then it's possible that `module foo` was passed, which is a
11842
+ // syntax error. We handle that here as well.
11843
+ name = parser->previous;
11844
+ if (name.type != YP_TOKEN_CONSTANT) {
11845
+ yp_diagnostic_list_append(&parser->error_list, name.start, name.end, "Expected to find a module name after `module`.");
11491
11846
  }
11492
11847
 
11493
11848
  yp_parser_scope_push(parser, true);
@@ -11514,7 +11869,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11514
11869
  yp_diagnostic_list_append(&parser->error_list, module_keyword.start, module_keyword.end, "Module definition in method body");
11515
11870
  }
11516
11871
 
11517
- return (yp_node_t *) yp_module_node_create(parser, &locals, &module_keyword, name, statements, &parser->previous);
11872
+ return (yp_node_t *) yp_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
11518
11873
  }
11519
11874
  case YP_TOKEN_KEYWORD_NIL:
11520
11875
  parser_lex(parser);
@@ -11550,12 +11905,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11550
11905
  expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `until` statement.");
11551
11906
  }
11552
11907
 
11553
- yp_until_node_t *until_node = yp_until_node_create(parser, &keyword, predicate, statements, 0);
11554
- if (parser->previous.type == YP_TOKEN_KEYWORD_END) {
11555
- until_node->base.location.end = parser->previous.end;
11556
- }
11557
-
11558
- return (yp_node_t *) until_node;
11908
+ return (yp_node_t *) yp_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
11559
11909
  }
11560
11910
  case YP_TOKEN_KEYWORD_WHILE: {
11561
11911
  yp_do_loop_stack_push(parser, true);
@@ -11576,11 +11926,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11576
11926
  expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `while` statement.");
11577
11927
  }
11578
11928
 
11579
- yp_while_node_t *while_node = yp_while_node_create(parser, &keyword, predicate, statements, 0);
11580
- if (parser->previous.type == YP_TOKEN_KEYWORD_END) {
11581
- while_node->base.location.end = parser->previous.end;
11582
- }
11583
- return (yp_node_t *) while_node;
11929
+ return (yp_node_t *) yp_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
11584
11930
  }
11585
11931
  case YP_TOKEN_PERCENT_LOWER_I: {
11586
11932
  parser_lex(parser);
@@ -12086,30 +12432,32 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
12086
12432
  yp_accepts_block_stack_push(parser, true);
12087
12433
  parser_lex(parser);
12088
12434
 
12089
- yp_token_t opening = parser->previous;
12435
+ yp_token_t operator = parser->previous;
12090
12436
  yp_parser_scope_push(parser, false);
12091
12437
  yp_block_parameters_node_t *params;
12092
12438
 
12093
12439
  switch (parser->current.type) {
12094
12440
  case YP_TOKEN_PARENTHESIS_LEFT: {
12095
- yp_token_t block_parameters_opening = parser->current;
12441
+ yp_token_t opening = parser->current;
12096
12442
  parser_lex(parser);
12097
12443
 
12098
12444
  if (match_type_p(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
12099
- params = yp_block_parameters_node_create(parser, NULL, &block_parameters_opening);
12445
+ params = yp_block_parameters_node_create(parser, NULL, &opening);
12100
12446
  } else {
12101
- params = parse_block_parameters(parser, false, &block_parameters_opening, true);
12447
+ params = parse_block_parameters(parser, false, &opening, true);
12102
12448
  }
12103
12449
 
12104
12450
  accept(parser, YP_TOKEN_NEWLINE);
12105
12451
  expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected ')' after left parenthesis.");
12106
- yp_block_parameters_node_closing_set(params, &parser->previous);
12107
12452
 
12453
+ yp_block_parameters_node_closing_set(params, &parser->previous);
12108
12454
  break;
12109
12455
  }
12110
12456
  case YP_CASE_PARAMETER: {
12457
+ yp_accepts_block_stack_push(parser, false);
12111
12458
  yp_token_t opening = not_provided(parser);
12112
12459
  params = parse_block_parameters(parser, false, &opening, true);
12460
+ yp_accepts_block_stack_pop(parser);
12113
12461
  break;
12114
12462
  }
12115
12463
  default: {
@@ -12118,16 +12466,20 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
12118
12466
  }
12119
12467
  }
12120
12468
 
12469
+ yp_token_t opening;
12121
12470
  yp_node_t *body = NULL;
12122
12471
  parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
12123
12472
 
12124
12473
  if (accept(parser, YP_TOKEN_LAMBDA_BEGIN)) {
12474
+ opening = parser->previous;
12475
+
12125
12476
  if (!accept(parser, YP_TOKEN_BRACE_RIGHT)) {
12126
12477
  body = (yp_node_t *) parse_statements(parser, YP_CONTEXT_LAMBDA_BRACES);
12127
12478
  expect(parser, YP_TOKEN_BRACE_RIGHT, "Expecting '}' to close lambda block.");
12128
12479
  }
12129
12480
  } else {
12130
12481
  expect(parser, YP_TOKEN_KEYWORD_DO, "Expected a 'do' keyword or a '{' to open lambda block.");
12482
+ opening = parser->previous;
12131
12483
 
12132
12484
  if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_END, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
12133
12485
  body = (yp_node_t *) parse_statements(parser, YP_CONTEXT_LAMBDA_DO_END);
@@ -12144,7 +12496,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
12144
12496
  yp_constant_id_list_t locals = parser->current_scope->locals;
12145
12497
  yp_parser_scope_pop(parser);
12146
12498
  yp_accepts_block_stack_pop(parser);
12147
- return (yp_node_t *) yp_lambda_node_create(parser, &locals, &opening, params, body, &parser->previous);
12499
+ return (yp_node_t *) yp_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, params, body);
12148
12500
  }
12149
12501
  case YP_TOKEN_UPLUS: {
12150
12502
  parser_lex(parser);
@@ -12363,7 +12715,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12363
12715
  case YP_CASE_WRITABLE: {
12364
12716
  parser_lex(parser);
12365
12717
  yp_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, "Expected a value after =.");
12366
- return parse_target(parser, node, &token, value);
12718
+ return parse_write(parser, node, &token, value);
12367
12719
  }
12368
12720
  case YP_NODE_SPLAT_NODE: {
12369
12721
  yp_splat_node_t *splat_node = (yp_splat_node_t *) node;
@@ -12372,7 +12724,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12372
12724
  case YP_CASE_WRITABLE:
12373
12725
  parser_lex(parser);
12374
12726
  yp_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, "Expected a value after =.");
12375
- return parse_target(parser, (yp_node_t *) splat_node, &token, value);
12727
+ return parse_write(parser, (yp_node_t *) splat_node, &token, value);
12376
12728
  default:
12377
12729
  break;
12378
12730
  }
@@ -12398,48 +12750,16 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12398
12750
  parser_lex(parser);
12399
12751
 
12400
12752
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12401
- yp_node_t *result = (yp_node_t *) yp_global_variable_operator_and_write_node_create(parser, node, &token, value);
12753
+ yp_node_t *result = (yp_node_t *) yp_global_variable_and_write_node_create(parser, node, &token, value);
12402
12754
 
12403
12755
  yp_node_destroy(parser, node);
12404
12756
  return result;
12405
12757
  }
12406
- case YP_NODE_CALL_NODE: {
12407
- yp_call_node_t *call_node = (yp_call_node_t *) node;
12408
-
12409
- // If we have a vcall (a method with no arguments and no
12410
- // receiver that could have been a local variable) then we
12411
- // will transform it into a local variable write.
12412
- if (yp_call_node_variable_call_p(call_node)) {
12413
- yp_location_t message_loc = call_node->message_loc;
12414
- yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12415
-
12416
- if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
12417
- yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
12418
- }
12419
-
12420
- parser_lex(parser);
12421
-
12422
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12423
- yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, message_loc.start, message_loc.end);
12424
- yp_node_t *result = (yp_node_t *) yp_local_variable_operator_and_write_node_create(parser, node, &token, value, constant_id);
12425
-
12426
- yp_node_destroy(parser, node);
12427
- return result;
12428
- }
12429
-
12430
- parser_lex(parser);
12431
-
12432
- yp_token_t operator = not_provided(parser);
12433
- node = parse_target(parser, node, &operator, NULL);
12434
-
12435
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12436
- return (yp_node_t *) yp_call_operator_and_write_node_create(parser, (yp_call_node_t *) node, &token, value);
12437
- }
12438
12758
  case YP_NODE_CLASS_VARIABLE_READ_NODE: {
12439
12759
  parser_lex(parser);
12440
12760
 
12441
12761
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12442
- yp_node_t *result = (yp_node_t *) yp_class_variable_operator_and_write_node_create(parser, node, &token, value);
12762
+ yp_node_t *result = (yp_node_t *) yp_class_variable_and_write_node_create(parser, node, &token, value);
12443
12763
 
12444
12764
  yp_node_destroy(parser, node);
12445
12765
  return result;
@@ -12448,13 +12768,13 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12448
12768
  parser_lex(parser);
12449
12769
 
12450
12770
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12451
- return (yp_node_t *) yp_constant_path_operator_and_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value);
12771
+ return (yp_node_t *) yp_constant_path_and_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value);
12452
12772
  }
12453
12773
  case YP_NODE_CONSTANT_READ_NODE: {
12454
12774
  parser_lex(parser);
12455
12775
 
12456
12776
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12457
- yp_node_t *result = (yp_node_t *) yp_constant_operator_and_write_node_create(parser, node, &token, value);
12777
+ yp_node_t *result = (yp_node_t *) yp_constant_and_write_node_create(parser, node, &token, value);
12458
12778
 
12459
12779
  yp_node_destroy(parser, node);
12460
12780
  return result;
@@ -12463,21 +12783,49 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12463
12783
  parser_lex(parser);
12464
12784
 
12465
12785
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12466
- yp_node_t *result = (yp_node_t *) yp_instance_variable_operator_and_write_node_create(parser, node, &token, value);
12786
+ yp_node_t *result = (yp_node_t *) yp_instance_variable_and_write_node_create(parser, node, &token, value);
12467
12787
 
12468
12788
  yp_node_destroy(parser, node);
12469
12789
  return result;
12470
12790
  }
12471
12791
  case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
12792
+ yp_local_variable_read_node_t *cast = (yp_local_variable_read_node_t *) node;
12472
12793
  parser_lex(parser);
12473
12794
 
12474
12795
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12475
- yp_constant_id_t constant_id = ((yp_local_variable_read_node_t *) node)->constant_id;
12476
- yp_node_t *result = (yp_node_t *) yp_local_variable_operator_and_write_node_create(parser, node, &token, value, constant_id);
12796
+ yp_node_t *result = (yp_node_t *) yp_local_variable_and_write_node_create(parser, node, &token, value, cast->constant_id, cast->depth);
12477
12797
 
12478
12798
  yp_node_destroy(parser, node);
12479
12799
  return result;
12480
12800
  }
12801
+ case YP_NODE_CALL_NODE: {
12802
+ yp_call_node_t *call_node = (yp_call_node_t *) node;
12803
+
12804
+ // If we have a vcall (a method with no arguments and no
12805
+ // receiver that could have been a local variable) then we
12806
+ // will transform it into a local variable write.
12807
+ if (yp_call_node_variable_call_p(call_node)) {
12808
+ yp_location_t message_loc = call_node->message_loc;
12809
+ yp_constant_id_t constant_id = yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12810
+
12811
+ if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
12812
+ yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
12813
+ }
12814
+
12815
+ parser_lex(parser);
12816
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12817
+ yp_node_t *result = (yp_node_t *) yp_local_variable_and_write_node_create(parser, node, &token, value, constant_id, 0);
12818
+
12819
+ yp_node_destroy(parser, node);
12820
+ return result;
12821
+ }
12822
+
12823
+ parser_lex(parser);
12824
+ node = parse_target(parser, node);
12825
+
12826
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12827
+ return (yp_node_t *) yp_call_operator_and_write_node_create(parser, (yp_call_node_t *) node, &token, value);
12828
+ }
12481
12829
  case YP_NODE_MULTI_WRITE_NODE: {
12482
12830
  parser_lex(parser);
12483
12831
  yp_diagnostic_list_append(&parser->error_list, token.start, token.end, "Cannot use `&&=' on a multi-write.");
@@ -12503,48 +12851,16 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12503
12851
  parser_lex(parser);
12504
12852
 
12505
12853
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12506
- yp_node_t *result = (yp_node_t *) yp_global_variable_operator_or_write_node_create(parser, node, &token, value);
12854
+ yp_node_t *result = (yp_node_t *) yp_global_variable_or_write_node_create(parser, node, &token, value);
12507
12855
 
12508
12856
  yp_node_destroy(parser, node);
12509
12857
  return result;
12510
12858
  }
12511
- case YP_NODE_CALL_NODE: {
12512
- yp_call_node_t *call_node = (yp_call_node_t *) node;
12513
-
12514
- // If we have a vcall (a method with no arguments and no
12515
- // receiver that could have been a local variable) then we
12516
- // will transform it into a local variable write.
12517
- if (yp_call_node_variable_call_p(call_node)) {
12518
- yp_location_t message_loc = call_node->message_loc;
12519
- yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12520
-
12521
- if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
12522
- yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
12523
- }
12524
-
12525
- parser_lex(parser);
12526
-
12527
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12528
- yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, message_loc.start, message_loc.end);
12529
- yp_node_t *result = (yp_node_t *) yp_local_variable_operator_or_write_node_create(parser, node, &token, value, constant_id);
12530
-
12531
- yp_node_destroy(parser, node);
12532
- return result;
12533
- }
12534
-
12535
- parser_lex(parser);
12536
-
12537
- yp_token_t operator = not_provided(parser);
12538
- node = parse_target(parser, node, &operator, NULL);
12539
-
12540
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12541
- return (yp_node_t *) yp_call_operator_or_write_node_create(parser, (yp_call_node_t *) node, &token, value);
12542
- }
12543
12859
  case YP_NODE_CLASS_VARIABLE_READ_NODE: {
12544
12860
  parser_lex(parser);
12545
12861
 
12546
12862
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12547
- yp_node_t *result = (yp_node_t *) yp_class_variable_operator_or_write_node_create(parser, node, &token, value);
12863
+ yp_node_t *result = (yp_node_t *) yp_class_variable_or_write_node_create(parser, node, &token, value);
12548
12864
 
12549
12865
  yp_node_destroy(parser, node);
12550
12866
  return result;
@@ -12553,13 +12869,13 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12553
12869
  parser_lex(parser);
12554
12870
 
12555
12871
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12556
- return (yp_node_t *) yp_constant_path_operator_or_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value);
12872
+ return (yp_node_t *) yp_constant_path_or_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value);
12557
12873
  }
12558
12874
  case YP_NODE_CONSTANT_READ_NODE: {
12559
12875
  parser_lex(parser);
12560
12876
 
12561
12877
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12562
- yp_node_t *result = (yp_node_t *) yp_constant_operator_or_write_node_create(parser, node, &token, value);
12878
+ yp_node_t *result = (yp_node_t *) yp_constant_or_write_node_create(parser, node, &token, value);
12563
12879
 
12564
12880
  yp_node_destroy(parser, node);
12565
12881
  return result;
@@ -12568,21 +12884,49 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12568
12884
  parser_lex(parser);
12569
12885
 
12570
12886
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12571
- yp_node_t *result = (yp_node_t *) yp_instance_variable_operator_or_write_node_create(parser, node, &token, value);
12887
+ yp_node_t *result = (yp_node_t *) yp_instance_variable_or_write_node_create(parser, node, &token, value);
12572
12888
 
12573
12889
  yp_node_destroy(parser, node);
12574
12890
  return result;
12575
12891
  }
12576
12892
  case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
12893
+ yp_local_variable_read_node_t *cast = (yp_local_variable_read_node_t *) node;
12577
12894
  parser_lex(parser);
12578
12895
 
12579
12896
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12580
- yp_constant_id_t constant_id = ((yp_local_variable_read_node_t *) node)->constant_id;
12581
- yp_node_t *result = (yp_node_t *) yp_local_variable_operator_or_write_node_create(parser, node, &token, value, constant_id);
12897
+ yp_node_t *result = (yp_node_t *) yp_local_variable_or_write_node_create(parser, node, &token, value, cast->constant_id, cast->depth);
12582
12898
 
12583
12899
  yp_node_destroy(parser, node);
12584
12900
  return result;
12585
12901
  }
12902
+ case YP_NODE_CALL_NODE: {
12903
+ yp_call_node_t *call_node = (yp_call_node_t *) node;
12904
+
12905
+ // If we have a vcall (a method with no arguments and no
12906
+ // receiver that could have been a local variable) then we
12907
+ // will transform it into a local variable write.
12908
+ if (yp_call_node_variable_call_p(call_node)) {
12909
+ yp_location_t message_loc = call_node->message_loc;
12910
+ yp_constant_id_t constant_id = yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12911
+
12912
+ if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
12913
+ yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
12914
+ }
12915
+
12916
+ parser_lex(parser);
12917
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12918
+ yp_node_t *result = (yp_node_t *) yp_local_variable_or_write_node_create(parser, node, &token, value, constant_id, 0);
12919
+
12920
+ yp_node_destroy(parser, node);
12921
+ return result;
12922
+ }
12923
+
12924
+ parser_lex(parser);
12925
+ node = parse_target(parser, node);
12926
+
12927
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12928
+ return (yp_node_t *) yp_call_operator_or_write_node_create(parser, (yp_call_node_t *) node, &token, value);
12929
+ }
12586
12930
  case YP_NODE_MULTI_WRITE_NODE: {
12587
12931
  parser_lex(parser);
12588
12932
  yp_diagnostic_list_append(&parser->error_list, token.start, token.end, "Cannot use `||=' on a multi-write.");
@@ -12617,43 +12961,12 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12617
12961
  case YP_NODE_GLOBAL_VARIABLE_READ_NODE: {
12618
12962
  parser_lex(parser);
12619
12963
 
12620
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator");
12964
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
12621
12965
  yp_node_t *result = (yp_node_t *) yp_global_variable_operator_write_node_create(parser, node, &token, value);
12622
12966
 
12623
12967
  yp_node_destroy(parser, node);
12624
12968
  return result;
12625
12969
  }
12626
- case YP_NODE_CALL_NODE: {
12627
- yp_call_node_t *call_node = (yp_call_node_t *) node;
12628
-
12629
- // If we have a vcall (a method with no arguments and no
12630
- // receiver that could have been a local variable) then we
12631
- // will transform it into a local variable write.
12632
- if (yp_call_node_variable_call_p(call_node)) {
12633
- yp_location_t message_loc = call_node->message_loc;
12634
- yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12635
-
12636
- if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
12637
- yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
12638
- }
12639
-
12640
- parser_lex(parser);
12641
-
12642
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12643
- yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, message_loc.start, message_loc.end);
12644
- yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, constant_id);
12645
-
12646
- yp_node_destroy(parser, node);
12647
- return result;
12648
- }
12649
-
12650
- yp_token_t operator = not_provided(parser);
12651
- node = parse_target(parser, node, &operator, NULL);
12652
-
12653
- parser_lex(parser);
12654
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
12655
- return (yp_node_t *) yp_call_operator_write_node_create(parser, (yp_call_node_t *) node, &token, value);
12656
- }
12657
12970
  case YP_NODE_CLASS_VARIABLE_READ_NODE: {
12658
12971
  parser_lex(parser);
12659
12972
 
@@ -12688,15 +13001,43 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12688
13001
  return result;
12689
13002
  }
12690
13003
  case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
13004
+ yp_local_variable_read_node_t *cast = (yp_local_variable_read_node_t *) node;
12691
13005
  parser_lex(parser);
12692
13006
 
12693
13007
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
12694
- yp_constant_id_t constant_id = ((yp_local_variable_read_node_t *) node)->constant_id;
12695
- yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, constant_id);
13008
+ yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, cast->constant_id, cast->depth);
12696
13009
 
12697
13010
  yp_node_destroy(parser, node);
12698
13011
  return result;
12699
13012
  }
13013
+ case YP_NODE_CALL_NODE: {
13014
+ yp_call_node_t *call_node = (yp_call_node_t *) node;
13015
+
13016
+ // If we have a vcall (a method with no arguments and no
13017
+ // receiver that could have been a local variable) then we
13018
+ // will transform it into a local variable write.
13019
+ if (yp_call_node_variable_call_p(call_node)) {
13020
+ yp_location_t message_loc = call_node->message_loc;
13021
+ yp_constant_id_t constant_id = yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
13022
+
13023
+ if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
13024
+ yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
13025
+ }
13026
+
13027
+ parser_lex(parser);
13028
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13029
+ yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, constant_id, 0);
13030
+
13031
+ yp_node_destroy(parser, node);
13032
+ return result;
13033
+ }
13034
+
13035
+ node = parse_target(parser, node);
13036
+ parser_lex(parser);
13037
+
13038
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13039
+ return (yp_node_t *) yp_call_operator_write_node_create(parser, (yp_call_node_t *) node, &token, value);
13040
+ }
12700
13041
  case YP_NODE_MULTI_WRITE_NODE: {
12701
13042
  parser_lex(parser);
12702
13043
  yp_diagnostic_list_append(&parser->error_list, token.start, token.end, "Unexpected operator.");
@@ -12862,7 +13203,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12862
13203
  yp_statements_node_body_append(statements, node);
12863
13204
 
12864
13205
  yp_node_t *predicate = parse_expression(parser, binding_power, "Expected a predicate after 'until'");
12865
- return (yp_node_t *) yp_until_node_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_NODE_BEGIN_NODE) ? YP_LOOP_FLAGS_BEGIN_MODIFIER : 0);
13206
+ return (yp_node_t *) yp_until_node_modifier_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_NODE_BEGIN_NODE) ? YP_LOOP_FLAGS_BEGIN_MODIFIER : 0);
12866
13207
  }
12867
13208
  case YP_TOKEN_KEYWORD_WHILE_MODIFIER: {
12868
13209
  parser_lex(parser);
@@ -12870,7 +13211,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12870
13211
  yp_statements_node_body_append(statements, node);
12871
13212
 
12872
13213
  yp_node_t *predicate = parse_expression(parser, binding_power, "Expected a predicate after 'while'");
12873
- return (yp_node_t *) yp_while_node_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_NODE_BEGIN_NODE) ? YP_LOOP_FLAGS_BEGIN_MODIFIER : 0);
13214
+ return (yp_node_t *) yp_while_node_modifier_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_NODE_BEGIN_NODE) ? YP_LOOP_FLAGS_BEGIN_MODIFIER : 0);
12874
13215
  }
12875
13216
  case YP_TOKEN_QUESTION_MARK: {
12876
13217
  parser_lex(parser);
@@ -12908,7 +13249,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12908
13249
 
12909
13250
  if (
12910
13251
  (parser->current.type == YP_TOKEN_PARENTHESIS_LEFT) ||
12911
- (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 2, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))
13252
+ (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 3, YP_TOKEN_UAMPERSAND, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))
12912
13253
  ) {
12913
13254
  // If we have a constant immediately following a '::' operator, then
12914
13255
  // this can either be a constant path or a method call, depending on
@@ -13140,7 +13481,7 @@ yp_metadata_read_u32(const char *ptr) {
13140
13481
  // ]*
13141
13482
  // ]
13142
13483
  // ```
13143
- static void
13484
+ void
13144
13485
  yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
13145
13486
  uint32_t filepath_size = yp_metadata_read_u32(metadata);
13146
13487
  metadata += 4;
@@ -13179,6 +13520,8 @@ yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
13179
13520
  // Initialize a parser with the given start and end pointers.
13180
13521
  YP_EXPORTED_FUNCTION void
13181
13522
  yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char *filepath) {
13523
+ assert(source != NULL);
13524
+
13182
13525
  // Set filepath to the file that was passed
13183
13526
  if (!filepath) filepath = "";
13184
13527
  yp_string_t filepath_string;
@@ -13190,6 +13533,8 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
13190
13533
  .enclosure_nesting = 0,
13191
13534
  .lambda_enclosure_nesting = -1,
13192
13535
  .brace_nesting = 0,
13536
+ .do_loop_stack = YP_STATE_STACK_EMPTY,
13537
+ .accepts_block_stack = YP_STATE_STACK_EMPTY,
13193
13538
  .lex_modes = {
13194
13539
  .index = 0,
13195
13540
  .stack = {{ .mode = YP_LEX_DEFAULT }},
@@ -13201,6 +13546,9 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
13201
13546
  .current = { .type = YP_TOKEN_EOF, .start = source, .end = source },
13202
13547
  .next_start = NULL,
13203
13548
  .heredoc_end = NULL,
13549
+ .comment_list = YP_LIST_EMPTY,
13550
+ .warning_list = YP_LIST_EMPTY,
13551
+ .error_list = YP_LIST_EMPTY,
13204
13552
  .current_scope = NULL,
13205
13553
  .current_context = NULL,
13206
13554
  .recovering = false,
@@ -13213,16 +13561,12 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
13213
13561
  .pattern_matching_newlines = false,
13214
13562
  .in_keyword_arg = false,
13215
13563
  .filepath_string = filepath_string,
13564
+ .constant_pool = YP_CONSTANT_POOL_EMPTY,
13565
+ .newline_list = YP_NEWLINE_LIST_EMPTY
13216
13566
  };
13217
13567
 
13218
- yp_state_stack_init(&parser->do_loop_stack);
13219
- yp_state_stack_init(&parser->accepts_block_stack);
13220
13568
  yp_accepts_block_stack_push(parser, true);
13221
13569
 
13222
- yp_list_init(&parser->warning_list);
13223
- yp_list_init(&parser->error_list);
13224
- yp_list_init(&parser->comment_list);
13225
-
13226
13570
  // Initialize the constant pool. We're going to completely guess as to the
13227
13571
  // number of constants that we'll need based on the size of the input. The
13228
13572
  // ratio we chose here is actually less arbitrary than you might think.
@@ -13246,14 +13590,15 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
13246
13590
  size_t newline_size = size / 22;
13247
13591
  yp_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
13248
13592
 
13249
- assert(source != NULL);
13593
+ // Skip past the UTF-8 BOM if it exists.
13250
13594
  if (size >= 3 && (unsigned char) source[0] == 0xef && (unsigned char) source[1] == 0xbb && (unsigned char) source[2] == 0xbf) {
13251
- // If the first three bytes of the source are the UTF-8 BOM, then we'll skip
13252
- // over them.
13253
13595
  parser->current.end += 3;
13254
- } else if (size >= 2 && source[0] == '#' && source[1] == '!') {
13255
- // If the first two bytes of the source are a shebang, then we'll indicate
13256
- // that the encoding comment is at the end of the shebang.
13596
+ parser->encoding_comment_start += 3;
13597
+ }
13598
+
13599
+ // If the first two bytes of the source are a shebang, then we'll indicate
13600
+ // that the encoding comment is at the end of the shebang.
13601
+ if (peek(parser) == '#' && peek_offset(parser, 1) == '!') {
13257
13602
  const char *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
13258
13603
  if (encoding_comment_start) {
13259
13604
  parser->encoding_comment_start = encoding_comment_start + 1;