yarp 0.7.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
data/src/yarp.c CHANGED
@@ -1,5 +1,4 @@
1
1
  #include "yarp.h"
2
- #include "yarp/version.h"
3
2
 
4
3
  // The YARP version and the serialization format.
5
4
  const char *
@@ -362,7 +361,7 @@ lex_state_ignored_p(yp_parser_t *parser) {
362
361
 
363
362
  if (ignored) {
364
363
  return YP_IGNORED_NEWLINE_ALL;
365
- } else if (parser->lex_state == (YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED)) {
364
+ } else if ((parser->lex_state & ~((unsigned int) YP_LEX_STATE_LABEL)) == (YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED)) {
366
365
  return YP_IGNORED_NEWLINE_PATTERN;
367
366
  } else {
368
367
  return YP_IGNORED_NEWLINE_NONE;
@@ -450,8 +449,8 @@ yp_flip_flop(yp_node_t *node) {
450
449
  case YP_NODE_PARENTHESES_NODE: {
451
450
  yp_parentheses_node_t *cast = (yp_parentheses_node_t *) node;
452
451
 
453
- if ((cast->statements != NULL) && YP_NODE_TYPE_P(cast->statements, YP_NODE_STATEMENTS_NODE)) {
454
- yp_statements_node_t *statements = (yp_statements_node_t *) cast->statements;
452
+ if ((cast->body != NULL) && YP_NODE_TYPE_P(cast->body, YP_NODE_STATEMENTS_NODE)) {
453
+ yp_statements_node_t *statements = (yp_statements_node_t *) cast->body;
455
454
  if (statements->body.size == 1) yp_flip_flop(statements->body.nodes[0]);
456
455
  }
457
456
 
@@ -459,8 +458,12 @@ yp_flip_flop(yp_node_t *node) {
459
458
  }
460
459
  case YP_NODE_RANGE_NODE: {
461
460
  yp_range_node_t *cast = (yp_range_node_t *) node;
462
- yp_flip_flop(cast->left);
463
- yp_flip_flop(cast->right);
461
+ if (cast->left) {
462
+ yp_flip_flop(cast->left);
463
+ }
464
+ if (cast->right) {
465
+ yp_flip_flop(cast->right);
466
+ }
464
467
 
465
468
  // Here we change the range node into a flip flop node. We can do
466
469
  // this since the nodes are exactly the same except for the type.
@@ -532,6 +535,73 @@ yp_arguments_validate(yp_parser_t *parser, yp_arguments_t *arguments) {
532
535
  }
533
536
  }
534
537
 
538
+ /******************************************************************************/
539
+ /* Scope node functions */
540
+ /******************************************************************************/
541
+
542
+ // Generate a scope node from the given node: copy the node's location into `scope`, then fill in parameters, body and locals from the block, class, def, lambda, module, program or singleton class node passed in.
543
+ void
544
+ yp_scope_node_init(yp_node_t *node, yp_scope_node_t *scope) {
545
+ scope->base.type = YP_NODE_SCOPE_NODE;
546
+ scope->base.location.start = node->location.start; // the scope covers the same source range as the node it is built from
547
+ scope->base.location.end = node->location.end;
548
+
549
+ scope->parameters = NULL; // defaults; only the block, def and lambda cases below assign parameters
550
+ scope->body = NULL;
551
+ yp_constant_id_list_init(&scope->locals); // every handled case below overwrites this with the source node's list
552
+
553
+ switch (YP_NODE_TYPE(node)) {
554
+ case YP_NODE_BLOCK_NODE: {
555
+ yp_block_node_t *cast = (yp_block_node_t *) node;
556
+ if (cast->parameters) scope->parameters = cast->parameters->parameters; // unwrap the block-parameters wrapper when one is present
557
+ scope->body = cast->body;
558
+ scope->locals = cast->locals; // struct assignment; NOTE(review): presumably shares storage with the source node's list - confirm ownership
559
+ break;
560
+ }
561
+ case YP_NODE_CLASS_NODE: {
562
+ yp_class_node_t *cast = (yp_class_node_t *) node;
563
+ scope->body = cast->body;
564
+ scope->locals = cast->locals;
565
+ break;
566
+ }
567
+ case YP_NODE_DEF_NODE: {
568
+ yp_def_node_t *cast = (yp_def_node_t *) node;
569
+ scope->parameters = cast->parameters; // assigned directly, unlike block and lambda which go through a wrapper node
570
+ scope->body = cast->body;
571
+ scope->locals = cast->locals;
572
+ break;
573
+ }
574
+ case YP_NODE_LAMBDA_NODE: {
575
+ yp_lambda_node_t *cast = (yp_lambda_node_t *) node;
576
+ if (cast->parameters) scope->parameters = cast->parameters->parameters; // same unwrapping as the block case
577
+ scope->body = cast->body;
578
+ scope->locals = cast->locals;
579
+ break;
580
+ }
581
+ case YP_NODE_MODULE_NODE: {
582
+ yp_module_node_t *cast = (yp_module_node_t *) node;
583
+ scope->body = cast->body;
584
+ scope->locals = cast->locals;
585
+ break;
586
+ }
587
+ case YP_NODE_PROGRAM_NODE: {
588
+ yp_program_node_t *cast = (yp_program_node_t *) node;
589
+ scope->body = (yp_node_t *) cast->statements; // the program node uses a `statements` field, cast here to the generic node pointer
590
+ scope->locals = cast->locals;
591
+ break;
592
+ }
593
+ case YP_NODE_SINGLETON_CLASS_NODE: {
594
+ yp_singleton_class_node_t *cast = (yp_singleton_class_node_t *) node;
595
+ scope->body = cast->body;
596
+ scope->locals = cast->locals;
597
+ break;
598
+ }
599
+ default:
600
+ assert(false && "unreachable"); // if assertions are compiled out, scope keeps the NULL/empty defaults set above
601
+ break;
602
+ }
603
+ }
604
+
535
605
  /******************************************************************************/
536
606
  /* Node creation functions */
537
607
  /******************************************************************************/
@@ -993,7 +1063,7 @@ yp_block_argument_node_create(yp_parser_t *parser, const yp_token_t *operator, y
993
1063
 
994
1064
  // Allocate and initialize a new BlockNode node.
995
1065
  static yp_block_node_t *
996
- yp_block_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *opening, yp_block_parameters_node_t *parameters, yp_node_t *statements, const yp_token_t *closing) {
1066
+ yp_block_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *opening, yp_block_parameters_node_t *parameters, yp_node_t *body, const yp_token_t *closing) {
997
1067
  yp_block_node_t *node = YP_ALLOC_NODE(parser, yp_block_node_t);
998
1068
 
999
1069
  *node = (yp_block_node_t) {
@@ -1003,7 +1073,7 @@ yp_block_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const y
1003
1073
  },
1004
1074
  .locals = *locals,
1005
1075
  .parameters = parameters,
1006
- .statements = statements,
1076
+ .body = body,
1007
1077
  .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
1008
1078
  .closing_loc = YP_LOCATION_TOKEN_VALUE(closing)
1009
1079
  };
@@ -1126,7 +1196,7 @@ yp_call_node_create(yp_parser_t *parser) {
1126
1196
  },
1127
1197
  .receiver = NULL,
1128
1198
  .operator_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1129
- .message_loc = YP_LOCATION_NULL_VALUE(parser),
1199
+ .message_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1130
1200
  .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1131
1201
  .arguments = NULL,
1132
1202
  .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
@@ -1461,7 +1531,7 @@ yp_case_node_end_keyword_loc_set(yp_case_node_t *node, const yp_token_t *end_key
1461
1531
 
1462
1532
  // Allocate a new ClassNode node.
1463
1533
  static yp_class_node_t *
1464
- yp_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *class_keyword, yp_node_t *constant_path, const yp_token_t *inheritance_operator, yp_node_t *superclass, yp_node_t *statements, const yp_token_t *end_keyword) {
1534
+ yp_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *class_keyword, yp_node_t *constant_path, const yp_token_t *name, const yp_token_t *inheritance_operator, yp_node_t *superclass, yp_node_t *body, const yp_token_t *end_keyword) {
1465
1535
  yp_class_node_t *node = YP_ALLOC_NODE(parser, yp_class_node_t);
1466
1536
 
1467
1537
  *node = (yp_class_node_t) {
@@ -1474,23 +1544,25 @@ yp_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const y
1474
1544
  .constant_path = constant_path,
1475
1545
  .inheritance_operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
1476
1546
  .superclass = superclass,
1477
- .statements = statements,
1478
- .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword)
1547
+ .body = body,
1548
+ .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword),
1549
+ .name = YP_EMPTY_STRING
1479
1550
  };
1480
1551
 
1552
+ yp_string_shared_init(&node->name, name->start, name->end);
1481
1553
  return node;
1482
1554
  }
1483
1555
 
1484
- // Allocate and initialize a new ClassVariableOperatorAndWriteNode node.
1485
- static yp_class_variable_operator_and_write_node_t *
1486
- yp_class_variable_operator_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1556
+ // Allocate and initialize a new ClassVariableAndWriteNode node.
1557
+ static yp_class_variable_and_write_node_t *
1558
+ yp_class_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1487
1559
  assert(YP_NODE_TYPE_P(target, YP_NODE_CLASS_VARIABLE_READ_NODE));
1488
1560
  assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
1489
- yp_class_variable_operator_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_operator_and_write_node_t);
1561
+ yp_class_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_and_write_node_t);
1490
1562
 
1491
- *node = (yp_class_variable_operator_and_write_node_t) {
1563
+ *node = (yp_class_variable_and_write_node_t) {
1492
1564
  {
1493
- .type = YP_NODE_CLASS_VARIABLE_OPERATOR_AND_WRITE_NODE,
1565
+ .type = YP_NODE_CLASS_VARIABLE_AND_WRITE_NODE,
1494
1566
  .location = {
1495
1567
  .start = target->location.start,
1496
1568
  .end = value->location.end
@@ -1526,16 +1598,16 @@ yp_class_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *tar
1526
1598
  return node;
1527
1599
  }
1528
1600
 
1529
- // Allocate and initialize a new ClassVariableOperatorOrWriteNode node.
1530
- static yp_class_variable_operator_or_write_node_t *
1531
- yp_class_variable_operator_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1601
+ // Allocate and initialize a new ClassVariableOrWriteNode node.
1602
+ static yp_class_variable_or_write_node_t *
1603
+ yp_class_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1532
1604
  assert(YP_NODE_TYPE_P(target, YP_NODE_CLASS_VARIABLE_READ_NODE));
1533
1605
  assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
1534
- yp_class_variable_operator_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_operator_or_write_node_t);
1606
+ yp_class_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_or_write_node_t);
1535
1607
 
1536
- *node = (yp_class_variable_operator_or_write_node_t) {
1608
+ *node = (yp_class_variable_or_write_node_t) {
1537
1609
  {
1538
- .type = YP_NODE_CLASS_VARIABLE_OPERATOR_OR_WRITE_NODE,
1610
+ .type = YP_NODE_CLASS_VARIABLE_OR_WRITE_NODE,
1539
1611
  .location = {
1540
1612
  .start = target->location.start,
1541
1613
  .end = value->location.end
@@ -1568,10 +1640,10 @@ yp_class_variable_read_node_to_class_variable_write_node(yp_parser_t *parser, yp
1568
1640
  .type = YP_NODE_CLASS_VARIABLE_WRITE_NODE,
1569
1641
  .location = {
1570
1642
  .start = read_node->base.location.start,
1571
- .end = value != NULL ? value->location.end : read_node->base.location.end
1643
+ .end = value->location.end
1572
1644
  },
1573
1645
  },
1574
- .name_loc = YP_LOCATION_NODE_VALUE((yp_node_t *)read_node),
1646
+ .name_loc = YP_LOCATION_NODE_VALUE((yp_node_t *) read_node),
1575
1647
  .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
1576
1648
  .value = value
1577
1649
  };
@@ -1579,15 +1651,15 @@ yp_class_variable_read_node_to_class_variable_write_node(yp_parser_t *parser, yp
1579
1651
  return node;
1580
1652
  }
1581
1653
 
1582
- // Allocate and initialize a new ConstantPathOperatorAndWriteNode node.
1583
- static yp_constant_path_operator_and_write_node_t *
1584
- yp_constant_path_operator_and_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1654
+ // Allocate and initialize a new ConstantPathAndWriteNode node.
1655
+ static yp_constant_path_and_write_node_t *
1656
+ yp_constant_path_and_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1585
1657
  assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
1586
- yp_constant_path_operator_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_operator_and_write_node_t);
1658
+ yp_constant_path_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_and_write_node_t);
1587
1659
 
1588
- *node = (yp_constant_path_operator_and_write_node_t) {
1660
+ *node = (yp_constant_path_and_write_node_t) {
1589
1661
  {
1590
- .type = YP_NODE_CONSTANT_PATH_OPERATOR_AND_WRITE_NODE,
1662
+ .type = YP_NODE_CONSTANT_PATH_AND_WRITE_NODE,
1591
1663
  .location = {
1592
1664
  .start = target->base.location.start,
1593
1665
  .end = value->location.end
@@ -1623,15 +1695,15 @@ yp_constant_path_operator_write_node_create(yp_parser_t *parser, yp_constant_pat
1623
1695
  return node;
1624
1696
  }
1625
1697
 
1626
- // Allocate and initialize a new ConstantPathOperatorOrWriteNode node.
1627
- static yp_constant_path_operator_or_write_node_t *
1628
- yp_constant_path_operator_or_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1698
+ // Allocate and initialize a new ConstantPathOrWriteNode node.
1699
+ static yp_constant_path_or_write_node_t *
1700
+ yp_constant_path_or_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1629
1701
  assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
1630
- yp_constant_path_operator_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_operator_or_write_node_t);
1702
+ yp_constant_path_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_or_write_node_t);
1631
1703
 
1632
- *node = (yp_constant_path_operator_or_write_node_t) {
1704
+ *node = (yp_constant_path_or_write_node_t) {
1633
1705
  {
1634
- .type = YP_NODE_CONSTANT_PATH_OPERATOR_OR_WRITE_NODE,
1706
+ .type = YP_NODE_CONSTANT_PATH_OR_WRITE_NODE,
1635
1707
  .location = {
1636
1708
  .start = target->base.location.start,
1637
1709
  .end = value->location.end
@@ -1676,7 +1748,7 @@ yp_constant_path_write_node_create(yp_parser_t *parser, yp_constant_path_node_t
1676
1748
  .type = YP_NODE_CONSTANT_PATH_WRITE_NODE,
1677
1749
  .location = {
1678
1750
  .start = target->base.location.start,
1679
- .end = (value == NULL ? target->base.location.end : value->location.end)
1751
+ .end = value->location.end
1680
1752
  },
1681
1753
  },
1682
1754
  .target = target,
@@ -1687,16 +1759,16 @@ yp_constant_path_write_node_create(yp_parser_t *parser, yp_constant_path_node_t
1687
1759
  return node;
1688
1760
  }
1689
1761
 
1690
- // Allocate and initialize a new ConstantOperatorAndWriteNode node.
1691
- static yp_constant_operator_and_write_node_t *
1692
- yp_constant_operator_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1762
+ // Allocate and initialize a new ConstantAndWriteNode node.
1763
+ static yp_constant_and_write_node_t *
1764
+ yp_constant_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1693
1765
  assert(YP_NODE_TYPE_P(target, YP_NODE_CONSTANT_READ_NODE));
1694
1766
  assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
1695
- yp_constant_operator_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_operator_and_write_node_t);
1767
+ yp_constant_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_and_write_node_t);
1696
1768
 
1697
- *node = (yp_constant_operator_and_write_node_t) {
1769
+ *node = (yp_constant_and_write_node_t) {
1698
1770
  {
1699
- .type = YP_NODE_CONSTANT_OPERATOR_AND_WRITE_NODE,
1771
+ .type = YP_NODE_CONSTANT_AND_WRITE_NODE,
1700
1772
  .location = {
1701
1773
  .start = target->location.start,
1702
1774
  .end = value->location.end
@@ -1732,16 +1804,16 @@ yp_constant_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, c
1732
1804
  return node;
1733
1805
  }
1734
1806
 
1735
- // Allocate and initialize a new ConstantOperatorOrWriteNode node.
1736
- static yp_constant_operator_or_write_node_t *
1737
- yp_constant_operator_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1807
+ // Allocate and initialize a new ConstantOrWriteNode node.
1808
+ static yp_constant_or_write_node_t *
1809
+ yp_constant_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
1738
1810
  assert(YP_NODE_TYPE_P(target, YP_NODE_CONSTANT_READ_NODE));
1739
1811
  assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
1740
- yp_constant_operator_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_operator_or_write_node_t);
1812
+ yp_constant_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_or_write_node_t);
1741
1813
 
1742
- *node = (yp_constant_operator_or_write_node_t) {
1814
+ *node = (yp_constant_or_write_node_t) {
1743
1815
  {
1744
- .type = YP_NODE_CONSTANT_OPERATOR_OR_WRITE_NODE,
1816
+ .type = YP_NODE_CONSTANT_OR_WRITE_NODE,
1745
1817
  .location = {
1746
1818
  .start = target->location.start,
1747
1819
  .end = value->location.end
@@ -1775,7 +1847,7 @@ yp_constant_write_node_create(yp_parser_t *parser, yp_location_t *name_loc, cons
1775
1847
  .type = YP_NODE_CONSTANT_WRITE_NODE,
1776
1848
  .location = {
1777
1849
  .start = name_loc->start,
1778
- .end = value != NULL ? value->location.end : name_loc->end
1850
+ .end = value->location.end
1779
1851
  },
1780
1852
  },
1781
1853
  .name_loc = *name_loc,
@@ -1793,7 +1865,7 @@ yp_def_node_create(
1793
1865
  const yp_token_t *name,
1794
1866
  yp_node_t *receiver,
1795
1867
  yp_parameters_node_t *parameters,
1796
- yp_node_t *statements,
1868
+ yp_node_t *body,
1797
1869
  yp_constant_id_list_t *locals,
1798
1870
  const yp_token_t *def_keyword,
1799
1871
  const yp_token_t *operator,
@@ -1806,7 +1878,7 @@ yp_def_node_create(
1806
1878
  const char *end;
1807
1879
 
1808
1880
  if (end_keyword->type == YP_TOKEN_NOT_PROVIDED) {
1809
- end = statements->location.end;
1881
+ end = body->location.end;
1810
1882
  } else {
1811
1883
  end = end_keyword->end;
1812
1884
  }
@@ -1819,7 +1891,7 @@ yp_def_node_create(
1819
1891
  .name_loc = YP_LOCATION_TOKEN_VALUE(name),
1820
1892
  .receiver = receiver,
1821
1893
  .parameters = parameters,
1822
- .statements = statements,
1894
+ .body = body,
1823
1895
  .locals = *locals,
1824
1896
  .def_keyword_loc = YP_LOCATION_TOKEN_VALUE(def_keyword),
1825
1897
  .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
@@ -2189,16 +2261,16 @@ yp_hash_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *assoc
2189
2261
  return node;
2190
2262
  }
2191
2263
 
2192
- // Allocate and initialize a new GlobalVariableOperatorAndWriteNode node.
2193
- static yp_global_variable_operator_and_write_node_t *
2194
- yp_global_variable_operator_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2264
+ // Allocate and initialize a new GlobalVariableAndWriteNode node.
2265
+ static yp_global_variable_and_write_node_t *
2266
+ yp_global_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2195
2267
  assert(YP_NODE_TYPE_P(target, YP_NODE_GLOBAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_BACK_REFERENCE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_NUMBERED_REFERENCE_READ_NODE));
2196
2268
  assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2197
- yp_global_variable_operator_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_operator_and_write_node_t);
2269
+ yp_global_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_and_write_node_t);
2198
2270
 
2199
- *node = (yp_global_variable_operator_and_write_node_t) {
2271
+ *node = (yp_global_variable_and_write_node_t) {
2200
2272
  {
2201
- .type = YP_NODE_GLOBAL_VARIABLE_OPERATOR_AND_WRITE_NODE,
2273
+ .type = YP_NODE_GLOBAL_VARIABLE_AND_WRITE_NODE,
2202
2274
  .location = {
2203
2275
  .start = target->location.start,
2204
2276
  .end = value->location.end
@@ -2234,16 +2306,16 @@ yp_global_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *ta
2234
2306
  return node;
2235
2307
  }
2236
2308
 
2237
- // Allocate and initialize a new GlobalVariableOperatorOrWriteNode node.
2238
- static yp_global_variable_operator_or_write_node_t *
2239
- yp_global_variable_operator_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2309
+ // Allocate and initialize a new GlobalVariableOrWriteNode node.
2310
+ static yp_global_variable_or_write_node_t *
2311
+ yp_global_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2240
2312
  assert(YP_NODE_TYPE_P(target, YP_NODE_GLOBAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_BACK_REFERENCE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_NUMBERED_REFERENCE_READ_NODE));
2241
2313
  assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
2242
- yp_global_variable_operator_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_operator_or_write_node_t);
2314
+ yp_global_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_or_write_node_t);
2243
2315
 
2244
- *node = (yp_global_variable_operator_or_write_node_t) {
2316
+ *node = (yp_global_variable_or_write_node_t) {
2245
2317
  {
2246
- .type = YP_NODE_GLOBAL_VARIABLE_OPERATOR_OR_WRITE_NODE,
2318
+ .type = YP_NODE_GLOBAL_VARIABLE_OR_WRITE_NODE,
2247
2319
  .location = {
2248
2320
  .start = target->location.start,
2249
2321
  .end = value->location.end
@@ -2282,7 +2354,7 @@ yp_global_variable_write_node_create(yp_parser_t *parser, const yp_location_t *n
2282
2354
  .type = YP_NODE_GLOBAL_VARIABLE_WRITE_NODE,
2283
2355
  .location = {
2284
2356
  .start = name_loc->start,
2285
- .end = (value == NULL ? name_loc->end : value->location.end)
2357
+ .end = value->location.end
2286
2358
  },
2287
2359
  },
2288
2360
  .name_loc = *name_loc,
@@ -2547,16 +2619,16 @@ yp_in_node_create(yp_parser_t *parser, yp_node_t *pattern, yp_statements_node_t
2547
2619
  return node;
2548
2620
  }
2549
2621
 
2550
- // Allocate and initialize a new InstanceVariableOperatorAndWriteNode node.
2551
- static yp_instance_variable_operator_and_write_node_t *
2552
- yp_instance_variable_operator_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2622
+ // Allocate and initialize a new InstanceVariableAndWriteNode node.
2623
+ static yp_instance_variable_and_write_node_t *
2624
+ yp_instance_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2553
2625
  assert(YP_NODE_TYPE_P(target, YP_NODE_INSTANCE_VARIABLE_READ_NODE));
2554
2626
  assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2555
- yp_instance_variable_operator_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_operator_and_write_node_t);
2627
+ yp_instance_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_and_write_node_t);
2556
2628
 
2557
- *node = (yp_instance_variable_operator_and_write_node_t) {
2629
+ *node = (yp_instance_variable_and_write_node_t) {
2558
2630
  {
2559
- .type = YP_NODE_INSTANCE_VARIABLE_OPERATOR_AND_WRITE_NODE,
2631
+ .type = YP_NODE_INSTANCE_VARIABLE_AND_WRITE_NODE,
2560
2632
  .location = {
2561
2633
  .start = target->location.start,
2562
2634
  .end = value->location.end
@@ -2592,16 +2664,16 @@ yp_instance_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *
2592
2664
  return node;
2593
2665
  }
2594
2666
 
2595
- // Allocate and initialize a new InstanceVariableOperatorOrWriteNode node.
2596
- static yp_instance_variable_operator_or_write_node_t *
2597
- yp_instance_variable_operator_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2667
+ // Allocate and initialize a new InstanceVariableOrWriteNode node.
2668
+ static yp_instance_variable_or_write_node_t *
2669
+ yp_instance_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) {
2598
2670
  assert(YP_NODE_TYPE_P(target, YP_NODE_INSTANCE_VARIABLE_READ_NODE));
2599
2671
  assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
2600
- yp_instance_variable_operator_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_operator_or_write_node_t);
2672
+ yp_instance_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_or_write_node_t);
2601
2673
 
2602
- *node = (yp_instance_variable_operator_or_write_node_t) {
2674
+ *node = (yp_instance_variable_or_write_node_t) {
2603
2675
  {
2604
- .type = YP_NODE_INSTANCE_VARIABLE_OPERATOR_OR_WRITE_NODE,
2676
+ .type = YP_NODE_INSTANCE_VARIABLE_OR_WRITE_NODE,
2605
2677
  .location = {
2606
2678
  .start = target->location.start,
2607
2679
  .end = value->location.end
@@ -2637,7 +2709,7 @@ yp_instance_variable_write_node_create(yp_parser_t *parser, yp_instance_variable
2637
2709
  .type = YP_NODE_INSTANCE_VARIABLE_WRITE_NODE,
2638
2710
  .location = {
2639
2711
  .start = read_node->base.location.start,
2640
- .end = value == NULL ? read_node->base.location.end : value->location.end
2712
+ .end = value->location.end
2641
2713
  }
2642
2714
  },
2643
2715
  .name_loc = YP_LOCATION_NODE_BASE_VALUE(read_node),
@@ -2706,6 +2778,10 @@ yp_interpolated_string_node_create(yp_parser_t *parser, const yp_token_t *openin
2706
2778
  // Append a part to an InterpolatedStringNode node, extending the node's location so that it ends where the appended part ends.
2707
2779
  static inline void
2708
2780
  yp_interpolated_string_node_append(yp_interpolated_string_node_t *node, yp_node_t *part) {
2781
+ if (node->parts.size == 0 && node->opening_loc.start == NULL) { // first part of a node that has no opening location recorded
2782
+ node->base.location.start = part->location.start; // so the node starts where its first part starts
2783
+ }
2784
+
2709
2785
  yp_node_list_append(&node->parts, part);
2710
2786
  node->base.location.end = part->location.end; // the most recently appended part always defines the end
2711
2787
  }
@@ -2747,12 +2823,6 @@ yp_interpolated_symbol_node_append(yp_interpolated_symbol_node_t *node, yp_node_
2747
2823
  node->base.location.end = part->location.end;
2748
2824
  }
2749
2825
 
2750
- static inline void
2751
- yp_interpolated_symbol_node_closing_set(yp_interpolated_symbol_node_t *node, const yp_token_t *closing) {
2752
- node->closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
2753
- node->base.location.end = closing->end;
2754
- }
2755
-
2756
2826
  // Allocate a new InterpolatedXStringNode node.
2757
2827
  static yp_interpolated_x_string_node_t *
2758
2828
  yp_interpolated_xstring_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *closing) {
@@ -2860,10 +2930,11 @@ static yp_lambda_node_t *
2860
2930
  yp_lambda_node_create(
2861
2931
  yp_parser_t *parser,
2862
2932
  yp_constant_id_list_t *locals,
2933
+ const yp_token_t *operator,
2863
2934
  const yp_token_t *opening,
2935
+ const yp_token_t *closing,
2864
2936
  yp_block_parameters_node_t *parameters,
2865
- yp_node_t *statements,
2866
- const yp_token_t *closing
2937
+ yp_node_t *body
2867
2938
  ) {
2868
2939
  yp_lambda_node_t *node = YP_ALLOC_NODE(parser, yp_lambda_node_t);
2869
2940
 
@@ -2871,29 +2942,31 @@ yp_lambda_node_create(
2871
2942
  {
2872
2943
  .type = YP_NODE_LAMBDA_NODE,
2873
2944
  .location = {
2874
- .start = opening->start,
2945
+ .start = operator->start,
2875
2946
  .end = closing->end
2876
2947
  },
2877
2948
  },
2878
2949
  .locals = *locals,
2950
+ .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2879
2951
  .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
2952
+ .closing_loc = YP_LOCATION_TOKEN_VALUE(closing),
2880
2953
  .parameters = parameters,
2881
- .statements = statements
2954
+ .body = body
2882
2955
  };
2883
2956
 
2884
2957
  return node;
2885
2958
  }
2886
2959
 
2887
- // Allocate and initialize a new LocalVariableOperatorAndWriteNode node.
2888
- static yp_local_variable_operator_and_write_node_t *
2889
- yp_local_variable_operator_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id) {
2960
+ // Allocate and initialize a new LocalVariableAndWriteNode node.
2961
+ static yp_local_variable_and_write_node_t *
2962
+ yp_local_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id, uint32_t depth) {
2890
2963
  assert(YP_NODE_TYPE_P(target, YP_NODE_LOCAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_CALL_NODE));
2891
2964
  assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2892
- yp_local_variable_operator_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_operator_and_write_node_t);
2965
+ yp_local_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_and_write_node_t);
2893
2966
 
2894
- *node = (yp_local_variable_operator_and_write_node_t) {
2967
+ *node = (yp_local_variable_and_write_node_t) {
2895
2968
  {
2896
- .type = YP_NODE_LOCAL_VARIABLE_OPERATOR_AND_WRITE_NODE,
2969
+ .type = YP_NODE_LOCAL_VARIABLE_AND_WRITE_NODE,
2897
2970
  .location = {
2898
2971
  .start = target->location.start,
2899
2972
  .end = value->location.end
@@ -2902,7 +2975,8 @@ yp_local_variable_operator_and_write_node_create(yp_parser_t *parser, yp_node_t
2902
2975
  .name_loc = target->location,
2903
2976
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2904
2977
  .value = value,
2905
- .constant_id = constant_id
2978
+ .constant_id = constant_id,
2979
+ .depth = depth
2906
2980
  };
2907
2981
 
2908
2982
  return node;
@@ -2910,7 +2984,7 @@ yp_local_variable_operator_and_write_node_create(yp_parser_t *parser, yp_node_t
2910
2984
 
2911
2985
  // Allocate and initialize a new LocalVariableOperatorWriteNode node.
2912
2986
  static yp_local_variable_operator_write_node_t *
2913
- yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id) {
2987
+ yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id, uint32_t depth) {
2914
2988
  yp_local_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_operator_write_node_t);
2915
2989
 
2916
2990
  *node = (yp_local_variable_operator_write_node_t) {
@@ -2925,22 +2999,23 @@ yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *tar
2925
2999
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2926
3000
  .value = value,
2927
3001
  .constant_id = constant_id,
2928
- .operator_id = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
3002
+ .operator_id = yp_parser_constant_id_location(parser, operator->start, operator->end - 1),
3003
+ .depth = depth
2929
3004
  };
2930
3005
 
2931
3006
  return node;
2932
3007
  }
2933
3008
 
2934
- // Allocate and initialize a new LocalVariableOperatorOrWriteNode node.
2935
- static yp_local_variable_operator_or_write_node_t *
2936
- yp_local_variable_operator_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id) {
3009
+ // Allocate and initialize a new LocalVariableOrWriteNode node.
3010
+ static yp_local_variable_or_write_node_t *
3011
+ yp_local_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t constant_id, uint32_t depth) {
2937
3012
  assert(YP_NODE_TYPE_P(target, YP_NODE_LOCAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_CALL_NODE));
2938
3013
  assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
2939
- yp_local_variable_operator_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_operator_or_write_node_t);
3014
+ yp_local_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_or_write_node_t);
2940
3015
 
2941
- *node = (yp_local_variable_operator_or_write_node_t) {
3016
+ *node = (yp_local_variable_or_write_node_t) {
2942
3017
  {
2943
- .type = YP_NODE_LOCAL_VARIABLE_OPERATOR_OR_WRITE_NODE,
3018
+ .type = YP_NODE_LOCAL_VARIABLE_OR_WRITE_NODE,
2944
3019
  .location = {
2945
3020
  .start = target->location.start,
2946
3021
  .end = value->location.end
@@ -2949,7 +3024,8 @@ yp_local_variable_operator_or_write_node_create(yp_parser_t *parser, yp_node_t *
2949
3024
  .name_loc = target->location,
2950
3025
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
2951
3026
  .value = value,
2952
- .constant_id = constant_id
3027
+ .constant_id = constant_id,
3028
+ .depth = depth
2953
3029
  };
2954
3030
 
2955
3031
  return node;
@@ -2982,7 +3058,7 @@ yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t consta
2982
3058
  .type = YP_NODE_LOCAL_VARIABLE_WRITE_NODE,
2983
3059
  .location = {
2984
3060
  .start = name_loc->start,
2985
- .end = value == NULL ? name_loc->end : value->location.end
3061
+ .end = value->location.end
2986
3062
  }
2987
3063
  },
2988
3064
  .constant_id = constant_id,
@@ -2995,21 +3071,18 @@ yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t consta
2995
3071
  return node;
2996
3072
  }
2997
3073
 
2998
- // Allocate and initialize a new LocalVariableWriteNode node without an operator or target.
2999
- static yp_local_variable_write_node_t *
3074
+ // Allocate and initialize a new LocalVariableTargetNode node.
3075
+ static yp_local_variable_target_node_t *
3000
3076
  yp_local_variable_target_node_create(yp_parser_t *parser, const yp_token_t *name) {
3001
- yp_local_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_write_node_t);
3077
+ yp_local_variable_target_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_target_node_t);
3002
3078
 
3003
- *node = (yp_local_variable_write_node_t) {
3079
+ *node = (yp_local_variable_target_node_t) {
3004
3080
  {
3005
- .type = YP_NODE_LOCAL_VARIABLE_WRITE_NODE,
3081
+ .type = YP_NODE_LOCAL_VARIABLE_TARGET_NODE,
3006
3082
  .location = YP_LOCATION_TOKEN_VALUE(name)
3007
3083
  },
3008
3084
  .constant_id = yp_parser_constant_id_token(parser, name),
3009
- .depth = 0,
3010
- .value = NULL,
3011
- .name_loc = YP_LOCATION_TOKEN_VALUE(name),
3012
- .operator_loc = { .start = NULL, .end = NULL }
3085
+ .depth = 0
3013
3086
  };
3014
3087
 
3015
3088
  return node;
@@ -3059,7 +3132,7 @@ yp_match_required_node_create(yp_parser_t *parser, yp_node_t *value, yp_node_t *
3059
3132
 
3060
3133
  // Allocate a new ModuleNode node.
3061
3134
  static yp_module_node_t *
3062
- yp_module_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *module_keyword, yp_node_t *constant_path, yp_node_t *statements, const yp_token_t *end_keyword) {
3135
+ yp_module_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *module_keyword, yp_node_t *constant_path, const yp_token_t *name, yp_node_t *body, const yp_token_t *end_keyword) {
3063
3136
  yp_module_node_t *node = YP_ALLOC_NODE(parser, yp_module_node_t);
3064
3137
 
3065
3138
  *node = (yp_module_node_t) {
@@ -3073,10 +3146,12 @@ yp_module_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const
3073
3146
  .locals = (locals == NULL ? ((yp_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
3074
3147
  .module_keyword_loc = YP_LOCATION_TOKEN_VALUE(module_keyword),
3075
3148
  .constant_path = constant_path,
3076
- .statements = statements,
3077
- .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword)
3149
+ .body = body,
3150
+ .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword),
3151
+ .name = YP_EMPTY_STRING
3078
3152
  };
3079
3153
 
3154
+ yp_string_shared_init(&node->name, name->start, name->end);
3080
3155
  return node;
3081
3156
  }
3082
3157
 
@@ -3088,7 +3163,10 @@ yp_multi_write_node_create(yp_parser_t *parser, const yp_token_t *operator, yp_n
3088
3163
  *node = (yp_multi_write_node_t) {
3089
3164
  {
3090
3165
  .type = YP_NODE_MULTI_WRITE_NODE,
3091
- .location = { .start = NULL, .end = NULL },
3166
+ .location = {
3167
+ .start = lparen_loc->start,
3168
+ .end = value == NULL ? rparen_loc->end : value->location.end
3169
+ },
3092
3170
  },
3093
3171
  .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3094
3172
  .value = value,
@@ -3343,7 +3421,7 @@ yp_program_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, yp_st
3343
3421
 
3344
3422
  // Allocate and initialize new ParenthesesNode node.
3345
3423
  static yp_parentheses_node_t *
3346
- yp_parentheses_node_create(yp_parser_t *parser, const yp_token_t *opening, yp_node_t *statements, const yp_token_t *closing) {
3424
+ yp_parentheses_node_create(yp_parser_t *parser, const yp_token_t *opening, yp_node_t *body, const yp_token_t *closing) {
3347
3425
  yp_parentheses_node_t *node = YP_ALLOC_NODE(parser, yp_parentheses_node_t);
3348
3426
 
3349
3427
  *node = (yp_parentheses_node_t) {
@@ -3354,7 +3432,7 @@ yp_parentheses_node_create(yp_parser_t *parser, const yp_token_t *opening, yp_no
3354
3432
  .end = closing->end
3355
3433
  }
3356
3434
  },
3357
- .statements = statements,
3435
+ .body = body,
3358
3436
  .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
3359
3437
  .closing_loc = YP_LOCATION_TOKEN_VALUE(closing)
3360
3438
  };
@@ -3700,7 +3778,7 @@ yp_self_node_create(yp_parser_t *parser, const yp_token_t *token) {
3700
3778
 
3701
3779
  // Allocate a new SingletonClassNode node.
3702
3780
  static yp_singleton_class_node_t *
3703
- yp_singleton_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *class_keyword, const yp_token_t *operator, yp_node_t *expression, yp_node_t *statements, const yp_token_t *end_keyword) {
3781
+ yp_singleton_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *class_keyword, const yp_token_t *operator, yp_node_t *expression, yp_node_t *body, const yp_token_t *end_keyword) {
3704
3782
  yp_singleton_class_node_t *node = YP_ALLOC_NODE(parser, yp_singleton_class_node_t);
3705
3783
 
3706
3784
  *node = (yp_singleton_class_node_t) {
@@ -3715,7 +3793,7 @@ yp_singleton_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *local
3715
3793
  .class_keyword_loc = YP_LOCATION_TOKEN_VALUE(class_keyword),
3716
3794
  .operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
3717
3795
  .expression = expression,
3718
- .statements = statements,
3796
+ .body = body,
3719
3797
  .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword)
3720
3798
  };
3721
3799
 
@@ -3934,10 +4012,10 @@ yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) {
3934
4012
  yp_token_t label = { .type = YP_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
3935
4013
  node = yp_symbol_node_create(parser, &opening, &label, &closing);
3936
4014
 
3937
- ptrdiff_t length = label.end - label.start;
3938
- assert(length >= 0);
4015
+ assert((label.end - label.start) >= 0);
4016
+ yp_string_shared_init(&node->unescaped, label.start, label.end);
3939
4017
 
3940
- yp_unescape_manipulate_string(parser, label.start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
4018
+ yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL);
3941
4019
  break;
3942
4020
  }
3943
4021
  case YP_TOKEN_MISSING: {
@@ -3978,20 +4056,20 @@ yp_symbol_node_label_p(yp_node_t *node) {
3978
4056
 
3979
4057
  // Convert the given StringNode node to a SymbolNode node.
3980
4058
  static yp_symbol_node_t *
3981
- yp_string_node_to_symbol_node(yp_parser_t *parser, yp_string_node_t *node) {
4059
+ yp_string_node_to_symbol_node(yp_parser_t *parser, yp_string_node_t *node, const yp_token_t *opening, const yp_token_t *closing) {
3982
4060
  yp_symbol_node_t *new_node = YP_ALLOC_NODE(parser, yp_symbol_node_t);
3983
4061
 
3984
4062
  *new_node = (yp_symbol_node_t) {
3985
4063
  {
3986
4064
  .type = YP_NODE_SYMBOL_NODE,
3987
4065
  .location = {
3988
- .start = node->base.location.start - 2,
3989
- .end = node->base.location.end + 1
4066
+ .start = opening->start,
4067
+ .end = closing->end
3990
4068
  }
3991
4069
  },
3992
- .opening_loc = node->opening_loc,
4070
+ .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
3993
4071
  .value_loc = node->content_loc,
3994
- .closing_loc = node->closing_loc,
4072
+ .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
3995
4073
  .unescaped = node->unescaped
3996
4074
  };
3997
4075
 
@@ -4130,34 +4208,43 @@ yp_unless_node_end_keyword_loc_set(yp_unless_node_t *node, const yp_token_t *end
4130
4208
 
4131
4209
  // Allocate a new UntilNode node.
4132
4210
  static yp_until_node_t *
4133
- yp_until_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4211
+ yp_until_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_token_t *closing, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4134
4212
  yp_until_node_t *node = YP_ALLOC_NODE(parser, yp_until_node_t);
4135
- bool has_statements = (statements != NULL) && (statements->body.size != 0);
4136
4213
 
4137
- const char *start = NULL;
4138
- if (has_statements && (keyword->start > statements->base.location.start)) {
4139
- start = statements->base.location.start;
4140
- } else {
4141
- start = keyword->start;
4142
- }
4214
+ *node = (yp_until_node_t) {
4215
+ {
4216
+ .type = YP_NODE_UNTIL_NODE,
4217
+ .flags = flags,
4218
+ .location = {
4219
+ .start = keyword->start,
4220
+ .end = closing->end,
4221
+ },
4222
+ },
4223
+ .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
4224
+ .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4225
+ .predicate = predicate,
4226
+ .statements = statements
4227
+ };
4143
4228
 
4144
- const char *end = NULL;
4145
- if (has_statements && (predicate->location.end < statements->base.location.end)) {
4146
- end = statements->base.location.end;
4147
- } else {
4148
- end = predicate->location.end;
4149
- }
4229
+ return node;
4230
+ }
4231
+
4232
+ // Allocate a new UntilNode node.
4233
+ static yp_until_node_t *
4234
+ yp_until_node_modifier_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4235
+ yp_until_node_t *node = YP_ALLOC_NODE(parser, yp_until_node_t);
4150
4236
 
4151
4237
  *node = (yp_until_node_t) {
4152
4238
  {
4153
4239
  .type = YP_NODE_UNTIL_NODE,
4154
4240
  .flags = flags,
4155
4241
  .location = {
4156
- .start = start,
4157
- .end = end,
4242
+ .start = statements->base.location.start,
4243
+ .end = predicate->location.end,
4158
4244
  },
4159
4245
  },
4160
4246
  .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
4247
+ .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4161
4248
  .predicate = predicate,
4162
4249
  .statements = statements
4163
4250
  };
@@ -4205,34 +4292,43 @@ yp_when_node_statements_set(yp_when_node_t *node, yp_statements_node_t *statemen
4205
4292
 
4206
4293
  // Allocate a new WhileNode node.
4207
4294
  static yp_while_node_t *
4208
- yp_while_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4295
+ yp_while_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_token_t *closing, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4209
4296
  yp_while_node_t *node = YP_ALLOC_NODE(parser, yp_while_node_t);
4210
4297
 
4211
- const char *start = NULL;
4212
- bool has_statements = (statements != NULL) && (statements->body.size != 0);
4213
- if (has_statements && (keyword->start > statements->base.location.start)) {
4214
- start = statements->base.location.start;
4215
- } else {
4216
- start = keyword->start;
4217
- }
4298
+ *node = (yp_while_node_t) {
4299
+ {
4300
+ .type = YP_NODE_WHILE_NODE,
4301
+ .flags = flags,
4302
+ .location = {
4303
+ .start = keyword->start,
4304
+ .end = closing->end
4305
+ },
4306
+ },
4307
+ .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
4308
+ .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
4309
+ .predicate = predicate,
4310
+ .statements = statements
4311
+ };
4218
4312
 
4219
- const char *end = NULL;
4220
- if (has_statements && (predicate->location.end < statements->base.location.end)) {
4221
- end = statements->base.location.end;
4222
- } else {
4223
- end = predicate->location.end;
4224
- }
4313
+ return node;
4314
+ }
4315
+
4316
+ // Allocate a new WhileNode node.
4317
+ static yp_while_node_t *
4318
+ yp_while_node_modifier_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) {
4319
+ yp_while_node_t *node = YP_ALLOC_NODE(parser, yp_while_node_t);
4225
4320
 
4226
4321
  *node = (yp_while_node_t) {
4227
4322
  {
4228
4323
  .type = YP_NODE_WHILE_NODE,
4229
4324
  .flags = flags,
4230
4325
  .location = {
4231
- .start = start,
4232
- .end = end,
4326
+ .start = statements->base.location.start,
4327
+ .end = predicate->location.end
4233
4328
  },
4234
4329
  },
4235
4330
  .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword),
4331
+ .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4236
4332
  .predicate = predicate,
4237
4333
  .statements = statements
4238
4334
  };
@@ -4340,13 +4436,15 @@ yp_parser_local_depth(yp_parser_t *parser, yp_token_t *token) {
4340
4436
  }
4341
4437
 
4342
4438
  // Add a local variable from a location to the current scope.
4343
- static void
4439
+ static yp_constant_id_t
4344
4440
  yp_parser_local_add_location(yp_parser_t *parser, const char *start, const char *end) {
4345
4441
  yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, start, end);
4346
4442
 
4347
4443
  if (!yp_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
4348
4444
  yp_constant_id_list_append(&parser->current_scope->locals, constant_id);
4349
4445
  }
4446
+
4447
+ return constant_id;
4350
4448
  }
4351
4449
 
4352
4450
  // Add a local variable from a token to the current scope.
@@ -4494,27 +4592,30 @@ yp_do_loop_stack_p(yp_parser_t *parser) {
4494
4592
  /* Lexer check helpers */
4495
4593
  /******************************************************************************/
4496
4594
 
4497
- // Get the next character in the source starting from parser->current.end and
4498
- // adding the given offset. If that position is beyond the end of the source
4499
- // then return '\0'.
4595
+ // Get the next character in the source starting from +cursor+. If that position
4596
+ // is beyond the end of the source then return '\0'.
4500
4597
  static inline char
4501
- peek_at(yp_parser_t *parser, size_t offset) {
4502
- if (parser->current.end + offset < parser->end) {
4503
- return parser->current.end[offset];
4598
+ peek_at(yp_parser_t *parser, const char *cursor) {
4599
+ if (cursor < parser->end) {
4600
+ return *cursor;
4504
4601
  } else {
4505
4602
  return '\0';
4506
4603
  }
4507
4604
  }
4508
4605
 
4606
+ // Get the next character in the source starting from parser->current.end and
4607
+ // adding the given offset. If that position is beyond the end of the source
4608
+ // then return '\0'.
4609
+ static inline char
4610
+ peek_offset(yp_parser_t *parser, ptrdiff_t offset) {
4611
+ return peek_at(parser, parser->current.end + offset);
4612
+ }
4613
+
4509
4614
  // Get the next character in the source starting from parser->current.end. If
4510
4615
  // that position is beyond the end of the source then return '\0'.
4511
4616
  static inline char
4512
4617
  peek(yp_parser_t *parser) {
4513
- if (parser->current.end < parser->end) {
4514
- return *parser->current.end;
4515
- } else {
4516
- return '\0';
4517
- }
4618
+ return peek_at(parser, parser->current.end);
4518
4619
  }
4519
4620
 
4520
4621
  // Get the next string of length len in the source starting from parser->current.end.
@@ -4539,6 +4640,35 @@ match(yp_parser_t *parser, char value) {
4539
4640
  return false;
4540
4641
  }
4541
4642
 
4643
+ // Return the length of the line ending string starting at +cursor+, or 0 if it
4644
+ // is not a line ending. This function is intended to be CRLF/LF agnostic.
4645
+ static inline size_t
4646
+ match_eol_at(yp_parser_t *parser, const char *cursor) {
4647
+ if (peek_at(parser, cursor) == '\n') {
4648
+ return 1;
4649
+ }
4650
+ if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
4651
+ return 2;
4652
+ }
4653
+ return 0;
4654
+ }
4655
+
4656
+ // Return the length of the line ending string starting at
4657
+ // parser->current.end + offset, or 0 if it is not a line ending. This function
4658
+ // is intended to be CRLF/LF agnostic.
4659
+ static inline size_t
4660
+ match_eol_offset(yp_parser_t *parser, ptrdiff_t offset) {
4661
+ return match_eol_at(parser, parser->current.end + offset);
4662
+ }
4663
+
4664
+ // Return the length of the line ending string starting at parser->current.end,
4665
+ // or 0 if it is not a line ending. This function is intended to be CRLF/LF
4666
+ // agnostic.
4667
+ static inline size_t
4668
+ match_eol(yp_parser_t *parser) {
4669
+ return match_eol_at(parser, parser->current.end);
4670
+ }
4671
+
4542
4672
  // Skip to the next newline character or NUL byte.
4543
4673
  static inline const char *
4544
4674
  next_newline(const char *cursor, ptrdiff_t length) {
@@ -4562,11 +4692,13 @@ parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdi
4562
4692
 
4563
4693
  const char *cursor_limit = cursor + length - key_length + 1;
4564
4694
  while ((cursor = yp_memchr(cursor, 'c', (size_t) (cursor_limit - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
4565
- if (
4566
- (strncmp(cursor, "coding", key_length - 1) == 0) &&
4567
- (cursor[key_length - 1] == ':' || cursor[key_length - 1] == '=')
4568
- ) {
4569
- return cursor + key_length;
4695
+ if (strncmp(cursor, "coding", key_length - 1) == 0) {
4696
+ size_t whitespace_after_coding = yp_strspn_inline_whitespace(cursor + key_length - 1, parser->end - (cursor + key_length - 1));
4697
+ size_t cur_pos = key_length + whitespace_after_coding;
4698
+
4699
+ if (cursor[cur_pos - 1] == ':' || cursor[cur_pos - 1] == '=') {
4700
+ return cursor + cur_pos;
4701
+ }
4570
4702
  }
4571
4703
 
4572
4704
  cursor++;
@@ -4822,7 +4954,7 @@ lex_optional_float_suffix(yp_parser_t *parser) {
4822
4954
  // Here we're going to attempt to parse the optional decimal portion of a
4823
4955
  // float. If it's not there, then it's okay and we'll just continue on.
4824
4956
  if (peek(parser) == '.') {
4825
- if (yp_char_is_decimal_digit(peek_at(parser, 1))) {
4957
+ if (yp_char_is_decimal_digit(peek_offset(parser, 1))) {
4826
4958
  parser->current.end += 2;
4827
4959
  parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
4828
4960
  type = YP_TOKEN_FLOAT;
@@ -4855,7 +4987,7 @@ static yp_token_type_t
4855
4987
  lex_numeric_prefix(yp_parser_t *parser) {
4856
4988
  yp_token_type_t type = YP_TOKEN_INTEGER;
4857
4989
 
4858
- if (parser->current.end[-1] == '0') {
4990
+ if (peek_offset(parser, -1) == '0') {
4859
4991
  switch (*parser->current.end) {
4860
4992
  // 0d1111 is a decimal number
4861
4993
  case 'd':
@@ -4938,7 +5070,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
4938
5070
 
4939
5071
  // If the last character that we consumed was an underscore, then this is
4940
5072
  // actually an invalid integer value, and we should return an invalid token.
4941
- if (parser->current.end[-1] == '_') {
5073
+ if (peek_offset(parser, -1) == '_') {
4942
5074
  yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Number literal cannot end with a `_`.");
4943
5075
  }
4944
5076
 
@@ -5119,7 +5251,7 @@ lex_identifier(yp_parser_t *parser, bool previous_command_start) {
5119
5251
 
5120
5252
  if (
5121
5253
  ((lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
5122
- (peek(parser) == ':') && (peek_at(parser, 1) != ':')
5254
+ (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
5123
5255
  ) {
5124
5256
  // If we're in a position where we can accept a : at the end of an
5125
5257
  // identifier, then we'll optionally accept it.
@@ -5135,7 +5267,7 @@ lex_identifier(yp_parser_t *parser, bool previous_command_start) {
5135
5267
  }
5136
5268
 
5137
5269
  return YP_TOKEN_IDENTIFIER;
5138
- } else if (lex_state_p(parser, YP_LEX_STATE_FNAME) && peek_at(parser, 1) != '~' && peek_at(parser, 1) != '>' && (peek_at(parser, 1) != '=' || peek_at(parser, 2) == '>') && match(parser, '=')) {
5270
+ } else if (lex_state_p(parser, YP_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
5139
5271
  // If we're in a position where we can accept a = at the end of an
5140
5272
  // identifier, then we'll optionally accept it.
5141
5273
  return YP_TOKEN_IDENTIFIER;
@@ -5143,7 +5275,7 @@ lex_identifier(yp_parser_t *parser, bool previous_command_start) {
5143
5275
 
5144
5276
  if (
5145
5277
  ((lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
5146
- peek(parser) == ':' && peek_at(parser, 1) != ':'
5278
+ peek(parser) == ':' && peek_offset(parser, 1) != ':'
5147
5279
  ) {
5148
5280
  // If we're in a position where we can accept a : at the end of an
5149
5281
  // identifier, then we'll optionally accept it.
@@ -5411,7 +5543,7 @@ lex_question_mark(yp_parser_t *parser) {
5411
5543
 
5412
5544
  if (parser->current.start[1] == '\\') {
5413
5545
  lex_state_set(parser, YP_LEX_STATE_END);
5414
- parser->current.end += yp_unescape_calculate_difference(parser->current.start + 1, parser->end, YP_UNESCAPE_ALL, true, &parser->error_list);
5546
+ parser->current.end += yp_unescape_calculate_difference(parser, parser->current.start + 1, YP_UNESCAPE_ALL, true);
5415
5547
  return YP_TOKEN_CHARACTER_LITERAL;
5416
5548
  } else {
5417
5549
  size_t encoding_width = parser->encoding.char_width(parser->current.end, parser->end - parser->current.end);
@@ -5420,7 +5552,7 @@ lex_question_mark(yp_parser_t *parser) {
5420
5552
  // an underscore. We check for this case
5421
5553
  if (
5422
5554
  !(parser->encoding.alnum_char(parser->current.end, parser->end - parser->current.end) ||
5423
- *parser->current.end == '_') ||
5555
+ peek(parser) == '_') ||
5424
5556
  (
5425
5557
  (parser->current.end + encoding_width >= parser->end) ||
5426
5558
  !char_is_identifier(parser, parser->current.end + encoding_width)
@@ -5636,28 +5768,32 @@ parser_lex(yp_parser_t *parser) {
5636
5768
  space_seen = true;
5637
5769
  break;
5638
5770
  case '\r':
5639
- if (peek_at(parser, 1) == '\n') {
5771
+ if (match_eol_offset(parser, 1)) {
5640
5772
  chomping = false;
5641
5773
  } else {
5642
5774
  parser->current.end++;
5643
5775
  space_seen = true;
5644
5776
  }
5645
5777
  break;
5646
- case '\\':
5647
- if (peek_at(parser, 1) == '\n') {
5648
- yp_newline_list_append(&parser->newline_list, parser->current.end + 1);
5649
- parser->current.end += 2;
5650
- space_seen = true;
5651
- } else if (parser->current.end + 2 < parser->end && peek_at(parser, 1) == '\r' && peek_at(parser, 2) == '\n') {
5652
- yp_newline_list_append(&parser->newline_list, parser->current.end + 2);
5653
- parser->current.end += 3;
5654
- space_seen = true;
5778
+ case '\\': {
5779
+ size_t eol_length = match_eol_offset(parser, 1);
5780
+ if (eol_length) {
5781
+ if (parser->heredoc_end) {
5782
+ parser->current.end = parser->heredoc_end;
5783
+ parser->heredoc_end = NULL;
5784
+ } else {
5785
+ parser->current.end += eol_length + 1;
5786
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
5787
+ space_seen = true;
5788
+ }
5655
5789
  } else if (yp_char_is_inline_whitespace(*parser->current.end)) {
5656
5790
  parser->current.end += 2;
5657
5791
  } else {
5658
5792
  chomping = false;
5659
5793
  }
5794
+
5660
5795
  break;
5796
+ }
5661
5797
  default:
5662
5798
  chomping = false;
5663
5799
  break;
@@ -5667,13 +5803,14 @@ parser_lex(yp_parser_t *parser) {
5667
5803
  // Next, we'll set to start of this token to be the current end.
5668
5804
  parser->current.start = parser->current.end;
5669
5805
 
5670
- // We'll check if we're at the end of the file. If we are, then we need to
5671
- // return the EOF token.
5806
+ // We'll check if we're at the end of the file. If we are, then we
5807
+ // need to return the EOF token.
5672
5808
  if (parser->current.end >= parser->end) {
5673
5809
  LEX(YP_TOKEN_EOF);
5674
5810
  }
5675
5811
 
5676
- // Finally, we'll check the current character to determine the next token.
5812
+ // Finally, we'll check the current character to determine the next
5813
+ // token.
5677
5814
  switch (*parser->current.end++) {
5678
5815
  case '\0': // NUL or end of script
5679
5816
  case '\004': // ^D
@@ -5683,16 +5820,14 @@ parser_lex(yp_parser_t *parser) {
5683
5820
 
5684
5821
  case '#': { // comments
5685
5822
  const char *ending = next_newline(parser->current.end, parser->end - parser->current.end);
5686
- while (ending && ending < parser->end && *ending != '\n') {
5687
- ending = next_newline(ending + 1, parser->end - ending);
5688
- }
5689
5823
 
5690
5824
  parser->current.end = ending == NULL ? parser->end : ending + 1;
5691
5825
  parser->current.type = YP_TOKEN_COMMENT;
5692
5826
  parser_lex_callback(parser);
5693
5827
 
5694
- // If we found a comment while lexing, then we're going to add it to the
5695
- // list of comments in the file and keep lexing.
5828
+ // If we found a comment while lexing, then we're going to
5829
+ // add it to the list of comments in the file and keep
5830
+ // lexing.
5696
5831
  yp_comment_t *comment = parser_comment(parser, YP_COMMENT_INLINE);
5697
5832
  yp_list_append(&parser->comment_list, (yp_list_node_t *) comment);
5698
5833
 
@@ -5703,21 +5838,29 @@ parser_lex(yp_parser_t *parser) {
5703
5838
  lexed_comment = true;
5704
5839
  }
5705
5840
  /* fallthrough */
5706
- case '\r': {
5707
- // The only way you can have carriage returns in this particular loop
5708
- // is if you have a carriage return followed by a newline. In that
5709
- // case we'll just skip over the carriage return and continue lexing,
5710
- // in order to make it so that the newline token encapsulates both the
5711
- // carriage return and the newline. Note that we need to check that
5712
- // we haven't already lexed a comment here because that falls through
5713
- // into here as well.
5714
- if (!lexed_comment) parser->current.end++;
5715
- }
5716
- /* fallthrough */
5841
+ case '\r':
5717
5842
  case '\n': {
5718
- if (parser->heredoc_end == NULL) {
5719
- yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
5720
- } else {
5843
+ size_t eol_length = match_eol_at(parser, parser->current.end - 1);
5844
+ if (eol_length) {
5845
+ // The only way you can have carriage returns in this
5846
+ // particular loop is if you have a carriage return
5847
+ // followed by a newline. In that case we'll just skip
5848
+ // over the carriage return and continue lexing, in
5849
+ // order to make it so that the newline token
5850
+ // encapsulates both the carriage return and the
5851
+ // newline. Note that we need to check that we haven't
5852
+ // already lexed a comment here because that falls
5853
+ // through into here as well.
5854
+ if (!lexed_comment) {
5855
+ parser->current.end += eol_length - 1; // skip CR
5856
+ }
5857
+
5858
+ if (parser->heredoc_end == NULL) {
5859
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
5860
+ }
5861
+ }
5862
+
5863
+ if (parser->heredoc_end) {
5721
5864
  parser_flush_heredoc_end(parser);
5722
5865
  }
5723
5866
 
@@ -5773,7 +5916,13 @@ parser_lex(yp_parser_t *parser) {
5773
5916
 
5774
5917
  // If the lex state was ignored, or we hit a '.' or a '&.',
5775
5918
  // we will lex the ignored newline
5776
- if (lex_state_ignored_p(parser) || (following && ((following[0] == '.') || (following + 1 < parser->end && following[0] == '&' && following[1] == '.')))) {
5919
+ if (
5920
+ lex_state_ignored_p(parser) ||
5921
+ (following && (
5922
+ (peek_at(parser, following) == '.') ||
5923
+ (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
5924
+ ))
5925
+ ) {
5777
5926
  if (!lexed_comment) parser_lex_ignored_newline(parser);
5778
5927
  lexed_comment = false;
5779
5928
  goto lex_next_token;
@@ -5786,7 +5935,7 @@ parser_lex(yp_parser_t *parser) {
5786
5935
  // To match ripper, we need to emit an ignored newline even though
5787
5936
  // its a real newline in the case that we have a beginless range
5788
5937
  // on a subsequent line.
5789
- if ((next_content + 1 < parser->end) && (next_content[1] == '.')) {
5938
+ if (peek_at(parser, next_content + 1) == '.') {
5790
5939
  if (!lexed_comment) parser_lex_ignored_newline(parser);
5791
5940
  lex_state_set(parser, YP_LEX_STATE_BEG);
5792
5941
  parser->command_start = true;
@@ -5804,7 +5953,7 @@ parser_lex(yp_parser_t *parser) {
5804
5953
 
5805
5954
  // If we hit a &. after a newline, then we're in a call chain and
5806
5955
  // we need to return the call operator.
5807
- if (next_content + 1 < parser->end && next_content[0] == '&' && next_content[1] == '.') {
5956
+ if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
5808
5957
  if (!lexed_comment) parser_lex_ignored_newline(parser);
5809
5958
  lex_state_set(parser, YP_LEX_STATE_DOT);
5810
5959
  parser->current.start = next_content;
@@ -6001,7 +6150,7 @@ parser_lex(yp_parser_t *parser) {
6001
6150
 
6002
6151
  // = => =~ == === =begin
6003
6152
  case '=':
6004
- if (current_token_starts_line(parser) && strncmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_at(parser, 5))) {
6153
+ if (current_token_starts_line(parser) && strncmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_offset(parser, 5))) {
6005
6154
  yp_token_type_t type = lex_embdoc(parser);
6006
6155
 
6007
6156
  if (type == YP_TOKEN_EOF) {
@@ -6425,13 +6574,13 @@ parser_lex(yp_parser_t *parser) {
6425
6574
  LEX(YP_TOKEN_COLON_COLON);
6426
6575
  }
6427
6576
 
6428
- if (lex_state_end_p(parser) || yp_char_is_whitespace(*parser->current.end) || (*parser->current.end == '#')) {
6577
+ if (lex_state_end_p(parser) || yp_char_is_whitespace(*parser->current.end) || peek(parser) == '#') {
6429
6578
  lex_state_set(parser, YP_LEX_STATE_BEG);
6430
6579
  LEX(YP_TOKEN_COLON);
6431
6580
  }
6432
6581
 
6433
- if ((*parser->current.end == '"') || (*parser->current.end == '\'')) {
6434
- lex_mode_push_string(parser, *parser->current.end == '"', false, '\0', *parser->current.end);
6582
+ if (peek(parser) == '"' || peek(parser) == '\'') {
6583
+ lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
6435
6584
  parser->current.end++;
6436
6585
  }
6437
6586
 
@@ -6486,13 +6635,11 @@ parser_lex(yp_parser_t *parser) {
6486
6635
 
6487
6636
  // % %= %i %I %q %Q %w %W
6488
6637
  case '%': {
6489
- // In a BEG state, if you encounter a % then you must be
6490
- // starting something. In this case if there is no
6491
- // subsequent character then we have an invalid token. We're
6492
- // going to say it's the percent operator because we don't
6493
- // want to move into the string lex mode unnecessarily.
6494
- if (lex_state_beg_p(parser) && (parser->current.end >= parser->end)) {
6495
- yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "unexpected end of input");
6638
+ // If there is no subsequent character then we have an invalid token. We're
6639
+ // going to say it's the percent operator because we don't want to move into the
6640
+ // string lex mode unnecessarily.
6641
+ if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
6642
+ yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Unexpected end of input");
6496
6643
  LEX(YP_TOKEN_PERCENT);
6497
6644
  }
6498
6645
 
@@ -6502,25 +6649,26 @@ parser_lex(yp_parser_t *parser) {
6502
6649
  }
6503
6650
  else if(
6504
6651
  lex_state_beg_p(parser) ||
6505
- (lex_state_p(parser, YP_LEX_STATE_FITEM) && (*parser->current.end == 's')) ||
6652
+ (lex_state_p(parser, YP_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
6506
6653
  lex_state_spcarg_p(parser, space_seen)
6507
6654
  ) {
6508
6655
  if (!parser->encoding.alnum_char(parser->current.end, parser->end - parser->current.end)) {
6509
6656
  lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
6510
6657
 
6511
- if (*parser->current.end == '\r') {
6658
+ size_t eol_length = match_eol(parser);
6659
+ if (eol_length) {
6660
+ parser->current.end += eol_length;
6661
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
6662
+ } else {
6512
6663
  parser->current.end++;
6513
6664
  }
6514
6665
 
6515
- if (*parser->current.end == '\n') {
6516
- yp_newline_list_append(&parser->newline_list, parser->current.end);
6666
+ if (parser->current.end < parser->end) {
6667
+ LEX(YP_TOKEN_STRING_BEGIN);
6517
6668
  }
6518
-
6519
- parser->current.end++;
6520
- LEX(YP_TOKEN_STRING_BEGIN);
6521
6669
  }
6522
6670
 
6523
- switch (*parser->current.end) {
6671
+ switch (peek(parser)) {
6524
6672
  case 'i': {
6525
6673
  parser->current.end++;
6526
6674
 
@@ -6544,6 +6692,7 @@ parser_lex(yp_parser_t *parser) {
6544
6692
 
6545
6693
  if (parser->current.end < parser->end) {
6546
6694
  lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
6695
+ yp_newline_list_check_append(&parser->newline_list, parser->current.end);
6547
6696
  parser->current.end++;
6548
6697
  }
6549
6698
 
@@ -6554,6 +6703,7 @@ parser_lex(yp_parser_t *parser) {
6554
6703
 
6555
6704
  if (parser->current.end < parser->end) {
6556
6705
  lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
6706
+ yp_newline_list_check_append(&parser->newline_list, parser->current.end);
6557
6707
  parser->current.end++;
6558
6708
  }
6559
6709
 
@@ -6564,6 +6714,7 @@ parser_lex(yp_parser_t *parser) {
6564
6714
 
6565
6715
  if (parser->current.end < parser->end) {
6566
6716
  lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
6717
+ yp_newline_list_check_append(&parser->newline_list, parser->current.end);
6567
6718
  parser->current.end++;
6568
6719
  }
6569
6720
 
@@ -6613,7 +6764,7 @@ parser_lex(yp_parser_t *parser) {
6613
6764
  // unparseable. In this case we'll just drop it from the parser
6614
6765
  // and skip past it and hope that the next token is something
6615
6766
  // that we can parse.
6616
- yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "invalid %% token");
6767
+ yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid %% token");
6617
6768
  goto lex_next_token;
6618
6769
  }
6619
6770
  }
@@ -6665,8 +6816,9 @@ parser_lex(yp_parser_t *parser) {
6665
6816
  ((parser->current.end - parser->current.start) == 7) &&
6666
6817
  current_token_starts_line(parser) &&
6667
6818
  (strncmp(parser->current.start, "__END__", 7) == 0) &&
6668
- (*parser->current.end == '\n' || (*parser->current.end == '\r' && parser->current.end[1] == '\n'))
6669
- ) {
6819
+ (parser->current.end == parser->end || match_eol(parser))
6820
+ )
6821
+ {
6670
6822
  parser->current.end = parser->end;
6671
6823
  parser->current.type = YP_TOKEN___END__;
6672
6824
  parser_lex_callback(parser);
@@ -6723,7 +6875,7 @@ parser_lex(yp_parser_t *parser) {
6723
6875
 
6724
6876
  if ((whitespace = yp_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list, should_stop)) > 0) {
6725
6877
  parser->current.end += whitespace;
6726
- if (parser->current.end[-1] == '\n') {
6878
+ if (peek_offset(parser, -1) == '\n') {
6727
6879
  // mutates next_start
6728
6880
  parser_flush_heredoc_end(parser);
6729
6881
  }
@@ -6787,13 +6939,11 @@ parser_lex(yp_parser_t *parser) {
6787
6939
  // and find the next breakpoint.
6788
6940
  if (*breakpoint == '\\') {
6789
6941
  yp_unescape_type_t unescape_type = lex_mode->as.list.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
6790
- size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
6942
+ size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
6791
6943
 
6792
6944
  // If the result is an escaped newline, then we need to
6793
6945
  // track that newline.
6794
- if (breakpoint[difference - 1] == '\n') {
6795
- yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
6796
- }
6946
+ yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
6797
6947
 
6798
6948
  breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
6799
6949
  continue;
@@ -6828,7 +6978,13 @@ parser_lex(yp_parser_t *parser) {
6828
6978
 
6829
6979
  case YP_LEX_REGEXP: {
6830
6980
  // First, we'll set to start of this token to be the current end.
6831
- parser->current.start = parser->current.end;
6981
+ if (parser->next_start == NULL) {
6982
+ parser->current.start = parser->current.end;
6983
+ } else {
6984
+ parser->current.start = parser->next_start;
6985
+ parser->current.end = parser->next_start;
6986
+ parser->next_start = NULL;
6987
+ }
6832
6988
 
6833
6989
  // We'll check if we're at the end of the file. If we are, then we need to
6834
6990
  // return the EOF token.
@@ -6855,7 +7011,16 @@ parser_lex(yp_parser_t *parser) {
6855
7011
  // If we've hit a newline, then we need to track that in the
6856
7012
  // list of newlines.
6857
7013
  if (*breakpoint == '\n') {
6858
- yp_newline_list_append(&parser->newline_list, breakpoint);
7014
+ // For the special case of a newline-terminated regular expression, we will pass
7015
+ // through this branch twice -- once with YP_TOKEN_REGEXP_BEGIN and then again
7016
+ // with YP_TOKEN_STRING_CONTENT. Let's avoid tracking the newline twice, by
7017
+ // tracking it only in the REGEXP_BEGIN case.
7018
+ if (
7019
+ !(lex_mode->as.regexp.terminator == '\n' && parser->current.type != YP_TOKEN_REGEXP_BEGIN)
7020
+ && parser->heredoc_end == NULL
7021
+ ) {
7022
+ yp_newline_list_append(&parser->newline_list, breakpoint);
7023
+ }
6859
7024
 
6860
7025
  if (lex_mode->as.regexp.terminator != '\n') {
6861
7026
  // If the terminator is not a newline, then we can set
@@ -6896,12 +7061,20 @@ parser_lex(yp_parser_t *parser) {
6896
7061
  // literally. In this case we'll skip past the next character
6897
7062
  // and find the next breakpoint.
6898
7063
  if (*breakpoint == '\\') {
6899
- size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, YP_UNESCAPE_ALL, false, &parser->error_list);
6900
-
6901
- // If the result is an escaped newline, then we need to
6902
- // track that newline.
6903
- if (breakpoint[difference - 1] == '\n') {
6904
- yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7064
+ size_t difference = yp_unescape_calculate_difference(parser, breakpoint, YP_UNESCAPE_ALL, false);
7065
+
7066
+ // If the result is an escaped newline ...
7067
+ if (*(breakpoint + difference - 1) == '\n') {
7068
+ if (parser->heredoc_end) {
7069
+ // ... if we are on the same line as a heredoc, flush the heredoc and
7070
+ // continue parsing after heredoc_end.
7071
+ parser->current.end = breakpoint + difference;
7072
+ parser_flush_heredoc_end(parser);
7073
+ LEX(YP_TOKEN_STRING_CONTENT);
7074
+ } else {
7075
+ // ... else track the newline.
7076
+ yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7077
+ }
6905
7078
  }
6906
7079
 
6907
7080
  breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
@@ -6989,21 +7162,18 @@ parser_lex(yp_parser_t *parser) {
6989
7162
 
6990
7163
  // Otherwise we need to switch back to the parent lex mode and
6991
7164
  // return the end of the string.
6992
- if (*parser->current.end == '\r' && parser->current.end + 1 < parser->end && parser->current.end[1] == '\n') {
6993
- parser->current.end = breakpoint + 2;
6994
- yp_newline_list_append(&parser->newline_list, breakpoint + 1);
7165
+ size_t eol_length = match_eol_at(parser, breakpoint);
7166
+ if (eol_length) {
7167
+ parser->current.end = breakpoint + eol_length;
7168
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
6995
7169
  } else {
6996
- if (*parser->current.end == '\n') {
6997
- yp_newline_list_append(&parser->newline_list, parser->current.end);
6998
- }
6999
-
7000
7170
  parser->current.end = breakpoint + 1;
7001
7171
  }
7002
7172
 
7003
7173
  if (
7004
7174
  parser->lex_modes.current->as.string.label_allowed &&
7005
7175
  (peek(parser) == ':') &&
7006
- (peek_at(parser, 1) != ':')
7176
+ (peek_offset(parser, 1) != ':')
7007
7177
  ) {
7008
7178
  parser->current.end++;
7009
7179
  lex_state_set(parser, YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED);
@@ -7041,12 +7211,20 @@ parser_lex(yp_parser_t *parser) {
7041
7211
  // literally. In this case we'll skip past the next character and
7042
7212
  // find the next breakpoint.
7043
7213
  yp_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
7044
- size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
7045
-
7046
- // If the result is an escaped newline, then we need to
7047
- // track that newline.
7048
- if (breakpoint[difference - 1] == '\n') {
7049
- yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7214
+ size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
7215
+
7216
+ // If the result is an escaped newline ...
7217
+ if (*(breakpoint + difference - 1) == '\n') {
7218
+ if (parser->heredoc_end) {
7219
+ // ... if we are on the same line as a heredoc, flush the heredoc and
7220
+ // continue parsing after heredoc_end.
7221
+ parser->current.end = breakpoint + difference;
7222
+ parser_flush_heredoc_end(parser);
7223
+ LEX(YP_TOKEN_STRING_CONTENT);
7224
+ } else {
7225
+ // ... else track the newline.
7226
+ yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7227
+ }
7050
7228
  }
7051
7229
 
7052
7230
  breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
@@ -7082,6 +7260,7 @@ parser_lex(yp_parser_t *parser) {
7082
7260
  } else {
7083
7261
  parser->current.start = parser->next_start;
7084
7262
  parser->current.end = parser->next_start;
7263
+ parser->heredoc_end = NULL;
7085
7264
  parser->next_start = NULL;
7086
7265
  }
7087
7266
 
@@ -7098,7 +7277,7 @@ parser_lex(yp_parser_t *parser) {
7098
7277
 
7099
7278
  // If we are immediately following a newline and we have hit the
7100
7279
  // terminator, then we need to return the ending of the heredoc.
7101
- if (parser->current.start[-1] == '\n') {
7280
+ if (current_token_starts_line(parser)) {
7102
7281
  const char *start = parser->current.start;
7103
7282
  if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) {
7104
7283
  start += yp_strspn_inline_whitespace(start, parser->end - start);
@@ -7108,12 +7287,10 @@ parser_lex(yp_parser_t *parser) {
7108
7287
  bool matched = true;
7109
7288
  bool at_end = false;
7110
7289
 
7111
- if ((start + ident_length < parser->end) && (start[ident_length] == '\n')) {
7112
- parser->current.end = start + ident_length + 1;
7113
- yp_newline_list_append(&parser->newline_list, start + ident_length);
7114
- } else if ((start + ident_length + 1 < parser->end) && (start[ident_length] == '\r') && (start[ident_length + 1] == '\n')) {
7115
- parser->current.end = start + ident_length + 2;
7116
- yp_newline_list_append(&parser->newline_list, start + ident_length + 1);
7290
+ size_t eol_length = match_eol_at(parser, start + ident_length);
7291
+ if (eol_length) {
7292
+ parser->current.end = start + ident_length + eol_length;
7293
+ yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
7117
7294
  } else if (parser->end == (start + ident_length)) {
7118
7295
  parser->current.end = start + ident_length;
7119
7296
  at_end = true;
@@ -7178,19 +7355,10 @@ parser_lex(yp_parser_t *parser) {
7178
7355
  (start + ident_length <= parser->end) &&
7179
7356
  (strncmp(start, ident_start, ident_length) == 0)
7180
7357
  ) {
7181
- // Heredoc terminators must be followed by a newline or EOF to be valid.
7182
- if (start + ident_length == parser->end || start[ident_length] == '\n') {
7183
- parser->current.end = breakpoint + 1;
7184
- LEX(YP_TOKEN_STRING_CONTENT);
7185
- }
7186
-
7187
- // They can also be followed by a carriage return and then a
7188
- // newline. Be sure here that we don't accidentally read off the
7189
- // end.
7358
+ // Heredoc terminators must be followed by a newline, CRLF, or EOF to be valid.
7190
7359
  if (
7191
- (start + ident_length + 1 < parser->end) &&
7192
- (start[ident_length] == '\r') &&
7193
- (start[ident_length + 1] == '\n')
7360
+ start + ident_length == parser->end ||
7361
+ match_eol_at(parser, start + ident_length)
7194
7362
  ) {
7195
7363
  parser->current.end = breakpoint + 1;
7196
7364
  LEX(YP_TOKEN_STRING_CONTENT);
@@ -7203,21 +7371,24 @@ parser_lex(yp_parser_t *parser) {
7203
7371
  break;
7204
7372
  }
7205
7373
  case '\\': {
7206
- // If we hit escapes, then we need to treat the next token
7207
- // literally. In this case we'll skip past the next character and
7208
- // find the next breakpoint.
7209
- if (breakpoint[1] == '\n') {
7210
- breakpoint++;
7374
+ // If we hit an escape, then we need to skip past
7375
+ // however many characters the escape takes up. However
7376
+ // it's important that if \n or \r\n are escaped that we
7377
+ // stop looping before the newline and not after the
7378
+ // newline so that we can still potentially find the
7379
+ // terminator of the heredoc.
7380
+ size_t eol_length = match_eol_at(parser, breakpoint + 1);
7381
+ if (eol_length) {
7382
+ breakpoint += eol_length;
7211
7383
  } else {
7212
7384
  yp_unescape_type_t unescape_type = (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL;
7213
- size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
7385
+ size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
7214
7386
 
7215
- if (breakpoint[difference - 1] == '\n') {
7216
- yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
7217
- }
7387
+ yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
7218
7388
 
7219
7389
  breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
7220
7390
  }
7391
+
7221
7392
  break;
7222
7393
  }
7223
7394
  case '#': {
@@ -7264,10 +7435,10 @@ static yp_regular_expression_node_t *
7264
7435
  yp_regular_expression_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) {
7265
7436
  yp_regular_expression_node_t *node = yp_regular_expression_node_create(parser, opening, content, closing);
7266
7437
 
7267
- ptrdiff_t length = content->end - content->start;
7268
- assert(length >= 0);
7438
+ assert((content->end - content->start) >= 0);
7439
+ yp_string_shared_init(&node->unescaped, content->start, content->end);
7269
7440
 
7270
- yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
7441
+ yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
7271
7442
  return node;
7272
7443
  }
7273
7444
 
@@ -7275,10 +7446,10 @@ static yp_symbol_node_t *
7275
7446
  yp_symbol_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) {
7276
7447
  yp_symbol_node_t *node = yp_symbol_node_create(parser, opening, content, closing);
7277
7448
 
7278
- ptrdiff_t length = content->end - content->start;
7279
- assert(length >= 0);
7449
+ assert((content->end - content->start) >= 0);
7450
+ yp_string_shared_init(&node->unescaped, content->start, content->end);
7280
7451
 
7281
- yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
7452
+ yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
7282
7453
  return node;
7283
7454
  }
7284
7455
 
@@ -7286,10 +7457,10 @@ static yp_string_node_t *
7286
7457
  yp_string_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) {
7287
7458
  yp_string_node_t *node = yp_string_node_create(parser, opening, content, closing);
7288
7459
 
7289
- ptrdiff_t length = content->end - content->start;
7290
- assert(length >= 0);
7460
+ assert((content->end - content->start) >= 0);
7461
+ yp_string_shared_init(&node->unescaped, content->start, content->end);
7291
7462
 
7292
- yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
7463
+ yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
7293
7464
  return node;
7294
7465
  }
7295
7466
 
@@ -7297,10 +7468,10 @@ static yp_x_string_node_t *
7297
7468
  yp_xstring_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing) {
7298
7469
  yp_x_string_node_t *node = yp_xstring_node_create(parser, opening, content, closing);
7299
7470
 
7300
- ptrdiff_t length = content->end - content->start;
7301
- assert(length >= 0);
7471
+ assert((content->end - content->start) >= 0);
7472
+ yp_string_shared_init(&node->unescaped, content->start, content->end);
7302
7473
 
7303
- yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
7474
+ yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL);
7304
7475
  return node;
7305
7476
  }
7306
7477
 
@@ -7652,19 +7823,153 @@ parse_starred_expression(yp_parser_t *parser, yp_binding_power_t binding_power,
7652
7823
 
7653
7824
  // Convert the given node into a valid target node.
7654
7825
  static yp_node_t *
7655
- parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_node_t *value) {
7826
+ parse_target(yp_parser_t *parser, yp_node_t *target) {
7656
7827
  switch (YP_NODE_TYPE(target)) {
7657
7828
  case YP_NODE_MISSING_NODE:
7658
7829
  return target;
7659
- case YP_NODE_CLASS_VARIABLE_READ_NODE: {
7660
- yp_class_variable_write_node_t *write_node = yp_class_variable_read_node_to_class_variable_write_node(parser, (yp_class_variable_read_node_t *) target, operator, value);
7661
- yp_node_destroy(parser, target);
7662
- return (yp_node_t *) write_node;
7663
- }
7830
+ case YP_NODE_CLASS_VARIABLE_READ_NODE:
7831
+ assert(sizeof(yp_class_variable_target_node_t) == sizeof(yp_class_variable_read_node_t));
7832
+ target->type = YP_NODE_CLASS_VARIABLE_TARGET_NODE;
7833
+ return target;
7664
7834
  case YP_NODE_CONSTANT_PATH_NODE:
7665
- return (yp_node_t *) yp_constant_path_write_node_create(parser, (yp_constant_path_node_t *) target, operator, value);
7666
- case YP_NODE_CONSTANT_READ_NODE: {
7667
- yp_constant_write_node_t *node = yp_constant_write_node_create(parser, &target->location, operator, value);
7835
+ assert(sizeof(yp_constant_path_target_node_t) == sizeof(yp_constant_path_node_t));
7836
+ target->type = YP_NODE_CONSTANT_PATH_TARGET_NODE;
7837
+ return target;
7838
+ case YP_NODE_CONSTANT_READ_NODE:
7839
+ assert(sizeof(yp_constant_target_node_t) == sizeof(yp_constant_read_node_t));
7840
+ target->type = YP_NODE_CONSTANT_TARGET_NODE;
7841
+ return target;
7842
+ case YP_NODE_BACK_REFERENCE_READ_NODE:
7843
+ assert(sizeof(yp_global_variable_target_node_t) == sizeof(yp_back_reference_read_node_t));
7844
+ /* fallthrough */
7845
+ case YP_NODE_NUMBERED_REFERENCE_READ_NODE:
7846
+ assert(sizeof(yp_global_variable_target_node_t) == sizeof(yp_numbered_reference_read_node_t));
7847
+ yp_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, "Can't set variable");
7848
+ /* fallthrough */
7849
+ case YP_NODE_GLOBAL_VARIABLE_READ_NODE:
7850
+ assert(sizeof(yp_global_variable_target_node_t) == sizeof(yp_global_variable_read_node_t));
7851
+ target->type = YP_NODE_GLOBAL_VARIABLE_TARGET_NODE;
7852
+ return target;
7853
+ case YP_NODE_LOCAL_VARIABLE_READ_NODE:
7854
+ assert(sizeof(yp_local_variable_target_node_t) == sizeof(yp_local_variable_read_node_t));
7855
+ target->type = YP_NODE_LOCAL_VARIABLE_TARGET_NODE;
7856
+ return target;
7857
+ case YP_NODE_INSTANCE_VARIABLE_READ_NODE:
7858
+ assert(sizeof(yp_instance_variable_target_node_t) == sizeof(yp_instance_variable_read_node_t));
7859
+ target->type = YP_NODE_INSTANCE_VARIABLE_TARGET_NODE;
7860
+ return target;
7861
+ case YP_NODE_MULTI_WRITE_NODE:
7862
+ return target;
7863
+ case YP_NODE_SPLAT_NODE: {
7864
+ yp_splat_node_t *splat = (yp_splat_node_t *) target;
7865
+
7866
+ if (splat->expression != NULL) {
7867
+ splat->expression = parse_target(parser, splat->expression);
7868
+ }
7869
+
7870
+ yp_token_t operator = not_provided(parser);
7871
+ yp_location_t location = { .start = NULL, .end = NULL };
7872
+
7873
+ yp_multi_write_node_t *multi_write = yp_multi_write_node_create(parser, &operator, NULL, &location, &location);
7874
+ yp_multi_write_node_targets_append(multi_write, (yp_node_t *) splat);
7875
+
7876
+ return (yp_node_t *) multi_write;
7877
+ }
7878
+ case YP_NODE_CALL_NODE: {
7879
+ yp_call_node_t *call = (yp_call_node_t *) target;
7880
+
7881
+ // If we have no arguments to the call node and we need this to be a
7882
+ // target then this is either a method call or a local variable write.
7883
+ if (
7884
+ (call->opening_loc.start == NULL) &&
7885
+ (call->arguments == NULL) &&
7886
+ (call->block == NULL)
7887
+ ) {
7888
+ if (call->receiver == NULL) {
7889
+ // When we get here, we have a local variable write, because it
7890
+ // was previously marked as a method call but now we have an =.
7891
+ // This looks like:
7892
+ //
7893
+ // foo = 1
7894
+ //
7895
+ // When it was parsed in the prefix position, foo was seen as a
7896
+ // method call with no receiver and no arguments. Now we have an
7897
+ // =, so we know it's a local variable write.
7898
+ const yp_location_t message = call->message_loc;
7899
+
7900
+ yp_parser_local_add_location(parser, message.start, message.end);
7901
+ yp_node_destroy(parser, target);
7902
+
7903
+ const yp_token_t name = { .type = YP_TOKEN_IDENTIFIER, .start = message.start, .end = message.end };
7904
+ target = (yp_node_t *) yp_local_variable_read_node_create(parser, &name, 0);
7905
+
7906
+ assert(sizeof(yp_local_variable_target_node_t) == sizeof(yp_local_variable_read_node_t));
7907
+ target->type = YP_NODE_LOCAL_VARIABLE_TARGET_NODE;
7908
+
7909
+ if (token_is_numbered_parameter(message.start, message.end)) {
7910
+ yp_diagnostic_list_append(&parser->error_list, message.start, message.end, "reserved for numbered parameter");
7911
+ }
7912
+
7913
+ return target;
7914
+ }
7915
+
7916
+ // The method name needs to change. If we previously had foo, we now
7917
+ // need foo=. In this case we'll allocate a new owned string, copy
7918
+ // the previous method name in, and append an =.
7919
+ size_t length = yp_string_length(&call->name);
7920
+
7921
+ char *name = calloc(length + 2, sizeof(char));
7922
+ if (name == NULL) return NULL;
7923
+
7924
+ snprintf(name, length + 2, "%.*s=", (int) length, yp_string_source(&call->name));
7925
+
7926
+ // Now switch the name to the new string.
7927
+ yp_string_free(&call->name);
7928
+ yp_string_owned_init(&call->name, name, length + 1);
7929
+
7930
+ return target;
7931
+ }
7932
+
7933
+ // If there is no call operator and the message is "[]" then this is
7934
+ // an aref expression, and we can transform it into an aset
7935
+ // expression.
7936
+ if (
7937
+ (call->operator_loc.start == NULL) &&
7938
+ (call->message_loc.start[0] == '[') &&
7939
+ (call->message_loc.end[-1] == ']') &&
7940
+ (call->block == NULL)
7941
+ ) {
7942
+ // Free the previous name and replace it with "[]=".
7943
+ yp_string_free(&call->name);
7944
+ yp_string_constant_init(&call->name, "[]=", 3);
7945
+ return target;
7946
+ }
7947
+ }
7948
+ /* fallthrough */
7949
+ default:
7950
+ // In this case we have a node that we don't know how to convert
7951
+ // into a target. We need to treat it as an error. For now, we'll
7952
+ // mark it as an error and just skip right past it.
7953
+ yp_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, "Unexpected write target.");
7954
+ return target;
7955
+ }
7956
+ }
7957
+
7958
+ // Convert the given node into a valid write node.
7959
+ static yp_node_t *
7960
+ parse_write(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_node_t *value) {
7961
+ switch (YP_NODE_TYPE(target)) {
7962
+ case YP_NODE_MISSING_NODE:
7963
+ return target;
7964
+ case YP_NODE_CLASS_VARIABLE_READ_NODE: {
7965
+ yp_class_variable_write_node_t *write_node = yp_class_variable_read_node_to_class_variable_write_node(parser, (yp_class_variable_read_node_t *) target, operator, value);
7966
+ yp_node_destroy(parser, target);
7967
+ return (yp_node_t *) write_node;
7968
+ }
7969
+ case YP_NODE_CONSTANT_PATH_NODE:
7970
+ return (yp_node_t *) yp_constant_path_write_node_create(parser, (yp_constant_path_node_t *) target, operator, value);
7971
+ case YP_NODE_CONSTANT_READ_NODE: {
7972
+ yp_constant_write_node_t *node = yp_constant_write_node_create(parser, &target->location, operator, value);
7668
7973
  yp_node_destroy(parser, target);
7669
7974
 
7670
7975
  return (yp_node_t *) node;
@@ -7699,18 +8004,15 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7699
8004
  yp_multi_write_node_t *multi_write = (yp_multi_write_node_t *) target;
7700
8005
  yp_multi_write_node_operator_loc_set(multi_write, operator);
7701
8006
 
7702
- if (value != NULL) {
7703
- multi_write->value = value;
7704
- multi_write->base.location.end = value->location.end;
7705
- }
7706
-
8007
+ multi_write->value = value;
8008
+ multi_write->base.location.end = value->location.end;
7707
8009
  return (yp_node_t *) multi_write;
7708
8010
  }
7709
8011
  case YP_NODE_SPLAT_NODE: {
7710
8012
  yp_splat_node_t *splat = (yp_splat_node_t *) target;
7711
8013
 
7712
8014
  if (splat->expression != NULL) {
7713
- splat->expression = parse_target(parser, splat->expression, operator, value);
8015
+ splat->expression = parse_write(parser, splat->expression, operator, value);
7714
8016
  }
7715
8017
 
7716
8018
  yp_location_t location = { .start = NULL, .end = NULL };
@@ -7763,12 +8065,10 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7763
8065
  // method call with no arguments. Now we have an =, so we know it's
7764
8066
  // a method call with an argument. In this case we will create the
7765
8067
  // arguments node, parse the argument, and add it to the list.
7766
- if (value) {
7767
- yp_arguments_node_t *arguments = yp_arguments_node_create(parser);
7768
- call->arguments = arguments;
7769
- yp_arguments_node_arguments_append(arguments, value);
7770
- target->location.end = arguments->base.location.end;
7771
- }
8068
+ yp_arguments_node_t *arguments = yp_arguments_node_create(parser);
8069
+ call->arguments = arguments;
8070
+ yp_arguments_node_arguments_append(arguments, value);
8071
+ target->location.end = arguments->base.location.end;
7772
8072
 
7773
8073
  // The method name needs to change. If we previously had foo, we now
7774
8074
  // need foo=. In this case we'll allocate a new owned string, copy
@@ -7796,15 +8096,13 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7796
8096
  (call->message_loc.end[-1] == ']') &&
7797
8097
  (call->block == NULL)
7798
8098
  ) {
7799
- if (value != NULL) {
7800
- if (call->arguments == NULL) {
7801
- call->arguments = yp_arguments_node_create(parser);
7802
- }
7803
-
7804
- yp_arguments_node_arguments_append(call->arguments, value);
7805
- target->location.end = value->location.end;
8099
+ if (call->arguments == NULL) {
8100
+ call->arguments = yp_arguments_node_create(parser);
7806
8101
  }
7807
8102
 
8103
+ yp_arguments_node_arguments_append(call->arguments, value);
8104
+ target->location.end = value->location.end;
8105
+
7808
8106
  // Free the previous name and replace it with "[]=".
7809
8107
  yp_string_free(&call->name);
7810
8108
  yp_string_constant_init(&call->name, "[]=", 3);
@@ -7816,9 +8114,7 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
7816
8114
  // syntax error. In this case we'll fall through to our default
7817
8115
  // handling. We need to free the value that we parsed because there
7818
8116
  // is no way for us to attach it to the tree at this point.
7819
- if (value != NULL) {
7820
- yp_node_destroy(parser, value);
7821
- }
8117
+ yp_node_destroy(parser, value);
7822
8118
  }
7823
8119
  /* fallthrough */
7824
8120
  default:
@@ -7846,7 +8142,7 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7846
8142
  // location that we know requires a multi write, as in the case of a for loop.
7847
8143
  // In this case we will set up the parsing loop slightly differently.
7848
8144
  if (first_target != NULL) {
7849
- first_target = parse_target(parser, first_target, &operator, NULL);
8145
+ first_target = parse_target(parser, first_target);
7850
8146
 
7851
8147
  if (!match_type_p(parser, YP_TOKEN_COMMA)) {
7852
8148
  return first_target;
@@ -7877,9 +8173,8 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7877
8173
  yp_node_t *name = NULL;
7878
8174
 
7879
8175
  if (token_begins_expression_p(parser->current.type)) {
7880
- yp_token_t operator = not_provided(parser);
7881
8176
  name = parse_expression(parser, binding_power, "Expected an expression after '*'.");
7882
- name = parse_target(parser, name, &operator, NULL);
8177
+ name = parse_target(parser, name);
7883
8178
  }
7884
8179
 
7885
8180
  yp_node_t *splat = (yp_node_t *) yp_splat_node_create(parser, &star_operator, name);
@@ -7909,6 +8204,8 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7909
8204
 
7910
8205
  if (YP_NODE_TYPE_P(child_target, YP_NODE_MULTI_WRITE_NODE)) {
7911
8206
  target = (yp_multi_write_node_t *) child_target;
8207
+ target->base.location.start = lparen.start;
8208
+ target->base.location.end = rparen.end;
7912
8209
  target->lparen_loc = (yp_location_t) { .start = lparen.start, .end = lparen.end };
7913
8210
  target->rparen_loc = (yp_location_t) { .start = rparen.start, .end = rparen.end };
7914
8211
  } else {
@@ -7925,6 +8222,7 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7925
8222
  yp_multi_write_node_targets_append(target, child_target);
7926
8223
  }
7927
8224
 
8225
+ target->base.location.start = lparen.start;
7928
8226
  target->base.location.end = rparen.end;
7929
8227
  yp_multi_write_node_targets_append(result, (yp_node_t *) target);
7930
8228
  }
@@ -7947,7 +8245,7 @@ parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t b
7947
8245
  }
7948
8246
 
7949
8247
  yp_node_t *target = parse_expression(parser, binding_power, "Expected another expression after ','.");
7950
- target = parse_target(parser, target, &operator, NULL);
8248
+ target = parse_target(parser, target);
7951
8249
 
7952
8250
  yp_multi_write_node_targets_append(result, target);
7953
8251
  }
@@ -8407,7 +8705,6 @@ parse_parameters(
8407
8705
  bool looping = true;
8408
8706
 
8409
8707
  yp_do_loop_stack_push(parser, false);
8410
-
8411
8708
  yp_parameters_order_t order = YP_PARAMETERS_ORDER_NONE;
8412
8709
 
8413
8710
  do {
@@ -8699,8 +8996,7 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
8699
8996
  yp_rescue_node_operator_set(rescue, &parser->previous);
8700
8997
 
8701
8998
  yp_node_t *reference = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected an exception variable after `=>` in rescue statement.");
8702
- yp_token_t operator = not_provided(parser);
8703
- reference = parse_target(parser, reference, &operator, NULL);
8999
+ reference = parse_target(parser, reference);
8704
9000
 
8705
9001
  yp_rescue_node_reference_set(rescue, reference);
8706
9002
  break;
@@ -8730,8 +9026,7 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
8730
9026
  yp_rescue_node_operator_set(rescue, &parser->previous);
8731
9027
 
8732
9028
  yp_node_t *reference = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected an exception variable after `=>` in rescue statement.");
8733
- yp_token_t operator = not_provided(parser);
8734
- reference = parse_target(parser, reference, &operator, NULL);
9029
+ reference = parse_target(parser, reference);
8735
9030
 
8736
9031
  yp_rescue_node_reference_set(rescue, reference);
8737
9032
  break;
@@ -8999,7 +9294,7 @@ parse_conditional(yp_parser_t *parser, yp_context_t context) {
8999
9294
  }
9000
9295
 
9001
9296
  yp_token_t end_keyword = not_provided(parser);
9002
- yp_node_t *parent;
9297
+ yp_node_t *parent = NULL;
9003
9298
 
9004
9299
  switch (context) {
9005
9300
  case YP_CONTEXT_IF:
@@ -9009,7 +9304,6 @@ parse_conditional(yp_parser_t *parser, yp_context_t context) {
9009
9304
  parent = (yp_node_t *) yp_unless_node_create(parser, &keyword, predicate, statements);
9010
9305
  break;
9011
9306
  default:
9012
- parent = NULL;
9013
9307
  assert(false && "unreachable");
9014
9308
  break;
9015
9309
  }
@@ -9055,50 +9349,49 @@ parse_conditional(yp_parser_t *parser, yp_context_t context) {
9055
9349
  switch (context) {
9056
9350
  case YP_CONTEXT_IF:
9057
9351
  ((yp_if_node_t *) current)->consequent = (yp_node_t *) else_node;
9058
- // Recurse down if nodes setting the appropriate end location in
9059
- // all cases.
9060
- yp_node_t *recursing_node = parent;
9061
- bool recursing = true;
9062
-
9063
- while (recursing) {
9064
- switch (YP_NODE_TYPE(recursing_node)) {
9065
- case YP_NODE_IF_NODE:
9066
- yp_if_node_end_keyword_loc_set((yp_if_node_t *) recursing_node, &parser->previous);
9067
- recursing_node = ((yp_if_node_t *) recursing_node)->consequent;
9068
- break;
9069
- case YP_NODE_ELSE_NODE:
9070
- yp_else_node_end_keyword_loc_set((yp_else_node_t *) recursing_node, &parser->previous);
9071
- recursing = false;
9072
- break;
9073
- default: {
9074
- recursing = false;
9075
- break;
9076
- }
9077
- }
9078
- }
9079
9352
  break;
9080
9353
  case YP_CONTEXT_UNLESS:
9081
9354
  ((yp_unless_node_t *) parent)->consequent = else_node;
9082
- yp_unless_node_end_keyword_loc_set((yp_unless_node_t *) parent, &parser->previous);
9083
9355
  break;
9084
9356
  default:
9085
9357
  assert(false && "unreachable");
9086
9358
  break;
9087
9359
  }
9088
9360
  } else {
9089
- expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `if` statement.");
9361
+ expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close conditional statement.");
9362
+ }
9090
9363
 
9091
- switch (context) {
9092
- case YP_CONTEXT_IF:
9093
- yp_if_node_end_keyword_loc_set((yp_if_node_t *) parent, &parser->previous);
9094
- break;
9095
- case YP_CONTEXT_UNLESS:
9096
- yp_unless_node_end_keyword_loc_set((yp_unless_node_t *) parent, &parser->previous);
9097
- break;
9098
- default:
9099
- assert(false && "unreachable");
9100
- break;
9364
+ // Set the appropriate end location for all of the nodes in the subtree.
9365
+ switch (context) {
9366
+ case YP_CONTEXT_IF: {
9367
+ yp_node_t *current = parent;
9368
+ bool recursing = true;
9369
+
9370
+ while (recursing) {
9371
+ switch (YP_NODE_TYPE(current)) {
9372
+ case YP_NODE_IF_NODE:
9373
+ yp_if_node_end_keyword_loc_set((yp_if_node_t *) current, &parser->previous);
9374
+ current = ((yp_if_node_t *) current)->consequent;
9375
+ recursing = current != NULL;
9376
+ break;
9377
+ case YP_NODE_ELSE_NODE:
9378
+ yp_else_node_end_keyword_loc_set((yp_else_node_t *) current, &parser->previous);
9379
+ recursing = false;
9380
+ break;
9381
+ default: {
9382
+ recursing = false;
9383
+ break;
9384
+ }
9385
+ }
9386
+ }
9387
+ break;
9101
9388
  }
9389
+ case YP_CONTEXT_UNLESS:
9390
+ yp_unless_node_end_keyword_loc_set((yp_unless_node_t *) parent, &parser->previous);
9391
+ break;
9392
+ default:
9393
+ assert(false && "unreachable");
9394
+ break;
9102
9395
  }
9103
9396
 
9104
9397
  return parent;
@@ -9172,7 +9465,12 @@ parse_string_part(yp_parser_t *parser) {
9172
9465
  yp_unescape_type_t unescape_type = YP_UNESCAPE_ALL;
9173
9466
 
9174
9467
  if (parser->lex_modes.current->mode == YP_LEX_HEREDOC) {
9175
- if (parser->lex_modes.current->as.heredoc.quote == YP_HEREDOC_QUOTE_SINGLE) {
9468
+ if (parser->lex_modes.current->as.heredoc.indent == YP_HEREDOC_INDENT_TILDE) {
9469
+ // If we're in a tilde heredoc, we want to unescape it later
9470
+ // because we don't want unescaped newlines to disappear
9471
+ // before we handle them in the dedent.
9472
+ unescape_type = YP_UNESCAPE_NONE;
9473
+ } else if (parser->lex_modes.current->as.heredoc.quote == YP_HEREDOC_QUOTE_SINGLE) {
9176
9474
  unescape_type = YP_UNESCAPE_MINIMAL;
9177
9475
  }
9178
9476
  }
@@ -9280,14 +9578,10 @@ parse_string_part(yp_parser_t *parser) {
9280
9578
 
9281
9579
  static yp_node_t *
9282
9580
  parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_state) {
9283
- bool lex_string = lex_mode->mode == YP_LEX_STRING;
9284
- bool can_be_interpolated = lex_string && lex_mode->as.string.interpolation;
9285
9581
  yp_token_t opening = parser->previous;
9286
9582
 
9287
- if (!lex_string) {
9288
- if (next_state != YP_LEX_STATE_NONE) {
9289
- lex_state_set(parser, next_state);
9290
- }
9583
+ if (lex_mode->mode != YP_LEX_STRING) {
9584
+ if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state);
9291
9585
  yp_token_t symbol;
9292
9586
 
9293
9587
  switch (parser->current.type) {
@@ -9317,37 +9611,44 @@ parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_s
9317
9611
  return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &symbol, &closing, YP_UNESCAPE_ALL);
9318
9612
  }
9319
9613
 
9320
- if (can_be_interpolated) {
9321
- // Create a node_list first. We'll use this to check if it should be an InterpolatedSymbolNode
9322
- // or a SymbolNode
9614
+ if (lex_mode->as.string.interpolation) {
9615
+ // If we have the end of the symbol, then we can return an empty symbol.
9616
+ if (match_type_p(parser, YP_TOKEN_STRING_END)) {
9617
+ if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state);
9618
+ parser_lex(parser);
9619
+
9620
+ yp_token_t content = not_provided(parser);
9621
+ yp_token_t closing = parser->previous;
9622
+ return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &closing, YP_UNESCAPE_NONE);
9623
+ }
9624
+
9625
+ // Now we can parse the first part of the symbol.
9626
+ yp_node_t *part = parse_string_part(parser);
9627
+
9628
+ // If we got a string part, then it's possible that we could transform
9629
+ // what looks like an interpolated symbol into a regular symbol.
9630
+ if (part && YP_NODE_TYPE_P(part, YP_NODE_STRING_NODE) && match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
9631
+ if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state);
9632
+ parser_lex(parser);
9633
+
9634
+ return (yp_node_t *) yp_string_node_to_symbol_node(parser, (yp_string_node_t *) part, &opening, &parser->previous);
9635
+ }
9636
+
9637
+ // Create a node_list first. We'll use this to check if it should be an
9638
+ // InterpolatedSymbolNode or a SymbolNode.
9323
9639
  yp_node_list_t node_list = YP_EMPTY_NODE_LIST;
9640
+ if (part) yp_node_list_append(&node_list, part);
9324
9641
 
9325
9642
  while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
9326
- yp_node_t *part = parse_string_part(parser);
9327
- if (part != NULL) {
9643
+ if ((part = parse_string_part(parser)) != NULL) {
9328
9644
  yp_node_list_append(&node_list, part);
9329
9645
  }
9330
9646
  }
9331
9647
 
9332
- yp_node_t *res;
9333
- // If the only element on the node_list is a StringNode, we know this is a SymbolNode
9334
- // and not an InterpolatedSymbolNode
9335
- if (node_list.size == 1 && YP_NODE_TYPE_P(node_list.nodes[0], YP_NODE_STRING_NODE)) {
9336
- res = (yp_node_t *)yp_string_node_to_symbol_node(parser, (yp_string_node_t *)node_list.nodes[0]);
9337
- free(node_list.nodes);
9338
- }
9339
- else {
9340
- yp_interpolated_symbol_node_t *interpolated = yp_interpolated_symbol_node_create(parser, &opening, &node_list, &opening);
9341
- yp_interpolated_symbol_node_closing_set(interpolated, &parser->current);
9342
- res = (yp_node_t *) interpolated;
9343
- }
9344
-
9345
- if (next_state != YP_LEX_STATE_NONE) {
9346
- lex_state_set(parser, next_state);
9347
- }
9648
+ if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state);
9348
9649
  expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated symbol.");
9349
9650
 
9350
- return res;
9651
+ return (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &node_list, &parser->previous);
9351
9652
  }
9352
9653
 
9353
9654
  yp_token_t content;
@@ -9491,9 +9792,12 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
9491
9792
  const char *cur_char = content_loc->start;
9492
9793
 
9493
9794
  while (cur_char && cur_char < content_loc->end) {
9494
- // Any empty newlines aren't included in the minimum whitespace calculation
9495
- while (cur_char < content_loc->end && *cur_char == '\n') cur_char++;
9496
- while (cur_char + 1 < content_loc->end && *cur_char == '\r' && cur_char[1] == '\n') cur_char += 2;
9795
+ // Any empty newlines aren't included in the minimum whitespace
9796
+ // calculation.
9797
+ size_t eol_length;
9798
+ while ((eol_length = match_eol_at(parser, cur_char))) {
9799
+ cur_char += eol_length;
9800
+ }
9497
9801
 
9498
9802
  if (cur_char == content_loc->end) break;
9499
9803
 
@@ -9508,11 +9812,12 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
9508
9812
  cur_char++;
9509
9813
  }
9510
9814
 
9511
- // If we hit a newline, then we have encountered a line that contains
9512
- // only whitespace, and it shouldn't be considered in the calculation of
9513
- // common leading whitespace.
9514
- if (*cur_char == '\n') {
9515
- cur_char++;
9815
+ // If we hit a newline, then we have encountered a line that
9816
+ // contains only whitespace, and it shouldn't be considered in
9817
+ // the calculation of common leading whitespace.
9818
+ eol_length = match_eol_at(parser, cur_char);
9819
+ if (eol_length) {
9820
+ cur_char += eol_length;
9516
9821
  continue;
9517
9822
  }
9518
9823
 
@@ -9545,14 +9850,30 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
9545
9850
  int common_whitespace;
9546
9851
  if ((common_whitespace = parse_heredoc_common_whitespace(parser, nodes)) <= 0) return;
9547
9852
 
9548
- // Iterate over all nodes, and trim whitespace accordingly.
9549
- for (size_t index = 0; index < nodes->size; index++) {
9550
- yp_node_t *node = nodes->nodes[index];
9551
- if (!YP_NODE_TYPE_P(node, YP_NODE_STRING_NODE)) continue;
9853
+ // The next node should be dedented if it's the first node in the list or if
9854
+ // it follows a string node.
9855
+ bool dedent_next = true;
9856
+
9857
+ // Iterate over all nodes, and trim whitespace accordingly. We're going to
9858
+ // keep around two indices: a read and a write. If we end up trimming all of
9859
+ // the whitespace from a node, then we'll drop it from the list entirely.
9860
+ size_t write_index = 0;
9861
+
9862
+ for (size_t read_index = 0; read_index < nodes->size; read_index++) {
9863
+ yp_node_t *node = nodes->nodes[read_index];
9864
+
9865
+ // We're not manipulating child nodes that aren't strings. In this case
9866
+ // we'll skip past it and indicate that the subsequent node should not
9867
+ // be dedented.
9868
+ if (!YP_NODE_TYPE_P(node, YP_NODE_STRING_NODE)) {
9869
+ nodes->nodes[write_index++] = node;
9870
+ dedent_next = false;
9871
+ continue;
9872
+ }
9552
9873
 
9553
9874
  // Get a reference to the string struct that is being held by the string
9554
9875
  // node. This is the value we're going to actually manipulate.
9555
- yp_string_t *string = &((yp_string_node_t *) node)->unescaped;
9876
+ yp_string_t *string = &(((yp_string_node_t *) node)->unescaped);
9556
9877
  yp_string_ensure_owned(string);
9557
9878
 
9558
9879
  // Now get the bounds of the existing string. We'll use this as a
@@ -9568,7 +9889,6 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
9568
9889
  // whitespace, so we'll maintain a pointer to the current position in the
9569
9890
  // string that we're writing to.
9570
9891
  char *dest_cursor = source_start;
9571
- bool dedent_next = (index == 0) || YP_NODE_TYPE_P(nodes->nodes[index - 1], YP_NODE_STRING_NODE);
9572
9892
 
9573
9893
  while (source_cursor < source_end) {
9574
9894
  // If we need to dedent the next element within the heredoc or the next
@@ -9613,8 +9933,20 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
9613
9933
  dedent_next = true;
9614
9934
  }
9615
9935
 
9616
- string->length = dest_length;
9936
+ // We only want to write this node into the list if it has any content.
9937
+ if (dest_length == 0) {
9938
+ yp_node_destroy(parser, node);
9939
+ } else {
9940
+ string->length = dest_length;
9941
+ yp_unescape_manipulate_string(parser, string, (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL);
9942
+ nodes->nodes[write_index++] = node;
9943
+ }
9944
+
9945
+ // We always dedent the next node if it follows a string node.
9946
+ dedent_next = true;
9617
9947
  }
9948
+
9949
+ nodes->size = write_index;
9618
9950
  }
9619
9951
 
9620
9952
  static yp_node_t *
@@ -10363,10 +10695,8 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10363
10695
  }
10364
10696
  case YP_TOKEN_PARENTHESIS_LEFT:
10365
10697
  case YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
10366
- yp_token_type_t current_token_type = parser->current.type;
10698
+ yp_token_t opening = parser->current;
10367
10699
  parser_lex(parser);
10368
-
10369
- yp_token_t opening = parser->previous;
10370
10700
  while (accept_any(parser, 2, YP_TOKEN_SEMICOLON, YP_TOKEN_NEWLINE));
10371
10701
 
10372
10702
  // If this is the end of the file or we match a right parenthesis, then
@@ -10385,7 +10715,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10385
10715
  // If we hit a right parenthesis, then we're done parsing the parentheses
10386
10716
  // node, and we can check which kind of node we should return.
10387
10717
  if (match_type_p(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
10388
- if (current_token_type == YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
10718
+ if (opening.type == YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
10389
10719
  lex_state_set(parser, YP_LEX_STATE_ENDARG);
10390
10720
  }
10391
10721
  parser_lex(parser);
@@ -10403,6 +10733,8 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10403
10733
 
10404
10734
  if (multi_statement->lparen_loc.start == NULL) {
10405
10735
  multi_write = (yp_multi_write_node_t *) statement;
10736
+ multi_write->base.location.start = lparen_loc.start;
10737
+ multi_write->base.location.end = rparen_loc.end;
10406
10738
  multi_write->lparen_loc = lparen_loc;
10407
10739
  multi_write->rparen_loc = rparen_loc;
10408
10740
  } else {
@@ -10505,7 +10837,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10505
10837
  // fact a method call, not a constant read.
10506
10838
  if (
10507
10839
  match_type_p(parser, YP_TOKEN_PARENTHESIS_LEFT) ||
10508
- (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 2, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
10840
+ (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 3, YP_TOKEN_UAMPERSAND, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
10509
10841
  (yp_accepts_block_stack_p(parser) && match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_DO, YP_TOKEN_BRACE_LEFT))
10510
10842
  ) {
10511
10843
  yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
@@ -10628,7 +10960,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10628
10960
  // can still be a method call if it is followed by arguments or
10629
10961
  // a block, so we need to check for that here.
10630
10962
  if (
10631
- (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 2, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
10963
+ (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 3, YP_TOKEN_UAMPERSAND, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) ||
10632
10964
  (yp_accepts_block_stack_p(parser) && match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_DO, YP_TOKEN_BRACE_LEFT))
10633
10965
  ) {
10634
10966
  yp_arguments_t arguments = YP_EMPTY_ARGUMENTS;
@@ -10673,12 +11005,15 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
10673
11005
 
10674
11006
  lex_state_set(parser, YP_LEX_STATE_END);
10675
11007
  expect(parser, YP_TOKEN_HEREDOC_END, "Expected a closing delimiter for heredoc.");
11008
+
10676
11009
  if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
10677
11010
  assert(YP_NODE_TYPE_P(node, YP_NODE_INTERPOLATED_X_STRING_NODE));
10678
11011
  yp_interpolated_xstring_node_closing_set(((yp_interpolated_x_string_node_t *) node), &parser->previous);
11012
+ node->location = ((yp_interpolated_x_string_node_t *) node)->opening_loc;
10679
11013
  } else {
10680
11014
  assert(YP_NODE_TYPE_P(node, YP_NODE_INTERPOLATED_STRING_NODE));
10681
11015
  yp_interpolated_string_node_closing_set((yp_interpolated_string_node_t *) node, &parser->previous);
11016
+ node->location = ((yp_interpolated_string_node_t *) node)->opening_loc;
10682
11017
  }
10683
11018
 
10684
11019
  // If this is a heredoc that is indented with a ~, then we need to dedent
@@ -11043,7 +11378,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11043
11378
  return (yp_node_t *) yp_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
11044
11379
  }
11045
11380
 
11046
- yp_node_t *name = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a class name after `class`.");
11381
+ yp_node_t *constant_path = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a class name after `class`.");
11382
+ yp_token_t name = parser->previous;
11383
+ if (name.type != YP_TOKEN_CONSTANT) {
11384
+ yp_diagnostic_list_append(&parser->error_list, name.start, name.end, "Expected a constant name after `class`.");
11385
+ }
11386
+
11047
11387
  yp_token_t inheritance_operator;
11048
11388
  yp_node_t *superclass;
11049
11389
 
@@ -11084,7 +11424,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11084
11424
  yp_constant_id_list_t locals = parser->current_scope->locals;
11085
11425
  yp_parser_scope_pop(parser);
11086
11426
  yp_do_loop_stack_pop(parser);
11087
- return (yp_node_t *) yp_class_node_create(parser, &locals, &class_keyword, name, &inheritance_operator, superclass, statements, &parser->previous);
11427
+ return (yp_node_t *) yp_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
11088
11428
  }
11089
11429
  case YP_TOKEN_KEYWORD_DEF: {
11090
11430
  yp_token_t def_keyword = parser->current;
@@ -11243,6 +11583,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11243
11583
  break;
11244
11584
  }
11245
11585
  case YP_CASE_PARAMETER: {
11586
+ // If we're about to lex a label, we need to add the label
11587
+ // state to make sure the next newline is ignored.
11588
+ if (parser->current.type == YP_TOKEN_LABEL) {
11589
+ lex_state_set(parser, parser->lex_state | YP_LEX_STATE_LABEL);
11590
+ }
11591
+
11246
11592
  lparen = not_provided(parser);
11247
11593
  rparen = not_provided(parser);
11248
11594
  params = parse_parameters(parser, YP_BINDING_POWER_DEFINED, false, false, true);
@@ -11472,13 +11818,14 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11472
11818
  parser_lex(parser);
11473
11819
 
11474
11820
  yp_token_t module_keyword = parser->previous;
11475
- yp_node_t *name = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a module name after `module`.");
11821
+ yp_node_t *constant_path = parse_expression(parser, YP_BINDING_POWER_INDEX, "Expected to find a module name after `module`.");
11822
+ yp_token_t name;
11476
11823
 
11477
- // If we can recover from a syntax error that occurred while parsing the
11478
- // name of the module, then we'll handle that here.
11479
- if (YP_NODE_TYPE_P(name, YP_NODE_MISSING_NODE)) {
11480
- yp_token_t end_keyword = (yp_token_t) { .type = YP_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
11481
- return (yp_node_t *) yp_module_node_create(parser, NULL, &module_keyword, name, NULL, &end_keyword);
11824
+ // If we can recover from a syntax error that occurred while parsing
11825
+ // the name of the module, then we'll handle that here.
11826
+ if (YP_NODE_TYPE_P(constant_path, YP_NODE_MISSING_NODE)) {
11827
+ yp_token_t missing = (yp_token_t) { .type = YP_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
11828
+ return (yp_node_t *) yp_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
11482
11829
  }
11483
11830
 
11484
11831
  while (accept(parser, YP_TOKEN_COLON_COLON)) {
@@ -11487,7 +11834,15 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11487
11834
  expect(parser, YP_TOKEN_CONSTANT, "Expected to find a module name after `::`.");
11488
11835
  yp_node_t *constant = (yp_node_t *) yp_constant_read_node_create(parser, &parser->previous);
11489
11836
 
11490
- name = (yp_node_t *)yp_constant_path_node_create(parser, name, &double_colon, constant);
11837
+ constant_path = (yp_node_t *) yp_constant_path_node_create(parser, constant_path, &double_colon, constant);
11838
+ }
11839
+
11840
+ // Here we retrieve the name of the module. If it wasn't a constant,
11841
+ // then it's possible that `module foo` was passed, which is a
11842
+ // syntax error. We handle that here as well.
11843
+ name = parser->previous;
11844
+ if (name.type != YP_TOKEN_CONSTANT) {
11845
+ yp_diagnostic_list_append(&parser->error_list, name.start, name.end, "Expected to find a module name after `module`.");
11491
11846
  }
11492
11847
 
11493
11848
  yp_parser_scope_push(parser, true);
@@ -11514,7 +11869,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11514
11869
  yp_diagnostic_list_append(&parser->error_list, module_keyword.start, module_keyword.end, "Module definition in method body");
11515
11870
  }
11516
11871
 
11517
- return (yp_node_t *) yp_module_node_create(parser, &locals, &module_keyword, name, statements, &parser->previous);
11872
+ return (yp_node_t *) yp_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
11518
11873
  }
11519
11874
  case YP_TOKEN_KEYWORD_NIL:
11520
11875
  parser_lex(parser);
@@ -11550,12 +11905,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11550
11905
  expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `until` statement.");
11551
11906
  }
11552
11907
 
11553
- yp_until_node_t *until_node = yp_until_node_create(parser, &keyword, predicate, statements, 0);
11554
- if (parser->previous.type == YP_TOKEN_KEYWORD_END) {
11555
- until_node->base.location.end = parser->previous.end;
11556
- }
11557
-
11558
- return (yp_node_t *) until_node;
11908
+ return (yp_node_t *) yp_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
11559
11909
  }
11560
11910
  case YP_TOKEN_KEYWORD_WHILE: {
11561
11911
  yp_do_loop_stack_push(parser, true);
@@ -11576,11 +11926,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
11576
11926
  expect(parser, YP_TOKEN_KEYWORD_END, "Expected `end` to close `while` statement.");
11577
11927
  }
11578
11928
 
11579
- yp_while_node_t *while_node = yp_while_node_create(parser, &keyword, predicate, statements, 0);
11580
- if (parser->previous.type == YP_TOKEN_KEYWORD_END) {
11581
- while_node->base.location.end = parser->previous.end;
11582
- }
11583
- return (yp_node_t *) while_node;
11929
+ return (yp_node_t *) yp_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
11584
11930
  }
11585
11931
  case YP_TOKEN_PERCENT_LOWER_I: {
11586
11932
  parser_lex(parser);
@@ -12086,30 +12432,32 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
12086
12432
  yp_accepts_block_stack_push(parser, true);
12087
12433
  parser_lex(parser);
12088
12434
 
12089
- yp_token_t opening = parser->previous;
12435
+ yp_token_t operator = parser->previous;
12090
12436
  yp_parser_scope_push(parser, false);
12091
12437
  yp_block_parameters_node_t *params;
12092
12438
 
12093
12439
  switch (parser->current.type) {
12094
12440
  case YP_TOKEN_PARENTHESIS_LEFT: {
12095
- yp_token_t block_parameters_opening = parser->current;
12441
+ yp_token_t opening = parser->current;
12096
12442
  parser_lex(parser);
12097
12443
 
12098
12444
  if (match_type_p(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
12099
- params = yp_block_parameters_node_create(parser, NULL, &block_parameters_opening);
12445
+ params = yp_block_parameters_node_create(parser, NULL, &opening);
12100
12446
  } else {
12101
- params = parse_block_parameters(parser, false, &block_parameters_opening, true);
12447
+ params = parse_block_parameters(parser, false, &opening, true);
12102
12448
  }
12103
12449
 
12104
12450
  accept(parser, YP_TOKEN_NEWLINE);
12105
12451
  expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected ')' after left parenthesis.");
12106
- yp_block_parameters_node_closing_set(params, &parser->previous);
12107
12452
 
12453
+ yp_block_parameters_node_closing_set(params, &parser->previous);
12108
12454
  break;
12109
12455
  }
12110
12456
  case YP_CASE_PARAMETER: {
12457
+ yp_accepts_block_stack_push(parser, false);
12111
12458
  yp_token_t opening = not_provided(parser);
12112
12459
  params = parse_block_parameters(parser, false, &opening, true);
12460
+ yp_accepts_block_stack_pop(parser);
12113
12461
  break;
12114
12462
  }
12115
12463
  default: {
@@ -12118,16 +12466,20 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
12118
12466
  }
12119
12467
  }
12120
12468
 
12469
+ yp_token_t opening;
12121
12470
  yp_node_t *body = NULL;
12122
12471
  parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
12123
12472
 
12124
12473
  if (accept(parser, YP_TOKEN_LAMBDA_BEGIN)) {
12474
+ opening = parser->previous;
12475
+
12125
12476
  if (!accept(parser, YP_TOKEN_BRACE_RIGHT)) {
12126
12477
  body = (yp_node_t *) parse_statements(parser, YP_CONTEXT_LAMBDA_BRACES);
12127
12478
  expect(parser, YP_TOKEN_BRACE_RIGHT, "Expecting '}' to close lambda block.");
12128
12479
  }
12129
12480
  } else {
12130
12481
  expect(parser, YP_TOKEN_KEYWORD_DO, "Expected a 'do' keyword or a '{' to open lambda block.");
12482
+ opening = parser->previous;
12131
12483
 
12132
12484
  if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_END, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
12133
12485
  body = (yp_node_t *) parse_statements(parser, YP_CONTEXT_LAMBDA_DO_END);
@@ -12144,7 +12496,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
12144
12496
  yp_constant_id_list_t locals = parser->current_scope->locals;
12145
12497
  yp_parser_scope_pop(parser);
12146
12498
  yp_accepts_block_stack_pop(parser);
12147
- return (yp_node_t *) yp_lambda_node_create(parser, &locals, &opening, params, body, &parser->previous);
12499
+ return (yp_node_t *) yp_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, params, body);
12148
12500
  }
12149
12501
  case YP_TOKEN_UPLUS: {
12150
12502
  parser_lex(parser);
@@ -12363,7 +12715,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12363
12715
  case YP_CASE_WRITABLE: {
12364
12716
  parser_lex(parser);
12365
12717
  yp_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, "Expected a value after =.");
12366
- return parse_target(parser, node, &token, value);
12718
+ return parse_write(parser, node, &token, value);
12367
12719
  }
12368
12720
  case YP_NODE_SPLAT_NODE: {
12369
12721
  yp_splat_node_t *splat_node = (yp_splat_node_t *) node;
@@ -12372,7 +12724,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12372
12724
  case YP_CASE_WRITABLE:
12373
12725
  parser_lex(parser);
12374
12726
  yp_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, "Expected a value after =.");
12375
- return parse_target(parser, (yp_node_t *) splat_node, &token, value);
12727
+ return parse_write(parser, (yp_node_t *) splat_node, &token, value);
12376
12728
  default:
12377
12729
  break;
12378
12730
  }
@@ -12398,48 +12750,16 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12398
12750
  parser_lex(parser);
12399
12751
 
12400
12752
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12401
- yp_node_t *result = (yp_node_t *) yp_global_variable_operator_and_write_node_create(parser, node, &token, value);
12753
+ yp_node_t *result = (yp_node_t *) yp_global_variable_and_write_node_create(parser, node, &token, value);
12402
12754
 
12403
12755
  yp_node_destroy(parser, node);
12404
12756
  return result;
12405
12757
  }
12406
- case YP_NODE_CALL_NODE: {
12407
- yp_call_node_t *call_node = (yp_call_node_t *) node;
12408
-
12409
- // If we have a vcall (a method with no arguments and no
12410
- // receiver that could have been a local variable) then we
12411
- // will transform it into a local variable write.
12412
- if (yp_call_node_variable_call_p(call_node)) {
12413
- yp_location_t message_loc = call_node->message_loc;
12414
- yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12415
-
12416
- if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
12417
- yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
12418
- }
12419
-
12420
- parser_lex(parser);
12421
-
12422
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12423
- yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, message_loc.start, message_loc.end);
12424
- yp_node_t *result = (yp_node_t *) yp_local_variable_operator_and_write_node_create(parser, node, &token, value, constant_id);
12425
-
12426
- yp_node_destroy(parser, node);
12427
- return result;
12428
- }
12429
-
12430
- parser_lex(parser);
12431
-
12432
- yp_token_t operator = not_provided(parser);
12433
- node = parse_target(parser, node, &operator, NULL);
12434
-
12435
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12436
- return (yp_node_t *) yp_call_operator_and_write_node_create(parser, (yp_call_node_t *) node, &token, value);
12437
- }
12438
12758
  case YP_NODE_CLASS_VARIABLE_READ_NODE: {
12439
12759
  parser_lex(parser);
12440
12760
 
12441
12761
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12442
- yp_node_t *result = (yp_node_t *) yp_class_variable_operator_and_write_node_create(parser, node, &token, value);
12762
+ yp_node_t *result = (yp_node_t *) yp_class_variable_and_write_node_create(parser, node, &token, value);
12443
12763
 
12444
12764
  yp_node_destroy(parser, node);
12445
12765
  return result;
@@ -12448,13 +12768,13 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12448
12768
  parser_lex(parser);
12449
12769
 
12450
12770
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12451
- return (yp_node_t *) yp_constant_path_operator_and_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value);
12771
+ return (yp_node_t *) yp_constant_path_and_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value);
12452
12772
  }
12453
12773
  case YP_NODE_CONSTANT_READ_NODE: {
12454
12774
  parser_lex(parser);
12455
12775
 
12456
12776
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12457
- yp_node_t *result = (yp_node_t *) yp_constant_operator_and_write_node_create(parser, node, &token, value);
12777
+ yp_node_t *result = (yp_node_t *) yp_constant_and_write_node_create(parser, node, &token, value);
12458
12778
 
12459
12779
  yp_node_destroy(parser, node);
12460
12780
  return result;
@@ -12463,21 +12783,49 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12463
12783
  parser_lex(parser);
12464
12784
 
12465
12785
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12466
- yp_node_t *result = (yp_node_t *) yp_instance_variable_operator_and_write_node_create(parser, node, &token, value);
12786
+ yp_node_t *result = (yp_node_t *) yp_instance_variable_and_write_node_create(parser, node, &token, value);
12467
12787
 
12468
12788
  yp_node_destroy(parser, node);
12469
12789
  return result;
12470
12790
  }
12471
12791
  case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
12792
+ yp_local_variable_read_node_t *cast = (yp_local_variable_read_node_t *) node;
12472
12793
  parser_lex(parser);
12473
12794
 
12474
12795
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12475
- yp_constant_id_t constant_id = ((yp_local_variable_read_node_t *) node)->constant_id;
12476
- yp_node_t *result = (yp_node_t *) yp_local_variable_operator_and_write_node_create(parser, node, &token, value, constant_id);
12796
+ yp_node_t *result = (yp_node_t *) yp_local_variable_and_write_node_create(parser, node, &token, value, cast->constant_id, cast->depth);
12477
12797
 
12478
12798
  yp_node_destroy(parser, node);
12479
12799
  return result;
12480
12800
  }
12801
+ case YP_NODE_CALL_NODE: {
12802
+ yp_call_node_t *call_node = (yp_call_node_t *) node;
12803
+
12804
+ // If we have a vcall (a method with no arguments and no
12805
+ // receiver that could have been a local variable) then we
12806
+ // will transform it into a local variable write.
12807
+ if (yp_call_node_variable_call_p(call_node)) {
12808
+ yp_location_t message_loc = call_node->message_loc;
12809
+ yp_constant_id_t constant_id = yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12810
+
12811
+ if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
12812
+ yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
12813
+ }
12814
+
12815
+ parser_lex(parser);
12816
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12817
+ yp_node_t *result = (yp_node_t *) yp_local_variable_and_write_node_create(parser, node, &token, value, constant_id, 0);
12818
+
12819
+ yp_node_destroy(parser, node);
12820
+ return result;
12821
+ }
12822
+
12823
+ parser_lex(parser);
12824
+ node = parse_target(parser, node);
12825
+
12826
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12827
+ return (yp_node_t *) yp_call_operator_and_write_node_create(parser, (yp_call_node_t *) node, &token, value);
12828
+ }
12481
12829
  case YP_NODE_MULTI_WRITE_NODE: {
12482
12830
  parser_lex(parser);
12483
12831
  yp_diagnostic_list_append(&parser->error_list, token.start, token.end, "Cannot use `&&=' on a multi-write.");
@@ -12503,48 +12851,16 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12503
12851
  parser_lex(parser);
12504
12852
 
12505
12853
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12506
- yp_node_t *result = (yp_node_t *) yp_global_variable_operator_or_write_node_create(parser, node, &token, value);
12854
+ yp_node_t *result = (yp_node_t *) yp_global_variable_or_write_node_create(parser, node, &token, value);
12507
12855
 
12508
12856
  yp_node_destroy(parser, node);
12509
12857
  return result;
12510
12858
  }
12511
- case YP_NODE_CALL_NODE: {
12512
- yp_call_node_t *call_node = (yp_call_node_t *) node;
12513
-
12514
- // If we have a vcall (a method with no arguments and no
12515
- // receiver that could have been a local variable) then we
12516
- // will transform it into a local variable write.
12517
- if (yp_call_node_variable_call_p(call_node)) {
12518
- yp_location_t message_loc = call_node->message_loc;
12519
- yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12520
-
12521
- if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
12522
- yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
12523
- }
12524
-
12525
- parser_lex(parser);
12526
-
12527
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12528
- yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, message_loc.start, message_loc.end);
12529
- yp_node_t *result = (yp_node_t *) yp_local_variable_operator_or_write_node_create(parser, node, &token, value, constant_id);
12530
-
12531
- yp_node_destroy(parser, node);
12532
- return result;
12533
- }
12534
-
12535
- parser_lex(parser);
12536
-
12537
- yp_token_t operator = not_provided(parser);
12538
- node = parse_target(parser, node, &operator, NULL);
12539
-
12540
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12541
- return (yp_node_t *) yp_call_operator_or_write_node_create(parser, (yp_call_node_t *) node, &token, value);
12542
- }
12543
12859
  case YP_NODE_CLASS_VARIABLE_READ_NODE: {
12544
12860
  parser_lex(parser);
12545
12861
 
12546
12862
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12547
- yp_node_t *result = (yp_node_t *) yp_class_variable_operator_or_write_node_create(parser, node, &token, value);
12863
+ yp_node_t *result = (yp_node_t *) yp_class_variable_or_write_node_create(parser, node, &token, value);
12548
12864
 
12549
12865
  yp_node_destroy(parser, node);
12550
12866
  return result;
@@ -12553,13 +12869,13 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12553
12869
  parser_lex(parser);
12554
12870
 
12555
12871
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12556
- return (yp_node_t *) yp_constant_path_operator_or_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value);
12872
+ return (yp_node_t *) yp_constant_path_or_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value);
12557
12873
  }
12558
12874
  case YP_NODE_CONSTANT_READ_NODE: {
12559
12875
  parser_lex(parser);
12560
12876
 
12561
12877
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12562
- yp_node_t *result = (yp_node_t *) yp_constant_operator_or_write_node_create(parser, node, &token, value);
12878
+ yp_node_t *result = (yp_node_t *) yp_constant_or_write_node_create(parser, node, &token, value);
12563
12879
 
12564
12880
  yp_node_destroy(parser, node);
12565
12881
  return result;
@@ -12568,21 +12884,49 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12568
12884
  parser_lex(parser);
12569
12885
 
12570
12886
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12571
- yp_node_t *result = (yp_node_t *) yp_instance_variable_operator_or_write_node_create(parser, node, &token, value);
12887
+ yp_node_t *result = (yp_node_t *) yp_instance_variable_or_write_node_create(parser, node, &token, value);
12572
12888
 
12573
12889
  yp_node_destroy(parser, node);
12574
12890
  return result;
12575
12891
  }
12576
12892
  case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
12893
+ yp_local_variable_read_node_t *cast = (yp_local_variable_read_node_t *) node;
12577
12894
  parser_lex(parser);
12578
12895
 
12579
12896
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12580
- yp_constant_id_t constant_id = ((yp_local_variable_read_node_t *) node)->constant_id;
12581
- yp_node_t *result = (yp_node_t *) yp_local_variable_operator_or_write_node_create(parser, node, &token, value, constant_id);
12897
+ yp_node_t *result = (yp_node_t *) yp_local_variable_or_write_node_create(parser, node, &token, value, cast->constant_id, cast->depth);
12582
12898
 
12583
12899
  yp_node_destroy(parser, node);
12584
12900
  return result;
12585
12901
  }
12902
+ case YP_NODE_CALL_NODE: {
12903
+ yp_call_node_t *call_node = (yp_call_node_t *) node;
12904
+
12905
+ // If we have a vcall (a method with no arguments and no
12906
+ // receiver that could have been a local variable) then we
12907
+ // will transform it into a local variable write.
12908
+ if (yp_call_node_variable_call_p(call_node)) {
12909
+ yp_location_t message_loc = call_node->message_loc;
12910
+ yp_constant_id_t constant_id = yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12911
+
12912
+ if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
12913
+ yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
12914
+ }
12915
+
12916
+ parser_lex(parser);
12917
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12918
+ yp_node_t *result = (yp_node_t *) yp_local_variable_or_write_node_create(parser, node, &token, value, constant_id, 0);
12919
+
12920
+ yp_node_destroy(parser, node);
12921
+ return result;
12922
+ }
12923
+
12924
+ parser_lex(parser);
12925
+ node = parse_target(parser, node);
12926
+
12927
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
12928
+ return (yp_node_t *) yp_call_operator_or_write_node_create(parser, (yp_call_node_t *) node, &token, value);
12929
+ }
12586
12930
  case YP_NODE_MULTI_WRITE_NODE: {
12587
12931
  parser_lex(parser);
12588
12932
  yp_diagnostic_list_append(&parser->error_list, token.start, token.end, "Cannot use `||=' on a multi-write.");
@@ -12617,43 +12961,12 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12617
12961
  case YP_NODE_GLOBAL_VARIABLE_READ_NODE: {
12618
12962
  parser_lex(parser);
12619
12963
 
12620
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator");
12964
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
12621
12965
  yp_node_t *result = (yp_node_t *) yp_global_variable_operator_write_node_create(parser, node, &token, value);
12622
12966
 
12623
12967
  yp_node_destroy(parser, node);
12624
12968
  return result;
12625
12969
  }
12626
- case YP_NODE_CALL_NODE: {
12627
- yp_call_node_t *call_node = (yp_call_node_t *) node;
12628
-
12629
- // If we have a vcall (a method with no arguments and no
12630
- // receiver that could have been a local variable) then we
12631
- // will transform it into a local variable write.
12632
- if (yp_call_node_variable_call_p(call_node)) {
12633
- yp_location_t message_loc = call_node->message_loc;
12634
- yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
12635
-
12636
- if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
12637
- yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
12638
- }
12639
-
12640
- parser_lex(parser);
12641
-
12642
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
12643
- yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, message_loc.start, message_loc.end);
12644
- yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, constant_id);
12645
-
12646
- yp_node_destroy(parser, node);
12647
- return result;
12648
- }
12649
-
12650
- yp_token_t operator = not_provided(parser);
12651
- node = parse_target(parser, node, &operator, NULL);
12652
-
12653
- parser_lex(parser);
12654
- yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
12655
- return (yp_node_t *) yp_call_operator_write_node_create(parser, (yp_call_node_t *) node, &token, value);
12656
- }
12657
12970
  case YP_NODE_CLASS_VARIABLE_READ_NODE: {
12658
12971
  parser_lex(parser);
12659
12972
 
@@ -12688,15 +13001,43 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12688
13001
  return result;
12689
13002
  }
12690
13003
  case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
13004
+ yp_local_variable_read_node_t *cast = (yp_local_variable_read_node_t *) node;
12691
13005
  parser_lex(parser);
12692
13006
 
12693
13007
  yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
12694
- yp_constant_id_t constant_id = ((yp_local_variable_read_node_t *) node)->constant_id;
12695
- yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, constant_id);
13008
+ yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, cast->constant_id, cast->depth);
12696
13009
 
12697
13010
  yp_node_destroy(parser, node);
12698
13011
  return result;
12699
13012
  }
13013
+ case YP_NODE_CALL_NODE: {
13014
+ yp_call_node_t *call_node = (yp_call_node_t *) node;
13015
+
13016
+ // If we have a vcall (a method with no arguments and no
13017
+ // receiver that could have been a local variable) then we
13018
+ // will transform it into a local variable write.
13019
+ if (yp_call_node_variable_call_p(call_node)) {
13020
+ yp_location_t message_loc = call_node->message_loc;
13021
+ yp_constant_id_t constant_id = yp_parser_local_add_location(parser, message_loc.start, message_loc.end);
13022
+
13023
+ if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
13024
+ yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, "reserved for numbered parameter");
13025
+ }
13026
+
13027
+ parser_lex(parser);
13028
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13029
+ yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, constant_id, 0);
13030
+
13031
+ yp_node_destroy(parser, node);
13032
+ return result;
13033
+ }
13034
+
13035
+ node = parse_target(parser, node);
13036
+ parser_lex(parser);
13037
+
13038
+ yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
13039
+ return (yp_node_t *) yp_call_operator_write_node_create(parser, (yp_call_node_t *) node, &token, value);
13040
+ }
12700
13041
  case YP_NODE_MULTI_WRITE_NODE: {
12701
13042
  parser_lex(parser);
12702
13043
  yp_diagnostic_list_append(&parser->error_list, token.start, token.end, "Unexpected operator.");
@@ -12862,7 +13203,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12862
13203
  yp_statements_node_body_append(statements, node);
12863
13204
 
12864
13205
  yp_node_t *predicate = parse_expression(parser, binding_power, "Expected a predicate after 'until'");
12865
- return (yp_node_t *) yp_until_node_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_NODE_BEGIN_NODE) ? YP_LOOP_FLAGS_BEGIN_MODIFIER : 0);
13206
+ return (yp_node_t *) yp_until_node_modifier_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_NODE_BEGIN_NODE) ? YP_LOOP_FLAGS_BEGIN_MODIFIER : 0);
12866
13207
  }
12867
13208
  case YP_TOKEN_KEYWORD_WHILE_MODIFIER: {
12868
13209
  parser_lex(parser);
@@ -12870,7 +13211,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12870
13211
  yp_statements_node_body_append(statements, node);
12871
13212
 
12872
13213
  yp_node_t *predicate = parse_expression(parser, binding_power, "Expected a predicate after 'while'");
12873
- return (yp_node_t *) yp_while_node_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_NODE_BEGIN_NODE) ? YP_LOOP_FLAGS_BEGIN_MODIFIER : 0);
13214
+ return (yp_node_t *) yp_while_node_modifier_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_NODE_BEGIN_NODE) ? YP_LOOP_FLAGS_BEGIN_MODIFIER : 0);
12874
13215
  }
12875
13216
  case YP_TOKEN_QUESTION_MARK: {
12876
13217
  parser_lex(parser);
@@ -12908,7 +13249,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
12908
13249
 
12909
13250
  if (
12910
13251
  (parser->current.type == YP_TOKEN_PARENTHESIS_LEFT) ||
12911
- (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 2, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))
13252
+ (token_begins_expression_p(parser->current.type) || match_any_type_p(parser, 3, YP_TOKEN_UAMPERSAND, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))
12912
13253
  ) {
12913
13254
  // If we have a constant immediately following a '::' operator, then
12914
13255
  // this can either be a constant path or a method call, depending on
@@ -13140,7 +13481,7 @@ yp_metadata_read_u32(const char *ptr) {
13140
13481
  // ]*
13141
13482
  // ]
13142
13483
  // ```
13143
- static void
13484
+ void
13144
13485
  yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
13145
13486
  uint32_t filepath_size = yp_metadata_read_u32(metadata);
13146
13487
  metadata += 4;
@@ -13179,6 +13520,8 @@ yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
13179
13520
  // Initialize a parser with the given start and end pointers.
13180
13521
  YP_EXPORTED_FUNCTION void
13181
13522
  yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char *filepath) {
13523
+ assert(source != NULL);
13524
+
13182
13525
  // Set filepath to the file that was passed
13183
13526
  if (!filepath) filepath = "";
13184
13527
  yp_string_t filepath_string;
@@ -13190,6 +13533,8 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
13190
13533
  .enclosure_nesting = 0,
13191
13534
  .lambda_enclosure_nesting = -1,
13192
13535
  .brace_nesting = 0,
13536
+ .do_loop_stack = YP_STATE_STACK_EMPTY,
13537
+ .accepts_block_stack = YP_STATE_STACK_EMPTY,
13193
13538
  .lex_modes = {
13194
13539
  .index = 0,
13195
13540
  .stack = {{ .mode = YP_LEX_DEFAULT }},
@@ -13201,6 +13546,9 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
13201
13546
  .current = { .type = YP_TOKEN_EOF, .start = source, .end = source },
13202
13547
  .next_start = NULL,
13203
13548
  .heredoc_end = NULL,
13549
+ .comment_list = YP_LIST_EMPTY,
13550
+ .warning_list = YP_LIST_EMPTY,
13551
+ .error_list = YP_LIST_EMPTY,
13204
13552
  .current_scope = NULL,
13205
13553
  .current_context = NULL,
13206
13554
  .recovering = false,
@@ -13213,16 +13561,12 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
13213
13561
  .pattern_matching_newlines = false,
13214
13562
  .in_keyword_arg = false,
13215
13563
  .filepath_string = filepath_string,
13564
+ .constant_pool = YP_CONSTANT_POOL_EMPTY,
13565
+ .newline_list = YP_NEWLINE_LIST_EMPTY
13216
13566
  };
13217
13567
 
13218
- yp_state_stack_init(&parser->do_loop_stack);
13219
- yp_state_stack_init(&parser->accepts_block_stack);
13220
13568
  yp_accepts_block_stack_push(parser, true);
13221
13569
 
13222
- yp_list_init(&parser->warning_list);
13223
- yp_list_init(&parser->error_list);
13224
- yp_list_init(&parser->comment_list);
13225
-
13226
13570
  // Initialize the constant pool. We're going to completely guess as to the
13227
13571
  // number of constants that we'll need based on the size of the input. The
13228
13572
  // ratio we chose here is actually less arbitrary than you might think.
@@ -13246,14 +13590,15 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
13246
13590
  size_t newline_size = size / 22;
13247
13591
  yp_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
13248
13592
 
13249
- assert(source != NULL);
13593
+ // Skip past the UTF-8 BOM if it exists.
13250
13594
  if (size >= 3 && (unsigned char) source[0] == 0xef && (unsigned char) source[1] == 0xbb && (unsigned char) source[2] == 0xbf) {
13251
- // If the first three bytes of the source are the UTF-8 BOM, then we'll skip
13252
- // over them.
13253
13595
  parser->current.end += 3;
13254
- } else if (size >= 2 && source[0] == '#' && source[1] == '!') {
13255
- // If the first two bytes of the source are a shebang, then we'll indicate
13256
- // that the encoding comment is at the end of the shebang.
13596
+ parser->encoding_comment_start += 3;
13597
+ }
13598
+
13599
+ // If the first two bytes of the source are a shebang, then we'll indicate
13600
+ // that the encoding comment is at the end of the shebang.
13601
+ if (peek(parser) == '#' && peek_offset(parser, 1) == '!') {
13257
13602
  const char *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
13258
13603
  if (encoding_comment_start) {
13259
13604
  parser->encoding_comment_start = encoding_comment_start + 1;