prism 0.17.1 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -1
  3. data/Makefile +5 -5
  4. data/README.md +2 -2
  5. data/config.yml +26 -13
  6. data/docs/build_system.md +6 -6
  7. data/docs/building.md +1 -1
  8. data/docs/configuration.md +1 -0
  9. data/docs/encoding.md +68 -32
  10. data/docs/heredocs.md +1 -1
  11. data/docs/javascript.md +29 -1
  12. data/docs/ruby_api.md +14 -0
  13. data/ext/prism/api_node.c +74 -45
  14. data/ext/prism/extconf.rb +91 -127
  15. data/ext/prism/extension.c +1 -1
  16. data/ext/prism/extension.h +1 -1
  17. data/include/prism/ast.h +148 -133
  18. data/include/prism/diagnostic.h +27 -1
  19. data/include/prism/enc/pm_encoding.h +42 -1
  20. data/include/prism/parser.h +6 -0
  21. data/include/prism/version.h +3 -3
  22. data/lib/prism/compiler.rb +3 -3
  23. data/lib/prism/debug.rb +4 -0
  24. data/lib/prism/desugar_compiler.rb +1 -0
  25. data/lib/prism/dispatcher.rb +14 -14
  26. data/lib/prism/dot_visitor.rb +4334 -0
  27. data/lib/prism/dsl.rb +11 -11
  28. data/lib/prism/ffi.rb +3 -3
  29. data/lib/prism/mutation_compiler.rb +6 -6
  30. data/lib/prism/node.rb +182 -113
  31. data/lib/prism/node_ext.rb +61 -3
  32. data/lib/prism/parse_result.rb +46 -12
  33. data/lib/prism/serialize.rb +125 -131
  34. data/lib/prism/visitor.rb +3 -3
  35. data/lib/prism.rb +1 -0
  36. data/prism.gemspec +5 -1
  37. data/rbi/prism.rbi +83 -54
  38. data/sig/prism.rbs +47 -32
  39. data/src/diagnostic.c +61 -3
  40. data/src/enc/pm_big5.c +63 -0
  41. data/src/enc/pm_cp51932.c +57 -0
  42. data/src/enc/pm_euc_jp.c +10 -0
  43. data/src/enc/pm_gbk.c +5 -2
  44. data/src/enc/pm_tables.c +1478 -148
  45. data/src/node.c +33 -21
  46. data/src/prettyprint.c +1027 -925
  47. data/src/prism.c +925 -374
  48. data/src/regexp.c +12 -12
  49. data/src/serialize.c +36 -9
  50. metadata +6 -2
data/src/prism.c CHANGED
@@ -16,7 +16,7 @@ pm_version(void) {
16
16
 
17
17
  #ifndef PM_DEBUG_LOGGING
18
18
  /**
19
- * Debugging logging will provide you will additional debugging functions as
19
+ * Debugging logging will provide you with additional debugging functions as
20
20
  * well as automatically replace some functions with their debugging
21
21
  * counterparts.
22
22
  */
@@ -275,6 +275,16 @@ lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
275
275
  return lex_mode_push(parser, lex_mode);
276
276
  }
277
277
 
278
+ /**
279
+ * Push on a new list lex mode that is only used for compatibility. This is
280
+ * called when we're at the end of the file. We want the parser to be able to
281
+ * perform its normal error tolerance.
282
+ */
283
+ static inline bool
284
+ lex_mode_push_list_eof(pm_parser_t *parser) {
285
+ return lex_mode_push_list(parser, false, '\0');
286
+ }
287
+
278
288
  /**
279
289
  * Push on a new regexp lex mode.
280
290
  */
@@ -346,6 +356,16 @@ lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed
346
356
  return lex_mode_push(parser, lex_mode);
347
357
  }
348
358
 
359
+ /**
360
+ * Push on a new string lex mode that is only used for compatibility. This is
361
+ * called when we're at the end of the file. We want the parser to be able to
362
+ * perform its normal error tolerance.
363
+ */
364
+ static inline bool
365
+ lex_mode_push_string_eof(pm_parser_t *parser) {
366
+ return lex_mode_push_string(parser, false, false, '\0', '\0');
367
+ }
368
+
349
369
  /**
350
370
  * Pop the current lex state off the stack. If we're within the pre-allocated
351
371
  * space of the lex state stack, then we'll just decrement the index. Otherwise
@@ -395,7 +415,7 @@ lex_state_ignored_p(pm_parser_t *parser) {
395
415
 
396
416
  static inline bool
397
417
  lex_state_beg_p(pm_parser_t *parser) {
398
- return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || (parser->lex_state == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
418
+ return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
399
419
  }
400
420
 
401
421
  static inline bool
@@ -459,6 +479,11 @@ pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_
459
479
  pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
460
480
  }
461
481
 
482
+ /**
483
+ * Append an error to the list of errors on the parser using a format string.
484
+ */
485
+ #define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
486
+
462
487
  /**
463
488
  * Append an error to the list of errors on the parser using the location of the
464
489
  * current token.
@@ -469,12 +494,10 @@ pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
469
494
  }
470
495
 
471
496
  /**
472
- * Append an error to the list of errors on the parser using the given location.
497
+ * Append an error to the list of errors on the parser using the given location
498
+ * and a format string.
473
499
  */
474
- static inline void
475
- pm_parser_err_location(pm_parser_t *parser, const pm_location_t *location, pm_diagnostic_id_t diag_id) {
476
- pm_parser_err(parser, location->start, location->end, diag_id);
477
- }
500
+ #define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, (location)->start, (location)->end, diag_id, __VA_ARGS__)
478
501
 
479
502
  /**
480
503
  * Append an error to the list of errors on the parser using the location of the
@@ -485,6 +508,12 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_
485
508
  pm_parser_err(parser, node->location.start, node->location.end, diag_id);
486
509
  }
487
510
 
511
+ /**
512
+ * Append an error to the list of errors on the parser using the location of the
513
+ * given node and a format string.
514
+ */
515
+ #define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, node->location.start, node->location.end, diag_id, __VA_ARGS__)
516
+
488
517
  /**
489
518
  * Append an error to the list of errors on the parser using the location of the
490
519
  * previous token.
@@ -503,6 +532,12 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
503
532
  pm_parser_err(parser, token->start, token->end, diag_id);
504
533
  }
505
534
 
535
+ /**
536
+ * Append an error to the list of errors on the parser using the location of the
537
+ * given token and a format string.
538
+ */
539
+ #define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, token->start, token->end, diag_id, __VA_ARGS__)
540
+
506
541
  /**
507
542
  * Append a warning to the list of warnings on the parser.
508
543
  */
@@ -567,6 +602,102 @@ pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *toke
567
602
  return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
568
603
  }
569
604
 
605
+ /**
606
+ * Check whether or not the given node is a value expression.
607
+ * If the node is a value node, it returns NULL.
608
+ * If not, it returns the pointer to the node to be inspected as "void expression".
609
+ */
610
+ static pm_node_t*
611
+ pm_check_value_expression(pm_node_t *node) {
612
+ pm_node_t* void_node = NULL;
613
+
614
+ while (node != NULL) {
615
+ switch (PM_NODE_TYPE(node)) {
616
+ case PM_RETURN_NODE:
617
+ case PM_BREAK_NODE:
618
+ case PM_NEXT_NODE:
619
+ case PM_REDO_NODE:
620
+ case PM_RETRY_NODE:
621
+ case PM_MATCH_REQUIRED_NODE:
622
+ return void_node != NULL ? void_node : node;
623
+ case PM_MATCH_PREDICATE_NODE:
624
+ return NULL;
625
+ case PM_BEGIN_NODE: {
626
+ pm_begin_node_t *cast = (pm_begin_node_t *) node;
627
+ node = (pm_node_t *) cast->statements;
628
+ break;
629
+ }
630
+ case PM_PARENTHESES_NODE: {
631
+ pm_parentheses_node_t *cast = (pm_parentheses_node_t *) node;
632
+ node = (pm_node_t *) cast->body;
633
+ break;
634
+ }
635
+ case PM_STATEMENTS_NODE: {
636
+ pm_statements_node_t *cast = (pm_statements_node_t *) node;
637
+ node = cast->body.nodes[cast->body.size - 1];
638
+ break;
639
+ }
640
+ case PM_IF_NODE: {
641
+ pm_if_node_t *cast = (pm_if_node_t *) node;
642
+ if (cast->statements == NULL || cast->consequent == NULL) {
643
+ return NULL;
644
+ }
645
+ pm_node_t *vn = pm_check_value_expression((pm_node_t *) cast->statements);
646
+ if (vn == NULL) {
647
+ return NULL;
648
+ }
649
+ if (void_node == NULL) {
650
+ void_node = vn;
651
+ }
652
+ node = cast->consequent;
653
+ break;
654
+ }
655
+ case PM_UNLESS_NODE: {
656
+ pm_unless_node_t *cast = (pm_unless_node_t *) node;
657
+ if (cast->statements == NULL || cast->consequent == NULL) {
658
+ return NULL;
659
+ }
660
+ pm_node_t *vn = pm_check_value_expression((pm_node_t *) cast->statements);
661
+ if (vn == NULL) {
662
+ return NULL;
663
+ }
664
+ if (void_node == NULL) {
665
+ void_node = vn;
666
+ }
667
+ node = (pm_node_t *) cast->consequent;
668
+ break;
669
+ }
670
+ case PM_ELSE_NODE: {
671
+ pm_else_node_t *cast = (pm_else_node_t *) node;
672
+ node = (pm_node_t *) cast->statements;
673
+ break;
674
+ }
675
+ case PM_AND_NODE: {
676
+ pm_and_node_t *cast = (pm_and_node_t *) node;
677
+ node = cast->left;
678
+ break;
679
+ }
680
+ case PM_OR_NODE: {
681
+ pm_or_node_t *cast = (pm_or_node_t *) node;
682
+ node = cast->left;
683
+ break;
684
+ }
685
+ default:
686
+ return NULL;
687
+ }
688
+ }
689
+
690
+ return NULL;
691
+ }
692
+
693
+ static inline void
694
+ pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
695
+ pm_node_t *void_node = pm_check_value_expression(node);
696
+ if (void_node != NULL) {
697
+ pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
698
+ }
699
+ }
700
+
570
701
  /**
571
702
  * The predicate of conditional nodes can change what would otherwise be regular
572
703
  * nodes into specialized nodes. For example:
@@ -680,6 +811,27 @@ typedef struct {
680
811
  pm_node_t *block;
681
812
  } pm_arguments_t;
682
813
 
814
+ /**
815
+ * Retrieve the end location of a `pm_arguments_t` object.
816
+ */
817
+ static inline const uint8_t *
818
+ pm_arguments_end(pm_arguments_t *arguments) {
819
+ if (arguments->block != NULL) {
820
+ const uint8_t *end = arguments->block->location.end;
821
+ if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
822
+ end = arguments->closing_loc.end;
823
+ }
824
+ return end;
825
+ }
826
+ if (arguments->closing_loc.start != NULL) {
827
+ return arguments->closing_loc.end;
828
+ }
829
+ if (arguments->arguments != NULL) {
830
+ return arguments->arguments->base.location.end;
831
+ }
832
+ return arguments->closing_loc.end;
833
+ }
834
+
683
835
  /**
684
836
  * Check that we're not about to attempt to attach a brace block to a call that
685
837
  * has arguments without parentheses.
@@ -894,6 +1046,8 @@ pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node
894
1046
  */
895
1047
  static pm_and_node_t *
896
1048
  pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
1049
+ pm_assert_value_expression(parser, left);
1050
+
897
1051
  pm_and_node_t *node = PM_ALLOC_NODE(parser, pm_and_node_t);
898
1052
 
899
1053
  *node = (pm_and_node_t) {
@@ -1488,14 +1642,12 @@ pm_call_node_create(pm_parser_t *parser) {
1488
1642
  */
1489
1643
  static pm_call_node_t *
1490
1644
  pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
1645
+ pm_assert_value_expression(parser, receiver);
1646
+
1491
1647
  pm_call_node_t *node = pm_call_node_create(parser);
1492
1648
 
1493
1649
  node->base.location.start = receiver->location.start;
1494
- if (arguments->block != NULL) {
1495
- node->base.location.end = arguments->block->location.end;
1496
- } else {
1497
- node->base.location.end = arguments->closing_loc.end;
1498
- }
1650
+ node->base.location.end = pm_arguments_end(arguments);
1499
1651
 
1500
1652
  node->receiver = receiver;
1501
1653
  node->message_loc.start = arguments->opening_loc.start;
@@ -1515,6 +1667,9 @@ pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_
1515
1667
  */
1516
1668
  static pm_call_node_t *
1517
1669
  pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument) {
1670
+ pm_assert_value_expression(parser, receiver);
1671
+ pm_assert_value_expression(parser, argument);
1672
+
1518
1673
  pm_call_node_t *node = pm_call_node_create(parser);
1519
1674
 
1520
1675
  node->base.location.start = MIN(receiver->location.start, argument->location.start);
@@ -1536,18 +1691,16 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t
1536
1691
  */
1537
1692
  static pm_call_node_t *
1538
1693
  pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
1694
+ pm_assert_value_expression(parser, receiver);
1695
+
1539
1696
  pm_call_node_t *node = pm_call_node_create(parser);
1540
1697
 
1541
1698
  node->base.location.start = receiver->location.start;
1542
- if (arguments->block != NULL) {
1543
- node->base.location.end = arguments->block->location.end;
1544
- } else if (arguments->closing_loc.start != NULL) {
1545
- node->base.location.end = arguments->closing_loc.end;
1546
- } else if (arguments->arguments != NULL) {
1547
- node->base.location.end = arguments->arguments->base.location.end;
1548
- } else {
1549
- node->base.location.end = message->end;
1699
+ const uint8_t *end = pm_arguments_end(arguments);
1700
+ if (end == NULL) {
1701
+ end = message->end;
1550
1702
  }
1703
+ node->base.location.end = end;
1551
1704
 
1552
1705
  node->receiver = receiver;
1553
1706
  node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
@@ -1574,15 +1727,7 @@ pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments
1574
1727
  pm_call_node_t *node = pm_call_node_create(parser);
1575
1728
 
1576
1729
  node->base.location.start = message->start;
1577
- if (arguments->block != NULL) {
1578
- node->base.location.end = arguments->block->location.end;
1579
- } else if (arguments->closing_loc.start != NULL) {
1580
- node->base.location.end = arguments->closing_loc.end;
1581
- } else if (arguments->arguments != NULL) {
1582
- node->base.location.end = arguments->arguments->base.location.end;
1583
- } else {
1584
- node->base.location.end = arguments->closing_loc.end;
1585
- }
1730
+ node->base.location.end = pm_arguments_end(arguments);
1586
1731
 
1587
1732
  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
1588
1733
  node->opening_loc = arguments->opening_loc;
@@ -1599,6 +1744,8 @@ pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments
1599
1744
  */
1600
1745
  static pm_call_node_t *
1601
1746
  pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
1747
+ pm_assert_value_expression(parser, receiver);
1748
+
1602
1749
  pm_call_node_t *node = pm_call_node_create(parser);
1603
1750
 
1604
1751
  node->base.location.start = message->start;
@@ -1623,14 +1770,12 @@ pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *me
1623
1770
  */
1624
1771
  static pm_call_node_t *
1625
1772
  pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
1773
+ pm_assert_value_expression(parser, receiver);
1774
+
1626
1775
  pm_call_node_t *node = pm_call_node_create(parser);
1627
1776
 
1628
1777
  node->base.location.start = receiver->location.start;
1629
- if (arguments->block != NULL) {
1630
- node->base.location.end = arguments->block->location.end;
1631
- } else {
1632
- node->base.location.end = arguments->closing_loc.end;
1633
- }
1778
+ node->base.location.end = pm_arguments_end(arguments);
1634
1779
 
1635
1780
  node->receiver = receiver;
1636
1781
  node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
@@ -1652,6 +1797,8 @@ pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token
1652
1797
  */
1653
1798
  static pm_call_node_t *
1654
1799
  pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
1800
+ pm_assert_value_expression(parser, receiver);
1801
+
1655
1802
  pm_call_node_t *node = pm_call_node_create(parser);
1656
1803
 
1657
1804
  node->base.location.start = operator->start;
@@ -1981,7 +2128,7 @@ pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t
1981
2128
  * Allocate and initialize a new CaseNode node.
1982
2129
  */
1983
2130
  static pm_case_node_t *
1984
- pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, pm_else_node_t *consequent, const pm_token_t *end_keyword) {
2131
+ pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
1985
2132
  pm_case_node_t *node = PM_ALLOC_NODE(parser, pm_case_node_t);
1986
2133
 
1987
2134
  *node = (pm_case_node_t) {
@@ -1993,7 +2140,7 @@ pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node
1993
2140
  },
1994
2141
  },
1995
2142
  .predicate = predicate,
1996
- .consequent = consequent,
2143
+ .consequent = NULL,
1997
2144
  .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
1998
2145
  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
1999
2146
  .conditions = { 0 }
@@ -2007,7 +2154,7 @@ pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node
2007
2154
  */
2008
2155
  static void
2009
2156
  pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
2010
- assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE) || PM_NODE_TYPE_P(condition, PM_IN_NODE));
2157
+ assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
2011
2158
 
2012
2159
  pm_node_list_append(&node->conditions, condition);
2013
2160
  node->base.location.end = condition->location.end;
@@ -2031,6 +2178,60 @@ pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_key
2031
2178
  node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
2032
2179
  }
2033
2180
 
2181
+ /**
2182
+ * Allocate and initialize a new CaseMatchNode node.
2183
+ */
2184
+ static pm_case_match_node_t *
2185
+ pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
2186
+ pm_case_match_node_t *node = PM_ALLOC_NODE(parser, pm_case_match_node_t);
2187
+
2188
+ *node = (pm_case_match_node_t) {
2189
+ {
2190
+ .type = PM_CASE_MATCH_NODE,
2191
+ .location = {
2192
+ .start = case_keyword->start,
2193
+ .end = end_keyword->end
2194
+ },
2195
+ },
2196
+ .predicate = predicate,
2197
+ .consequent = NULL,
2198
+ .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
2199
+ .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
2200
+ .conditions = { 0 }
2201
+ };
2202
+
2203
+ return node;
2204
+ }
2205
+
2206
+ /**
2207
+ * Append a new condition to a CaseMatchNode node.
2208
+ */
2209
+ static void
2210
+ pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
2211
+ assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
2212
+
2213
+ pm_node_list_append(&node->conditions, condition);
2214
+ node->base.location.end = condition->location.end;
2215
+ }
2216
+
2217
+ /**
2218
+ * Set the consequent of a CaseMatchNode node.
2219
+ */
2220
+ static void
2221
+ pm_case_match_node_consequent_set(pm_case_match_node_t *node, pm_else_node_t *consequent) {
2222
+ node->consequent = consequent;
2223
+ node->base.location.end = consequent->base.location.end;
2224
+ }
2225
+
2226
+ /**
2227
+ * Set the end location for a CaseMatchNode node.
2228
+ */
2229
+ static void
2230
+ pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
2231
+ node->base.location.end = end_keyword->end;
2232
+ node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
2233
+ }
2234
+
2034
2235
  /**
2035
2236
  * Allocate a new ClassNode node.
2036
2237
  */
@@ -3075,6 +3276,7 @@ static pm_if_node_t *
3075
3276
  pm_if_node_create(pm_parser_t *parser,
3076
3277
  const pm_token_t *if_keyword,
3077
3278
  pm_node_t *predicate,
3279
+ const pm_token_t *then_keyword,
3078
3280
  pm_statements_node_t *statements,
3079
3281
  pm_node_t *consequent,
3080
3282
  const pm_token_t *end_keyword
@@ -3104,6 +3306,7 @@ pm_if_node_create(pm_parser_t *parser,
3104
3306
  },
3105
3307
  .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
3106
3308
  .predicate = predicate,
3309
+ .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
3107
3310
  .statements = statements,
3108
3311
  .consequent = consequent,
3109
3312
  .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
@@ -3134,6 +3337,7 @@ pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_t
3134
3337
  },
3135
3338
  .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
3136
3339
  .predicate = predicate,
3340
+ .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
3137
3341
  .statements = statements,
3138
3342
  .consequent = NULL,
3139
3343
  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
@@ -3146,7 +3350,8 @@ pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_t
3146
3350
  * Allocate and initialize an if node from a ternary expression.
3147
3351
  */
3148
3352
  static pm_if_node_t *
3149
- pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
3353
+ pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
3354
+ pm_assert_value_expression(parser, predicate);
3150
3355
  pm_conditional_predicate(predicate);
3151
3356
 
3152
3357
  pm_statements_node_t *if_statements = pm_statements_node_create(parser);
@@ -3171,6 +3376,7 @@ pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, pm_node_t *
3171
3376
  },
3172
3377
  .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
3173
3378
  .predicate = predicate,
3379
+ .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
3174
3380
  .statements = if_statements,
3175
3381
  .consequent = (pm_node_t *)else_node,
3176
3382
  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
@@ -3871,39 +4077,68 @@ pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name,
3871
4077
  return node;
3872
4078
  }
3873
4079
 
4080
+ /**
4081
+ * Returns true if the given bounds comprise a numbered parameter (i.e., they
4082
+ * are of the form /^_[1-9]$/).
4083
+ */
3874
4084
  static inline bool
3875
- token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
4085
+ pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
3876
4086
  return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (pm_char_is_decimal_digit(start[1]));
3877
4087
  }
3878
4088
 
3879
4089
  /**
3880
- * Allocate and initialize a new LocalVariableTargetNode node.
4090
+ * Ensure the given bounds do not comprise a numbered parameter. If they do, add
4091
+ * an appropriate error message to the parser.
4092
+ */
4093
+ static inline void
4094
+ pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
4095
+ if (pm_token_is_numbered_parameter(start, end)) {
4096
+ PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
4097
+ }
4098
+ }
4099
+
4100
+ /**
4101
+ * Allocate and initialize a new LocalVariableTargetNode node with the given
4102
+ * name and depth.
3881
4103
  */
3882
4104
  static pm_local_variable_target_node_t *
3883
- pm_local_variable_target_node_create(pm_parser_t *parser, const pm_token_t *name) {
4105
+ pm_local_variable_target_node_create_values(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
3884
4106
  pm_local_variable_target_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_target_node_t);
3885
4107
 
3886
- if (token_is_numbered_parameter(name->start, name->end)) {
3887
- pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NUMBERED_RESERVED);
3888
- }
3889
-
3890
4108
  *node = (pm_local_variable_target_node_t) {
3891
4109
  {
3892
4110
  .type = PM_LOCAL_VARIABLE_TARGET_NODE,
3893
- .location = PM_LOCATION_TOKEN_VALUE(name)
4111
+ .location = *location
3894
4112
  },
3895
- .name = pm_parser_constant_id_token(parser, name),
3896
- .depth = 0
4113
+ .name = name,
4114
+ .depth = depth
3897
4115
  };
3898
4116
 
3899
4117
  return node;
3900
4118
  }
3901
4119
 
4120
+ /**
4121
+ * Allocate and initialize a new LocalVariableTargetNode node.
4122
+ */
4123
+ static pm_local_variable_target_node_t *
4124
+ pm_local_variable_target_node_create(pm_parser_t *parser, const pm_token_t *name) {
4125
+ pm_refute_numbered_parameter(parser, name->start, name->end);
4126
+
4127
+ return pm_local_variable_target_node_create_values(
4128
+ parser,
4129
+ &(pm_location_t) { .start = name->start, .end = name->end },
4130
+ pm_parser_constant_id_token(parser, name),
4131
+ 0
4132
+ );
4133
+ }
4134
+
3902
4135
  /**
3903
4136
  * Allocate and initialize a new MatchPredicateNode node.
3904
4137
  */
3905
4138
  static pm_match_predicate_node_t *
3906
4139
  pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
4140
+ pm_assert_value_expression(parser, value);
4141
+
3907
4142
  pm_match_predicate_node_t *node = PM_ALLOC_NODE(parser, pm_match_predicate_node_t);
3908
4143
 
3909
4144
  *node = (pm_match_predicate_node_t) {
@@ -3927,6 +4162,8 @@ pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t
3927
4162
  */
3928
4163
  static pm_match_required_node_t *
3929
4164
  pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
4165
+ pm_assert_value_expression(parser, value);
4166
+
3930
4167
  pm_match_required_node_t *node = PM_ALLOC_NODE(parser, pm_match_required_node_t);
3931
4168
 
3932
4169
  *node = (pm_match_required_node_t) {
@@ -3957,10 +4194,10 @@ pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
3957
4194
  .type = PM_MATCH_WRITE_NODE,
3958
4195
  .location = call->base.location
3959
4196
  },
3960
- .call = call
4197
+ .call = call,
4198
+ .targets = { 0 }
3961
4199
  };
3962
4200
 
3963
- pm_constant_id_list_init(&node->locals);
3964
4201
  return node;
3965
4202
  }
3966
4203
 
@@ -4200,6 +4437,8 @@ pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, c
4200
4437
  */
4201
4438
  static pm_or_node_t *
4202
4439
  pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
4440
+ pm_assert_value_expression(parser, left);
4441
+
4203
4442
  pm_or_node_t *node = PM_ALLOC_NODE(parser, pm_or_node_t);
4204
4443
 
4205
4444
  *node = (pm_or_node_t) {
@@ -4470,6 +4709,9 @@ pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, con
4470
4709
  */
4471
4710
  static pm_range_node_t *
4472
4711
  pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
4712
+ pm_assert_value_expression(parser, left);
4713
+ pm_assert_value_expression(parser, right);
4714
+
4473
4715
  pm_range_node_t *node = PM_ALLOC_NODE(parser, pm_range_node_t);
4474
4716
  pm_node_flags_t flags = 0;
4475
4717
 
@@ -4885,28 +5127,6 @@ pm_statements_node_body_append(pm_statements_node_t *node, pm_node_t *statement)
4885
5127
  statement->flags |= PM_NODE_FLAG_NEWLINE;
4886
5128
  }
4887
5129
 
4888
- /**
4889
- * Allocate a new StringConcatNode node.
4890
- */
4891
- static pm_string_concat_node_t *
4892
- pm_string_concat_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right) {
4893
- pm_string_concat_node_t *node = PM_ALLOC_NODE(parser, pm_string_concat_node_t);
4894
-
4895
- *node = (pm_string_concat_node_t) {
4896
- {
4897
- .type = PM_STRING_CONCAT_NODE,
4898
- .location = {
4899
- .start = left->location.start,
4900
- .end = right->location.end
4901
- }
4902
- },
4903
- .left = left,
4904
- .right = right
4905
- };
4906
-
4907
- return node;
4908
- }
4909
-
4910
5130
  /**
4911
5131
  * Allocate a new StringNode node with the current string on the parser.
4912
5132
  */
@@ -4964,16 +5184,9 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
4964
5184
  assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
4965
5185
  pm_super_node_t *node = PM_ALLOC_NODE(parser, pm_super_node_t);
4966
5186
 
4967
- const uint8_t *end;
4968
- if (arguments->block != NULL) {
4969
- end = arguments->block->location.end;
4970
- } else if (arguments->closing_loc.start != NULL) {
4971
- end = arguments->closing_loc.end;
4972
- } else if (arguments->arguments != NULL) {
4973
- end = arguments->arguments->base.location.end;
4974
- } else {
5187
+ const uint8_t *end = pm_arguments_end(arguments);
5188
+ if (end == NULL) {
4975
5189
  assert(false && "unreachable");
4976
- end = NULL;
4977
5190
  }
4978
5191
 
4979
5192
  *node = (pm_super_node_t) {
@@ -5207,7 +5420,7 @@ pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
5207
5420
  * Allocate a new UnlessNode node.
5208
5421
  */
5209
5422
  static pm_unless_node_t *
5210
- pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements) {
5423
+ pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
5211
5424
  pm_conditional_predicate(predicate);
5212
5425
  pm_unless_node_t *node = PM_ALLOC_NODE(parser, pm_unless_node_t);
5213
5426
 
@@ -5229,6 +5442,7 @@ pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t
5229
5442
  },
5230
5443
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5231
5444
  .predicate = predicate,
5445
+ .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
5232
5446
  .statements = statements,
5233
5447
  .consequent = NULL,
5234
5448
  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
@@ -5259,6 +5473,7 @@ pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const
5259
5473
  },
5260
5474
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
5261
5475
  .predicate = predicate,
5476
+ .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5262
5477
  .statements = statements,
5263
5478
  .consequent = NULL,
5264
5479
  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
@@ -5535,17 +5750,16 @@ pm_parser_scope_push_transparent(pm_parser_t *parser) {
5535
5750
  }
5536
5751
 
5537
5752
  /**
5538
- * Check if the current scope has a given local variables.
5753
+ * Check if any of the currently visible scopes contain a local variable
5754
+ * described by the given constant id.
5539
5755
  */
5540
5756
  static int
5541
- pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
5542
- pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, token);
5757
+ pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
5543
5758
  pm_scope_t *scope = parser->current_scope;
5544
5759
  int depth = 0;
5545
5760
 
5546
5761
  while (scope != NULL) {
5547
- if (!scope->transparent &&
5548
- pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
5762
+ if (!scope->transparent && pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
5549
5763
  if (scope->closed) break;
5550
5764
 
5551
5765
  scope = scope->previous;
@@ -5555,6 +5769,16 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
5555
5769
  return -1;
5556
5770
  }
5557
5771
 
5772
+ /**
5773
+ * Check if any of the currently visible scopes contain a local variable
5774
+ * described by the given token. This function implicitly inserts a constant
5775
+ * into the constant pool.
5776
+ */
5777
+ static inline int
5778
+ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
5779
+ return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
5780
+ }
5781
+
5558
5782
  /**
5559
5783
  * Add a constant id to the local table of the current scope.
5560
5784
  */
@@ -5569,15 +5793,6 @@ pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id) {
5569
5793
  }
5570
5794
  }
5571
5795
 
5572
- /**
5573
- * Add a local variable from a constant string to the current scope.
5574
- */
5575
- static inline void
5576
- pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
5577
- pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
5578
- if (constant_id != 0) pm_parser_local_add(parser, constant_id);
5579
- }
5580
-
5581
5796
  /**
5582
5797
  * Add a local variable from a location to the current scope.
5583
5798
  */
@@ -5614,12 +5829,10 @@ static void
5614
5829
  pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
5615
5830
  // We want to check whether the parameter name is a numbered parameter or
5616
5831
  // not.
5617
- if (token_is_numbered_parameter(name->start, name->end)) {
5618
- pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NUMBERED_RESERVED);
5619
- }
5832
+ pm_refute_numbered_parameter(parser, name->start, name->end);
5620
5833
 
5621
5834
  // We want to ignore any parameter name that starts with an underscore.
5622
- if ((*name->start == '_')) return;
5835
+ if ((name->start < name->end) && (*name->start == '_')) return;
5623
5836
 
5624
5837
  // Otherwise we'll fetch the constant id for the parameter name and check
5625
5838
  // whether it's already in the current scope.
@@ -5655,7 +5868,16 @@ pm_parser_scope_pop(pm_parser_t *parser) {
5655
5868
  static inline size_t
5656
5869
  char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
5657
5870
  if (parser->encoding_changed) {
5658
- return parser->encoding.alpha_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
5871
+ size_t width;
5872
+ if ((width = parser->encoding.alpha_char(b, parser->end - b)) != 0) {
5873
+ return width;
5874
+ } else if (*b == '_') {
5875
+ return 1;
5876
+ } else if (*b >= 0x80) {
5877
+ return parser->encoding.char_width(b, parser->end - b);
5878
+ } else {
5879
+ return 0;
5880
+ }
5659
5881
  } else if (*b < 0x80) {
5660
5882
  return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
5661
5883
  } else {
@@ -5671,7 +5893,16 @@ char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
5671
5893
  static inline size_t
5672
5894
  char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
5673
5895
  if (parser->encoding_changed) {
5674
- return parser->encoding.alnum_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
5896
+ size_t width;
5897
+ if ((width = parser->encoding.alnum_char(b, parser->end - b)) != 0) {
5898
+ return width;
5899
+ } else if (*b == '_') {
5900
+ return 1;
5901
+ } else if (*b >= 0x80) {
5902
+ return parser->encoding.char_width(b, parser->end - b);
5903
+ } else {
5904
+ return 0;
5905
+ }
5675
5906
  } else if (*b < 0x80) {
5676
5907
  return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (*b == '_');
5677
5908
  } else {
@@ -5866,10 +6097,18 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
5866
6097
  }
5867
6098
 
5868
6099
  // Next, we're going to check for UTF-8. This is the most common encoding.
5869
- // Extensions like utf-8 can contain extra encoding details like,
5870
- // utf-8-dos, utf-8-linux, utf-8-mac. We treat these all as utf-8 should
5871
- // treat any encoding starting utf-8 as utf-8.
6100
+ // utf-8 can contain extra information at the end about the platform it is
6101
+ // encoded on, such as utf-8-mac or utf-8-unix. We'll ignore those suffixes.
5872
6102
  if ((start + 5 <= end) && (pm_strncasecmp(start, (const uint8_t *) "utf-8", 5) == 0)) {
6103
+ // We need to explicitly handle utf-8-hfs, as that one needs to switch
6104
+ // over to being utf8-mac.
6105
+ if (width == 9 && (pm_strncasecmp(start + 5, (const uint8_t *) "-hfs", 4) == 0)) {
6106
+ parser->encoding = pm_encoding_utf8_mac;
6107
+ parser->encoding_changed = true;
6108
+ if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
6109
+ return true;
6110
+ }
6111
+
5873
6112
  // We don't need to do anything here because the default encoding is
5874
6113
  // already UTF-8. We'll just return.
5875
6114
  return true;
@@ -5877,51 +6116,160 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
5877
6116
 
5878
6117
  // Next, we're going to loop through each of the encodings that we handle
5879
6118
  // explicitly. If we found one that we understand, we'll use that value.
5880
- #define ENCODING(value, prebuilt) \
6119
+ #define ENCODING1(value, prebuilt) \
5881
6120
  if (width == sizeof(value) - 1 && start + width <= end && pm_strncasecmp(start, (const uint8_t *) value, width) == 0) { \
5882
6121
  parser->encoding = prebuilt; \
5883
- parser->encoding_changed |= true; \
6122
+ parser->encoding_changed = true; \
5884
6123
  if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
5885
6124
  return true; \
5886
6125
  }
5887
6126
 
5888
- // Check most common first. (This is pretty arbitrary.)
5889
- ENCODING("ascii", pm_encoding_ascii);
5890
- ENCODING("ascii-8bit", pm_encoding_ascii_8bit);
5891
- ENCODING("us-ascii", pm_encoding_ascii);
5892
- ENCODING("binary", pm_encoding_ascii_8bit);
5893
- ENCODING("shift_jis", pm_encoding_shift_jis);
5894
- ENCODING("euc-jp", pm_encoding_euc_jp);
5895
-
5896
- // Then check all the others.
5897
- ENCODING("big5", pm_encoding_big5);
5898
- ENCODING("gbk", pm_encoding_gbk);
5899
- ENCODING("iso-8859-1", pm_encoding_iso_8859_1);
5900
- ENCODING("iso-8859-2", pm_encoding_iso_8859_2);
5901
- ENCODING("iso-8859-3", pm_encoding_iso_8859_3);
5902
- ENCODING("iso-8859-4", pm_encoding_iso_8859_4);
5903
- ENCODING("iso-8859-5", pm_encoding_iso_8859_5);
5904
- ENCODING("iso-8859-6", pm_encoding_iso_8859_6);
5905
- ENCODING("iso-8859-7", pm_encoding_iso_8859_7);
5906
- ENCODING("iso-8859-8", pm_encoding_iso_8859_8);
5907
- ENCODING("iso-8859-9", pm_encoding_iso_8859_9);
5908
- ENCODING("iso-8859-10", pm_encoding_iso_8859_10);
5909
- ENCODING("iso-8859-11", pm_encoding_iso_8859_11);
5910
- ENCODING("iso-8859-13", pm_encoding_iso_8859_13);
5911
- ENCODING("iso-8859-14", pm_encoding_iso_8859_14);
5912
- ENCODING("iso-8859-15", pm_encoding_iso_8859_15);
5913
- ENCODING("iso-8859-16", pm_encoding_iso_8859_16);
5914
- ENCODING("koi8-r", pm_encoding_koi8_r);
5915
- ENCODING("windows-31j", pm_encoding_windows_31j);
5916
- ENCODING("windows-1251", pm_encoding_windows_1251);
5917
- ENCODING("windows-1252", pm_encoding_windows_1252);
5918
- ENCODING("cp1251", pm_encoding_windows_1251);
5919
- ENCODING("cp1252", pm_encoding_windows_1252);
5920
- ENCODING("cp932", pm_encoding_windows_31j);
5921
- ENCODING("sjis", pm_encoding_windows_31j);
5922
- ENCODING("utf8-mac", pm_encoding_utf8_mac);
5923
-
5924
- #undef ENCODING
6127
+ // A convenience macro for comparing two aliases for the same encoding.
6128
+ #define ENCODING2(value1, value2, prebuilt) ENCODING1(value1, prebuilt) ENCODING1(value2, prebuilt)
6129
+
6130
+ if (width >= 3) {
6131
+ switch (*start) {
6132
+ case 'A': case 'a':
6133
+ ENCODING1("ASCII", pm_encoding_ascii);
6134
+ ENCODING1("ASCII-8BIT", pm_encoding_ascii_8bit);
6135
+ ENCODING1("ANSI_X3.4-1968", pm_encoding_ascii);
6136
+ break;
6137
+ case 'B': case 'b':
6138
+ ENCODING1("BINARY", pm_encoding_ascii_8bit);
6139
+ ENCODING1("Big5", pm_encoding_big5);
6140
+ ENCODING1("Big5-HKSCS", pm_encoding_big5_hkscs);
6141
+ ENCODING1("Big5-UAO", pm_encoding_big5_uao);
6142
+ break;
6143
+ case 'C': case 'c':
6144
+ ENCODING1("CP437", pm_encoding_ibm437);
6145
+ ENCODING1("CP720", pm_encoding_ibm720);
6146
+ ENCODING1("CP737", pm_encoding_ibm737);
6147
+ ENCODING1("CP775", pm_encoding_ibm775);
6148
+ ENCODING1("CP850", pm_encoding_cp850);
6149
+ ENCODING1("CP852", pm_encoding_cp852);
6150
+ ENCODING1("CP855", pm_encoding_cp855);
6151
+ ENCODING1("CP857", pm_encoding_ibm857);
6152
+ ENCODING1("CP860", pm_encoding_ibm860);
6153
+ ENCODING1("CP861", pm_encoding_ibm861);
6154
+ ENCODING1("CP862", pm_encoding_ibm862);
6155
+ ENCODING1("CP864", pm_encoding_ibm864);
6156
+ ENCODING1("CP865", pm_encoding_ibm865);
6157
+ ENCODING1("CP866", pm_encoding_ibm866);
6158
+ ENCODING1("CP869", pm_encoding_ibm869);
6159
+ ENCODING1("CP874", pm_encoding_windows_874);
6160
+ ENCODING1("CP878", pm_encoding_koi8_r);
6161
+ ENCODING1("CP863", pm_encoding_ibm863);
6162
+ ENCODING2("CP932", "csWindows31J", pm_encoding_windows_31j);
6163
+ ENCODING1("CP936", pm_encoding_gbk);
6164
+ ENCODING1("CP1250", pm_encoding_windows_1250);
6165
+ ENCODING1("CP1251", pm_encoding_windows_1251);
6166
+ ENCODING1("CP1252", pm_encoding_windows_1252);
6167
+ ENCODING1("CP1253", pm_encoding_windows_1253);
6168
+ ENCODING1("CP1254", pm_encoding_windows_1254);
6169
+ ENCODING1("CP1255", pm_encoding_windows_1255);
6170
+ ENCODING1("CP1256", pm_encoding_windows_1256);
6171
+ ENCODING1("CP1257", pm_encoding_windows_1257);
6172
+ ENCODING1("CP1258", pm_encoding_windows_1258);
6173
+ ENCODING1("CP51932", pm_encoding_cp51932);
6174
+ ENCODING1("CP65001", pm_encoding_utf_8);
6175
+ break;
6176
+ case 'E': case 'e':
6177
+ ENCODING2("EUC-JP", "eucJP", pm_encoding_euc_jp);
6178
+ ENCODING1("external", pm_encoding_utf_8);
6179
+ break;
6180
+ case 'F': case 'f':
6181
+ ENCODING1("filesystem", pm_encoding_utf_8);
6182
+ break;
6183
+ case 'G': case 'g':
6184
+ ENCODING1("GB1988", pm_encoding_gb1988);
6185
+ ENCODING1("GBK", pm_encoding_gbk);
6186
+ break;
6187
+ case 'I': case 'i':
6188
+ ENCODING1("IBM437", pm_encoding_ibm437);
6189
+ ENCODING1("IBM720", pm_encoding_ibm720);
6190
+ ENCODING1("IBM737", pm_encoding_ibm737);
6191
+ ENCODING1("IBM775", pm_encoding_ibm775);
6192
+ ENCODING1("IBM850", pm_encoding_cp850);
6193
+ ENCODING1("IBM852", pm_encoding_ibm852);
6194
+ ENCODING1("IBM855", pm_encoding_ibm855);
6195
+ ENCODING1("IBM857", pm_encoding_ibm857);
6196
+ ENCODING1("IBM860", pm_encoding_ibm860);
6197
+ ENCODING1("IBM861", pm_encoding_ibm861);
6198
+ ENCODING1("IBM862", pm_encoding_ibm862);
6199
+ ENCODING1("IBM863", pm_encoding_ibm863);
6200
+ ENCODING1("IBM864", pm_encoding_ibm864);
6201
+ ENCODING1("IBM865", pm_encoding_ibm865);
6202
+ ENCODING1("IBM866", pm_encoding_ibm866);
6203
+ ENCODING1("IBM869", pm_encoding_ibm869);
6204
+ ENCODING2("ISO-8859-1", "ISO8859-1", pm_encoding_iso_8859_1);
6205
+ ENCODING2("ISO-8859-2", "ISO8859-2", pm_encoding_iso_8859_2);
6206
+ ENCODING2("ISO-8859-3", "ISO8859-3", pm_encoding_iso_8859_3);
6207
+ ENCODING2("ISO-8859-4", "ISO8859-4", pm_encoding_iso_8859_4);
6208
+ ENCODING2("ISO-8859-5", "ISO8859-5", pm_encoding_iso_8859_5);
6209
+ ENCODING2("ISO-8859-6", "ISO8859-6", pm_encoding_iso_8859_6);
6210
+ ENCODING2("ISO-8859-7", "ISO8859-7", pm_encoding_iso_8859_7);
6211
+ ENCODING2("ISO-8859-8", "ISO8859-8", pm_encoding_iso_8859_8);
6212
+ ENCODING2("ISO-8859-9", "ISO8859-9", pm_encoding_iso_8859_9);
6213
+ ENCODING2("ISO-8859-10", "ISO8859-10", pm_encoding_iso_8859_10);
6214
+ ENCODING2("ISO-8859-11", "ISO8859-11", pm_encoding_iso_8859_11);
6215
+ ENCODING2("ISO-8859-13", "ISO8859-13", pm_encoding_iso_8859_13);
6216
+ ENCODING2("ISO-8859-14", "ISO8859-14", pm_encoding_iso_8859_14);
6217
+ ENCODING2("ISO-8859-15", "ISO8859-15", pm_encoding_iso_8859_15);
6218
+ ENCODING2("ISO-8859-16", "ISO8859-16", pm_encoding_iso_8859_16);
6219
+ break;
6220
+ case 'K': case 'k':
6221
+ ENCODING1("KOI8-R", pm_encoding_koi8_r);
6222
+ break;
6223
+ case 'L': case 'l':
6224
+ ENCODING1("locale", pm_encoding_utf_8);
6225
+ break;
6226
+ case 'M': case 'm':
6227
+ ENCODING1("macCentEuro", pm_encoding_mac_cent_euro);
6228
+ ENCODING1("macCroatian", pm_encoding_mac_croatian);
6229
+ ENCODING1("macCyrillic", pm_encoding_mac_cyrillic);
6230
+ ENCODING1("macGreek", pm_encoding_mac_greek);
6231
+ ENCODING1("macIceland", pm_encoding_mac_iceland);
6232
+ ENCODING1("macRoman", pm_encoding_mac_roman);
6233
+ ENCODING1("macRomania", pm_encoding_mac_romania);
6234
+ ENCODING1("macThai", pm_encoding_mac_thai);
6235
+ ENCODING1("macTurkish", pm_encoding_mac_turkish);
6236
+ ENCODING1("macUkraine", pm_encoding_mac_ukraine);
6237
+ break;
6238
+ case 'P': case 'p':
6239
+ ENCODING1("PCK", pm_encoding_windows_31j);
6240
+ break;
6241
+ case 'S': case 's':
6242
+ ENCODING1("Shift_JIS", pm_encoding_shift_jis);
6243
+ ENCODING1("SJIS", pm_encoding_windows_31j);
6244
+ break;
6245
+ case 'T': case 't':
6246
+ ENCODING1("TIS-620", pm_encoding_tis_620);
6247
+ break;
6248
+ case 'U': case 'u':
6249
+ ENCODING1("US-ASCII", pm_encoding_ascii);
6250
+ ENCODING2("UTF8-MAC", "UTF-8-HFS", pm_encoding_utf8_mac);
6251
+ break;
6252
+ case 'W': case 'w':
6253
+ ENCODING1("Windows-31J", pm_encoding_windows_31j);
6254
+ ENCODING1("Windows-874", pm_encoding_windows_874);
6255
+ ENCODING1("Windows-1250", pm_encoding_windows_1250);
6256
+ ENCODING1("Windows-1251", pm_encoding_windows_1251);
6257
+ ENCODING1("Windows-1252", pm_encoding_windows_1252);
6258
+ ENCODING1("Windows-1253", pm_encoding_windows_1253);
6259
+ ENCODING1("Windows-1254", pm_encoding_windows_1254);
6260
+ ENCODING1("Windows-1255", pm_encoding_windows_1255);
6261
+ ENCODING1("Windows-1256", pm_encoding_windows_1256);
6262
+ ENCODING1("Windows-1257", pm_encoding_windows_1257);
6263
+ ENCODING1("Windows-1258", pm_encoding_windows_1258);
6264
+ break;
6265
+ case '6':
6266
+ ENCODING1("646", pm_encoding_ascii);
6267
+ break;
6268
+ }
6269
+ }
6270
+
6271
+ #undef ENCODING2
6272
+ #undef ENCODING1
5925
6273
 
5926
6274
  return false;
5927
6275
  }
@@ -6026,6 +6374,8 @@ parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor
6026
6374
  */
6027
6375
  static inline bool
6028
6376
  parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
6377
+ bool result = true;
6378
+
6029
6379
  const uint8_t *start = parser->current.start + 1;
6030
6380
  const uint8_t *end = parser->current.end;
6031
6381
  if (end - start <= 7) return false;
@@ -6123,7 +6473,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
6123
6473
  (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
6124
6474
  (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
6125
6475
  ) {
6126
- parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
6476
+ result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
6127
6477
  }
6128
6478
  }
6129
6479
 
@@ -6150,7 +6500,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
6150
6500
  }
6151
6501
  }
6152
6502
 
6153
- return true;
6503
+ return result;
6154
6504
  }
6155
6505
 
6156
6506
  /******************************************************************************/
@@ -6324,7 +6674,7 @@ pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string
6324
6674
  }
6325
6675
 
6326
6676
  static pm_token_type_t
6327
- lex_optional_float_suffix(pm_parser_t *parser) {
6677
+ lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
6328
6678
  pm_token_type_t type = PM_TOKEN_INTEGER;
6329
6679
 
6330
6680
  // Here we're going to attempt to parse the optional decimal portion of a
@@ -6345,8 +6695,9 @@ lex_optional_float_suffix(pm_parser_t *parser) {
6345
6695
  // float. If it's not there, it's okay and we'll just continue on.
6346
6696
  if (match(parser, 'e') || match(parser, 'E')) {
6347
6697
  (void) (match(parser, '+') || match(parser, '-'));
6698
+ *seen_e = true;
6348
6699
 
6349
- if (pm_char_is_decimal_digit(*parser->current.end)) {
6700
+ if (pm_char_is_decimal_digit(peek(parser))) {
6350
6701
  parser->current.end++;
6351
6702
  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
6352
6703
  type = PM_TOKEN_FLOAT;
@@ -6360,8 +6711,9 @@ lex_optional_float_suffix(pm_parser_t *parser) {
6360
6711
  }
6361
6712
 
6362
6713
  static pm_token_type_t
6363
- lex_numeric_prefix(pm_parser_t *parser) {
6714
+ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
6364
6715
  pm_token_type_t type = PM_TOKEN_INTEGER;
6716
+ *seen_e = false;
6365
6717
 
6366
6718
  if (peek_offset(parser, -1) == '0') {
6367
6719
  switch (*parser->current.end) {
@@ -6432,14 +6784,14 @@ lex_numeric_prefix(pm_parser_t *parser) {
6432
6784
 
6433
6785
  // 0.xxx is a float
6434
6786
  case '.': {
6435
- type = lex_optional_float_suffix(parser);
6787
+ type = lex_optional_float_suffix(parser, seen_e);
6436
6788
  break;
6437
6789
  }
6438
6790
 
6439
6791
  // 0exxx is a float
6440
6792
  case 'e':
6441
6793
  case 'E': {
6442
- type = lex_optional_float_suffix(parser);
6794
+ type = lex_optional_float_suffix(parser, seen_e);
6443
6795
  break;
6444
6796
  }
6445
6797
  }
@@ -6449,7 +6801,7 @@ lex_numeric_prefix(pm_parser_t *parser) {
6449
6801
  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
6450
6802
 
6451
6803
  // Afterward, we'll lex as far as we can into an optional float suffix.
6452
- type = lex_optional_float_suffix(parser);
6804
+ type = lex_optional_float_suffix(parser, seen_e);
6453
6805
  }
6454
6806
 
6455
6807
  return type;
@@ -6461,7 +6813,8 @@ lex_numeric(pm_parser_t *parser) {
6461
6813
  parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
6462
6814
 
6463
6815
  if (parser->current.end < parser->end) {
6464
- type = lex_numeric_prefix(parser);
6816
+ bool seen_e = false;
6817
+ type = lex_numeric_prefix(parser, &seen_e);
6465
6818
 
6466
6819
  const uint8_t *end = parser->current.end;
6467
6820
  pm_token_type_t suffix_type = type;
@@ -6477,7 +6830,7 @@ lex_numeric(pm_parser_t *parser) {
6477
6830
  suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
6478
6831
  }
6479
6832
  } else {
6480
- if (match(parser, 'r')) {
6833
+ if (!seen_e && match(parser, 'r')) {
6481
6834
  suffix_type = PM_TOKEN_FLOAT_RATIONAL;
6482
6835
 
6483
6836
  if (match(parser, 'i')) {
@@ -6584,17 +6937,21 @@ lex_global_variable(pm_parser_t *parser) {
6584
6937
 
6585
6938
  /**
6586
6939
  * This function checks if the current token matches a keyword. If it does, it
6587
- * returns true. Otherwise, it returns false. The arguments are as follows:
6940
+ * returns the token type. Otherwise, it returns PM_TOKEN_EOF. The arguments are as follows:
6588
6941
  *
6942
+ * * `parser` - the parser object
6943
+ * * `current_start` - pointer to the start of the current token
6589
6944
  * * `value` - the literal string that we're checking for
6590
- * * `width` - the length of the token
6945
+ * * `vlen` - the length of the token
6591
6946
  * * `state` - the state that we should transition to if the token matches
6947
+ * * `type` - the expected token type
6948
+ * * `modifier_type` - the expected modifier token type
6592
6949
  */
6593
6950
  static inline pm_token_type_t
6594
- lex_keyword(pm_parser_t *parser, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
6595
- pm_lex_state_t last_state = parser->lex_state;
6951
+ lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
6952
+ if (memcmp(current_start, value, vlen) == 0) {
6953
+ pm_lex_state_t last_state = parser->lex_state;
6596
6954
 
6597
- if (parser->current.start + vlen <= parser->end && memcmp(parser->current.start, value, vlen) == 0) {
6598
6955
  if (parser->lex_state & PM_LEX_STATE_FNAME) {
6599
6956
  lex_state_set(parser, PM_LEX_STATE_ENDFN);
6600
6957
  } else {
@@ -6650,7 +7007,7 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
6650
7007
  }
6651
7008
 
6652
7009
  if (parser->lex_state != PM_LEX_STATE_DOT) {
6653
- if (width == 8 && (lex_keyword(parser, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
7010
+ if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
6654
7011
  return PM_TOKEN_KEYWORD_DEFINED;
6655
7012
  }
6656
7013
  }
@@ -6678,67 +7035,66 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
6678
7035
 
6679
7036
  if (parser->lex_state != PM_LEX_STATE_DOT) {
6680
7037
  pm_token_type_t type;
6681
-
6682
7038
  switch (width) {
6683
7039
  case 2:
6684
- if (lex_keyword(parser, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
7040
+ if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
6685
7041
  if (pm_do_loop_stack_p(parser)) {
6686
7042
  return PM_TOKEN_KEYWORD_DO_LOOP;
6687
7043
  }
6688
7044
  return PM_TOKEN_KEYWORD_DO;
6689
7045
  }
6690
7046
 
6691
- if ((type = lex_keyword(parser, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
6692
- if ((type = lex_keyword(parser, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6693
- if ((type = lex_keyword(parser, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7047
+ if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
7048
+ if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7049
+ if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6694
7050
  break;
6695
7051
  case 3:
6696
- if ((type = lex_keyword(parser, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6697
- if ((type = lex_keyword(parser, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6698
- if ((type = lex_keyword(parser, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6699
- if ((type = lex_keyword(parser, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6700
- if ((type = lex_keyword(parser, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6701
- if ((type = lex_keyword(parser, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6702
- if ((type = lex_keyword(parser, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7052
+ if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7053
+ if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7054
+ if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7055
+ if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7056
+ if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7057
+ if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7058
+ if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6703
7059
  break;
6704
7060
  case 4:
6705
- if ((type = lex_keyword(parser, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6706
- if ((type = lex_keyword(parser, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6707
- if ((type = lex_keyword(parser, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6708
- if ((type = lex_keyword(parser, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6709
- if ((type = lex_keyword(parser, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6710
- if ((type = lex_keyword(parser, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6711
- if ((type = lex_keyword(parser, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6712
- if ((type = lex_keyword(parser, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7061
+ if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7062
+ if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7063
+ if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7064
+ if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7065
+ if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7066
+ if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7067
+ if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7068
+ if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6713
7069
  break;
6714
7070
  case 5:
6715
- if ((type = lex_keyword(parser, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6716
- if ((type = lex_keyword(parser, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6717
- if ((type = lex_keyword(parser, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6718
- if ((type = lex_keyword(parser, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6719
- if ((type = lex_keyword(parser, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6720
- if ((type = lex_keyword(parser, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6721
- if ((type = lex_keyword(parser, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6722
- if ((type = lex_keyword(parser, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6723
- if ((type = lex_keyword(parser, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6724
- if ((type = lex_keyword(parser, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6725
- if ((type = lex_keyword(parser, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
6726
- if ((type = lex_keyword(parser, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
6727
- if ((type = lex_keyword(parser, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7071
+ if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7072
+ if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7073
+ if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7074
+ if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7075
+ if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7076
+ if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7077
+ if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7078
+ if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7079
+ if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7080
+ if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7081
+ if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
7082
+ if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
7083
+ if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6728
7084
  break;
6729
7085
  case 6:
6730
- if ((type = lex_keyword(parser, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6731
- if ((type = lex_keyword(parser, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6732
- if ((type = lex_keyword(parser, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
6733
- if ((type = lex_keyword(parser, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6734
- if ((type = lex_keyword(parser, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
7086
+ if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7087
+ if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7088
+ if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
7089
+ if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7090
+ if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
6735
7091
  break;
6736
7092
  case 8:
6737
- if ((type = lex_keyword(parser, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6738
- if ((type = lex_keyword(parser, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7093
+ if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7094
+ if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6739
7095
  break;
6740
7096
  case 12:
6741
- if ((type = lex_keyword(parser, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7097
+ if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6742
7098
  break;
6743
7099
  }
6744
7100
  }
@@ -8676,6 +9032,8 @@ parser_lex(pm_parser_t *parser) {
8676
9032
 
8677
9033
  if (parser->current.end < parser->end) {
8678
9034
  lex_mode_push_list(parser, false, *parser->current.end++);
9035
+ } else {
9036
+ lex_mode_push_list_eof(parser);
8679
9037
  }
8680
9038
 
8681
9039
  LEX(PM_TOKEN_PERCENT_LOWER_I);
@@ -8685,6 +9043,8 @@ parser_lex(pm_parser_t *parser) {
8685
9043
 
8686
9044
  if (parser->current.end < parser->end) {
8687
9045
  lex_mode_push_list(parser, true, *parser->current.end++);
9046
+ } else {
9047
+ lex_mode_push_list_eof(parser);
8688
9048
  }
8689
9049
 
8690
9050
  LEX(PM_TOKEN_PERCENT_UPPER_I);
@@ -8696,6 +9056,8 @@ parser_lex(pm_parser_t *parser) {
8696
9056
  lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
8697
9057
  pm_newline_list_check_append(&parser->newline_list, parser->current.end);
8698
9058
  parser->current.end++;
9059
+ } else {
9060
+ lex_mode_push_regexp(parser, '\0', '\0');
8699
9061
  }
8700
9062
 
8701
9063
  LEX(PM_TOKEN_REGEXP_BEGIN);
@@ -8707,6 +9069,8 @@ parser_lex(pm_parser_t *parser) {
8707
9069
  lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
8708
9070
  pm_newline_list_check_append(&parser->newline_list, parser->current.end);
8709
9071
  parser->current.end++;
9072
+ } else {
9073
+ lex_mode_push_string_eof(parser);
8710
9074
  }
8711
9075
 
8712
9076
  LEX(PM_TOKEN_STRING_BEGIN);
@@ -8718,6 +9082,8 @@ parser_lex(pm_parser_t *parser) {
8718
9082
  lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
8719
9083
  pm_newline_list_check_append(&parser->newline_list, parser->current.end);
8720
9084
  parser->current.end++;
9085
+ } else {
9086
+ lex_mode_push_string_eof(parser);
8721
9087
  }
8722
9088
 
8723
9089
  LEX(PM_TOKEN_STRING_BEGIN);
@@ -8729,6 +9095,8 @@ parser_lex(pm_parser_t *parser) {
8729
9095
  lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
8730
9096
  lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
8731
9097
  parser->current.end++;
9098
+ } else {
9099
+ lex_mode_push_string_eof(parser);
8732
9100
  }
8733
9101
 
8734
9102
  LEX(PM_TOKEN_SYMBOL_BEGIN);
@@ -8738,6 +9106,8 @@ parser_lex(pm_parser_t *parser) {
8738
9106
 
8739
9107
  if (parser->current.end < parser->end) {
8740
9108
  lex_mode_push_list(parser, false, *parser->current.end++);
9109
+ } else {
9110
+ lex_mode_push_list_eof(parser);
8741
9111
  }
8742
9112
 
8743
9113
  LEX(PM_TOKEN_PERCENT_LOWER_W);
@@ -8747,6 +9117,8 @@ parser_lex(pm_parser_t *parser) {
8747
9117
 
8748
9118
  if (parser->current.end < parser->end) {
8749
9119
  lex_mode_push_list(parser, true, *parser->current.end++);
9120
+ } else {
9121
+ lex_mode_push_list_eof(parser);
8750
9122
  }
8751
9123
 
8752
9124
  LEX(PM_TOKEN_PERCENT_UPPER_W);
@@ -8757,6 +9129,8 @@ parser_lex(pm_parser_t *parser) {
8757
9129
  if (parser->current.end < parser->end) {
8758
9130
  lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
8759
9131
  parser->current.end++;
9132
+ } else {
9133
+ lex_mode_push_string_eof(parser);
8760
9134
  }
8761
9135
 
8762
9136
  LEX(PM_TOKEN_PERCENT_LOWER_X);
@@ -8859,7 +9233,7 @@ parser_lex(pm_parser_t *parser) {
8859
9233
  !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
8860
9234
  (type == PM_TOKEN_IDENTIFIER) &&
8861
9235
  ((pm_parser_local_depth(parser, &parser->current) != -1) ||
8862
- token_is_numbered_parameter(parser->current.start, parser->current.end))
9236
+ pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
8863
9237
  ) {
8864
9238
  lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
8865
9239
  }
@@ -9511,6 +9885,7 @@ parser_lex(pm_parser_t *parser) {
9511
9885
  parser->heredoc_end = parser->current.end;
9512
9886
  }
9513
9887
 
9888
+ parser->current_string_common_whitespace = parser->lex_modes.current->as.heredoc.common_whitespace;
9514
9889
  lex_mode_pop(parser);
9515
9890
  if (!at_end) {
9516
9891
  lex_state_set(parser, PM_LEX_STATE_END);
@@ -9728,11 +10103,11 @@ parser_lex(pm_parser_t *parser) {
9728
10103
  typedef enum {
9729
10104
  PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
9730
10105
  PM_BINDING_POWER_STATEMENT = 2,
9731
- PM_BINDING_POWER_MODIFIER = 4, // if unless until while in
10106
+ PM_BINDING_POWER_MODIFIER = 4, // if unless until while
9732
10107
  PM_BINDING_POWER_MODIFIER_RESCUE = 6, // rescue
9733
10108
  PM_BINDING_POWER_COMPOSITION = 8, // and or
9734
10109
  PM_BINDING_POWER_NOT = 10, // not
9735
- PM_BINDING_POWER_MATCH = 12, // =>
10110
+ PM_BINDING_POWER_MATCH = 12, // => in
9736
10111
  PM_BINDING_POWER_DEFINED = 14, // defined?
9737
10112
  PM_BINDING_POWER_ASSIGNMENT = 16, // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
9738
10113
  PM_BINDING_POWER_TERNARY = 18, // ?:
@@ -9767,34 +10142,37 @@ typedef struct {
9767
10142
 
9768
10143
  /** Whether or not this token can be used as a binary operator. */
9769
10144
  bool binary;
10145
+
10146
+ /**
10147
+ * Whether or not this token can be used as non-associative binary operator.
10148
+ * Non-associative operators (e.g. in and =>) need special treatment in parse_expression.
10149
+ */
10150
+ bool nonassoc;
9770
10151
  } pm_binding_powers_t;
9771
10152
 
9772
- #define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true }
9773
- #define LEFT_ASSOCIATIVE(precedence) { precedence, precedence + 1, true }
9774
- #define RIGHT_ASSOCIATIVE(precedence) { precedence, precedence, true }
9775
- #define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false }
10153
+ #define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
10154
+ #define LEFT_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, false }
10155
+ #define RIGHT_ASSOCIATIVE(precedence) { precedence, precedence, true, false }
10156
+ #define NON_ASSOCIATIVE(precedence) { precedence + 1, precedence + 1, true, true }
10157
+ #define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
9776
10158
 
9777
10159
  pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
9778
- // if unless until while in rescue
10160
+ // if unless until while
9779
10161
  [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
9780
10162
  [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
9781
10163
  [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
9782
10164
  [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
9783
- [PM_TOKEN_KEYWORD_IN] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
9784
10165
 
9785
- // rescue modifier
9786
- [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = {
9787
- PM_BINDING_POWER_ASSIGNMENT,
9788
- PM_BINDING_POWER_MODIFIER_RESCUE + 1,
9789
- true
9790
- },
10166
+ // rescue
10167
+ [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
9791
10168
 
9792
10169
  // and or
9793
10170
  [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
9794
10171
  [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
9795
10172
 
9796
- // =>
9797
- [PM_TOKEN_EQUAL_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
10173
+ // => in
10174
+ [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
10175
+ [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
9798
10176
 
9799
10177
  // &&= &= ^= = >>= <<= -= %= |= += /= *= **=
9800
10178
  [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
@@ -9816,8 +10194,8 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
9816
10194
  [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
9817
10195
 
9818
10196
  // .. ...
9819
- [PM_TOKEN_DOT_DOT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
9820
- [PM_TOKEN_DOT_DOT_DOT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
10197
+ [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
10198
+ [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
9821
10199
 
9822
10200
  // ||
9823
10201
  [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
@@ -9862,7 +10240,7 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
9862
10240
 
9863
10241
  // -@
9864
10242
  [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
9865
- [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false },
10243
+ [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
9866
10244
 
9867
10245
  // **
9868
10246
  [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
@@ -10038,6 +10416,16 @@ expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_to
10038
10416
  static pm_node_t *
10039
10417
  parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id);
10040
10418
 
10419
+ /**
10420
+ * This is a wrapper of parse_expression, which also checks whether the resulting node is value expression.
10421
+ */
10422
+ static pm_node_t *
10423
+ parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
10424
+ pm_node_t *node = parse_expression(parser, binding_power, diag_id);
10425
+ pm_assert_value_expression(parser, node);
10426
+ return node;
10427
+ }
10428
+
10041
10429
  /**
10042
10430
  * This function controls whether or not we will attempt to parse an expression
10043
10431
  * beginning at the subsequent token. It is used when we are in a context where
@@ -10121,11 +10509,11 @@ static pm_node_t *
10121
10509
  parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
10122
10510
  if (accept1(parser, PM_TOKEN_USTAR)) {
10123
10511
  pm_token_t operator = parser->previous;
10124
- pm_node_t *expression = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
10512
+ pm_node_t *expression = parse_value_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
10125
10513
  return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
10126
10514
  }
10127
10515
 
10128
- return parse_expression(parser, binding_power, diag_id);
10516
+ return parse_value_expression(parser, binding_power, diag_id);
10129
10517
  }
10130
10518
 
10131
10519
  /**
@@ -10147,6 +10535,8 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
10147
10535
  name[length] = '=';
10148
10536
 
10149
10537
  // Now switch the name to the new string.
10538
+ // This silences clang analyzer warning about leak of memory pointed by `name`.
10539
+ // NOLINTNEXTLINE(clang-analyzer-*)
10150
10540
  *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
10151
10541
  }
10152
10542
 
@@ -10179,8 +10569,8 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
10179
10569
  target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
10180
10570
  return target;
10181
10571
  case PM_LOCAL_VARIABLE_READ_NODE:
10182
- if (token_is_numbered_parameter(target->location.start, target->location.end)) {
10183
- pm_parser_err_node(parser, target, PM_ERR_PARAMETER_NUMBERED_RESERVED);
10572
+ if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
10573
+ PM_PARSER_ERR_NODE_FORMAT(parser, target, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
10184
10574
  } else {
10185
10575
  assert(sizeof(pm_local_variable_target_node_t) == sizeof(pm_local_variable_read_node_t));
10186
10576
  target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
@@ -10238,10 +10628,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
10238
10628
  assert(sizeof(pm_local_variable_target_node_t) == sizeof(pm_local_variable_read_node_t));
10239
10629
  target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
10240
10630
 
10241
- if (token_is_numbered_parameter(message.start, message.end)) {
10242
- pm_parser_err_location(parser, &message, PM_ERR_PARAMETER_NUMBERED_RESERVED);
10243
- }
10244
-
10631
+ pm_refute_numbered_parameter(parser, message.start, message.end);
10245
10632
  return target;
10246
10633
  }
10247
10634
 
@@ -10284,8 +10671,12 @@ static pm_node_t *
10284
10671
  parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
10285
10672
  pm_node_t *result = parse_target(parser, target);
10286
10673
 
10287
- // Ensure that we have either an = or a ) after the targets.
10288
- if (!match3(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_KEYWORD_IN)) {
10674
+ // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in parens after the targets.
10675
+ if (
10676
+ !match1(parser, PM_TOKEN_EQUAL) &&
10677
+ !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
10678
+ !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
10679
+ ) {
10289
10680
  pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
10290
10681
  }
10291
10682
 
@@ -10322,10 +10713,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10322
10713
  return (pm_node_t *) node;
10323
10714
  }
10324
10715
  case PM_LOCAL_VARIABLE_READ_NODE: {
10325
- if (token_is_numbered_parameter(target->location.start, target->location.end)) {
10326
- pm_parser_err_node(parser, target, PM_ERR_PARAMETER_NUMBERED_RESERVED);
10327
- }
10328
-
10716
+ pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
10329
10717
  pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
10330
10718
 
10331
10719
  pm_constant_id_t constant_id = local_read->name;
@@ -10387,10 +10775,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10387
10775
  pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
10388
10776
  target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
10389
10777
 
10390
- if (token_is_numbered_parameter(message.start, message.end)) {
10391
- pm_parser_err_location(parser, &message, PM_ERR_PARAMETER_NUMBERED_RESERVED);
10392
- }
10393
-
10778
+ pm_refute_numbered_parameter(parser, message.start, message.end);
10394
10779
  return target;
10395
10780
  }
10396
10781
 
@@ -10616,7 +11001,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
10616
11001
  pm_node_t *value = NULL;
10617
11002
 
10618
11003
  if (token_begins_expression_p(parser->current.type)) {
10619
- value = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11004
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
10620
11005
  } else if (pm_parser_local_depth(parser, &operator) == -1) {
10621
11006
  pm_parser_err_token(parser, &operator, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
10622
11007
  }
@@ -10634,7 +11019,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
10634
11019
  pm_node_t *value = NULL;
10635
11020
 
10636
11021
  if (token_begins_expression_p(parser->current.type)) {
10637
- value = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_EXPRESSION_AFTER_LABEL);
11022
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_EXPRESSION_AFTER_LABEL);
10638
11023
  } else {
10639
11024
  if (parser->encoding.isupper_char(label.start, (label.end - 1) - label.start)) {
10640
11025
  pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
@@ -10658,7 +11043,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
10658
11043
  break;
10659
11044
  }
10660
11045
  default: {
10661
- pm_node_t *key = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_KEY);
11046
+ pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_KEY);
10662
11047
  pm_token_t operator;
10663
11048
 
10664
11049
  if (pm_symbol_node_label_p(key)) {
@@ -10668,7 +11053,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
10668
11053
  operator = parser->previous;
10669
11054
  }
10670
11055
 
10671
- pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_VALUE);
11056
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_VALUE);
10672
11057
  element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
10673
11058
  break;
10674
11059
  }
@@ -10726,13 +11111,18 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10726
11111
  return;
10727
11112
  }
10728
11113
 
11114
+ bool parsed_first_argument = false;
10729
11115
  bool parsed_bare_hash = false;
10730
11116
  bool parsed_block_argument = false;
11117
+ bool parsed_forwarding_arguments = false;
10731
11118
 
10732
11119
  while (!match1(parser, PM_TOKEN_EOF)) {
10733
11120
  if (parsed_block_argument) {
10734
11121
  pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
10735
11122
  }
11123
+ if (parsed_forwarding_arguments) {
11124
+ pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
11125
+ }
10736
11126
 
10737
11127
  pm_node_t *argument = NULL;
10738
11128
 
@@ -10764,7 +11154,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10764
11154
  pm_node_t *expression = NULL;
10765
11155
 
10766
11156
  if (token_begins_expression_p(parser->current.type)) {
10767
- expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_ARGUMENT);
11157
+ expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_ARGUMENT);
10768
11158
  } else if (pm_parser_local_depth(parser, &operator) == -1) {
10769
11159
  pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
10770
11160
  }
@@ -10783,14 +11173,14 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10783
11173
  parser_lex(parser);
10784
11174
  pm_token_t operator = parser->previous;
10785
11175
 
10786
- if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA)) {
11176
+ if (match3(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON)) {
10787
11177
  if (pm_parser_local_depth(parser, &parser->previous) == -1) {
10788
11178
  pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
10789
11179
  }
10790
11180
 
10791
11181
  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
10792
11182
  } else {
10793
- pm_node_t *expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
11183
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
10794
11184
 
10795
11185
  if (parsed_bare_hash) {
10796
11186
  pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
@@ -10816,9 +11206,13 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10816
11206
  if (pm_parser_local_depth(parser, &parser->previous) == -1) {
10817
11207
  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
10818
11208
  }
11209
+ if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
11210
+ pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
11211
+ }
10819
11212
 
10820
11213
  argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
10821
11214
  parse_arguments_append(parser, arguments, argument);
11215
+ parsed_forwarding_arguments = true;
10822
11216
  break;
10823
11217
  }
10824
11218
  }
@@ -10826,7 +11220,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10826
11220
  /* fallthrough */
10827
11221
  default: {
10828
11222
  if (argument == NULL) {
10829
- argument = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_ARGUMENT);
11223
+ argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_ARGUMENT);
10830
11224
  }
10831
11225
 
10832
11226
  bool contains_keyword_splat = false;
@@ -10845,7 +11239,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10845
11239
  pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
10846
11240
 
10847
11241
  // Finish parsing the one we are part way through
10848
- pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_VALUE);
11242
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_VALUE);
10849
11243
 
10850
11244
  argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
10851
11245
  pm_keyword_hash_node_elements_append(bare_hash, argument);
@@ -10870,6 +11264,8 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10870
11264
  }
10871
11265
  }
10872
11266
 
11267
+ parsed_first_argument = true;
11268
+
10873
11269
  // If parsing the argument failed, we need to stop parsing arguments.
10874
11270
  if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
10875
11271
 
@@ -11029,7 +11425,7 @@ parse_parameters(
11029
11425
  pm_binding_power_t binding_power,
11030
11426
  bool uses_parentheses,
11031
11427
  bool allows_trailing_comma,
11032
- bool allows_forwarding_parameter
11428
+ bool allows_forwarding_parameters
11033
11429
  ) {
11034
11430
  pm_parameters_node_t *params = pm_parameters_node_create(parser);
11035
11431
  bool looping = true;
@@ -11064,7 +11460,10 @@ parse_parameters(
11064
11460
  pm_parser_local_add_token(parser, &name);
11065
11461
  } else {
11066
11462
  name = not_provided(parser);
11067
- pm_parser_local_add_token(parser, &operator);
11463
+
11464
+ if (allows_forwarding_parameters) {
11465
+ pm_parser_local_add_token(parser, &operator);
11466
+ }
11068
11467
  }
11069
11468
 
11070
11469
  pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
@@ -11078,7 +11477,7 @@ parse_parameters(
11078
11477
  break;
11079
11478
  }
11080
11479
  case PM_TOKEN_UDOT_DOT_DOT: {
11081
- if (!allows_forwarding_parameter) {
11480
+ if (!allows_forwarding_parameters) {
11082
11481
  pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
11083
11482
  }
11084
11483
 
@@ -11086,9 +11485,7 @@ parse_parameters(
11086
11485
  update_parameter_state(parser, &parser->current, &order);
11087
11486
  parser_lex(parser);
11088
11487
 
11089
- if (allows_forwarding_parameter) {
11090
- pm_parser_local_add_constant(parser, "*", 1);
11091
- pm_parser_local_add_constant(parser, "&", 1);
11488
+ if (allows_forwarding_parameters) {
11092
11489
  pm_parser_local_add_token(parser, &parser->previous);
11093
11490
  }
11094
11491
 
@@ -11148,7 +11545,7 @@ parse_parameters(
11148
11545
  if (accept1(parser, PM_TOKEN_EQUAL)) {
11149
11546
  pm_token_t operator = parser->previous;
11150
11547
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11151
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_PARAMETER_NO_DEFAULT);
11548
+ pm_node_t *value = parse_value_expression(parser, binding_power, PM_ERR_PARAMETER_NO_DEFAULT);
11152
11549
 
11153
11550
  pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
11154
11551
  pm_parameters_node_optionals_append(params, param);
@@ -11207,7 +11604,7 @@ parse_parameters(
11207
11604
 
11208
11605
  if (token_begins_expression_p(parser->current.type)) {
11209
11606
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11210
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_PARAMETER_NO_DEFAULT_KW);
11607
+ pm_node_t *value = parse_value_expression(parser, binding_power, PM_ERR_PARAMETER_NO_DEFAULT_KW);
11211
11608
  context_pop(parser);
11212
11609
  param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
11213
11610
  }
@@ -11244,7 +11641,10 @@ parse_parameters(
11244
11641
  pm_parser_local_add_token(parser, &name);
11245
11642
  } else {
11246
11643
  name = not_provided(parser);
11247
- pm_parser_local_add_token(parser, &operator);
11644
+
11645
+ if (allows_forwarding_parameters) {
11646
+ pm_parser_local_add_token(parser, &operator);
11647
+ }
11248
11648
  }
11249
11649
 
11250
11650
  pm_rest_parameter_node_t *param = pm_rest_parameter_node_create(parser, &operator, &name);
@@ -11276,7 +11676,10 @@ parse_parameters(
11276
11676
  pm_parser_local_add_token(parser, &name);
11277
11677
  } else {
11278
11678
  name = not_provided(parser);
11279
- pm_parser_local_add_token(parser, &operator);
11679
+
11680
+ if (allows_forwarding_parameters) {
11681
+ pm_parser_local_add_token(parser, &operator);
11682
+ }
11280
11683
  }
11281
11684
 
11282
11685
  param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
@@ -11622,6 +12025,13 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
11622
12025
  // argument to this method call.
11623
12026
  parse_arguments(parser, arguments, true, PM_TOKEN_EOF);
11624
12027
 
12028
+ // If we have done with the arguments and still not consumed the comma,
12029
+ // then we have a trailing comma where we need to check whether it is
12030
+ // allowed or not.
12031
+ if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
12032
+ pm_parser_err_previous(parser, PM_ERR_EXPECT_ARGUMENT);
12033
+ }
12034
+
11625
12035
  pm_accepts_block_stack_pop(parser);
11626
12036
  }
11627
12037
 
@@ -11658,14 +12068,19 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
11658
12068
  }
11659
12069
 
11660
12070
  static inline pm_node_t *
11661
- parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context) {
12071
+ parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword) {
11662
12072
  context_push(parser, PM_CONTEXT_PREDICATE);
11663
12073
  pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
11664
- pm_node_t *predicate = parse_expression(parser, binding_power, error_id);
12074
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, error_id);
11665
12075
 
11666
12076
  // Predicates are closed by a term, a "then", or a term and then a "then".
11667
12077
  bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
11668
- predicate_closed |= accept1(parser, PM_TOKEN_KEYWORD_THEN);
12078
+
12079
+ if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
12080
+ predicate_closed = true;
12081
+ *then_keyword = parser->previous;
12082
+ }
12083
+
11669
12084
  if (!predicate_closed) {
11670
12085
  pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
11671
12086
  }
@@ -11677,7 +12092,9 @@ parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_contex
11677
12092
  static inline pm_node_t *
11678
12093
  parse_conditional(pm_parser_t *parser, pm_context_t context) {
11679
12094
  pm_token_t keyword = parser->previous;
11680
- pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context);
12095
+ pm_token_t then_keyword = not_provided(parser);
12096
+
12097
+ pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword);
11681
12098
  pm_statements_node_t *statements = NULL;
11682
12099
 
11683
12100
  if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
@@ -11692,10 +12109,10 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
11692
12109
 
11693
12110
  switch (context) {
11694
12111
  case PM_CONTEXT_IF:
11695
- parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, statements, NULL, &end_keyword);
12112
+ parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
11696
12113
  break;
11697
12114
  case PM_CONTEXT_UNLESS:
11698
- parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, statements);
12115
+ parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
11699
12116
  break;
11700
12117
  default:
11701
12118
  assert(false && "unreachable");
@@ -11709,14 +12126,14 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
11709
12126
  if (context == PM_CONTEXT_IF) {
11710
12127
  while (accept1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
11711
12128
  pm_token_t elsif_keyword = parser->previous;
11712
- pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF);
12129
+ pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword);
11713
12130
  pm_accepts_block_stack_push(parser, true);
11714
12131
  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF);
11715
12132
  pm_accepts_block_stack_pop(parser);
11716
12133
 
11717
12134
  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
11718
12135
 
11719
- pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, statements, NULL, &end_keyword);
12136
+ pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
11720
12137
  ((pm_if_node_t *) current)->consequent = elsif;
11721
12138
  current = elsif;
11722
12139
  }
@@ -12165,7 +12582,7 @@ parse_variable_call(pm_parser_t *parser) {
12165
12582
  return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
12166
12583
  }
12167
12584
 
12168
- if (!parser->current_scope->closed && token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
12585
+ if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
12169
12586
  // Indicate that this scope is using numbered params so that child
12170
12587
  // scopes cannot.
12171
12588
  parser->current_scope->numbered_params = true;
@@ -12212,15 +12629,23 @@ parse_variable_call(pm_parser_t *parser) {
12212
12629
  return (pm_node_t *) node;
12213
12630
  }
12214
12631
 
12632
+ /**
12633
+ * Parse the method definition name based on the current token available on the
12634
+ * parser. If it does not match a valid method definition name, then a missing
12635
+ * token is returned.
12636
+ */
12215
12637
  static inline pm_token_t
12216
12638
  parse_method_definition_name(pm_parser_t *parser) {
12217
12639
  switch (parser->current.type) {
12218
12640
  case PM_CASE_KEYWORD:
12219
12641
  case PM_TOKEN_CONSTANT:
12220
- case PM_TOKEN_IDENTIFIER:
12221
12642
  case PM_TOKEN_METHOD_NAME:
12222
12643
  parser_lex(parser);
12223
12644
  return parser->previous;
12645
+ case PM_TOKEN_IDENTIFIER:
12646
+ pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
12647
+ parser_lex(parser);
12648
+ return parser->previous;
12224
12649
  case PM_CASE_OPERATOR:
12225
12650
  lex_state_set(parser, PM_LEX_STATE_ENDFN);
12226
12651
  parser_lex(parser);
@@ -12781,7 +13206,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
12781
13206
  pm_token_t lparen = parser->current;
12782
13207
  parser_lex(parser);
12783
13208
 
12784
- pm_node_t *expression = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
13209
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
12785
13210
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
12786
13211
 
12787
13212
  accept1(parser, PM_TOKEN_NEWLINE);
@@ -13024,9 +13449,10 @@ parse_strings_empty_content(const uint8_t *location) {
13024
13449
  * Parse a set of strings that could be concatenated together.
13025
13450
  */
13026
13451
  static inline pm_node_t *
13027
- parse_strings(pm_parser_t *parser) {
13452
+ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13028
13453
  assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
13029
- pm_node_t *result = NULL;
13454
+
13455
+ bool concating = false;
13030
13456
  bool state_is_arg_labeled = lex_state_p(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
13031
13457
 
13032
13458
  while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
@@ -13162,7 +13588,7 @@ parse_strings(pm_parser_t *parser) {
13162
13588
  }
13163
13589
  }
13164
13590
 
13165
- if (result == NULL) {
13591
+ if (current == NULL) {
13166
13592
  // If the node we just parsed is a symbol node, then we can't
13167
13593
  // concatenate it with anything else, so we can now return that
13168
13594
  // node.
@@ -13172,7 +13598,7 @@ parse_strings(pm_parser_t *parser) {
13172
13598
 
13173
13599
  // If we don't already have a node, then it's fine and we can just
13174
13600
  // set the result to be the node we just parsed.
13175
- result = node;
13601
+ current = node;
13176
13602
  } else {
13177
13603
  // Otherwise we need to check the type of the node we just parsed.
13178
13604
  // If it cannot be concatenated with the previous node, then we'll
@@ -13181,13 +13607,22 @@ parse_strings(pm_parser_t *parser) {
13181
13607
  pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
13182
13608
  }
13183
13609
 
13184
- // Either way we will create a concat node to hold the strings
13185
- // together.
13186
- result = (pm_node_t *) pm_string_concat_node_create(parser, result, node);
13610
+ // If we haven't already created our container for concatenation,
13611
+ // we'll do that now.
13612
+ if (!concating) {
13613
+ concating = true;
13614
+ pm_token_t bounds = not_provided(parser);
13615
+
13616
+ pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
13617
+ pm_interpolated_string_node_append(container, current);
13618
+ current = (pm_node_t *) container;
13619
+ }
13620
+
13621
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
13187
13622
  }
13188
13623
  }
13189
13624
 
13190
- return result;
13625
+ return current;
13191
13626
  }
13192
13627
 
13193
13628
  /**
@@ -13304,7 +13739,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13304
13739
  // Otherwise, we're going to parse the first statement in the list
13305
13740
  // of statements within the parentheses.
13306
13741
  pm_accepts_block_stack_push(parser, true);
13742
+ context_push(parser, PM_CONTEXT_PARENS);
13307
13743
  pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_CANNOT_PARSE_EXPRESSION);
13744
+ context_pop(parser);
13308
13745
 
13309
13746
  // Determine if this statement is followed by a terminator. In the
13310
13747
  // case of a single statement, this is fine. But in the case of
@@ -13446,8 +13883,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13446
13883
  // Characters can be followed by strings in which case they are
13447
13884
  // automatically concatenated.
13448
13885
  if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
13449
- pm_node_t *concat = parse_strings(parser);
13450
- return (pm_node_t *) pm_string_concat_node_create(parser, node, concat);
13886
+ return parse_strings(parser, node);
13451
13887
  }
13452
13888
 
13453
13889
  return node;
@@ -13661,7 +14097,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13661
14097
  cast->base.type = PM_X_STRING_NODE;
13662
14098
  }
13663
14099
 
13664
- size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
14100
+ size_t common_whitespace = parser->current_string_common_whitespace;
13665
14101
  if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
13666
14102
  parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
13667
14103
  }
@@ -13707,7 +14143,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13707
14143
 
13708
14144
  // If this is a heredoc that is indented with a ~, then we need
13709
14145
  // to dedent each line by the common leading whitespace.
13710
- size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
14146
+ size_t common_whitespace = parser->current_string_common_whitespace;
13711
14147
  if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
13712
14148
  pm_node_list_t *nodes;
13713
14149
  if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
@@ -13721,8 +14157,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13721
14157
  }
13722
14158
 
13723
14159
  if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
13724
- pm_node_t *concat = parse_strings(parser);
13725
- return (pm_node_t *) pm_string_concat_node_create(parser, node, concat);
14160
+ return parse_strings(parser, node);
13726
14161
  }
13727
14162
 
13728
14163
  return node;
@@ -13811,21 +14246,23 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13811
14246
  } else if (!token_begins_expression_p(parser->current.type)) {
13812
14247
  predicate = NULL;
13813
14248
  } else {
13814
- predicate = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CASE_EXPRESSION_AFTER_CASE);
14249
+ predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CASE_EXPRESSION_AFTER_CASE);
13815
14250
  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13816
14251
  }
13817
14252
 
13818
14253
  if (accept1(parser, PM_TOKEN_KEYWORD_END)) {
13819
14254
  pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
13820
- return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, NULL, &parser->previous);
14255
+ return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
13821
14256
  }
13822
14257
 
13823
14258
  // At this point we can create a case node, though we don't yet know if it
13824
14259
  // is a case-in or case-when node.
13825
14260
  pm_token_t end_keyword = not_provided(parser);
13826
- pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL, &end_keyword);
14261
+ pm_node_t *node;
13827
14262
 
13828
14263
  if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
14264
+ pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
14265
+
13829
14266
  // At this point we've seen a when keyword, so we know this is a
13830
14267
  // case-when node. We will continue to parse the when nodes until we hit
13831
14268
  // the end of the list.
@@ -13836,14 +14273,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13836
14273
  do {
13837
14274
  if (accept1(parser, PM_TOKEN_USTAR)) {
13838
14275
  pm_token_t operator = parser->previous;
13839
- pm_node_t *expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
14276
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
13840
14277
 
13841
14278
  pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
13842
14279
  pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
13843
14280
 
13844
14281
  if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
13845
14282
  } else {
13846
- pm_node_t *condition = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CASE_EXPRESSION_AFTER_WHEN);
14283
+ pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CASE_EXPRESSION_AFTER_WHEN);
13847
14284
  pm_when_node_conditions_append(when_node, condition);
13848
14285
 
13849
14286
  if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
@@ -13865,7 +14302,23 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13865
14302
 
13866
14303
  pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
13867
14304
  }
14305
+
14306
+ // If we didn't parse any conditions (in or when) then we need
14307
+ // to indicate that we have an error.
14308
+ if (case_node->conditions.size == 0) {
14309
+ pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
14310
+ }
14311
+
14312
+ node = (pm_node_t *) case_node;
13868
14313
  } else {
14314
+ pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
14315
+
14316
+ // If this is a case-match node (i.e., it is a pattern matching
14317
+ // case statement) then we must have a predicate.
14318
+ if (predicate == NULL) {
14319
+ pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
14320
+ }
14321
+
13869
14322
  // At this point we expect that we're parsing a case-in node. We will
13870
14323
  // continue to parse the in nodes until we hit the end of the list.
13871
14324
  while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
@@ -13884,11 +14337,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13884
14337
  // for guard clauses in the form of `if` or `unless` statements.
13885
14338
  if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
13886
14339
  pm_token_t keyword = parser->previous;
13887
- pm_node_t *predicate = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CONDITIONAL_IF_PREDICATE);
14340
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CONDITIONAL_IF_PREDICATE);
13888
14341
  pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
13889
14342
  } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
13890
14343
  pm_token_t keyword = parser->previous;
13891
- pm_node_t *predicate = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
14344
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
13892
14345
  pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
13893
14346
  }
13894
14347
 
@@ -13919,14 +14372,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13919
14372
  // Now that we have the full pattern and statements, we can create the
13920
14373
  // node and attach it to the case node.
13921
14374
  pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
13922
- pm_case_node_condition_append(case_node, condition);
14375
+ pm_case_match_node_condition_append(case_node, condition);
13923
14376
  }
13924
- }
13925
14377
 
13926
- // If we didn't parse any conditions (in or when) then we need to
13927
- // indicate that we have an error.
13928
- if (case_node->conditions.size == 0) {
13929
- pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
14378
+ // If we didn't parse any conditions (in or when) then we need
14379
+ // to indicate that we have an error.
14380
+ if (case_node->conditions.size == 0) {
14381
+ pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
14382
+ }
14383
+
14384
+ node = (pm_node_t *) case_node;
13930
14385
  }
13931
14386
 
13932
14387
  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
@@ -13940,12 +14395,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13940
14395
  else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
13941
14396
  }
13942
14397
 
13943
- pm_case_node_consequent_set(case_node, else_node);
14398
+ if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
14399
+ pm_case_node_consequent_set((pm_case_node_t *) node, else_node);
14400
+ } else {
14401
+ pm_case_match_node_consequent_set((pm_case_match_node_t *) node, else_node);
14402
+ }
13944
14403
  }
13945
14404
 
13946
14405
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
13947
- pm_case_node_end_keyword_loc_set(case_node, &parser->previous);
13948
- return (pm_node_t *) case_node;
14406
+ if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
14407
+ pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
14408
+ } else {
14409
+ pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
14410
+ }
14411
+
14412
+ return node;
13949
14413
  }
13950
14414
  case PM_TOKEN_KEYWORD_BEGIN: {
13951
14415
  parser_lex(parser);
@@ -14101,7 +14565,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14101
14565
  parser->command_start = true;
14102
14566
  parser_lex(parser);
14103
14567
 
14104
- superclass = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CLASS_SUPERCLASS);
14568
+ superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CLASS_SUPERCLASS);
14105
14569
  } else {
14106
14570
  inheritance_operator = not_provided(parser);
14107
14571
  superclass = NULL;
@@ -14172,6 +14636,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14172
14636
  operator = parser->previous;
14173
14637
  name = parse_method_definition_name(parser);
14174
14638
  } else {
14639
+ pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
14175
14640
  pm_parser_scope_push(parser, true);
14176
14641
  name = parser->previous;
14177
14642
  }
@@ -14245,7 +14710,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14245
14710
  case PM_TOKEN_PARENTHESIS_LEFT: {
14246
14711
  parser_lex(parser);
14247
14712
  pm_token_t lparen = parser->previous;
14248
- pm_node_t *expression = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_DEF_RECEIVER);
14713
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_DEF_RECEIVER);
14249
14714
 
14250
14715
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
14251
14716
  pm_token_t rparen = parser->previous;
@@ -14326,6 +14791,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14326
14791
  equal = parser->previous;
14327
14792
 
14328
14793
  context_push(parser, PM_CONTEXT_DEF);
14794
+ pm_do_loop_stack_push(parser, false);
14329
14795
  statements = (pm_node_t *) pm_statements_node_create(parser);
14330
14796
 
14331
14797
  pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, PM_ERR_DEF_ENDLESS);
@@ -14338,6 +14804,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14338
14804
  }
14339
14805
 
14340
14806
  pm_statements_node_body_append((pm_statements_node_t *) statements, statement);
14807
+ pm_do_loop_stack_pop(parser);
14341
14808
  context_pop(parser);
14342
14809
  end_keyword = not_provided(parser);
14343
14810
  } else {
@@ -14425,6 +14892,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14425
14892
  parser_lex(parser);
14426
14893
  pm_token_t keyword = parser->previous;
14427
14894
 
14895
+ if (context_def_p(parser)) {
14896
+ pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
14897
+ }
14898
+
14428
14899
  expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
14429
14900
  pm_token_t opening = parser->previous;
14430
14901
  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE);
@@ -14474,7 +14945,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14474
14945
  expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
14475
14946
  pm_token_t in_keyword = parser->previous;
14476
14947
 
14477
- pm_node_t *collection = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_FOR_COLLECTION);
14948
+ pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_FOR_COLLECTION);
14478
14949
  pm_do_loop_stack_pop(parser);
14479
14950
 
14480
14951
  pm_token_t do_keyword;
@@ -14636,7 +15107,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14636
15107
  parser_lex(parser);
14637
15108
  pm_token_t keyword = parser->previous;
14638
15109
 
14639
- pm_node_t *predicate = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
15110
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
14640
15111
  pm_do_loop_stack_pop(parser);
14641
15112
 
14642
15113
  expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
@@ -14657,7 +15128,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14657
15128
  parser_lex(parser);
14658
15129
  pm_token_t keyword = parser->previous;
14659
15130
 
14660
- pm_node_t *predicate = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
15131
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
14661
15132
  pm_do_loop_stack_pop(parser);
14662
15133
 
14663
15134
  expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
@@ -15293,7 +15764,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15293
15764
  return (pm_node_t *) node;
15294
15765
  }
15295
15766
  case PM_TOKEN_STRING_BEGIN:
15296
- return parse_strings(parser);
15767
+ return parse_strings(parser, NULL);
15297
15768
  case PM_TOKEN_SYMBOL_BEGIN: {
15298
15769
  pm_lex_mode_t lex_mode = *parser->lex_modes.current;
15299
15770
  parser_lex(parser);
@@ -15310,10 +15781,29 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15310
15781
  }
15311
15782
 
15312
15783
  static inline pm_node_t *
15313
- parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
15784
+ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
15785
+ pm_node_t *value = parse_value_expression(parser, binding_power, diag_id);
15786
+
15787
+ // Contradicting binding powers, the right-hand-side value of the assignment allows the `rescue` modifier.
15788
+ if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
15789
+ pm_token_t rescue = parser->current;
15790
+ parser_lex(parser);
15791
+ pm_node_t *right = parse_expression(parser, binding_power, PM_ERR_RESCUE_MODIFIER_VALUE);
15792
+
15793
+ return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
15794
+ }
15795
+
15796
+ return value;
15797
+ }
15798
+
15799
+
15800
+ static inline pm_node_t *
15801
+ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
15314
15802
  pm_node_t *value = parse_starred_expression(parser, binding_power, diag_id);
15315
15803
 
15804
+ bool is_single_value = true;
15316
15805
  if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
15806
+ is_single_value = false;
15317
15807
  pm_token_t opening = not_provided(parser);
15318
15808
  pm_array_node_t *array = pm_array_node_create(parser, &opening);
15319
15809
 
@@ -15327,6 +15817,15 @@ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_
15327
15817
  }
15328
15818
  }
15329
15819
 
15820
+ // Contradicting binding powers, the right-hand-side value of the assignment allows the `rescue` modifier.
15821
+ if (is_single_value && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
15822
+ pm_token_t rescue = parser->current;
15823
+ parser_lex(parser);
15824
+ pm_node_t *right = parse_expression(parser, binding_power, PM_ERR_RESCUE_MODIFIER_VALUE);
15825
+
15826
+ return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
15827
+ }
15828
+
15330
15829
  return value;
15331
15830
  }
15332
15831
 
@@ -15352,6 +15851,25 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
15352
15851
  }
15353
15852
  }
15354
15853
 
15854
+ static bool
15855
+ name_is_identifier(pm_parser_t *parser, const uint8_t *source, size_t length) {
15856
+ if (length == 0) {
15857
+ return false;
15858
+ }
15859
+
15860
+ size_t width = char_is_identifier_start(parser, source);
15861
+ if (!width) {
15862
+ return false;
15863
+ }
15864
+
15865
+ uint8_t *cursor = ((uint8_t *)source) + width;
15866
+ while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
15867
+ cursor += width;
15868
+ }
15869
+
15870
+ return cursor == source + length;
15871
+ }
15872
+
15355
15873
  /**
15356
15874
  * Potentially change a =~ with a regular expression with named captures into a
15357
15875
  * match write node.
@@ -15362,42 +15880,77 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
15362
15880
  pm_node_t *result;
15363
15881
 
15364
15882
  if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) {
15365
- pm_match_write_node_t *match = pm_match_write_node_create(parser, call);
15883
+ // Since we should not create a MatchWriteNode when all capture names
15884
+ // are invalid, creating a MatchWriteNode is delayed here.
15885
+ pm_match_write_node_t *match = NULL;
15886
+ pm_constant_id_list_t names = { 0 };
15366
15887
 
15367
15888
  for (size_t index = 0; index < named_captures.length; index++) {
15368
- pm_string_t *name = &named_captures.strings[index];
15889
+ pm_string_t *string = &named_captures.strings[index];
15890
+
15891
+ const uint8_t *source = pm_string_source(string);
15892
+ size_t length = pm_string_length(string);
15369
15893
 
15370
- const uint8_t *source = pm_string_source(name);
15371
- size_t length = pm_string_length(name);
15894
+ pm_location_t location;
15895
+ pm_constant_id_t name;
15896
+
15897
+ // If the name of the capture group isn't a valid identifier, we do
15898
+ // not add it to the local table.
15899
+ if (!name_is_identifier(parser, source, length)) continue;
15372
15900
 
15373
- pm_constant_id_t local;
15374
15901
  if (content->type == PM_STRING_SHARED) {
15375
15902
  // If the unescaped string is a slice of the source, then we can
15376
15903
  // copy the names directly. The pointers will line up.
15377
- local = pm_parser_local_add_location(parser, source, source + length);
15378
-
15379
- if (token_is_numbered_parameter(source, source + length)) {
15380
- pm_parser_err(parser, source, source + length, PM_ERR_PARAMETER_NUMBERED_RESERVED);
15381
- }
15904
+ location = (pm_location_t) { .start = source, .end = source + length };
15905
+ name = pm_parser_constant_id_location(parser, location.start, location.end);
15906
+ pm_refute_numbered_parameter(parser, source, source + length);
15382
15907
  } else {
15383
15908
  // Otherwise, the name is a slice of the malloc-ed owned string,
15384
15909
  // in which case we need to copy it out into a new string.
15910
+ location = call->receiver->location;
15911
+
15385
15912
  void *memory = malloc(length);
15386
15913
  if (memory == NULL) abort();
15387
15914
 
15388
15915
  memcpy(memory, source, length);
15389
- local = pm_parser_local_add_owned(parser, (const uint8_t *) memory, length);
15916
+ name = pm_parser_constant_id_owned(parser, (const uint8_t *) memory, length);
15390
15917
 
15391
- if (token_is_numbered_parameter(source, source + length)) {
15918
+ if (pm_token_is_numbered_parameter(source, source + length)) {
15392
15919
  const pm_location_t *location = &call->receiver->location;
15393
- pm_parser_err_location(parser, location, PM_ERR_PARAMETER_NUMBERED_RESERVED);
15920
+ PM_PARSER_ERR_LOCATION_FORMAT(parser, location, PM_ERR_PARAMETER_NUMBERED_RESERVED, location->start);
15394
15921
  }
15395
15922
  }
15396
15923
 
15397
- pm_constant_id_list_append(&match->locals, local);
15924
+ if (name != 0) {
15925
+ // We dont want to create duplicate targets if the capture name
15926
+ // is duplicated.
15927
+ if (pm_constant_id_list_includes(&names, name)) continue;
15928
+ pm_constant_id_list_append(&names, name);
15929
+
15930
+ // Here we lazily create the MatchWriteNode since we know we're
15931
+ // about to add a target.
15932
+ if (match == NULL) match = pm_match_write_node_create(parser, call);
15933
+
15934
+ // First, find the depth of the local that is being assigned.
15935
+ int depth;
15936
+ if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
15937
+ pm_parser_local_add(parser, name);
15938
+ }
15939
+
15940
+ // Next, create the local variable target and add it to the
15941
+ // list of targets for the match.
15942
+ pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create_values(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
15943
+ pm_node_list_append(&match->targets, target);
15944
+ }
15945
+ }
15946
+
15947
+ if (match != NULL) {
15948
+ result = (pm_node_t *) match;
15949
+ } else {
15950
+ result = (pm_node_t *) call;
15398
15951
  }
15399
15952
 
15400
- result = (pm_node_t *) match;
15953
+ pm_constant_id_list_free(&names);
15401
15954
  } else {
15402
15955
  result = (pm_node_t *) call;
15403
15956
  }
@@ -15426,7 +15979,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15426
15979
  /* fallthrough */
15427
15980
  case PM_CASE_WRITABLE: {
15428
15981
  parser_lex(parser);
15429
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
15982
+ pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
15430
15983
  return parse_write(parser, node, &token, value);
15431
15984
  }
15432
15985
  case PM_SPLAT_NODE: {
@@ -15434,7 +15987,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15434
15987
  pm_multi_target_node_targets_append(parser, multi_target, node);
15435
15988
 
15436
15989
  parser_lex(parser);
15437
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
15990
+ pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
15438
15991
  return parse_write(parser, (pm_node_t *) multi_target, &token, value);
15439
15992
  }
15440
15993
  default:
@@ -15456,7 +16009,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15456
16009
  case PM_GLOBAL_VARIABLE_READ_NODE: {
15457
16010
  parser_lex(parser);
15458
16011
 
15459
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16012
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15460
16013
  pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
15461
16014
 
15462
16015
  pm_node_destroy(parser, node);
@@ -15465,7 +16018,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15465
16018
  case PM_CLASS_VARIABLE_READ_NODE: {
15466
16019
  parser_lex(parser);
15467
16020
 
15468
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16021
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15469
16022
  pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
15470
16023
 
15471
16024
  pm_node_destroy(parser, node);
@@ -15474,13 +16027,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15474
16027
  case PM_CONSTANT_PATH_NODE: {
15475
16028
  parser_lex(parser);
15476
16029
 
15477
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16030
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15478
16031
  return (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
15479
16032
  }
15480
16033
  case PM_CONSTANT_READ_NODE: {
15481
16034
  parser_lex(parser);
15482
16035
 
15483
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16036
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15484
16037
  pm_node_t *result = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
15485
16038
 
15486
16039
  pm_node_destroy(parser, node);
@@ -15489,7 +16042,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15489
16042
  case PM_INSTANCE_VARIABLE_READ_NODE: {
15490
16043
  parser_lex(parser);
15491
16044
 
15492
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16045
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15493
16046
  pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
15494
16047
 
15495
16048
  pm_node_destroy(parser, node);
@@ -15499,7 +16052,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15499
16052
  pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
15500
16053
  parser_lex(parser);
15501
16054
 
15502
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16055
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15503
16056
  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
15504
16057
 
15505
16058
  pm_node_destroy(parser, node);
@@ -15513,14 +16066,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15513
16066
  // receiver that could have been a local variable) then we
15514
16067
  // will transform it into a local variable write.
15515
16068
  if (pm_call_node_variable_call_p(cast)) {
15516
- pm_location_t message_loc = cast->message_loc;
15517
- pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc.start, message_loc.end);
15518
-
15519
- if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
15520
- pm_parser_err_location(parser, &message_loc, PM_ERR_PARAMETER_NUMBERED_RESERVED);
15521
- }
16069
+ pm_location_t *message_loc = &cast->message_loc;
16070
+ pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
15522
16071
 
15523
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16072
+ pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
16073
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15524
16074
  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
15525
16075
 
15526
16076
  pm_node_destroy(parser, (pm_node_t *) cast);
@@ -15531,7 +16081,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15531
16081
  // this is an aref expression, and we can transform it into
15532
16082
  // an aset expression.
15533
16083
  if (pm_call_node_index_p(cast)) {
15534
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16084
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15535
16085
  return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
15536
16086
  }
15537
16087
 
@@ -15543,7 +16093,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15543
16093
  }
15544
16094
 
15545
16095
  parse_call_operator_write(parser, cast, &token);
15546
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16096
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15547
16097
  return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
15548
16098
  }
15549
16099
  case PM_MULTI_WRITE_NODE: {
@@ -15570,7 +16120,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15570
16120
  case PM_GLOBAL_VARIABLE_READ_NODE: {
15571
16121
  parser_lex(parser);
15572
16122
 
15573
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16123
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15574
16124
  pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
15575
16125
 
15576
16126
  pm_node_destroy(parser, node);
@@ -15579,7 +16129,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15579
16129
  case PM_CLASS_VARIABLE_READ_NODE: {
15580
16130
  parser_lex(parser);
15581
16131
 
15582
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16132
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15583
16133
  pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
15584
16134
 
15585
16135
  pm_node_destroy(parser, node);
@@ -15588,13 +16138,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15588
16138
  case PM_CONSTANT_PATH_NODE: {
15589
16139
  parser_lex(parser);
15590
16140
 
15591
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16141
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15592
16142
  return (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
15593
16143
  }
15594
16144
  case PM_CONSTANT_READ_NODE: {
15595
16145
  parser_lex(parser);
15596
16146
 
15597
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16147
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15598
16148
  pm_node_t *result = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
15599
16149
 
15600
16150
  pm_node_destroy(parser, node);
@@ -15603,7 +16153,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15603
16153
  case PM_INSTANCE_VARIABLE_READ_NODE: {
15604
16154
  parser_lex(parser);
15605
16155
 
15606
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16156
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15607
16157
  pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
15608
16158
 
15609
16159
  pm_node_destroy(parser, node);
@@ -15613,7 +16163,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15613
16163
  pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
15614
16164
  parser_lex(parser);
15615
16165
 
15616
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16166
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15617
16167
  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
15618
16168
 
15619
16169
  pm_node_destroy(parser, node);
@@ -15627,14 +16177,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15627
16177
  // receiver that could have been a local variable) then we
15628
16178
  // will transform it into a local variable write.
15629
16179
  if (pm_call_node_variable_call_p(cast)) {
15630
- pm_location_t message_loc = cast->message_loc;
15631
- pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc.start, message_loc.end);
16180
+ pm_location_t *message_loc = &cast->message_loc;
16181
+ pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
15632
16182
 
15633
- if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
15634
- pm_parser_err_location(parser, &message_loc, PM_ERR_PARAMETER_NUMBERED_RESERVED);
15635
- }
15636
-
15637
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16183
+ pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
16184
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15638
16185
  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
15639
16186
 
15640
16187
  pm_node_destroy(parser, (pm_node_t *) cast);
@@ -15645,7 +16192,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15645
16192
  // this is an aref expression, and we can transform it into
15646
16193
  // an aset expression.
15647
16194
  if (pm_call_node_index_p(cast)) {
15648
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16195
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15649
16196
  return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
15650
16197
  }
15651
16198
 
@@ -15657,7 +16204,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15657
16204
  }
15658
16205
 
15659
16206
  parse_call_operator_write(parser, cast, &token);
15660
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16207
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15661
16208
  return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
15662
16209
  }
15663
16210
  case PM_MULTI_WRITE_NODE: {
@@ -15694,7 +16241,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15694
16241
  case PM_GLOBAL_VARIABLE_READ_NODE: {
15695
16242
  parser_lex(parser);
15696
16243
 
15697
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16244
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15698
16245
  pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
15699
16246
 
15700
16247
  pm_node_destroy(parser, node);
@@ -15703,7 +16250,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15703
16250
  case PM_CLASS_VARIABLE_READ_NODE: {
15704
16251
  parser_lex(parser);
15705
16252
 
15706
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16253
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15707
16254
  pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
15708
16255
 
15709
16256
  pm_node_destroy(parser, node);
@@ -15712,13 +16259,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15712
16259
  case PM_CONSTANT_PATH_NODE: {
15713
16260
  parser_lex(parser);
15714
16261
 
15715
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16262
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15716
16263
  return (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
15717
16264
  }
15718
16265
  case PM_CONSTANT_READ_NODE: {
15719
16266
  parser_lex(parser);
15720
16267
 
15721
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16268
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15722
16269
  pm_node_t *result = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
15723
16270
 
15724
16271
  pm_node_destroy(parser, node);
@@ -15727,7 +16274,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15727
16274
  case PM_INSTANCE_VARIABLE_READ_NODE: {
15728
16275
  parser_lex(parser);
15729
16276
 
15730
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16277
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15731
16278
  pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
15732
16279
 
15733
16280
  pm_node_destroy(parser, node);
@@ -15737,7 +16284,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15737
16284
  pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
15738
16285
  parser_lex(parser);
15739
16286
 
15740
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16287
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15741
16288
  pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
15742
16289
 
15743
16290
  pm_node_destroy(parser, node);
@@ -15751,14 +16298,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15751
16298
  // receiver that could have been a local variable) then we
15752
16299
  // will transform it into a local variable write.
15753
16300
  if (pm_call_node_variable_call_p(cast)) {
15754
- pm_location_t message_loc = cast->message_loc;
15755
- pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc.start, message_loc.end);
16301
+ pm_location_t *message_loc = &cast->message_loc;
16302
+ pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
15756
16303
 
15757
- if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
15758
- pm_parser_err_location(parser, &message_loc, PM_ERR_PARAMETER_NUMBERED_RESERVED);
15759
- }
15760
-
15761
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16304
+ pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
16305
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15762
16306
  pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
15763
16307
 
15764
16308
  pm_node_destroy(parser, (pm_node_t *) cast);
@@ -15769,7 +16313,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15769
16313
  // this is an aref expression, and we can transform it into
15770
16314
  // an aset expression.
15771
16315
  if (pm_call_node_index_p(cast)) {
15772
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16316
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15773
16317
  return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
15774
16318
  }
15775
16319
 
@@ -15781,7 +16325,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15781
16325
  }
15782
16326
 
15783
16327
  parse_call_operator_write(parser, cast, &token);
15784
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16328
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15785
16329
  return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
15786
16330
  }
15787
16331
  case PM_MULTI_WRITE_NODE: {
@@ -15969,14 +16513,14 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15969
16513
  pm_token_t keyword = parser->current;
15970
16514
  parser_lex(parser);
15971
16515
 
15972
- pm_node_t *predicate = parse_expression(parser, binding_power, PM_ERR_CONDITIONAL_IF_PREDICATE);
16516
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_ERR_CONDITIONAL_IF_PREDICATE);
15973
16517
  return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
15974
16518
  }
15975
16519
  case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
15976
16520
  pm_token_t keyword = parser->current;
15977
16521
  parser_lex(parser);
15978
16522
 
15979
- pm_node_t *predicate = parse_expression(parser, binding_power, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
16523
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
15980
16524
  return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
15981
16525
  }
15982
16526
  case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
@@ -15984,7 +16528,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15984
16528
  pm_statements_node_t *statements = pm_statements_node_create(parser);
15985
16529
  pm_statements_node_body_append(statements, node);
15986
16530
 
15987
- pm_node_t *predicate = parse_expression(parser, binding_power, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
16531
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
15988
16532
  return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
15989
16533
  }
15990
16534
  case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
@@ -15992,10 +16536,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15992
16536
  pm_statements_node_t *statements = pm_statements_node_create(parser);
15993
16537
  pm_statements_node_body_append(statements, node);
15994
16538
 
15995
- pm_node_t *predicate = parse_expression(parser, binding_power, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
16539
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
15996
16540
  return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
15997
16541
  }
15998
16542
  case PM_TOKEN_QUESTION_MARK: {
16543
+ pm_token_t qmark = parser->current;
15999
16544
  parser_lex(parser);
16000
16545
  pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_TERNARY_EXPRESSION_TRUE);
16001
16546
 
@@ -16009,7 +16554,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16009
16554
  pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
16010
16555
  pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
16011
16556
 
16012
- return (pm_node_t *) pm_if_node_ternary_create(parser, node, true_expression, &colon, false_expression);
16557
+ return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
16013
16558
  }
16014
16559
 
16015
16560
  accept1(parser, PM_TOKEN_NEWLINE);
@@ -16018,7 +16563,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16018
16563
  pm_token_t colon = parser->previous;
16019
16564
  pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_TERNARY_EXPRESSION_FALSE);
16020
16565
 
16021
- return (pm_node_t *) pm_if_node_ternary_create(parser, node, true_expression, &colon, false_expression);
16566
+ return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
16022
16567
  }
16023
16568
  case PM_TOKEN_COLON_COLON: {
16024
16569
  parser_lex(parser);
@@ -16212,6 +16757,12 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagn
16212
16757
  current_binding_powers.binary
16213
16758
  ) {
16214
16759
  node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right);
16760
+ if (
16761
+ current_binding_powers.nonassoc &&
16762
+ current_binding_powers.right <= pm_binding_powers[parser->current.type].left
16763
+ ) {
16764
+ break;
16765
+ }
16215
16766
  }
16216
16767
 
16217
16768
  return node;