prism 0.17.0 → 0.18.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +37 -1
  3. data/Makefile +5 -5
  4. data/README.md +2 -2
  5. data/config.yml +26 -13
  6. data/docs/build_system.md +6 -6
  7. data/docs/building.md +1 -1
  8. data/docs/configuration.md +1 -0
  9. data/docs/encoding.md +68 -32
  10. data/docs/heredocs.md +1 -1
  11. data/docs/javascript.md +29 -1
  12. data/docs/releasing.md +4 -1
  13. data/docs/ruby_api.md +14 -0
  14. data/ext/prism/api_node.c +74 -45
  15. data/ext/prism/extconf.rb +91 -127
  16. data/ext/prism/extension.c +4 -1
  17. data/ext/prism/extension.h +1 -1
  18. data/include/prism/ast.h +148 -133
  19. data/include/prism/diagnostic.h +27 -1
  20. data/include/prism/enc/pm_encoding.h +42 -1
  21. data/include/prism/parser.h +6 -0
  22. data/include/prism/version.h +2 -2
  23. data/lib/prism/compiler.rb +3 -3
  24. data/lib/prism/debug.rb +4 -0
  25. data/lib/prism/desugar_compiler.rb +1 -0
  26. data/lib/prism/dispatcher.rb +14 -14
  27. data/lib/prism/dot_visitor.rb +4334 -0
  28. data/lib/prism/dsl.rb +11 -11
  29. data/lib/prism/ffi.rb +3 -3
  30. data/lib/prism/mutation_compiler.rb +6 -6
  31. data/lib/prism/node.rb +182 -113
  32. data/lib/prism/node_ext.rb +61 -3
  33. data/lib/prism/parse_result.rb +46 -12
  34. data/lib/prism/serialize.rb +124 -130
  35. data/lib/prism/visitor.rb +3 -3
  36. data/lib/prism.rb +1 -0
  37. data/prism.gemspec +5 -1
  38. data/rbi/prism.rbi +5565 -5540
  39. data/rbi/prism_static.rbi +138 -142
  40. data/sig/prism.rbs +47 -32
  41. data/src/diagnostic.c +61 -3
  42. data/src/enc/pm_big5.c +63 -0
  43. data/src/enc/pm_cp51932.c +57 -0
  44. data/src/enc/pm_euc_jp.c +10 -0
  45. data/src/enc/pm_gbk.c +5 -2
  46. data/src/enc/pm_tables.c +1478 -148
  47. data/src/node.c +33 -21
  48. data/src/prettyprint.c +1027 -925
  49. data/src/prism.c +925 -374
  50. data/src/regexp.c +12 -12
  51. data/src/serialize.c +36 -9
  52. metadata +6 -2
data/src/prism.c CHANGED
@@ -16,7 +16,7 @@ pm_version(void) {
16
16
 
17
17
  #ifndef PM_DEBUG_LOGGING
18
18
  /**
19
- * Debugging logging will provide you will additional debugging functions as
19
+ * Debugging logging will provide you with additional debugging functions as
20
20
  * well as automatically replace some functions with their debugging
21
21
  * counterparts.
22
22
  */
@@ -275,6 +275,16 @@ lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
275
275
  return lex_mode_push(parser, lex_mode);
276
276
  }
277
277
 
278
+ /**
279
+ * Push on a new list lex mode that is only used for compatibility. This is
280
+ * called when we're at the end of the file. We want the parser to be able to
281
+ * perform its normal error tolerance.
282
+ */
283
+ static inline bool
284
+ lex_mode_push_list_eof(pm_parser_t *parser) {
285
+ return lex_mode_push_list(parser, false, '\0');
286
+ }
287
+
278
288
  /**
279
289
  * Push on a new regexp lex mode.
280
290
  */
@@ -346,6 +356,16 @@ lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed
346
356
  return lex_mode_push(parser, lex_mode);
347
357
  }
348
358
 
359
+ /**
360
+ * Push on a new string lex mode that is only used for compatibility. This is
361
+ * called when we're at the end of the file. We want the parser to be able to
362
+ * perform its normal error tolerance.
363
+ */
364
+ static inline bool
365
+ lex_mode_push_string_eof(pm_parser_t *parser) {
366
+ return lex_mode_push_string(parser, false, false, '\0', '\0');
367
+ }
368
+
349
369
  /**
350
370
  * Pop the current lex state off the stack. If we're within the pre-allocated
351
371
  * space of the lex state stack, then we'll just decrement the index. Otherwise
@@ -395,7 +415,7 @@ lex_state_ignored_p(pm_parser_t *parser) {
395
415
 
396
416
  static inline bool
397
417
  lex_state_beg_p(pm_parser_t *parser) {
398
- return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || (parser->lex_state == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
418
+ return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
399
419
  }
400
420
 
401
421
  static inline bool
@@ -459,6 +479,11 @@ pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_
459
479
  pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
460
480
  }
461
481
 
482
+ /**
483
+ * Append an error to the list of errors on the parser using a format string.
484
+ */
485
+ #define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
486
+
462
487
  /**
463
488
  * Append an error to the list of errors on the parser using the location of the
464
489
  * current token.
@@ -469,12 +494,10 @@ pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
469
494
  }
470
495
 
471
496
  /**
472
- * Append an error to the list of errors on the parser using the given location.
497
+ * Append an error to the list of errors on the parser at the given location,
498
+ * using a format string.
473
499
  */
474
- static inline void
475
- pm_parser_err_location(pm_parser_t *parser, const pm_location_t *location, pm_diagnostic_id_t diag_id) {
476
- pm_parser_err(parser, location->start, location->end, diag_id);
477
- }
500
+ #define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, (location)->start, (location)->end, diag_id, __VA_ARGS__)
478
501
 
479
502
  /**
480
503
  * Append an error to the list of errors on the parser using the location of the
@@ -485,6 +508,12 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_
485
508
  pm_parser_err(parser, node->location.start, node->location.end, diag_id);
486
509
  }
487
510
 
511
+ /**
512
+ * Append an error to the list of errors on the parser using the location of the
513
+ * given node and a format string.
514
+ */
515
+ #define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, node->location.start, node->location.end, diag_id, __VA_ARGS__)
516
+
488
517
  /**
489
518
  * Append an error to the list of errors on the parser using the location of the
490
519
  * previous token.
@@ -503,6 +532,12 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
503
532
  pm_parser_err(parser, token->start, token->end, diag_id);
504
533
  }
505
534
 
535
+ /**
536
+ * Append an error to the list of errors on the parser using the location of the
537
+ * given token and a format string.
538
+ */
539
+ #define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, token->start, token->end, diag_id, __VA_ARGS__)
540
+
506
541
  /**
507
542
  * Append a warning to the list of warnings on the parser.
508
543
  */
@@ -567,6 +602,102 @@ pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *toke
567
602
  return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
568
603
  }
569
604
 
605
+ /**
606
+ * Check whether or not the given node is a value expression.
607
+ * If the node is a value expression, it returns NULL.
608
+ * If not, it returns a pointer to the node that should be reported as a "void expression".
609
+ */
610
+ static pm_node_t*
611
+ pm_check_value_expression(pm_node_t *node) {
612
+ pm_node_t* void_node = NULL;
613
+
614
+ while (node != NULL) {
615
+ switch (PM_NODE_TYPE(node)) {
616
+ case PM_RETURN_NODE:
617
+ case PM_BREAK_NODE:
618
+ case PM_NEXT_NODE:
619
+ case PM_REDO_NODE:
620
+ case PM_RETRY_NODE:
621
+ case PM_MATCH_REQUIRED_NODE:
622
+ return void_node != NULL ? void_node : node;
623
+ case PM_MATCH_PREDICATE_NODE:
624
+ return NULL;
625
+ case PM_BEGIN_NODE: {
626
+ pm_begin_node_t *cast = (pm_begin_node_t *) node;
627
+ node = (pm_node_t *) cast->statements;
628
+ break;
629
+ }
630
+ case PM_PARENTHESES_NODE: {
631
+ pm_parentheses_node_t *cast = (pm_parentheses_node_t *) node;
632
+ node = (pm_node_t *) cast->body;
633
+ break;
634
+ }
635
+ case PM_STATEMENTS_NODE: {
636
+ pm_statements_node_t *cast = (pm_statements_node_t *) node;
637
+ node = cast->body.nodes[cast->body.size - 1];
638
+ break;
639
+ }
640
+ case PM_IF_NODE: {
641
+ pm_if_node_t *cast = (pm_if_node_t *) node;
642
+ if (cast->statements == NULL || cast->consequent == NULL) {
643
+ return NULL;
644
+ }
645
+ pm_node_t *vn = pm_check_value_expression((pm_node_t *) cast->statements);
646
+ if (vn == NULL) {
647
+ return NULL;
648
+ }
649
+ if (void_node == NULL) {
650
+ void_node = vn;
651
+ }
652
+ node = cast->consequent;
653
+ break;
654
+ }
655
+ case PM_UNLESS_NODE: {
656
+ pm_unless_node_t *cast = (pm_unless_node_t *) node;
657
+ if (cast->statements == NULL || cast->consequent == NULL) {
658
+ return NULL;
659
+ }
660
+ pm_node_t *vn = pm_check_value_expression((pm_node_t *) cast->statements);
661
+ if (vn == NULL) {
662
+ return NULL;
663
+ }
664
+ if (void_node == NULL) {
665
+ void_node = vn;
666
+ }
667
+ node = (pm_node_t *) cast->consequent;
668
+ break;
669
+ }
670
+ case PM_ELSE_NODE: {
671
+ pm_else_node_t *cast = (pm_else_node_t *) node;
672
+ node = (pm_node_t *) cast->statements;
673
+ break;
674
+ }
675
+ case PM_AND_NODE: {
676
+ pm_and_node_t *cast = (pm_and_node_t *) node;
677
+ node = cast->left;
678
+ break;
679
+ }
680
+ case PM_OR_NODE: {
681
+ pm_or_node_t *cast = (pm_or_node_t *) node;
682
+ node = cast->left;
683
+ break;
684
+ }
685
+ default:
686
+ return NULL;
687
+ }
688
+ }
689
+
690
+ return NULL;
691
+ }
692
+
693
+ static inline void
694
+ pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
695
+ pm_node_t *void_node = pm_check_value_expression(node);
696
+ if (void_node != NULL) {
697
+ pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
698
+ }
699
+ }
700
+
570
701
  /**
571
702
  * The predicate of conditional nodes can change what would otherwise be regular
572
703
  * nodes into specialized nodes. For example:
@@ -680,6 +811,27 @@ typedef struct {
680
811
  pm_node_t *block;
681
812
  } pm_arguments_t;
682
813
 
814
+ /**
815
+ * Retrieve the end location of a `pm_arguments_t` object.
816
+ */
817
+ static inline const uint8_t *
818
+ pm_arguments_end(pm_arguments_t *arguments) {
819
+ if (arguments->block != NULL) {
820
+ const uint8_t *end = arguments->block->location.end;
821
+ if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
822
+ end = arguments->closing_loc.end;
823
+ }
824
+ return end;
825
+ }
826
+ if (arguments->closing_loc.start != NULL) {
827
+ return arguments->closing_loc.end;
828
+ }
829
+ if (arguments->arguments != NULL) {
830
+ return arguments->arguments->base.location.end;
831
+ }
832
+ return arguments->closing_loc.end;
833
+ }
834
+
683
835
  /**
684
836
  * Check that we're not about to attempt to attach a brace block to a call that
685
837
  * has arguments without parentheses.
@@ -894,6 +1046,8 @@ pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node
894
1046
  */
895
1047
  static pm_and_node_t *
896
1048
  pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
1049
+ pm_assert_value_expression(parser, left);
1050
+
897
1051
  pm_and_node_t *node = PM_ALLOC_NODE(parser, pm_and_node_t);
898
1052
 
899
1053
  *node = (pm_and_node_t) {
@@ -1488,14 +1642,12 @@ pm_call_node_create(pm_parser_t *parser) {
1488
1642
  */
1489
1643
  static pm_call_node_t *
1490
1644
  pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
1645
+ pm_assert_value_expression(parser, receiver);
1646
+
1491
1647
  pm_call_node_t *node = pm_call_node_create(parser);
1492
1648
 
1493
1649
  node->base.location.start = receiver->location.start;
1494
- if (arguments->block != NULL) {
1495
- node->base.location.end = arguments->block->location.end;
1496
- } else {
1497
- node->base.location.end = arguments->closing_loc.end;
1498
- }
1650
+ node->base.location.end = pm_arguments_end(arguments);
1499
1651
 
1500
1652
  node->receiver = receiver;
1501
1653
  node->message_loc.start = arguments->opening_loc.start;
@@ -1515,6 +1667,9 @@ pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_
1515
1667
  */
1516
1668
  static pm_call_node_t *
1517
1669
  pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument) {
1670
+ pm_assert_value_expression(parser, receiver);
1671
+ pm_assert_value_expression(parser, argument);
1672
+
1518
1673
  pm_call_node_t *node = pm_call_node_create(parser);
1519
1674
 
1520
1675
  node->base.location.start = MIN(receiver->location.start, argument->location.start);
@@ -1536,18 +1691,16 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t
1536
1691
  */
1537
1692
  static pm_call_node_t *
1538
1693
  pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
1694
+ pm_assert_value_expression(parser, receiver);
1695
+
1539
1696
  pm_call_node_t *node = pm_call_node_create(parser);
1540
1697
 
1541
1698
  node->base.location.start = receiver->location.start;
1542
- if (arguments->block != NULL) {
1543
- node->base.location.end = arguments->block->location.end;
1544
- } else if (arguments->closing_loc.start != NULL) {
1545
- node->base.location.end = arguments->closing_loc.end;
1546
- } else if (arguments->arguments != NULL) {
1547
- node->base.location.end = arguments->arguments->base.location.end;
1548
- } else {
1549
- node->base.location.end = message->end;
1699
+ const uint8_t *end = pm_arguments_end(arguments);
1700
+ if (end == NULL) {
1701
+ end = message->end;
1550
1702
  }
1703
+ node->base.location.end = end;
1551
1704
 
1552
1705
  node->receiver = receiver;
1553
1706
  node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
@@ -1574,15 +1727,7 @@ pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments
1574
1727
  pm_call_node_t *node = pm_call_node_create(parser);
1575
1728
 
1576
1729
  node->base.location.start = message->start;
1577
- if (arguments->block != NULL) {
1578
- node->base.location.end = arguments->block->location.end;
1579
- } else if (arguments->closing_loc.start != NULL) {
1580
- node->base.location.end = arguments->closing_loc.end;
1581
- } else if (arguments->arguments != NULL) {
1582
- node->base.location.end = arguments->arguments->base.location.end;
1583
- } else {
1584
- node->base.location.end = arguments->closing_loc.end;
1585
- }
1730
+ node->base.location.end = pm_arguments_end(arguments);
1586
1731
 
1587
1732
  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
1588
1733
  node->opening_loc = arguments->opening_loc;
@@ -1599,6 +1744,8 @@ pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments
1599
1744
  */
1600
1745
  static pm_call_node_t *
1601
1746
  pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
1747
+ pm_assert_value_expression(parser, receiver);
1748
+
1602
1749
  pm_call_node_t *node = pm_call_node_create(parser);
1603
1750
 
1604
1751
  node->base.location.start = message->start;
@@ -1623,14 +1770,12 @@ pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *me
1623
1770
  */
1624
1771
  static pm_call_node_t *
1625
1772
  pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
1773
+ pm_assert_value_expression(parser, receiver);
1774
+
1626
1775
  pm_call_node_t *node = pm_call_node_create(parser);
1627
1776
 
1628
1777
  node->base.location.start = receiver->location.start;
1629
- if (arguments->block != NULL) {
1630
- node->base.location.end = arguments->block->location.end;
1631
- } else {
1632
- node->base.location.end = arguments->closing_loc.end;
1633
- }
1778
+ node->base.location.end = pm_arguments_end(arguments);
1634
1779
 
1635
1780
  node->receiver = receiver;
1636
1781
  node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
@@ -1652,6 +1797,8 @@ pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token
1652
1797
  */
1653
1798
  static pm_call_node_t *
1654
1799
  pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
1800
+ pm_assert_value_expression(parser, receiver);
1801
+
1655
1802
  pm_call_node_t *node = pm_call_node_create(parser);
1656
1803
 
1657
1804
  node->base.location.start = operator->start;
@@ -1981,7 +2128,7 @@ pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t
1981
2128
  * Allocate and initialize a new CaseNode node.
1982
2129
  */
1983
2130
  static pm_case_node_t *
1984
- pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, pm_else_node_t *consequent, const pm_token_t *end_keyword) {
2131
+ pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
1985
2132
  pm_case_node_t *node = PM_ALLOC_NODE(parser, pm_case_node_t);
1986
2133
 
1987
2134
  *node = (pm_case_node_t) {
@@ -1993,7 +2140,7 @@ pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node
1993
2140
  },
1994
2141
  },
1995
2142
  .predicate = predicate,
1996
- .consequent = consequent,
2143
+ .consequent = NULL,
1997
2144
  .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
1998
2145
  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
1999
2146
  .conditions = { 0 }
@@ -2007,7 +2154,7 @@ pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node
2007
2154
  */
2008
2155
  static void
2009
2156
  pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
2010
- assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE) || PM_NODE_TYPE_P(condition, PM_IN_NODE));
2157
+ assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
2011
2158
 
2012
2159
  pm_node_list_append(&node->conditions, condition);
2013
2160
  node->base.location.end = condition->location.end;
@@ -2031,6 +2178,60 @@ pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_key
2031
2178
  node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
2032
2179
  }
2033
2180
 
2181
+ /**
2182
+ * Allocate and initialize a new CaseMatchNode node.
2183
+ */
2184
+ static pm_case_match_node_t *
2185
+ pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
2186
+ pm_case_match_node_t *node = PM_ALLOC_NODE(parser, pm_case_match_node_t);
2187
+
2188
+ *node = (pm_case_match_node_t) {
2189
+ {
2190
+ .type = PM_CASE_MATCH_NODE,
2191
+ .location = {
2192
+ .start = case_keyword->start,
2193
+ .end = end_keyword->end
2194
+ },
2195
+ },
2196
+ .predicate = predicate,
2197
+ .consequent = NULL,
2198
+ .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
2199
+ .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
2200
+ .conditions = { 0 }
2201
+ };
2202
+
2203
+ return node;
2204
+ }
2205
+
2206
+ /**
2207
+ * Append a new condition to a CaseMatchNode node.
2208
+ */
2209
+ static void
2210
+ pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
2211
+ assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
2212
+
2213
+ pm_node_list_append(&node->conditions, condition);
2214
+ node->base.location.end = condition->location.end;
2215
+ }
2216
+
2217
+ /**
2218
+ * Set the consequent of a CaseMatchNode node.
2219
+ */
2220
+ static void
2221
+ pm_case_match_node_consequent_set(pm_case_match_node_t *node, pm_else_node_t *consequent) {
2222
+ node->consequent = consequent;
2223
+ node->base.location.end = consequent->base.location.end;
2224
+ }
2225
+
2226
+ /**
2227
+ * Set the end location for a CaseMatchNode node.
2228
+ */
2229
+ static void
2230
+ pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
2231
+ node->base.location.end = end_keyword->end;
2232
+ node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
2233
+ }
2234
+
2034
2235
  /**
2035
2236
  * Allocate a new ClassNode node.
2036
2237
  */
@@ -3075,6 +3276,7 @@ static pm_if_node_t *
3075
3276
  pm_if_node_create(pm_parser_t *parser,
3076
3277
  const pm_token_t *if_keyword,
3077
3278
  pm_node_t *predicate,
3279
+ const pm_token_t *then_keyword,
3078
3280
  pm_statements_node_t *statements,
3079
3281
  pm_node_t *consequent,
3080
3282
  const pm_token_t *end_keyword
@@ -3104,6 +3306,7 @@ pm_if_node_create(pm_parser_t *parser,
3104
3306
  },
3105
3307
  .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
3106
3308
  .predicate = predicate,
3309
+ .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
3107
3310
  .statements = statements,
3108
3311
  .consequent = consequent,
3109
3312
  .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
@@ -3134,6 +3337,7 @@ pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_t
3134
3337
  },
3135
3338
  .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
3136
3339
  .predicate = predicate,
3340
+ .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
3137
3341
  .statements = statements,
3138
3342
  .consequent = NULL,
3139
3343
  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
@@ -3146,7 +3350,8 @@ pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_t
3146
3350
  * Allocate and initialize an if node from a ternary expression.
3147
3351
  */
3148
3352
  static pm_if_node_t *
3149
- pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
3353
+ pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
3354
+ pm_assert_value_expression(parser, predicate);
3150
3355
  pm_conditional_predicate(predicate);
3151
3356
 
3152
3357
  pm_statements_node_t *if_statements = pm_statements_node_create(parser);
@@ -3171,6 +3376,7 @@ pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, pm_node_t *
3171
3376
  },
3172
3377
  .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
3173
3378
  .predicate = predicate,
3379
+ .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
3174
3380
  .statements = if_statements,
3175
3381
  .consequent = (pm_node_t *)else_node,
3176
3382
  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
@@ -3871,39 +4077,68 @@ pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name,
3871
4077
  return node;
3872
4078
  }
3873
4079
 
4080
+ /**
4081
+ * Returns true if the given bounds comprise a numbered parameter (i.e., they
4082
+ * are of the form /^_[1-9]$/).
4083
+ */
3874
4084
  static inline bool
3875
- token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
4085
+ pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
3876
4086
  return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (pm_char_is_decimal_digit(start[1]));
3877
4087
  }
3878
4088
 
3879
4089
  /**
3880
- * Allocate and initialize a new LocalVariableTargetNode node.
4090
+ * Ensure the given bounds do not comprise a numbered parameter. If they do, add
4091
+ * an appropriate error message to the parser.
4092
+ */
4093
+ static inline void
4094
+ pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
4095
+ if (pm_token_is_numbered_parameter(start, end)) {
4096
+ PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
4097
+ }
4098
+ }
4099
+
4100
+ /**
4101
+ * Allocate and initialize a new LocalVariableTargetNode node with the given
4102
+ * name and depth.
3881
4103
  */
3882
4104
  static pm_local_variable_target_node_t *
3883
- pm_local_variable_target_node_create(pm_parser_t *parser, const pm_token_t *name) {
4105
+ pm_local_variable_target_node_create_values(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
3884
4106
  pm_local_variable_target_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_target_node_t);
3885
4107
 
3886
- if (token_is_numbered_parameter(name->start, name->end)) {
3887
- pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NUMBERED_RESERVED);
3888
- }
3889
-
3890
4108
  *node = (pm_local_variable_target_node_t) {
3891
4109
  {
3892
4110
  .type = PM_LOCAL_VARIABLE_TARGET_NODE,
3893
- .location = PM_LOCATION_TOKEN_VALUE(name)
4111
+ .location = *location
3894
4112
  },
3895
- .name = pm_parser_constant_id_token(parser, name),
3896
- .depth = 0
4113
+ .name = name,
4114
+ .depth = depth
3897
4115
  };
3898
4116
 
3899
4117
  return node;
3900
4118
  }
3901
4119
 
4120
+ /**
4121
+ * Allocate and initialize a new LocalVariableTargetNode node.
4122
+ */
4123
+ static pm_local_variable_target_node_t *
4124
+ pm_local_variable_target_node_create(pm_parser_t *parser, const pm_token_t *name) {
4125
+ pm_refute_numbered_parameter(parser, name->start, name->end);
4126
+
4127
+ return pm_local_variable_target_node_create_values(
4128
+ parser,
4129
+ &(pm_location_t) { .start = name->start, .end = name->end },
4130
+ pm_parser_constant_id_token(parser, name),
4131
+ 0
4132
+ );
4133
+ }
4134
+
3902
4135
  /**
3903
4136
  * Allocate and initialize a new MatchPredicateNode node.
3904
4137
  */
3905
4138
  static pm_match_predicate_node_t *
3906
4139
  pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
4140
+ pm_assert_value_expression(parser, value);
4141
+
3907
4142
  pm_match_predicate_node_t *node = PM_ALLOC_NODE(parser, pm_match_predicate_node_t);
3908
4143
 
3909
4144
  *node = (pm_match_predicate_node_t) {
@@ -3927,6 +4162,8 @@ pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t
3927
4162
  */
3928
4163
  static pm_match_required_node_t *
3929
4164
  pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
4165
+ pm_assert_value_expression(parser, value);
4166
+
3930
4167
  pm_match_required_node_t *node = PM_ALLOC_NODE(parser, pm_match_required_node_t);
3931
4168
 
3932
4169
  *node = (pm_match_required_node_t) {
@@ -3957,10 +4194,10 @@ pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
3957
4194
  .type = PM_MATCH_WRITE_NODE,
3958
4195
  .location = call->base.location
3959
4196
  },
3960
- .call = call
4197
+ .call = call,
4198
+ .targets = { 0 }
3961
4199
  };
3962
4200
 
3963
- pm_constant_id_list_init(&node->locals);
3964
4201
  return node;
3965
4202
  }
3966
4203
 
@@ -4200,6 +4437,8 @@ pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, c
4200
4437
  */
4201
4438
  static pm_or_node_t *
4202
4439
  pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
4440
+ pm_assert_value_expression(parser, left);
4441
+
4203
4442
  pm_or_node_t *node = PM_ALLOC_NODE(parser, pm_or_node_t);
4204
4443
 
4205
4444
  *node = (pm_or_node_t) {
@@ -4470,6 +4709,9 @@ pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, con
4470
4709
  */
4471
4710
  static pm_range_node_t *
4472
4711
  pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
4712
+ pm_assert_value_expression(parser, left);
4713
+ pm_assert_value_expression(parser, right);
4714
+
4473
4715
  pm_range_node_t *node = PM_ALLOC_NODE(parser, pm_range_node_t);
4474
4716
  pm_node_flags_t flags = 0;
4475
4717
 
@@ -4885,28 +5127,6 @@ pm_statements_node_body_append(pm_statements_node_t *node, pm_node_t *statement)
4885
5127
  statement->flags |= PM_NODE_FLAG_NEWLINE;
4886
5128
  }
4887
5129
 
4888
- /**
4889
- * Allocate a new StringConcatNode node.
4890
- */
4891
- static pm_string_concat_node_t *
4892
- pm_string_concat_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right) {
4893
- pm_string_concat_node_t *node = PM_ALLOC_NODE(parser, pm_string_concat_node_t);
4894
-
4895
- *node = (pm_string_concat_node_t) {
4896
- {
4897
- .type = PM_STRING_CONCAT_NODE,
4898
- .location = {
4899
- .start = left->location.start,
4900
- .end = right->location.end
4901
- }
4902
- },
4903
- .left = left,
4904
- .right = right
4905
- };
4906
-
4907
- return node;
4908
- }
4909
-
4910
5130
  /**
4911
5131
  * Allocate a new StringNode node with the current string on the parser.
4912
5132
  */
@@ -4964,16 +5184,9 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
4964
5184
  assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
4965
5185
  pm_super_node_t *node = PM_ALLOC_NODE(parser, pm_super_node_t);
4966
5186
 
4967
- const uint8_t *end;
4968
- if (arguments->block != NULL) {
4969
- end = arguments->block->location.end;
4970
- } else if (arguments->closing_loc.start != NULL) {
4971
- end = arguments->closing_loc.end;
4972
- } else if (arguments->arguments != NULL) {
4973
- end = arguments->arguments->base.location.end;
4974
- } else {
5187
+ const uint8_t *end = pm_arguments_end(arguments);
5188
+ if (end == NULL) {
4975
5189
  assert(false && "unreachable");
4976
- end = NULL;
4977
5190
  }
4978
5191
 
4979
5192
  *node = (pm_super_node_t) {
@@ -5207,7 +5420,7 @@ pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
5207
5420
  * Allocate a new UnlessNode node.
5208
5421
  */
5209
5422
  static pm_unless_node_t *
5210
- pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements) {
5423
+ pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
5211
5424
  pm_conditional_predicate(predicate);
5212
5425
  pm_unless_node_t *node = PM_ALLOC_NODE(parser, pm_unless_node_t);
5213
5426
 
@@ -5229,6 +5442,7 @@ pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t
5229
5442
  },
5230
5443
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5231
5444
  .predicate = predicate,
5445
+ .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
5232
5446
  .statements = statements,
5233
5447
  .consequent = NULL,
5234
5448
  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
@@ -5259,6 +5473,7 @@ pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const
5259
5473
  },
5260
5474
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
5261
5475
  .predicate = predicate,
5476
+ .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5262
5477
  .statements = statements,
5263
5478
  .consequent = NULL,
5264
5479
  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
@@ -5535,17 +5750,16 @@ pm_parser_scope_push_transparent(pm_parser_t *parser) {
5535
5750
  }
5536
5751
 
5537
5752
  /**
5538
- * Check if the current scope has a given local variables.
5753
+ * Check if any of the currently visible scopes contain a local variable
5754
+ * described by the given constant id.
5539
5755
  */
5540
5756
  static int
5541
- pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
5542
- pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, token);
5757
+ pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
5543
5758
  pm_scope_t *scope = parser->current_scope;
5544
5759
  int depth = 0;
5545
5760
 
5546
5761
  while (scope != NULL) {
5547
- if (!scope->transparent &&
5548
- pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
5762
+ if (!scope->transparent && pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
5549
5763
  if (scope->closed) break;
5550
5764
 
5551
5765
  scope = scope->previous;
@@ -5555,6 +5769,16 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
5555
5769
  return -1;
5556
5770
  }
5557
5771
 
5772
+ /**
5773
+ * Check if any of the currently visible scopes contain a local variable
5774
+ * described by the given token. This function implicitly inserts a constant
5775
+ * into the constant pool.
5776
+ */
5777
+ static inline int
5778
+ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
5779
+ return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
5780
+ }
5781
+
5558
5782
  /**
5559
5783
  * Add a constant id to the local table of the current scope.
5560
5784
  */
@@ -5569,15 +5793,6 @@ pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id) {
5569
5793
  }
5570
5794
  }
5571
5795
 
5572
- /**
5573
- * Add a local variable from a constant string to the current scope.
5574
- */
5575
- static inline void
5576
- pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
5577
- pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
5578
- if (constant_id != 0) pm_parser_local_add(parser, constant_id);
5579
- }
5580
-
5581
5796
  /**
5582
5797
  * Add a local variable from a location to the current scope.
5583
5798
  */
@@ -5614,12 +5829,10 @@ static void
5614
5829
  pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
5615
5830
  // We want to check whether the parameter name is a numbered parameter or
5616
5831
  // not.
5617
- if (token_is_numbered_parameter(name->start, name->end)) {
5618
- pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NUMBERED_RESERVED);
5619
- }
5832
+ pm_refute_numbered_parameter(parser, name->start, name->end);
5620
5833
 
5621
5834
  // We want to ignore any parameter name that starts with an underscore.
5622
- if ((*name->start == '_')) return;
5835
+ if ((name->start < name->end) && (*name->start == '_')) return;
5623
5836
 
5624
5837
  // Otherwise we'll fetch the constant id for the parameter name and check
5625
5838
  // whether it's already in the current scope.
@@ -5655,7 +5868,16 @@ pm_parser_scope_pop(pm_parser_t *parser) {
5655
5868
  static inline size_t
5656
5869
  char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
5657
5870
  if (parser->encoding_changed) {
5658
- return parser->encoding.alpha_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
5871
+ size_t width;
5872
+ if ((width = parser->encoding.alpha_char(b, parser->end - b)) != 0) {
5873
+ return width;
5874
+ } else if (*b == '_') {
5875
+ return 1;
5876
+ } else if (*b >= 0x80) {
5877
+ return parser->encoding.char_width(b, parser->end - b);
5878
+ } else {
5879
+ return 0;
5880
+ }
5659
5881
  } else if (*b < 0x80) {
5660
5882
  return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
5661
5883
  } else {
@@ -5671,7 +5893,16 @@ char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
5671
5893
  static inline size_t
5672
5894
  char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
5673
5895
  if (parser->encoding_changed) {
5674
- return parser->encoding.alnum_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
5896
+ size_t width;
5897
+ if ((width = parser->encoding.alnum_char(b, parser->end - b)) != 0) {
5898
+ return width;
5899
+ } else if (*b == '_') {
5900
+ return 1;
5901
+ } else if (*b >= 0x80) {
5902
+ return parser->encoding.char_width(b, parser->end - b);
5903
+ } else {
5904
+ return 0;
5905
+ }
5675
5906
  } else if (*b < 0x80) {
5676
5907
  return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (*b == '_');
5677
5908
  } else {
@@ -5866,10 +6097,18 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
5866
6097
  }
5867
6098
 
5868
6099
  // Next, we're going to check for UTF-8. This is the most common encoding.
5869
- // Extensions like utf-8 can contain extra encoding details like,
5870
- // utf-8-dos, utf-8-linux, utf-8-mac. We treat these all as utf-8 should
5871
- // treat any encoding starting utf-8 as utf-8.
6100
+ // utf-8 can contain extra information at the end about the platform it is
6101
+ // encoded on, such as utf-8-mac or utf-8-unix. We'll ignore those suffixes.
5872
6102
  if ((start + 5 <= end) && (pm_strncasecmp(start, (const uint8_t *) "utf-8", 5) == 0)) {
6103
+ // We need to explicitly handle utf-8-hfs, as that one needs to switch
6104
+ // over to being utf8-mac.
6105
+ if (width == 9 && (pm_strncasecmp(start + 5, (const uint8_t *) "-hfs", 4) == 0)) {
6106
+ parser->encoding = pm_encoding_utf8_mac;
6107
+ parser->encoding_changed = true;
6108
+ if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
6109
+ return true;
6110
+ }
6111
+
5873
6112
  // We don't need to do anything here because the default encoding is
5874
6113
  // already UTF-8. We'll just return.
5875
6114
  return true;
@@ -5877,51 +6116,160 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
5877
6116
 
5878
6117
  // Next, we're going to loop through each of the encodings that we handle
5879
6118
  // explicitly. If we found one that we understand, we'll use that value.
5880
- #define ENCODING(value, prebuilt) \
6119
+ #define ENCODING1(value, prebuilt) \
5881
6120
  if (width == sizeof(value) - 1 && start + width <= end && pm_strncasecmp(start, (const uint8_t *) value, width) == 0) { \
5882
6121
  parser->encoding = prebuilt; \
5883
- parser->encoding_changed |= true; \
6122
+ parser->encoding_changed = true; \
5884
6123
  if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
5885
6124
  return true; \
5886
6125
  }
5887
6126
 
5888
- // Check most common first. (This is pretty arbitrary.)
5889
- ENCODING("ascii", pm_encoding_ascii);
5890
- ENCODING("ascii-8bit", pm_encoding_ascii_8bit);
5891
- ENCODING("us-ascii", pm_encoding_ascii);
5892
- ENCODING("binary", pm_encoding_ascii_8bit);
5893
- ENCODING("shift_jis", pm_encoding_shift_jis);
5894
- ENCODING("euc-jp", pm_encoding_euc_jp);
5895
-
5896
- // Then check all the others.
5897
- ENCODING("big5", pm_encoding_big5);
5898
- ENCODING("gbk", pm_encoding_gbk);
5899
- ENCODING("iso-8859-1", pm_encoding_iso_8859_1);
5900
- ENCODING("iso-8859-2", pm_encoding_iso_8859_2);
5901
- ENCODING("iso-8859-3", pm_encoding_iso_8859_3);
5902
- ENCODING("iso-8859-4", pm_encoding_iso_8859_4);
5903
- ENCODING("iso-8859-5", pm_encoding_iso_8859_5);
5904
- ENCODING("iso-8859-6", pm_encoding_iso_8859_6);
5905
- ENCODING("iso-8859-7", pm_encoding_iso_8859_7);
5906
- ENCODING("iso-8859-8", pm_encoding_iso_8859_8);
5907
- ENCODING("iso-8859-9", pm_encoding_iso_8859_9);
5908
- ENCODING("iso-8859-10", pm_encoding_iso_8859_10);
5909
- ENCODING("iso-8859-11", pm_encoding_iso_8859_11);
5910
- ENCODING("iso-8859-13", pm_encoding_iso_8859_13);
5911
- ENCODING("iso-8859-14", pm_encoding_iso_8859_14);
5912
- ENCODING("iso-8859-15", pm_encoding_iso_8859_15);
5913
- ENCODING("iso-8859-16", pm_encoding_iso_8859_16);
5914
- ENCODING("koi8-r", pm_encoding_koi8_r);
5915
- ENCODING("windows-31j", pm_encoding_windows_31j);
5916
- ENCODING("windows-1251", pm_encoding_windows_1251);
5917
- ENCODING("windows-1252", pm_encoding_windows_1252);
5918
- ENCODING("cp1251", pm_encoding_windows_1251);
5919
- ENCODING("cp1252", pm_encoding_windows_1252);
5920
- ENCODING("cp932", pm_encoding_windows_31j);
5921
- ENCODING("sjis", pm_encoding_windows_31j);
5922
- ENCODING("utf8-mac", pm_encoding_utf8_mac);
5923
-
5924
- #undef ENCODING
6127
+ // A convenience macro for comparing two aliases for the same encoding.
6128
+ #define ENCODING2(value1, value2, prebuilt) ENCODING1(value1, prebuilt) ENCODING1(value2, prebuilt)
6129
+
6130
+ if (width >= 3) {
6131
+ switch (*start) {
6132
+ case 'A': case 'a':
6133
+ ENCODING1("ASCII", pm_encoding_ascii);
6134
+ ENCODING1("ASCII-8BIT", pm_encoding_ascii_8bit);
6135
+ ENCODING1("ANSI_X3.4-1968", pm_encoding_ascii);
6136
+ break;
6137
+ case 'B': case 'b':
6138
+ ENCODING1("BINARY", pm_encoding_ascii_8bit);
6139
+ ENCODING1("Big5", pm_encoding_big5);
6140
+ ENCODING1("Big5-HKSCS", pm_encoding_big5_hkscs);
6141
+ ENCODING1("Big5-UAO", pm_encoding_big5_uao);
6142
+ break;
6143
+ case 'C': case 'c':
6144
+ ENCODING1("CP437", pm_encoding_ibm437);
6145
+ ENCODING1("CP720", pm_encoding_ibm720);
6146
+ ENCODING1("CP737", pm_encoding_ibm737);
6147
+ ENCODING1("CP775", pm_encoding_ibm775);
6148
+ ENCODING1("CP850", pm_encoding_cp850);
6149
+ ENCODING1("CP852", pm_encoding_cp852);
6150
+ ENCODING1("CP855", pm_encoding_cp855);
6151
+ ENCODING1("CP857", pm_encoding_ibm857);
6152
+ ENCODING1("CP860", pm_encoding_ibm860);
6153
+ ENCODING1("CP861", pm_encoding_ibm861);
6154
+ ENCODING1("CP862", pm_encoding_ibm862);
6155
+ ENCODING1("CP864", pm_encoding_ibm864);
6156
+ ENCODING1("CP865", pm_encoding_ibm865);
6157
+ ENCODING1("CP866", pm_encoding_ibm866);
6158
+ ENCODING1("CP869", pm_encoding_ibm869);
6159
+ ENCODING1("CP874", pm_encoding_windows_874);
6160
+ ENCODING1("CP878", pm_encoding_koi8_r);
6161
+ ENCODING1("CP863", pm_encoding_ibm863);
6162
+ ENCODING2("CP932", "csWindows31J", pm_encoding_windows_31j);
6163
+ ENCODING1("CP936", pm_encoding_gbk);
6164
+ ENCODING1("CP1250", pm_encoding_windows_1250);
6165
+ ENCODING1("CP1251", pm_encoding_windows_1251);
6166
+ ENCODING1("CP1252", pm_encoding_windows_1252);
6167
+ ENCODING1("CP1253", pm_encoding_windows_1253);
6168
+ ENCODING1("CP1254", pm_encoding_windows_1254);
6169
+ ENCODING1("CP1255", pm_encoding_windows_1255);
6170
+ ENCODING1("CP1256", pm_encoding_windows_1256);
6171
+ ENCODING1("CP1257", pm_encoding_windows_1257);
6172
+ ENCODING1("CP1258", pm_encoding_windows_1258);
6173
+ ENCODING1("CP51932", pm_encoding_cp51932);
6174
+ ENCODING1("CP65001", pm_encoding_utf_8);
6175
+ break;
6176
+ case 'E': case 'e':
6177
+ ENCODING2("EUC-JP", "eucJP", pm_encoding_euc_jp);
6178
+ ENCODING1("external", pm_encoding_utf_8);
6179
+ break;
6180
+ case 'F': case 'f':
6181
+ ENCODING1("filesystem", pm_encoding_utf_8);
6182
+ break;
6183
+ case 'G': case 'g':
6184
+ ENCODING1("GB1988", pm_encoding_gb1988);
6185
+ ENCODING1("GBK", pm_encoding_gbk);
6186
+ break;
6187
+ case 'I': case 'i':
6188
+ ENCODING1("IBM437", pm_encoding_ibm437);
6189
+ ENCODING1("IBM720", pm_encoding_ibm720);
6190
+ ENCODING1("IBM737", pm_encoding_ibm737);
6191
+ ENCODING1("IBM775", pm_encoding_ibm775);
6192
+ ENCODING1("IBM850", pm_encoding_cp850);
6193
+ ENCODING1("IBM852", pm_encoding_ibm852);
6194
+ ENCODING1("IBM855", pm_encoding_ibm855);
6195
+ ENCODING1("IBM857", pm_encoding_ibm857);
6196
+ ENCODING1("IBM860", pm_encoding_ibm860);
6197
+ ENCODING1("IBM861", pm_encoding_ibm861);
6198
+ ENCODING1("IBM862", pm_encoding_ibm862);
6199
+ ENCODING1("IBM863", pm_encoding_ibm863);
6200
+ ENCODING1("IBM864", pm_encoding_ibm864);
6201
+ ENCODING1("IBM865", pm_encoding_ibm865);
6202
+ ENCODING1("IBM866", pm_encoding_ibm866);
6203
+ ENCODING1("IBM869", pm_encoding_ibm869);
6204
+ ENCODING2("ISO-8859-1", "ISO8859-1", pm_encoding_iso_8859_1);
6205
+ ENCODING2("ISO-8859-2", "ISO8859-2", pm_encoding_iso_8859_2);
6206
+ ENCODING2("ISO-8859-3", "ISO8859-3", pm_encoding_iso_8859_3);
6207
+ ENCODING2("ISO-8859-4", "ISO8859-4", pm_encoding_iso_8859_4);
6208
+ ENCODING2("ISO-8859-5", "ISO8859-5", pm_encoding_iso_8859_5);
6209
+ ENCODING2("ISO-8859-6", "ISO8859-6", pm_encoding_iso_8859_6);
6210
+ ENCODING2("ISO-8859-7", "ISO8859-7", pm_encoding_iso_8859_7);
6211
+ ENCODING2("ISO-8859-8", "ISO8859-8", pm_encoding_iso_8859_8);
6212
+ ENCODING2("ISO-8859-9", "ISO8859-9", pm_encoding_iso_8859_9);
6213
+ ENCODING2("ISO-8859-10", "ISO8859-10", pm_encoding_iso_8859_10);
6214
+ ENCODING2("ISO-8859-11", "ISO8859-11", pm_encoding_iso_8859_11);
6215
+ ENCODING2("ISO-8859-13", "ISO8859-13", pm_encoding_iso_8859_13);
6216
+ ENCODING2("ISO-8859-14", "ISO8859-14", pm_encoding_iso_8859_14);
6217
+ ENCODING2("ISO-8859-15", "ISO8859-15", pm_encoding_iso_8859_15);
6218
+ ENCODING2("ISO-8859-16", "ISO8859-16", pm_encoding_iso_8859_16);
6219
+ break;
6220
+ case 'K': case 'k':
6221
+ ENCODING1("KOI8-R", pm_encoding_koi8_r);
6222
+ break;
6223
+ case 'L': case 'l':
6224
+ ENCODING1("locale", pm_encoding_utf_8);
6225
+ break;
6226
+ case 'M': case 'm':
6227
+ ENCODING1("macCentEuro", pm_encoding_mac_cent_euro);
6228
+ ENCODING1("macCroatian", pm_encoding_mac_croatian);
6229
+ ENCODING1("macCyrillic", pm_encoding_mac_cyrillic);
6230
+ ENCODING1("macGreek", pm_encoding_mac_greek);
6231
+ ENCODING1("macIceland", pm_encoding_mac_iceland);
6232
+ ENCODING1("macRoman", pm_encoding_mac_roman);
6233
+ ENCODING1("macRomania", pm_encoding_mac_romania);
6234
+ ENCODING1("macThai", pm_encoding_mac_thai);
6235
+ ENCODING1("macTurkish", pm_encoding_mac_turkish);
6236
+ ENCODING1("macUkraine", pm_encoding_mac_ukraine);
6237
+ break;
6238
+ case 'P': case 'p':
6239
+ ENCODING1("PCK", pm_encoding_windows_31j);
6240
+ break;
6241
+ case 'S': case 's':
6242
+ ENCODING1("Shift_JIS", pm_encoding_shift_jis);
6243
+ ENCODING1("SJIS", pm_encoding_windows_31j);
6244
+ break;
6245
+ case 'T': case 't':
6246
+ ENCODING1("TIS-620", pm_encoding_tis_620);
6247
+ break;
6248
+ case 'U': case 'u':
6249
+ ENCODING1("US-ASCII", pm_encoding_ascii);
6250
+ ENCODING2("UTF8-MAC", "UTF-8-HFS", pm_encoding_utf8_mac);
6251
+ break;
6252
+ case 'W': case 'w':
6253
+ ENCODING1("Windows-31J", pm_encoding_windows_31j);
6254
+ ENCODING1("Windows-874", pm_encoding_windows_874);
6255
+ ENCODING1("Windows-1250", pm_encoding_windows_1250);
6256
+ ENCODING1("Windows-1251", pm_encoding_windows_1251);
6257
+ ENCODING1("Windows-1252", pm_encoding_windows_1252);
6258
+ ENCODING1("Windows-1253", pm_encoding_windows_1253);
6259
+ ENCODING1("Windows-1254", pm_encoding_windows_1254);
6260
+ ENCODING1("Windows-1255", pm_encoding_windows_1255);
6261
+ ENCODING1("Windows-1256", pm_encoding_windows_1256);
6262
+ ENCODING1("Windows-1257", pm_encoding_windows_1257);
6263
+ ENCODING1("Windows-1258", pm_encoding_windows_1258);
6264
+ break;
6265
+ case '6':
6266
+ ENCODING1("646", pm_encoding_ascii);
6267
+ break;
6268
+ }
6269
+ }
6270
+
6271
+ #undef ENCODING2
6272
+ #undef ENCODING1
5925
6273
 
5926
6274
  return false;
5927
6275
  }
@@ -6026,6 +6374,8 @@ parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor
6026
6374
  */
6027
6375
  static inline bool
6028
6376
  parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
6377
+ bool result = true;
6378
+
6029
6379
  const uint8_t *start = parser->current.start + 1;
6030
6380
  const uint8_t *end = parser->current.end;
6031
6381
  if (end - start <= 7) return false;
@@ -6123,7 +6473,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
6123
6473
  (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
6124
6474
  (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
6125
6475
  ) {
6126
- parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
6476
+ result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
6127
6477
  }
6128
6478
  }
6129
6479
 
@@ -6150,7 +6500,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
6150
6500
  }
6151
6501
  }
6152
6502
 
6153
- return true;
6503
+ return result;
6154
6504
  }
6155
6505
 
6156
6506
  /******************************************************************************/
@@ -6324,7 +6674,7 @@ pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string
6324
6674
  }
6325
6675
 
6326
6676
  static pm_token_type_t
6327
- lex_optional_float_suffix(pm_parser_t *parser) {
6677
+ lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
6328
6678
  pm_token_type_t type = PM_TOKEN_INTEGER;
6329
6679
 
6330
6680
  // Here we're going to attempt to parse the optional decimal portion of a
@@ -6345,8 +6695,9 @@ lex_optional_float_suffix(pm_parser_t *parser) {
6345
6695
  // float. If it's not there, it's okay and we'll just continue on.
6346
6696
  if (match(parser, 'e') || match(parser, 'E')) {
6347
6697
  (void) (match(parser, '+') || match(parser, '-'));
6698
+ *seen_e = true;
6348
6699
 
6349
- if (pm_char_is_decimal_digit(*parser->current.end)) {
6700
+ if (pm_char_is_decimal_digit(peek(parser))) {
6350
6701
  parser->current.end++;
6351
6702
  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
6352
6703
  type = PM_TOKEN_FLOAT;
@@ -6360,8 +6711,9 @@ lex_optional_float_suffix(pm_parser_t *parser) {
6360
6711
  }
6361
6712
 
6362
6713
  static pm_token_type_t
6363
- lex_numeric_prefix(pm_parser_t *parser) {
6714
+ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
6364
6715
  pm_token_type_t type = PM_TOKEN_INTEGER;
6716
+ *seen_e = false;
6365
6717
 
6366
6718
  if (peek_offset(parser, -1) == '0') {
6367
6719
  switch (*parser->current.end) {
@@ -6432,14 +6784,14 @@ lex_numeric_prefix(pm_parser_t *parser) {
6432
6784
 
6433
6785
  // 0.xxx is a float
6434
6786
  case '.': {
6435
- type = lex_optional_float_suffix(parser);
6787
+ type = lex_optional_float_suffix(parser, seen_e);
6436
6788
  break;
6437
6789
  }
6438
6790
 
6439
6791
  // 0exxx is a float
6440
6792
  case 'e':
6441
6793
  case 'E': {
6442
- type = lex_optional_float_suffix(parser);
6794
+ type = lex_optional_float_suffix(parser, seen_e);
6443
6795
  break;
6444
6796
  }
6445
6797
  }
@@ -6449,7 +6801,7 @@ lex_numeric_prefix(pm_parser_t *parser) {
6449
6801
  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
6450
6802
 
6451
6803
  // Afterward, we'll lex as far as we can into an optional float suffix.
6452
- type = lex_optional_float_suffix(parser);
6804
+ type = lex_optional_float_suffix(parser, seen_e);
6453
6805
  }
6454
6806
 
6455
6807
  return type;
@@ -6461,7 +6813,8 @@ lex_numeric(pm_parser_t *parser) {
6461
6813
  parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
6462
6814
 
6463
6815
  if (parser->current.end < parser->end) {
6464
- type = lex_numeric_prefix(parser);
6816
+ bool seen_e = false;
6817
+ type = lex_numeric_prefix(parser, &seen_e);
6465
6818
 
6466
6819
  const uint8_t *end = parser->current.end;
6467
6820
  pm_token_type_t suffix_type = type;
@@ -6477,7 +6830,7 @@ lex_numeric(pm_parser_t *parser) {
6477
6830
  suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
6478
6831
  }
6479
6832
  } else {
6480
- if (match(parser, 'r')) {
6833
+ if (!seen_e && match(parser, 'r')) {
6481
6834
  suffix_type = PM_TOKEN_FLOAT_RATIONAL;
6482
6835
 
6483
6836
  if (match(parser, 'i')) {
@@ -6584,17 +6937,21 @@ lex_global_variable(pm_parser_t *parser) {
6584
6937
 
6585
6938
  /**
6586
6939
  * This function checks if the current token matches a keyword. If it does, it
6587
- * returns true. Otherwise, it returns false. The arguments are as follows:
6940
+ * returns the token type. Otherwise, it returns PM_TOKEN_EOF. The arguments are as follows:
6588
6941
  *
6942
+ * * `parser` - the parser object
6943
+ * * `current_start` - pointer to the start of the current token
6589
6944
  * * `value` - the literal string that we're checking for
6590
- * * `width` - the length of the token
6945
+ * * `vlen` - the length of the token
6591
6946
  * * `state` - the state that we should transition to if the token matches
6947
+ * * `type` - the expected token type
6948
+ * * `modifier_type` - the expected modifier token type
6592
6949
  */
6593
6950
  static inline pm_token_type_t
6594
- lex_keyword(pm_parser_t *parser, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
6595
- pm_lex_state_t last_state = parser->lex_state;
6951
+ lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
6952
+ if (memcmp(current_start, value, vlen) == 0) {
6953
+ pm_lex_state_t last_state = parser->lex_state;
6596
6954
 
6597
- if (parser->current.start + vlen <= parser->end && memcmp(parser->current.start, value, vlen) == 0) {
6598
6955
  if (parser->lex_state & PM_LEX_STATE_FNAME) {
6599
6956
  lex_state_set(parser, PM_LEX_STATE_ENDFN);
6600
6957
  } else {
@@ -6650,7 +7007,7 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
6650
7007
  }
6651
7008
 
6652
7009
  if (parser->lex_state != PM_LEX_STATE_DOT) {
6653
- if (width == 8 && (lex_keyword(parser, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
7010
+ if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
6654
7011
  return PM_TOKEN_KEYWORD_DEFINED;
6655
7012
  }
6656
7013
  }
@@ -6678,67 +7035,66 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
6678
7035
 
6679
7036
  if (parser->lex_state != PM_LEX_STATE_DOT) {
6680
7037
  pm_token_type_t type;
6681
-
6682
7038
  switch (width) {
6683
7039
  case 2:
6684
- if (lex_keyword(parser, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
7040
+ if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
6685
7041
  if (pm_do_loop_stack_p(parser)) {
6686
7042
  return PM_TOKEN_KEYWORD_DO_LOOP;
6687
7043
  }
6688
7044
  return PM_TOKEN_KEYWORD_DO;
6689
7045
  }
6690
7046
 
6691
- if ((type = lex_keyword(parser, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
6692
- if ((type = lex_keyword(parser, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6693
- if ((type = lex_keyword(parser, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7047
+ if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
7048
+ if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7049
+ if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6694
7050
  break;
6695
7051
  case 3:
6696
- if ((type = lex_keyword(parser, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6697
- if ((type = lex_keyword(parser, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6698
- if ((type = lex_keyword(parser, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6699
- if ((type = lex_keyword(parser, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6700
- if ((type = lex_keyword(parser, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6701
- if ((type = lex_keyword(parser, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6702
- if ((type = lex_keyword(parser, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7052
+ if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7053
+ if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7054
+ if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7055
+ if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7056
+ if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7057
+ if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7058
+ if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6703
7059
  break;
6704
7060
  case 4:
6705
- if ((type = lex_keyword(parser, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6706
- if ((type = lex_keyword(parser, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6707
- if ((type = lex_keyword(parser, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6708
- if ((type = lex_keyword(parser, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6709
- if ((type = lex_keyword(parser, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6710
- if ((type = lex_keyword(parser, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6711
- if ((type = lex_keyword(parser, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6712
- if ((type = lex_keyword(parser, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7061
+ if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7062
+ if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7063
+ if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7064
+ if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7065
+ if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7066
+ if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7067
+ if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7068
+ if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6713
7069
  break;
6714
7070
  case 5:
6715
- if ((type = lex_keyword(parser, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6716
- if ((type = lex_keyword(parser, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6717
- if ((type = lex_keyword(parser, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6718
- if ((type = lex_keyword(parser, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6719
- if ((type = lex_keyword(parser, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6720
- if ((type = lex_keyword(parser, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6721
- if ((type = lex_keyword(parser, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6722
- if ((type = lex_keyword(parser, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6723
- if ((type = lex_keyword(parser, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6724
- if ((type = lex_keyword(parser, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6725
- if ((type = lex_keyword(parser, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
6726
- if ((type = lex_keyword(parser, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
6727
- if ((type = lex_keyword(parser, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7071
+ if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7072
+ if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7073
+ if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7074
+ if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7075
+ if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7076
+ if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7077
+ if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7078
+ if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7079
+ if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7080
+ if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7081
+ if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
7082
+ if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
7083
+ if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6728
7084
  break;
6729
7085
  case 6:
6730
- if ((type = lex_keyword(parser, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6731
- if ((type = lex_keyword(parser, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6732
- if ((type = lex_keyword(parser, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
6733
- if ((type = lex_keyword(parser, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6734
- if ((type = lex_keyword(parser, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
7086
+ if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7087
+ if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7088
+ if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
7089
+ if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7090
+ if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
6735
7091
  break;
6736
7092
  case 8:
6737
- if ((type = lex_keyword(parser, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6738
- if ((type = lex_keyword(parser, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7093
+ if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7094
+ if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6739
7095
  break;
6740
7096
  case 12:
6741
- if ((type = lex_keyword(parser, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7097
+ if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
6742
7098
  break;
6743
7099
  }
6744
7100
  }
@@ -8676,6 +9032,8 @@ parser_lex(pm_parser_t *parser) {
8676
9032
 
8677
9033
  if (parser->current.end < parser->end) {
8678
9034
  lex_mode_push_list(parser, false, *parser->current.end++);
9035
+ } else {
9036
+ lex_mode_push_list_eof(parser);
8679
9037
  }
8680
9038
 
8681
9039
  LEX(PM_TOKEN_PERCENT_LOWER_I);
@@ -8685,6 +9043,8 @@ parser_lex(pm_parser_t *parser) {
8685
9043
 
8686
9044
  if (parser->current.end < parser->end) {
8687
9045
  lex_mode_push_list(parser, true, *parser->current.end++);
9046
+ } else {
9047
+ lex_mode_push_list_eof(parser);
8688
9048
  }
8689
9049
 
8690
9050
  LEX(PM_TOKEN_PERCENT_UPPER_I);
@@ -8696,6 +9056,8 @@ parser_lex(pm_parser_t *parser) {
8696
9056
  lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
8697
9057
  pm_newline_list_check_append(&parser->newline_list, parser->current.end);
8698
9058
  parser->current.end++;
9059
+ } else {
9060
+ lex_mode_push_regexp(parser, '\0', '\0');
8699
9061
  }
8700
9062
 
8701
9063
  LEX(PM_TOKEN_REGEXP_BEGIN);
@@ -8707,6 +9069,8 @@ parser_lex(pm_parser_t *parser) {
8707
9069
  lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
8708
9070
  pm_newline_list_check_append(&parser->newline_list, parser->current.end);
8709
9071
  parser->current.end++;
9072
+ } else {
9073
+ lex_mode_push_string_eof(parser);
8710
9074
  }
8711
9075
 
8712
9076
  LEX(PM_TOKEN_STRING_BEGIN);
@@ -8718,6 +9082,8 @@ parser_lex(pm_parser_t *parser) {
8718
9082
  lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
8719
9083
  pm_newline_list_check_append(&parser->newline_list, parser->current.end);
8720
9084
  parser->current.end++;
9085
+ } else {
9086
+ lex_mode_push_string_eof(parser);
8721
9087
  }
8722
9088
 
8723
9089
  LEX(PM_TOKEN_STRING_BEGIN);
@@ -8729,6 +9095,8 @@ parser_lex(pm_parser_t *parser) {
8729
9095
  lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
8730
9096
  lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
8731
9097
  parser->current.end++;
9098
+ } else {
9099
+ lex_mode_push_string_eof(parser);
8732
9100
  }
8733
9101
 
8734
9102
  LEX(PM_TOKEN_SYMBOL_BEGIN);
@@ -8738,6 +9106,8 @@ parser_lex(pm_parser_t *parser) {
8738
9106
 
8739
9107
  if (parser->current.end < parser->end) {
8740
9108
  lex_mode_push_list(parser, false, *parser->current.end++);
9109
+ } else {
9110
+ lex_mode_push_list_eof(parser);
8741
9111
  }
8742
9112
 
8743
9113
  LEX(PM_TOKEN_PERCENT_LOWER_W);
@@ -8747,6 +9117,8 @@ parser_lex(pm_parser_t *parser) {
8747
9117
 
8748
9118
  if (parser->current.end < parser->end) {
8749
9119
  lex_mode_push_list(parser, true, *parser->current.end++);
9120
+ } else {
9121
+ lex_mode_push_list_eof(parser);
8750
9122
  }
8751
9123
 
8752
9124
  LEX(PM_TOKEN_PERCENT_UPPER_W);
@@ -8757,6 +9129,8 @@ parser_lex(pm_parser_t *parser) {
8757
9129
  if (parser->current.end < parser->end) {
8758
9130
  lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
8759
9131
  parser->current.end++;
9132
+ } else {
9133
+ lex_mode_push_string_eof(parser);
8760
9134
  }
8761
9135
 
8762
9136
  LEX(PM_TOKEN_PERCENT_LOWER_X);
@@ -8859,7 +9233,7 @@ parser_lex(pm_parser_t *parser) {
8859
9233
  !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
8860
9234
  (type == PM_TOKEN_IDENTIFIER) &&
8861
9235
  ((pm_parser_local_depth(parser, &parser->current) != -1) ||
8862
- token_is_numbered_parameter(parser->current.start, parser->current.end))
9236
+ pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
8863
9237
  ) {
8864
9238
  lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
8865
9239
  }
@@ -9511,6 +9885,7 @@ parser_lex(pm_parser_t *parser) {
9511
9885
  parser->heredoc_end = parser->current.end;
9512
9886
  }
9513
9887
 
9888
+ parser->current_string_common_whitespace = parser->lex_modes.current->as.heredoc.common_whitespace;
9514
9889
  lex_mode_pop(parser);
9515
9890
  if (!at_end) {
9516
9891
  lex_state_set(parser, PM_LEX_STATE_END);
@@ -9728,11 +10103,11 @@ parser_lex(pm_parser_t *parser) {
9728
10103
  typedef enum {
9729
10104
  PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
9730
10105
  PM_BINDING_POWER_STATEMENT = 2,
9731
- PM_BINDING_POWER_MODIFIER = 4, // if unless until while in
10106
+ PM_BINDING_POWER_MODIFIER = 4, // if unless until while
9732
10107
  PM_BINDING_POWER_MODIFIER_RESCUE = 6, // rescue
9733
10108
  PM_BINDING_POWER_COMPOSITION = 8, // and or
9734
10109
  PM_BINDING_POWER_NOT = 10, // not
9735
- PM_BINDING_POWER_MATCH = 12, // =>
10110
+ PM_BINDING_POWER_MATCH = 12, // => in
9736
10111
  PM_BINDING_POWER_DEFINED = 14, // defined?
9737
10112
  PM_BINDING_POWER_ASSIGNMENT = 16, // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
9738
10113
  PM_BINDING_POWER_TERNARY = 18, // ?:
@@ -9767,34 +10142,37 @@ typedef struct {
9767
10142
 
9768
10143
  /** Whether or not this token can be used as a binary operator. */
9769
10144
  bool binary;
10145
+
10146
+ /**
10147
+ * Whether or not this token can be used as non-associative binary operator.
10148
+ * Non-associative operators (e.g. in and =>) need special treatment in parse_expression.
10149
+ */
10150
+ bool nonassoc;
9770
10151
  } pm_binding_powers_t;
9771
10152
 
9772
- #define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true }
9773
- #define LEFT_ASSOCIATIVE(precedence) { precedence, precedence + 1, true }
9774
- #define RIGHT_ASSOCIATIVE(precedence) { precedence, precedence, true }
9775
- #define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false }
10153
+ #define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
10154
+ #define LEFT_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, false }
10155
+ #define RIGHT_ASSOCIATIVE(precedence) { precedence, precedence, true, false }
10156
+ #define NON_ASSOCIATIVE(precedence) { precedence + 1, precedence + 1, true, true }
10157
+ #define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
9776
10158
 
9777
10159
  pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
9778
- // if unless until while in rescue
10160
+ // if unless until while
9779
10161
  [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
9780
10162
  [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
9781
10163
  [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
9782
10164
  [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
9783
- [PM_TOKEN_KEYWORD_IN] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
9784
10165
 
9785
- // rescue modifier
9786
- [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = {
9787
- PM_BINDING_POWER_ASSIGNMENT,
9788
- PM_BINDING_POWER_MODIFIER_RESCUE + 1,
9789
- true
9790
- },
10166
+ // rescue
10167
+ [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
9791
10168
 
9792
10169
  // and or
9793
10170
  [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
9794
10171
  [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
9795
10172
 
9796
- // =>
9797
- [PM_TOKEN_EQUAL_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
10173
+ // => in
10174
+ [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
10175
+ [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
9798
10176
 
9799
10177
  // &&= &= ^= = >>= <<= -= %= |= += /= *= **=
9800
10178
  [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
@@ -9816,8 +10194,8 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
9816
10194
  [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
9817
10195
 
9818
10196
  // .. ...
9819
- [PM_TOKEN_DOT_DOT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
9820
- [PM_TOKEN_DOT_DOT_DOT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
10197
+ [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
10198
+ [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
9821
10199
 
9822
10200
  // ||
9823
10201
  [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
@@ -9862,7 +10240,7 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
9862
10240
 
9863
10241
  // -@
9864
10242
  [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
9865
- [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false },
10243
+ [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
9866
10244
 
9867
10245
  // **
9868
10246
  [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
@@ -10038,6 +10416,16 @@ expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_to
10038
10416
  static pm_node_t *
10039
10417
  parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id);
10040
10418
 
10419
+ /**
10420
+ * This is a wrapper of parse_expression, which also checks whether the resulting node is value expression.
10421
+ */
10422
+ static pm_node_t *
10423
+ parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
10424
+ pm_node_t *node = parse_expression(parser, binding_power, diag_id);
10425
+ pm_assert_value_expression(parser, node);
10426
+ return node;
10427
+ }
10428
+
10041
10429
  /**
10042
10430
  * This function controls whether or not we will attempt to parse an expression
10043
10431
  * beginning at the subsequent token. It is used when we are in a context where
@@ -10121,11 +10509,11 @@ static pm_node_t *
10121
10509
  parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
10122
10510
  if (accept1(parser, PM_TOKEN_USTAR)) {
10123
10511
  pm_token_t operator = parser->previous;
10124
- pm_node_t *expression = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
10512
+ pm_node_t *expression = parse_value_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
10125
10513
  return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
10126
10514
  }
10127
10515
 
10128
- return parse_expression(parser, binding_power, diag_id);
10516
+ return parse_value_expression(parser, binding_power, diag_id);
10129
10517
  }
10130
10518
 
10131
10519
  /**
@@ -10147,6 +10535,8 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
10147
10535
  name[length] = '=';
10148
10536
 
10149
10537
  // Now switch the name to the new string.
10538
+ // This silences clang analyzer warning about leak of memory pointed by `name`.
10539
+ // NOLINTNEXTLINE(clang-analyzer-*)
10150
10540
  *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
10151
10541
  }
10152
10542
 
@@ -10179,8 +10569,8 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
10179
10569
  target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
10180
10570
  return target;
10181
10571
  case PM_LOCAL_VARIABLE_READ_NODE:
10182
- if (token_is_numbered_parameter(target->location.start, target->location.end)) {
10183
- pm_parser_err_node(parser, target, PM_ERR_PARAMETER_NUMBERED_RESERVED);
10572
+ if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
10573
+ PM_PARSER_ERR_NODE_FORMAT(parser, target, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
10184
10574
  } else {
10185
10575
  assert(sizeof(pm_local_variable_target_node_t) == sizeof(pm_local_variable_read_node_t));
10186
10576
  target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
@@ -10238,10 +10628,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
10238
10628
  assert(sizeof(pm_local_variable_target_node_t) == sizeof(pm_local_variable_read_node_t));
10239
10629
  target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
10240
10630
 
10241
- if (token_is_numbered_parameter(message.start, message.end)) {
10242
- pm_parser_err_location(parser, &message, PM_ERR_PARAMETER_NUMBERED_RESERVED);
10243
- }
10244
-
10631
+ pm_refute_numbered_parameter(parser, message.start, message.end);
10245
10632
  return target;
10246
10633
  }
10247
10634
 
@@ -10284,8 +10671,12 @@ static pm_node_t *
10284
10671
  parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
10285
10672
  pm_node_t *result = parse_target(parser, target);
10286
10673
 
10287
- // Ensure that we have either an = or a ) after the targets.
10288
- if (!match3(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_KEYWORD_IN)) {
10674
+ // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in parens after the targets.
10675
+ if (
10676
+ !match1(parser, PM_TOKEN_EQUAL) &&
10677
+ !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
10678
+ !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
10679
+ ) {
10289
10680
  pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
10290
10681
  }
10291
10682
 
@@ -10322,10 +10713,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10322
10713
  return (pm_node_t *) node;
10323
10714
  }
10324
10715
  case PM_LOCAL_VARIABLE_READ_NODE: {
10325
- if (token_is_numbered_parameter(target->location.start, target->location.end)) {
10326
- pm_parser_err_node(parser, target, PM_ERR_PARAMETER_NUMBERED_RESERVED);
10327
- }
10328
-
10716
+ pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
10329
10717
  pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
10330
10718
 
10331
10719
  pm_constant_id_t constant_id = local_read->name;
@@ -10387,10 +10775,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10387
10775
  pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
10388
10776
  target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
10389
10777
 
10390
- if (token_is_numbered_parameter(message.start, message.end)) {
10391
- pm_parser_err_location(parser, &message, PM_ERR_PARAMETER_NUMBERED_RESERVED);
10392
- }
10393
-
10778
+ pm_refute_numbered_parameter(parser, message.start, message.end);
10394
10779
  return target;
10395
10780
  }
10396
10781
 
@@ -10616,7 +11001,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
10616
11001
  pm_node_t *value = NULL;
10617
11002
 
10618
11003
  if (token_begins_expression_p(parser->current.type)) {
10619
- value = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11004
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
10620
11005
  } else if (pm_parser_local_depth(parser, &operator) == -1) {
10621
11006
  pm_parser_err_token(parser, &operator, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
10622
11007
  }
@@ -10634,7 +11019,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
10634
11019
  pm_node_t *value = NULL;
10635
11020
 
10636
11021
  if (token_begins_expression_p(parser->current.type)) {
10637
- value = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_EXPRESSION_AFTER_LABEL);
11022
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_EXPRESSION_AFTER_LABEL);
10638
11023
  } else {
10639
11024
  if (parser->encoding.isupper_char(label.start, (label.end - 1) - label.start)) {
10640
11025
  pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
@@ -10658,7 +11043,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
10658
11043
  break;
10659
11044
  }
10660
11045
  default: {
10661
- pm_node_t *key = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_KEY);
11046
+ pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_KEY);
10662
11047
  pm_token_t operator;
10663
11048
 
10664
11049
  if (pm_symbol_node_label_p(key)) {
@@ -10668,7 +11053,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
10668
11053
  operator = parser->previous;
10669
11054
  }
10670
11055
 
10671
- pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_VALUE);
11056
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_VALUE);
10672
11057
  element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
10673
11058
  break;
10674
11059
  }
@@ -10726,13 +11111,18 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10726
11111
  return;
10727
11112
  }
10728
11113
 
11114
+ bool parsed_first_argument = false;
10729
11115
  bool parsed_bare_hash = false;
10730
11116
  bool parsed_block_argument = false;
11117
+ bool parsed_forwarding_arguments = false;
10731
11118
 
10732
11119
  while (!match1(parser, PM_TOKEN_EOF)) {
10733
11120
  if (parsed_block_argument) {
10734
11121
  pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
10735
11122
  }
11123
+ if (parsed_forwarding_arguments) {
11124
+ pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
11125
+ }
10736
11126
 
10737
11127
  pm_node_t *argument = NULL;
10738
11128
 
@@ -10764,7 +11154,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10764
11154
  pm_node_t *expression = NULL;
10765
11155
 
10766
11156
  if (token_begins_expression_p(parser->current.type)) {
10767
- expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_ARGUMENT);
11157
+ expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_ARGUMENT);
10768
11158
  } else if (pm_parser_local_depth(parser, &operator) == -1) {
10769
11159
  pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
10770
11160
  }
@@ -10783,14 +11173,14 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10783
11173
  parser_lex(parser);
10784
11174
  pm_token_t operator = parser->previous;
10785
11175
 
10786
- if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA)) {
11176
+ if (match3(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON)) {
10787
11177
  if (pm_parser_local_depth(parser, &parser->previous) == -1) {
10788
11178
  pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
10789
11179
  }
10790
11180
 
10791
11181
  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
10792
11182
  } else {
10793
- pm_node_t *expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
11183
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
10794
11184
 
10795
11185
  if (parsed_bare_hash) {
10796
11186
  pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
@@ -10816,9 +11206,13 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10816
11206
  if (pm_parser_local_depth(parser, &parser->previous) == -1) {
10817
11207
  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
10818
11208
  }
11209
+ if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
11210
+ pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
11211
+ }
10819
11212
 
10820
11213
  argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
10821
11214
  parse_arguments_append(parser, arguments, argument);
11215
+ parsed_forwarding_arguments = true;
10822
11216
  break;
10823
11217
  }
10824
11218
  }
@@ -10826,7 +11220,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10826
11220
  /* fallthrough */
10827
11221
  default: {
10828
11222
  if (argument == NULL) {
10829
- argument = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_ARGUMENT);
11223
+ argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_ARGUMENT);
10830
11224
  }
10831
11225
 
10832
11226
  bool contains_keyword_splat = false;
@@ -10845,7 +11239,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10845
11239
  pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
10846
11240
 
10847
11241
  // Finish parsing the one we are part way through
10848
- pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_VALUE);
11242
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_HASH_VALUE);
10849
11243
 
10850
11244
  argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
10851
11245
  pm_keyword_hash_node_elements_append(bare_hash, argument);
@@ -10870,6 +11264,8 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
10870
11264
  }
10871
11265
  }
10872
11266
 
11267
+ parsed_first_argument = true;
11268
+
10873
11269
  // If parsing the argument failed, we need to stop parsing arguments.
10874
11270
  if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
10875
11271
 
@@ -11029,7 +11425,7 @@ parse_parameters(
11029
11425
  pm_binding_power_t binding_power,
11030
11426
  bool uses_parentheses,
11031
11427
  bool allows_trailing_comma,
11032
- bool allows_forwarding_parameter
11428
+ bool allows_forwarding_parameters
11033
11429
  ) {
11034
11430
  pm_parameters_node_t *params = pm_parameters_node_create(parser);
11035
11431
  bool looping = true;
@@ -11064,7 +11460,10 @@ parse_parameters(
11064
11460
  pm_parser_local_add_token(parser, &name);
11065
11461
  } else {
11066
11462
  name = not_provided(parser);
11067
- pm_parser_local_add_token(parser, &operator);
11463
+
11464
+ if (allows_forwarding_parameters) {
11465
+ pm_parser_local_add_token(parser, &operator);
11466
+ }
11068
11467
  }
11069
11468
 
11070
11469
  pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
@@ -11078,7 +11477,7 @@ parse_parameters(
11078
11477
  break;
11079
11478
  }
11080
11479
  case PM_TOKEN_UDOT_DOT_DOT: {
11081
- if (!allows_forwarding_parameter) {
11480
+ if (!allows_forwarding_parameters) {
11082
11481
  pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
11083
11482
  }
11084
11483
 
@@ -11086,9 +11485,7 @@ parse_parameters(
11086
11485
  update_parameter_state(parser, &parser->current, &order);
11087
11486
  parser_lex(parser);
11088
11487
 
11089
- if (allows_forwarding_parameter) {
11090
- pm_parser_local_add_constant(parser, "*", 1);
11091
- pm_parser_local_add_constant(parser, "&", 1);
11488
+ if (allows_forwarding_parameters) {
11092
11489
  pm_parser_local_add_token(parser, &parser->previous);
11093
11490
  }
11094
11491
 
@@ -11148,7 +11545,7 @@ parse_parameters(
11148
11545
  if (accept1(parser, PM_TOKEN_EQUAL)) {
11149
11546
  pm_token_t operator = parser->previous;
11150
11547
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11151
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_PARAMETER_NO_DEFAULT);
11548
+ pm_node_t *value = parse_value_expression(parser, binding_power, PM_ERR_PARAMETER_NO_DEFAULT);
11152
11549
 
11153
11550
  pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
11154
11551
  pm_parameters_node_optionals_append(params, param);
@@ -11207,7 +11604,7 @@ parse_parameters(
11207
11604
 
11208
11605
  if (token_begins_expression_p(parser->current.type)) {
11209
11606
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11210
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_PARAMETER_NO_DEFAULT_KW);
11607
+ pm_node_t *value = parse_value_expression(parser, binding_power, PM_ERR_PARAMETER_NO_DEFAULT_KW);
11211
11608
  context_pop(parser);
11212
11609
  param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
11213
11610
  }
@@ -11244,7 +11641,10 @@ parse_parameters(
11244
11641
  pm_parser_local_add_token(parser, &name);
11245
11642
  } else {
11246
11643
  name = not_provided(parser);
11247
- pm_parser_local_add_token(parser, &operator);
11644
+
11645
+ if (allows_forwarding_parameters) {
11646
+ pm_parser_local_add_token(parser, &operator);
11647
+ }
11248
11648
  }
11249
11649
 
11250
11650
  pm_rest_parameter_node_t *param = pm_rest_parameter_node_create(parser, &operator, &name);
@@ -11276,7 +11676,10 @@ parse_parameters(
11276
11676
  pm_parser_local_add_token(parser, &name);
11277
11677
  } else {
11278
11678
  name = not_provided(parser);
11279
- pm_parser_local_add_token(parser, &operator);
11679
+
11680
+ if (allows_forwarding_parameters) {
11681
+ pm_parser_local_add_token(parser, &operator);
11682
+ }
11280
11683
  }
11281
11684
 
11282
11685
  param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
@@ -11622,6 +12025,13 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
11622
12025
  // argument to this method call.
11623
12026
  parse_arguments(parser, arguments, true, PM_TOKEN_EOF);
11624
12027
 
12028
+ // If we have done with the arguments and still not consumed the comma,
12029
+ // then we have a trailing comma where we need to check whether it is
12030
+ // allowed or not.
12031
+ if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
12032
+ pm_parser_err_previous(parser, PM_ERR_EXPECT_ARGUMENT);
12033
+ }
12034
+
11625
12035
  pm_accepts_block_stack_pop(parser);
11626
12036
  }
11627
12037
 
@@ -11658,14 +12068,19 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
11658
12068
  }
11659
12069
 
11660
12070
  static inline pm_node_t *
11661
- parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context) {
12071
+ parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword) {
11662
12072
  context_push(parser, PM_CONTEXT_PREDICATE);
11663
12073
  pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
11664
- pm_node_t *predicate = parse_expression(parser, binding_power, error_id);
12074
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, error_id);
11665
12075
 
11666
12076
  // Predicates are closed by a term, a "then", or a term and then a "then".
11667
12077
  bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
11668
- predicate_closed |= accept1(parser, PM_TOKEN_KEYWORD_THEN);
12078
+
12079
+ if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
12080
+ predicate_closed = true;
12081
+ *then_keyword = parser->previous;
12082
+ }
12083
+
11669
12084
  if (!predicate_closed) {
11670
12085
  pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
11671
12086
  }
@@ -11677,7 +12092,9 @@ parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_contex
11677
12092
  static inline pm_node_t *
11678
12093
  parse_conditional(pm_parser_t *parser, pm_context_t context) {
11679
12094
  pm_token_t keyword = parser->previous;
11680
- pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context);
12095
+ pm_token_t then_keyword = not_provided(parser);
12096
+
12097
+ pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword);
11681
12098
  pm_statements_node_t *statements = NULL;
11682
12099
 
11683
12100
  if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
@@ -11692,10 +12109,10 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
11692
12109
 
11693
12110
  switch (context) {
11694
12111
  case PM_CONTEXT_IF:
11695
- parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, statements, NULL, &end_keyword);
12112
+ parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
11696
12113
  break;
11697
12114
  case PM_CONTEXT_UNLESS:
11698
- parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, statements);
12115
+ parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
11699
12116
  break;
11700
12117
  default:
11701
12118
  assert(false && "unreachable");
@@ -11709,14 +12126,14 @@ parse_conditional(pm_parser_t *parser, pm_context_t context) {
11709
12126
  if (context == PM_CONTEXT_IF) {
11710
12127
  while (accept1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
11711
12128
  pm_token_t elsif_keyword = parser->previous;
11712
- pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF);
12129
+ pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword);
11713
12130
  pm_accepts_block_stack_push(parser, true);
11714
12131
  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF);
11715
12132
  pm_accepts_block_stack_pop(parser);
11716
12133
 
11717
12134
  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
11718
12135
 
11719
- pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, statements, NULL, &end_keyword);
12136
+ pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
11720
12137
  ((pm_if_node_t *) current)->consequent = elsif;
11721
12138
  current = elsif;
11722
12139
  }
@@ -12165,7 +12582,7 @@ parse_variable_call(pm_parser_t *parser) {
12165
12582
  return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
12166
12583
  }
12167
12584
 
12168
- if (!parser->current_scope->closed && token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
12585
+ if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
12169
12586
  // Indicate that this scope is using numbered params so that child
12170
12587
  // scopes cannot.
12171
12588
  parser->current_scope->numbered_params = true;
@@ -12212,15 +12629,23 @@ parse_variable_call(pm_parser_t *parser) {
12212
12629
  return (pm_node_t *) node;
12213
12630
  }
12214
12631
 
12632
+ /**
12633
+ * Parse the method definition name based on the current token available on the
12634
+ * parser. If it does not match a valid method definition name, then a missing
12635
+ * token is returned.
12636
+ */
12215
12637
  static inline pm_token_t
12216
12638
  parse_method_definition_name(pm_parser_t *parser) {
12217
12639
  switch (parser->current.type) {
12218
12640
  case PM_CASE_KEYWORD:
12219
12641
  case PM_TOKEN_CONSTANT:
12220
- case PM_TOKEN_IDENTIFIER:
12221
12642
  case PM_TOKEN_METHOD_NAME:
12222
12643
  parser_lex(parser);
12223
12644
  return parser->previous;
12645
+ case PM_TOKEN_IDENTIFIER:
12646
+ pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
12647
+ parser_lex(parser);
12648
+ return parser->previous;
12224
12649
  case PM_CASE_OPERATOR:
12225
12650
  lex_state_set(parser, PM_LEX_STATE_ENDFN);
12226
12651
  parser_lex(parser);
@@ -12781,7 +13206,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
12781
13206
  pm_token_t lparen = parser->current;
12782
13207
  parser_lex(parser);
12783
13208
 
12784
- pm_node_t *expression = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
13209
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
12785
13210
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
12786
13211
 
12787
13212
  accept1(parser, PM_TOKEN_NEWLINE);
@@ -13024,9 +13449,10 @@ parse_strings_empty_content(const uint8_t *location) {
13024
13449
  * Parse a set of strings that could be concatenated together.
13025
13450
  */
13026
13451
  static inline pm_node_t *
13027
- parse_strings(pm_parser_t *parser) {
13452
+ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13028
13453
  assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
13029
- pm_node_t *result = NULL;
13454
+
13455
+ bool concating = false;
13030
13456
  bool state_is_arg_labeled = lex_state_p(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
13031
13457
 
13032
13458
  while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
@@ -13162,7 +13588,7 @@ parse_strings(pm_parser_t *parser) {
13162
13588
  }
13163
13589
  }
13164
13590
 
13165
- if (result == NULL) {
13591
+ if (current == NULL) {
13166
13592
  // If the node we just parsed is a symbol node, then we can't
13167
13593
  // concatenate it with anything else, so we can now return that
13168
13594
  // node.
@@ -13172,7 +13598,7 @@ parse_strings(pm_parser_t *parser) {
13172
13598
 
13173
13599
  // If we don't already have a node, then it's fine and we can just
13174
13600
  // set the result to be the node we just parsed.
13175
- result = node;
13601
+ current = node;
13176
13602
  } else {
13177
13603
  // Otherwise we need to check the type of the node we just parsed.
13178
13604
  // If it cannot be concatenated with the previous node, then we'll
@@ -13181,13 +13607,22 @@ parse_strings(pm_parser_t *parser) {
13181
13607
  pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
13182
13608
  }
13183
13609
 
13184
- // Either way we will create a concat node to hold the strings
13185
- // together.
13186
- result = (pm_node_t *) pm_string_concat_node_create(parser, result, node);
13610
+ // If we haven't already created our container for concatenation,
13611
+ // we'll do that now.
13612
+ if (!concating) {
13613
+ concating = true;
13614
+ pm_token_t bounds = not_provided(parser);
13615
+
13616
+ pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
13617
+ pm_interpolated_string_node_append(container, current);
13618
+ current = (pm_node_t *) container;
13619
+ }
13620
+
13621
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
13187
13622
  }
13188
13623
  }
13189
13624
 
13190
- return result;
13625
+ return current;
13191
13626
  }
13192
13627
 
13193
13628
  /**
@@ -13304,7 +13739,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13304
13739
  // Otherwise, we're going to parse the first statement in the list
13305
13740
  // of statements within the parentheses.
13306
13741
  pm_accepts_block_stack_push(parser, true);
13742
+ context_push(parser, PM_CONTEXT_PARENS);
13307
13743
  pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_CANNOT_PARSE_EXPRESSION);
13744
+ context_pop(parser);
13308
13745
 
13309
13746
  // Determine if this statement is followed by a terminator. In the
13310
13747
  // case of a single statement, this is fine. But in the case of
@@ -13446,8 +13883,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13446
13883
  // Characters can be followed by strings in which case they are
13447
13884
  // automatically concatenated.
13448
13885
  if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
13449
- pm_node_t *concat = parse_strings(parser);
13450
- return (pm_node_t *) pm_string_concat_node_create(parser, node, concat);
13886
+ return parse_strings(parser, node);
13451
13887
  }
13452
13888
 
13453
13889
  return node;
@@ -13661,7 +14097,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13661
14097
  cast->base.type = PM_X_STRING_NODE;
13662
14098
  }
13663
14099
 
13664
- size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
14100
+ size_t common_whitespace = parser->current_string_common_whitespace;
13665
14101
  if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
13666
14102
  parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
13667
14103
  }
@@ -13707,7 +14143,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13707
14143
 
13708
14144
  // If this is a heredoc that is indented with a ~, then we need
13709
14145
  // to dedent each line by the common leading whitespace.
13710
- size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
14146
+ size_t common_whitespace = parser->current_string_common_whitespace;
13711
14147
  if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
13712
14148
  pm_node_list_t *nodes;
13713
14149
  if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
@@ -13721,8 +14157,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13721
14157
  }
13722
14158
 
13723
14159
  if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
13724
- pm_node_t *concat = parse_strings(parser);
13725
- return (pm_node_t *) pm_string_concat_node_create(parser, node, concat);
14160
+ return parse_strings(parser, node);
13726
14161
  }
13727
14162
 
13728
14163
  return node;
@@ -13811,21 +14246,23 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13811
14246
  } else if (!token_begins_expression_p(parser->current.type)) {
13812
14247
  predicate = NULL;
13813
14248
  } else {
13814
- predicate = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CASE_EXPRESSION_AFTER_CASE);
14249
+ predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CASE_EXPRESSION_AFTER_CASE);
13815
14250
  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13816
14251
  }
13817
14252
 
13818
14253
  if (accept1(parser, PM_TOKEN_KEYWORD_END)) {
13819
14254
  pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
13820
- return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, NULL, &parser->previous);
14255
+ return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
13821
14256
  }
13822
14257
 
13823
14258
  // At this point we can create a case node, though we don't yet know if it
13824
14259
  // is a case-in or case-when node.
13825
14260
  pm_token_t end_keyword = not_provided(parser);
13826
- pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL, &end_keyword);
14261
+ pm_node_t *node;
13827
14262
 
13828
14263
  if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
14264
+ pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
14265
+
13829
14266
  // At this point we've seen a when keyword, so we know this is a
13830
14267
  // case-when node. We will continue to parse the when nodes until we hit
13831
14268
  // the end of the list.
@@ -13836,14 +14273,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13836
14273
  do {
13837
14274
  if (accept1(parser, PM_TOKEN_USTAR)) {
13838
14275
  pm_token_t operator = parser->previous;
13839
- pm_node_t *expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
14276
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
13840
14277
 
13841
14278
  pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
13842
14279
  pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
13843
14280
 
13844
14281
  if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
13845
14282
  } else {
13846
- pm_node_t *condition = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CASE_EXPRESSION_AFTER_WHEN);
14283
+ pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CASE_EXPRESSION_AFTER_WHEN);
13847
14284
  pm_when_node_conditions_append(when_node, condition);
13848
14285
 
13849
14286
  if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
@@ -13865,7 +14302,23 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13865
14302
 
13866
14303
  pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
13867
14304
  }
14305
+
14306
+ // If we didn't parse any conditions (in or when) then we need
14307
+ // to indicate that we have an error.
14308
+ if (case_node->conditions.size == 0) {
14309
+ pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
14310
+ }
14311
+
14312
+ node = (pm_node_t *) case_node;
13868
14313
  } else {
14314
+ pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
14315
+
14316
+ // If this is a case-match node (i.e., it is a pattern matching
14317
+ // case statement) then we must have a predicate.
14318
+ if (predicate == NULL) {
14319
+ pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
14320
+ }
14321
+
13869
14322
  // At this point we expect that we're parsing a case-in node. We will
13870
14323
  // continue to parse the in nodes until we hit the end of the list.
13871
14324
  while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
@@ -13884,11 +14337,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13884
14337
  // for guard clauses in the form of `if` or `unless` statements.
13885
14338
  if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
13886
14339
  pm_token_t keyword = parser->previous;
13887
- pm_node_t *predicate = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CONDITIONAL_IF_PREDICATE);
14340
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CONDITIONAL_IF_PREDICATE);
13888
14341
  pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
13889
14342
  } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
13890
14343
  pm_token_t keyword = parser->previous;
13891
- pm_node_t *predicate = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
14344
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
13892
14345
  pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
13893
14346
  }
13894
14347
 
@@ -13919,14 +14372,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13919
14372
  // Now that we have the full pattern and statements, we can create the
13920
14373
  // node and attach it to the case node.
13921
14374
  pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
13922
- pm_case_node_condition_append(case_node, condition);
14375
+ pm_case_match_node_condition_append(case_node, condition);
13923
14376
  }
13924
- }
13925
14377
 
13926
- // If we didn't parse any conditions (in or when) then we need to
13927
- // indicate that we have an error.
13928
- if (case_node->conditions.size == 0) {
13929
- pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
14378
+ // If we didn't parse any conditions (in or when) then we need
14379
+ // to indicate that we have an error.
14380
+ if (case_node->conditions.size == 0) {
14381
+ pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
14382
+ }
14383
+
14384
+ node = (pm_node_t *) case_node;
13930
14385
  }
13931
14386
 
13932
14387
  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
@@ -13940,12 +14395,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
13940
14395
  else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
13941
14396
  }
13942
14397
 
13943
- pm_case_node_consequent_set(case_node, else_node);
14398
+ if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
14399
+ pm_case_node_consequent_set((pm_case_node_t *) node, else_node);
14400
+ } else {
14401
+ pm_case_match_node_consequent_set((pm_case_match_node_t *) node, else_node);
14402
+ }
13944
14403
  }
13945
14404
 
13946
14405
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
13947
- pm_case_node_end_keyword_loc_set(case_node, &parser->previous);
13948
- return (pm_node_t *) case_node;
14406
+ if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
14407
+ pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
14408
+ } else {
14409
+ pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
14410
+ }
14411
+
14412
+ return node;
13949
14413
  }
13950
14414
  case PM_TOKEN_KEYWORD_BEGIN: {
13951
14415
  parser_lex(parser);
@@ -14101,7 +14565,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14101
14565
  parser->command_start = true;
14102
14566
  parser_lex(parser);
14103
14567
 
14104
- superclass = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CLASS_SUPERCLASS);
14568
+ superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CLASS_SUPERCLASS);
14105
14569
  } else {
14106
14570
  inheritance_operator = not_provided(parser);
14107
14571
  superclass = NULL;
@@ -14172,6 +14636,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14172
14636
  operator = parser->previous;
14173
14637
  name = parse_method_definition_name(parser);
14174
14638
  } else {
14639
+ pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
14175
14640
  pm_parser_scope_push(parser, true);
14176
14641
  name = parser->previous;
14177
14642
  }
@@ -14245,7 +14710,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14245
14710
  case PM_TOKEN_PARENTHESIS_LEFT: {
14246
14711
  parser_lex(parser);
14247
14712
  pm_token_t lparen = parser->previous;
14248
- pm_node_t *expression = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_DEF_RECEIVER);
14713
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, PM_ERR_DEF_RECEIVER);
14249
14714
 
14250
14715
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
14251
14716
  pm_token_t rparen = parser->previous;
@@ -14326,6 +14791,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14326
14791
  equal = parser->previous;
14327
14792
 
14328
14793
  context_push(parser, PM_CONTEXT_DEF);
14794
+ pm_do_loop_stack_push(parser, false);
14329
14795
  statements = (pm_node_t *) pm_statements_node_create(parser);
14330
14796
 
14331
14797
  pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, PM_ERR_DEF_ENDLESS);
@@ -14338,6 +14804,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14338
14804
  }
14339
14805
 
14340
14806
  pm_statements_node_body_append((pm_statements_node_t *) statements, statement);
14807
+ pm_do_loop_stack_pop(parser);
14341
14808
  context_pop(parser);
14342
14809
  end_keyword = not_provided(parser);
14343
14810
  } else {
@@ -14425,6 +14892,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14425
14892
  parser_lex(parser);
14426
14893
  pm_token_t keyword = parser->previous;
14427
14894
 
14895
+ if (context_def_p(parser)) {
14896
+ pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
14897
+ }
14898
+
14428
14899
  expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
14429
14900
  pm_token_t opening = parser->previous;
14430
14901
  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE);
@@ -14474,7 +14945,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14474
14945
  expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
14475
14946
  pm_token_t in_keyword = parser->previous;
14476
14947
 
14477
- pm_node_t *collection = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_FOR_COLLECTION);
14948
+ pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_FOR_COLLECTION);
14478
14949
  pm_do_loop_stack_pop(parser);
14479
14950
 
14480
14951
  pm_token_t do_keyword;
@@ -14636,7 +15107,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14636
15107
  parser_lex(parser);
14637
15108
  pm_token_t keyword = parser->previous;
14638
15109
 
14639
- pm_node_t *predicate = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
15110
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
14640
15111
  pm_do_loop_stack_pop(parser);
14641
15112
 
14642
15113
  expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
@@ -14657,7 +15128,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
14657
15128
  parser_lex(parser);
14658
15129
  pm_token_t keyword = parser->previous;
14659
15130
 
14660
- pm_node_t *predicate = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
15131
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
14661
15132
  pm_do_loop_stack_pop(parser);
14662
15133
 
14663
15134
  expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
@@ -15293,7 +15764,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15293
15764
  return (pm_node_t *) node;
15294
15765
  }
15295
15766
  case PM_TOKEN_STRING_BEGIN:
15296
- return parse_strings(parser);
15767
+ return parse_strings(parser, NULL);
15297
15768
  case PM_TOKEN_SYMBOL_BEGIN: {
15298
15769
  pm_lex_mode_t lex_mode = *parser->lex_modes.current;
15299
15770
  parser_lex(parser);
@@ -15310,10 +15781,29 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
15310
15781
  }
15311
15782
 
15312
15783
  static inline pm_node_t *
15313
- parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
15784
+ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
15785
+ pm_node_t *value = parse_value_expression(parser, binding_power, diag_id);
15786
+
15787
+ // Contradicting binding powers, the right-hand-side value of the assignment allows the `rescue` modifier.
15788
+ if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
15789
+ pm_token_t rescue = parser->current;
15790
+ parser_lex(parser);
15791
+ pm_node_t *right = parse_expression(parser, binding_power, PM_ERR_RESCUE_MODIFIER_VALUE);
15792
+
15793
+ return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
15794
+ }
15795
+
15796
+ return value;
15797
+ }
15798
+
15799
+
15800
+ static inline pm_node_t *
15801
+ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, pm_diagnostic_id_t diag_id) {
15314
15802
  pm_node_t *value = parse_starred_expression(parser, binding_power, diag_id);
15315
15803
 
15804
+ bool is_single_value = true;
15316
15805
  if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
15806
+ is_single_value = false;
15317
15807
  pm_token_t opening = not_provided(parser);
15318
15808
  pm_array_node_t *array = pm_array_node_create(parser, &opening);
15319
15809
 
@@ -15327,6 +15817,15 @@ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_
15327
15817
  }
15328
15818
  }
15329
15819
 
15820
+ // Contradicting binding powers, the right-hand-side value of the assignment allows the `rescue` modifier.
15821
+ if (is_single_value && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
15822
+ pm_token_t rescue = parser->current;
15823
+ parser_lex(parser);
15824
+ pm_node_t *right = parse_expression(parser, binding_power, PM_ERR_RESCUE_MODIFIER_VALUE);
15825
+
15826
+ return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
15827
+ }
15828
+
15330
15829
  return value;
15331
15830
  }
15332
15831
 
@@ -15352,6 +15851,25 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
15352
15851
  }
15353
15852
  }
15354
15853
 
15854
+ static bool
15855
+ name_is_identifier(pm_parser_t *parser, const uint8_t *source, size_t length) {
15856
+ if (length == 0) {
15857
+ return false;
15858
+ }
15859
+
15860
+ size_t width = char_is_identifier_start(parser, source);
15861
+ if (!width) {
15862
+ return false;
15863
+ }
15864
+
15865
+ uint8_t *cursor = ((uint8_t *)source) + width;
15866
+ while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
15867
+ cursor += width;
15868
+ }
15869
+
15870
+ return cursor == source + length;
15871
+ }
15872
+
15355
15873
  /**
15356
15874
  * Potentially change a =~ with a regular expression with named captures into a
15357
15875
  * match write node.
@@ -15362,42 +15880,77 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
15362
15880
  pm_node_t *result;
15363
15881
 
15364
15882
  if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) {
15365
- pm_match_write_node_t *match = pm_match_write_node_create(parser, call);
15883
+ // Since we should not create a MatchWriteNode when all capture names
15884
+ // are invalid, creating a MatchWriteNode is delayed here.
15885
+ pm_match_write_node_t *match = NULL;
15886
+ pm_constant_id_list_t names = { 0 };
15366
15887
 
15367
15888
  for (size_t index = 0; index < named_captures.length; index++) {
15368
- pm_string_t *name = &named_captures.strings[index];
15889
+ pm_string_t *string = &named_captures.strings[index];
15890
+
15891
+ const uint8_t *source = pm_string_source(string);
15892
+ size_t length = pm_string_length(string);
15369
15893
 
15370
- const uint8_t *source = pm_string_source(name);
15371
- size_t length = pm_string_length(name);
15894
+ pm_location_t location;
15895
+ pm_constant_id_t name;
15896
+
15897
+ // If the name of the capture group isn't a valid identifier, we do
15898
+ // not add it to the local table.
15899
+ if (!name_is_identifier(parser, source, length)) continue;
15372
15900
 
15373
- pm_constant_id_t local;
15374
15901
  if (content->type == PM_STRING_SHARED) {
15375
15902
  // If the unescaped string is a slice of the source, then we can
15376
15903
  // copy the names directly. The pointers will line up.
15377
- local = pm_parser_local_add_location(parser, source, source + length);
15378
-
15379
- if (token_is_numbered_parameter(source, source + length)) {
15380
- pm_parser_err(parser, source, source + length, PM_ERR_PARAMETER_NUMBERED_RESERVED);
15381
- }
15904
+ location = (pm_location_t) { .start = source, .end = source + length };
15905
+ name = pm_parser_constant_id_location(parser, location.start, location.end);
15906
+ pm_refute_numbered_parameter(parser, source, source + length);
15382
15907
  } else {
15383
15908
  // Otherwise, the name is a slice of the malloc-ed owned string,
15384
15909
  // in which case we need to copy it out into a new string.
15910
+ location = call->receiver->location;
15911
+
15385
15912
  void *memory = malloc(length);
15386
15913
  if (memory == NULL) abort();
15387
15914
 
15388
15915
  memcpy(memory, source, length);
15389
- local = pm_parser_local_add_owned(parser, (const uint8_t *) memory, length);
15916
+ name = pm_parser_constant_id_owned(parser, (const uint8_t *) memory, length);
15390
15917
 
15391
- if (token_is_numbered_parameter(source, source + length)) {
15918
+ if (pm_token_is_numbered_parameter(source, source + length)) {
15392
15919
  const pm_location_t *location = &call->receiver->location;
15393
- pm_parser_err_location(parser, location, PM_ERR_PARAMETER_NUMBERED_RESERVED);
15920
+ PM_PARSER_ERR_LOCATION_FORMAT(parser, location, PM_ERR_PARAMETER_NUMBERED_RESERVED, location->start);
15394
15921
  }
15395
15922
  }
15396
15923
 
15397
- pm_constant_id_list_append(&match->locals, local);
15924
+ if (name != 0) {
15925
+ // We don't want to create duplicate targets if the capture name
15926
+ // is duplicated.
15927
+ if (pm_constant_id_list_includes(&names, name)) continue;
15928
+ pm_constant_id_list_append(&names, name);
15929
+
15930
+ // Here we lazily create the MatchWriteNode since we know we're
15931
+ // about to add a target.
15932
+ if (match == NULL) match = pm_match_write_node_create(parser, call);
15933
+
15934
+ // First, find the depth of the local that is being assigned.
15935
+ int depth;
15936
+ if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
15937
+ pm_parser_local_add(parser, name);
15938
+ }
15939
+
15940
+ // Next, create the local variable target and add it to the
15941
+ // list of targets for the match.
15942
+ pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create_values(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
15943
+ pm_node_list_append(&match->targets, target);
15944
+ }
15945
+ }
15946
+
15947
+ if (match != NULL) {
15948
+ result = (pm_node_t *) match;
15949
+ } else {
15950
+ result = (pm_node_t *) call;
15398
15951
  }
15399
15952
 
15400
- result = (pm_node_t *) match;
15953
+ pm_constant_id_list_free(&names);
15401
15954
  } else {
15402
15955
  result = (pm_node_t *) call;
15403
15956
  }
@@ -15426,7 +15979,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15426
15979
  /* fallthrough */
15427
15980
  case PM_CASE_WRITABLE: {
15428
15981
  parser_lex(parser);
15429
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
15982
+ pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
15430
15983
  return parse_write(parser, node, &token, value);
15431
15984
  }
15432
15985
  case PM_SPLAT_NODE: {
@@ -15434,7 +15987,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15434
15987
  pm_multi_target_node_targets_append(parser, multi_target, node);
15435
15988
 
15436
15989
  parser_lex(parser);
15437
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
15990
+ pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
15438
15991
  return parse_write(parser, (pm_node_t *) multi_target, &token, value);
15439
15992
  }
15440
15993
  default:
@@ -15456,7 +16009,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15456
16009
  case PM_GLOBAL_VARIABLE_READ_NODE: {
15457
16010
  parser_lex(parser);
15458
16011
 
15459
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16012
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15460
16013
  pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
15461
16014
 
15462
16015
  pm_node_destroy(parser, node);
@@ -15465,7 +16018,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15465
16018
  case PM_CLASS_VARIABLE_READ_NODE: {
15466
16019
  parser_lex(parser);
15467
16020
 
15468
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16021
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15469
16022
  pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
15470
16023
 
15471
16024
  pm_node_destroy(parser, node);
@@ -15474,13 +16027,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15474
16027
  case PM_CONSTANT_PATH_NODE: {
15475
16028
  parser_lex(parser);
15476
16029
 
15477
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16030
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15478
16031
  return (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
15479
16032
  }
15480
16033
  case PM_CONSTANT_READ_NODE: {
15481
16034
  parser_lex(parser);
15482
16035
 
15483
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16036
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15484
16037
  pm_node_t *result = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
15485
16038
 
15486
16039
  pm_node_destroy(parser, node);
@@ -15489,7 +16042,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15489
16042
  case PM_INSTANCE_VARIABLE_READ_NODE: {
15490
16043
  parser_lex(parser);
15491
16044
 
15492
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16045
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15493
16046
  pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
15494
16047
 
15495
16048
  pm_node_destroy(parser, node);
@@ -15499,7 +16052,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15499
16052
  pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
15500
16053
  parser_lex(parser);
15501
16054
 
15502
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16055
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15503
16056
  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
15504
16057
 
15505
16058
  pm_node_destroy(parser, node);
@@ -15513,14 +16066,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15513
16066
  // receiver that could have been a local variable) then we
15514
16067
  // will transform it into a local variable write.
15515
16068
  if (pm_call_node_variable_call_p(cast)) {
15516
- pm_location_t message_loc = cast->message_loc;
15517
- pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc.start, message_loc.end);
15518
-
15519
- if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
15520
- pm_parser_err_location(parser, &message_loc, PM_ERR_PARAMETER_NUMBERED_RESERVED);
15521
- }
16069
+ pm_location_t *message_loc = &cast->message_loc;
16070
+ pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
15522
16071
 
15523
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16072
+ pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
16073
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15524
16074
  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
15525
16075
 
15526
16076
  pm_node_destroy(parser, (pm_node_t *) cast);
@@ -15531,7 +16081,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15531
16081
  // this is an aref expression, and we can transform it into
15532
16082
  // an aset expression.
15533
16083
  if (pm_call_node_index_p(cast)) {
15534
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16084
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15535
16085
  return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
15536
16086
  }
15537
16087
 
@@ -15543,7 +16093,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15543
16093
  }
15544
16094
 
15545
16095
  parse_call_operator_write(parser, cast, &token);
15546
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16096
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
15547
16097
  return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
15548
16098
  }
15549
16099
  case PM_MULTI_WRITE_NODE: {
@@ -15570,7 +16120,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15570
16120
  case PM_GLOBAL_VARIABLE_READ_NODE: {
15571
16121
  parser_lex(parser);
15572
16122
 
15573
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16123
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15574
16124
  pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
15575
16125
 
15576
16126
  pm_node_destroy(parser, node);
@@ -15579,7 +16129,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15579
16129
  case PM_CLASS_VARIABLE_READ_NODE: {
15580
16130
  parser_lex(parser);
15581
16131
 
15582
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16132
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15583
16133
  pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
15584
16134
 
15585
16135
  pm_node_destroy(parser, node);
@@ -15588,13 +16138,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15588
16138
  case PM_CONSTANT_PATH_NODE: {
15589
16139
  parser_lex(parser);
15590
16140
 
15591
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16141
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15592
16142
  return (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
15593
16143
  }
15594
16144
  case PM_CONSTANT_READ_NODE: {
15595
16145
  parser_lex(parser);
15596
16146
 
15597
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16147
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15598
16148
  pm_node_t *result = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
15599
16149
 
15600
16150
  pm_node_destroy(parser, node);
@@ -15603,7 +16153,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15603
16153
  case PM_INSTANCE_VARIABLE_READ_NODE: {
15604
16154
  parser_lex(parser);
15605
16155
 
15606
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16156
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15607
16157
  pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
15608
16158
 
15609
16159
  pm_node_destroy(parser, node);
@@ -15613,7 +16163,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15613
16163
  pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
15614
16164
  parser_lex(parser);
15615
16165
 
15616
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16166
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15617
16167
  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
15618
16168
 
15619
16169
  pm_node_destroy(parser, node);
@@ -15627,14 +16177,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15627
16177
  // receiver that could have been a local variable) then we
15628
16178
  // will transform it into a local variable write.
15629
16179
  if (pm_call_node_variable_call_p(cast)) {
15630
- pm_location_t message_loc = cast->message_loc;
15631
- pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc.start, message_loc.end);
16180
+ pm_location_t *message_loc = &cast->message_loc;
16181
+ pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
15632
16182
 
15633
- if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
15634
- pm_parser_err_location(parser, &message_loc, PM_ERR_PARAMETER_NUMBERED_RESERVED);
15635
- }
15636
-
15637
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16183
+ pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
16184
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15638
16185
  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
15639
16186
 
15640
16187
  pm_node_destroy(parser, (pm_node_t *) cast);
@@ -15645,7 +16192,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15645
16192
  // this is an aref expression, and we can transform it into
15646
16193
  // an aset expression.
15647
16194
  if (pm_call_node_index_p(cast)) {
15648
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16195
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15649
16196
  return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
15650
16197
  }
15651
16198
 
@@ -15657,7 +16204,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15657
16204
  }
15658
16205
 
15659
16206
  parse_call_operator_write(parser, cast, &token);
15660
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16207
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
15661
16208
  return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
15662
16209
  }
15663
16210
  case PM_MULTI_WRITE_NODE: {
@@ -15694,7 +16241,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15694
16241
  case PM_GLOBAL_VARIABLE_READ_NODE: {
15695
16242
  parser_lex(parser);
15696
16243
 
15697
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16244
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15698
16245
  pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
15699
16246
 
15700
16247
  pm_node_destroy(parser, node);
@@ -15703,7 +16250,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15703
16250
  case PM_CLASS_VARIABLE_READ_NODE: {
15704
16251
  parser_lex(parser);
15705
16252
 
15706
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16253
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15707
16254
  pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
15708
16255
 
15709
16256
  pm_node_destroy(parser, node);
@@ -15712,13 +16259,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15712
16259
  case PM_CONSTANT_PATH_NODE: {
15713
16260
  parser_lex(parser);
15714
16261
 
15715
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16262
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15716
16263
  return (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
15717
16264
  }
15718
16265
  case PM_CONSTANT_READ_NODE: {
15719
16266
  parser_lex(parser);
15720
16267
 
15721
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16268
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15722
16269
  pm_node_t *result = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
15723
16270
 
15724
16271
  pm_node_destroy(parser, node);
@@ -15727,7 +16274,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15727
16274
  case PM_INSTANCE_VARIABLE_READ_NODE: {
15728
16275
  parser_lex(parser);
15729
16276
 
15730
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16277
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15731
16278
  pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
15732
16279
 
15733
16280
  pm_node_destroy(parser, node);
@@ -15737,7 +16284,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15737
16284
  pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
15738
16285
  parser_lex(parser);
15739
16286
 
15740
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16287
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15741
16288
  pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
15742
16289
 
15743
16290
  pm_node_destroy(parser, node);
@@ -15751,14 +16298,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15751
16298
  // receiver that could have been a local variable) then we
15752
16299
  // will transform it into a local variable write.
15753
16300
  if (pm_call_node_variable_call_p(cast)) {
15754
- pm_location_t message_loc = cast->message_loc;
15755
- pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc.start, message_loc.end);
16301
+ pm_location_t *message_loc = &cast->message_loc;
16302
+ pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
15756
16303
 
15757
- if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
15758
- pm_parser_err_location(parser, &message_loc, PM_ERR_PARAMETER_NUMBERED_RESERVED);
15759
- }
15760
-
15761
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16304
+ pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
16305
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15762
16306
  pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
15763
16307
 
15764
16308
  pm_node_destroy(parser, (pm_node_t *) cast);
@@ -15769,7 +16313,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15769
16313
  // this is an aref expression, and we can transform it into
15770
16314
  // an aset expression.
15771
16315
  if (pm_call_node_index_p(cast)) {
15772
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16316
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15773
16317
  return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
15774
16318
  }
15775
16319
 
@@ -15781,7 +16325,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15781
16325
  }
15782
16326
 
15783
16327
  parse_call_operator_write(parser, cast, &token);
15784
- pm_node_t *value = parse_expression(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16328
+ pm_node_t *value = parse_assignment_value(parser, binding_power, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
15785
16329
  return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
15786
16330
  }
15787
16331
  case PM_MULTI_WRITE_NODE: {
@@ -15969,14 +16513,14 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15969
16513
  pm_token_t keyword = parser->current;
15970
16514
  parser_lex(parser);
15971
16515
 
15972
- pm_node_t *predicate = parse_expression(parser, binding_power, PM_ERR_CONDITIONAL_IF_PREDICATE);
16516
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_ERR_CONDITIONAL_IF_PREDICATE);
15973
16517
  return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
15974
16518
  }
15975
16519
  case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
15976
16520
  pm_token_t keyword = parser->current;
15977
16521
  parser_lex(parser);
15978
16522
 
15979
- pm_node_t *predicate = parse_expression(parser, binding_power, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
16523
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
15980
16524
  return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
15981
16525
  }
15982
16526
  case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
@@ -15984,7 +16528,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15984
16528
  pm_statements_node_t *statements = pm_statements_node_create(parser);
15985
16529
  pm_statements_node_body_append(statements, node);
15986
16530
 
15987
- pm_node_t *predicate = parse_expression(parser, binding_power, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
16531
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
15988
16532
  return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
15989
16533
  }
15990
16534
  case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
@@ -15992,10 +16536,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
15992
16536
  pm_statements_node_t *statements = pm_statements_node_create(parser);
15993
16537
  pm_statements_node_body_append(statements, node);
15994
16538
 
15995
- pm_node_t *predicate = parse_expression(parser, binding_power, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
16539
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
15996
16540
  return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
15997
16541
  }
15998
16542
  case PM_TOKEN_QUESTION_MARK: {
16543
+ pm_token_t qmark = parser->current;
15999
16544
  parser_lex(parser);
16000
16545
  pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_TERNARY_EXPRESSION_TRUE);
16001
16546
 
@@ -16009,7 +16554,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16009
16554
  pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
16010
16555
  pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
16011
16556
 
16012
- return (pm_node_t *) pm_if_node_ternary_create(parser, node, true_expression, &colon, false_expression);
16557
+ return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
16013
16558
  }
16014
16559
 
16015
16560
  accept1(parser, PM_TOKEN_NEWLINE);
@@ -16018,7 +16563,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16018
16563
  pm_token_t colon = parser->previous;
16019
16564
  pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_TERNARY_EXPRESSION_FALSE);
16020
16565
 
16021
- return (pm_node_t *) pm_if_node_ternary_create(parser, node, true_expression, &colon, false_expression);
16566
+ return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
16022
16567
  }
16023
16568
  case PM_TOKEN_COLON_COLON: {
16024
16569
  parser_lex(parser);
@@ -16212,6 +16757,12 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagn
16212
16757
  current_binding_powers.binary
16213
16758
  ) {
16214
16759
  node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right);
16760
+ if (
16761
+ current_binding_powers.nonassoc &&
16762
+ current_binding_powers.right <= pm_binding_powers[parser->current.type].left
16763
+ ) {
16764
+ break;
16765
+ }
16215
16766
  }
16216
16767
 
16217
16768
  return node;