prism 0.19.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +48 -1
  3. data/Makefile +5 -0
  4. data/README.md +8 -6
  5. data/config.yml +236 -38
  6. data/docs/build_system.md +19 -2
  7. data/docs/cruby_compilation.md +27 -0
  8. data/docs/parser_translation.md +34 -0
  9. data/docs/parsing_rules.md +19 -0
  10. data/docs/releasing.md +3 -3
  11. data/docs/ruby_api.md +1 -1
  12. data/docs/serialization.md +17 -5
  13. data/ext/prism/api_node.c +101 -81
  14. data/ext/prism/extension.c +74 -11
  15. data/ext/prism/extension.h +1 -1
  16. data/include/prism/ast.h +1700 -505
  17. data/include/prism/defines.h +8 -0
  18. data/include/prism/diagnostic.h +39 -2
  19. data/include/prism/encoding.h +10 -0
  20. data/include/prism/options.h +40 -14
  21. data/include/prism/parser.h +34 -18
  22. data/include/prism/util/pm_buffer.h +9 -0
  23. data/include/prism/util/pm_constant_pool.h +18 -0
  24. data/include/prism/util/pm_newline_list.h +0 -11
  25. data/include/prism/version.h +2 -2
  26. data/include/prism.h +19 -2
  27. data/lib/prism/debug.rb +11 -5
  28. data/lib/prism/dot_visitor.rb +36 -14
  29. data/lib/prism/dsl.rb +22 -22
  30. data/lib/prism/ffi.rb +2 -2
  31. data/lib/prism/node.rb +1020 -737
  32. data/lib/prism/node_ext.rb +2 -2
  33. data/lib/prism/parse_result.rb +17 -9
  34. data/lib/prism/serialize.rb +53 -29
  35. data/lib/prism/translation/parser/compiler.rb +1828 -0
  36. data/lib/prism/translation/parser/lexer.rb +335 -0
  37. data/lib/prism/translation/parser/rubocop.rb +37 -0
  38. data/lib/prism/translation/parser.rb +171 -0
  39. data/lib/prism/translation.rb +11 -0
  40. data/lib/prism.rb +1 -0
  41. data/prism.gemspec +12 -5
  42. data/rbi/prism.rbi +150 -88
  43. data/rbi/prism_static.rbi +15 -3
  44. data/sig/prism.rbs +996 -961
  45. data/sig/prism_static.rbs +123 -46
  46. data/src/diagnostic.c +259 -219
  47. data/src/encoding.c +5 -9
  48. data/src/node.c +2 -6
  49. data/src/options.c +24 -5
  50. data/src/prettyprint.c +174 -42
  51. data/src/prism.c +1344 -479
  52. data/src/serialize.c +12 -9
  53. data/src/token_type.c +353 -4
  54. data/src/util/pm_buffer.c +11 -0
  55. data/src/util/pm_constant_pool.c +37 -11
  56. data/src/util/pm_newline_list.c +2 -14
  57. metadata +10 -3
  58. data/docs/building.md +0 -29
data/src/prism.c CHANGED
@@ -164,7 +164,7 @@ debug_state(pm_parser_t *parser) {
164
164
 
165
165
  PRISM_ATTRIBUTE_UNUSED static void
166
166
  debug_token(pm_token_t * token) {
167
- fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_to_str(token->type), (int) (token->end - token->start), token->start);
167
+ fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_human(token->type), (int) (token->end - token->start), token->start);
168
168
  }
169
169
 
170
170
  #endif
@@ -423,6 +423,11 @@ lex_state_beg_p(pm_parser_t *parser) {
423
423
  return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
424
424
  }
425
425
 
426
+ static inline bool
427
+ lex_state_arg_labeled_p(pm_parser_t *parser) {
428
+ return (parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
429
+ }
430
+
426
431
  static inline bool
427
432
  lex_state_arg_p(pm_parser_t *parser) {
428
433
  return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
@@ -548,9 +553,7 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
548
553
  */
549
554
  static inline void
550
555
  pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
551
- if (!parser->suppress_warnings) {
552
- pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
553
- }
556
+ pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
554
557
  }
555
558
 
556
559
  /**
@@ -813,6 +816,9 @@ typedef struct {
813
816
 
814
817
  /** The optional block attached to the call. */
815
818
  pm_node_t *block;
819
+
820
+ /** The flag indicating whether this arguments list has forwarding argument. */
821
+ bool has_forwarding;
816
822
  } pm_arguments_t;
817
823
 
818
824
  /**
@@ -864,6 +870,105 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
864
870
  pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
865
871
  }
866
872
 
873
+ /******************************************************************************/
874
+ /* Basic character checks */
875
+ /******************************************************************************/
876
+
877
+ /**
878
+ * This function is used extremely frequently to lex all of the identifiers in a
879
+ * source file, so it's important that it be as fast as possible. For this
880
+ * reason we have the encoding_changed boolean to check if we need to go through
881
+ * the function pointer or can just directly use the UTF-8 functions.
882
+ */
883
+ static inline size_t
884
+ char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
885
+ if (parser->encoding_changed) {
886
+ size_t width;
887
+ if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
888
+ return width;
889
+ } else if (*b == '_') {
890
+ return 1;
891
+ } else if (*b >= 0x80) {
892
+ return parser->encoding->char_width(b, parser->end - b);
893
+ } else {
894
+ return 0;
895
+ }
896
+ } else if (*b < 0x80) {
897
+ return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
898
+ } else {
899
+ return pm_encoding_utf_8_char_width(b, parser->end - b);
900
+ }
901
+ }
902
+
903
+ /**
904
+ * Similar to char_is_identifier but this function assumes that the encoding
905
+ * has not been changed.
906
+ */
907
+ static inline size_t
908
+ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
909
+ if (*b < 0x80) {
910
+ return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
911
+ } else {
912
+ return pm_encoding_utf_8_char_width(b, end - b);
913
+ }
914
+ }
915
+
916
+ /**
917
+ * Like the above, this function is also used extremely frequently to lex all of
918
+ * the identifiers in a source file once the first character has been found. So
919
+ * it's important that it be as fast as possible.
920
+ */
921
+ static inline size_t
922
+ char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
923
+ if (parser->encoding_changed) {
924
+ size_t width;
925
+ if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
926
+ return width;
927
+ } else if (*b == '_') {
928
+ return 1;
929
+ } else if (*b >= 0x80) {
930
+ return parser->encoding->char_width(b, parser->end - b);
931
+ } else {
932
+ return 0;
933
+ }
934
+ }
935
+ return char_is_identifier_utf8(b, parser->end);
936
+ }
937
+
938
+ // Here we're defining a perfect hash for the characters that are allowed in
939
+ // global names. This is used to quickly check the next character after a $ to
940
+ // see if it's a valid character for a global name.
941
+ #define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
942
+ #define PUNCT(idx) ( \
943
+ BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
944
+ BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
945
+ BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
946
+ BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
947
+ BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
948
+ BIT('0', idx))
949
+
950
+ const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
951
+
952
+ #undef BIT
953
+ #undef PUNCT
954
+
955
+ static inline bool
956
+ char_is_global_name_punctuation(const uint8_t b) {
957
+ const unsigned int i = (const unsigned int) b;
958
+ if (i <= 0x20 || 0x7e < i) return false;
959
+
960
+ return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
961
+ }
962
+
963
+ static inline bool
964
+ token_is_setter_name(pm_token_t *token) {
965
+ return (
966
+ (token->type == PM_TOKEN_IDENTIFIER) &&
967
+ (token->end - token->start >= 2) &&
968
+ (token->end[-1] == '=')
969
+ );
970
+ }
971
+
867
972
  /******************************************************************************/
868
973
  /* Node flag handling functions */
869
974
  /******************************************************************************/
@@ -884,6 +989,22 @@ pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
884
989
  node->flags &= (pm_node_flags_t) ~flag;
885
990
  }
886
991
 
992
+ /**
993
+ * Set the repeated parameter flag on the given node.
994
+ */
995
+ static inline void
996
+ pm_node_flag_set_repeated_parameter(pm_node_t *node) {
997
+ assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
998
+ PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
999
+ PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1000
+ PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1001
+ PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1002
+ PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1003
+ PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1004
+ PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1005
+
1006
+ pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1007
+ }
887
1008
 
888
1009
  /******************************************************************************/
889
1010
  /* Node creation functions */
@@ -977,7 +1098,7 @@ static inline void *
977
1098
  pm_alloc_node(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
978
1099
  void *memory = calloc(1, size);
979
1100
  if (memory == NULL) {
980
- fprintf(stderr, "Failed to allocate %zu bytes\n", size);
1101
+ fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
981
1102
  abort();
982
1103
  }
983
1104
  return memory;
@@ -1325,7 +1446,7 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
1325
1446
  pm_assoc_node_t *node = PM_ALLOC_NODE(parser, pm_assoc_node_t);
1326
1447
  const uint8_t *end;
1327
1448
 
1328
- if (value != NULL) {
1449
+ if (value != NULL && value->location.end > key->location.end) {
1329
1450
  end = value->location.end;
1330
1451
  } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
1331
1452
  end = operator->end;
@@ -1333,6 +1454,13 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
1333
1454
  end = key->location.end;
1334
1455
  }
1335
1456
 
1457
+ // Hash string keys will be frozen, so we can mark them as frozen here so
1458
+ // that the compiler picks them up and also when we check for static literal
1459
+ // on the keys it gets factored in.
1460
+ if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
1461
+ key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
1462
+ }
1463
+
1336
1464
  // If the key and value of this assoc node are both static literals, then
1337
1465
  // we can mark this node as a static literal.
1338
1466
  pm_node_flags_t flags = 0;
@@ -1490,7 +1618,7 @@ pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, p
1490
1618
  * Allocate and initialize a new BlockNode node.
1491
1619
  */
1492
1620
  static pm_block_node_t *
1493
- pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_t locals_body_index, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
1621
+ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
1494
1622
  pm_block_node_t *node = PM_ALLOC_NODE(parser, pm_block_node_t);
1495
1623
 
1496
1624
  *node = (pm_block_node_t) {
@@ -1499,7 +1627,6 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_
1499
1627
  .location = { .start = opening->start, .end = closing->end },
1500
1628
  },
1501
1629
  .locals = *locals,
1502
- .locals_body_index = locals_body_index,
1503
1630
  .parameters = parameters,
1504
1631
  .body = body,
1505
1632
  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
@@ -1645,12 +1772,13 @@ pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
1645
1772
  * in the various specializations of this function.
1646
1773
  */
1647
1774
  static pm_call_node_t *
1648
- pm_call_node_create(pm_parser_t *parser) {
1775
+ pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
1649
1776
  pm_call_node_t *node = PM_ALLOC_NODE(parser, pm_call_node_t);
1650
1777
 
1651
1778
  *node = (pm_call_node_t) {
1652
1779
  {
1653
1780
  .type = PM_CALL_NODE,
1781
+ .flags = flags,
1654
1782
  .location = PM_LOCATION_NULL_VALUE(parser),
1655
1783
  },
1656
1784
  .receiver = NULL,
@@ -1666,6 +1794,15 @@ pm_call_node_create(pm_parser_t *parser) {
1666
1794
  return node;
1667
1795
  }
1668
1796
 
1797
+ /**
1798
+ * Returns the value that the ignore visibility flag should be set to for the
1799
+ * given receiver.
1800
+ */
1801
+ static inline pm_node_flags_t
1802
+ pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
1803
+ return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
1804
+ }
1805
+
1669
1806
  /**
1670
1807
  * Allocate and initialize a new CallNode node from an aref or an aset
1671
1808
  * expression.
@@ -1674,7 +1811,7 @@ static pm_call_node_t *
1674
1811
  pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
1675
1812
  pm_assert_value_expression(parser, receiver);
1676
1813
 
1677
- pm_call_node_t *node = pm_call_node_create(parser);
1814
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1678
1815
 
1679
1816
  node->base.location.start = receiver->location.start;
1680
1817
  node->base.location.end = pm_arguments_end(arguments);
@@ -1700,7 +1837,7 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t
1700
1837
  pm_assert_value_expression(parser, receiver);
1701
1838
  pm_assert_value_expression(parser, argument);
1702
1839
 
1703
- pm_call_node_t *node = pm_call_node_create(parser);
1840
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1704
1841
 
1705
1842
  node->base.location.start = MIN(receiver->location.start, argument->location.start);
1706
1843
  node->base.location.end = MAX(receiver->location.end, argument->location.end);
@@ -1723,7 +1860,7 @@ static pm_call_node_t *
1723
1860
  pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
1724
1861
  pm_assert_value_expression(parser, receiver);
1725
1862
 
1726
- pm_call_node_t *node = pm_call_node_create(parser);
1863
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1727
1864
 
1728
1865
  node->base.location.start = receiver->location.start;
1729
1866
  const uint8_t *end = pm_arguments_end(arguments);
@@ -1754,7 +1891,7 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
1754
1891
  */
1755
1892
  static pm_call_node_t *
1756
1893
  pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
1757
- pm_call_node_t *node = pm_call_node_create(parser);
1894
+ pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
1758
1895
 
1759
1896
  node->base.location.start = message->start;
1760
1897
  node->base.location.end = pm_arguments_end(arguments);
@@ -1776,7 +1913,7 @@ static pm_call_node_t *
1776
1913
  pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
1777
1914
  pm_assert_value_expression(parser, receiver);
1778
1915
 
1779
- pm_call_node_t *node = pm_call_node_create(parser);
1916
+ pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
1780
1917
 
1781
1918
  node->base.location.start = message->start;
1782
1919
  if (arguments->closing_loc.start != NULL) {
@@ -1802,7 +1939,7 @@ static pm_call_node_t *
1802
1939
  pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
1803
1940
  pm_assert_value_expression(parser, receiver);
1804
1941
 
1805
- pm_call_node_t *node = pm_call_node_create(parser);
1942
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1806
1943
 
1807
1944
  node->base.location.start = receiver->location.start;
1808
1945
  node->base.location.end = pm_arguments_end(arguments);
@@ -1829,7 +1966,7 @@ static pm_call_node_t *
1829
1966
  pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
1830
1967
  pm_assert_value_expression(parser, receiver);
1831
1968
 
1832
- pm_call_node_t *node = pm_call_node_create(parser);
1969
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1833
1970
 
1834
1971
  node->base.location.start = operator->start;
1835
1972
  node->base.location.end = receiver->location.end;
@@ -1847,7 +1984,7 @@ pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *
1847
1984
  */
1848
1985
  static pm_call_node_t *
1849
1986
  pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
1850
- pm_call_node_t *node = pm_call_node_create(parser);
1987
+ pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
1851
1988
 
1852
1989
  node->base.location = PM_LOCATION_TOKEN_VALUE(message);
1853
1990
  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
@@ -1885,11 +2022,12 @@ pm_call_node_index_p(pm_call_node_t *node) {
1885
2022
  * operator assignment.
1886
2023
  */
1887
2024
  static inline bool
1888
- pm_call_node_writable_p(pm_call_node_t *node) {
2025
+ pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
1889
2026
  return (
1890
2027
  (node->message_loc.start != NULL) &&
1891
2028
  (node->message_loc.end[-1] != '!') &&
1892
2029
  (node->message_loc.end[-1] != '?') &&
2030
+ char_is_identifier_start(parser, node->message_loc.start) &&
1893
2031
  (node->opening_loc.start == NULL) &&
1894
2032
  (node->arguments == NULL) &&
1895
2033
  (node->block == NULL)
@@ -2167,11 +2305,12 @@ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
2167
2305
  static pm_index_target_node_t *
2168
2306
  pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
2169
2307
  pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
2308
+ pm_node_flags_t flags = target->base.flags;
2170
2309
 
2171
2310
  *node = (pm_index_target_node_t) {
2172
2311
  {
2173
2312
  .type = PM_INDEX_TARGET_NODE,
2174
- .flags = target->base.flags,
2313
+ .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
2175
2314
  .location = target->base.location
2176
2315
  },
2177
2316
  .receiver = target->receiver,
@@ -2701,6 +2840,50 @@ pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *targ
2701
2840
  return node;
2702
2841
  }
2703
2842
 
2843
+ /**
2844
+ * Check if the receiver of a `def` node is allowed.
2845
+ */
2846
+ static void
2847
+ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
2848
+ switch (PM_NODE_TYPE(node)) {
2849
+ case PM_BEGIN_NODE: {
2850
+ const pm_begin_node_t *cast = (pm_begin_node_t *) node;
2851
+ if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
2852
+ break;
2853
+ }
2854
+ case PM_PARENTHESES_NODE: {
2855
+ const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
2856
+ if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
2857
+ break;
2858
+ }
2859
+ case PM_STATEMENTS_NODE: {
2860
+ const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
2861
+ pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
2862
+ break;
2863
+ }
2864
+ case PM_ARRAY_NODE:
2865
+ case PM_FLOAT_NODE:
2866
+ case PM_IMAGINARY_NODE:
2867
+ case PM_INTEGER_NODE:
2868
+ case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
2869
+ case PM_INTERPOLATED_STRING_NODE:
2870
+ case PM_INTERPOLATED_SYMBOL_NODE:
2871
+ case PM_INTERPOLATED_X_STRING_NODE:
2872
+ case PM_RATIONAL_NODE:
2873
+ case PM_REGULAR_EXPRESSION_NODE:
2874
+ case PM_SOURCE_ENCODING_NODE:
2875
+ case PM_SOURCE_FILE_NODE:
2876
+ case PM_SOURCE_LINE_NODE:
2877
+ case PM_STRING_NODE:
2878
+ case PM_SYMBOL_NODE:
2879
+ case PM_X_STRING_NODE:
2880
+ pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
2881
+ break;
2882
+ default:
2883
+ break;
2884
+ }
2885
+ }
2886
+
2704
2887
  /**
2705
2888
  * Allocate and initialize a new DefNode node.
2706
2889
  */
@@ -2712,7 +2895,6 @@ pm_def_node_create(
2712
2895
  pm_parameters_node_t *parameters,
2713
2896
  pm_node_t *body,
2714
2897
  pm_constant_id_list_t *locals,
2715
- uint32_t locals_body_index,
2716
2898
  const pm_token_t *def_keyword,
2717
2899
  const pm_token_t *operator,
2718
2900
  const pm_token_t *lparen,
@@ -2729,6 +2911,10 @@ pm_def_node_create(
2729
2911
  end = end_keyword->end;
2730
2912
  }
2731
2913
 
2914
+ if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
2915
+ pm_def_node_receiver_check(parser, receiver);
2916
+ }
2917
+
2732
2918
  *node = (pm_def_node_t) {
2733
2919
  {
2734
2920
  .type = PM_DEF_NODE,
@@ -2740,7 +2926,6 @@ pm_def_node_create(
2740
2926
  .parameters = parameters,
2741
2927
  .body = body,
2742
2928
  .locals = *locals,
2743
- .locals_body_index = locals_body_index,
2744
2929
  .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
2745
2930
  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2746
2931
  .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
@@ -3962,9 +4147,8 @@ pm_keyword_hash_node_create(pm_parser_t *parser) {
3962
4147
  */
3963
4148
  static void
3964
4149
  pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
3965
- // If the element being added is not an AssocNode or does not have a symbol key, then
3966
- // we want to turn the STATIC_KEYS flag off.
3967
- // TODO: Rename the flag to SYMBOL_KEYS instead.
4150
+ // If the element being added is not an AssocNode or does not have a symbol
4151
+ // key, then we want to turn the SYMBOL_KEYS flag off.
3968
4152
  if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
3969
4153
  pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
3970
4154
  }
@@ -4051,7 +4235,6 @@ static pm_lambda_node_t *
4051
4235
  pm_lambda_node_create(
4052
4236
  pm_parser_t *parser,
4053
4237
  pm_constant_id_list_t *locals,
4054
- uint32_t locals_body_index,
4055
4238
  const pm_token_t *operator,
4056
4239
  const pm_token_t *opening,
4057
4240
  const pm_token_t *closing,
@@ -4069,7 +4252,6 @@ pm_lambda_node_create(
4069
4252
  },
4070
4253
  },
4071
4254
  .locals = *locals,
4072
- .locals_body_index = locals_body_index,
4073
4255
  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4074
4256
  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4075
4257
  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
@@ -4161,12 +4343,10 @@ pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, c
4161
4343
  }
4162
4344
 
4163
4345
  /**
4164
- * Allocate a new LocalVariableReadNode node.
4346
+ * Allocate a new LocalVariableReadNode node with constant_id.
4165
4347
  */
4166
4348
  static pm_local_variable_read_node_t *
4167
- pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
4168
- pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
4169
-
4349
+ pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth) {
4170
4350
  if (parser->current_param_name == name_id) {
4171
4351
  pm_parser_err_token(parser, name, PM_ERR_PARAMETER_CIRCULAR);
4172
4352
  }
@@ -4185,6 +4365,15 @@ pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name,
4185
4365
  return node;
4186
4366
  }
4187
4367
 
4368
+ /**
4369
+ * Allocate a new LocalVariableReadNode node.
4370
+ */
4371
+ static pm_local_variable_read_node_t *
4372
+ pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
4373
+ pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
4374
+ return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth);
4375
+ }
4376
+
4188
4377
  /**
4189
4378
  * Allocate and initialize a new LocalVariableWriteNode node.
4190
4379
  */
@@ -4210,6 +4399,57 @@ pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name,
4210
4399
  return node;
4211
4400
  }
4212
4401
 
4402
+ /**
4403
+ * Returns true if the given bounds comprise `it`.
4404
+ */
4405
+ static inline bool
4406
+ pm_token_is_it(const uint8_t *start, const uint8_t *end) {
4407
+ return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
4408
+ }
4409
+
4410
+ /**
4411
+ * Returns true if the given node is `it` default parameter.
4412
+ */
4413
+ static inline bool
4414
+ pm_node_is_it(pm_parser_t *parser, pm_node_t *node) {
4415
+ // Check if it's a local variable reference
4416
+ if (node->type != PM_CALL_NODE) {
4417
+ return false;
4418
+ }
4419
+
4420
+ // Check if it's a variable call
4421
+ pm_call_node_t *call_node = (pm_call_node_t *) node;
4422
+ if (!pm_call_node_variable_call_p(call_node)) {
4423
+ return false;
4424
+ }
4425
+
4426
+ // Check if it's called `it`
4427
+ pm_constant_id_t id = ((pm_call_node_t *)node)->name;
4428
+ pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, id);
4429
+ return pm_token_is_it(constant->start, constant->start + constant->length);
4430
+ }
4431
+
4432
+ /**
4433
+ * Convert a `it` variable call node to a node for `it` default parameter.
4434
+ */
4435
+ static pm_node_t *
4436
+ pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
4437
+ if (
4438
+ (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
4439
+ !parser->current_scope->closed &&
4440
+ pm_node_is_it(parser, node)
4441
+ ) {
4442
+ if (parser->current_scope->explicit_params) {
4443
+ pm_parser_err_previous(parser, PM_ERR_IT_NOT_ALLOWED);
4444
+ } else {
4445
+ pm_node_destroy(parser, node);
4446
+ pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
4447
+ node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
4448
+ }
4449
+ }
4450
+ return node;
4451
+ }
4452
+
4213
4453
  /**
4214
4454
  * Returns true if the given bounds comprise a numbered parameter (i.e., they
4215
4455
  * are of the form /^_\d$/).
@@ -5195,7 +5435,7 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
5195
5435
  .flags = PM_NODE_FLAG_STATIC_LITERAL,
5196
5436
  .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
5197
5437
  },
5198
- .filepath = parser->filepath_string,
5438
+ .filepath = parser->filepath
5199
5439
  };
5200
5440
 
5201
5441
  return node;
@@ -5372,18 +5612,59 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
5372
5612
  return node;
5373
5613
  }
5374
5614
 
5615
+ /**
5616
+ * Read through the contents of a string and check if it consists solely of US ASCII code points.
5617
+ */
5618
+ static bool
5619
+ pm_ascii_only_p(const pm_string_t *contents) {
5620
+ const size_t length = pm_string_length(contents);
5621
+ const uint8_t *source = pm_string_source(contents);
5622
+
5623
+ for (size_t index = 0; index < length; index++) {
5624
+ if (source[index] & 0x80) return false;
5625
+ }
5626
+
5627
+ return true;
5628
+ }
5629
+
5630
+ /**
5631
+ * Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
5632
+ * encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
5633
+ * points. Otherwise, the encoding may be explicitly set with an escape
5634
+ * sequence.
5635
+ */
5636
+ static inline pm_node_flags_t
5637
+ parse_symbol_encoding(const pm_parser_t *parser, const pm_string_t *contents) {
5638
+ if (parser->explicit_encoding != NULL) {
5639
+ // A Symbol may optionally have its encoding explicitly set. This will
5640
+ // happen if an escape sequence results in a non-ASCII code point.
5641
+ if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
5642
+ return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
5643
+ } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
5644
+ return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
5645
+ }
5646
+ } else if (pm_ascii_only_p(contents)) {
5647
+ // Ruby stipulates that all source files must use an ASCII-compatible
5648
+ // encoding. Thus, all symbols appearing in source are eligible for
5649
+ // "downgrading" to US-ASCII.
5650
+ return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
5651
+ }
5652
+
5653
+ return 0;
5654
+ }
5655
+
5375
5656
  /**
5376
5657
  * Allocate and initialize a new SymbolNode node with the given unescaped
5377
5658
  * string.
5378
5659
  */
5379
5660
  static pm_symbol_node_t *
5380
- pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped) {
5661
+ pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
5381
5662
  pm_symbol_node_t *node = PM_ALLOC_NODE(parser, pm_symbol_node_t);
5382
5663
 
5383
5664
  *node = (pm_symbol_node_t) {
5384
5665
  {
5385
5666
  .type = PM_SYMBOL_NODE,
5386
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
5667
+ .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
5387
5668
  .location = {
5388
5669
  .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
5389
5670
  .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
@@ -5403,7 +5684,7 @@ pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening,
5403
5684
  */
5404
5685
  static inline pm_symbol_node_t *
5405
5686
  pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
5406
- return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY);
5687
+ return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
5407
5688
  }
5408
5689
 
5409
5690
  /**
@@ -5411,7 +5692,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
5411
5692
  */
5412
5693
  static pm_symbol_node_t *
5413
5694
  pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
5414
- pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string);
5695
+ pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, &parser->current_string));
5415
5696
  parser->current_string = PM_STRING_EMPTY;
5416
5697
  return node;
5417
5698
  }
@@ -5433,6 +5714,8 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
5433
5714
 
5434
5715
  assert((label.end - label.start) >= 0);
5435
5716
  pm_string_shared_init(&node->unescaped, label.start, label.end);
5717
+ pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &node->unescaped));
5718
+
5436
5719
  break;
5437
5720
  }
5438
5721
  case PM_TOKEN_MISSING: {
@@ -5495,6 +5778,8 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
5495
5778
  .unescaped = node->unescaped
5496
5779
  };
5497
5780
 
5781
+ pm_node_flag_set((pm_node_t *)new_node, parse_symbol_encoding(parser, &node->unescaped));
5782
+
5498
5783
  // We are explicitly _not_ using pm_node_destroy here because we don't want
5499
5784
  // to trash the unescaped string. We could instead copy the string if we
5500
5785
  // know that it is owned, but we're taking the fast path for now.
@@ -5885,6 +6170,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
5885
6170
  .closed = closed,
5886
6171
  .explicit_params = false,
5887
6172
  .numbered_parameters = 0,
6173
+ .forwarding_params = 0,
5888
6174
  };
5889
6175
 
5890
6176
  pm_constant_id_list_init(&scope->locals);
@@ -5893,6 +6179,76 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
5893
6179
  return true;
5894
6180
  }
5895
6181
 
6182
+ static void
6183
+ pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const pm_token_t * token, const uint8_t mask, pm_diagnostic_id_t diag)
6184
+ {
6185
+ pm_scope_t *scope = parser->current_scope;
6186
+ while (scope) {
6187
+ if (scope->forwarding_params & mask) {
6188
+ if (!scope->closed) {
6189
+ pm_parser_err_token(parser, token, diag);
6190
+ return;
6191
+ }
6192
+ return;
6193
+ }
6194
+ if (scope->closed) break;
6195
+ scope = scope->previous;
6196
+ }
6197
+
6198
+ pm_parser_err_token(parser, token, diag);
6199
+ }
6200
+
6201
+ static inline void
6202
+ pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token)
6203
+ {
6204
+ pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_BLOCK, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
6205
+ }
6206
+
6207
+ static void
6208
+ pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token)
6209
+ {
6210
+ pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_POSITIONALS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
6211
+ }
6212
+
6213
+ static inline void
6214
+ pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t * token)
6215
+ {
6216
+ pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_ALL, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
6217
+ }
6218
+
6219
+ static inline void
6220
+ pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token)
6221
+ {
6222
+ pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_KEYWORDS, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
6223
+ }
6224
+
6225
+ /**
6226
+ * Save the current param name as the return value and set it to the given
6227
+ * constant id.
6228
+ */
6229
+ static inline pm_constant_id_t
6230
+ pm_parser_current_param_name_set(pm_parser_t *parser, pm_constant_id_t current_param_name) {
6231
+ pm_constant_id_t saved_param_name = parser->current_param_name;
6232
+ parser->current_param_name = current_param_name;
6233
+ return saved_param_name;
6234
+ }
6235
+
6236
+ /**
6237
+ * Save the current param name as the return value and clear it.
6238
+ */
6239
+ static inline pm_constant_id_t
6240
+ pm_parser_current_param_name_unset(pm_parser_t *parser) {
6241
+ return pm_parser_current_param_name_set(parser, PM_CONSTANT_ID_UNSET);
6242
+ }
6243
+
6244
+ /**
6245
+ * Restore the current param name from the given value.
6246
+ */
6247
+ static inline void
6248
+ pm_parser_current_param_name_restore(pm_parser_t *parser, pm_constant_id_t saved_param_name) {
6249
+ parser->current_param_name = saved_param_name;
6250
+ }
6251
+
5896
6252
  /**
5897
6253
  * Check if any of the currently visible scopes contain a local variable
5898
6254
  * described by the given constant id.
@@ -5969,26 +6325,41 @@ pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t leng
5969
6325
  return constant_id;
5970
6326
  }
5971
6327
 
6328
+ /**
6329
+ * Add a local variable from a constant string to the current scope.
6330
+ */
6331
+ static pm_constant_id_t
6332
+ pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
6333
+ pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
6334
+ if (constant_id != 0) pm_parser_local_add(parser, constant_id);
6335
+ return constant_id;
6336
+ }
6337
+
5972
6338
  /**
5973
6339
  * Add a parameter name to the current scope and check whether the name of the
5974
6340
  * parameter is unique or not.
6341
+ *
6342
+ * Returns `true` if this is a duplicate parameter name, otherwise returns
6343
+ * false.
5975
6344
  */
5976
- static void
6345
+ static bool
5977
6346
  pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
5978
6347
  // We want to check whether the parameter name is a numbered parameter or
5979
6348
  // not.
5980
6349
  pm_refute_numbered_parameter(parser, name->start, name->end);
5981
6350
 
5982
- // We want to ignore any parameter name that starts with an underscore.
5983
- if ((name->start < name->end) && (*name->start == '_')) return;
5984
-
5985
6351
  // Otherwise we'll fetch the constant id for the parameter name and check
5986
6352
  // whether it's already in the current scope.
5987
6353
  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
5988
6354
 
5989
6355
  if (pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
5990
- pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_REPEAT);
6356
+ // Add an error if the parameter doesn't start with _ and has been seen before
6357
+ if ((name->start < name->end) && (*name->start != '_')) {
6358
+ pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_REPEAT);
6359
+ }
6360
+ return true;
5991
6361
  }
6362
+ return false;
5992
6363
  }
5993
6364
 
5994
6365
  /**
@@ -6003,105 +6374,6 @@ pm_parser_scope_pop(pm_parser_t *parser) {
6003
6374
  free(scope);
6004
6375
  }
6005
6376
 
6006
- /******************************************************************************/
6007
- /* Basic character checks */
6008
- /******************************************************************************/
6009
-
6010
- /**
6011
- * This function is used extremely frequently to lex all of the identifiers in a
6012
- * source file, so it's important that it be as fast as possible. For this
6013
- * reason we have the encoding_changed boolean to check if we need to go through
6014
- * the function pointer or can just directly use the UTF-8 functions.
6015
- */
6016
- static inline size_t
6017
- char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
6018
- if (parser->encoding_changed) {
6019
- size_t width;
6020
- if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
6021
- return width;
6022
- } else if (*b == '_') {
6023
- return 1;
6024
- } else if (*b >= 0x80) {
6025
- return parser->encoding->char_width(b, parser->end - b);
6026
- } else {
6027
- return 0;
6028
- }
6029
- } else if (*b < 0x80) {
6030
- return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
6031
- } else {
6032
- return (size_t) (pm_encoding_utf_8_alpha_char(b, parser->end - b) || 1u);
6033
- }
6034
- }
6035
-
6036
- /**
6037
- * Similar to char_is_identifier but this function assumes that the encoding
6038
- * has not been changed.
6039
- */
6040
- static inline size_t
6041
- char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
6042
- if (*b < 0x80) {
6043
- return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
6044
- } else {
6045
- return (size_t) (pm_encoding_utf_8_alnum_char(b, end - b) || 1u);
6046
- }
6047
- }
6048
-
6049
- /**
6050
- * Like the above, this function is also used extremely frequently to lex all of
6051
- * the identifiers in a source file once the first character has been found. So
6052
- * it's important that it be as fast as possible.
6053
- */
6054
- static inline size_t
6055
- char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
6056
- if (parser->encoding_changed) {
6057
- size_t width;
6058
- if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
6059
- return width;
6060
- } else if (*b == '_') {
6061
- return 1;
6062
- } else if (*b >= 0x80) {
6063
- return parser->encoding->char_width(b, parser->end - b);
6064
- } else {
6065
- return 0;
6066
- }
6067
- }
6068
- return char_is_identifier_utf8(b, parser->end);
6069
- }
6070
-
6071
- // Here we're defining a perfect hash for the characters that are allowed in
6072
- // global names. This is used to quickly check the next character after a $ to
6073
- // see if it's a valid character for a global name.
6074
- #define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
6075
- #define PUNCT(idx) ( \
6076
- BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
6077
- BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
6078
- BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
6079
- BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
6080
- BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
6081
- BIT('0', idx))
6082
-
6083
- const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
6084
-
6085
- #undef BIT
6086
- #undef PUNCT
6087
-
6088
- static inline bool
6089
- char_is_global_name_punctuation(const uint8_t b) {
6090
- const unsigned int i = (const unsigned int) b;
6091
- if (i <= 0x20 || 0x7e < i) return false;
6092
-
6093
- return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
6094
- }
6095
-
6096
- static inline bool
6097
- token_is_setter_name(pm_token_t *token) {
6098
- return (
6099
- (token->type == PM_TOKEN_IDENTIFIER) &&
6100
- (token->end - token->start >= 2) &&
6101
- (token->end[-1] == '=')
6102
- );
6103
- }
6104
-
6105
6377
  /******************************************************************************/
6106
6378
  /* Stack helpers */
6107
6379
  /******************************************************************************/
@@ -6317,8 +6589,10 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) {
6317
6589
  */
6318
6590
  static void
6319
6591
  parser_lex_magic_comment_frozen_string_literal_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
6320
- if (start + 4 <= end && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
6592
+ if ((start + 4 <= end) && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
6321
6593
  parser->frozen_string_literal = true;
6594
+ } else if ((start + 5 <= end) && pm_strncasecmp(start, (const uint8_t *) "false", 5) == 0) {
6595
+ parser->frozen_string_literal = false;
6322
6596
  }
6323
6597
  }
6324
6598
 
@@ -6541,21 +6815,27 @@ context_terminator(pm_context_t context, pm_token_t *token) {
6541
6815
  return token->type == PM_TOKEN_BRACE_RIGHT;
6542
6816
  case PM_CONTEXT_PREDICATE:
6543
6817
  return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
6818
+ case PM_CONTEXT_NONE:
6819
+ return false;
6544
6820
  }
6545
6821
 
6546
6822
  return false;
6547
6823
  }
6548
6824
 
6549
- static bool
6550
- context_recoverable(pm_parser_t *parser, pm_token_t *token) {
6825
+ /**
6826
+ * Returns the context that the given token is found to be terminating, or
6827
+ * returns PM_CONTEXT_NONE.
6828
+ */
6829
+ static pm_context_t
6830
+ context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
6551
6831
  pm_context_node_t *context_node = parser->current_context;
6552
6832
 
6553
6833
  while (context_node != NULL) {
6554
- if (context_terminator(context_node->context, token)) return true;
6834
+ if (context_terminator(context_node->context, token)) return context_node->context;
6555
6835
  context_node = context_node->prev;
6556
6836
  }
6557
6837
 
6558
- return false;
6838
+ return PM_CONTEXT_NONE;
6559
6839
  }
6560
6840
 
6561
6841
  static bool
@@ -6583,7 +6863,7 @@ context_pop(pm_parser_t *parser) {
6583
6863
  }
6584
6864
 
6585
6865
  static bool
6586
- context_p(pm_parser_t *parser, pm_context_t context) {
6866
+ context_p(const pm_parser_t *parser, pm_context_t context) {
6587
6867
  pm_context_node_t *context_node = parser->current_context;
6588
6868
 
6589
6869
  while (context_node != NULL) {
@@ -6595,7 +6875,7 @@ context_p(pm_parser_t *parser, pm_context_t context) {
6595
6875
  }
6596
6876
 
6597
6877
  static bool
6598
- context_def_p(pm_parser_t *parser) {
6878
+ context_def_p(const pm_parser_t *parser) {
6599
6879
  pm_context_node_t *context_node = parser->current_context;
6600
6880
 
6601
6881
  while (context_node != NULL) {
@@ -6618,6 +6898,55 @@ context_def_p(pm_parser_t *parser) {
6618
6898
  return false;
6619
6899
  }
6620
6900
 
6901
+ /**
6902
+ * Returns a human readable string for the given context, used in error
6903
+ * messages.
6904
+ */
6905
+ static const char *
6906
+ context_human(pm_context_t context) {
6907
+ switch (context) {
6908
+ case PM_CONTEXT_NONE:
6909
+ assert(false && "unreachable");
6910
+ return "";
6911
+ case PM_CONTEXT_BEGIN: return "begin statement";
6912
+ case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
6913
+ case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
6914
+ case PM_CONTEXT_CASE_WHEN: return "'when' clause";
6915
+ case PM_CONTEXT_CASE_IN: return "'in' clause";
6916
+ case PM_CONTEXT_CLASS: return "class definition";
6917
+ case PM_CONTEXT_DEF: return "method definition";
6918
+ case PM_CONTEXT_DEF_PARAMS: return "method parameters";
6919
+ case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
6920
+ case PM_CONTEXT_ELSE: return "'else' clause";
6921
+ case PM_CONTEXT_ELSIF: return "'elsif' clause";
6922
+ case PM_CONTEXT_EMBEXPR: return "embedded expression";
6923
+ case PM_CONTEXT_ENSURE: return "'ensure' clause";
6924
+ case PM_CONTEXT_ENSURE_DEF: return "'ensure' clause";
6925
+ case PM_CONTEXT_FOR: return "for loop";
6926
+ case PM_CONTEXT_FOR_INDEX: return "for loop index";
6927
+ case PM_CONTEXT_IF: return "if statement";
6928
+ case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
6929
+ case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
6930
+ case PM_CONTEXT_MAIN: return "top level context";
6931
+ case PM_CONTEXT_MODULE: return "module definition";
6932
+ case PM_CONTEXT_PARENS: return "parentheses";
6933
+ case PM_CONTEXT_POSTEXE: return "'END' block";
6934
+ case PM_CONTEXT_PREDICATE: return "predicate";
6935
+ case PM_CONTEXT_PREEXE: return "'BEGIN' block";
6936
+ case PM_CONTEXT_RESCUE_ELSE: return "'else' clause";
6937
+ case PM_CONTEXT_RESCUE_ELSE_DEF: return "'else' clause";
6938
+ case PM_CONTEXT_RESCUE: return "'rescue' clause";
6939
+ case PM_CONTEXT_RESCUE_DEF: return "'rescue' clause";
6940
+ case PM_CONTEXT_SCLASS: return "singleton class definition";
6941
+ case PM_CONTEXT_UNLESS: return "unless statement";
6942
+ case PM_CONTEXT_UNTIL: return "until statement";
6943
+ case PM_CONTEXT_WHILE: return "while statement";
6944
+ }
6945
+
6946
+ assert(false && "unreachable");
6947
+ return "";
6948
+ }
6949
+
6621
6950
  /******************************************************************************/
6622
6951
  /* Specific token lexers */
6623
6952
  /******************************************************************************/
@@ -7360,6 +7689,28 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
7360
7689
  pm_buffer_append_byte(buffer, byte);
7361
7690
  }
7362
7691
 
7692
+ /**
7693
+ * Write each byte of the given escaped character into the buffer.
7694
+ */
7695
+ static inline void
7696
+ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
7697
+ size_t width;
7698
+ if (parser->encoding_changed) {
7699
+ width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
7700
+ } else {
7701
+ width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
7702
+ }
7703
+
7704
+ // TODO: If the character is invalid in the given encoding, then we'll just
7705
+ // push one byte into the buffer. This should actually be an error.
7706
+ width = (width == 0) ? 1 : width;
7707
+
7708
+ for (size_t index = 0; index < width; index++) {
7709
+ escape_write_byte_encoded(parser, buffer, *parser->current.end);
7710
+ parser->current.end++;
7711
+ }
7712
+ }
7713
+
7363
7714
  /**
7364
7715
  * The regular expression engine doesn't support the same escape sequences as
7365
7716
  * Ruby does. So first we have to read the escape sequence, and then we have to
@@ -7698,7 +8049,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7698
8049
  /* fallthrough */
7699
8050
  default: {
7700
8051
  if (parser->current.end < parser->end) {
7701
- escape_write_byte_encoded(parser, buffer, *parser->current.end++);
8052
+ escape_write_escape_encoded(parser, buffer);
7702
8053
  }
7703
8054
  return;
7704
8055
  }
@@ -7975,14 +8326,43 @@ typedef struct {
7975
8326
  * Push the given byte into the token buffer.
7976
8327
  */
7977
8328
  static inline void
7978
- pm_token_buffer_push(pm_token_buffer_t *token_buffer, uint8_t byte) {
8329
+ pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
7979
8330
  pm_buffer_append_byte(&token_buffer->buffer, byte);
7980
8331
  }
7981
8332
 
8333
+ /**
8334
+ * Append the given bytes into the token buffer.
8335
+ */
8336
+ static inline void
8337
+ pm_token_buffer_push_bytes(pm_token_buffer_t *token_buffer, const uint8_t *bytes, size_t length) {
8338
+ pm_buffer_append_bytes(&token_buffer->buffer, bytes, length);
8339
+ }
8340
+
8341
+ /**
8342
+ * Push an escaped character into the token buffer.
8343
+ */
8344
+ static inline void
8345
+ pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
8346
+ // First, determine the width of the character to be escaped.
8347
+ size_t width;
8348
+ if (parser->encoding_changed) {
8349
+ width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8350
+ } else {
8351
+ width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
8352
+ }
8353
+
8354
+ // TODO: If the character is invalid in the given encoding, then we'll just
8355
+ // push one byte into the buffer. This should actually be an error.
8356
+ width = (width == 0 ? 1 : width);
8357
+
8358
+ // Now, push the bytes into the buffer.
8359
+ pm_token_buffer_push_bytes(token_buffer, parser->current.end, width);
8360
+ parser->current.end += width;
8361
+ }
8362
+
7982
8363
  /**
7983
8364
  * When we're about to return from lexing the current token and we know for sure
7984
8365
  * that we have found an escape sequence, this function is called to copy the
7985
- *
7986
8366
  * contents of the token buffer into the current string on the parser so that it
7987
8367
  * can be attached to the correct node.
7988
8368
  */
@@ -7997,7 +8377,6 @@ pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
7997
8377
  * string. If we haven't pushed anything into the buffer, this means that we
7998
8378
  * never found an escape sequence, so we can directly reference the bounds of
7999
8379
  * the current string. Either way, at the return of this function it is expected
8000
- *
8001
8380
  * that parser->current_string is established in such a way that it can be
8002
8381
  * attached to a node.
8003
8382
  */
@@ -8016,7 +8395,6 @@ pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
8016
8395
  * point into the buffer because we're about to provide a string that has
8017
8396
  * different content than a direct slice of the source.
8018
8397
  *
8019
- *
8020
8398
  * It is expected that the parser's current token end will be pointing at one
8021
8399
  * byte past the backslash that starts the escape sequence.
8022
8400
  */
@@ -8070,6 +8448,34 @@ pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor,
8070
8448
  return whitespace;
8071
8449
  }
8072
8450
 
8451
+ /**
8452
+ * Lex past the delimiter of a percent literal. Handle newlines and heredocs
8453
+ * appropriately.
8454
+ */
8455
+ static uint8_t
8456
+ pm_lex_percent_delimiter(pm_parser_t *parser) {
8457
+ size_t eol_length = match_eol(parser);
8458
+
8459
+ if (eol_length) {
8460
+ if (parser->heredoc_end) {
8461
+ // If we have already lexed a heredoc, then the newline has already
8462
+ // been added to the list. In this case we want to just flush the
8463
+ // heredoc end.
8464
+ parser_flush_heredoc_end(parser);
8465
+ } else {
8466
+ // Otherwise, we'll add the newline to the list of newlines.
8467
+ pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
8468
+ }
8469
+
8470
+ const uint8_t delimiter = *parser->current.end;
8471
+ parser->current.end += eol_length;
8472
+
8473
+ return delimiter;
8474
+ }
8475
+
8476
+ return *parser->current.end++;
8477
+ }
8478
+
8073
8479
  /**
8074
8480
  * This is a convenience macro that will set the current token type, call the
8075
8481
  * lex callback, and then return from the parser_lex function.
@@ -8635,7 +9041,7 @@ parser_lex(pm_parser_t *parser) {
8635
9041
  // this is not a valid heredoc declaration. In this case we
8636
9042
  // will add an error, but we will still return a heredoc
8637
9043
  // start.
8638
- pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
9044
+ pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
8639
9045
  body_start = parser->end;
8640
9046
  } else {
8641
9047
  // Otherwise, we want to indicate that the body of the
@@ -8826,12 +9232,10 @@ parser_lex(pm_parser_t *parser) {
8826
9232
  LEX(PM_TOKEN_PLUS_EQUAL);
8827
9233
  }
8828
9234
 
8829
- bool spcarg = lex_state_spcarg_p(parser, space_seen);
8830
- if (spcarg) {
8831
- pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS);
8832
- }
8833
-
8834
- if (lex_state_beg_p(parser) || spcarg) {
9235
+ if (
9236
+ lex_state_beg_p(parser) ||
9237
+ (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
9238
+ ) {
8835
9239
  lex_state_set(parser, PM_LEX_STATE_BEG);
8836
9240
 
8837
9241
  if (pm_char_is_decimal_digit(peek(parser))) {
@@ -8871,11 +9275,12 @@ parser_lex(pm_parser_t *parser) {
8871
9275
  }
8872
9276
 
8873
9277
  bool spcarg = lex_state_spcarg_p(parser, space_seen);
8874
- if (spcarg) {
9278
+ bool is_beg = lex_state_beg_p(parser);
9279
+ if (!is_beg && spcarg) {
8875
9280
  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
8876
9281
  }
8877
9282
 
8878
- if (lex_state_beg_p(parser) || spcarg) {
9283
+ if (is_beg || spcarg) {
8879
9284
  lex_state_set(parser, PM_LEX_STATE_BEG);
8880
9285
  LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
8881
9286
  }
@@ -9026,15 +9431,8 @@ parser_lex(pm_parser_t *parser) {
9026
9431
  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
9027
9432
  }
9028
9433
 
9029
- lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9030
-
9031
- size_t eol_length = match_eol(parser);
9032
- if (eol_length) {
9033
- parser->current.end += eol_length;
9034
- pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9035
- } else {
9036
- parser->current.end++;
9037
- }
9434
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9435
+ lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9038
9436
 
9039
9437
  if (parser->current.end < parser->end) {
9040
9438
  LEX(PM_TOKEN_STRING_BEGIN);
@@ -9054,7 +9452,7 @@ parser_lex(pm_parser_t *parser) {
9054
9452
  parser->current.end++;
9055
9453
 
9056
9454
  if (parser->current.end < parser->end) {
9057
- lex_mode_push_list(parser, false, *parser->current.end++);
9455
+ lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
9058
9456
  } else {
9059
9457
  lex_mode_push_list_eof(parser);
9060
9458
  }
@@ -9065,7 +9463,7 @@ parser_lex(pm_parser_t *parser) {
9065
9463
  parser->current.end++;
9066
9464
 
9067
9465
  if (parser->current.end < parser->end) {
9068
- lex_mode_push_list(parser, true, *parser->current.end++);
9466
+ lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
9069
9467
  } else {
9070
9468
  lex_mode_push_list_eof(parser);
9071
9469
  }
@@ -9076,9 +9474,8 @@ parser_lex(pm_parser_t *parser) {
9076
9474
  parser->current.end++;
9077
9475
 
9078
9476
  if (parser->current.end < parser->end) {
9079
- lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9080
- pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9081
- parser->current.end++;
9477
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9478
+ lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9082
9479
  } else {
9083
9480
  lex_mode_push_regexp(parser, '\0', '\0');
9084
9481
  }
@@ -9089,9 +9486,8 @@ parser_lex(pm_parser_t *parser) {
9089
9486
  parser->current.end++;
9090
9487
 
9091
9488
  if (parser->current.end < parser->end) {
9092
- lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9093
- pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9094
- parser->current.end++;
9489
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9490
+ lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9095
9491
  } else {
9096
9492
  lex_mode_push_string_eof(parser);
9097
9493
  }
@@ -9102,9 +9498,8 @@ parser_lex(pm_parser_t *parser) {
9102
9498
  parser->current.end++;
9103
9499
 
9104
9500
  if (parser->current.end < parser->end) {
9105
- lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9106
- pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9107
- parser->current.end++;
9501
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9502
+ lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9108
9503
  } else {
9109
9504
  lex_mode_push_string_eof(parser);
9110
9505
  }
@@ -9115,9 +9510,9 @@ parser_lex(pm_parser_t *parser) {
9115
9510
  parser->current.end++;
9116
9511
 
9117
9512
  if (parser->current.end < parser->end) {
9118
- lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9513
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9514
+ lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9119
9515
  lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
9120
- parser->current.end++;
9121
9516
  } else {
9122
9517
  lex_mode_push_string_eof(parser);
9123
9518
  }
@@ -9128,7 +9523,7 @@ parser_lex(pm_parser_t *parser) {
9128
9523
  parser->current.end++;
9129
9524
 
9130
9525
  if (parser->current.end < parser->end) {
9131
- lex_mode_push_list(parser, false, *parser->current.end++);
9526
+ lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
9132
9527
  } else {
9133
9528
  lex_mode_push_list_eof(parser);
9134
9529
  }
@@ -9139,7 +9534,7 @@ parser_lex(pm_parser_t *parser) {
9139
9534
  parser->current.end++;
9140
9535
 
9141
9536
  if (parser->current.end < parser->end) {
9142
- lex_mode_push_list(parser, true, *parser->current.end++);
9537
+ lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
9143
9538
  } else {
9144
9539
  lex_mode_push_list_eof(parser);
9145
9540
  }
@@ -9150,8 +9545,8 @@ parser_lex(pm_parser_t *parser) {
9150
9545
  parser->current.end++;
9151
9546
 
9152
9547
  if (parser->current.end < parser->end) {
9153
- lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9154
- parser->current.end++;
9548
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9549
+ lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9155
9550
  } else {
9156
9551
  lex_mode_push_string_eof(parser);
9157
9552
  }
@@ -9377,18 +9772,18 @@ parser_lex(pm_parser_t *parser) {
9377
9772
  case '\t':
9378
9773
  case '\v':
9379
9774
  case '\\':
9380
- pm_token_buffer_push(&token_buffer, peeked);
9775
+ pm_token_buffer_push_byte(&token_buffer, peeked);
9381
9776
  parser->current.end++;
9382
9777
  break;
9383
9778
  case '\r':
9384
9779
  parser->current.end++;
9385
9780
  if (peek(parser) != '\n') {
9386
- pm_token_buffer_push(&token_buffer, '\r');
9781
+ pm_token_buffer_push_byte(&token_buffer, '\r');
9387
9782
  break;
9388
9783
  }
9389
9784
  /* fallthrough */
9390
9785
  case '\n':
9391
- pm_token_buffer_push(&token_buffer, '\n');
9786
+ pm_token_buffer_push_byte(&token_buffer, '\n');
9392
9787
 
9393
9788
  if (parser->heredoc_end) {
9394
9789
  // ... if we are on the same line as a heredoc,
@@ -9406,14 +9801,13 @@ parser_lex(pm_parser_t *parser) {
9406
9801
  break;
9407
9802
  default:
9408
9803
  if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
9409
- pm_token_buffer_push(&token_buffer, peeked);
9804
+ pm_token_buffer_push_byte(&token_buffer, peeked);
9410
9805
  parser->current.end++;
9411
9806
  } else if (lex_mode->as.list.interpolation) {
9412
9807
  escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
9413
9808
  } else {
9414
- pm_token_buffer_push(&token_buffer, '\\');
9415
- pm_token_buffer_push(&token_buffer, peeked);
9416
- parser->current.end++;
9809
+ pm_token_buffer_push_byte(&token_buffer, '\\');
9810
+ pm_token_buffer_push_escaped(&token_buffer, parser);
9417
9811
  }
9418
9812
 
9419
9813
  break;
@@ -9571,9 +9965,9 @@ parser_lex(pm_parser_t *parser) {
9571
9965
  parser->current.end++;
9572
9966
  if (peek(parser) != '\n') {
9573
9967
  if (lex_mode->as.regexp.terminator != '\r') {
9574
- pm_token_buffer_push(&token_buffer, '\\');
9968
+ pm_token_buffer_push_byte(&token_buffer, '\\');
9575
9969
  }
9576
- pm_token_buffer_push(&token_buffer, '\r');
9970
+ pm_token_buffer_push_byte(&token_buffer, '\r');
9577
9971
  break;
9578
9972
  }
9579
9973
  /* fallthrough */
@@ -9608,20 +10002,19 @@ parser_lex(pm_parser_t *parser) {
9608
10002
  case '$': case ')': case '*': case '+':
9609
10003
  case '.': case '>': case '?': case ']':
9610
10004
  case '^': case '|': case '}':
9611
- pm_token_buffer_push(&token_buffer, '\\');
10005
+ pm_token_buffer_push_byte(&token_buffer, '\\');
9612
10006
  break;
9613
10007
  default:
9614
10008
  break;
9615
10009
  }
9616
10010
 
9617
- pm_token_buffer_push(&token_buffer, peeked);
10011
+ pm_token_buffer_push_byte(&token_buffer, peeked);
9618
10012
  parser->current.end++;
9619
10013
  break;
9620
10014
  }
9621
10015
 
9622
- if (peeked < 0x80) pm_token_buffer_push(&token_buffer, '\\');
9623
- pm_token_buffer_push(&token_buffer, peeked);
9624
- parser->current.end++;
10016
+ if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer, '\\');
10017
+ pm_token_buffer_push_escaped(&token_buffer, parser);
9625
10018
  break;
9626
10019
  }
9627
10020
 
@@ -9788,23 +10181,23 @@ parser_lex(pm_parser_t *parser) {
9788
10181
 
9789
10182
  switch (peeked) {
9790
10183
  case '\\':
9791
- pm_token_buffer_push(&token_buffer, '\\');
10184
+ pm_token_buffer_push_byte(&token_buffer, '\\');
9792
10185
  parser->current.end++;
9793
10186
  break;
9794
10187
  case '\r':
9795
10188
  parser->current.end++;
9796
10189
  if (peek(parser) != '\n') {
9797
10190
  if (!lex_mode->as.string.interpolation) {
9798
- pm_token_buffer_push(&token_buffer, '\\');
10191
+ pm_token_buffer_push_byte(&token_buffer, '\\');
9799
10192
  }
9800
- pm_token_buffer_push(&token_buffer, '\r');
10193
+ pm_token_buffer_push_byte(&token_buffer, '\r');
9801
10194
  break;
9802
10195
  }
9803
10196
  /* fallthrough */
9804
10197
  case '\n':
9805
10198
  if (!lex_mode->as.string.interpolation) {
9806
- pm_token_buffer_push(&token_buffer, '\\');
9807
- pm_token_buffer_push(&token_buffer, '\n');
10199
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10200
+ pm_token_buffer_push_byte(&token_buffer, '\n');
9808
10201
  }
9809
10202
 
9810
10203
  if (parser->heredoc_end) {
@@ -9823,17 +10216,16 @@ parser_lex(pm_parser_t *parser) {
9823
10216
  break;
9824
10217
  default:
9825
10218
  if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
9826
- pm_token_buffer_push(&token_buffer, peeked);
10219
+ pm_token_buffer_push_byte(&token_buffer, peeked);
9827
10220
  parser->current.end++;
9828
10221
  } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
9829
- pm_token_buffer_push(&token_buffer, peeked);
10222
+ pm_token_buffer_push_byte(&token_buffer, peeked);
9830
10223
  parser->current.end++;
9831
10224
  } else if (lex_mode->as.string.interpolation) {
9832
10225
  escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
9833
10226
  } else {
9834
- pm_token_buffer_push(&token_buffer, '\\');
9835
- pm_token_buffer_push(&token_buffer, peeked);
9836
- parser->current.end++;
10227
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10228
+ pm_token_buffer_push_escaped(&token_buffer, parser);
9837
10229
  }
9838
10230
 
9839
10231
  break;
@@ -9888,15 +10280,22 @@ parser_lex(pm_parser_t *parser) {
9888
10280
  parser->next_start = NULL;
9889
10281
  }
9890
10282
 
9891
- // We'll check if we're at the end of the file. If we are, then we need to
9892
- // return the EOF token.
10283
+ // Now let's grab the information about the identifier off of the
10284
+ // current lex mode.
10285
+ pm_lex_mode_t *lex_mode = parser->lex_modes.current;
10286
+
10287
+ // We'll check if we're at the end of the file. If we are, then we
10288
+ // will add an error (because we weren't able to find the
10289
+ // terminator) but still continue parsing so that content after the
10290
+ // declaration of the heredoc can be parsed.
9893
10291
  if (parser->current.end >= parser->end) {
9894
- LEX(PM_TOKEN_EOF);
10292
+ pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
10293
+ parser->next_start = lex_mode->as.heredoc.next_start;
10294
+ parser->heredoc_end = parser->current.end;
10295
+ lex_state_set(parser, PM_LEX_STATE_END);
10296
+ LEX(PM_TOKEN_HEREDOC_END);
9895
10297
  }
9896
10298
 
9897
- // Now let's grab the information about the identifier off of the current
9898
- // lex mode.
9899
- pm_lex_mode_t *lex_mode = parser->lex_modes.current;
9900
10299
  const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
9901
10300
  size_t ident_length = lex_mode->as.heredoc.ident_length;
9902
10301
 
@@ -10083,21 +10482,20 @@ parser_lex(pm_parser_t *parser) {
10083
10482
  case '\r':
10084
10483
  parser->current.end++;
10085
10484
  if (peek(parser) != '\n') {
10086
- pm_token_buffer_push(&token_buffer, '\\');
10087
- pm_token_buffer_push(&token_buffer, '\r');
10485
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10486
+ pm_token_buffer_push_byte(&token_buffer, '\r');
10088
10487
  break;
10089
10488
  }
10090
10489
  /* fallthrough */
10091
10490
  case '\n':
10092
- pm_token_buffer_push(&token_buffer, '\\');
10093
- pm_token_buffer_push(&token_buffer, '\n');
10491
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10492
+ pm_token_buffer_push_byte(&token_buffer, '\n');
10094
10493
  token_buffer.cursor = parser->current.end + 1;
10095
10494
  breakpoint = parser->current.end;
10096
10495
  continue;
10097
10496
  default:
10098
- parser->current.end++;
10099
- pm_token_buffer_push(&token_buffer, '\\');
10100
- pm_token_buffer_push(&token_buffer, peeked);
10497
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10498
+ pm_token_buffer_push_escaped(&token_buffer, parser);
10101
10499
  break;
10102
10500
  }
10103
10501
  } else {
@@ -10105,7 +10503,7 @@ parser_lex(pm_parser_t *parser) {
10105
10503
  case '\r':
10106
10504
  parser->current.end++;
10107
10505
  if (peek(parser) != '\n') {
10108
- pm_token_buffer_push(&token_buffer, '\r');
10506
+ pm_token_buffer_push_byte(&token_buffer, '\r');
10109
10507
  break;
10110
10508
  }
10111
10509
  /* fallthrough */
@@ -10184,8 +10582,8 @@ parser_lex(pm_parser_t *parser) {
10184
10582
  typedef enum {
10185
10583
  PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
10186
10584
  PM_BINDING_POWER_STATEMENT = 2,
10187
- PM_BINDING_POWER_MODIFIER = 4, // if unless until while
10188
- PM_BINDING_POWER_MODIFIER_RESCUE = 6, // rescue
10585
+ PM_BINDING_POWER_MODIFIER_RESCUE = 4, // rescue
10586
+ PM_BINDING_POWER_MODIFIER = 6, // if unless until while
10189
10587
  PM_BINDING_POWER_COMPOSITION = 8, // and or
10190
10588
  PM_BINDING_POWER_NOT = 10, // not
10191
10589
  PM_BINDING_POWER_MATCH = 12, // => in
@@ -10239,15 +10637,15 @@ typedef struct {
10239
10637
  #define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
10240
10638
 
10241
10639
  pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
10640
+ // rescue
10641
+ [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
10642
+
10242
10643
  // if unless until while
10243
10644
  [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10244
10645
  [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10245
10646
  [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10246
10647
  [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10247
10648
 
10248
- // rescue
10249
- [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
10250
-
10251
10649
  // and or
10252
10650
  [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
10253
10651
  [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
@@ -10377,16 +10775,8 @@ match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
10377
10775
  * Returns true if the current token is any of the four given types.
10378
10776
  */
10379
10777
  static inline bool
10380
- match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
10381
- return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
10382
- }
10383
-
10384
- /**
10385
- * Returns true if the current token is any of the five given types.
10386
- */
10387
- static inline bool
10388
- match5(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5) {
10389
- return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5);
10778
+ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
10779
+ return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
10390
10780
  }
10391
10781
 
10392
10782
  /**
@@ -10866,7 +11256,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10866
11256
  return target;
10867
11257
  }
10868
11258
 
10869
- if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
11259
+ if (char_is_identifier_start(parser, call->message_loc.start)) {
10870
11260
  // When we get here, we have a method call, because it was
10871
11261
  // previously marked as a method call but now we have an =. This
10872
11262
  // looks like:
@@ -10984,6 +11374,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
10984
11374
  static pm_node_t *
10985
11375
  parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power) {
10986
11376
  pm_node_t *result = parse_targets(parser, first_target, binding_power);
11377
+ accept1(parser, PM_TOKEN_NEWLINE);
10987
11378
 
10988
11379
  // Ensure that we have either an = or a ) after the targets.
10989
11380
  if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
@@ -11024,7 +11415,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
11024
11415
  break;
11025
11416
  }
11026
11417
 
11027
- // If we have a terminator, then we will parse all consequtive terminators
11418
+ // If we have a terminator, then we will parse all consecutive terminators
11028
11419
  // and then continue parsing the statements list.
11029
11420
  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
11030
11421
  // If we have a terminator, then we will continue parsing the statements
@@ -11084,8 +11475,9 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
11084
11475
 
11085
11476
  if (token_begins_expression_p(parser->current.type)) {
11086
11477
  value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11087
- } else if (pm_parser_local_depth(parser, &operator) == -1) {
11088
- pm_parser_err_token(parser, &operator, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11478
+ }
11479
+ else {
11480
+ pm_parser_scope_forwarding_keywords_check(parser, &operator);
11089
11481
  }
11090
11482
 
11091
11483
  element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
@@ -11234,13 +11626,8 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11234
11626
  if (token_begins_expression_p(parser->current.type)) {
11235
11627
  expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_ARGUMENT);
11236
11628
  } else {
11237
- if (pm_parser_local_depth(parser, &operator) == -1) {
11238
- // A block forwarding in a method having `...` parameter (e.g. `def foo(...); bar(&); end`) is available.
11239
- pm_constant_id_t ellipsis_id = pm_parser_constant_id_constant(parser, "...", 3);
11240
- if (pm_parser_local_depth_constant_id(parser, ellipsis_id) == -1) {
11241
- pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
11242
- }
11243
- }
11629
+ // A block forwarding in a method having `...` parameter (e.g. `def foo(...); bar(&); end`) is available.
11630
+ pm_parser_scope_forwarding_block_check(parser, &operator);
11244
11631
  }
11245
11632
 
11246
11633
  argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
@@ -11258,10 +11645,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11258
11645
  pm_token_t operator = parser->previous;
11259
11646
 
11260
11647
  if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
11261
- if (pm_parser_local_depth(parser, &parser->previous) == -1) {
11262
- pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
11263
- }
11264
-
11648
+ pm_parser_scope_forwarding_positionals_check(parser, &operator);
11265
11649
  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
11266
11650
  } else {
11267
11651
  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
@@ -11287,15 +11671,14 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11287
11671
  pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
11288
11672
  argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
11289
11673
  } else {
11290
- if (pm_parser_local_depth(parser, &parser->previous) == -1) {
11291
- pm_parser_err_previous(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
11292
- }
11674
+ pm_parser_scope_forwarding_all_check(parser, &parser->previous);
11293
11675
  if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
11294
11676
  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
11295
11677
  }
11296
11678
 
11297
11679
  argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
11298
11680
  parse_arguments_append(parser, arguments, argument);
11681
+ arguments->has_forwarding = true;
11299
11682
  parsed_forwarding_arguments = true;
11300
11683
  break;
11301
11684
  }
@@ -11338,6 +11721,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11338
11721
  }
11339
11722
 
11340
11723
  parsed_bare_hash = true;
11724
+ } else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
11725
+ // TODO: Could we solve this with binding powers instead?
11726
+ pm_parser_err_current(parser, PM_ERR_ARGUMENT_IN);
11341
11727
  }
11342
11728
 
11343
11729
  parse_arguments_append(parser, arguments, argument);
@@ -11414,7 +11800,9 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
11414
11800
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11415
11801
  pm_token_t name = parser->previous;
11416
11802
  value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
11417
- pm_parser_parameter_name_check(parser, &name);
11803
+ if (pm_parser_parameter_name_check(parser, &name)) {
11804
+ pm_node_flag_set_repeated_parameter(value);
11805
+ }
11418
11806
  pm_parser_local_add_token(parser, &name);
11419
11807
  }
11420
11808
 
@@ -11424,7 +11812,9 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
11424
11812
  pm_token_t name = parser->previous;
11425
11813
 
11426
11814
  param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
11427
- pm_parser_parameter_name_check(parser, &name);
11815
+ if (pm_parser_parameter_name_check(parser, &name)) {
11816
+ pm_node_flag_set_repeated_parameter(param);
11817
+ }
11428
11818
  pm_parser_local_add_token(parser, &name);
11429
11819
  }
11430
11820
 
@@ -11541,19 +11931,20 @@ parse_parameters(
11541
11931
  pm_token_t operator = parser->previous;
11542
11932
  pm_token_t name;
11543
11933
 
11934
+ bool repeated = false;
11544
11935
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11545
11936
  name = parser->previous;
11546
- pm_parser_parameter_name_check(parser, &name);
11937
+ repeated = pm_parser_parameter_name_check(parser, &name);
11547
11938
  pm_parser_local_add_token(parser, &name);
11548
11939
  } else {
11549
11940
  name = not_provided(parser);
11550
-
11551
- if (allows_forwarding_parameters) {
11552
- pm_parser_local_add_token(parser, &operator);
11553
- }
11941
+ parser->current_scope->forwarding_params |= PM_FORWARDING_BLOCK;
11554
11942
  }
11555
11943
 
11556
11944
  pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
11945
+ if (repeated) {
11946
+ pm_node_flag_set_repeated_parameter((pm_node_t *)param);
11947
+ }
11557
11948
  if (params->block == NULL) {
11558
11949
  pm_parameters_node_block_set(params, param);
11559
11950
  } else {
@@ -11572,9 +11963,8 @@ parse_parameters(
11572
11963
  update_parameter_state(parser, &parser->current, &order);
11573
11964
  parser_lex(parser);
11574
11965
 
11575
- if (allows_forwarding_parameters) {
11576
- pm_parser_local_add_token(parser, &parser->previous);
11577
- }
11966
+ parser->current_scope->forwarding_params |= PM_FORWARDING_BLOCK;
11967
+ parser->current_scope->forwarding_params |= PM_FORWARDING_ALL;
11578
11968
 
11579
11969
  pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
11580
11970
  if (params->keyword_rest != NULL) {
@@ -11626,20 +12016,23 @@ parse_parameters(
11626
12016
  }
11627
12017
 
11628
12018
  pm_token_t name = parser->previous;
11629
- pm_parser_parameter_name_check(parser, &name);
12019
+ bool repeated = pm_parser_parameter_name_check(parser, &name);
11630
12020
  pm_parser_local_add_token(parser, &name);
11631
12021
 
11632
12022
  if (accept1(parser, PM_TOKEN_EQUAL)) {
11633
12023
  pm_token_t operator = parser->previous;
11634
12024
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11635
- pm_constant_id_t old_param_name = parser->current_param_name;
11636
- parser->current_param_name = pm_parser_constant_id_token(parser, &name);
12025
+
12026
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &name));
11637
12027
  pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
11638
12028
 
11639
12029
  pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
12030
+ if (repeated) {
12031
+ pm_node_flag_set_repeated_parameter((pm_node_t *)param);
12032
+ }
11640
12033
  pm_parameters_node_optionals_append(params, param);
11641
12034
 
11642
- parser->current_param_name = old_param_name;
12035
+ pm_parser_current_param_name_restore(parser, saved_param_name);
11643
12036
  context_pop(parser);
11644
12037
 
11645
12038
  // If parsing the value of the parameter resulted in error recovery,
@@ -11651,9 +12044,15 @@ parse_parameters(
11651
12044
  }
11652
12045
  } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
11653
12046
  pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
12047
+ if (repeated) {
12048
+ pm_node_flag_set_repeated_parameter((pm_node_t *)param);
12049
+ }
11654
12050
  pm_parameters_node_requireds_append(params, (pm_node_t *) param);
11655
12051
  } else {
11656
12052
  pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
12053
+ if (repeated) {
12054
+ pm_node_flag_set_repeated_parameter((pm_node_t *)param);
12055
+ }
11657
12056
  pm_parameters_node_posts_append(params, (pm_node_t *) param);
11658
12057
  }
11659
12058
 
@@ -11668,7 +12067,7 @@ parse_parameters(
11668
12067
  pm_token_t local = name;
11669
12068
  local.end -= 1;
11670
12069
 
11671
- pm_parser_parameter_name_check(parser, &local);
12070
+ bool repeated = pm_parser_parameter_name_check(parser, &local);
11672
12071
  pm_parser_local_add_token(parser, &local);
11673
12072
 
11674
12073
  switch (parser->current.type) {
@@ -11676,6 +12075,9 @@ parse_parameters(
11676
12075
  case PM_TOKEN_PARENTHESIS_RIGHT:
11677
12076
  case PM_TOKEN_PIPE: {
11678
12077
  pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
12078
+ if (repeated) {
12079
+ pm_node_flag_set_repeated_parameter(param);
12080
+ }
11679
12081
  pm_parameters_node_keywords_append(params, param);
11680
12082
  break;
11681
12083
  }
@@ -11687,6 +12089,9 @@ parse_parameters(
11687
12089
  }
11688
12090
 
11689
12091
  pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
12092
+ if (repeated) {
12093
+ pm_node_flag_set_repeated_parameter(param);
12094
+ }
11690
12095
  pm_parameters_node_keywords_append(params, param);
11691
12096
  break;
11692
12097
  }
@@ -11695,17 +12100,22 @@ parse_parameters(
11695
12100
 
11696
12101
  if (token_begins_expression_p(parser->current.type)) {
11697
12102
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11698
- pm_constant_id_t old_param_name = parser->current_param_name;
11699
- parser->current_param_name = pm_parser_constant_id_token(parser, &local);
12103
+
12104
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &local));
11700
12105
  pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
11701
- parser->current_param_name = old_param_name;
12106
+
12107
+ pm_parser_current_param_name_restore(parser, saved_param_name);
11702
12108
  context_pop(parser);
12109
+
11703
12110
  param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
11704
12111
  }
11705
12112
  else {
11706
12113
  param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
11707
12114
  }
11708
12115
 
12116
+ if (repeated) {
12117
+ pm_node_flag_set_repeated_parameter(param);
12118
+ }
11709
12119
  pm_parameters_node_keywords_append(params, param);
11710
12120
 
11711
12121
  // If parsing the value of the parameter resulted in error recovery,
@@ -11728,20 +12138,21 @@ parse_parameters(
11728
12138
 
11729
12139
  pm_token_t operator = parser->previous;
11730
12140
  pm_token_t name;
11731
-
12141
+ bool repeated = false;
11732
12142
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11733
12143
  name = parser->previous;
11734
- pm_parser_parameter_name_check(parser, &name);
12144
+ repeated = pm_parser_parameter_name_check(parser, &name);
11735
12145
  pm_parser_local_add_token(parser, &name);
11736
12146
  } else {
11737
12147
  name = not_provided(parser);
11738
12148
 
11739
- if (allows_forwarding_parameters) {
11740
- pm_parser_local_add_token(parser, &operator);
11741
- }
12149
+ parser->current_scope->forwarding_params |= PM_FORWARDING_POSITIONALS;
11742
12150
  }
11743
12151
 
11744
12152
  pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
12153
+ if (repeated) {
12154
+ pm_node_flag_set_repeated_parameter(param);
12155
+ }
11745
12156
  if (params->rest == NULL) {
11746
12157
  pm_parameters_node_rest_set(params, param);
11747
12158
  } else {
@@ -11764,19 +12175,21 @@ parse_parameters(
11764
12175
  } else {
11765
12176
  pm_token_t name;
11766
12177
 
12178
+ bool repeated = false;
11767
12179
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11768
12180
  name = parser->previous;
11769
- pm_parser_parameter_name_check(parser, &name);
12181
+ repeated = pm_parser_parameter_name_check(parser, &name);
11770
12182
  pm_parser_local_add_token(parser, &name);
11771
12183
  } else {
11772
12184
  name = not_provided(parser);
11773
12185
 
11774
- if (allows_forwarding_parameters) {
11775
- pm_parser_local_add_token(parser, &operator);
11776
- }
12186
+ parser->current_scope->forwarding_params |= PM_FORWARDING_KEYWORDS;
11777
12187
  }
11778
12188
 
11779
12189
  param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
12190
+ if (repeated) {
12191
+ pm_node_flag_set_repeated_parameter(param);
12192
+ }
11780
12193
  }
11781
12194
 
11782
12195
  if (params->keyword_rest == NULL) {
@@ -12012,10 +12425,13 @@ parse_block_parameters(
12012
12425
  if ((opening->type != PM_TOKEN_NOT_PROVIDED) && accept1(parser, PM_TOKEN_SEMICOLON)) {
12013
12426
  do {
12014
12427
  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
12015
- pm_parser_parameter_name_check(parser, &parser->previous);
12428
+ bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
12016
12429
  pm_parser_local_add_token(parser, &parser->previous);
12017
12430
 
12018
12431
  pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
12432
+ if (repeated) {
12433
+ pm_node_flag_set_repeated_parameter((pm_node_t *)local);
12434
+ }
12019
12435
  pm_block_parameters_node_append_local(block_parameters, local);
12020
12436
  } while (accept1(parser, PM_TOKEN_COMMA));
12021
12437
  }
@@ -12031,8 +12447,10 @@ parse_block(pm_parser_t *parser) {
12031
12447
  pm_token_t opening = parser->previous;
12032
12448
  accept1(parser, PM_TOKEN_NEWLINE);
12033
12449
 
12450
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
12034
12451
  pm_accepts_block_stack_push(parser, true);
12035
12452
  pm_parser_scope_push(parser, false);
12453
+
12036
12454
  pm_block_parameters_node_t *block_parameters = NULL;
12037
12455
 
12038
12456
  if (accept1(parser, PM_TOKEN_PIPE)) {
@@ -12053,12 +12471,6 @@ parse_block(pm_parser_t *parser) {
12053
12471
  pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
12054
12472
  }
12055
12473
 
12056
- uint32_t locals_body_index = 0;
12057
-
12058
- if (block_parameters) {
12059
- locals_body_index = (uint32_t) parser->current_scope->locals.size;
12060
- }
12061
-
12062
12474
  accept1(parser, PM_TOKEN_NEWLINE);
12063
12475
  pm_node_t *statements = NULL;
12064
12476
 
@@ -12090,13 +12502,14 @@ parse_block(pm_parser_t *parser) {
12090
12502
 
12091
12503
  if (parameters == NULL && (maximum > 0)) {
12092
12504
  parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = opening.start, .end = parser->previous.end }, maximum);
12093
- locals_body_index = maximum;
12094
12505
  }
12095
12506
 
12096
12507
  pm_constant_id_list_t locals = parser->current_scope->locals;
12097
12508
  pm_parser_scope_pop(parser);
12098
12509
  pm_accepts_block_stack_pop(parser);
12099
- return pm_block_node_create(parser, &locals, locals_body_index, &opening, parameters, statements, &parser->previous);
12510
+ pm_parser_current_param_name_restore(parser, saved_param_name);
12511
+
12512
+ return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
12100
12513
  }
12101
12514
 
12102
12515
  /**
@@ -12157,14 +12570,20 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
12157
12570
  }
12158
12571
 
12159
12572
  if (block != NULL) {
12160
- if (arguments->block == NULL) {
12573
+ if (arguments->block == NULL && !arguments->has_forwarding) {
12161
12574
  arguments->block = (pm_node_t *) block;
12162
12575
  } else {
12163
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
12164
- if (arguments->arguments == NULL) {
12165
- arguments->arguments = pm_arguments_node_create(parser);
12576
+ if (arguments->has_forwarding) {
12577
+ pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_FORWARDING);
12578
+ } else {
12579
+ pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
12580
+ }
12581
+ if (arguments->block != NULL) {
12582
+ if (arguments->arguments == NULL) {
12583
+ arguments->arguments = pm_arguments_node_create(parser);
12584
+ }
12585
+ pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
12166
12586
  }
12167
- pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
12168
12587
  arguments->block = (pm_node_t *) block;
12169
12588
  }
12170
12589
  }
@@ -12384,8 +12803,14 @@ static inline pm_node_flags_t
12384
12803
  parse_unescaped_encoding(const pm_parser_t *parser) {
12385
12804
  if (parser->explicit_encoding != NULL) {
12386
12805
  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
12806
+ // If the there's an explicit encoding and it's using a UTF-8 escape
12807
+ // sequence, then mark the string as UTF-8.
12387
12808
  return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
12388
12809
  } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
12810
+ // If there's a non-UTF-8 escape sequence being used, then the
12811
+ // string uses the source encoding, unless the source is marked as
12812
+ // US-ASCII. In that case the string is forced as ASCII-8BIT in
12813
+ // order to keep the string valid.
12389
12814
  return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
12390
12815
  }
12391
12816
  }
@@ -12509,14 +12934,54 @@ parse_string_part(pm_parser_t *parser) {
12509
12934
  }
12510
12935
  }
12511
12936
 
12937
+ /**
12938
+ * When creating a symbol, unary operators that cannot be binary operators
12939
+ * automatically drop trailing `@` characters. This happens at the parser level,
12940
+ * such that `~@` is parsed as `~` and `!@` is parsed as `!`. We do that here.
12941
+ */
12942
+ static const uint8_t *
12943
+ parse_operator_symbol_name(const pm_token_t *name) {
12944
+ switch (name->type) {
12945
+ case PM_TOKEN_TILDE:
12946
+ case PM_TOKEN_BANG:
12947
+ if (name->end[-1] == '@') return name->end - 1;
12948
+ /* fallthrough */
12949
+ default:
12950
+ return name->end;
12951
+ }
12952
+ }
12953
+
12954
+ static pm_node_t *
12955
+ parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
12956
+ pm_token_t closing = not_provided(parser);
12957
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
12958
+
12959
+ const uint8_t *end = parse_operator_symbol_name(&parser->current);
12960
+
12961
+ if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
12962
+ parser_lex(parser);
12963
+
12964
+ pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
12965
+ pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
12966
+
12967
+ return (pm_node_t *) symbol;
12968
+ }
12969
+
12970
+ /**
12971
+ * Parse a symbol node. This function will get called immediately after finding
12972
+ * a symbol opening token. This handles parsing bare symbols and interpolated
12973
+ * symbols.
12974
+ */
12512
12975
  static pm_node_t *
12513
12976
  parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state) {
12514
- pm_token_t opening = parser->previous;
12977
+ const pm_token_t opening = parser->previous;
12515
12978
 
12516
12979
  if (lex_mode->mode != PM_LEX_STRING) {
12517
12980
  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
12518
12981
 
12519
12982
  switch (parser->current.type) {
12983
+ case PM_CASE_OPERATOR:
12984
+ return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
12520
12985
  case PM_TOKEN_IDENTIFIER:
12521
12986
  case PM_TOKEN_CONSTANT:
12522
12987
  case PM_TOKEN_INSTANCE_VARIABLE:
@@ -12528,10 +12993,6 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12528
12993
  case PM_CASE_KEYWORD:
12529
12994
  parser_lex(parser);
12530
12995
  break;
12531
- case PM_CASE_OPERATOR:
12532
- lex_state_set(parser, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
12533
- parser_lex(parser);
12534
- break;
12535
12996
  default:
12536
12997
  expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
12537
12998
  break;
@@ -12541,6 +13002,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12541
13002
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
12542
13003
 
12543
13004
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
13005
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
13006
+
12544
13007
  return (pm_node_t *) symbol;
12545
13008
  }
12546
13009
 
@@ -12637,7 +13100,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12637
13100
  } else {
12638
13101
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
12639
13102
  }
12640
- return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
13103
+
13104
+ return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
12641
13105
  }
12642
13106
 
12643
13107
  /**
@@ -12647,8 +13111,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12647
13111
  static inline pm_node_t *
12648
13112
  parse_undef_argument(pm_parser_t *parser) {
12649
13113
  switch (parser->current.type) {
13114
+ case PM_CASE_OPERATOR: {
13115
+ const pm_token_t opening = not_provided(parser);
13116
+ return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
13117
+ }
12650
13118
  case PM_CASE_KEYWORD:
12651
- case PM_CASE_OPERATOR:
12652
13119
  case PM_TOKEN_CONSTANT:
12653
13120
  case PM_TOKEN_IDENTIFIER:
12654
13121
  case PM_TOKEN_METHOD_NAME: {
@@ -12659,6 +13126,8 @@ parse_undef_argument(pm_parser_t *parser) {
12659
13126
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
12660
13127
 
12661
13128
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
13129
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
13130
+
12662
13131
  return (pm_node_t *) symbol;
12663
13132
  }
12664
13133
  case PM_TOKEN_SYMBOL_BEGIN: {
@@ -12682,21 +13151,24 @@ parse_undef_argument(pm_parser_t *parser) {
12682
13151
  static inline pm_node_t *
12683
13152
  parse_alias_argument(pm_parser_t *parser, bool first) {
12684
13153
  switch (parser->current.type) {
12685
- case PM_CASE_OPERATOR:
13154
+ case PM_CASE_OPERATOR: {
13155
+ const pm_token_t opening = not_provided(parser);
13156
+ return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
13157
+ }
12686
13158
  case PM_CASE_KEYWORD:
12687
13159
  case PM_TOKEN_CONSTANT:
12688
13160
  case PM_TOKEN_IDENTIFIER:
12689
13161
  case PM_TOKEN_METHOD_NAME: {
12690
- if (first) {
12691
- lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
12692
- }
12693
-
13162
+ if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
12694
13163
  parser_lex(parser);
13164
+
12695
13165
  pm_token_t opening = not_provided(parser);
12696
13166
  pm_token_t closing = not_provided(parser);
12697
13167
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
12698
13168
 
12699
13169
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
13170
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
13171
+
12700
13172
  return (pm_node_t *) symbol;
12701
13173
  }
12702
13174
  case PM_TOKEN_SYMBOL_BEGIN: {
@@ -12733,6 +13205,64 @@ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
12733
13205
  return false;
12734
13206
  }
12735
13207
 
13208
+ /**
13209
+ * These are the names of the various numbered parameters. We have them here so
13210
+ * that when we insert them into the constant pool we can use a constant string
13211
+ * and not have to allocate.
13212
+ */
13213
+ static const char * const pm_numbered_parameter_names[] = {
13214
+ "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
13215
+ };
13216
+
13217
+ /**
13218
+ * Parse an identifier into either a local variable read. If the local variable
13219
+ * is not found, it returns NULL instead.
13220
+ */
13221
+ static pm_local_variable_read_node_t *
13222
+ parse_variable(pm_parser_t *parser) {
13223
+ int depth;
13224
+ if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
13225
+ return pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
13226
+ }
13227
+
13228
+ if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
13229
+ // Now that we know we have a numbered parameter, we need to check
13230
+ // if it's allowed in this context. If it is, then we will create a
13231
+ // local variable read. If it's not, then we'll create a normal call
13232
+ // node but add an error.
13233
+ if (parser->current_scope->explicit_params) {
13234
+ pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
13235
+ } else if (outer_scope_using_numbered_parameters_p(parser)) {
13236
+ pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
13237
+ } else {
13238
+ // Indicate that this scope is using numbered params so that child
13239
+ // scopes cannot. We subtract the value for the character '0' to get
13240
+ // the actual integer value of the number (only _1 through _9 are
13241
+ // valid).
13242
+ uint8_t numbered_parameters = (uint8_t) (parser->previous.start[1] - '0');
13243
+ if (numbered_parameters > parser->current_scope->numbered_parameters) {
13244
+ parser->current_scope->numbered_parameters = numbered_parameters;
13245
+ pm_parser_numbered_parameters_set(parser, numbered_parameters);
13246
+ }
13247
+
13248
+ // When you use a numbered parameter, it implies the existence
13249
+ // of all of the locals that exist before it. For example,
13250
+ // referencing _2 means that _1 must exist. Therefore here we
13251
+ // loop through all of the possibilities and add them into the
13252
+ // constant pool.
13253
+ for (uint8_t numbered_parameter = 1; numbered_parameter <= numbered_parameters - 1; numbered_parameter++) {
13254
+ pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameter - 1], 2);
13255
+ }
13256
+
13257
+ // Finally we can create the local variable read node.
13258
+ pm_constant_id_t name_id = pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameters - 1], 2);
13259
+ return pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
13260
+ }
13261
+ }
13262
+
13263
+ return NULL;
13264
+ }
13265
+
12736
13266
  /**
12737
13267
  * Parse an identifier into either a local variable read or a call.
12738
13268
  */
@@ -12741,56 +13271,8 @@ parse_variable_call(pm_parser_t *parser) {
12741
13271
  pm_node_flags_t flags = 0;
12742
13272
 
12743
13273
  if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
12744
- int depth;
12745
- if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
12746
- return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
12747
- }
12748
-
12749
- if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
12750
- // Now that we know we have a numbered parameter, we need to check
12751
- // if it's allowed in this context. If it is, then we will create a
12752
- // local variable read. If it's not, then we'll create a normal call
12753
- // node but add an error.
12754
- if (parser->current_scope->explicit_params) {
12755
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
12756
- } else if (outer_scope_using_numbered_parameters_p(parser)) {
12757
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
12758
- } else {
12759
- // Indicate that this scope is using numbered params so that child
12760
- // scopes cannot.
12761
- uint8_t number = parser->previous.start[1];
12762
-
12763
- // We subtract the value for the character '0' to get the actual
12764
- // integer value of the number (only _1 through _9 are valid)
12765
- uint8_t numbered_parameters = (uint8_t) (number - '0');
12766
- if (numbered_parameters > parser->current_scope->numbered_parameters) {
12767
- parser->current_scope->numbered_parameters = numbered_parameters;
12768
- pm_parser_numbered_parameters_set(parser, numbered_parameters);
12769
- }
12770
-
12771
- // When you use a numbered parameter, it implies the existence
12772
- // of all of the locals that exist before it. For example,
12773
- // referencing _2 means that _1 must exist. Therefore here we
12774
- // loop through all of the possibilities and add them into the
12775
- // constant pool.
12776
- uint8_t current = '1';
12777
- uint8_t *value;
12778
-
12779
- while (current < number) {
12780
- value = malloc(2);
12781
- value[0] = '_';
12782
- value[1] = current++;
12783
- pm_parser_local_add_owned(parser, value, 2);
12784
- }
12785
-
12786
- // Now we can add the actual token that is being used. For
12787
- // this one we can add a shared version since it is directly
12788
- // referenced in the source.
12789
- pm_parser_local_add_token(parser, &parser->previous);
12790
- return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
12791
- }
12792
- }
12793
-
13274
+ pm_local_variable_read_node_t *node = parse_variable(parser);
13275
+ if (node != NULL) return (pm_node_t *) node;
12794
13276
  flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
12795
13277
  }
12796
13278
 
@@ -13076,43 +13558,77 @@ parse_pattern_keyword_rest(pm_parser_t *parser) {
13076
13558
  return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
13077
13559
  }
13078
13560
 
13561
+ /**
13562
+ * Create an implicit node for the value of a hash pattern that has omitted the
13563
+ * value. This will use an implicit local variable target.
13564
+ */
13565
+ static pm_node_t *
13566
+ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_symbol_node_t *key) {
13567
+ const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
13568
+ pm_constant_id_t name = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
13569
+
13570
+ int current_depth = pm_parser_local_depth_constant_id(parser, name);
13571
+ uint32_t depth;
13572
+
13573
+ if (current_depth == -1) {
13574
+ pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
13575
+ depth = 0;
13576
+ } else {
13577
+ depth = (uint32_t) current_depth;
13578
+ }
13579
+
13580
+ pm_local_variable_target_node_t *target = pm_local_variable_target_node_create_values(parser, value_loc, name, depth);
13581
+ return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
13582
+ }
13583
+
13079
13584
  /**
13080
13585
  * Parse a hash pattern.
13081
13586
  */
13082
13587
  static pm_hash_pattern_node_t *
13083
- parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
13588
+ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) {
13084
13589
  pm_node_list_t assocs = { 0 };
13085
13590
  pm_node_t *rest = NULL;
13086
13591
 
13087
- switch (PM_NODE_TYPE(first_assoc)) {
13088
- case PM_ASSOC_NODE: {
13089
- if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13090
- // Here we have a value for the first assoc in the list, so we will
13091
- // parse it now and update the first assoc.
13092
- pm_node_t *value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
13592
+ switch (PM_NODE_TYPE(first_node)) {
13593
+ case PM_ASSOC_SPLAT_NODE:
13594
+ case PM_NO_KEYWORDS_PARAMETER_NODE:
13595
+ rest = first_node;
13596
+ break;
13597
+ case PM_SYMBOL_NODE: {
13598
+ if (pm_symbol_node_label_p(first_node)) {
13599
+ pm_node_t *value;
13600
+
13601
+ if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13602
+ // Here we have a value for the first assoc in the list, so
13603
+ // we will parse it now.
13604
+ value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
13605
+ } else {
13606
+ // Otherwise, we will create an implicit local variable
13607
+ // target for the value.
13608
+ value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) first_node);
13609
+ }
13093
13610
 
13094
- pm_assoc_node_t *assoc = (pm_assoc_node_t *) first_assoc;
13095
- assoc->base.location.end = value->location.end;
13096
- assoc->value = value;
13097
- } else {
13098
- pm_node_t *key = ((pm_assoc_node_t *) first_assoc)->key;
13611
+ pm_token_t operator = not_provided(parser);
13612
+ pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
13099
13613
 
13100
- if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
13101
- const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
13102
- pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
13103
- }
13614
+ pm_node_list_append(&assocs, assoc);
13615
+ break;
13104
13616
  }
13617
+ }
13618
+ /* fallthrough */
13619
+ default: {
13620
+ // If we get anything else, then this is an error. For this we'll
13621
+ // create a missing node for the value and create an assoc node for
13622
+ // the first node in the list.
13623
+ pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_LABEL);
13624
+
13625
+ pm_token_t operator = not_provided(parser);
13626
+ pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
13627
+ pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
13105
13628
 
13106
- pm_node_list_append(&assocs, first_assoc);
13629
+ pm_node_list_append(&assocs, assoc);
13107
13630
  break;
13108
13631
  }
13109
- case PM_ASSOC_SPLAT_NODE:
13110
- case PM_NO_KEYWORDS_PARAMETER_NODE:
13111
- rest = first_assoc;
13112
- break;
13113
- default:
13114
- assert(false);
13115
- break;
13116
13632
  }
13117
13633
 
13118
13634
  // If there are any other assocs, then we'll parse them now.
@@ -13141,6 +13657,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
13141
13657
  } else {
13142
13658
  const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
13143
13659
  pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
13660
+ value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) key);
13144
13661
  }
13145
13662
 
13146
13663
  pm_token_t operator = not_provided(parser);
@@ -13246,45 +13763,29 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13246
13763
  // pattern node.
13247
13764
  node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
13248
13765
  } else {
13249
- pm_node_t *first_assoc;
13766
+ pm_node_t *first_node;
13250
13767
 
13251
13768
  switch (parser->current.type) {
13252
- case PM_TOKEN_LABEL: {
13769
+ case PM_TOKEN_LABEL:
13253
13770
  parser_lex(parser);
13254
-
13255
- pm_symbol_node_t *key = pm_symbol_node_label_create(parser, &parser->previous);
13256
- pm_token_t operator = not_provided(parser);
13257
-
13258
- first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
13771
+ first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
13259
13772
  break;
13260
- }
13261
13773
  case PM_TOKEN_USTAR_STAR:
13262
- first_assoc = parse_pattern_keyword_rest(parser);
13774
+ first_node = parse_pattern_keyword_rest(parser);
13263
13775
  break;
13264
- case PM_TOKEN_STRING_BEGIN: {
13265
- pm_node_t *key = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
13266
- pm_token_t operator = not_provided(parser);
13267
-
13268
- if (!pm_symbol_node_label_p(key)) {
13269
- pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_LABEL);
13270
- }
13271
-
13272
- first_assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL);
13776
+ case PM_TOKEN_STRING_BEGIN:
13777
+ first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
13273
13778
  break;
13274
- }
13275
13779
  default: {
13276
13780
  parser_lex(parser);
13277
13781
  pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
13278
13782
 
13279
- pm_missing_node_t *key = pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
13280
- pm_token_t operator = not_provided(parser);
13281
-
13282
- first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
13783
+ first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
13283
13784
  break;
13284
13785
  }
13285
13786
  }
13286
13787
 
13287
- node = parse_pattern_hash(parser, first_assoc);
13788
+ node = parse_pattern_hash(parser, first_node);
13288
13789
 
13289
13790
  accept1(parser, PM_TOKEN_NEWLINE);
13290
13791
  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
@@ -13350,7 +13851,16 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13350
13851
  switch (parser->current.type) {
13351
13852
  case PM_TOKEN_IDENTIFIER: {
13352
13853
  parser_lex(parser);
13353
- pm_node_t *variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
13854
+ pm_node_t *variable = (pm_node_t *) parse_variable(parser);
13855
+ if (variable == NULL) {
13856
+ if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0 && pm_token_is_it(parser->previous.start, parser->previous.end)) {
13857
+ pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
13858
+ variable = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
13859
+ } else {
13860
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE, (int) (parser->previous.end - parser->previous.start), parser->previous.start);
13861
+ variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
13862
+ }
13863
+ }
13354
13864
 
13355
13865
  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
13356
13866
  }
@@ -13519,9 +14029,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
13519
14029
  case PM_TOKEN_LABEL: {
13520
14030
  parser_lex(parser);
13521
14031
  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
13522
- pm_token_t operator = not_provided(parser);
13523
-
13524
- return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL));
14032
+ return (pm_node_t *) parse_pattern_hash(parser, key);
13525
14033
  }
13526
14034
  case PM_TOKEN_USTAR_STAR: {
13527
14035
  node = parse_pattern_keyword_rest(parser);
@@ -13544,8 +14052,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
13544
14052
  // If we got a dynamic label symbol, then we need to treat it like the
13545
14053
  // beginning of a hash pattern.
13546
14054
  if (pm_symbol_node_label_p(node)) {
13547
- pm_token_t operator = not_provided(parser);
13548
- return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, node, &operator, NULL));
14055
+ return (pm_node_t *) parse_pattern_hash(parser, node);
13549
14056
  }
13550
14057
 
13551
14058
  if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
@@ -13558,7 +14065,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
13558
14065
  // Gather up all of the patterns into the list.
13559
14066
  while (accept1(parser, PM_TOKEN_COMMA)) {
13560
14067
  // Break early here in case we have a trailing comma.
13561
- if (match5(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14068
+ if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
13562
14069
  node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
13563
14070
  pm_node_list_append(&nodes, node);
13564
14071
  break;
@@ -13644,7 +14151,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13644
14151
  assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
13645
14152
 
13646
14153
  bool concating = false;
13647
- bool state_is_arg_labeled = lex_state_p(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
14154
+ bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
13648
14155
 
13649
14156
  while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
13650
14157
  pm_node_t *node = NULL;
@@ -13719,7 +14226,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13719
14226
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13720
14227
  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
13721
14228
  } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
13722
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
14229
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
13723
14230
  } else if (match1(parser, PM_TOKEN_EOF)) {
13724
14231
  pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_TERM);
13725
14232
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
@@ -13741,7 +14248,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13741
14248
  pm_node_flag_set(node, parse_unescaped_encoding(parser));
13742
14249
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13743
14250
  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
13744
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
14251
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
13745
14252
  } else {
13746
14253
  // If we get here, then we have interpolation so we'll need
13747
14254
  // to create a string or symbol node with interpolation.
@@ -13834,7 +14341,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13834
14341
  * Parse an expression that begins with the previous node that we just lexed.
13835
14342
  */
13836
14343
  static inline pm_node_t *
13837
- parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call) {
14344
+ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
13838
14345
  switch (parser->current.type) {
13839
14346
  case PM_TOKEN_BRACKET_LEFT_ARRAY: {
13840
14347
  parser_lex(parser);
@@ -13866,9 +14373,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
13866
14373
  pm_node_t *expression = NULL;
13867
14374
 
13868
14375
  if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
13869
- if (pm_parser_local_depth(parser, &parser->previous) == -1) {
13870
- pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
13871
- }
14376
+ pm_parser_scope_forwarding_positionals_check(parser, &operator);
13872
14377
  } else {
13873
14378
  expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR);
13874
14379
  }
@@ -14113,7 +14618,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14113
14618
  if (
14114
14619
  match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
14115
14620
  (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
14116
- (pm_accepts_block_stack_p(parser) && match2(parser, PM_TOKEN_KEYWORD_DO, PM_TOKEN_BRACE_LEFT))
14621
+ (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
14622
+ match1(parser, PM_TOKEN_BRACE_LEFT)
14117
14623
  ) {
14118
14624
  pm_arguments_t arguments = { 0 };
14119
14625
  parse_arguments_list(parser, &arguments, true, accepts_command_call);
@@ -14237,7 +14743,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14237
14743
  // a block, so we need to check for that here.
14238
14744
  if (
14239
14745
  (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
14240
- (pm_accepts_block_stack_p(parser) && match2(parser, PM_TOKEN_KEYWORD_DO, PM_TOKEN_BRACE_LEFT))
14746
+ (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
14747
+ match1(parser, PM_TOKEN_BRACE_LEFT)
14241
14748
  ) {
14242
14749
  pm_arguments_t arguments = { 0 };
14243
14750
  parse_arguments_list(parser, &arguments, true, accepts_command_call);
@@ -14250,6 +14757,31 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14250
14757
 
14251
14758
  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
14252
14759
  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
14760
+ } else {
14761
+ // Check if `it` is not going to be assigned.
14762
+ switch (parser->current.type) {
14763
+ case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
14764
+ case PM_TOKEN_AMPERSAND_EQUAL:
14765
+ case PM_TOKEN_CARET_EQUAL:
14766
+ case PM_TOKEN_EQUAL:
14767
+ case PM_TOKEN_GREATER_GREATER_EQUAL:
14768
+ case PM_TOKEN_LESS_LESS_EQUAL:
14769
+ case PM_TOKEN_MINUS_EQUAL:
14770
+ case PM_TOKEN_PARENTHESIS_RIGHT:
14771
+ case PM_TOKEN_PERCENT_EQUAL:
14772
+ case PM_TOKEN_PIPE_EQUAL:
14773
+ case PM_TOKEN_PIPE_PIPE_EQUAL:
14774
+ case PM_TOKEN_PLUS_EQUAL:
14775
+ case PM_TOKEN_SLASH_EQUAL:
14776
+ case PM_TOKEN_STAR_EQUAL:
14777
+ case PM_TOKEN_STAR_STAR_EQUAL:
14778
+ break;
14779
+ default:
14780
+ // Once we know it's neither a method call nor an
14781
+ // assignment, we can finally create `it` default
14782
+ // parameter.
14783
+ node = pm_node_check_it(parser, node);
14784
+ }
14253
14785
  }
14254
14786
 
14255
14787
  return node;
@@ -14286,6 +14818,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14286
14818
  // If we get here, then we tried to find something in the
14287
14819
  // heredoc but couldn't actually parse anything, so we'll just
14288
14820
  // return a missing node.
14821
+ //
14822
+ // parse_string_part handles its own errors, so there is no need
14823
+ // for us to add one here.
14289
14824
  node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
14290
14825
  } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
14291
14826
  // If we get here, then the part that we parsed was plain string
@@ -14549,11 +15084,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14549
15084
  // for guard clauses in the form of `if` or `unless` statements.
14550
15085
  if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
14551
15086
  pm_token_t keyword = parser->previous;
14552
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
15087
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
14553
15088
  pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
14554
15089
  } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
14555
15090
  pm_token_t keyword = parser->previous;
14556
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
15091
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
14557
15092
  pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
14558
15093
  }
14559
15094
 
@@ -14742,8 +15277,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14742
15277
  pm_token_t operator = parser->previous;
14743
15278
  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_NOT, true, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS);
14744
15279
 
14745
- pm_constant_id_t old_param_name = parser->current_param_name;
14746
- parser->current_param_name = 0;
15280
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
14747
15281
  pm_parser_scope_push(parser, true);
14748
15282
  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14749
15283
 
@@ -14760,11 +15294,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14760
15294
  }
14761
15295
 
14762
15296
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
14763
-
14764
15297
  pm_constant_id_list_t locals = parser->current_scope->locals;
15298
+
14765
15299
  pm_parser_scope_pop(parser);
14766
- parser->current_param_name = old_param_name;
14767
15300
  pm_do_loop_stack_pop(parser);
15301
+ pm_parser_current_param_name_restore(parser, saved_param_name);
15302
+
14768
15303
  return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
14769
15304
  }
14770
15305
 
@@ -14790,9 +15325,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14790
15325
  superclass = NULL;
14791
15326
  }
14792
15327
 
14793
- pm_constant_id_t old_param_name = parser->current_param_name;
14794
- parser->current_param_name = 0;
15328
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
14795
15329
  pm_parser_scope_push(parser, true);
15330
+
14796
15331
  if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
14797
15332
  expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
14798
15333
  } else {
@@ -14818,9 +15353,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14818
15353
  }
14819
15354
 
14820
15355
  pm_constant_id_list_t locals = parser->current_scope->locals;
15356
+
14821
15357
  pm_parser_scope_pop(parser);
14822
- parser->current_param_name = old_param_name;
14823
15358
  pm_do_loop_stack_pop(parser);
15359
+ pm_parser_current_param_name_restore(parser, saved_param_name);
14824
15360
 
14825
15361
  if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
14826
15362
  pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
@@ -14835,18 +15371,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14835
15371
  pm_token_t operator = not_provided(parser);
14836
15372
  pm_token_t name = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = def_keyword.end, .end = def_keyword.end };
14837
15373
 
14838
- // This context is necessary for lexing `...` in a bare params correctly.
14839
- // It must be pushed before lexing the first param, so it is here.
15374
+ // This context is necessary for lexing `...` in a bare params
15375
+ // correctly. It must be pushed before lexing the first param, so it
15376
+ // is here.
14840
15377
  context_push(parser, PM_CONTEXT_DEF_PARAMS);
15378
+ pm_constant_id_t saved_param_name;
15379
+
14841
15380
  parser_lex(parser);
14842
- pm_constant_id_t old_param_name = parser->current_param_name;
14843
15381
 
14844
15382
  switch (parser->current.type) {
14845
15383
  case PM_CASE_OPERATOR:
15384
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14846
15385
  pm_parser_scope_push(parser, true);
14847
- parser->current_param_name = 0;
14848
15386
  lex_state_set(parser, PM_LEX_STATE_ENDFN);
14849
15387
  parser_lex(parser);
15388
+
14850
15389
  name = parser->previous;
14851
15390
  break;
14852
15391
  case PM_TOKEN_IDENTIFIER: {
@@ -14854,18 +15393,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14854
15393
 
14855
15394
  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
14856
15395
  receiver = parse_variable_call(parser);
15396
+ receiver = pm_node_check_it(parser, receiver);
14857
15397
 
15398
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14858
15399
  pm_parser_scope_push(parser, true);
14859
- parser->current_param_name = 0;
14860
15400
  lex_state_set(parser, PM_LEX_STATE_FNAME);
14861
15401
  parser_lex(parser);
14862
15402
 
14863
15403
  operator = parser->previous;
14864
15404
  name = parse_method_definition_name(parser);
14865
15405
  } else {
15406
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14866
15407
  pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
14867
15408
  pm_parser_scope_push(parser, true);
14868
- parser->current_param_name = 0;
15409
+
14869
15410
  name = parser->previous;
14870
15411
  }
14871
15412
 
@@ -14882,9 +15423,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14882
15423
  case PM_TOKEN_KEYWORD___FILE__:
14883
15424
  case PM_TOKEN_KEYWORD___LINE__:
14884
15425
  case PM_TOKEN_KEYWORD___ENCODING__: {
15426
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14885
15427
  pm_parser_scope_push(parser, true);
14886
- parser->current_param_name = 0;
14887
15428
  parser_lex(parser);
15429
+
14888
15430
  pm_token_t identifier = parser->previous;
14889
15431
 
14890
15432
  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
@@ -14946,6 +15488,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14946
15488
  pm_token_t lparen = parser->previous;
14947
15489
  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_DEF_RECEIVER);
14948
15490
 
15491
+ accept1(parser, PM_TOKEN_NEWLINE);
14949
15492
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
14950
15493
  pm_token_t rparen = parser->previous;
14951
15494
 
@@ -14955,8 +15498,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14955
15498
  operator = parser->previous;
14956
15499
  receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
14957
15500
 
15501
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14958
15502
  pm_parser_scope_push(parser, true);
14959
- parser->current_param_name = 0;
14960
15503
 
14961
15504
  // To push `PM_CONTEXT_DEF_PARAMS` again is for the same reason as described the above.
14962
15505
  context_push(parser, PM_CONTEXT_DEF_PARAMS);
@@ -14964,8 +15507,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14964
15507
  break;
14965
15508
  }
14966
15509
  default:
15510
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14967
15511
  pm_parser_scope_push(parser, true);
14968
- parser->current_param_name = 0;
15512
+
14969
15513
  name = parse_method_definition_name(parser);
14970
15514
  break;
14971
15515
  }
@@ -15018,8 +15562,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15018
15562
  }
15019
15563
  }
15020
15564
 
15021
- uint32_t locals_body_index = (uint32_t) parser->current_scope->locals.size;
15022
-
15023
15565
  context_pop(parser);
15024
15566
  pm_node_t *statements = NULL;
15025
15567
  pm_token_t equal;
@@ -15080,8 +15622,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15080
15622
  }
15081
15623
 
15082
15624
  pm_constant_id_list_t locals = parser->current_scope->locals;
15083
- parser->current_param_name = old_param_name;
15625
+
15084
15626
  pm_parser_scope_pop(parser);
15627
+ pm_parser_current_param_name_restore(parser, saved_param_name);
15628
+
15629
+ /**
15630
+ * If the final character is @. As is the case when defining
15631
+ * methods to override the unary operators, we should ignore
15632
+ * the @ in the same way we do for symbols.
15633
+ */
15634
+ name.end = parse_operator_symbol_name(&name);
15085
15635
 
15086
15636
  return (pm_node_t *) pm_def_node_create(
15087
15637
  parser,
@@ -15090,7 +15640,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15090
15640
  params,
15091
15641
  statements,
15092
15642
  &locals,
15093
- locals_body_index,
15094
15643
  &def_keyword,
15095
15644
  &operator,
15096
15645
  &lparen,
@@ -15309,9 +15858,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15309
15858
  pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
15310
15859
  }
15311
15860
 
15312
- pm_constant_id_t old_param_name = parser->current_param_name;
15313
- parser->current_param_name = 0;
15861
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
15314
15862
  pm_parser_scope_push(parser, true);
15863
+
15315
15864
  accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
15316
15865
  pm_node_t *statements = NULL;
15317
15866
 
@@ -15328,7 +15877,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15328
15877
 
15329
15878
  pm_constant_id_list_t locals = parser->current_scope->locals;
15330
15879
  pm_parser_scope_pop(parser);
15331
- parser->current_param_name = old_param_name;
15880
+ pm_parser_current_param_name_restore(parser, saved_param_name);
15332
15881
 
15333
15882
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
15334
15883
 
@@ -15914,6 +16463,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15914
16463
  // context of a multiple assignment. We enforce that here. We'll
15915
16464
  // still lex past it though and create a missing node place.
15916
16465
  if (binding_power != PM_BINDING_POWER_STATEMENT) {
16466
+ pm_parser_err_previous(parser, diag_id);
15917
16467
  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
15918
16468
  }
15919
16469
 
@@ -15995,7 +16545,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15995
16545
  parser_lex(parser);
15996
16546
 
15997
16547
  pm_token_t operator = parser->previous;
16548
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
15998
16549
  pm_parser_scope_push(parser, false);
16550
+
15999
16551
  pm_block_parameters_node_t *block_parameters;
16000
16552
 
16001
16553
  switch (parser->current.type) {
@@ -16030,12 +16582,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16030
16582
  }
16031
16583
  }
16032
16584
 
16033
- uint32_t locals_body_index = 0;
16034
-
16035
- if (block_parameters) {
16036
- locals_body_index = (uint32_t) parser->current_scope->locals.size;
16037
- }
16038
-
16039
16585
  pm_token_t opening;
16040
16586
  pm_node_t *body = NULL;
16041
16587
  parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
@@ -16070,13 +16616,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16070
16616
 
16071
16617
  if (parameters == NULL && (maximum > 0)) {
16072
16618
  parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = operator.start, .end = parser->previous.end }, maximum);
16073
- locals_body_index = maximum;
16074
16619
  }
16075
16620
 
16076
16621
  pm_constant_id_list_t locals = parser->current_scope->locals;
16622
+
16077
16623
  pm_parser_scope_pop(parser);
16078
16624
  pm_accepts_block_stack_pop(parser);
16079
- return (pm_node_t *) pm_lambda_node_create(parser, &locals, locals_body_index, &operator, &opening, &parser->previous, parameters, body);
16625
+ pm_parser_current_param_name_restore(parser, saved_param_name);
16626
+
16627
+ return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
16080
16628
  }
16081
16629
  case PM_TOKEN_UPLUS: {
16082
16630
  parser_lex(parser);
@@ -16095,12 +16643,34 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16095
16643
 
16096
16644
  return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END);
16097
16645
  }
16098
- default:
16099
- if (context_recoverable(parser, &parser->current)) {
16646
+ default: {
16647
+ pm_context_t recoverable = context_recoverable(parser, &parser->current);
16648
+
16649
+ if (recoverable != PM_CONTEXT_NONE) {
16100
16650
  parser->recovering = true;
16651
+
16652
+ // If the given error is not the generic one, then we'll add it
16653
+ // here because it will provide more context in addition to the
16654
+ // recoverable error that we will also add.
16655
+ if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
16656
+ pm_parser_err_previous(parser, diag_id);
16657
+ }
16658
+
16659
+ // If we get here, then we are assuming this token is closing a
16660
+ // parent context, so we'll indicate that to the user so that
16661
+ // they know how we behaved.
16662
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
16663
+ } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
16664
+ // We're going to make a special case here, because "cannot
16665
+ // parse expression" is pretty generic, and we know here that we
16666
+ // have an unexpected token.
16667
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
16668
+ } else {
16669
+ pm_parser_err_previous(parser, diag_id);
16101
16670
  }
16102
16671
 
16103
16672
  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
16673
+ }
16104
16674
  }
16105
16675
  }
16106
16676
 
@@ -16412,7 +16982,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16412
16982
  }
16413
16983
 
16414
16984
  // If this node cannot be writable, then we have an error.
16415
- if (pm_call_node_writable_p(cast)) {
16985
+ if (pm_call_node_writable_p(parser, cast)) {
16416
16986
  parse_write_name(parser, &cast->name);
16417
16987
  } else {
16418
16988
  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
@@ -16523,7 +17093,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16523
17093
  }
16524
17094
 
16525
17095
  // If this node cannot be writable, then we have an error.
16526
- if (pm_call_node_writable_p(cast)) {
17096
+ if (pm_call_node_writable_p(parser, cast)) {
16527
17097
  parse_write_name(parser, &cast->name);
16528
17098
  } else {
16529
17099
  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
@@ -16644,7 +17214,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16644
17214
  }
16645
17215
 
16646
17216
  // If this node cannot be writable, then we have an error.
16647
- if (pm_call_node_writable_p(cast)) {
17217
+ if (pm_call_node_writable_p(parser, cast)) {
16648
17218
  parse_write_name(parser, &cast->name);
16649
17219
  } else {
16650
17220
  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
@@ -17063,15 +17633,12 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
17063
17633
  */
17064
17634
  static pm_node_t *
17065
17635
  parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
17066
- pm_token_t recovery = parser->previous;
17067
- pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call);
17636
+ pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, diag_id);
17068
17637
 
17069
17638
  switch (PM_NODE_TYPE(node)) {
17070
17639
  case PM_MISSING_NODE:
17071
17640
  // If we found a syntax error, then the type of node returned by
17072
- // parse_expression_prefix is going to be a missing node. In that
17073
- // case we need to add the error message to the parser's error list.
17074
- pm_parser_err(parser, recovery.end, recovery.end, diag_id);
17641
+ // parse_expression_prefix is going to be a missing node.
17075
17642
  return node;
17076
17643
  case PM_PRE_EXECUTION_NODE:
17077
17644
  case PM_POST_EXECUTION_NODE:
@@ -17080,7 +17647,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
17080
17647
  case PM_UNDEF_NODE:
17081
17648
  // These expressions are statements, and cannot be followed by
17082
17649
  // operators (except modifiers).
17083
- if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER_RESCUE) {
17650
+ if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
17084
17651
  return node;
17085
17652
  }
17086
17653
  break;
@@ -17175,9 +17742,14 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
17175
17742
 
17176
17743
  static pm_node_t *
17177
17744
  parse_program(pm_parser_t *parser) {
17178
- pm_parser_scope_push(parser, !parser->current_scope);
17179
- parser_lex(parser);
17745
+ // If the current scope is NULL, then we want to push a new top level scope.
17746
+ // The current scope could exist in the event that we are parsing an eval
17747
+ // and the user has passed into scopes that already exist.
17748
+ if (parser->current_scope == NULL) {
17749
+ pm_parser_scope_push(parser, true);
17750
+ }
17180
17751
 
17752
+ parser_lex(parser);
17181
17753
  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN);
17182
17754
  if (!statements) {
17183
17755
  statements = pm_statements_node_create(parser);
@@ -17234,7 +17806,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17234
17806
  .encoding_changed_callback = NULL,
17235
17807
  .encoding_comment_start = source,
17236
17808
  .lex_callback = NULL,
17237
- .filepath_string = { 0 },
17809
+ .filepath = { 0 },
17238
17810
  .constant_pool = { 0 },
17239
17811
  .newline_list = { 0 },
17240
17812
  .integer_base = 0,
@@ -17248,8 +17820,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17248
17820
  .in_keyword_arg = false,
17249
17821
  .current_param_name = 0,
17250
17822
  .semantic_token_seen = false,
17251
- .frozen_string_literal = false,
17252
- .suppress_warnings = false
17823
+ .frozen_string_literal = false
17253
17824
  };
17254
17825
 
17255
17826
  // Initialize the constant pool. We're going to completely guess as to the
@@ -17278,7 +17849,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17278
17849
  // If options were provided to this parse, establish them here.
17279
17850
  if (options != NULL) {
17280
17851
  // filepath option
17281
- parser->filepath_string = options->filepath;
17852
+ parser->filepath = options->filepath;
17282
17853
 
17283
17854
  // line option
17284
17855
  parser->start_line = options->line;
@@ -17295,10 +17866,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17295
17866
  parser->frozen_string_literal = true;
17296
17867
  }
17297
17868
 
17298
- // suppress_warnings option
17299
- if (options->suppress_warnings) {
17300
- parser->suppress_warnings = true;
17301
- }
17869
+ // version option
17870
+ parser->version = options->version;
17302
17871
 
17303
17872
  // scopes option
17304
17873
  for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
@@ -17382,7 +17951,7 @@ pm_magic_comment_list_free(pm_list_t *list) {
17382
17951
  */
17383
17952
  PRISM_EXPORTED_FUNCTION void
17384
17953
  pm_parser_free(pm_parser_t *parser) {
17385
- pm_string_free(&parser->filepath_string);
17954
+ pm_string_free(&parser->filepath);
17386
17955
  pm_diagnostic_list_free(&parser->error_list);
17387
17956
  pm_diagnostic_list_free(&parser->warning_list);
17388
17957
  pm_comment_list_free(&parser->comment_list);
@@ -17484,3 +18053,299 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
17484
18053
  #undef PM_LOCATION_NODE_VALUE
17485
18054
  #undef PM_LOCATION_NULL_VALUE
17486
18055
  #undef PM_LOCATION_TOKEN_VALUE
18056
+
18057
+ /** An error that is going to be formatted into the output. */
18058
+ typedef struct {
18059
+ /** A pointer to the diagnostic that was generated during parsing. */
18060
+ pm_diagnostic_t *error;
18061
+
18062
+ /** The start line of the diagnostic message. */
18063
+ uint32_t line;
18064
+
18065
+ /** The column start of the diagnostic message. */
18066
+ uint32_t column_start;
18067
+
18068
+ /** The column end of the diagnostic message. */
18069
+ uint32_t column_end;
18070
+ } pm_error_t;
18071
+
18072
+ /** The format that will be used to format the errors into the output. */
18073
+ typedef struct {
18074
+ /** The prefix that will be used for line numbers. */
18075
+ const char *number_prefix;
18076
+
18077
+ /** The prefix that will be used for blank lines. */
18078
+ const char *blank_prefix;
18079
+
18080
+ /** The divider that will be used between sections of source code. */
18081
+ const char *divider;
18082
+
18083
+ /** The length of the blank prefix. */
18084
+ size_t blank_prefix_length;
18085
+
18086
+ /** The length of the divider. */
18087
+ size_t divider_length;
18088
+ } pm_error_format_t;
18089
+
18090
+ #define PM_COLOR_GRAY "\033[38;5;102m"
18091
+ #define PM_COLOR_RED "\033[1;31m"
18092
+ #define PM_COLOR_RESET "\033[0m"
18093
+
18094
+ static inline pm_error_t *
18095
+ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
18096
+ pm_error_t *errors = calloc(error_list->size, sizeof(pm_error_t));
18097
+
18098
+ for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
18099
+ pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start);
18100
+ pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end);
18101
+
18102
+ // We're going to insert this error into the array in sorted order. We
18103
+ // do this by finding the first error that has a line number greater
18104
+ // than the current error and then inserting the current error before
18105
+ // that one.
18106
+ size_t index = 0;
18107
+ while (
18108
+ (index < error_list->size) &&
18109
+ (errors[index].error != NULL) &&
18110
+ (
18111
+ (errors[index].line < ((uint32_t) start.line)) ||
18112
+ (errors[index].line == ((uint32_t) start.line) && errors[index].column_start < ((uint32_t) start.column))
18113
+ )
18114
+ ) index++;
18115
+
18116
+ // Now we're going to shift all of the errors after this one down one
18117
+ // index to make room for the new error.
18118
+ if (index + 1 < error_list->size) {
18119
+ memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
18120
+ }
18121
+
18122
+ // Finally, we'll insert the error into the array.
18123
+ uint32_t column_end;
18124
+ if (start.line == end.line) {
18125
+ column_end = (uint32_t) end.column;
18126
+ } else {
18127
+ column_end = (uint32_t) (newline_list->offsets[start.line] - newline_list->offsets[start.line - 1] - 1);
18128
+ }
18129
+
18130
+ // Ensure we have at least one column of error.
18131
+ if (((uint32_t) start.column) == column_end) column_end++;
18132
+
18133
+ errors[index] = (pm_error_t) {
18134
+ .error = error,
18135
+ .line = (uint32_t) start.line,
18136
+ .column_start = (uint32_t) start.column,
18137
+ .column_end = column_end
18138
+ };
18139
+ }
18140
+
18141
+ return errors;
18142
+ }
18143
+
18144
+ static inline void
18145
+ pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, size_t line, pm_buffer_t *buffer) {
18146
+ const uint8_t *start = &parser->start[newline_list->offsets[line - 1]];
18147
+ const uint8_t *end;
18148
+
18149
+ if (line >= newline_list->size) {
18150
+ end = parser->end;
18151
+ } else {
18152
+ end = &parser->start[newline_list->offsets[line]];
18153
+ }
18154
+
18155
+ pm_buffer_append_format(buffer, number_prefix, (uint32_t) line);
18156
+ pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
18157
+
18158
+ if (end == parser->end && end[-1] != '\n') {
18159
+ pm_buffer_append_string(buffer, "\n", 1);
18160
+ }
18161
+ }
18162
+
18163
+ /**
18164
+ * Format the errors on the parser into the given buffer.
18165
+ */
18166
+ PRISM_EXPORTED_FUNCTION void
18167
+ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool colorize) {
18168
+ const pm_list_t *error_list = &parser->error_list;
18169
+ assert(error_list->size != 0);
18170
+
18171
+ // First, we're going to sort all of the errors by line number using an
18172
+ // insertion sort into a newly allocated array.
18173
+ const pm_newline_list_t *newline_list = &parser->newline_list;
18174
+ pm_error_t *errors = pm_parser_errors_format_sort(error_list, newline_list);
18175
+
18176
+ // Now we're going to determine how we're going to format line numbers and
18177
+ // blank lines based on the maximum number of digits in the line numbers
18178
+ // that are going to be displayed.
18179
+ pm_error_format_t error_format;
18180
+ size_t max_line_number = errors[error_list->size - 1].line;
18181
+
18182
+ if (max_line_number < 10) {
18183
+ if (colorize) {
18184
+ error_format = (pm_error_format_t) {
18185
+ .number_prefix = PM_COLOR_GRAY "%1" PRIu32 " | " PM_COLOR_RESET,
18186
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18187
+ .divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
18188
+ };
18189
+ } else {
18190
+ error_format = (pm_error_format_t) {
18191
+ .number_prefix = "%1" PRIu32 " | ",
18192
+ .blank_prefix = " | ",
18193
+ .divider = " ~~~~~\n"
18194
+ };
18195
+ }
18196
+ } else if (max_line_number < 100) {
18197
+ if (colorize) {
18198
+ error_format = (pm_error_format_t) {
18199
+ .number_prefix = PM_COLOR_GRAY "%2" PRIu32 " | " PM_COLOR_RESET,
18200
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18201
+ .divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
18202
+ };
18203
+ } else {
18204
+ error_format = (pm_error_format_t) {
18205
+ .number_prefix = "%2" PRIu32 " | ",
18206
+ .blank_prefix = " | ",
18207
+ .divider = " ~~~~~~\n"
18208
+ };
18209
+ }
18210
+ } else if (max_line_number < 1000) {
18211
+ if (colorize) {
18212
+ error_format = (pm_error_format_t) {
18213
+ .number_prefix = PM_COLOR_GRAY "%3" PRIu32 " | " PM_COLOR_RESET,
18214
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18215
+ .divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
18216
+ };
18217
+ } else {
18218
+ error_format = (pm_error_format_t) {
18219
+ .number_prefix = "%3" PRIu32 " | ",
18220
+ .blank_prefix = " | ",
18221
+ .divider = " ~~~~~~~\n"
18222
+ };
18223
+ }
18224
+ } else if (max_line_number < 10000) {
18225
+ if (colorize) {
18226
+ error_format = (pm_error_format_t) {
18227
+ .number_prefix = PM_COLOR_GRAY "%4" PRIu32 " | " PM_COLOR_RESET,
18228
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18229
+ .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
18230
+ };
18231
+ } else {
18232
+ error_format = (pm_error_format_t) {
18233
+ .number_prefix = "%4" PRIu32 " | ",
18234
+ .blank_prefix = " | ",
18235
+ .divider = " ~~~~~~~~\n"
18236
+ };
18237
+ }
18238
+ } else {
18239
+ if (colorize) {
18240
+ error_format = (pm_error_format_t) {
18241
+ .number_prefix = PM_COLOR_GRAY "%5" PRIu32 " | " PM_COLOR_RESET,
18242
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18243
+ .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
18244
+ };
18245
+ } else {
18246
+ error_format = (pm_error_format_t) {
18247
+ .number_prefix = "%5" PRIu32 " | ",
18248
+ .blank_prefix = " | ",
18249
+ .divider = " ~~~~~~~~\n"
18250
+ };
18251
+ }
18252
+ }
18253
+
18254
+ error_format.blank_prefix_length = strlen(error_format.blank_prefix);
18255
+ error_format.divider_length = strlen(error_format.divider);
18256
+
18257
+ // Now we're going to iterate through every error in our error list and
18258
+ // display it. While we're iterating, we will display some padding lines of
18259
+ // the source before the error to give some context. We'll be careful not to
18260
+ // display the same line twice in case the errors are close enough in the
18261
+ // source.
18262
+ uint32_t last_line = 0;
18263
+ const pm_encoding_t *encoding = parser->encoding;
18264
+
18265
+ for (size_t index = 0; index < error_list->size; index++) {
18266
+ pm_error_t *error = &errors[index];
18267
+
18268
+ // Here we determine how many lines of padding of the source to display,
18269
+ // based on the difference from the last line that was displayed.
18270
+ if (error->line - last_line > 1) {
18271
+ if (error->line - last_line > 2) {
18272
+ if ((index != 0) && (error->line - last_line > 3)) {
18273
+ pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
18274
+ }
18275
+
18276
+ pm_buffer_append_string(buffer, " ", 2);
18277
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
18278
+ }
18279
+
18280
+ pm_buffer_append_string(buffer, " ", 2);
18281
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
18282
+ }
18283
+
18284
+ // If this is the first error or we're on a new line, then we'll display
18285
+ // the line that has the error in it.
18286
+ if ((index == 0) || (error->line != last_line)) {
18287
+ if (colorize) {
18288
+ pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 13);
18289
+ } else {
18290
+ pm_buffer_append_string(buffer, "> ", 2);
18291
+ }
18292
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
18293
+ }
18294
+
18295
+ // Now we'll display the actual error message. We'll do this by first
18296
+ // putting the prefix to the line, then a bunch of blank spaces
18297
+ // depending on the column, then as many carets as we need to display
18298
+ // the width of the error, then the error message itself.
18299
+ //
18300
+ // Note that this doesn't take into account the width of the actual
18301
+ // character when displayed in the terminal. For some east-asian
18302
+ // languages or emoji, this means it can be thrown off pretty badly. We
18303
+ // will need to solve this eventually.
18304
+ pm_buffer_append_string(buffer, " ", 2);
18305
+ pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
18306
+
18307
+ size_t column = 0;
18308
+ const uint8_t *start = &parser->start[newline_list->offsets[error->line - 1]];
18309
+
18310
+ while (column < error->column_end) {
18311
+ if (column < error->column_start) {
18312
+ pm_buffer_append_byte(buffer, ' ');
18313
+ } else if (colorize) {
18314
+ pm_buffer_append_string(buffer, PM_COLOR_RED "^" PM_COLOR_RESET, 12);
18315
+ } else {
18316
+ pm_buffer_append_byte(buffer, '^');
18317
+ }
18318
+
18319
+ size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
18320
+ column += (char_width == 0 ? 1 : char_width);
18321
+ }
18322
+
18323
+ pm_buffer_append_byte(buffer, ' ');
18324
+
18325
+ const char *message = error->error->message;
18326
+ pm_buffer_append_string(buffer, message, strlen(message));
18327
+ pm_buffer_append_byte(buffer, '\n');
18328
+
18329
+ // Here we determine how many lines of padding to display after the
18330
+ // error, depending on where the next error is in source.
18331
+ last_line = error->line;
18332
+ size_t next_line = (index == error_list->size - 1) ? newline_list->size : errors[index + 1].line;
18333
+
18334
+ if (next_line - last_line > 1) {
18335
+ pm_buffer_append_string(buffer, " ", 2);
18336
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
18337
+ }
18338
+
18339
+ if (next_line - last_line > 1) {
18340
+ pm_buffer_append_string(buffer, " ", 2);
18341
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
18342
+ }
18343
+ }
18344
+
18345
+ // Finally, we'll free the array of errors that we allocated.
18346
+ free(errors);
18347
+ }
18348
+
18349
+ #undef PM_COLOR_GRAY
18350
+ #undef PM_COLOR_RED
18351
+ #undef PM_COLOR_RESET