prism 0.19.0 → 0.21.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +48 -1
  3. data/Makefile +5 -0
  4. data/README.md +8 -6
  5. data/config.yml +236 -38
  6. data/docs/build_system.md +19 -2
  7. data/docs/cruby_compilation.md +27 -0
  8. data/docs/parser_translation.md +34 -0
  9. data/docs/parsing_rules.md +19 -0
  10. data/docs/releasing.md +3 -3
  11. data/docs/ruby_api.md +1 -1
  12. data/docs/serialization.md +17 -5
  13. data/ext/prism/api_node.c +101 -81
  14. data/ext/prism/extension.c +74 -11
  15. data/ext/prism/extension.h +1 -1
  16. data/include/prism/ast.h +1700 -505
  17. data/include/prism/defines.h +8 -0
  18. data/include/prism/diagnostic.h +39 -2
  19. data/include/prism/encoding.h +10 -0
  20. data/include/prism/options.h +40 -14
  21. data/include/prism/parser.h +34 -18
  22. data/include/prism/util/pm_buffer.h +9 -0
  23. data/include/prism/util/pm_constant_pool.h +18 -0
  24. data/include/prism/util/pm_newline_list.h +0 -11
  25. data/include/prism/version.h +2 -2
  26. data/include/prism.h +19 -2
  27. data/lib/prism/debug.rb +11 -5
  28. data/lib/prism/dot_visitor.rb +36 -14
  29. data/lib/prism/dsl.rb +22 -22
  30. data/lib/prism/ffi.rb +2 -2
  31. data/lib/prism/node.rb +1020 -737
  32. data/lib/prism/node_ext.rb +2 -2
  33. data/lib/prism/parse_result.rb +17 -9
  34. data/lib/prism/serialize.rb +53 -29
  35. data/lib/prism/translation/parser/compiler.rb +1828 -0
  36. data/lib/prism/translation/parser/lexer.rb +335 -0
  37. data/lib/prism/translation/parser/rubocop.rb +37 -0
  38. data/lib/prism/translation/parser.rb +171 -0
  39. data/lib/prism/translation.rb +11 -0
  40. data/lib/prism.rb +1 -0
  41. data/prism.gemspec +12 -5
  42. data/rbi/prism.rbi +150 -88
  43. data/rbi/prism_static.rbi +15 -3
  44. data/sig/prism.rbs +996 -961
  45. data/sig/prism_static.rbs +123 -46
  46. data/src/diagnostic.c +259 -219
  47. data/src/encoding.c +5 -9
  48. data/src/node.c +2 -6
  49. data/src/options.c +24 -5
  50. data/src/prettyprint.c +174 -42
  51. data/src/prism.c +1344 -479
  52. data/src/serialize.c +12 -9
  53. data/src/token_type.c +353 -4
  54. data/src/util/pm_buffer.c +11 -0
  55. data/src/util/pm_constant_pool.c +37 -11
  56. data/src/util/pm_newline_list.c +2 -14
  57. metadata +10 -3
  58. data/docs/building.md +0 -29
data/src/prism.c CHANGED
@@ -164,7 +164,7 @@ debug_state(pm_parser_t *parser) {
164
164
 
165
165
  PRISM_ATTRIBUTE_UNUSED static void
166
166
  debug_token(pm_token_t * token) {
167
- fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_to_str(token->type), (int) (token->end - token->start), token->start);
167
+ fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_human(token->type), (int) (token->end - token->start), token->start);
168
168
  }
169
169
 
170
170
  #endif
@@ -423,6 +423,11 @@ lex_state_beg_p(pm_parser_t *parser) {
423
423
  return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
424
424
  }
425
425
 
426
+ static inline bool
427
+ lex_state_arg_labeled_p(pm_parser_t *parser) {
428
+ return (parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
429
+ }
430
+
426
431
  static inline bool
427
432
  lex_state_arg_p(pm_parser_t *parser) {
428
433
  return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
@@ -548,9 +553,7 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
548
553
  */
549
554
  static inline void
550
555
  pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
551
- if (!parser->suppress_warnings) {
552
- pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
553
- }
556
+ pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
554
557
  }
555
558
 
556
559
  /**
@@ -813,6 +816,9 @@ typedef struct {
813
816
 
814
817
  /** The optional block attached to the call. */
815
818
  pm_node_t *block;
819
+
820
+ /** The flag indicating whether this arguments list has a forwarding argument. */
821
+ bool has_forwarding;
816
822
  } pm_arguments_t;
817
823
 
818
824
  /**
@@ -864,6 +870,105 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
864
870
  pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
865
871
  }
866
872
 
873
+ /******************************************************************************/
874
+ /* Basic character checks */
875
+ /******************************************************************************/
876
+
877
+ /**
878
+ * This function is used extremely frequently to lex all of the identifiers in a
879
+ * source file, so it's important that it be as fast as possible. For this
880
+ * reason we have the encoding_changed boolean to check if we need to go through
881
+ * the function pointer or can just directly use the UTF-8 functions.
882
+ */
883
+ static inline size_t
884
+ char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
885
+ if (parser->encoding_changed) {
886
+ size_t width;
887
+ if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
888
+ return width;
889
+ } else if (*b == '_') {
890
+ return 1;
891
+ } else if (*b >= 0x80) {
892
+ return parser->encoding->char_width(b, parser->end - b);
893
+ } else {
894
+ return 0;
895
+ }
896
+ } else if (*b < 0x80) {
897
+ return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
898
+ } else {
899
+ return pm_encoding_utf_8_char_width(b, parser->end - b);
900
+ }
901
+ }
902
+
903
+ /**
904
+ * Similar to char_is_identifier but this function assumes that the encoding
905
+ * has not been changed.
906
+ */
907
+ static inline size_t
908
+ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
909
+ if (*b < 0x80) {
910
+ return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
911
+ } else {
912
+ return pm_encoding_utf_8_char_width(b, end - b);
913
+ }
914
+ }
915
+
916
+ /**
917
+ * Like the above, this function is also used extremely frequently to lex all of
918
+ * the identifiers in a source file once the first character has been found. So
919
+ * it's important that it be as fast as possible.
920
+ */
921
+ static inline size_t
922
+ char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
923
+ if (parser->encoding_changed) {
924
+ size_t width;
925
+ if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
926
+ return width;
927
+ } else if (*b == '_') {
928
+ return 1;
929
+ } else if (*b >= 0x80) {
930
+ return parser->encoding->char_width(b, parser->end - b);
931
+ } else {
932
+ return 0;
933
+ }
934
+ }
935
+ return char_is_identifier_utf8(b, parser->end);
936
+ }
937
+
938
+ // Here we're defining a perfect hash for the characters that are allowed in
939
+ // global names. This is used to quickly check the next character after a $ to
940
+ // see if it's a valid character for a global name.
941
+ #define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
942
+ #define PUNCT(idx) ( \
943
+ BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
944
+ BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
945
+ BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
946
+ BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
947
+ BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
948
+ BIT('0', idx))
949
+
950
+ const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
951
+
952
+ #undef BIT
953
+ #undef PUNCT
954
+
955
+ static inline bool
956
+ char_is_global_name_punctuation(const uint8_t b) {
957
+ const unsigned int i = (const unsigned int) b;
958
+ if (i <= 0x20 || 0x7e < i) return false;
959
+
960
+ return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
961
+ }
962
+
963
+ static inline bool
964
+ token_is_setter_name(pm_token_t *token) {
965
+ return (
966
+ (token->type == PM_TOKEN_IDENTIFIER) &&
967
+ (token->end - token->start >= 2) &&
968
+ (token->end[-1] == '=')
969
+ );
970
+ }
971
+
867
972
  /******************************************************************************/
868
973
  /* Node flag handling functions */
869
974
  /******************************************************************************/
@@ -884,6 +989,22 @@ pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
884
989
  node->flags &= (pm_node_flags_t) ~flag;
885
990
  }
886
991
 
992
+ /**
993
+ * Set the repeated parameter flag on the given node.
994
+ */
995
+ static inline void
996
+ pm_node_flag_set_repeated_parameter(pm_node_t *node) {
997
+ assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
998
+ PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
999
+ PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1000
+ PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1001
+ PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1002
+ PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1003
+ PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1004
+ PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1005
+
1006
+ pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1007
+ }
887
1008
 
888
1009
  /******************************************************************************/
889
1010
  /* Node creation functions */
@@ -977,7 +1098,7 @@ static inline void *
977
1098
  pm_alloc_node(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
978
1099
  void *memory = calloc(1, size);
979
1100
  if (memory == NULL) {
980
- fprintf(stderr, "Failed to allocate %zu bytes\n", size);
1101
+ fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
981
1102
  abort();
982
1103
  }
983
1104
  return memory;
@@ -1325,7 +1446,7 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
1325
1446
  pm_assoc_node_t *node = PM_ALLOC_NODE(parser, pm_assoc_node_t);
1326
1447
  const uint8_t *end;
1327
1448
 
1328
- if (value != NULL) {
1449
+ if (value != NULL && value->location.end > key->location.end) {
1329
1450
  end = value->location.end;
1330
1451
  } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
1331
1452
  end = operator->end;
@@ -1333,6 +1454,13 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
1333
1454
  end = key->location.end;
1334
1455
  }
1335
1456
 
1457
+ // Hash string keys will be frozen, so we can mark them as frozen here so
1458
+ // that the compiler picks them up and also when we check for static literal
1459
+ // on the keys it gets factored in.
1460
+ if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
1461
+ key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
1462
+ }
1463
+
1336
1464
  // If the key and value of this assoc node are both static literals, then
1337
1465
  // we can mark this node as a static literal.
1338
1466
  pm_node_flags_t flags = 0;
@@ -1490,7 +1618,7 @@ pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, p
1490
1618
  * Allocate and initialize a new BlockNode node.
1491
1619
  */
1492
1620
  static pm_block_node_t *
1493
- pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_t locals_body_index, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
1621
+ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
1494
1622
  pm_block_node_t *node = PM_ALLOC_NODE(parser, pm_block_node_t);
1495
1623
 
1496
1624
  *node = (pm_block_node_t) {
@@ -1499,7 +1627,6 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_
1499
1627
  .location = { .start = opening->start, .end = closing->end },
1500
1628
  },
1501
1629
  .locals = *locals,
1502
- .locals_body_index = locals_body_index,
1503
1630
  .parameters = parameters,
1504
1631
  .body = body,
1505
1632
  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
@@ -1645,12 +1772,13 @@ pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
1645
1772
  * in the various specializations of this function.
1646
1773
  */
1647
1774
  static pm_call_node_t *
1648
- pm_call_node_create(pm_parser_t *parser) {
1775
+ pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
1649
1776
  pm_call_node_t *node = PM_ALLOC_NODE(parser, pm_call_node_t);
1650
1777
 
1651
1778
  *node = (pm_call_node_t) {
1652
1779
  {
1653
1780
  .type = PM_CALL_NODE,
1781
+ .flags = flags,
1654
1782
  .location = PM_LOCATION_NULL_VALUE(parser),
1655
1783
  },
1656
1784
  .receiver = NULL,
@@ -1666,6 +1794,15 @@ pm_call_node_create(pm_parser_t *parser) {
1666
1794
  return node;
1667
1795
  }
1668
1796
 
1797
+ /**
1798
+ * Returns the value that the ignore visibility flag should be set to for the
1799
+ * given receiver.
1800
+ */
1801
+ static inline pm_node_flags_t
1802
+ pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
1803
+ return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
1804
+ }
1805
+
1669
1806
  /**
1670
1807
  * Allocate and initialize a new CallNode node from an aref or an aset
1671
1808
  * expression.
@@ -1674,7 +1811,7 @@ static pm_call_node_t *
1674
1811
  pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
1675
1812
  pm_assert_value_expression(parser, receiver);
1676
1813
 
1677
- pm_call_node_t *node = pm_call_node_create(parser);
1814
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1678
1815
 
1679
1816
  node->base.location.start = receiver->location.start;
1680
1817
  node->base.location.end = pm_arguments_end(arguments);
@@ -1700,7 +1837,7 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t
1700
1837
  pm_assert_value_expression(parser, receiver);
1701
1838
  pm_assert_value_expression(parser, argument);
1702
1839
 
1703
- pm_call_node_t *node = pm_call_node_create(parser);
1840
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1704
1841
 
1705
1842
  node->base.location.start = MIN(receiver->location.start, argument->location.start);
1706
1843
  node->base.location.end = MAX(receiver->location.end, argument->location.end);
@@ -1723,7 +1860,7 @@ static pm_call_node_t *
1723
1860
  pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
1724
1861
  pm_assert_value_expression(parser, receiver);
1725
1862
 
1726
- pm_call_node_t *node = pm_call_node_create(parser);
1863
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1727
1864
 
1728
1865
  node->base.location.start = receiver->location.start;
1729
1866
  const uint8_t *end = pm_arguments_end(arguments);
@@ -1754,7 +1891,7 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
1754
1891
  */
1755
1892
  static pm_call_node_t *
1756
1893
  pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
1757
- pm_call_node_t *node = pm_call_node_create(parser);
1894
+ pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
1758
1895
 
1759
1896
  node->base.location.start = message->start;
1760
1897
  node->base.location.end = pm_arguments_end(arguments);
@@ -1776,7 +1913,7 @@ static pm_call_node_t *
1776
1913
  pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
1777
1914
  pm_assert_value_expression(parser, receiver);
1778
1915
 
1779
- pm_call_node_t *node = pm_call_node_create(parser);
1916
+ pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
1780
1917
 
1781
1918
  node->base.location.start = message->start;
1782
1919
  if (arguments->closing_loc.start != NULL) {
@@ -1802,7 +1939,7 @@ static pm_call_node_t *
1802
1939
  pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
1803
1940
  pm_assert_value_expression(parser, receiver);
1804
1941
 
1805
- pm_call_node_t *node = pm_call_node_create(parser);
1942
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1806
1943
 
1807
1944
  node->base.location.start = receiver->location.start;
1808
1945
  node->base.location.end = pm_arguments_end(arguments);
@@ -1829,7 +1966,7 @@ static pm_call_node_t *
1829
1966
  pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
1830
1967
  pm_assert_value_expression(parser, receiver);
1831
1968
 
1832
- pm_call_node_t *node = pm_call_node_create(parser);
1969
+ pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
1833
1970
 
1834
1971
  node->base.location.start = operator->start;
1835
1972
  node->base.location.end = receiver->location.end;
@@ -1847,7 +1984,7 @@ pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *
1847
1984
  */
1848
1985
  static pm_call_node_t *
1849
1986
  pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
1850
- pm_call_node_t *node = pm_call_node_create(parser);
1987
+ pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
1851
1988
 
1852
1989
  node->base.location = PM_LOCATION_TOKEN_VALUE(message);
1853
1990
  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
@@ -1885,11 +2022,12 @@ pm_call_node_index_p(pm_call_node_t *node) {
1885
2022
  * operator assignment.
1886
2023
  */
1887
2024
  static inline bool
1888
- pm_call_node_writable_p(pm_call_node_t *node) {
2025
+ pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
1889
2026
  return (
1890
2027
  (node->message_loc.start != NULL) &&
1891
2028
  (node->message_loc.end[-1] != '!') &&
1892
2029
  (node->message_loc.end[-1] != '?') &&
2030
+ char_is_identifier_start(parser, node->message_loc.start) &&
1893
2031
  (node->opening_loc.start == NULL) &&
1894
2032
  (node->arguments == NULL) &&
1895
2033
  (node->block == NULL)
@@ -2167,11 +2305,12 @@ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
2167
2305
  static pm_index_target_node_t *
2168
2306
  pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
2169
2307
  pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
2308
+ pm_node_flags_t flags = target->base.flags;
2170
2309
 
2171
2310
  *node = (pm_index_target_node_t) {
2172
2311
  {
2173
2312
  .type = PM_INDEX_TARGET_NODE,
2174
- .flags = target->base.flags,
2313
+ .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
2175
2314
  .location = target->base.location
2176
2315
  },
2177
2316
  .receiver = target->receiver,
@@ -2701,6 +2840,50 @@ pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *targ
2701
2840
  return node;
2702
2841
  }
2703
2842
 
2843
+ /**
2844
+ * Check if the receiver of a `def` node is allowed.
2845
+ */
2846
+ static void
2847
+ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
2848
+ switch (PM_NODE_TYPE(node)) {
2849
+ case PM_BEGIN_NODE: {
2850
+ const pm_begin_node_t *cast = (pm_begin_node_t *) node;
2851
+ if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
2852
+ break;
2853
+ }
2854
+ case PM_PARENTHESES_NODE: {
2855
+ const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
2856
+ if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
2857
+ break;
2858
+ }
2859
+ case PM_STATEMENTS_NODE: {
2860
+ const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
2861
+ pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
2862
+ break;
2863
+ }
2864
+ case PM_ARRAY_NODE:
2865
+ case PM_FLOAT_NODE:
2866
+ case PM_IMAGINARY_NODE:
2867
+ case PM_INTEGER_NODE:
2868
+ case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
2869
+ case PM_INTERPOLATED_STRING_NODE:
2870
+ case PM_INTERPOLATED_SYMBOL_NODE:
2871
+ case PM_INTERPOLATED_X_STRING_NODE:
2872
+ case PM_RATIONAL_NODE:
2873
+ case PM_REGULAR_EXPRESSION_NODE:
2874
+ case PM_SOURCE_ENCODING_NODE:
2875
+ case PM_SOURCE_FILE_NODE:
2876
+ case PM_SOURCE_LINE_NODE:
2877
+ case PM_STRING_NODE:
2878
+ case PM_SYMBOL_NODE:
2879
+ case PM_X_STRING_NODE:
2880
+ pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
2881
+ break;
2882
+ default:
2883
+ break;
2884
+ }
2885
+ }
2886
+
2704
2887
  /**
2705
2888
  * Allocate and initialize a new DefNode node.
2706
2889
  */
@@ -2712,7 +2895,6 @@ pm_def_node_create(
2712
2895
  pm_parameters_node_t *parameters,
2713
2896
  pm_node_t *body,
2714
2897
  pm_constant_id_list_t *locals,
2715
- uint32_t locals_body_index,
2716
2898
  const pm_token_t *def_keyword,
2717
2899
  const pm_token_t *operator,
2718
2900
  const pm_token_t *lparen,
@@ -2729,6 +2911,10 @@ pm_def_node_create(
2729
2911
  end = end_keyword->end;
2730
2912
  }
2731
2913
 
2914
+ if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
2915
+ pm_def_node_receiver_check(parser, receiver);
2916
+ }
2917
+
2732
2918
  *node = (pm_def_node_t) {
2733
2919
  {
2734
2920
  .type = PM_DEF_NODE,
@@ -2740,7 +2926,6 @@ pm_def_node_create(
2740
2926
  .parameters = parameters,
2741
2927
  .body = body,
2742
2928
  .locals = *locals,
2743
- .locals_body_index = locals_body_index,
2744
2929
  .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
2745
2930
  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2746
2931
  .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
@@ -3962,9 +4147,8 @@ pm_keyword_hash_node_create(pm_parser_t *parser) {
3962
4147
  */
3963
4148
  static void
3964
4149
  pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
3965
- // If the element being added is not an AssocNode or does not have a symbol key, then
3966
- // we want to turn the STATIC_KEYS flag off.
3967
- // TODO: Rename the flag to SYMBOL_KEYS instead.
4150
+ // If the element being added is not an AssocNode or does not have a symbol
4151
+ // key, then we want to turn the SYMBOL_KEYS flag off.
3968
4152
  if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
3969
4153
  pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
3970
4154
  }
@@ -4051,7 +4235,6 @@ static pm_lambda_node_t *
4051
4235
  pm_lambda_node_create(
4052
4236
  pm_parser_t *parser,
4053
4237
  pm_constant_id_list_t *locals,
4054
- uint32_t locals_body_index,
4055
4238
  const pm_token_t *operator,
4056
4239
  const pm_token_t *opening,
4057
4240
  const pm_token_t *closing,
@@ -4069,7 +4252,6 @@ pm_lambda_node_create(
4069
4252
  },
4070
4253
  },
4071
4254
  .locals = *locals,
4072
- .locals_body_index = locals_body_index,
4073
4255
  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4074
4256
  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4075
4257
  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
@@ -4161,12 +4343,10 @@ pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, c
4161
4343
  }
4162
4344
 
4163
4345
  /**
4164
- * Allocate a new LocalVariableReadNode node.
4346
+ * Allocate a new LocalVariableReadNode node with constant_id.
4165
4347
  */
4166
4348
  static pm_local_variable_read_node_t *
4167
- pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
4168
- pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
4169
-
4349
+ pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth) {
4170
4350
  if (parser->current_param_name == name_id) {
4171
4351
  pm_parser_err_token(parser, name, PM_ERR_PARAMETER_CIRCULAR);
4172
4352
  }
@@ -4185,6 +4365,15 @@ pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name,
4185
4365
  return node;
4186
4366
  }
4187
4367
 
4368
+ /**
4369
+ * Allocate a new LocalVariableReadNode node.
4370
+ */
4371
+ static pm_local_variable_read_node_t *
4372
+ pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
4373
+ pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
4374
+ return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth);
4375
+ }
4376
+
4188
4377
  /**
4189
4378
  * Allocate and initialize a new LocalVariableWriteNode node.
4190
4379
  */
@@ -4210,6 +4399,57 @@ pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name,
4210
4399
  return node;
4211
4400
  }
4212
4401
 
4402
+ /**
4403
+ * Returns true if the given bounds comprise `it`.
4404
+ */
4405
+ static inline bool
4406
+ pm_token_is_it(const uint8_t *start, const uint8_t *end) {
4407
+ return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
4408
+ }
4409
+
4410
+ /**
4411
+ * Returns true if the given node is the `it` default parameter.
4412
+ */
4413
+ static inline bool
4414
+ pm_node_is_it(pm_parser_t *parser, pm_node_t *node) {
4415
+ // Check if it's a call node
4416
+ if (node->type != PM_CALL_NODE) {
4417
+ return false;
4418
+ }
4419
+
4420
+ // Check if it's a variable call
4421
+ pm_call_node_t *call_node = (pm_call_node_t *) node;
4422
+ if (!pm_call_node_variable_call_p(call_node)) {
4423
+ return false;
4424
+ }
4425
+
4426
+ // Check if it's called `it`
4427
+ pm_constant_id_t id = ((pm_call_node_t *)node)->name;
4428
+ pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, id);
4429
+ return pm_token_is_it(constant->start, constant->start + constant->length);
4430
+ }
4431
+
4432
+ /**
4433
+ * Convert an `it` variable call node to a node for the `it` default parameter.
4434
+ */
4435
+ static pm_node_t *
4436
+ pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
4437
+ if (
4438
+ (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
4439
+ !parser->current_scope->closed &&
4440
+ pm_node_is_it(parser, node)
4441
+ ) {
4442
+ if (parser->current_scope->explicit_params) {
4443
+ pm_parser_err_previous(parser, PM_ERR_IT_NOT_ALLOWED);
4444
+ } else {
4445
+ pm_node_destroy(parser, node);
4446
+ pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
4447
+ node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
4448
+ }
4449
+ }
4450
+ return node;
4451
+ }
4452
+
4213
4453
  /**
4214
4454
  * Returns true if the given bounds comprise a numbered parameter (i.e., they
4215
4455
  * are of the form /^_\d$/).
@@ -5195,7 +5435,7 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
5195
5435
  .flags = PM_NODE_FLAG_STATIC_LITERAL,
5196
5436
  .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
5197
5437
  },
5198
- .filepath = parser->filepath_string,
5438
+ .filepath = parser->filepath
5199
5439
  };
5200
5440
 
5201
5441
  return node;
@@ -5372,18 +5612,59 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
5372
5612
  return node;
5373
5613
  }
5374
5614
 
5615
+ /**
5616
+ * Read through the contents of a string and check if it consists solely of US ASCII code points.
5617
+ */
5618
+ static bool
5619
+ pm_ascii_only_p(const pm_string_t *contents) {
5620
+ const size_t length = pm_string_length(contents);
5621
+ const uint8_t *source = pm_string_source(contents);
5622
+
5623
+ for (size_t index = 0; index < length; index++) {
5624
+ if (source[index] & 0x80) return false;
5625
+ }
5626
+
5627
+ return true;
5628
+ }
5629
+
5630
+ /**
5631
+ * Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
5632
+ * encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
5633
+ * points. Otherwise, the encoding may be explicitly set with an escape
5634
+ * sequence.
5635
+ */
5636
+ static inline pm_node_flags_t
5637
+ parse_symbol_encoding(const pm_parser_t *parser, const pm_string_t *contents) {
5638
+ if (parser->explicit_encoding != NULL) {
5639
+ // A Symbol may optionally have its encoding explicitly set. This will
5640
+ // happen if an escape sequence results in a non-ASCII code point.
5641
+ if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
5642
+ return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
5643
+ } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
5644
+ return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
5645
+ }
5646
+ } else if (pm_ascii_only_p(contents)) {
5647
+ // Ruby stipulates that all source files must use an ASCII-compatible
5648
+ // encoding. Thus, all symbols appearing in source are eligible for
5649
+ // "downgrading" to US-ASCII.
5650
+ return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
5651
+ }
5652
+
5653
+ return 0;
5654
+ }
5655
+
5375
5656
  /**
5376
5657
  * Allocate and initialize a new SymbolNode node with the given unescaped
5377
5658
  * string.
5378
5659
  */
5379
5660
  static pm_symbol_node_t *
5380
- pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped) {
5661
+ pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
5381
5662
  pm_symbol_node_t *node = PM_ALLOC_NODE(parser, pm_symbol_node_t);
5382
5663
 
5383
5664
  *node = (pm_symbol_node_t) {
5384
5665
  {
5385
5666
  .type = PM_SYMBOL_NODE,
5386
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
5667
+ .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
5387
5668
  .location = {
5388
5669
  .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
5389
5670
  .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
@@ -5403,7 +5684,7 @@ pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening,
5403
5684
  */
5404
5685
  static inline pm_symbol_node_t *
5405
5686
  pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
5406
- return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY);
5687
+ return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
5407
5688
  }
5408
5689
 
5409
5690
  /**
@@ -5411,7 +5692,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
5411
5692
  */
5412
5693
  static pm_symbol_node_t *
5413
5694
  pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
5414
- pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string);
5695
+ pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, &parser->current_string));
5415
5696
  parser->current_string = PM_STRING_EMPTY;
5416
5697
  return node;
5417
5698
  }
@@ -5433,6 +5714,8 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
5433
5714
 
5434
5715
  assert((label.end - label.start) >= 0);
5435
5716
  pm_string_shared_init(&node->unescaped, label.start, label.end);
5717
+ pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &node->unescaped));
5718
+
5436
5719
  break;
5437
5720
  }
5438
5721
  case PM_TOKEN_MISSING: {
@@ -5495,6 +5778,8 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
5495
5778
  .unescaped = node->unescaped
5496
5779
  };
5497
5780
 
5781
+ pm_node_flag_set((pm_node_t *)new_node, parse_symbol_encoding(parser, &node->unescaped));
5782
+
5498
5783
  // We are explicitly _not_ using pm_node_destroy here because we don't want
5499
5784
  // to trash the unescaped string. We could instead copy the string if we
5500
5785
  // know that it is owned, but we're taking the fast path for now.
@@ -5885,6 +6170,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
5885
6170
  .closed = closed,
5886
6171
  .explicit_params = false,
5887
6172
  .numbered_parameters = 0,
6173
+ .forwarding_params = 0,
5888
6174
  };
5889
6175
 
5890
6176
  pm_constant_id_list_init(&scope->locals);
@@ -5893,6 +6179,76 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
5893
6179
  return true;
5894
6180
  }
5895
6181
 
6182
+ static void
6183
+ pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const pm_token_t * token, const uint8_t mask, pm_diagnostic_id_t diag)
6184
+ {
6185
+ pm_scope_t *scope = parser->current_scope;
6186
+ while (scope) {
6187
+ if (scope->forwarding_params & mask) {
6188
+ if (!scope->closed) {
6189
+ pm_parser_err_token(parser, token, diag);
6190
+ return;
6191
+ }
6192
+ return;
6193
+ }
6194
+ if (scope->closed) break;
6195
+ scope = scope->previous;
6196
+ }
6197
+
6198
+ pm_parser_err_token(parser, token, diag);
6199
+ }
6200
+
6201
+ static inline void
6202
+ pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token)
6203
+ {
6204
+ pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_BLOCK, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
6205
+ }
6206
+
6207
+ static void
6208
+ pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token)
6209
+ {
6210
+ pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_POSITIONALS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
6211
+ }
6212
+
6213
+ static inline void
6214
+ pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t * token)
6215
+ {
6216
+ pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_ALL, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
6217
+ }
6218
+
6219
+ static inline void
6220
+ pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token)
6221
+ {
6222
+ pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_KEYWORDS, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
6223
+ }
6224
+
6225
+ /**
6226
+ * Save the current param name as the return value and set it to the given
6227
+ * constant id.
6228
+ */
6229
+ static inline pm_constant_id_t
6230
+ pm_parser_current_param_name_set(pm_parser_t *parser, pm_constant_id_t current_param_name) {
6231
+ pm_constant_id_t saved_param_name = parser->current_param_name;
6232
+ parser->current_param_name = current_param_name;
6233
+ return saved_param_name;
6234
+ }
6235
+
6236
+ /**
6237
+ * Save the current param name as the return value and clear it.
6238
+ */
6239
+ static inline pm_constant_id_t
6240
+ pm_parser_current_param_name_unset(pm_parser_t *parser) {
6241
+ return pm_parser_current_param_name_set(parser, PM_CONSTANT_ID_UNSET);
6242
+ }
6243
+
6244
+ /**
6245
+ * Restore the current param name from the given value.
6246
+ */
6247
+ static inline void
6248
+ pm_parser_current_param_name_restore(pm_parser_t *parser, pm_constant_id_t saved_param_name) {
6249
+ parser->current_param_name = saved_param_name;
6250
+ }
6251
+
5896
6252
  /**
5897
6253
  * Check if any of the currently visible scopes contain a local variable
5898
6254
  * described by the given constant id.
@@ -5969,26 +6325,41 @@ pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t leng
5969
6325
  return constant_id;
5970
6326
  }
5971
6327
 
6328
+ /**
6329
+ * Add a local variable from a constant string to the current scope.
6330
+ */
6331
+ static pm_constant_id_t
6332
+ pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
6333
+ pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
6334
+ if (constant_id != 0) pm_parser_local_add(parser, constant_id);
6335
+ return constant_id;
6336
+ }
6337
+
5972
6338
  /**
5973
6339
  * Add a parameter name to the current scope and check whether the name of the
5974
6340
  * parameter is unique or not.
6341
+ *
6342
+ * Returns `true` if this is a duplicate parameter name, otherwise returns
6343
+ * false.
5975
6344
  */
5976
- static void
6345
+ static bool
5977
6346
  pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
5978
6347
  // We want to check whether the parameter name is a numbered parameter or
5979
6348
  // not.
5980
6349
  pm_refute_numbered_parameter(parser, name->start, name->end);
5981
6350
 
5982
- // We want to ignore any parameter name that starts with an underscore.
5983
- if ((name->start < name->end) && (*name->start == '_')) return;
5984
-
5985
6351
  // Otherwise we'll fetch the constant id for the parameter name and check
5986
6352
  // whether it's already in the current scope.
5987
6353
  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
5988
6354
 
5989
6355
  if (pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
5990
- pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_REPEAT);
6356
+ // Add an error if the parameter doesn't start with _ and has been seen before
6357
+ if ((name->start < name->end) && (*name->start != '_')) {
6358
+ pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_REPEAT);
6359
+ }
6360
+ return true;
5991
6361
  }
6362
+ return false;
5992
6363
  }
5993
6364
 
5994
6365
  /**
@@ -6003,105 +6374,6 @@ pm_parser_scope_pop(pm_parser_t *parser) {
6003
6374
  free(scope);
6004
6375
  }
6005
6376
 
6006
- /******************************************************************************/
6007
- /* Basic character checks */
6008
- /******************************************************************************/
6009
-
6010
- /**
6011
- * This function is used extremely frequently to lex all of the identifiers in a
6012
- * source file, so it's important that it be as fast as possible. For this
6013
- * reason we have the encoding_changed boolean to check if we need to go through
6014
- * the function pointer or can just directly use the UTF-8 functions.
6015
- */
6016
- static inline size_t
6017
- char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
6018
- if (parser->encoding_changed) {
6019
- size_t width;
6020
- if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
6021
- return width;
6022
- } else if (*b == '_') {
6023
- return 1;
6024
- } else if (*b >= 0x80) {
6025
- return parser->encoding->char_width(b, parser->end - b);
6026
- } else {
6027
- return 0;
6028
- }
6029
- } else if (*b < 0x80) {
6030
- return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
6031
- } else {
6032
- return (size_t) (pm_encoding_utf_8_alpha_char(b, parser->end - b) || 1u);
6033
- }
6034
- }
6035
-
6036
- /**
6037
- * Similar to char_is_identifier but this function assumes that the encoding
6038
- * has not been changed.
6039
- */
6040
- static inline size_t
6041
- char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
6042
- if (*b < 0x80) {
6043
- return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
6044
- } else {
6045
- return (size_t) (pm_encoding_utf_8_alnum_char(b, end - b) || 1u);
6046
- }
6047
- }
6048
-
6049
- /**
6050
- * Like the above, this function is also used extremely frequently to lex all of
6051
- * the identifiers in a source file once the first character has been found. So
6052
- * it's important that it be as fast as possible.
6053
- */
6054
- static inline size_t
6055
- char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
6056
- if (parser->encoding_changed) {
6057
- size_t width;
6058
- if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
6059
- return width;
6060
- } else if (*b == '_') {
6061
- return 1;
6062
- } else if (*b >= 0x80) {
6063
- return parser->encoding->char_width(b, parser->end - b);
6064
- } else {
6065
- return 0;
6066
- }
6067
- }
6068
- return char_is_identifier_utf8(b, parser->end);
6069
- }
6070
-
6071
- // Here we're defining a perfect hash for the characters that are allowed in
6072
- // global names. This is used to quickly check the next character after a $ to
6073
- // see if it's a valid character for a global name.
6074
- #define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
6075
- #define PUNCT(idx) ( \
6076
- BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
6077
- BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
6078
- BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
6079
- BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
6080
- BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
6081
- BIT('0', idx))
6082
-
6083
- const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
6084
-
6085
- #undef BIT
6086
- #undef PUNCT
6087
-
6088
- static inline bool
6089
- char_is_global_name_punctuation(const uint8_t b) {
6090
- const unsigned int i = (const unsigned int) b;
6091
- if (i <= 0x20 || 0x7e < i) return false;
6092
-
6093
- return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
6094
- }
6095
-
6096
- static inline bool
6097
- token_is_setter_name(pm_token_t *token) {
6098
- return (
6099
- (token->type == PM_TOKEN_IDENTIFIER) &&
6100
- (token->end - token->start >= 2) &&
6101
- (token->end[-1] == '=')
6102
- );
6103
- }
6104
-
6105
6377
  /******************************************************************************/
6106
6378
  /* Stack helpers */
6107
6379
  /******************************************************************************/
@@ -6317,8 +6589,10 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) {
6317
6589
  */
6318
6590
  static void
6319
6591
  parser_lex_magic_comment_frozen_string_literal_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
6320
- if (start + 4 <= end && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
6592
+ if ((start + 4 <= end) && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
6321
6593
  parser->frozen_string_literal = true;
6594
+ } else if ((start + 5 <= end) && pm_strncasecmp(start, (const uint8_t *) "false", 5) == 0) {
6595
+ parser->frozen_string_literal = false;
6322
6596
  }
6323
6597
  }
6324
6598
 
@@ -6541,21 +6815,27 @@ context_terminator(pm_context_t context, pm_token_t *token) {
6541
6815
  return token->type == PM_TOKEN_BRACE_RIGHT;
6542
6816
  case PM_CONTEXT_PREDICATE:
6543
6817
  return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
6818
+ case PM_CONTEXT_NONE:
6819
+ return false;
6544
6820
  }
6545
6821
 
6546
6822
  return false;
6547
6823
  }
6548
6824
 
6549
- static bool
6550
- context_recoverable(pm_parser_t *parser, pm_token_t *token) {
6825
+ /**
6826
+ * Returns the context that the given token is found to be terminating, or
6827
+ * returns PM_CONTEXT_NONE.
6828
+ */
6829
+ static pm_context_t
6830
+ context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
6551
6831
  pm_context_node_t *context_node = parser->current_context;
6552
6832
 
6553
6833
  while (context_node != NULL) {
6554
- if (context_terminator(context_node->context, token)) return true;
6834
+ if (context_terminator(context_node->context, token)) return context_node->context;
6555
6835
  context_node = context_node->prev;
6556
6836
  }
6557
6837
 
6558
- return false;
6838
+ return PM_CONTEXT_NONE;
6559
6839
  }
6560
6840
 
6561
6841
  static bool
@@ -6583,7 +6863,7 @@ context_pop(pm_parser_t *parser) {
6583
6863
  }
6584
6864
 
6585
6865
  static bool
6586
- context_p(pm_parser_t *parser, pm_context_t context) {
6866
+ context_p(const pm_parser_t *parser, pm_context_t context) {
6587
6867
  pm_context_node_t *context_node = parser->current_context;
6588
6868
 
6589
6869
  while (context_node != NULL) {
@@ -6595,7 +6875,7 @@ context_p(pm_parser_t *parser, pm_context_t context) {
6595
6875
  }
6596
6876
 
6597
6877
  static bool
6598
- context_def_p(pm_parser_t *parser) {
6878
+ context_def_p(const pm_parser_t *parser) {
6599
6879
  pm_context_node_t *context_node = parser->current_context;
6600
6880
 
6601
6881
  while (context_node != NULL) {
@@ -6618,6 +6898,55 @@ context_def_p(pm_parser_t *parser) {
6618
6898
  return false;
6619
6899
  }
6620
6900
 
6901
+ /**
6902
+ * Returns a human readable string for the given context, used in error
6903
+ * messages.
6904
+ */
6905
+ static const char *
6906
+ context_human(pm_context_t context) {
6907
+ switch (context) {
6908
+ case PM_CONTEXT_NONE:
6909
+ assert(false && "unreachable");
6910
+ return "";
6911
+ case PM_CONTEXT_BEGIN: return "begin statement";
6912
+ case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
6913
+ case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
6914
+ case PM_CONTEXT_CASE_WHEN: return "'when' clause";
6915
+ case PM_CONTEXT_CASE_IN: return "'in' clause";
6916
+ case PM_CONTEXT_CLASS: return "class definition";
6917
+ case PM_CONTEXT_DEF: return "method definition";
6918
+ case PM_CONTEXT_DEF_PARAMS: return "method parameters";
6919
+ case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
6920
+ case PM_CONTEXT_ELSE: return "'else' clause";
6921
+ case PM_CONTEXT_ELSIF: return "'elsif' clause";
6922
+ case PM_CONTEXT_EMBEXPR: return "embedded expression";
6923
+ case PM_CONTEXT_ENSURE: return "'ensure' clause";
6924
+ case PM_CONTEXT_ENSURE_DEF: return "'ensure' clause";
6925
+ case PM_CONTEXT_FOR: return "for loop";
6926
+ case PM_CONTEXT_FOR_INDEX: return "for loop index";
6927
+ case PM_CONTEXT_IF: return "if statement";
6928
+ case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
6929
+ case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
6930
+ case PM_CONTEXT_MAIN: return "top level context";
6931
+ case PM_CONTEXT_MODULE: return "module definition";
6932
+ case PM_CONTEXT_PARENS: return "parentheses";
6933
+ case PM_CONTEXT_POSTEXE: return "'END' block";
6934
+ case PM_CONTEXT_PREDICATE: return "predicate";
6935
+ case PM_CONTEXT_PREEXE: return "'BEGIN' block";
6936
+ case PM_CONTEXT_RESCUE_ELSE: return "'else' clause";
6937
+ case PM_CONTEXT_RESCUE_ELSE_DEF: return "'else' clause";
6938
+ case PM_CONTEXT_RESCUE: return "'rescue' clause";
6939
+ case PM_CONTEXT_RESCUE_DEF: return "'rescue' clause";
6940
+ case PM_CONTEXT_SCLASS: return "singleton class definition";
6941
+ case PM_CONTEXT_UNLESS: return "unless statement";
6942
+ case PM_CONTEXT_UNTIL: return "until statement";
6943
+ case PM_CONTEXT_WHILE: return "while statement";
6944
+ }
6945
+
6946
+ assert(false && "unreachable");
6947
+ return "";
6948
+ }
6949
+
6621
6950
  /******************************************************************************/
6622
6951
  /* Specific token lexers */
6623
6952
  /******************************************************************************/
@@ -7360,6 +7689,28 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
7360
7689
  pm_buffer_append_byte(buffer, byte);
7361
7690
  }
7362
7691
 
7692
+ /**
7693
+ * Write each byte of the given escaped character into the buffer.
7694
+ */
7695
+ static inline void
7696
+ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
7697
+ size_t width;
7698
+ if (parser->encoding_changed) {
7699
+ width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
7700
+ } else {
7701
+ width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
7702
+ }
7703
+
7704
+ // TODO: If the character is invalid in the given encoding, then we'll just
7705
+ // push one byte into the buffer. This should actually be an error.
7706
+ width = (width == 0) ? 1 : width;
7707
+
7708
+ for (size_t index = 0; index < width; index++) {
7709
+ escape_write_byte_encoded(parser, buffer, *parser->current.end);
7710
+ parser->current.end++;
7711
+ }
7712
+ }
7713
+
7363
7714
  /**
7364
7715
  * The regular expression engine doesn't support the same escape sequences as
7365
7716
  * Ruby does. So first we have to read the escape sequence, and then we have to
@@ -7698,7 +8049,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7698
8049
  /* fallthrough */
7699
8050
  default: {
7700
8051
  if (parser->current.end < parser->end) {
7701
- escape_write_byte_encoded(parser, buffer, *parser->current.end++);
8052
+ escape_write_escape_encoded(parser, buffer);
7702
8053
  }
7703
8054
  return;
7704
8055
  }
@@ -7975,14 +8326,43 @@ typedef struct {
7975
8326
  * Push the given byte into the token buffer.
7976
8327
  */
7977
8328
  static inline void
7978
- pm_token_buffer_push(pm_token_buffer_t *token_buffer, uint8_t byte) {
8329
+ pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
7979
8330
  pm_buffer_append_byte(&token_buffer->buffer, byte);
7980
8331
  }
7981
8332
 
8333
+ /**
8334
+ * Append the given bytes into the token buffer.
8335
+ */
8336
+ static inline void
8337
+ pm_token_buffer_push_bytes(pm_token_buffer_t *token_buffer, const uint8_t *bytes, size_t length) {
8338
+ pm_buffer_append_bytes(&token_buffer->buffer, bytes, length);
8339
+ }
8340
+
8341
+ /**
8342
+ * Push an escaped character into the token buffer.
8343
+ */
8344
+ static inline void
8345
+ pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
8346
+ // First, determine the width of the character to be escaped.
8347
+ size_t width;
8348
+ if (parser->encoding_changed) {
8349
+ width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8350
+ } else {
8351
+ width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
8352
+ }
8353
+
8354
+ // TODO: If the character is invalid in the given encoding, then we'll just
8355
+ // push one byte into the buffer. This should actually be an error.
8356
+ width = (width == 0 ? 1 : width);
8357
+
8358
+ // Now, push the bytes into the buffer.
8359
+ pm_token_buffer_push_bytes(token_buffer, parser->current.end, width);
8360
+ parser->current.end += width;
8361
+ }
8362
+
7982
8363
  /**
7983
8364
  * When we're about to return from lexing the current token and we know for sure
7984
8365
  * that we have found an escape sequence, this function is called to copy the
7985
- *
7986
8366
  * contents of the token buffer into the current string on the parser so that it
7987
8367
  * can be attached to the correct node.
7988
8368
  */
@@ -7997,7 +8377,6 @@ pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
7997
8377
  * string. If we haven't pushed anything into the buffer, this means that we
7998
8378
  * never found an escape sequence, so we can directly reference the bounds of
7999
8379
  * the current string. Either way, at the return of this function it is expected
8000
- *
8001
8380
  * that parser->current_string is established in such a way that it can be
8002
8381
  * attached to a node.
8003
8382
  */
@@ -8016,7 +8395,6 @@ pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
8016
8395
  * point into the buffer because we're about to provide a string that has
8017
8396
  * different content than a direct slice of the source.
8018
8397
  *
8019
- *
8020
8398
  * It is expected that the parser's current token end will be pointing at one
8021
8399
  * byte past the backslash that starts the escape sequence.
8022
8400
  */
@@ -8070,6 +8448,34 @@ pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor,
8070
8448
  return whitespace;
8071
8449
  }
8072
8450
 
8451
+ /**
8452
+ * Lex past the delimiter of a percent literal. Handle newlines and heredocs
8453
+ * appropriately.
8454
+ */
8455
+ static uint8_t
8456
+ pm_lex_percent_delimiter(pm_parser_t *parser) {
8457
+ size_t eol_length = match_eol(parser);
8458
+
8459
+ if (eol_length) {
8460
+ if (parser->heredoc_end) {
8461
+ // If we have already lexed a heredoc, then the newline has already
8462
+ // been added to the list. In this case we want to just flush the
8463
+ // heredoc end.
8464
+ parser_flush_heredoc_end(parser);
8465
+ } else {
8466
+ // Otherwise, we'll add the newline to the list of newlines.
8467
+ pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
8468
+ }
8469
+
8470
+ const uint8_t delimiter = *parser->current.end;
8471
+ parser->current.end += eol_length;
8472
+
8473
+ return delimiter;
8474
+ }
8475
+
8476
+ return *parser->current.end++;
8477
+ }
8478
+
8073
8479
  /**
8074
8480
  * This is a convenience macro that will set the current token type, call the
8075
8481
  * lex callback, and then return from the parser_lex function.
@@ -8635,7 +9041,7 @@ parser_lex(pm_parser_t *parser) {
8635
9041
  // this is not a valid heredoc declaration. In this case we
8636
9042
  // will add an error, but we will still return a heredoc
8637
9043
  // start.
8638
- pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
9044
+ pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
8639
9045
  body_start = parser->end;
8640
9046
  } else {
8641
9047
  // Otherwise, we want to indicate that the body of the
@@ -8826,12 +9232,10 @@ parser_lex(pm_parser_t *parser) {
8826
9232
  LEX(PM_TOKEN_PLUS_EQUAL);
8827
9233
  }
8828
9234
 
8829
- bool spcarg = lex_state_spcarg_p(parser, space_seen);
8830
- if (spcarg) {
8831
- pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS);
8832
- }
8833
-
8834
- if (lex_state_beg_p(parser) || spcarg) {
9235
+ if (
9236
+ lex_state_beg_p(parser) ||
9237
+ (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
9238
+ ) {
8835
9239
  lex_state_set(parser, PM_LEX_STATE_BEG);
8836
9240
 
8837
9241
  if (pm_char_is_decimal_digit(peek(parser))) {
@@ -8871,11 +9275,12 @@ parser_lex(pm_parser_t *parser) {
8871
9275
  }
8872
9276
 
8873
9277
  bool spcarg = lex_state_spcarg_p(parser, space_seen);
8874
- if (spcarg) {
9278
+ bool is_beg = lex_state_beg_p(parser);
9279
+ if (!is_beg && spcarg) {
8875
9280
  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
8876
9281
  }
8877
9282
 
8878
- if (lex_state_beg_p(parser) || spcarg) {
9283
+ if (is_beg || spcarg) {
8879
9284
  lex_state_set(parser, PM_LEX_STATE_BEG);
8880
9285
  LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
8881
9286
  }
@@ -9026,15 +9431,8 @@ parser_lex(pm_parser_t *parser) {
9026
9431
  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
9027
9432
  }
9028
9433
 
9029
- lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9030
-
9031
- size_t eol_length = match_eol(parser);
9032
- if (eol_length) {
9033
- parser->current.end += eol_length;
9034
- pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9035
- } else {
9036
- parser->current.end++;
9037
- }
9434
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9435
+ lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9038
9436
 
9039
9437
  if (parser->current.end < parser->end) {
9040
9438
  LEX(PM_TOKEN_STRING_BEGIN);
@@ -9054,7 +9452,7 @@ parser_lex(pm_parser_t *parser) {
9054
9452
  parser->current.end++;
9055
9453
 
9056
9454
  if (parser->current.end < parser->end) {
9057
- lex_mode_push_list(parser, false, *parser->current.end++);
9455
+ lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
9058
9456
  } else {
9059
9457
  lex_mode_push_list_eof(parser);
9060
9458
  }
@@ -9065,7 +9463,7 @@ parser_lex(pm_parser_t *parser) {
9065
9463
  parser->current.end++;
9066
9464
 
9067
9465
  if (parser->current.end < parser->end) {
9068
- lex_mode_push_list(parser, true, *parser->current.end++);
9466
+ lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
9069
9467
  } else {
9070
9468
  lex_mode_push_list_eof(parser);
9071
9469
  }
@@ -9076,9 +9474,8 @@ parser_lex(pm_parser_t *parser) {
9076
9474
  parser->current.end++;
9077
9475
 
9078
9476
  if (parser->current.end < parser->end) {
9079
- lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9080
- pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9081
- parser->current.end++;
9477
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9478
+ lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9082
9479
  } else {
9083
9480
  lex_mode_push_regexp(parser, '\0', '\0');
9084
9481
  }
@@ -9089,9 +9486,8 @@ parser_lex(pm_parser_t *parser) {
9089
9486
  parser->current.end++;
9090
9487
 
9091
9488
  if (parser->current.end < parser->end) {
9092
- lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9093
- pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9094
- parser->current.end++;
9489
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9490
+ lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9095
9491
  } else {
9096
9492
  lex_mode_push_string_eof(parser);
9097
9493
  }
@@ -9102,9 +9498,8 @@ parser_lex(pm_parser_t *parser) {
9102
9498
  parser->current.end++;
9103
9499
 
9104
9500
  if (parser->current.end < parser->end) {
9105
- lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9106
- pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9107
- parser->current.end++;
9501
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9502
+ lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9108
9503
  } else {
9109
9504
  lex_mode_push_string_eof(parser);
9110
9505
  }
@@ -9115,9 +9510,9 @@ parser_lex(pm_parser_t *parser) {
9115
9510
  parser->current.end++;
9116
9511
 
9117
9512
  if (parser->current.end < parser->end) {
9118
- lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9513
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9514
+ lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9119
9515
  lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
9120
- parser->current.end++;
9121
9516
  } else {
9122
9517
  lex_mode_push_string_eof(parser);
9123
9518
  }
@@ -9128,7 +9523,7 @@ parser_lex(pm_parser_t *parser) {
9128
9523
  parser->current.end++;
9129
9524
 
9130
9525
  if (parser->current.end < parser->end) {
9131
- lex_mode_push_list(parser, false, *parser->current.end++);
9526
+ lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
9132
9527
  } else {
9133
9528
  lex_mode_push_list_eof(parser);
9134
9529
  }
@@ -9139,7 +9534,7 @@ parser_lex(pm_parser_t *parser) {
9139
9534
  parser->current.end++;
9140
9535
 
9141
9536
  if (parser->current.end < parser->end) {
9142
- lex_mode_push_list(parser, true, *parser->current.end++);
9537
+ lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
9143
9538
  } else {
9144
9539
  lex_mode_push_list_eof(parser);
9145
9540
  }
@@ -9150,8 +9545,8 @@ parser_lex(pm_parser_t *parser) {
9150
9545
  parser->current.end++;
9151
9546
 
9152
9547
  if (parser->current.end < parser->end) {
9153
- lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9154
- parser->current.end++;
9548
+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9549
+ lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
9155
9550
  } else {
9156
9551
  lex_mode_push_string_eof(parser);
9157
9552
  }
@@ -9377,18 +9772,18 @@ parser_lex(pm_parser_t *parser) {
9377
9772
  case '\t':
9378
9773
  case '\v':
9379
9774
  case '\\':
9380
- pm_token_buffer_push(&token_buffer, peeked);
9775
+ pm_token_buffer_push_byte(&token_buffer, peeked);
9381
9776
  parser->current.end++;
9382
9777
  break;
9383
9778
  case '\r':
9384
9779
  parser->current.end++;
9385
9780
  if (peek(parser) != '\n') {
9386
- pm_token_buffer_push(&token_buffer, '\r');
9781
+ pm_token_buffer_push_byte(&token_buffer, '\r');
9387
9782
  break;
9388
9783
  }
9389
9784
  /* fallthrough */
9390
9785
  case '\n':
9391
- pm_token_buffer_push(&token_buffer, '\n');
9786
+ pm_token_buffer_push_byte(&token_buffer, '\n');
9392
9787
 
9393
9788
  if (parser->heredoc_end) {
9394
9789
  // ... if we are on the same line as a heredoc,
@@ -9406,14 +9801,13 @@ parser_lex(pm_parser_t *parser) {
9406
9801
  break;
9407
9802
  default:
9408
9803
  if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
9409
- pm_token_buffer_push(&token_buffer, peeked);
9804
+ pm_token_buffer_push_byte(&token_buffer, peeked);
9410
9805
  parser->current.end++;
9411
9806
  } else if (lex_mode->as.list.interpolation) {
9412
9807
  escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
9413
9808
  } else {
9414
- pm_token_buffer_push(&token_buffer, '\\');
9415
- pm_token_buffer_push(&token_buffer, peeked);
9416
- parser->current.end++;
9809
+ pm_token_buffer_push_byte(&token_buffer, '\\');
9810
+ pm_token_buffer_push_escaped(&token_buffer, parser);
9417
9811
  }
9418
9812
 
9419
9813
  break;
@@ -9571,9 +9965,9 @@ parser_lex(pm_parser_t *parser) {
9571
9965
  parser->current.end++;
9572
9966
  if (peek(parser) != '\n') {
9573
9967
  if (lex_mode->as.regexp.terminator != '\r') {
9574
- pm_token_buffer_push(&token_buffer, '\\');
9968
+ pm_token_buffer_push_byte(&token_buffer, '\\');
9575
9969
  }
9576
- pm_token_buffer_push(&token_buffer, '\r');
9970
+ pm_token_buffer_push_byte(&token_buffer, '\r');
9577
9971
  break;
9578
9972
  }
9579
9973
  /* fallthrough */
@@ -9608,20 +10002,19 @@ parser_lex(pm_parser_t *parser) {
9608
10002
  case '$': case ')': case '*': case '+':
9609
10003
  case '.': case '>': case '?': case ']':
9610
10004
  case '^': case '|': case '}':
9611
- pm_token_buffer_push(&token_buffer, '\\');
10005
+ pm_token_buffer_push_byte(&token_buffer, '\\');
9612
10006
  break;
9613
10007
  default:
9614
10008
  break;
9615
10009
  }
9616
10010
 
9617
- pm_token_buffer_push(&token_buffer, peeked);
10011
+ pm_token_buffer_push_byte(&token_buffer, peeked);
9618
10012
  parser->current.end++;
9619
10013
  break;
9620
10014
  }
9621
10015
 
9622
- if (peeked < 0x80) pm_token_buffer_push(&token_buffer, '\\');
9623
- pm_token_buffer_push(&token_buffer, peeked);
9624
- parser->current.end++;
10016
+ if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer, '\\');
10017
+ pm_token_buffer_push_escaped(&token_buffer, parser);
9625
10018
  break;
9626
10019
  }
9627
10020
 
@@ -9788,23 +10181,23 @@ parser_lex(pm_parser_t *parser) {
9788
10181
 
9789
10182
  switch (peeked) {
9790
10183
  case '\\':
9791
- pm_token_buffer_push(&token_buffer, '\\');
10184
+ pm_token_buffer_push_byte(&token_buffer, '\\');
9792
10185
  parser->current.end++;
9793
10186
  break;
9794
10187
  case '\r':
9795
10188
  parser->current.end++;
9796
10189
  if (peek(parser) != '\n') {
9797
10190
  if (!lex_mode->as.string.interpolation) {
9798
- pm_token_buffer_push(&token_buffer, '\\');
10191
+ pm_token_buffer_push_byte(&token_buffer, '\\');
9799
10192
  }
9800
- pm_token_buffer_push(&token_buffer, '\r');
10193
+ pm_token_buffer_push_byte(&token_buffer, '\r');
9801
10194
  break;
9802
10195
  }
9803
10196
  /* fallthrough */
9804
10197
  case '\n':
9805
10198
  if (!lex_mode->as.string.interpolation) {
9806
- pm_token_buffer_push(&token_buffer, '\\');
9807
- pm_token_buffer_push(&token_buffer, '\n');
10199
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10200
+ pm_token_buffer_push_byte(&token_buffer, '\n');
9808
10201
  }
9809
10202
 
9810
10203
  if (parser->heredoc_end) {
@@ -9823,17 +10216,16 @@ parser_lex(pm_parser_t *parser) {
9823
10216
  break;
9824
10217
  default:
9825
10218
  if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
9826
- pm_token_buffer_push(&token_buffer, peeked);
10219
+ pm_token_buffer_push_byte(&token_buffer, peeked);
9827
10220
  parser->current.end++;
9828
10221
  } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
9829
- pm_token_buffer_push(&token_buffer, peeked);
10222
+ pm_token_buffer_push_byte(&token_buffer, peeked);
9830
10223
  parser->current.end++;
9831
10224
  } else if (lex_mode->as.string.interpolation) {
9832
10225
  escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
9833
10226
  } else {
9834
- pm_token_buffer_push(&token_buffer, '\\');
9835
- pm_token_buffer_push(&token_buffer, peeked);
9836
- parser->current.end++;
10227
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10228
+ pm_token_buffer_push_escaped(&token_buffer, parser);
9837
10229
  }
9838
10230
 
9839
10231
  break;
@@ -9888,15 +10280,22 @@ parser_lex(pm_parser_t *parser) {
9888
10280
  parser->next_start = NULL;
9889
10281
  }
9890
10282
 
9891
- // We'll check if we're at the end of the file. If we are, then we need to
9892
- // return the EOF token.
10283
+ // Now let's grab the information about the identifier off of the
10284
+ // current lex mode.
10285
+ pm_lex_mode_t *lex_mode = parser->lex_modes.current;
10286
+
10287
+ // We'll check if we're at the end of the file. If we are, then we
10288
+ // will add an error (because we weren't able to find the
10289
+ // terminator) but still continue parsing so that content after the
10290
+ // declaration of the heredoc can be parsed.
9893
10291
  if (parser->current.end >= parser->end) {
9894
- LEX(PM_TOKEN_EOF);
10292
+ pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
10293
+ parser->next_start = lex_mode->as.heredoc.next_start;
10294
+ parser->heredoc_end = parser->current.end;
10295
+ lex_state_set(parser, PM_LEX_STATE_END);
10296
+ LEX(PM_TOKEN_HEREDOC_END);
9895
10297
  }
9896
10298
 
9897
- // Now let's grab the information about the identifier off of the current
9898
- // lex mode.
9899
- pm_lex_mode_t *lex_mode = parser->lex_modes.current;
9900
10299
  const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
9901
10300
  size_t ident_length = lex_mode->as.heredoc.ident_length;
9902
10301
 
@@ -10083,21 +10482,20 @@ parser_lex(pm_parser_t *parser) {
10083
10482
  case '\r':
10084
10483
  parser->current.end++;
10085
10484
  if (peek(parser) != '\n') {
10086
- pm_token_buffer_push(&token_buffer, '\\');
10087
- pm_token_buffer_push(&token_buffer, '\r');
10485
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10486
+ pm_token_buffer_push_byte(&token_buffer, '\r');
10088
10487
  break;
10089
10488
  }
10090
10489
  /* fallthrough */
10091
10490
  case '\n':
10092
- pm_token_buffer_push(&token_buffer, '\\');
10093
- pm_token_buffer_push(&token_buffer, '\n');
10491
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10492
+ pm_token_buffer_push_byte(&token_buffer, '\n');
10094
10493
  token_buffer.cursor = parser->current.end + 1;
10095
10494
  breakpoint = parser->current.end;
10096
10495
  continue;
10097
10496
  default:
10098
- parser->current.end++;
10099
- pm_token_buffer_push(&token_buffer, '\\');
10100
- pm_token_buffer_push(&token_buffer, peeked);
10497
+ pm_token_buffer_push_byte(&token_buffer, '\\');
10498
+ pm_token_buffer_push_escaped(&token_buffer, parser);
10101
10499
  break;
10102
10500
  }
10103
10501
  } else {
@@ -10105,7 +10503,7 @@ parser_lex(pm_parser_t *parser) {
10105
10503
  case '\r':
10106
10504
  parser->current.end++;
10107
10505
  if (peek(parser) != '\n') {
10108
- pm_token_buffer_push(&token_buffer, '\r');
10506
+ pm_token_buffer_push_byte(&token_buffer, '\r');
10109
10507
  break;
10110
10508
  }
10111
10509
  /* fallthrough */
@@ -10184,8 +10582,8 @@ parser_lex(pm_parser_t *parser) {
10184
10582
  typedef enum {
10185
10583
  PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
10186
10584
  PM_BINDING_POWER_STATEMENT = 2,
10187
- PM_BINDING_POWER_MODIFIER = 4, // if unless until while
10188
- PM_BINDING_POWER_MODIFIER_RESCUE = 6, // rescue
10585
+ PM_BINDING_POWER_MODIFIER_RESCUE = 4, // rescue
10586
+ PM_BINDING_POWER_MODIFIER = 6, // if unless until while
10189
10587
  PM_BINDING_POWER_COMPOSITION = 8, // and or
10190
10588
  PM_BINDING_POWER_NOT = 10, // not
10191
10589
  PM_BINDING_POWER_MATCH = 12, // => in
@@ -10239,15 +10637,15 @@ typedef struct {
10239
10637
  #define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
10240
10638
 
10241
10639
  pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
10640
+ // rescue
10641
+ [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
10642
+
10242
10643
  // if unless until while
10243
10644
  [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10244
10645
  [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10245
10646
  [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10246
10647
  [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10247
10648
 
10248
- // rescue
10249
- [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
10250
-
10251
10649
  // and or
10252
10650
  [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
10253
10651
  [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
@@ -10377,16 +10775,8 @@ match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
10377
10775
  * Returns true if the current token is any of the four given types.
10378
10776
  */
10379
10777
  static inline bool
10380
- match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
10381
- return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
10382
- }
10383
-
10384
- /**
10385
- * Returns true if the current token is any of the five given types.
10386
- */
10387
- static inline bool
10388
- match5(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5) {
10389
- return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5);
10778
+ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
10779
+ return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
10390
10780
  }
10391
10781
 
10392
10782
  /**
@@ -10866,7 +11256,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
10866
11256
  return target;
10867
11257
  }
10868
11258
 
10869
- if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
11259
+ if (char_is_identifier_start(parser, call->message_loc.start)) {
10870
11260
  // When we get here, we have a method call, because it was
10871
11261
  // previously marked as a method call but now we have an =. This
10872
11262
  // looks like:
@@ -10984,6 +11374,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
10984
11374
  static pm_node_t *
10985
11375
  parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power) {
10986
11376
  pm_node_t *result = parse_targets(parser, first_target, binding_power);
11377
+ accept1(parser, PM_TOKEN_NEWLINE);
10987
11378
 
10988
11379
  // Ensure that we have either an = or a ) after the targets.
10989
11380
  if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
@@ -11024,7 +11415,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
11024
11415
  break;
11025
11416
  }
11026
11417
 
11027
- // If we have a terminator, then we will parse all consequtive terminators
11418
+ // If we have a terminator, then we will parse all consecutive terminators
11028
11419
  // and then continue parsing the statements list.
11029
11420
  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
11030
11421
  // If we have a terminator, then we will continue parsing the statements
@@ -11084,8 +11475,9 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
11084
11475
 
11085
11476
  if (token_begins_expression_p(parser->current.type)) {
11086
11477
  value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11087
- } else if (pm_parser_local_depth(parser, &operator) == -1) {
11088
- pm_parser_err_token(parser, &operator, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11478
+ }
11479
+ else {
11480
+ pm_parser_scope_forwarding_keywords_check(parser, &operator);
11089
11481
  }
11090
11482
 
11091
11483
  element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
@@ -11234,13 +11626,8 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11234
11626
  if (token_begins_expression_p(parser->current.type)) {
11235
11627
  expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_ARGUMENT);
11236
11628
  } else {
11237
- if (pm_parser_local_depth(parser, &operator) == -1) {
11238
- // A block forwarding in a method having `...` parameter (e.g. `def foo(...); bar(&); end`) is available.
11239
- pm_constant_id_t ellipsis_id = pm_parser_constant_id_constant(parser, "...", 3);
11240
- if (pm_parser_local_depth_constant_id(parser, ellipsis_id) == -1) {
11241
- pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
11242
- }
11243
- }
11629
+ // A block forwarding in a method having `...` parameter (e.g. `def foo(...); bar(&); end`) is available.
11630
+ pm_parser_scope_forwarding_block_check(parser, &operator);
11244
11631
  }
11245
11632
 
11246
11633
  argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
@@ -11258,10 +11645,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11258
11645
  pm_token_t operator = parser->previous;
11259
11646
 
11260
11647
  if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
11261
- if (pm_parser_local_depth(parser, &parser->previous) == -1) {
11262
- pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
11263
- }
11264
-
11648
+ pm_parser_scope_forwarding_positionals_check(parser, &operator);
11265
11649
  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
11266
11650
  } else {
11267
11651
  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
@@ -11287,15 +11671,14 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11287
11671
  pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
11288
11672
  argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
11289
11673
  } else {
11290
- if (pm_parser_local_depth(parser, &parser->previous) == -1) {
11291
- pm_parser_err_previous(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
11292
- }
11674
+ pm_parser_scope_forwarding_all_check(parser, &parser->previous);
11293
11675
  if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
11294
11676
  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
11295
11677
  }
11296
11678
 
11297
11679
  argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
11298
11680
  parse_arguments_append(parser, arguments, argument);
11681
+ arguments->has_forwarding = true;
11299
11682
  parsed_forwarding_arguments = true;
11300
11683
  break;
11301
11684
  }
@@ -11338,6 +11721,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
11338
11721
  }
11339
11722
 
11340
11723
  parsed_bare_hash = true;
11724
+ } else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
11725
+ // TODO: Could we solve this with binding powers instead?
11726
+ pm_parser_err_current(parser, PM_ERR_ARGUMENT_IN);
11341
11727
  }
11342
11728
 
11343
11729
  parse_arguments_append(parser, arguments, argument);
@@ -11414,7 +11800,9 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
11414
11800
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11415
11801
  pm_token_t name = parser->previous;
11416
11802
  value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
11417
- pm_parser_parameter_name_check(parser, &name);
11803
+ if (pm_parser_parameter_name_check(parser, &name)) {
11804
+ pm_node_flag_set_repeated_parameter(value);
11805
+ }
11418
11806
  pm_parser_local_add_token(parser, &name);
11419
11807
  }
11420
11808
 
@@ -11424,7 +11812,9 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
11424
11812
  pm_token_t name = parser->previous;
11425
11813
 
11426
11814
  param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
11427
- pm_parser_parameter_name_check(parser, &name);
11815
+ if (pm_parser_parameter_name_check(parser, &name)) {
11816
+ pm_node_flag_set_repeated_parameter(param);
11817
+ }
11428
11818
  pm_parser_local_add_token(parser, &name);
11429
11819
  }
11430
11820
 
@@ -11541,19 +11931,20 @@ parse_parameters(
11541
11931
  pm_token_t operator = parser->previous;
11542
11932
  pm_token_t name;
11543
11933
 
11934
+ bool repeated = false;
11544
11935
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11545
11936
  name = parser->previous;
11546
- pm_parser_parameter_name_check(parser, &name);
11937
+ repeated = pm_parser_parameter_name_check(parser, &name);
11547
11938
  pm_parser_local_add_token(parser, &name);
11548
11939
  } else {
11549
11940
  name = not_provided(parser);
11550
-
11551
- if (allows_forwarding_parameters) {
11552
- pm_parser_local_add_token(parser, &operator);
11553
- }
11941
+ parser->current_scope->forwarding_params |= PM_FORWARDING_BLOCK;
11554
11942
  }
11555
11943
 
11556
11944
  pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
11945
+ if (repeated) {
11946
+ pm_node_flag_set_repeated_parameter((pm_node_t *)param);
11947
+ }
11557
11948
  if (params->block == NULL) {
11558
11949
  pm_parameters_node_block_set(params, param);
11559
11950
  } else {
@@ -11572,9 +11963,8 @@ parse_parameters(
11572
11963
  update_parameter_state(parser, &parser->current, &order);
11573
11964
  parser_lex(parser);
11574
11965
 
11575
- if (allows_forwarding_parameters) {
11576
- pm_parser_local_add_token(parser, &parser->previous);
11577
- }
11966
+ parser->current_scope->forwarding_params |= PM_FORWARDING_BLOCK;
11967
+ parser->current_scope->forwarding_params |= PM_FORWARDING_ALL;
11578
11968
 
11579
11969
  pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
11580
11970
  if (params->keyword_rest != NULL) {
@@ -11626,20 +12016,23 @@ parse_parameters(
11626
12016
  }
11627
12017
 
11628
12018
  pm_token_t name = parser->previous;
11629
- pm_parser_parameter_name_check(parser, &name);
12019
+ bool repeated = pm_parser_parameter_name_check(parser, &name);
11630
12020
  pm_parser_local_add_token(parser, &name);
11631
12021
 
11632
12022
  if (accept1(parser, PM_TOKEN_EQUAL)) {
11633
12023
  pm_token_t operator = parser->previous;
11634
12024
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11635
- pm_constant_id_t old_param_name = parser->current_param_name;
11636
- parser->current_param_name = pm_parser_constant_id_token(parser, &name);
12025
+
12026
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &name));
11637
12027
  pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
11638
12028
 
11639
12029
  pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
12030
+ if (repeated) {
12031
+ pm_node_flag_set_repeated_parameter((pm_node_t *)param);
12032
+ }
11640
12033
  pm_parameters_node_optionals_append(params, param);
11641
12034
 
11642
- parser->current_param_name = old_param_name;
12035
+ pm_parser_current_param_name_restore(parser, saved_param_name);
11643
12036
  context_pop(parser);
11644
12037
 
11645
12038
  // If parsing the value of the parameter resulted in error recovery,
@@ -11651,9 +12044,15 @@ parse_parameters(
11651
12044
  }
11652
12045
  } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
11653
12046
  pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
12047
+ if (repeated) {
12048
+ pm_node_flag_set_repeated_parameter((pm_node_t *)param);
12049
+ }
11654
12050
  pm_parameters_node_requireds_append(params, (pm_node_t *) param);
11655
12051
  } else {
11656
12052
  pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
12053
+ if (repeated) {
12054
+ pm_node_flag_set_repeated_parameter((pm_node_t *)param);
12055
+ }
11657
12056
  pm_parameters_node_posts_append(params, (pm_node_t *) param);
11658
12057
  }
11659
12058
 
@@ -11668,7 +12067,7 @@ parse_parameters(
11668
12067
  pm_token_t local = name;
11669
12068
  local.end -= 1;
11670
12069
 
11671
- pm_parser_parameter_name_check(parser, &local);
12070
+ bool repeated = pm_parser_parameter_name_check(parser, &local);
11672
12071
  pm_parser_local_add_token(parser, &local);
11673
12072
 
11674
12073
  switch (parser->current.type) {
@@ -11676,6 +12075,9 @@ parse_parameters(
11676
12075
  case PM_TOKEN_PARENTHESIS_RIGHT:
11677
12076
  case PM_TOKEN_PIPE: {
11678
12077
  pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
12078
+ if (repeated) {
12079
+ pm_node_flag_set_repeated_parameter(param);
12080
+ }
11679
12081
  pm_parameters_node_keywords_append(params, param);
11680
12082
  break;
11681
12083
  }
@@ -11687,6 +12089,9 @@ parse_parameters(
11687
12089
  }
11688
12090
 
11689
12091
  pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
12092
+ if (repeated) {
12093
+ pm_node_flag_set_repeated_parameter(param);
12094
+ }
11690
12095
  pm_parameters_node_keywords_append(params, param);
11691
12096
  break;
11692
12097
  }
@@ -11695,17 +12100,22 @@ parse_parameters(
11695
12100
 
11696
12101
  if (token_begins_expression_p(parser->current.type)) {
11697
12102
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11698
- pm_constant_id_t old_param_name = parser->current_param_name;
11699
- parser->current_param_name = pm_parser_constant_id_token(parser, &local);
12103
+
12104
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &local));
11700
12105
  pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
11701
- parser->current_param_name = old_param_name;
12106
+
12107
+ pm_parser_current_param_name_restore(parser, saved_param_name);
11702
12108
  context_pop(parser);
12109
+
11703
12110
  param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
11704
12111
  }
11705
12112
  else {
11706
12113
  param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
11707
12114
  }
11708
12115
 
12116
+ if (repeated) {
12117
+ pm_node_flag_set_repeated_parameter(param);
12118
+ }
11709
12119
  pm_parameters_node_keywords_append(params, param);
11710
12120
 
11711
12121
  // If parsing the value of the parameter resulted in error recovery,
@@ -11728,20 +12138,21 @@ parse_parameters(
11728
12138
 
11729
12139
  pm_token_t operator = parser->previous;
11730
12140
  pm_token_t name;
11731
-
12141
+ bool repeated = false;
11732
12142
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11733
12143
  name = parser->previous;
11734
- pm_parser_parameter_name_check(parser, &name);
12144
+ repeated = pm_parser_parameter_name_check(parser, &name);
11735
12145
  pm_parser_local_add_token(parser, &name);
11736
12146
  } else {
11737
12147
  name = not_provided(parser);
11738
12148
 
11739
- if (allows_forwarding_parameters) {
11740
- pm_parser_local_add_token(parser, &operator);
11741
- }
12149
+ parser->current_scope->forwarding_params |= PM_FORWARDING_POSITIONALS;
11742
12150
  }
11743
12151
 
11744
12152
  pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
12153
+ if (repeated) {
12154
+ pm_node_flag_set_repeated_parameter(param);
12155
+ }
11745
12156
  if (params->rest == NULL) {
11746
12157
  pm_parameters_node_rest_set(params, param);
11747
12158
  } else {
@@ -11764,19 +12175,21 @@ parse_parameters(
11764
12175
  } else {
11765
12176
  pm_token_t name;
11766
12177
 
12178
+ bool repeated = false;
11767
12179
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11768
12180
  name = parser->previous;
11769
- pm_parser_parameter_name_check(parser, &name);
12181
+ repeated = pm_parser_parameter_name_check(parser, &name);
11770
12182
  pm_parser_local_add_token(parser, &name);
11771
12183
  } else {
11772
12184
  name = not_provided(parser);
11773
12185
 
11774
- if (allows_forwarding_parameters) {
11775
- pm_parser_local_add_token(parser, &operator);
11776
- }
12186
+ parser->current_scope->forwarding_params |= PM_FORWARDING_KEYWORDS;
11777
12187
  }
11778
12188
 
11779
12189
  param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
12190
+ if (repeated) {
12191
+ pm_node_flag_set_repeated_parameter(param);
12192
+ }
11780
12193
  }
11781
12194
 
11782
12195
  if (params->keyword_rest == NULL) {
@@ -12012,10 +12425,13 @@ parse_block_parameters(
12012
12425
  if ((opening->type != PM_TOKEN_NOT_PROVIDED) && accept1(parser, PM_TOKEN_SEMICOLON)) {
12013
12426
  do {
12014
12427
  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
12015
- pm_parser_parameter_name_check(parser, &parser->previous);
12428
+ bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
12016
12429
  pm_parser_local_add_token(parser, &parser->previous);
12017
12430
 
12018
12431
  pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
12432
+ if (repeated) {
12433
+ pm_node_flag_set_repeated_parameter((pm_node_t *)local);
12434
+ }
12019
12435
  pm_block_parameters_node_append_local(block_parameters, local);
12020
12436
  } while (accept1(parser, PM_TOKEN_COMMA));
12021
12437
  }
@@ -12031,8 +12447,10 @@ parse_block(pm_parser_t *parser) {
12031
12447
  pm_token_t opening = parser->previous;
12032
12448
  accept1(parser, PM_TOKEN_NEWLINE);
12033
12449
 
12450
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
12034
12451
  pm_accepts_block_stack_push(parser, true);
12035
12452
  pm_parser_scope_push(parser, false);
12453
+
12036
12454
  pm_block_parameters_node_t *block_parameters = NULL;
12037
12455
 
12038
12456
  if (accept1(parser, PM_TOKEN_PIPE)) {
@@ -12053,12 +12471,6 @@ parse_block(pm_parser_t *parser) {
12053
12471
  pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
12054
12472
  }
12055
12473
 
12056
- uint32_t locals_body_index = 0;
12057
-
12058
- if (block_parameters) {
12059
- locals_body_index = (uint32_t) parser->current_scope->locals.size;
12060
- }
12061
-
12062
12474
  accept1(parser, PM_TOKEN_NEWLINE);
12063
12475
  pm_node_t *statements = NULL;
12064
12476
 
@@ -12090,13 +12502,14 @@ parse_block(pm_parser_t *parser) {
12090
12502
 
12091
12503
  if (parameters == NULL && (maximum > 0)) {
12092
12504
  parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = opening.start, .end = parser->previous.end }, maximum);
12093
- locals_body_index = maximum;
12094
12505
  }
12095
12506
 
12096
12507
  pm_constant_id_list_t locals = parser->current_scope->locals;
12097
12508
  pm_parser_scope_pop(parser);
12098
12509
  pm_accepts_block_stack_pop(parser);
12099
- return pm_block_node_create(parser, &locals, locals_body_index, &opening, parameters, statements, &parser->previous);
12510
+ pm_parser_current_param_name_restore(parser, saved_param_name);
12511
+
12512
+ return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
12100
12513
  }
12101
12514
 
12102
12515
  /**
@@ -12157,14 +12570,20 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
12157
12570
  }
12158
12571
 
12159
12572
  if (block != NULL) {
12160
- if (arguments->block == NULL) {
12573
+ if (arguments->block == NULL && !arguments->has_forwarding) {
12161
12574
  arguments->block = (pm_node_t *) block;
12162
12575
  } else {
12163
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
12164
- if (arguments->arguments == NULL) {
12165
- arguments->arguments = pm_arguments_node_create(parser);
12576
+ if (arguments->has_forwarding) {
12577
+ pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_FORWARDING);
12578
+ } else {
12579
+ pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
12580
+ }
12581
+ if (arguments->block != NULL) {
12582
+ if (arguments->arguments == NULL) {
12583
+ arguments->arguments = pm_arguments_node_create(parser);
12584
+ }
12585
+ pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
12166
12586
  }
12167
- pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
12168
12587
  arguments->block = (pm_node_t *) block;
12169
12588
  }
12170
12589
  }
@@ -12384,8 +12803,14 @@ static inline pm_node_flags_t
12384
12803
  parse_unescaped_encoding(const pm_parser_t *parser) {
12385
12804
  if (parser->explicit_encoding != NULL) {
12386
12805
  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
12806
+ // If the there's an explicit encoding and it's using a UTF-8 escape
12807
+ // sequence, then mark the string as UTF-8.
12387
12808
  return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
12388
12809
  } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
12810
+ // If there's a non-UTF-8 escape sequence being used, then the
12811
+ // string uses the source encoding, unless the source is marked as
12812
+ // US-ASCII. In that case the string is forced as ASCII-8BIT in
12813
+ // order to keep the string valid.
12389
12814
  return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
12390
12815
  }
12391
12816
  }
@@ -12509,14 +12934,54 @@ parse_string_part(pm_parser_t *parser) {
12509
12934
  }
12510
12935
  }
12511
12936
 
12937
+ /**
12938
+ * When creating a symbol, unary operators that cannot be binary operators
12939
+ * automatically drop trailing `@` characters. This happens at the parser level,
12940
+ * such that `~@` is parsed as `~` and `!@` is parsed as `!`. We do that here.
12941
+ */
12942
+ static const uint8_t *
12943
+ parse_operator_symbol_name(const pm_token_t *name) {
12944
+ switch (name->type) {
12945
+ case PM_TOKEN_TILDE:
12946
+ case PM_TOKEN_BANG:
12947
+ if (name->end[-1] == '@') return name->end - 1;
12948
+ /* fallthrough */
12949
+ default:
12950
+ return name->end;
12951
+ }
12952
+ }
12953
+
12954
+ static pm_node_t *
12955
+ parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
12956
+ pm_token_t closing = not_provided(parser);
12957
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
12958
+
12959
+ const uint8_t *end = parse_operator_symbol_name(&parser->current);
12960
+
12961
+ if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
12962
+ parser_lex(parser);
12963
+
12964
+ pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
12965
+ pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
12966
+
12967
+ return (pm_node_t *) symbol;
12968
+ }
12969
+
12970
+ /**
12971
+ * Parse a symbol node. This function will get called immediately after finding
12972
+ * a symbol opening token. This handles parsing bare symbols and interpolated
12973
+ * symbols.
12974
+ */
12512
12975
  static pm_node_t *
12513
12976
  parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state) {
12514
- pm_token_t opening = parser->previous;
12977
+ const pm_token_t opening = parser->previous;
12515
12978
 
12516
12979
  if (lex_mode->mode != PM_LEX_STRING) {
12517
12980
  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
12518
12981
 
12519
12982
  switch (parser->current.type) {
12983
+ case PM_CASE_OPERATOR:
12984
+ return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
12520
12985
  case PM_TOKEN_IDENTIFIER:
12521
12986
  case PM_TOKEN_CONSTANT:
12522
12987
  case PM_TOKEN_INSTANCE_VARIABLE:
@@ -12528,10 +12993,6 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12528
12993
  case PM_CASE_KEYWORD:
12529
12994
  parser_lex(parser);
12530
12995
  break;
12531
- case PM_CASE_OPERATOR:
12532
- lex_state_set(parser, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
12533
- parser_lex(parser);
12534
- break;
12535
12996
  default:
12536
12997
  expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
12537
12998
  break;
@@ -12541,6 +13002,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12541
13002
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
12542
13003
 
12543
13004
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
13005
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
13006
+
12544
13007
  return (pm_node_t *) symbol;
12545
13008
  }
12546
13009
 
@@ -12637,7 +13100,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12637
13100
  } else {
12638
13101
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
12639
13102
  }
12640
- return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
13103
+
13104
+ return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
12641
13105
  }
12642
13106
 
12643
13107
  /**
@@ -12647,8 +13111,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
12647
13111
  static inline pm_node_t *
12648
13112
  parse_undef_argument(pm_parser_t *parser) {
12649
13113
  switch (parser->current.type) {
13114
+ case PM_CASE_OPERATOR: {
13115
+ const pm_token_t opening = not_provided(parser);
13116
+ return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
13117
+ }
12650
13118
  case PM_CASE_KEYWORD:
12651
- case PM_CASE_OPERATOR:
12652
13119
  case PM_TOKEN_CONSTANT:
12653
13120
  case PM_TOKEN_IDENTIFIER:
12654
13121
  case PM_TOKEN_METHOD_NAME: {
@@ -12659,6 +13126,8 @@ parse_undef_argument(pm_parser_t *parser) {
12659
13126
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
12660
13127
 
12661
13128
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
13129
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
13130
+
12662
13131
  return (pm_node_t *) symbol;
12663
13132
  }
12664
13133
  case PM_TOKEN_SYMBOL_BEGIN: {
@@ -12682,21 +13151,24 @@ parse_undef_argument(pm_parser_t *parser) {
12682
13151
  static inline pm_node_t *
12683
13152
  parse_alias_argument(pm_parser_t *parser, bool first) {
12684
13153
  switch (parser->current.type) {
12685
- case PM_CASE_OPERATOR:
13154
+ case PM_CASE_OPERATOR: {
13155
+ const pm_token_t opening = not_provided(parser);
13156
+ return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
13157
+ }
12686
13158
  case PM_CASE_KEYWORD:
12687
13159
  case PM_TOKEN_CONSTANT:
12688
13160
  case PM_TOKEN_IDENTIFIER:
12689
13161
  case PM_TOKEN_METHOD_NAME: {
12690
- if (first) {
12691
- lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
12692
- }
12693
-
13162
+ if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
12694
13163
  parser_lex(parser);
13164
+
12695
13165
  pm_token_t opening = not_provided(parser);
12696
13166
  pm_token_t closing = not_provided(parser);
12697
13167
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
12698
13168
 
12699
13169
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
13170
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
13171
+
12700
13172
  return (pm_node_t *) symbol;
12701
13173
  }
12702
13174
  case PM_TOKEN_SYMBOL_BEGIN: {
@@ -12733,6 +13205,64 @@ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
12733
13205
  return false;
12734
13206
  }
12735
13207
 
13208
+ /**
13209
+ * These are the names of the numbered parameters _1 through _9, stored so that
13210
+ * the entry at index n - 1 names parameter n. Keeping them as constant strings
13211
+ * lets us insert them into the constant pool without having to allocate.
13212
+ */
13213
+ static const char * const pm_numbered_parameter_names[] = {
13214
+ "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
13215
+ };
13216
+
13217
+ /**
13218
+ * Parse the previous identifier token into a local variable read, handling
13219
+ * numbered parameters as well. If no read can be created, it returns NULL.
13220
+ */
13221
+ static pm_local_variable_read_node_t *
13222
+ parse_variable(pm_parser_t *parser) {
13223
+ int depth;
13224
+ if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
13225
+ return pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
13226
+ }
13227
+
13228
+ if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
13229
+ // Now that we know we have a numbered parameter, we need to check
13230
+ // if it's allowed in this context. If it is, then we will create a
13231
+ // local variable read. If it's not, then we'll add an error and fall
13232
+ // through to return NULL so that the caller decides what to build.
13233
+ if (parser->current_scope->explicit_params) {
13234
+ pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
13235
+ } else if (outer_scope_using_numbered_parameters_p(parser)) {
13236
+ pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
13237
+ } else {
13238
+ // Indicate that this scope is using numbered params so that child
13239
+ // scopes cannot. We subtract the value for the character '0' to get
13240
+ // the actual integer value of the number (only _1 through _9 are
13241
+ // valid).
13242
+ uint8_t numbered_parameters = (uint8_t) (parser->previous.start[1] - '0');
13243
+ if (numbered_parameters > parser->current_scope->numbered_parameters) {
13244
+ parser->current_scope->numbered_parameters = numbered_parameters;
13245
+ pm_parser_numbered_parameters_set(parser, numbered_parameters);
13246
+ }
13247
+
13248
+ // When you use a numbered parameter, it implies the existence
13249
+ // of all of the locals that exist before it. For example,
13250
+ // referencing _2 means that _1 must exist. Therefore here we
13251
+ // loop through all of the possibilities and add them into the
13252
+ // constant pool.
13253
+ for (uint8_t numbered_parameter = 1; numbered_parameter <= numbered_parameters - 1; numbered_parameter++) {
13254
+ pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameter - 1], 2);
13255
+ }
13256
+
13257
+ // Finally we can create the local variable read node.
13258
+ pm_constant_id_t name_id = pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameters - 1], 2);
13259
+ return pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
13260
+ }
13261
+ }
13262
+
13263
+ return NULL;
13264
+ }
13265
+
12736
13266
  /**
12737
13267
  * Parse an identifier into either a local variable read or a call.
12738
13268
  */
@@ -12741,56 +13271,8 @@ parse_variable_call(pm_parser_t *parser) {
12741
13271
  pm_node_flags_t flags = 0;
12742
13272
 
12743
13273
  if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
12744
- int depth;
12745
- if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
12746
- return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
12747
- }
12748
-
12749
- if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
12750
- // Now that we know we have a numbered parameter, we need to check
12751
- // if it's allowed in this context. If it is, then we will create a
12752
- // local variable read. If it's not, then we'll create a normal call
12753
- // node but add an error.
12754
- if (parser->current_scope->explicit_params) {
12755
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
12756
- } else if (outer_scope_using_numbered_parameters_p(parser)) {
12757
- pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
12758
- } else {
12759
- // Indicate that this scope is using numbered params so that child
12760
- // scopes cannot.
12761
- uint8_t number = parser->previous.start[1];
12762
-
12763
- // We subtract the value for the character '0' to get the actual
12764
- // integer value of the number (only _1 through _9 are valid)
12765
- uint8_t numbered_parameters = (uint8_t) (number - '0');
12766
- if (numbered_parameters > parser->current_scope->numbered_parameters) {
12767
- parser->current_scope->numbered_parameters = numbered_parameters;
12768
- pm_parser_numbered_parameters_set(parser, numbered_parameters);
12769
- }
12770
-
12771
- // When you use a numbered parameter, it implies the existence
12772
- // of all of the locals that exist before it. For example,
12773
- // referencing _2 means that _1 must exist. Therefore here we
12774
- // loop through all of the possibilities and add them into the
12775
- // constant pool.
12776
- uint8_t current = '1';
12777
- uint8_t *value;
12778
-
12779
- while (current < number) {
12780
- value = malloc(2);
12781
- value[0] = '_';
12782
- value[1] = current++;
12783
- pm_parser_local_add_owned(parser, value, 2);
12784
- }
12785
-
12786
- // Now we can add the actual token that is being used. For
12787
- // this one we can add a shared version since it is directly
12788
- // referenced in the source.
12789
- pm_parser_local_add_token(parser, &parser->previous);
12790
- return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
12791
- }
12792
- }
12793
-
13274
+ pm_local_variable_read_node_t *node = parse_variable(parser);
13275
+ if (node != NULL) return (pm_node_t *) node;
12794
13276
  flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
12795
13277
  }
12796
13278
 
@@ -13076,43 +13558,77 @@ parse_pattern_keyword_rest(pm_parser_t *parser) {
13076
13558
  return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
13077
13559
  }
13078
13560
 
13561
+ /**
13562
+ * Create an implicit node for a hash pattern key whose value was omitted. It
13563
+ * wraps a local variable target named after the key, added if not yet found.
13564
+ */
13565
+ static pm_node_t *
13566
+ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_symbol_node_t *key) {
13567
+ const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
13568
+ pm_constant_id_t name = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
13569
+
13570
+ int current_depth = pm_parser_local_depth_constant_id(parser, name);
13571
+ uint32_t depth;
13572
+
13573
+ if (current_depth == -1) {
13574
+ pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
13575
+ depth = 0;
13576
+ } else {
13577
+ depth = (uint32_t) current_depth;
13578
+ }
13579
+
13580
+ pm_local_variable_target_node_t *target = pm_local_variable_target_node_create_values(parser, value_loc, name, depth);
13581
+ return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
13582
+ }
13583
+
13079
13584
  /**
13080
13585
  * Parse a hash pattern.
13081
13586
  */
13082
13587
  static pm_hash_pattern_node_t *
13083
- parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
13588
+ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) {
13084
13589
  pm_node_list_t assocs = { 0 };
13085
13590
  pm_node_t *rest = NULL;
13086
13591
 
13087
- switch (PM_NODE_TYPE(first_assoc)) {
13088
- case PM_ASSOC_NODE: {
13089
- if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13090
- // Here we have a value for the first assoc in the list, so we will
13091
- // parse it now and update the first assoc.
13092
- pm_node_t *value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
13592
+ switch (PM_NODE_TYPE(first_node)) {
13593
+ case PM_ASSOC_SPLAT_NODE:
13594
+ case PM_NO_KEYWORDS_PARAMETER_NODE:
13595
+ rest = first_node;
13596
+ break;
13597
+ case PM_SYMBOL_NODE: {
13598
+ if (pm_symbol_node_label_p(first_node)) {
13599
+ pm_node_t *value;
13600
+
13601
+ if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13602
+ // Here we have a value for the first assoc in the list, so
13603
+ // we will parse it now.
13604
+ value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
13605
+ } else {
13606
+ // Otherwise, we will create an implicit local variable
13607
+ // target for the value.
13608
+ value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) first_node);
13609
+ }
13093
13610
 
13094
- pm_assoc_node_t *assoc = (pm_assoc_node_t *) first_assoc;
13095
- assoc->base.location.end = value->location.end;
13096
- assoc->value = value;
13097
- } else {
13098
- pm_node_t *key = ((pm_assoc_node_t *) first_assoc)->key;
13611
+ pm_token_t operator = not_provided(parser);
13612
+ pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
13099
13613
 
13100
- if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
13101
- const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
13102
- pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
13103
- }
13614
+ pm_node_list_append(&assocs, assoc);
13615
+ break;
13104
13616
  }
13617
+ }
13618
+ /* fallthrough */
13619
+ default: {
13620
+ // If we get anything else, then this is an error. For this we'll
13621
+ // create a missing node for the value and create an assoc node for
13622
+ // the first node in the list.
13623
+ pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_LABEL);
13624
+
13625
+ pm_token_t operator = not_provided(parser);
13626
+ pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
13627
+ pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
13105
13628
 
13106
- pm_node_list_append(&assocs, first_assoc);
13629
+ pm_node_list_append(&assocs, assoc);
13107
13630
  break;
13108
13631
  }
13109
- case PM_ASSOC_SPLAT_NODE:
13110
- case PM_NO_KEYWORDS_PARAMETER_NODE:
13111
- rest = first_assoc;
13112
- break;
13113
- default:
13114
- assert(false);
13115
- break;
13116
13632
  }
13117
13633
 
13118
13634
  // If there are any other assocs, then we'll parse them now.
@@ -13141,6 +13657,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
13141
13657
  } else {
13142
13658
  const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
13143
13659
  pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
13660
+ value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) key);
13144
13661
  }
13145
13662
 
13146
13663
  pm_token_t operator = not_provided(parser);
@@ -13246,45 +13763,29 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13246
13763
  // pattern node.
13247
13764
  node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
13248
13765
  } else {
13249
- pm_node_t *first_assoc;
13766
+ pm_node_t *first_node;
13250
13767
 
13251
13768
  switch (parser->current.type) {
13252
- case PM_TOKEN_LABEL: {
13769
+ case PM_TOKEN_LABEL:
13253
13770
  parser_lex(parser);
13254
-
13255
- pm_symbol_node_t *key = pm_symbol_node_label_create(parser, &parser->previous);
13256
- pm_token_t operator = not_provided(parser);
13257
-
13258
- first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
13771
+ first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
13259
13772
  break;
13260
- }
13261
13773
  case PM_TOKEN_USTAR_STAR:
13262
- first_assoc = parse_pattern_keyword_rest(parser);
13774
+ first_node = parse_pattern_keyword_rest(parser);
13263
13775
  break;
13264
- case PM_TOKEN_STRING_BEGIN: {
13265
- pm_node_t *key = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
13266
- pm_token_t operator = not_provided(parser);
13267
-
13268
- if (!pm_symbol_node_label_p(key)) {
13269
- pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_LABEL);
13270
- }
13271
-
13272
- first_assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL);
13776
+ case PM_TOKEN_STRING_BEGIN:
13777
+ first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
13273
13778
  break;
13274
- }
13275
13779
  default: {
13276
13780
  parser_lex(parser);
13277
13781
  pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
13278
13782
 
13279
- pm_missing_node_t *key = pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
13280
- pm_token_t operator = not_provided(parser);
13281
-
13282
- first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
13783
+ first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
13283
13784
  break;
13284
13785
  }
13285
13786
  }
13286
13787
 
13287
- node = parse_pattern_hash(parser, first_assoc);
13788
+ node = parse_pattern_hash(parser, first_node);
13288
13789
 
13289
13790
  accept1(parser, PM_TOKEN_NEWLINE);
13290
13791
  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
@@ -13350,7 +13851,16 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13350
13851
  switch (parser->current.type) {
13351
13852
  case PM_TOKEN_IDENTIFIER: {
13352
13853
  parser_lex(parser);
13353
- pm_node_t *variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
13854
+ pm_node_t *variable = (pm_node_t *) parse_variable(parser);
13855
+ if (variable == NULL) {
13856
+ if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0 && pm_token_is_it(parser->previous.start, parser->previous.end)) {
13857
+ pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
13858
+ variable = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
13859
+ } else {
13860
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE, (int) (parser->previous.end - parser->previous.start), parser->previous.start);
13861
+ variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
13862
+ }
13863
+ }
13354
13864
 
13355
13865
  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
13356
13866
  }
@@ -13519,9 +14029,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
13519
14029
  case PM_TOKEN_LABEL: {
13520
14030
  parser_lex(parser);
13521
14031
  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
13522
- pm_token_t operator = not_provided(parser);
13523
-
13524
- return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL));
14032
+ return (pm_node_t *) parse_pattern_hash(parser, key);
13525
14033
  }
13526
14034
  case PM_TOKEN_USTAR_STAR: {
13527
14035
  node = parse_pattern_keyword_rest(parser);
@@ -13544,8 +14052,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
13544
14052
  // If we got a dynamic label symbol, then we need to treat it like the
13545
14053
  // beginning of a hash pattern.
13546
14054
  if (pm_symbol_node_label_p(node)) {
13547
- pm_token_t operator = not_provided(parser);
13548
- return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, node, &operator, NULL));
14055
+ return (pm_node_t *) parse_pattern_hash(parser, node);
13549
14056
  }
13550
14057
 
13551
14058
  if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
@@ -13558,7 +14065,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
13558
14065
  // Gather up all of the patterns into the list.
13559
14066
  while (accept1(parser, PM_TOKEN_COMMA)) {
13560
14067
  // Break early here in case we have a trailing comma.
13561
- if (match5(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14068
+ if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
13562
14069
  node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
13563
14070
  pm_node_list_append(&nodes, node);
13564
14071
  break;
@@ -13644,7 +14151,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13644
14151
  assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
13645
14152
 
13646
14153
  bool concating = false;
13647
- bool state_is_arg_labeled = lex_state_p(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
14154
+ bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
13648
14155
 
13649
14156
  while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
13650
14157
  pm_node_t *node = NULL;
@@ -13719,7 +14226,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13719
14226
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13720
14227
  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
13721
14228
  } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
13722
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
14229
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
13723
14230
  } else if (match1(parser, PM_TOKEN_EOF)) {
13724
14231
  pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_TERM);
13725
14232
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
@@ -13741,7 +14248,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13741
14248
  pm_node_flag_set(node, parse_unescaped_encoding(parser));
13742
14249
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13743
14250
  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
13744
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
14251
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
13745
14252
  } else {
13746
14253
  // If we get here, then we have interpolation so we'll need
13747
14254
  // to create a string or symbol node with interpolation.
@@ -13834,7 +14341,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
13834
14341
  * Parse an expression that begins with the previous node that we just lexed.
13835
14342
  */
13836
14343
  static inline pm_node_t *
13837
- parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call) {
14344
+ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
13838
14345
  switch (parser->current.type) {
13839
14346
  case PM_TOKEN_BRACKET_LEFT_ARRAY: {
13840
14347
  parser_lex(parser);
@@ -13866,9 +14373,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
13866
14373
  pm_node_t *expression = NULL;
13867
14374
 
13868
14375
  if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
13869
- if (pm_parser_local_depth(parser, &parser->previous) == -1) {
13870
- pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
13871
- }
14376
+ pm_parser_scope_forwarding_positionals_check(parser, &operator);
13872
14377
  } else {
13873
14378
  expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR);
13874
14379
  }
@@ -14113,7 +14618,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14113
14618
  if (
14114
14619
  match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
14115
14620
  (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
14116
- (pm_accepts_block_stack_p(parser) && match2(parser, PM_TOKEN_KEYWORD_DO, PM_TOKEN_BRACE_LEFT))
14621
+ (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
14622
+ match1(parser, PM_TOKEN_BRACE_LEFT)
14117
14623
  ) {
14118
14624
  pm_arguments_t arguments = { 0 };
14119
14625
  parse_arguments_list(parser, &arguments, true, accepts_command_call);
@@ -14237,7 +14743,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14237
14743
  // a block, so we need to check for that here.
14238
14744
  if (
14239
14745
  (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
14240
- (pm_accepts_block_stack_p(parser) && match2(parser, PM_TOKEN_KEYWORD_DO, PM_TOKEN_BRACE_LEFT))
14746
+ (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
14747
+ match1(parser, PM_TOKEN_BRACE_LEFT)
14241
14748
  ) {
14242
14749
  pm_arguments_t arguments = { 0 };
14243
14750
  parse_arguments_list(parser, &arguments, true, accepts_command_call);
@@ -14250,6 +14757,31 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14250
14757
 
14251
14758
  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
14252
14759
  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
14760
+ } else {
14761
+ // Check if `it` is not going to be assigned.
14762
+ switch (parser->current.type) {
14763
+ case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
14764
+ case PM_TOKEN_AMPERSAND_EQUAL:
14765
+ case PM_TOKEN_CARET_EQUAL:
14766
+ case PM_TOKEN_EQUAL:
14767
+ case PM_TOKEN_GREATER_GREATER_EQUAL:
14768
+ case PM_TOKEN_LESS_LESS_EQUAL:
14769
+ case PM_TOKEN_MINUS_EQUAL:
14770
+ case PM_TOKEN_PARENTHESIS_RIGHT:
14771
+ case PM_TOKEN_PERCENT_EQUAL:
14772
+ case PM_TOKEN_PIPE_EQUAL:
14773
+ case PM_TOKEN_PIPE_PIPE_EQUAL:
14774
+ case PM_TOKEN_PLUS_EQUAL:
14775
+ case PM_TOKEN_SLASH_EQUAL:
14776
+ case PM_TOKEN_STAR_EQUAL:
14777
+ case PM_TOKEN_STAR_STAR_EQUAL:
14778
+ break;
14779
+ default:
14780
+ // Once we know it's neither a method call nor an
14781
+ // assignment, we can finally create `it` default
14782
+ // parameter.
14783
+ node = pm_node_check_it(parser, node);
14784
+ }
14253
14785
  }
14254
14786
 
14255
14787
  return node;
@@ -14286,6 +14818,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14286
14818
  // If we get here, then we tried to find something in the
14287
14819
  // heredoc but couldn't actually parse anything, so we'll just
14288
14820
  // return a missing node.
14821
+ //
14822
+ // parse_string_part handles its own errors, so there is no need
14823
+ // for us to add one here.
14289
14824
  node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
14290
14825
  } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
14291
14826
  // If we get here, then the part that we parsed was plain string
@@ -14549,11 +15084,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14549
15084
  // for guard clauses in the form of `if` or `unless` statements.
14550
15085
  if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
14551
15086
  pm_token_t keyword = parser->previous;
14552
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
15087
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
14553
15088
  pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
14554
15089
  } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
14555
15090
  pm_token_t keyword = parser->previous;
14556
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
15091
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
14557
15092
  pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
14558
15093
  }
14559
15094
 
@@ -14742,8 +15277,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14742
15277
  pm_token_t operator = parser->previous;
14743
15278
  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_NOT, true, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS);
14744
15279
 
14745
- pm_constant_id_t old_param_name = parser->current_param_name;
14746
- parser->current_param_name = 0;
15280
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
14747
15281
  pm_parser_scope_push(parser, true);
14748
15282
  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14749
15283
 
@@ -14760,11 +15294,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14760
15294
  }
14761
15295
 
14762
15296
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
14763
-
14764
15297
  pm_constant_id_list_t locals = parser->current_scope->locals;
15298
+
14765
15299
  pm_parser_scope_pop(parser);
14766
- parser->current_param_name = old_param_name;
14767
15300
  pm_do_loop_stack_pop(parser);
15301
+ pm_parser_current_param_name_restore(parser, saved_param_name);
15302
+
14768
15303
  return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
14769
15304
  }
14770
15305
 
@@ -14790,9 +15325,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14790
15325
  superclass = NULL;
14791
15326
  }
14792
15327
 
14793
- pm_constant_id_t old_param_name = parser->current_param_name;
14794
- parser->current_param_name = 0;
15328
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
14795
15329
  pm_parser_scope_push(parser, true);
15330
+
14796
15331
  if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
14797
15332
  expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
14798
15333
  } else {
@@ -14818,9 +15353,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14818
15353
  }
14819
15354
 
14820
15355
  pm_constant_id_list_t locals = parser->current_scope->locals;
15356
+
14821
15357
  pm_parser_scope_pop(parser);
14822
- parser->current_param_name = old_param_name;
14823
15358
  pm_do_loop_stack_pop(parser);
15359
+ pm_parser_current_param_name_restore(parser, saved_param_name);
14824
15360
 
14825
15361
  if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
14826
15362
  pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
@@ -14835,18 +15371,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14835
15371
  pm_token_t operator = not_provided(parser);
14836
15372
  pm_token_t name = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = def_keyword.end, .end = def_keyword.end };
14837
15373
 
14838
- // This context is necessary for lexing `...` in a bare params correctly.
14839
- // It must be pushed before lexing the first param, so it is here.
15374
+ // This context is necessary for lexing `...` in a bare params
15375
+ // correctly. It must be pushed before lexing the first param, so it
15376
+ // is here.
14840
15377
  context_push(parser, PM_CONTEXT_DEF_PARAMS);
15378
+ pm_constant_id_t saved_param_name;
15379
+
14841
15380
  parser_lex(parser);
14842
- pm_constant_id_t old_param_name = parser->current_param_name;
14843
15381
 
14844
15382
  switch (parser->current.type) {
14845
15383
  case PM_CASE_OPERATOR:
15384
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14846
15385
  pm_parser_scope_push(parser, true);
14847
- parser->current_param_name = 0;
14848
15386
  lex_state_set(parser, PM_LEX_STATE_ENDFN);
14849
15387
  parser_lex(parser);
15388
+
14850
15389
  name = parser->previous;
14851
15390
  break;
14852
15391
  case PM_TOKEN_IDENTIFIER: {
@@ -14854,18 +15393,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14854
15393
 
14855
15394
  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
14856
15395
  receiver = parse_variable_call(parser);
15396
+ receiver = pm_node_check_it(parser, receiver);
14857
15397
 
15398
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14858
15399
  pm_parser_scope_push(parser, true);
14859
- parser->current_param_name = 0;
14860
15400
  lex_state_set(parser, PM_LEX_STATE_FNAME);
14861
15401
  parser_lex(parser);
14862
15402
 
14863
15403
  operator = parser->previous;
14864
15404
  name = parse_method_definition_name(parser);
14865
15405
  } else {
15406
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14866
15407
  pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
14867
15408
  pm_parser_scope_push(parser, true);
14868
- parser->current_param_name = 0;
15409
+
14869
15410
  name = parser->previous;
14870
15411
  }
14871
15412
 
@@ -14882,9 +15423,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14882
15423
  case PM_TOKEN_KEYWORD___FILE__:
14883
15424
  case PM_TOKEN_KEYWORD___LINE__:
14884
15425
  case PM_TOKEN_KEYWORD___ENCODING__: {
15426
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14885
15427
  pm_parser_scope_push(parser, true);
14886
- parser->current_param_name = 0;
14887
15428
  parser_lex(parser);
15429
+
14888
15430
  pm_token_t identifier = parser->previous;
14889
15431
 
14890
15432
  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
@@ -14946,6 +15488,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14946
15488
  pm_token_t lparen = parser->previous;
14947
15489
  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_DEF_RECEIVER);
14948
15490
 
15491
+ accept1(parser, PM_TOKEN_NEWLINE);
14949
15492
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
14950
15493
  pm_token_t rparen = parser->previous;
14951
15494
 
@@ -14955,8 +15498,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14955
15498
  operator = parser->previous;
14956
15499
  receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
14957
15500
 
15501
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14958
15502
  pm_parser_scope_push(parser, true);
14959
- parser->current_param_name = 0;
14960
15503
 
14961
15504
  // We push `PM_CONTEXT_DEF_PARAMS` again here for the same reason as described above.
14962
15505
  context_push(parser, PM_CONTEXT_DEF_PARAMS);
@@ -14964,8 +15507,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14964
15507
  break;
14965
15508
  }
14966
15509
  default:
15510
+ saved_param_name = pm_parser_current_param_name_unset(parser);
14967
15511
  pm_parser_scope_push(parser, true);
14968
- parser->current_param_name = 0;
15512
+
14969
15513
  name = parse_method_definition_name(parser);
14970
15514
  break;
14971
15515
  }
@@ -15018,8 +15562,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15018
15562
  }
15019
15563
  }
15020
15564
 
15021
- uint32_t locals_body_index = (uint32_t) parser->current_scope->locals.size;
15022
-
15023
15565
  context_pop(parser);
15024
15566
  pm_node_t *statements = NULL;
15025
15567
  pm_token_t equal;
@@ -15080,8 +15622,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15080
15622
  }
15081
15623
 
15082
15624
  pm_constant_id_list_t locals = parser->current_scope->locals;
15083
- parser->current_param_name = old_param_name;
15625
+
15084
15626
  pm_parser_scope_pop(parser);
15627
+ pm_parser_current_param_name_restore(parser, saved_param_name);
15628
+
15629
+ /**
15630
+ * If the final character is @, as is the case when defining
15631
+ * methods to override the unary operators, we should ignore
15632
+ * the @ in the same way we do for symbols.
15633
+ */
15634
+ name.end = parse_operator_symbol_name(&name);
15085
15635
 
15086
15636
  return (pm_node_t *) pm_def_node_create(
15087
15637
  parser,
@@ -15090,7 +15640,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15090
15640
  params,
15091
15641
  statements,
15092
15642
  &locals,
15093
- locals_body_index,
15094
15643
  &def_keyword,
15095
15644
  &operator,
15096
15645
  &lparen,
@@ -15309,9 +15858,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15309
15858
  pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
15310
15859
  }
15311
15860
 
15312
- pm_constant_id_t old_param_name = parser->current_param_name;
15313
- parser->current_param_name = 0;
15861
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
15314
15862
  pm_parser_scope_push(parser, true);
15863
+
15315
15864
  accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
15316
15865
  pm_node_t *statements = NULL;
15317
15866
 
@@ -15328,7 +15877,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15328
15877
 
15329
15878
  pm_constant_id_list_t locals = parser->current_scope->locals;
15330
15879
  pm_parser_scope_pop(parser);
15331
- parser->current_param_name = old_param_name;
15880
+ pm_parser_current_param_name_restore(parser, saved_param_name);
15332
15881
 
15333
15882
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
15334
15883
 
@@ -15914,6 +16463,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15914
16463
  // context of a multiple assignment. We enforce that here. We'll
15915
16464
  // still lex past it though and create a missing node place.
15916
16465
  if (binding_power != PM_BINDING_POWER_STATEMENT) {
16466
+ pm_parser_err_previous(parser, diag_id);
15917
16467
  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
15918
16468
  }
15919
16469
 
@@ -15995,7 +16545,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15995
16545
  parser_lex(parser);
15996
16546
 
15997
16547
  pm_token_t operator = parser->previous;
16548
+ pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
15998
16549
  pm_parser_scope_push(parser, false);
16550
+
15999
16551
  pm_block_parameters_node_t *block_parameters;
16000
16552
 
16001
16553
  switch (parser->current.type) {
@@ -16030,12 +16582,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16030
16582
  }
16031
16583
  }
16032
16584
 
16033
- uint32_t locals_body_index = 0;
16034
-
16035
- if (block_parameters) {
16036
- locals_body_index = (uint32_t) parser->current_scope->locals.size;
16037
- }
16038
-
16039
16585
  pm_token_t opening;
16040
16586
  pm_node_t *body = NULL;
16041
16587
  parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
@@ -16070,13 +16616,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16070
16616
 
16071
16617
  if (parameters == NULL && (maximum > 0)) {
16072
16618
  parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = operator.start, .end = parser->previous.end }, maximum);
16073
- locals_body_index = maximum;
16074
16619
  }
16075
16620
 
16076
16621
  pm_constant_id_list_t locals = parser->current_scope->locals;
16622
+
16077
16623
  pm_parser_scope_pop(parser);
16078
16624
  pm_accepts_block_stack_pop(parser);
16079
- return (pm_node_t *) pm_lambda_node_create(parser, &locals, locals_body_index, &operator, &opening, &parser->previous, parameters, body);
16625
+ pm_parser_current_param_name_restore(parser, saved_param_name);
16626
+
16627
+ return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
16080
16628
  }
16081
16629
  case PM_TOKEN_UPLUS: {
16082
16630
  parser_lex(parser);
@@ -16095,12 +16643,34 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16095
16643
 
16096
16644
  return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END);
16097
16645
  }
16098
- default:
16099
- if (context_recoverable(parser, &parser->current)) {
16646
+ default: {
16647
+ pm_context_t recoverable = context_recoverable(parser, &parser->current);
16648
+
16649
+ if (recoverable != PM_CONTEXT_NONE) {
16100
16650
  parser->recovering = true;
16651
+
16652
+ // If the given error is not the generic one, then we'll add it
16653
+ // here because it will provide more context in addition to the
16654
+ // recoverable error that we will also add.
16655
+ if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
16656
+ pm_parser_err_previous(parser, diag_id);
16657
+ }
16658
+
16659
+ // If we get here, then we are assuming this token is closing a
16660
+ // parent context, so we'll indicate that to the user so that
16661
+ // they know how we behaved.
16662
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
16663
+ } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
16664
+ // We're going to make a special case here, because "cannot
16665
+ // parse expression" is pretty generic, and we know here that we
16666
+ // have an unexpected token.
16667
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
16668
+ } else {
16669
+ pm_parser_err_previous(parser, diag_id);
16101
16670
  }
16102
16671
 
16103
16672
  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
16673
+ }
16104
16674
  }
16105
16675
  }
16106
16676
 
@@ -16412,7 +16982,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16412
16982
  }
16413
16983
 
16414
16984
  // If this node cannot be writable, then we have an error.
16415
- if (pm_call_node_writable_p(cast)) {
16985
+ if (pm_call_node_writable_p(parser, cast)) {
16416
16986
  parse_write_name(parser, &cast->name);
16417
16987
  } else {
16418
16988
  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
@@ -16523,7 +17093,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16523
17093
  }
16524
17094
 
16525
17095
  // If this node cannot be writable, then we have an error.
16526
- if (pm_call_node_writable_p(cast)) {
17096
+ if (pm_call_node_writable_p(parser, cast)) {
16527
17097
  parse_write_name(parser, &cast->name);
16528
17098
  } else {
16529
17099
  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
@@ -16644,7 +17214,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16644
17214
  }
16645
17215
 
16646
17216
  // If this node cannot be writable, then we have an error.
16647
- if (pm_call_node_writable_p(cast)) {
17217
+ if (pm_call_node_writable_p(parser, cast)) {
16648
17218
  parse_write_name(parser, &cast->name);
16649
17219
  } else {
16650
17220
  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
@@ -17063,15 +17633,12 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
17063
17633
  */
17064
17634
  static pm_node_t *
17065
17635
  parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
17066
- pm_token_t recovery = parser->previous;
17067
- pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call);
17636
+ pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, diag_id);
17068
17637
 
17069
17638
  switch (PM_NODE_TYPE(node)) {
17070
17639
  case PM_MISSING_NODE:
17071
17640
  // If we found a syntax error, then the type of node returned by
17072
- // parse_expression_prefix is going to be a missing node. In that
17073
- // case we need to add the error message to the parser's error list.
17074
- pm_parser_err(parser, recovery.end, recovery.end, diag_id);
17641
+ // parse_expression_prefix is going to be a missing node.
17075
17642
  return node;
17076
17643
  case PM_PRE_EXECUTION_NODE:
17077
17644
  case PM_POST_EXECUTION_NODE:
@@ -17080,7 +17647,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
17080
17647
  case PM_UNDEF_NODE:
17081
17648
  // These expressions are statements, and cannot be followed by
17082
17649
  // operators (except modifiers).
17083
- if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER_RESCUE) {
17650
+ if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
17084
17651
  return node;
17085
17652
  }
17086
17653
  break;
@@ -17175,9 +17742,14 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
17175
17742
 
17176
17743
  static pm_node_t *
17177
17744
  parse_program(pm_parser_t *parser) {
17178
- pm_parser_scope_push(parser, !parser->current_scope);
17179
- parser_lex(parser);
17745
+ // If the current scope is NULL, then we want to push a new top level scope.
17746
+ // The current scope could exist in the event that we are parsing an eval
17747
+ // and the user has passed in scopes that already exist.
17748
+ if (parser->current_scope == NULL) {
17749
+ pm_parser_scope_push(parser, true);
17750
+ }
17180
17751
 
17752
+ parser_lex(parser);
17181
17753
  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN);
17182
17754
  if (!statements) {
17183
17755
  statements = pm_statements_node_create(parser);
@@ -17234,7 +17806,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17234
17806
  .encoding_changed_callback = NULL,
17235
17807
  .encoding_comment_start = source,
17236
17808
  .lex_callback = NULL,
17237
- .filepath_string = { 0 },
17809
+ .filepath = { 0 },
17238
17810
  .constant_pool = { 0 },
17239
17811
  .newline_list = { 0 },
17240
17812
  .integer_base = 0,
@@ -17248,8 +17820,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17248
17820
  .in_keyword_arg = false,
17249
17821
  .current_param_name = 0,
17250
17822
  .semantic_token_seen = false,
17251
- .frozen_string_literal = false,
17252
- .suppress_warnings = false
17823
+ .frozen_string_literal = false
17253
17824
  };
17254
17825
 
17255
17826
  // Initialize the constant pool. We're going to completely guess as to the
@@ -17278,7 +17849,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17278
17849
  // If options were provided to this parse, establish them here.
17279
17850
  if (options != NULL) {
17280
17851
  // filepath option
17281
- parser->filepath_string = options->filepath;
17852
+ parser->filepath = options->filepath;
17282
17853
 
17283
17854
  // line option
17284
17855
  parser->start_line = options->line;
@@ -17295,10 +17866,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17295
17866
  parser->frozen_string_literal = true;
17296
17867
  }
17297
17868
 
17298
- // suppress_warnings option
17299
- if (options->suppress_warnings) {
17300
- parser->suppress_warnings = true;
17301
- }
17869
+ // version option
17870
+ parser->version = options->version;
17302
17871
 
17303
17872
  // scopes option
17304
17873
  for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
@@ -17382,7 +17951,7 @@ pm_magic_comment_list_free(pm_list_t *list) {
17382
17951
  */
17383
17952
  PRISM_EXPORTED_FUNCTION void
17384
17953
  pm_parser_free(pm_parser_t *parser) {
17385
- pm_string_free(&parser->filepath_string);
17954
+ pm_string_free(&parser->filepath);
17386
17955
  pm_diagnostic_list_free(&parser->error_list);
17387
17956
  pm_diagnostic_list_free(&parser->warning_list);
17388
17957
  pm_comment_list_free(&parser->comment_list);
@@ -17484,3 +18053,299 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
17484
18053
  #undef PM_LOCATION_NODE_VALUE
17485
18054
  #undef PM_LOCATION_NULL_VALUE
17486
18055
  #undef PM_LOCATION_TOKEN_VALUE
18056
+
18057
+ /** An error that is going to be formatted into the output. */
18058
+ typedef struct {
18059
+ /** A pointer to the diagnostic that was generated during parsing. */
18060
+ pm_diagnostic_t *error;
18061
+
18062
+ /** The start line of the diagnostic message. */
18063
+ uint32_t line;
18064
+
18065
+ /** The column start of the diagnostic message. */
18066
+ uint32_t column_start;
18067
+
18068
+ /** The column end of the diagnostic message. */
18069
+ uint32_t column_end;
18070
+ } pm_error_t;
18071
+
18072
+ /** The format that will be used to format the errors into the output. */
18073
+ typedef struct {
18074
+ /** The prefix that will be used for line numbers. */
18075
+ const char *number_prefix;
18076
+
18077
+ /** The prefix that will be used for blank lines. */
18078
+ const char *blank_prefix;
18079
+
18080
+ /** The divider that will be used between sections of source code. */
18081
+ const char *divider;
18082
+
18083
+ /** The length of the blank prefix. */
18084
+ size_t blank_prefix_length;
18085
+
18086
+ /** The length of the divider. */
18087
+ size_t divider_length;
18088
+ } pm_error_format_t;
18089
+
18090
+ #define PM_COLOR_GRAY "\033[38;5;102m"
18091
+ #define PM_COLOR_RED "\033[1;31m"
18092
+ #define PM_COLOR_RESET "\033[0m"
18093
+
18094
+ static inline pm_error_t *
18095
+ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
18096
+ pm_error_t *errors = calloc(error_list->size, sizeof(pm_error_t));
18097
+
18098
+ for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
18099
+ pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start);
18100
+ pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end);
18101
+
18102
+ // We're going to insert this error into the array in sorted order. We
18103
+ // do this by finding the first error that has a line number greater
18104
+ // than the current error and then inserting the current error before
18105
+ // that one.
18106
+ size_t index = 0;
18107
+ while (
18108
+ (index < error_list->size) &&
18109
+ (errors[index].error != NULL) &&
18110
+ (
18111
+ (errors[index].line < ((uint32_t) start.line)) ||
18112
+ (errors[index].line == ((uint32_t) start.line) && errors[index].column_start < ((uint32_t) start.column))
18113
+ )
18114
+ ) index++;
18115
+
18116
+ // Now we're going to shift all of the errors after this one down one
18117
+ // index to make room for the new error.
18118
+ if (index + 1 < error_list->size) {
18119
+ memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
18120
+ }
18121
+
18122
+ // Finally, we'll insert the error into the array.
18123
+ uint32_t column_end;
18124
+ if (start.line == end.line) {
18125
+ column_end = (uint32_t) end.column;
18126
+ } else {
18127
+ column_end = (uint32_t) (newline_list->offsets[start.line] - newline_list->offsets[start.line - 1] - 1);
18128
+ }
18129
+
18130
+ // Ensure we have at least one column of error.
18131
+ if (((uint32_t) start.column) == column_end) column_end++;
18132
+
18133
+ errors[index] = (pm_error_t) {
18134
+ .error = error,
18135
+ .line = (uint32_t) start.line,
18136
+ .column_start = (uint32_t) start.column,
18137
+ .column_end = column_end
18138
+ };
18139
+ }
18140
+
18141
+ return errors;
18142
+ }
18143
+
18144
+ static inline void
18145
+ pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, size_t line, pm_buffer_t *buffer) {
18146
+ const uint8_t *start = &parser->start[newline_list->offsets[line - 1]];
18147
+ const uint8_t *end;
18148
+
18149
+ if (line >= newline_list->size) {
18150
+ end = parser->end;
18151
+ } else {
18152
+ end = &parser->start[newline_list->offsets[line]];
18153
+ }
18154
+
18155
+ pm_buffer_append_format(buffer, number_prefix, (uint32_t) line);
18156
+ pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
18157
+
18158
+ if (end == parser->end && end[-1] != '\n') {
18159
+ pm_buffer_append_string(buffer, "\n", 1);
18160
+ }
18161
+ }
18162
+
18163
+ /**
18164
+ * Format the errors on the parser into the given buffer.
18165
+ */
18166
+ PRISM_EXPORTED_FUNCTION void
18167
+ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool colorize) {
18168
+ const pm_list_t *error_list = &parser->error_list;
18169
+ assert(error_list->size != 0);
18170
+
18171
+ // First, we're going to sort all of the errors by line number using an
18172
+ // insertion sort into a newly allocated array.
18173
+ const pm_newline_list_t *newline_list = &parser->newline_list;
18174
+ pm_error_t *errors = pm_parser_errors_format_sort(error_list, newline_list);
18175
+
18176
+ // Now we're going to determine how we're going to format line numbers and
18177
+ // blank lines based on the maximum number of digits in the line numbers
18178
+ // that are going to be displayed.
18179
+ pm_error_format_t error_format;
18180
+ size_t max_line_number = errors[error_list->size - 1].line;
18181
+
18182
+ if (max_line_number < 10) {
18183
+ if (colorize) {
18184
+ error_format = (pm_error_format_t) {
18185
+ .number_prefix = PM_COLOR_GRAY "%1" PRIu32 " | " PM_COLOR_RESET,
18186
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18187
+ .divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
18188
+ };
18189
+ } else {
18190
+ error_format = (pm_error_format_t) {
18191
+ .number_prefix = "%1" PRIu32 " | ",
18192
+ .blank_prefix = " | ",
18193
+ .divider = " ~~~~~\n"
18194
+ };
18195
+ }
18196
+ } else if (max_line_number < 100) {
18197
+ if (colorize) {
18198
+ error_format = (pm_error_format_t) {
18199
+ .number_prefix = PM_COLOR_GRAY "%2" PRIu32 " | " PM_COLOR_RESET,
18200
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18201
+ .divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
18202
+ };
18203
+ } else {
18204
+ error_format = (pm_error_format_t) {
18205
+ .number_prefix = "%2" PRIu32 " | ",
18206
+ .blank_prefix = " | ",
18207
+ .divider = " ~~~~~~\n"
18208
+ };
18209
+ }
18210
+ } else if (max_line_number < 1000) {
18211
+ if (colorize) {
18212
+ error_format = (pm_error_format_t) {
18213
+ .number_prefix = PM_COLOR_GRAY "%3" PRIu32 " | " PM_COLOR_RESET,
18214
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18215
+ .divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
18216
+ };
18217
+ } else {
18218
+ error_format = (pm_error_format_t) {
18219
+ .number_prefix = "%3" PRIu32 " | ",
18220
+ .blank_prefix = " | ",
18221
+ .divider = " ~~~~~~~\n"
18222
+ };
18223
+ }
18224
+ } else if (max_line_number < 10000) {
18225
+ if (colorize) {
18226
+ error_format = (pm_error_format_t) {
18227
+ .number_prefix = PM_COLOR_GRAY "%4" PRIu32 " | " PM_COLOR_RESET,
18228
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18229
+ .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
18230
+ };
18231
+ } else {
18232
+ error_format = (pm_error_format_t) {
18233
+ .number_prefix = "%4" PRIu32 " | ",
18234
+ .blank_prefix = " | ",
18235
+ .divider = " ~~~~~~~~\n"
18236
+ };
18237
+ }
18238
+ } else {
18239
+ if (colorize) {
18240
+ error_format = (pm_error_format_t) {
18241
+ .number_prefix = PM_COLOR_GRAY "%5" PRIu32 " | " PM_COLOR_RESET,
18242
+ .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18243
+ .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
18244
+ };
18245
+ } else {
18246
+ error_format = (pm_error_format_t) {
18247
+ .number_prefix = "%5" PRIu32 " | ",
18248
+ .blank_prefix = " | ",
18249
+ .divider = " ~~~~~~~~\n"
18250
+ };
18251
+ }
18252
+ }
18253
+
18254
+ error_format.blank_prefix_length = strlen(error_format.blank_prefix);
18255
+ error_format.divider_length = strlen(error_format.divider);
18256
+
18257
+ // Now we're going to iterate through every error in our error list and
18258
+ // display it. While we're iterating, we will display some padding lines of
18259
+ // the source before the error to give some context. We'll be careful not to
18260
+ // display the same line twice in case the errors are close enough in the
18261
+ // source.
18262
+ uint32_t last_line = 0;
18263
+ const pm_encoding_t *encoding = parser->encoding;
18264
+
18265
+ for (size_t index = 0; index < error_list->size; index++) {
18266
+ pm_error_t *error = &errors[index];
18267
+
18268
+ // Here we determine how many lines of padding of the source to display,
18269
+ // based on the difference from the last line that was displayed.
18270
+ if (error->line - last_line > 1) {
18271
+ if (error->line - last_line > 2) {
18272
+ if ((index != 0) && (error->line - last_line > 3)) {
18273
+ pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
18274
+ }
18275
+
18276
+ pm_buffer_append_string(buffer, " ", 2);
18277
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
18278
+ }
18279
+
18280
+ pm_buffer_append_string(buffer, " ", 2);
18281
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
18282
+ }
18283
+
18284
+ // If this is the first error or we're on a new line, then we'll display
18285
+ // the line that has the error in it.
18286
+ if ((index == 0) || (error->line != last_line)) {
18287
+ if (colorize) {
18288
+ pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 13);
18289
+ } else {
18290
+ pm_buffer_append_string(buffer, "> ", 2);
18291
+ }
18292
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
18293
+ }
18294
+
18295
+ // Now we'll display the actual error message. We'll do this by first
18296
+ // putting the prefix to the line, then a bunch of blank spaces
18297
+ // depending on the column, then as many carets as we need to display
18298
+ // the width of the error, then the error message itself.
18299
+ //
18300
+ // Note that this doesn't take into account the width of the actual
18301
+ // character when displayed in the terminal. For some East Asian
18302
+ // languages or emoji, this means it can be thrown off pretty badly. We
18303
+ // will need to solve this eventually.
18304
+ pm_buffer_append_string(buffer, " ", 2);
18305
+ pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
18306
+
18307
+ size_t column = 0;
18308
+ const uint8_t *start = &parser->start[newline_list->offsets[error->line - 1]];
18309
+
18310
+ while (column < error->column_end) {
18311
+ if (column < error->column_start) {
18312
+ pm_buffer_append_byte(buffer, ' ');
18313
+ } else if (colorize) {
18314
+ pm_buffer_append_string(buffer, PM_COLOR_RED "^" PM_COLOR_RESET, 12);
18315
+ } else {
18316
+ pm_buffer_append_byte(buffer, '^');
18317
+ }
18318
+
18319
+ size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
18320
+ column += (char_width == 0 ? 1 : char_width);
18321
+ }
18322
+
18323
+ pm_buffer_append_byte(buffer, ' ');
18324
+
18325
+ const char *message = error->error->message;
18326
+ pm_buffer_append_string(buffer, message, strlen(message));
18327
+ pm_buffer_append_byte(buffer, '\n');
18328
+
18329
+ // Here we determine how many lines of padding to display after the
18330
+ // error, depending on where the next error is in source.
18331
+ last_line = error->line;
18332
+ size_t next_line = (index == error_list->size - 1) ? newline_list->size : errors[index + 1].line;
18333
+
18334
+ if (next_line - last_line > 1) {
18335
+ pm_buffer_append_string(buffer, " ", 2);
18336
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
18337
+ }
18338
+
18339
+ if (next_line - last_line > 1) {
18340
+ pm_buffer_append_string(buffer, " ", 2);
18341
+ pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
18342
+ }
18343
+ }
18344
+
18345
+ // Finally, we'll free the array of errors that we allocated.
18346
+ free(errors);
18347
+ }
18348
+
18349
+ #undef PM_COLOR_GRAY
18350
+ #undef PM_COLOR_RED
18351
+ #undef PM_COLOR_RESET