prism 0.27.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -1
  3. data/config.yml +39 -27
  4. data/docs/configuration.md +1 -0
  5. data/ext/prism/api_node.c +814 -807
  6. data/ext/prism/extension.c +5 -3
  7. data/ext/prism/extension.h +1 -1
  8. data/include/prism/ast.h +38 -16
  9. data/include/prism/diagnostic.h +12 -5
  10. data/include/prism/options.h +2 -2
  11. data/include/prism/parser.h +10 -0
  12. data/include/prism/static_literals.h +8 -6
  13. data/include/prism/version.h +2 -2
  14. data/lib/prism/dot_visitor.rb +22 -6
  15. data/lib/prism/dsl.rb +8 -8
  16. data/lib/prism/ffi.rb +3 -3
  17. data/lib/prism/inspect_visitor.rb +2156 -0
  18. data/lib/prism/lex_compat.rb +1 -1
  19. data/lib/prism/mutation_compiler.rb +2 -2
  20. data/lib/prism/node.rb +589 -1715
  21. data/lib/prism/node_ext.rb +34 -5
  22. data/lib/prism/parse_result.rb +78 -0
  23. data/lib/prism/pattern.rb +12 -6
  24. data/lib/prism/polyfill/byteindex.rb +13 -0
  25. data/lib/prism/polyfill/unpack1.rb +14 -0
  26. data/lib/prism/reflection.rb +13 -13
  27. data/lib/prism/serialize.rb +21 -14
  28. data/lib/prism/translation/parser/compiler.rb +2 -2
  29. data/lib/prism/translation/parser.rb +6 -6
  30. data/lib/prism/translation/ripper.rb +13 -9
  31. data/lib/prism/translation/ruby_parser.rb +4 -4
  32. data/lib/prism.rb +2 -1
  33. data/prism.gemspec +36 -38
  34. data/rbi/prism/compiler.rbi +3 -5
  35. data/rbi/prism/inspect_visitor.rbi +12 -0
  36. data/rbi/prism/node.rbi +354 -319
  37. data/rbi/prism/parse_result.rbi +23 -0
  38. data/rbi/prism/translation/ripper.rbi +1 -11
  39. data/sig/prism/dsl.rbs +3 -3
  40. data/sig/prism/inspect_visitor.rbs +22 -0
  41. data/sig/prism/node.rbs +64 -47
  42. data/sig/prism/parse_result.rbs +12 -0
  43. data/src/diagnostic.c +38 -24
  44. data/src/node.c +41 -16
  45. data/src/options.c +2 -2
  46. data/src/prettyprint.c +61 -18
  47. data/src/prism.c +607 -185
  48. data/src/serialize.c +5 -2
  49. data/src/static_literals.c +120 -34
  50. data/src/token_type.c +4 -4
  51. metadata +7 -9
  52. data/lib/prism/node_inspector.rb +0 -68
  53. data/lib/prism/polyfill/string.rb +0 -12
  54. data/rbi/prism/desugar_compiler.rbi +0 -5
  55. data/rbi/prism/mutation_compiler.rbi +0 -5
  56. data/rbi/prism/translation/parser/compiler.rbi +0 -13
  57. data/rbi/prism/translation/ripper/ripper_compiler.rbi +0 -5
  58. data/rbi/prism/translation/ruby_parser.rbi +0 -11
data/src/prism.c CHANGED
@@ -672,6 +672,26 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
672
672
  #define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
673
673
  PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
674
674
 
675
+ /**
676
+ * Add an error for an expected heredoc terminator. This is a special function
677
+ * only because it grabs its location off of a lex mode instead of a node or a
678
+ * token.
679
+ */
680
+ static void
681
+ pm_parser_err_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
682
+ const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
683
+ size_t ident_length = lex_mode->as.heredoc.ident_length;
684
+
685
+ PM_PARSER_ERR_FORMAT(
686
+ parser,
687
+ ident_start,
688
+ ident_start + ident_length,
689
+ PM_ERR_HEREDOC_TERM,
690
+ (int) ident_length,
691
+ (const char *) ident_start
692
+ );
693
+ }
694
+
675
695
  /******************************************************************************/
676
696
  /* Scope-related functions */
677
697
  /******************************************************************************/
@@ -1405,7 +1425,7 @@ pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1405
1425
  static inline void
1406
1426
  pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1407
1427
  if (pm_conditional_predicate_warn_write_literal_p(node)) {
1408
- pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3_0 : PM_WARN_EQUAL_IN_CONDITIONAL);
1428
+ pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1409
1429
  }
1410
1430
  }
1411
1431
 
@@ -2923,6 +2943,29 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
2923
2943
  return node;
2924
2944
  }
2925
2945
 
2946
+ /**
2947
+ * Validate that index expressions do not have keywords or blocks if we are
2948
+ * parsing as Ruby 3.4+.
2949
+ */
2950
+ static void
2951
+ pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2952
+ if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
2953
+ if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2954
+ pm_node_t *node;
2955
+ PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2956
+ if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2957
+ pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2958
+ break;
2959
+ }
2960
+ }
2961
+ }
2962
+
2963
+ if (block != NULL) {
2964
+ pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2965
+ }
2966
+ }
2967
+ }
2968
+
2926
2969
  /**
2927
2970
  * Allocate and initialize a new IndexAndWriteNode node.
2928
2971
  */
@@ -2931,6 +2974,8 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons
2931
2974
  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2932
2975
  pm_index_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_and_write_node_t);
2933
2976
 
2977
+ pm_index_arguments_check(parser, target->arguments, target->block);
2978
+
2934
2979
  *node = (pm_index_and_write_node_t) {
2935
2980
  {
2936
2981
  .type = PM_INDEX_AND_WRITE_NODE,
@@ -3002,6 +3047,8 @@ static pm_index_operator_write_node_t *
3002
3047
  pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3003
3048
  pm_index_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_operator_write_node_t);
3004
3049
 
3050
+ pm_index_arguments_check(parser, target->arguments, target->block);
3051
+
3005
3052
  *node = (pm_index_operator_write_node_t) {
3006
3053
  {
3007
3054
  .type = PM_INDEX_OPERATOR_WRITE_NODE,
@@ -3075,6 +3122,8 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
3075
3122
  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3076
3123
  pm_index_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_or_write_node_t);
3077
3124
 
3125
+ pm_index_arguments_check(parser, target->arguments, target->block);
3126
+
3078
3127
  *node = (pm_index_or_write_node_t) {
3079
3128
  {
3080
3129
  .type = PM_INDEX_OR_WRITE_NODE,
@@ -3139,6 +3188,8 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3139
3188
  pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
3140
3189
  pm_node_flags_t flags = target->base.flags;
3141
3190
 
3191
+ pm_index_arguments_check(parser, target->arguments, target->block);
3192
+
3142
3193
  *node = (pm_index_target_node_t) {
3143
3194
  {
3144
3195
  .type = PM_INDEX_TARGET_NODE,
@@ -3510,22 +3561,27 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node
3510
3561
  * Allocate and initialize a new ConstantPathNode node.
3511
3562
  */
3512
3563
  static pm_constant_path_node_t *
3513
- pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, pm_node_t *child) {
3564
+ pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3514
3565
  pm_assert_value_expression(parser, parent);
3515
-
3516
3566
  pm_constant_path_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_node_t);
3517
3567
 
3568
+ pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3569
+ if (name_token->type == PM_TOKEN_CONSTANT) {
3570
+ name = pm_parser_constant_id_token(parser, name_token);
3571
+ }
3572
+
3518
3573
  *node = (pm_constant_path_node_t) {
3519
3574
  {
3520
3575
  .type = PM_CONSTANT_PATH_NODE,
3521
3576
  .location = {
3522
3577
  .start = parent == NULL ? delimiter->start : parent->location.start,
3523
- .end = child->location.end
3578
+ .end = name_token->end
3524
3579
  },
3525
3580
  },
3526
3581
  .parent = parent,
3527
- .child = child,
3528
- .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter)
3582
+ .name = name,
3583
+ .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3584
+ .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3529
3585
  };
3530
3586
 
3531
3587
  return node;
@@ -3716,6 +3772,113 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3716
3772
  }
3717
3773
  }
3718
3774
 
3775
+ /**
3776
+ * When a method body is created, we want to check if the last statement is a
3777
+ * return or a statement that houses a return. If it is, then we want to mark
3778
+ * that return as being redundant so that we can compile it differently but also
3779
+ * so that we can indicate that to the user.
3780
+ */
3781
+ static void
3782
+ pm_def_node_body_redundant_return(pm_node_t *node) {
3783
+ switch (PM_NODE_TYPE(node)) {
3784
+ case PM_RETURN_NODE:
3785
+ node->flags |= PM_RETURN_NODE_FLAGS_REDUNDANT;
3786
+ break;
3787
+ case PM_BEGIN_NODE: {
3788
+ pm_begin_node_t *cast = (pm_begin_node_t *) node;
3789
+
3790
+ if (cast->statements != NULL && cast->else_clause == NULL) {
3791
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3792
+ }
3793
+ break;
3794
+ }
3795
+ case PM_STATEMENTS_NODE: {
3796
+ pm_statements_node_t *cast = (pm_statements_node_t *) node;
3797
+
3798
+ if (cast->body.size > 0) {
3799
+ pm_def_node_body_redundant_return(cast->body.nodes[cast->body.size - 1]);
3800
+ }
3801
+ break;
3802
+ }
3803
+ case PM_IF_NODE: {
3804
+ pm_if_node_t *cast = (pm_if_node_t *) node;
3805
+
3806
+ if (cast->statements != NULL) {
3807
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3808
+ }
3809
+
3810
+ if (cast->consequent != NULL) {
3811
+ pm_def_node_body_redundant_return(cast->consequent);
3812
+ }
3813
+ break;
3814
+ }
3815
+ case PM_UNLESS_NODE: {
3816
+ pm_unless_node_t *cast = (pm_unless_node_t *) node;
3817
+
3818
+ if (cast->statements != NULL) {
3819
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3820
+ }
3821
+
3822
+ if (cast->consequent != NULL) {
3823
+ pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
3824
+ }
3825
+ break;
3826
+ }
3827
+ case PM_ELSE_NODE: {
3828
+ pm_else_node_t *cast = (pm_else_node_t *) node;
3829
+
3830
+ if (cast->statements != NULL) {
3831
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3832
+ }
3833
+ break;
3834
+ }
3835
+ case PM_CASE_NODE: {
3836
+ pm_case_node_t *cast = (pm_case_node_t *) node;
3837
+ pm_node_t *condition;
3838
+
3839
+ PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
3840
+ pm_def_node_body_redundant_return(condition);
3841
+ }
3842
+
3843
+ if (cast->consequent != NULL) {
3844
+ pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
3845
+ }
3846
+ break;
3847
+ }
3848
+ case PM_WHEN_NODE: {
3849
+ pm_when_node_t *cast = (pm_when_node_t *) node;
3850
+
3851
+ if (cast->statements != NULL) {
3852
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3853
+ }
3854
+ break;
3855
+ }
3856
+ case PM_CASE_MATCH_NODE: {
3857
+ pm_case_match_node_t *cast = (pm_case_match_node_t *) node;
3858
+ pm_node_t *condition;
3859
+
3860
+ PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
3861
+ pm_def_node_body_redundant_return(condition);
3862
+ }
3863
+
3864
+ if (cast->consequent != NULL) {
3865
+ pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
3866
+ }
3867
+ break;
3868
+ }
3869
+ case PM_IN_NODE: {
3870
+ pm_in_node_t *cast = (pm_in_node_t *) node;
3871
+
3872
+ if (cast->statements != NULL) {
3873
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3874
+ }
3875
+ break;
3876
+ }
3877
+ default:
3878
+ break;
3879
+ }
3880
+ }
3881
+
3719
3882
  /**
3720
3883
  * Allocate and initialize a new DefNode node.
3721
3884
  */
@@ -3748,6 +3911,10 @@ pm_def_node_create(
3748
3911
  pm_def_node_receiver_check(parser, receiver);
3749
3912
  }
3750
3913
 
3914
+ if (body != NULL) {
3915
+ pm_def_node_body_redundant_return(body);
3916
+ }
3917
+
3751
3918
  *node = (pm_def_node_t) {
3752
3919
  {
3753
3920
  .type = PM_DEF_NODE,
@@ -4922,6 +5089,50 @@ pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable
4922
5089
  return node;
4923
5090
  }
4924
5091
 
5092
+ /**
5093
+ * Append a part into a list of string parts. Importantly this handles nested
5094
+ * interpolated strings by not necessarily removing the marker for static
5095
+ * literals.
5096
+ */
5097
+ static void
5098
+ pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
5099
+ switch (PM_NODE_TYPE(part)) {
5100
+ case PM_STRING_NODE:
5101
+ pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5102
+ break;
5103
+ case PM_EMBEDDED_STATEMENTS_NODE: {
5104
+ pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5105
+ pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5106
+
5107
+ if (embedded == NULL) {
5108
+ // If there are no statements or more than one statement, then
5109
+ // we lose the static literal flag.
5110
+ pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5111
+ } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5112
+ // If the embedded statement is a string, then we can keep the
5113
+ // static literal flag and mark the string as frozen.
5114
+ pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5115
+ } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5116
+ // If the embedded statement is an interpolated string and it's
5117
+ // a static literal, then we can keep the static literal flag.
5118
+ } else {
5119
+ // Otherwise we lose the static literal flag.
5120
+ pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5121
+ }
5122
+
5123
+ break;
5124
+ }
5125
+ case PM_EMBEDDED_VARIABLE_NODE:
5126
+ pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5127
+ break;
5128
+ default:
5129
+ assert(false && "unexpected node type");
5130
+ break;
5131
+ }
5132
+
5133
+ pm_node_list_append(parts, part);
5134
+ }
5135
+
4925
5136
  /**
4926
5137
  * Allocate a new InterpolatedRegularExpressionNode node.
4927
5138
  */
@@ -4955,54 +5166,113 @@ pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expressio
4955
5166
  node->base.location.end = part->location.end;
4956
5167
  }
4957
5168
 
4958
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
4959
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4960
- }
4961
-
4962
- if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
4963
- pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
4964
- }
4965
-
4966
- pm_node_list_append(&node->parts, part);
5169
+ pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
4967
5170
  }
4968
5171
 
4969
5172
  static inline void
4970
5173
  pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
4971
5174
  node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
4972
5175
  node->base.location.end = closing->end;
4973
- pm_node_flag_set((pm_node_t *)node, pm_regular_expression_flags_create(parser, closing));
5176
+ pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
4974
5177
  }
4975
5178
 
4976
5179
  /**
4977
5180
  * Append a part to an InterpolatedStringNode node.
5181
+ *
5182
+ * This has somewhat complicated semantics, because we need to update
5183
+ * multiple flags that have somewhat confusing interactions.
5184
+ *
5185
+ * PM_NODE_FLAG_STATIC_LITERAL indicates that the node should be treated as a
5186
+ * single static literal string that can be pushed onto the stack on its own.
5187
+ * Note that this doesn't necessarily mean that the string will be frozen or
5188
+ * not; the instructions in CRuby will be either putobject or putstring,
5189
+ * depending on the combination of `--enable-frozen-string-literal`,
5190
+ * `# frozen_string_literal: true`, and whether or not there is interpolation.
5191
+ *
5192
+ * PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN indicates that the string should be
5193
+ * explicitly frozen. This will only happen if the string is comprised entirely
5194
+ * of string parts that are themselves static literals and frozen.
5195
+ *
5196
+ * PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE indicates that the string should
5197
+ * be explicitly marked as mutable. This will happen from
5198
+ * `--disable-frozen-string-literal` or `# frozen_string_literal: false`. This
5199
+ * is necessary to indicate that the string should be left up to the runtime,
5200
+ * which could potentially use a chilled string otherwise.
4978
5201
  */
4979
5202
  static inline void
4980
- pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_node_t *node, pm_node_t *part) {
5203
+ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
5204
+ #define CLEAR_FLAGS(node) \
5205
+ node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5206
+
5207
+ #define MUTABLE_FLAGS(node) \
5208
+ node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5209
+
4981
5210
  if (node->parts.size == 0 && node->opening_loc.start == NULL) {
4982
5211
  node->base.location.start = part->location.start;
4983
5212
  }
4984
5213
 
4985
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
4986
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4987
- }
5214
+ node->base.location.end = MAX(node->base.location.end, part->location.end);
4988
5215
 
4989
- if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
4990
- pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE);
5216
+ switch (PM_NODE_TYPE(part)) {
5217
+ case PM_STRING_NODE:
5218
+ pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5219
+ break;
5220
+ case PM_INTERPOLATED_STRING_NODE:
5221
+ if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5222
+ // If the string that we're concatenating is a static literal,
5223
+ // then we can keep the static literal flag for this string.
5224
+ } else {
5225
+ // Otherwise, we lose the static literal flag here and we should
5226
+ // also clear the mutability flags.
5227
+ CLEAR_FLAGS(node);
5228
+ }
5229
+ break;
5230
+ case PM_EMBEDDED_STATEMENTS_NODE: {
5231
+ pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5232
+ pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5233
+
5234
+ if (embedded == NULL) {
5235
+ // If we're embedding multiple statements or no statements, then
5236
+ // the string is no longer a static literal.
5237
+ CLEAR_FLAGS(node);
5238
+ } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5239
+ // If the embedded statement is a string, then we can mark that
5240
+ // string as frozen and static literal, and not touch the static
5241
+ // literal status of this string.
5242
+ pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5243
+
5244
+ if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5245
+ MUTABLE_FLAGS(node);
5246
+ }
5247
+ } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5248
+ // If the embedded statement is an interpolated string, but that
5249
+ // string is marked as static literal, then we can keep our
5250
+ // static literal status for this string.
5251
+ if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5252
+ MUTABLE_FLAGS(node);
5253
+ }
5254
+ } else {
5255
+ // In all other cases, we lose the static literal flag here and
5256
+ // become mutable.
5257
+ CLEAR_FLAGS(node);
5258
+ }
5259
+
5260
+ break;
5261
+ }
5262
+ case PM_EMBEDDED_VARIABLE_NODE:
5263
+ // Embedded variables clear static literal, which means we also
5264
+ // should clear the mutability flags.
5265
+ CLEAR_FLAGS(node);
5266
+ break;
5267
+ default:
5268
+ assert(false && "unexpected node type");
5269
+ break;
4991
5270
  }
4992
5271
 
4993
5272
  pm_node_list_append(&node->parts, part);
4994
- node->base.location.end = MAX(node->base.location.end, part->location.end);
4995
5273
 
4996
- if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4997
- switch (parser->frozen_string_literal) {
4998
- case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
4999
- pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE);
5000
- break;
5001
- case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5002
- pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5003
- break;
5004
- }
5005
- }
5274
+ #undef CLEAR_FLAGS
5275
+ #undef MUTABLE_FLAGS
5006
5276
  }
5007
5277
 
5008
5278
  /**
@@ -5011,11 +5281,21 @@ pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_n
5011
5281
  static pm_interpolated_string_node_t *
5012
5282
  pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5013
5283
  pm_interpolated_string_node_t *node = PM_ALLOC_NODE(parser, pm_interpolated_string_node_t);
5284
+ pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5285
+
5286
+ switch (parser->frozen_string_literal) {
5287
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5288
+ flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5289
+ break;
5290
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5291
+ flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5292
+ break;
5293
+ }
5014
5294
 
5015
5295
  *node = (pm_interpolated_string_node_t) {
5016
5296
  {
5017
5297
  .type = PM_INTERPOLATED_STRING_NODE,
5018
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
5298
+ .flags = flags,
5019
5299
  .location = {
5020
5300
  .start = opening->start,
5021
5301
  .end = closing->end,
@@ -5029,7 +5309,7 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin
5029
5309
  if (parts != NULL) {
5030
5310
  pm_node_t *part;
5031
5311
  PM_NODE_LIST_FOREACH(parts, index, part) {
5032
- pm_interpolated_string_node_append(parser, node, part);
5312
+ pm_interpolated_string_node_append(node, part);
5033
5313
  }
5034
5314
  }
5035
5315
 
@@ -5051,15 +5331,7 @@ pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_
5051
5331
  node->base.location.start = part->location.start;
5052
5332
  }
5053
5333
 
5054
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
5055
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5056
- }
5057
-
5058
- if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5059
- pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5060
- }
5061
-
5062
- pm_node_list_append(&node->parts, part);
5334
+ pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5063
5335
  node->base.location.end = MAX(node->base.location.end, part->location.end);
5064
5336
  }
5065
5337
 
@@ -5125,11 +5397,7 @@ pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *openi
5125
5397
 
5126
5398
  static inline void
5127
5399
  pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5128
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
5129
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5130
- }
5131
-
5132
- pm_node_list_append(&node->parts, part);
5400
+ pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5133
5401
  node->base.location.end = part->location.end;
5134
5402
  }
5135
5403
 
@@ -6397,6 +6665,7 @@ pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argumen
6397
6665
  *node = (pm_return_node_t) {
6398
6666
  {
6399
6667
  .type = PM_RETURN_NODE,
6668
+ .flags = 0,
6400
6669
  .location = {
6401
6670
  .start = keyword->start,
6402
6671
  .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
@@ -6729,7 +6998,8 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
6729
6998
  }
6730
6999
 
6731
7000
  /**
6732
- * Read through the contents of a string and check if it consists solely of US ASCII code points.
7001
+ * Read through the contents of a string and check if it consists solely of
7002
+ * US-ASCII code points.
6733
7003
  */
6734
7004
  static bool
6735
7005
  pm_ascii_only_p(const pm_string_t *contents) {
@@ -6743,27 +7013,72 @@ pm_ascii_only_p(const pm_string_t *contents) {
6743
7013
  return true;
6744
7014
  }
6745
7015
 
7016
+ /**
7017
+ * Validate that the contents of the given symbol are all valid UTF-8.
7018
+ */
7019
+ static void
7020
+ parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7021
+ for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7022
+ size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
7023
+
7024
+ if (width == 0) {
7025
+ pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7026
+ break;
7027
+ }
7028
+
7029
+ cursor += width;
7030
+ }
7031
+ }
7032
+
7033
+ /**
7034
+ * Validate that the contents of the given symbol are all valid in the encoding
7035
+ * of the parser.
7036
+ */
7037
+ static void
7038
+ parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7039
+ const pm_encoding_t *encoding = parser->encoding;
7040
+
7041
+ for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7042
+ size_t width = encoding->char_width(cursor, end - cursor);
7043
+
7044
+ if (width == 0) {
7045
+ pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7046
+ break;
7047
+ }
7048
+
7049
+ cursor += width;
7050
+ }
7051
+ }
7052
+
6746
7053
  /**
6747
7054
  * Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
6748
7055
  * encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
6749
7056
  * points. Otherwise, the encoding may be explicitly set with an escape
6750
7057
  * sequence.
7058
+ *
7059
+ * If the validate flag is set, then it will check the contents of the symbol
7060
+ * to ensure that all characters are valid in the encoding.
6751
7061
  */
6752
7062
  static inline pm_node_flags_t
6753
- parse_symbol_encoding(const pm_parser_t *parser, const pm_string_t *contents) {
7063
+ parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
6754
7064
  if (parser->explicit_encoding != NULL) {
6755
7065
  // A Symbol may optionally have its encoding explicitly set. This will
6756
7066
  // happen if an escape sequence results in a non-ASCII code point.
6757
7067
  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7068
+ if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
6758
7069
  return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
6759
7070
  } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6760
7071
  return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
7072
+ } else if (validate) {
7073
+ parse_symbol_encoding_validate_other(parser, location, contents);
6761
7074
  }
6762
7075
  } else if (pm_ascii_only_p(contents)) {
6763
7076
  // Ruby stipulates that all source files must use an ASCII-compatible
6764
7077
  // encoding. Thus, all symbols appearing in source are eligible for
6765
7078
  // "downgrading" to US-ASCII.
6766
7079
  return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
7080
+ } else if (validate) {
7081
+ parse_symbol_encoding_validate_other(parser, location, contents);
6767
7082
  }
6768
7083
 
6769
7084
  return 0;
@@ -6931,7 +7246,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
6931
7246
  */
6932
7247
  static pm_symbol_node_t *
6933
7248
  pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6934
- pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, &parser->current_string));
7249
+ pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
6935
7250
  parser->current_string = PM_STRING_EMPTY;
6936
7251
  return node;
6937
7252
  }
@@ -6953,7 +7268,7 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
6953
7268
 
6954
7269
  assert((label.end - label.start) >= 0);
6955
7270
  pm_string_shared_init(&node->unescaped, label.start, label.end);
6956
- pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &node->unescaped));
7271
+ pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
6957
7272
 
6958
7273
  break;
6959
7274
  }
@@ -7038,7 +7353,8 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
7038
7353
  .unescaped = node->unescaped
7039
7354
  };
7040
7355
 
7041
- pm_node_flag_set((pm_node_t *)new_node, parse_symbol_encoding(parser, &node->unescaped));
7356
+ pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
7357
+ pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
7042
7358
 
7043
7359
  // We are explicitly _not_ using pm_node_destroy here because we don't want
7044
7360
  // to trash the unescaped string. We could instead copy the string if we
@@ -7574,7 +7890,7 @@ pm_local_variable_read_node_create_it(pm_parser_t *parser, const pm_token_t *nam
7574
7890
  static pm_node_t *
7575
7891
  pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
7576
7892
  if (
7577
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
7893
+ (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
7578
7894
  !parser->current_scope->closed &&
7579
7895
  (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
7580
7896
  pm_node_is_it(parser, node)
@@ -8298,10 +8614,11 @@ context_human(pm_context_t context) {
8298
8614
  /* Specific token lexers */
8299
8615
  /******************************************************************************/
8300
8616
 
8301
- static void
8302
- pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *invalid) {
8617
+ static inline void
8618
+ pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8303
8619
  if (invalid != NULL) {
8304
- pm_parser_err(parser, invalid, invalid + 1, PM_ERR_INVALID_NUMBER_UNDERSCORE);
8620
+ pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8621
+ pm_parser_err(parser, invalid, invalid + 1, diag_id);
8305
8622
  }
8306
8623
  }
8307
8624
 
@@ -8309,7 +8626,7 @@ static size_t
8309
8626
  pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8310
8627
  const uint8_t *invalid = NULL;
8311
8628
  size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8312
- pm_strspn_number_validate(parser, invalid);
8629
+ pm_strspn_number_validate(parser, string, length, invalid);
8313
8630
  return length;
8314
8631
  }
8315
8632
 
@@ -8317,7 +8634,7 @@ static size_t
8317
8634
  pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8318
8635
  const uint8_t *invalid = NULL;
8319
8636
  size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8320
- pm_strspn_number_validate(parser, invalid);
8637
+ pm_strspn_number_validate(parser, string, length, invalid);
8321
8638
  return length;
8322
8639
  }
8323
8640
 
@@ -8325,7 +8642,7 @@ static size_t
8325
8642
  pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8326
8643
  const uint8_t *invalid = NULL;
8327
8644
  size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8328
- pm_strspn_number_validate(parser, invalid);
8645
+ pm_strspn_number_validate(parser, string, length, invalid);
8329
8646
  return length;
8330
8647
  }
8331
8648
 
@@ -8333,7 +8650,7 @@ static size_t
8333
8650
  pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8334
8651
  const uint8_t *invalid = NULL;
8335
8652
  size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8336
- pm_strspn_number_validate(parser, invalid);
8653
+ pm_strspn_number_validate(parser, string, length, invalid);
8337
8654
  return length;
8338
8655
  }
8339
8656
 
@@ -8395,6 +8712,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8395
8712
  if (pm_char_is_decimal_digit(peek(parser))) {
8396
8713
  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8397
8714
  } else {
8715
+ match(parser, '_');
8398
8716
  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8399
8717
  }
8400
8718
 
@@ -8407,6 +8725,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8407
8725
  if (pm_char_is_binary_digit(peek(parser))) {
8408
8726
  parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8409
8727
  } else {
8728
+ match(parser, '_');
8410
8729
  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8411
8730
  }
8412
8731
 
@@ -8420,6 +8739,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8420
8739
  if (pm_char_is_octal_digit(peek(parser))) {
8421
8740
  parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8422
8741
  } else {
8742
+ match(parser, '_');
8423
8743
  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8424
8744
  }
8425
8745
 
@@ -8447,6 +8767,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8447
8767
  if (pm_char_is_hexadecimal_digit(peek(parser))) {
8448
8768
  parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8449
8769
  } else {
8770
+ match(parser, '_');
8450
8771
  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8451
8772
  }
8452
8773
 
@@ -8567,7 +8888,7 @@ lex_global_variable(pm_parser_t *parser) {
8567
8888
  } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
8568
8889
 
8569
8890
  // $0 isn't allowed to be followed by anything.
8570
- pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3_0 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8891
+ pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8571
8892
  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
8572
8893
  }
8573
8894
 
@@ -8603,7 +8924,7 @@ lex_global_variable(pm_parser_t *parser) {
8603
8924
  } else {
8604
8925
  // If we get here, then we have a $ followed by something that
8605
8926
  // isn't recognized as a global variable.
8606
- pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3_0 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8927
+ pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8607
8928
  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8608
8929
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
8609
8930
  }
@@ -9241,22 +9562,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9241
9562
  const uint8_t *start = parser->current.end - 1;
9242
9563
  parser->current.end++;
9243
9564
 
9244
- if (
9245
- (parser->current.end + 4 <= parser->end) &&
9246
- pm_char_is_hexadecimal_digit(parser->current.end[0]) &&
9247
- pm_char_is_hexadecimal_digit(parser->current.end[1]) &&
9248
- pm_char_is_hexadecimal_digit(parser->current.end[2]) &&
9249
- pm_char_is_hexadecimal_digit(parser->current.end[3])
9250
- ) {
9251
- uint32_t value = escape_unicode(parser->current.end, 4);
9252
-
9253
- if (flags & PM_ESCAPE_FLAG_REGEXP) {
9254
- pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9255
- }
9256
- escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9257
-
9258
- parser->current.end += 4;
9259
- } else if (peek(parser) == '{') {
9565
+ if (peek(parser) == '{') {
9260
9566
  const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9261
9567
 
9262
9568
  parser->current.end++;
@@ -9306,7 +9612,21 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9306
9612
  pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9307
9613
  }
9308
9614
  } else {
9309
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9615
+ size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9616
+
9617
+ if (length == 4) {
9618
+ uint32_t value = escape_unicode(parser->current.end, 4);
9619
+
9620
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
9621
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9622
+ }
9623
+
9624
+ escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9625
+ parser->current.end += 4;
9626
+ } else {
9627
+ parser->current.end += length;
9628
+ pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9629
+ }
9310
9630
  }
9311
9631
 
9312
9632
  return;
@@ -9560,8 +9880,8 @@ lex_at_variable(pm_parser_t *parser) {
9560
9880
  }
9561
9881
  } else if (parser->current.end < parser->end && pm_char_is_decimal_digit(*parser->current.end)) {
9562
9882
  pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
9563
- if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0) {
9564
- diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3_0 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3_0;
9883
+ if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
9884
+ diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
9565
9885
  }
9566
9886
 
9567
9887
  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
@@ -10545,8 +10865,11 @@ parser_lex(pm_parser_t *parser) {
10545
10865
  }
10546
10866
 
10547
10867
  size_t ident_length = (size_t) (parser->current.end - ident_start);
10868
+ bool ident_error = false;
10869
+
10548
10870
  if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
10549
- // TODO: handle unterminated heredoc
10871
+ pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
10872
+ ident_error = true;
10550
10873
  }
10551
10874
 
10552
10875
  parser->explicit_encoding = NULL;
@@ -10571,7 +10894,7 @@ parser_lex(pm_parser_t *parser) {
10571
10894
  // this is not a valid heredoc declaration. In this case we
10572
10895
  // will add an error, but we will still return a heredoc
10573
10896
  // start.
10574
- pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
10897
+ if (!ident_error) pm_parser_err_heredoc_term(parser, parser->lex_modes.current);
10575
10898
  body_start = parser->end;
10576
10899
  } else {
10577
10900
  // Otherwise, we want to indicate that the body of the
@@ -11898,7 +12221,7 @@ parser_lex(pm_parser_t *parser) {
11898
12221
  // terminator) but still continue parsing so that content after the
11899
12222
  // declaration of the heredoc can be parsed.
11900
12223
  if (parser->current.end >= parser->end) {
11901
- pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
12224
+ pm_parser_err_heredoc_term(parser, lex_mode);
11902
12225
  parser->next_start = lex_mode->as.heredoc.next_start;
11903
12226
  parser->heredoc_end = parser->current.end;
11904
12227
  lex_state_set(parser, PM_LEX_STATE_END);
@@ -12537,6 +12860,23 @@ expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_to
12537
12860
  parser->previous.type = PM_TOKEN_MISSING;
12538
12861
  }
12539
12862
 
12863
+ /**
12864
+ * A special expect1 that expects a heredoc terminator and handles popping the
12865
+ * lex mode accordingly.
12866
+ */
12867
+ static void
12868
+ expect1_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
12869
+ if (match1(parser, PM_TOKEN_HEREDOC_END)) {
12870
+ lex_mode_pop(parser);
12871
+ parser_lex(parser);
12872
+ } else {
12873
+ pm_parser_err_heredoc_term(parser, lex_mode);
12874
+ lex_mode_pop(parser);
12875
+ parser->previous.start = parser->previous.end;
12876
+ parser->previous.type = PM_TOKEN_MISSING;
12877
+ }
12878
+ }
12879
+
12540
12880
  static pm_node_t *
12541
12881
  parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id);
12542
12882
 
@@ -12664,25 +13004,72 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
12664
13004
  *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
12665
13005
  }
12666
13006
 
13007
+ /**
13008
+ * Certain expressions are not targetable, but in order to provide a better
13009
+ * experience we give a specific error message. In order to maintain as much
13010
+ * information in the tree as possible, we replace them with local variable
13011
+ * writes.
13012
+ */
13013
+ static pm_node_t *
13014
+ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13015
+ switch (PM_NODE_TYPE(target)) {
13016
+ case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13017
+ case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13018
+ case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13019
+ case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13020
+ case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13021
+ case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13022
+ case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13023
+ default: break;
13024
+ }
13025
+
13026
+ pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
13027
+ pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
13028
+
13029
+ pm_node_destroy(parser, target);
13030
+ return (pm_node_t *) result;
13031
+ }
13032
+
12667
13033
  /**
12668
13034
  * Convert the given node into a valid target node.
12669
13035
  */
12670
13036
  static pm_node_t *
12671
- parse_target(pm_parser_t *parser, pm_node_t *target) {
13037
+ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
12672
13038
  switch (PM_NODE_TYPE(target)) {
12673
13039
  case PM_MISSING_NODE:
12674
13040
  return target;
13041
+ case PM_SOURCE_ENCODING_NODE:
13042
+ case PM_FALSE_NODE:
13043
+ case PM_SOURCE_FILE_NODE:
13044
+ case PM_SOURCE_LINE_NODE:
13045
+ case PM_NIL_NODE:
13046
+ case PM_SELF_NODE:
13047
+ case PM_TRUE_NODE: {
13048
+ // In these special cases, we have specific error messages and we
13049
+ // will replace them with local variable writes.
13050
+ return parse_unwriteable_target(parser, target);
13051
+ }
12675
13052
  case PM_CLASS_VARIABLE_READ_NODE:
12676
13053
  assert(sizeof(pm_class_variable_target_node_t) == sizeof(pm_class_variable_read_node_t));
12677
13054
  target->type = PM_CLASS_VARIABLE_TARGET_NODE;
12678
13055
  return target;
12679
13056
  case PM_CONSTANT_PATH_NODE:
13057
+ if (context_def_p(parser)) {
13058
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13059
+ }
13060
+
12680
13061
  assert(sizeof(pm_constant_path_target_node_t) == sizeof(pm_constant_path_node_t));
12681
13062
  target->type = PM_CONSTANT_PATH_TARGET_NODE;
13063
+
12682
13064
  return target;
12683
13065
  case PM_CONSTANT_READ_NODE:
13066
+ if (context_def_p(parser)) {
13067
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13068
+ }
13069
+
12684
13070
  assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
12685
13071
  target->type = PM_CONSTANT_TARGET_NODE;
13072
+
12686
13073
  return target;
12687
13074
  case PM_BACK_REFERENCE_READ_NODE:
12688
13075
  case PM_NUMBERED_REFERENCE_READ_NODE:
@@ -12715,7 +13102,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
12715
13102
  pm_splat_node_t *splat = (pm_splat_node_t *) target;
12716
13103
 
12717
13104
  if (splat->expression != NULL) {
12718
- splat->expression = parse_target(parser, splat->expression);
13105
+ splat->expression = parse_target(parser, splat->expression, multiple);
12719
13106
  }
12720
13107
 
12721
13108
  return (pm_node_t *) splat;
@@ -12753,6 +13140,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
12753
13140
  }
12754
13141
 
12755
13142
  if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
13143
+ if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13144
+ pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13145
+ }
13146
+
12756
13147
  parse_write_name(parser, &call->name);
12757
13148
  return (pm_node_t *) pm_call_target_node_create(parser, call);
12758
13149
  }
@@ -12780,8 +13171,8 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
12780
13171
  * assignment.
12781
13172
  */
12782
13173
  static pm_node_t *
12783
- parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
12784
- pm_node_t *result = parse_target(parser, target);
13174
+ parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13175
+ pm_node_t *result = parse_target(parser, target, multiple);
12785
13176
 
12786
13177
  // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in parens after the targets.
12787
13178
  if (
@@ -12826,13 +13217,20 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
12826
13217
  }
12827
13218
  case PM_CONSTANT_PATH_NODE: {
12828
13219
  pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
13220
+
13221
+ if (context_def_p(parser)) {
13222
+ pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13223
+ }
13224
+
12829
13225
  return parse_shareable_constant_write(parser, node);
12830
13226
  }
12831
13227
  case PM_CONSTANT_READ_NODE: {
12832
13228
  pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
13229
+
12833
13230
  if (context_def_p(parser)) {
12834
13231
  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12835
13232
  }
13233
+
12836
13234
  pm_node_destroy(parser, target);
12837
13235
  return parse_shareable_constant_write(parser, node);
12838
13236
  }
@@ -13011,7 +13409,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13011
13409
  bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13012
13410
 
13013
13411
  pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13014
- pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target));
13412
+ pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true));
13015
13413
 
13016
13414
  while (accept1(parser, PM_TOKEN_COMMA)) {
13017
13415
  if (accept1(parser, PM_TOKEN_USTAR)) {
@@ -13027,7 +13425,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13027
13425
 
13028
13426
  if (token_begins_expression_p(parser->current.type)) {
13029
13427
  name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
13030
- name = parse_target(parser, name);
13428
+ name = parse_target(parser, name, true);
13031
13429
  }
13032
13430
 
13033
13431
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
@@ -13035,7 +13433,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13035
13433
  has_rest = true;
13036
13434
  } else if (token_begins_expression_p(parser->current.type)) {
13037
13435
  pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
13038
- target = parse_target(parser, target);
13436
+ target = parse_target(parser, target, true);
13039
13437
 
13040
13438
  pm_multi_target_node_targets_append(parser, result, target);
13041
13439
  } else if (!match1(parser, PM_TOKEN_EOF)) {
@@ -13152,11 +13550,11 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
13152
13550
  */
13153
13551
  static void
13154
13552
  pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13155
- const pm_node_t *duplicated = pm_static_literals_add(parser, literals, node);
13553
+ const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
13156
13554
 
13157
13555
  if (duplicated != NULL) {
13158
13556
  pm_buffer_t buffer = { 0 };
13159
- pm_static_literal_inspect(&buffer, parser, duplicated);
13557
+ pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
13160
13558
 
13161
13559
  pm_diagnostic_list_append_format(
13162
13560
  &parser->warning_list,
@@ -13178,7 +13576,7 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
13178
13576
  */
13179
13577
  static void
13180
13578
  pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13181
- if (pm_static_literals_add(parser, literals, node) != NULL) {
13579
+ if (pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node) != NULL) {
13182
13580
  pm_diagnostic_list_append_format(
13183
13581
  &parser->warning_list,
13184
13582
  node->location.start,
@@ -13206,10 +13604,16 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
13206
13604
  pm_token_t operator = parser->previous;
13207
13605
  pm_node_t *value = NULL;
13208
13606
 
13209
- if (token_begins_expression_p(parser->current.type)) {
13607
+ if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
13608
+ // If we're about to parse a nested hash that is being
13609
+ // pushed into this hash directly with **, then we want the
13610
+ // inner hash to share the static literals with the outer
13611
+ // hash.
13612
+ parser->current_hash_keys = literals;
13210
13613
  value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
13211
- }
13212
- else {
13614
+ } else if (token_begins_expression_p(parser->current.type)) {
13615
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
13616
+ } else {
13213
13617
  pm_parser_scope_forwarding_keywords_check(parser, &operator);
13214
13618
  }
13215
13619
 
@@ -13360,15 +13764,16 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13360
13764
  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
13361
13765
  argument = (pm_node_t *) hash;
13362
13766
 
13363
- pm_static_literals_t literals = { 0 };
13364
- bool contains_keyword_splat = parse_assocs(parser, &literals, (pm_node_t *) hash);
13767
+ pm_static_literals_t hash_keys = { 0 };
13768
+ bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash);
13365
13769
 
13366
13770
  parse_arguments_append(parser, arguments, argument);
13367
- if (contains_keyword_splat) {
13368
- pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
13369
- }
13370
13771
 
13371
- pm_static_literals_free(&literals);
13772
+ pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13773
+ if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13774
+ pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
13775
+
13776
+ pm_static_literals_free(&hash_keys);
13372
13777
  parsed_bare_hash = true;
13373
13778
 
13374
13779
  break;
@@ -13444,7 +13849,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13444
13849
  argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, PM_ERR_EXPECT_ARGUMENT);
13445
13850
  }
13446
13851
 
13852
+ bool contains_keywords = false;
13447
13853
  bool contains_keyword_splat = false;
13854
+
13448
13855
  if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
13449
13856
  if (parsed_bare_hash) {
13450
13857
  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
@@ -13458,10 +13865,11 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13458
13865
  }
13459
13866
 
13460
13867
  pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
13868
+ contains_keywords = true;
13461
13869
 
13462
13870
  // Create the set of static literals for this hash.
13463
- pm_static_literals_t literals = { 0 };
13464
- pm_hash_key_static_literals_add(parser, &literals, argument);
13871
+ pm_static_literals_t hash_keys = { 0 };
13872
+ pm_hash_key_static_literals_add(parser, &hash_keys, argument);
13465
13873
 
13466
13874
  // Finish parsing the one we are part way through.
13467
13875
  pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
@@ -13475,10 +13883,10 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13475
13883
  token_begins_expression_p(parser->current.type) ||
13476
13884
  match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
13477
13885
  )) {
13478
- contains_keyword_splat = parse_assocs(parser, &literals, (pm_node_t *) bare_hash);
13886
+ contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash);
13479
13887
  }
13480
13888
 
13481
- pm_static_literals_free(&literals);
13889
+ pm_static_literals_free(&hash_keys);
13482
13890
  parsed_bare_hash = true;
13483
13891
  } else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
13484
13892
  // TODO: Could we solve this with binding powers instead?
@@ -13486,9 +13894,12 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13486
13894
  }
13487
13895
 
13488
13896
  parse_arguments_append(parser, arguments, argument);
13489
- if (contains_keyword_splat) {
13490
- pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
13491
- }
13897
+
13898
+ pm_node_flags_t flags = 0;
13899
+ if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13900
+ if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13901
+ pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
13902
+
13492
13903
  break;
13493
13904
  }
13494
13905
  }
@@ -13601,7 +14012,6 @@ typedef enum {
13601
14012
  PM_PARAMETERS_ORDER_OPTIONAL,
13602
14013
  PM_PARAMETERS_ORDER_NAMED,
13603
14014
  PM_PARAMETERS_ORDER_NONE,
13604
-
13605
14015
  } pm_parameters_order_t;
13606
14016
 
13607
14017
  /**
@@ -13909,6 +14319,7 @@ parse_parameters(
13909
14319
  pm_token_t operator = parser->previous;
13910
14320
  pm_token_t name;
13911
14321
  bool repeated = false;
14322
+
13912
14323
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13913
14324
  name = parser->previous;
13914
14325
  repeated = pm_parser_parameter_name_check(parser, &name);
@@ -13922,6 +14333,7 @@ parse_parameters(
13922
14333
  if (repeated) {
13923
14334
  pm_node_flag_set_repeated_parameter(param);
13924
14335
  }
14336
+
13925
14337
  if (params->rest == NULL) {
13926
14338
  pm_parameters_node_rest_set(params, param);
13927
14339
  } else {
@@ -13933,6 +14345,7 @@ parse_parameters(
13933
14345
  }
13934
14346
  case PM_TOKEN_STAR_STAR:
13935
14347
  case PM_TOKEN_USTAR_STAR: {
14348
+ pm_parameters_order_t previous_order = order;
13936
14349
  update_parameter_state(parser, &parser->current, &order);
13937
14350
  parser_lex(parser);
13938
14351
 
@@ -13940,6 +14353,10 @@ parse_parameters(
13940
14353
  pm_node_t *param;
13941
14354
 
13942
14355
  if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14356
+ if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14357
+ pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14358
+ }
14359
+
13943
14360
  param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
13944
14361
  } else {
13945
14362
  pm_token_t name;
@@ -14037,7 +14454,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
14037
14454
  pm_rescue_node_operator_set(rescue, &parser->previous);
14038
14455
 
14039
14456
  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
14040
- reference = parse_target(parser, reference);
14457
+ reference = parse_target(parser, reference, false);
14041
14458
 
14042
14459
  pm_rescue_node_reference_set(rescue, reference);
14043
14460
  break;
@@ -14067,7 +14484,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
14067
14484
  pm_rescue_node_operator_set(rescue, &parser->previous);
14068
14485
 
14069
14486
  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
14070
- reference = parse_target(parser, reference);
14487
+ reference = parse_target(parser, reference, false);
14071
14488
 
14072
14489
  pm_rescue_node_reference_set(rescue, reference);
14073
14490
  break;
@@ -15036,7 +15453,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
15036
15453
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15037
15454
 
15038
15455
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15039
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
15456
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15040
15457
 
15041
15458
  return (pm_node_t *) symbol;
15042
15459
  }
@@ -15136,7 +15553,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
15136
15553
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15137
15554
  }
15138
15555
 
15139
- return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
15556
+ return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
15140
15557
  }
15141
15558
 
15142
15559
  /**
@@ -15161,7 +15578,7 @@ parse_undef_argument(pm_parser_t *parser) {
15161
15578
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15162
15579
 
15163
15580
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15164
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
15581
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15165
15582
 
15166
15583
  return (pm_node_t *) symbol;
15167
15584
  }
@@ -15202,7 +15619,7 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
15202
15619
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15203
15620
 
15204
15621
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15205
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
15622
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15206
15623
 
15207
15624
  return (pm_node_t *) symbol;
15208
15625
  }
@@ -15459,9 +15876,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
15459
15876
  while (accept1(parser, PM_TOKEN_COLON_COLON)) {
15460
15877
  pm_token_t delimiter = parser->previous;
15461
15878
  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
15462
-
15463
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
15464
- node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, child);
15879
+ node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
15465
15880
  }
15466
15881
 
15467
15882
  // If there is a [ or ( that follows, then this is part of a larger pattern
@@ -15678,7 +16093,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
15678
16093
  */
15679
16094
  static void
15680
16095
  parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
15681
- if (pm_static_literals_add(parser, keys, node) != NULL) {
16096
+ if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
15682
16097
  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
15683
16098
  }
15684
16099
  }
@@ -15966,7 +16381,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
15966
16381
 
15967
16382
  if (variable == NULL) {
15968
16383
  if (
15969
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
16384
+ (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
15970
16385
  !parser->current_scope->closed &&
15971
16386
  (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
15972
16387
  pm_token_is_it(parser->previous.start, parser->previous.end)
@@ -16040,8 +16455,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
16040
16455
  parser_lex(parser);
16041
16456
 
16042
16457
  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16043
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
16044
- pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, child);
16458
+ pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
16045
16459
 
16046
16460
  return parse_pattern_constant_path(parser, captures, (pm_node_t *) node);
16047
16461
  }
@@ -16367,7 +16781,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
16367
16781
 
16368
16782
  pm_node_list_free(&parts);
16369
16783
  } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16370
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
16784
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16371
16785
  } else if (match1(parser, PM_TOKEN_EOF)) {
16372
16786
  pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16373
16787
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
@@ -16393,7 +16807,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
16393
16807
  pm_node_flag_set(node, parse_unescaped_encoding(parser));
16394
16808
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16395
16809
  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16396
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
16810
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16397
16811
  } else {
16398
16812
  // If we get here, then we have interpolation so we'll need
16399
16813
  // to create a string or symbol node with interpolation.
@@ -16475,11 +16889,11 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
16475
16889
  pm_token_t bounds = not_provided(parser);
16476
16890
 
16477
16891
  pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16478
- pm_interpolated_string_node_append(parser, container, current);
16892
+ pm_interpolated_string_node_append(container, current);
16479
16893
  current = (pm_node_t *) container;
16480
16894
  }
16481
16895
 
16482
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, node);
16896
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16483
16897
  }
16484
16898
  }
16485
16899
 
@@ -16724,13 +17138,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16724
17138
  }
16725
17139
 
16726
17140
  element = (pm_node_t *) pm_keyword_hash_node_create(parser);
16727
- pm_static_literals_t literals = { 0 };
17141
+ pm_static_literals_t hash_keys = { 0 };
16728
17142
 
16729
17143
  if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
16730
- parse_assocs(parser, &literals, element);
17144
+ parse_assocs(parser, &hash_keys, element);
16731
17145
  }
16732
17146
 
16733
- pm_static_literals_free(&literals);
17147
+ pm_static_literals_free(&hash_keys);
16734
17148
  parsed_bare_hash = true;
16735
17149
  } else {
16736
17150
  element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION);
@@ -16741,8 +17155,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16741
17155
  }
16742
17156
 
16743
17157
  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
16744
- pm_static_literals_t literals = { 0 };
16745
- pm_hash_key_static_literals_add(parser, &literals, element);
17158
+ pm_static_literals_t hash_keys = { 0 };
17159
+ pm_hash_key_static_literals_add(parser, &hash_keys, element);
16746
17160
 
16747
17161
  pm_token_t operator;
16748
17162
  if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
@@ -16757,10 +17171,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16757
17171
 
16758
17172
  element = (pm_node_t *) hash;
16759
17173
  if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16760
- parse_assocs(parser, &literals, element);
17174
+ parse_assocs(parser, &hash_keys, element);
16761
17175
  }
16762
17176
 
16763
- pm_static_literals_free(&literals);
17177
+ pm_static_literals_free(&hash_keys);
16764
17178
  parsed_bare_hash = true;
16765
17179
  }
16766
17180
  }
@@ -16854,7 +17268,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16854
17268
  return (pm_node_t *) multi_target;
16855
17269
  }
16856
17270
 
16857
- return parse_target_validate(parser, (pm_node_t *) multi_target);
17271
+ return parse_target_validate(parser, (pm_node_t *) multi_target, false);
16858
17272
  }
16859
17273
 
16860
17274
  // If we have a single statement and are ending on a right parenthesis
@@ -16920,14 +17334,30 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16920
17334
  return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
16921
17335
  }
16922
17336
  case PM_TOKEN_BRACE_LEFT: {
17337
+ // If we were passed a current_hash_keys via the parser, then that
17338
+ // means we're already parsing a hash and we want to share the set
17339
+ // of hash keys with this inner hash we're about to parse for the
17340
+ // sake of warnings. We'll set it to NULL after we grab it to make
17341
+ // sure subsequent expressions don't use it. Effectively this is a
17342
+ // way of getting around passing it to every call to
17343
+ // parse_expression.
17344
+ pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
17345
+ parser->current_hash_keys = NULL;
17346
+
16923
17347
  pm_accepts_block_stack_push(parser, true);
16924
17348
  parser_lex(parser);
16925
17349
 
16926
17350
  pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
16927
- pm_static_literals_t literals = { 0 };
16928
17351
 
16929
17352
  if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
16930
- parse_assocs(parser, &literals, (pm_node_t *) node);
17353
+ if (current_hash_keys != NULL) {
17354
+ parse_assocs(parser, current_hash_keys, (pm_node_t *) node);
17355
+ } else {
17356
+ pm_static_literals_t hash_keys = { 0 };
17357
+ parse_assocs(parser, &hash_keys, (pm_node_t *) node);
17358
+ pm_static_literals_free(&hash_keys);
17359
+ }
17360
+
16931
17361
  accept1(parser, PM_TOKEN_NEWLINE);
16932
17362
  }
16933
17363
 
@@ -16935,7 +17365,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16935
17365
  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
16936
17366
  pm_hash_node_closing_loc_set(node, &parser->previous);
16937
17367
 
16938
- pm_static_literals_free(&literals);
16939
17368
  return (pm_node_t *) node;
16940
17369
  }
16941
17370
  case PM_TOKEN_CHARACTER_LITERAL: {
@@ -17000,12 +17429,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17000
17429
  }
17001
17430
  case PM_TOKEN_UCOLON_COLON: {
17002
17431
  parser_lex(parser);
17003
-
17004
17432
  pm_token_t delimiter = parser->previous;
17005
- expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17006
17433
 
17007
- pm_node_t *constant = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
17008
- pm_node_t *node = (pm_node_t *)pm_constant_path_node_create(parser, NULL, &delimiter, constant);
17434
+ expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17435
+ pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17009
17436
 
17010
17437
  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17011
17438
  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
@@ -17165,8 +17592,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17165
17592
  if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
17166
17593
  // If we get here, then we have an empty heredoc. We'll create
17167
17594
  // an empty content token and return an empty string node.
17168
- lex_mode_pop(parser);
17169
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
17595
+ expect1_heredoc_term(parser, lex_mode);
17170
17596
  pm_token_t content = parse_strings_empty_content(parser->previous.start);
17171
17597
 
17172
17598
  if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
@@ -17207,8 +17633,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17207
17633
  }
17208
17634
 
17209
17635
  node = (pm_node_t *) cast;
17210
- lex_mode_pop(parser);
17211
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
17636
+ expect1_heredoc_term(parser, lex_mode);
17212
17637
  } else {
17213
17638
  // If we get here, then we have multiple parts in the heredoc,
17214
17639
  // so we'll need to create an interpolated string node to hold
@@ -17230,20 +17655,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17230
17655
  pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
17231
17656
  cast->parts = parts;
17232
17657
 
17233
- lex_mode_pop(parser);
17234
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
17235
-
17658
+ expect1_heredoc_term(parser, lex_mode);
17236
17659
  pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
17660
+
17237
17661
  cast->base.location = cast->opening_loc;
17238
17662
  node = (pm_node_t *) cast;
17239
17663
  } else {
17240
17664
  pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
17241
17665
  pm_node_list_free(&parts);
17242
17666
 
17243
- lex_mode_pop(parser);
17244
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
17245
-
17667
+ expect1_heredoc_term(parser, lex_mode);
17246
17668
  pm_interpolated_string_node_closing_set(cast, &parser->previous);
17669
+
17247
17670
  cast->base.location = cast->opening_loc;
17248
17671
  node = (pm_node_t *) cast;
17249
17672
  }
@@ -18145,7 +18568,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18145
18568
  if (match1(parser, PM_TOKEN_COMMA)) {
18146
18569
  index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
18147
18570
  } else {
18148
- index = parse_target(parser, index);
18571
+ index = parse_target(parser, index, false);
18149
18572
  }
18150
18573
 
18151
18574
  context_pop(parser);
@@ -18267,9 +18690,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18267
18690
  pm_token_t double_colon = parser->previous;
18268
18691
 
18269
18692
  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18270
- pm_node_t *constant = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
18271
-
18272
- constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, constant);
18693
+ constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
18273
18694
  }
18274
18695
 
18275
18696
  // Here we retrieve the name of the module. If it wasn't a constant,
@@ -18649,15 +19070,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18649
19070
  // If we hit string content and the current node is
18650
19071
  // an interpolated string, then we need to append
18651
19072
  // the string content to the list of child nodes.
18652
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
19073
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
18653
19074
  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
18654
19075
  // If we hit string content and the current node is
18655
19076
  // a string node, then we need to convert the
18656
19077
  // current node into an interpolated string and add
18657
19078
  // the string content to the list of child nodes.
18658
19079
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
18659
- pm_interpolated_string_node_append(parser, interpolated, current);
18660
- pm_interpolated_string_node_append(parser, interpolated, string);
19080
+ pm_interpolated_string_node_append(interpolated, current);
19081
+ pm_interpolated_string_node_append(interpolated, string);
18661
19082
  current = (pm_node_t *) interpolated;
18662
19083
  } else {
18663
19084
  assert(false && "unreachable");
@@ -18682,7 +19103,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18682
19103
  pm_token_t opening = not_provided(parser);
18683
19104
  pm_token_t closing = not_provided(parser);
18684
19105
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
18685
- pm_interpolated_string_node_append(parser, interpolated, current);
19106
+ pm_interpolated_string_node_append(interpolated, current);
18686
19107
  current = (pm_node_t *) interpolated;
18687
19108
  } else {
18688
19109
  // If we hit an embedded variable and the current
@@ -18691,7 +19112,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18691
19112
  }
18692
19113
 
18693
19114
  pm_node_t *part = parse_string_part(parser);
18694
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
19115
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
18695
19116
  break;
18696
19117
  }
18697
19118
  case PM_TOKEN_EMBEXPR_BEGIN: {
@@ -18711,7 +19132,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18711
19132
  pm_token_t opening = not_provided(parser);
18712
19133
  pm_token_t closing = not_provided(parser);
18713
19134
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
18714
- pm_interpolated_string_node_append(parser, interpolated, current);
19135
+ pm_interpolated_string_node_append(interpolated, current);
18715
19136
  current = (pm_node_t *) interpolated;
18716
19137
  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
18717
19138
  // If we hit an embedded expression and the current
@@ -18722,7 +19143,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18722
19143
  }
18723
19144
 
18724
19145
  pm_node_t *part = parse_string_part(parser);
18725
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
19146
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
18726
19147
  break;
18727
19148
  }
18728
19149
  default:
@@ -18926,7 +19347,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18926
19347
  if (match1(parser, PM_TOKEN_COMMA)) {
18927
19348
  return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX);
18928
19349
  } else {
18929
- return parse_target_validate(parser, splat);
19350
+ return parse_target_validate(parser, splat, true);
18930
19351
  }
18931
19352
  }
18932
19353
  case PM_TOKEN_BANG: {
@@ -20059,8 +20480,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20059
20480
  path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
20060
20481
  } else {
20061
20482
  // Otherwise, this is a constant path. That would look like Foo::Bar.
20062
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
20063
- path = (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
20483
+ path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
20064
20484
  }
20065
20485
 
20066
20486
  // If this is followed by a comma then it is a multiple assignment.
@@ -20099,9 +20519,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20099
20519
  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
20100
20520
  }
20101
20521
  default: {
20102
- pm_parser_err_token(parser, &delimiter, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
20103
- pm_node_t *child = (pm_node_t *) pm_missing_node_create(parser, delimiter.start, delimiter.end);
20104
- return (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
20522
+ expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
20523
+ return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
20105
20524
  }
20106
20525
  }
20107
20526
  }
@@ -21246,25 +21665,28 @@ pm_parser_errors_format(const pm_parser_t *parser, const pm_list_t *error_list,
21246
21665
  pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
21247
21666
 
21248
21667
  size_t column = 0;
21249
- while (column < error->column_end) {
21250
- if (column < error->column_start) {
21251
- pm_buffer_append_byte(buffer, ' ');
21252
- } else {
21253
- const uint8_t caret = column == error->column_start ? '^' : '~';
21668
+ while (column < error->column_start) {
21669
+ pm_buffer_append_byte(buffer, ' ');
21254
21670
 
21255
- if (colorize) {
21256
- pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
21257
- pm_buffer_append_byte(buffer, caret);
21258
- pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
21259
- } else {
21260
- pm_buffer_append_byte(buffer, caret);
21261
- }
21262
- }
21671
+ size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21672
+ column += (char_width == 0 ? 1 : char_width);
21673
+ }
21674
+
21675
+ if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
21676
+ pm_buffer_append_byte(buffer, '^');
21677
+
21678
+ size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21679
+ column += (char_width == 0 ? 1 : char_width);
21680
+
21681
+ while (column < error->column_end) {
21682
+ pm_buffer_append_byte(buffer, '~');
21263
21683
 
21264
21684
  size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21265
21685
  column += (char_width == 0 ? 1 : char_width);
21266
21686
  }
21267
21687
 
21688
+ if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
21689
+
21268
21690
  if (inline_messages) {
21269
21691
  pm_buffer_append_byte(buffer, ' ');
21270
21692
  assert(error->error != NULL);