prism 0.26.0 → 0.28.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +45 -1
  3. data/Makefile +3 -2
  4. data/config.yml +305 -20
  5. data/docs/configuration.md +1 -0
  6. data/ext/prism/api_node.c +884 -879
  7. data/ext/prism/extconf.rb +23 -4
  8. data/ext/prism/extension.c +16 -9
  9. data/ext/prism/extension.h +1 -1
  10. data/include/prism/ast.h +298 -9
  11. data/include/prism/diagnostic.h +15 -5
  12. data/include/prism/options.h +2 -2
  13. data/include/prism/parser.h +10 -0
  14. data/include/prism/static_literals.h +8 -6
  15. data/include/prism/version.h +2 -2
  16. data/lib/prism/dot_visitor.rb +22 -6
  17. data/lib/prism/dsl.rb +8 -8
  18. data/lib/prism/ffi.rb +4 -4
  19. data/lib/prism/inspect_visitor.rb +2156 -0
  20. data/lib/prism/lex_compat.rb +18 -1
  21. data/lib/prism/mutation_compiler.rb +2 -2
  22. data/lib/prism/node.rb +2345 -1964
  23. data/lib/prism/node_ext.rb +34 -5
  24. data/lib/prism/parse_result/newlines.rb +0 -2
  25. data/lib/prism/parse_result.rb +137 -13
  26. data/lib/prism/pattern.rb +12 -6
  27. data/lib/prism/polyfill/byteindex.rb +13 -0
  28. data/lib/prism/polyfill/unpack1.rb +14 -0
  29. data/lib/prism/reflection.rb +21 -31
  30. data/lib/prism/serialize.rb +27 -17
  31. data/lib/prism/translation/parser/compiler.rb +34 -15
  32. data/lib/prism/translation/parser.rb +6 -6
  33. data/lib/prism/translation/ripper.rb +72 -68
  34. data/lib/prism/translation/ruby_parser.rb +69 -31
  35. data/lib/prism.rb +3 -2
  36. data/prism.gemspec +36 -38
  37. data/rbi/prism/compiler.rbi +3 -5
  38. data/rbi/prism/inspect_visitor.rbi +12 -0
  39. data/rbi/prism/node.rbi +359 -321
  40. data/rbi/prism/parse_result.rbi +85 -34
  41. data/rbi/prism/reflection.rbi +7 -13
  42. data/rbi/prism/translation/ripper.rbi +1 -11
  43. data/rbi/prism.rbi +9 -9
  44. data/sig/prism/dsl.rbs +3 -3
  45. data/sig/prism/inspect_visitor.rbs +22 -0
  46. data/sig/prism/node.rbs +68 -48
  47. data/sig/prism/parse_result.rbs +42 -10
  48. data/sig/prism/reflection.rbs +2 -8
  49. data/sig/prism/serialize.rbs +2 -3
  50. data/sig/prism.rbs +9 -9
  51. data/src/diagnostic.c +44 -24
  52. data/src/node.c +41 -16
  53. data/src/options.c +2 -2
  54. data/src/prettyprint.c +61 -18
  55. data/src/prism.c +623 -188
  56. data/src/serialize.c +5 -2
  57. data/src/static_literals.c +120 -34
  58. data/src/token_type.c +4 -4
  59. data/src/util/pm_integer.c +9 -2
  60. metadata +7 -9
  61. data/lib/prism/node_inspector.rb +0 -68
  62. data/lib/prism/polyfill/string.rb +0 -12
  63. data/rbi/prism/desugar_compiler.rbi +0 -5
  64. data/rbi/prism/mutation_compiler.rbi +0 -5
  65. data/rbi/prism/translation/parser/compiler.rbi +0 -13
  66. data/rbi/prism/translation/ripper/ripper_compiler.rbi +0 -5
  67. data/rbi/prism/translation/ruby_parser.rbi +0 -11
data/src/prism.c CHANGED
@@ -672,6 +672,26 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
672
672
  #define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
673
673
  PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
674
674
 
675
+ /**
676
+ * Add an error for an expected heredoc terminator. This is a special function
677
+ * only because it grabs its location off of a lex mode instead of a node or a
678
+ * token.
679
+ */
680
+ static void
681
+ pm_parser_err_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
682
+ const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
683
+ size_t ident_length = lex_mode->as.heredoc.ident_length;
684
+
685
+ PM_PARSER_ERR_FORMAT(
686
+ parser,
687
+ ident_start,
688
+ ident_start + ident_length,
689
+ PM_ERR_HEREDOC_TERM,
690
+ (int) ident_length,
691
+ (const char *) ident_start
692
+ );
693
+ }
694
+
675
695
  /******************************************************************************/
676
696
  /* Scope-related functions */
677
697
  /******************************************************************************/
@@ -1405,7 +1425,7 @@ pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1405
1425
  static inline void
1406
1426
  pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1407
1427
  if (pm_conditional_predicate_warn_write_literal_p(node)) {
1408
- pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3_0 : PM_WARN_EQUAL_IN_CONDITIONAL);
1428
+ pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1409
1429
  }
1410
1430
  }
1411
1431
 
@@ -2923,6 +2943,29 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
2923
2943
  return node;
2924
2944
  }
2925
2945
 
2946
+ /**
2947
+ * Validate that index expressions do not have keywords or blocks if we are
2948
+ * parsing as Ruby 3.4+.
2949
+ */
2950
+ static void
2951
+ pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2952
+ if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
2953
+ if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2954
+ pm_node_t *node;
2955
+ PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2956
+ if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2957
+ pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2958
+ break;
2959
+ }
2960
+ }
2961
+ }
2962
+
2963
+ if (block != NULL) {
2964
+ pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2965
+ }
2966
+ }
2967
+ }
2968
+
2926
2969
  /**
2927
2970
  * Allocate and initialize a new IndexAndWriteNode node.
2928
2971
  */
@@ -2931,6 +2974,8 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons
2931
2974
  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2932
2975
  pm_index_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_and_write_node_t);
2933
2976
 
2977
+ pm_index_arguments_check(parser, target->arguments, target->block);
2978
+
2934
2979
  *node = (pm_index_and_write_node_t) {
2935
2980
  {
2936
2981
  .type = PM_INDEX_AND_WRITE_NODE,
@@ -3002,6 +3047,8 @@ static pm_index_operator_write_node_t *
3002
3047
  pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3003
3048
  pm_index_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_operator_write_node_t);
3004
3049
 
3050
+ pm_index_arguments_check(parser, target->arguments, target->block);
3051
+
3005
3052
  *node = (pm_index_operator_write_node_t) {
3006
3053
  {
3007
3054
  .type = PM_INDEX_OPERATOR_WRITE_NODE,
@@ -3075,6 +3122,8 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
3075
3122
  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3076
3123
  pm_index_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_or_write_node_t);
3077
3124
 
3125
+ pm_index_arguments_check(parser, target->arguments, target->block);
3126
+
3078
3127
  *node = (pm_index_or_write_node_t) {
3079
3128
  {
3080
3129
  .type = PM_INDEX_OR_WRITE_NODE,
@@ -3139,6 +3188,8 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3139
3188
  pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
3140
3189
  pm_node_flags_t flags = target->base.flags;
3141
3190
 
3191
+ pm_index_arguments_check(parser, target->arguments, target->block);
3192
+
3142
3193
  *node = (pm_index_target_node_t) {
3143
3194
  {
3144
3195
  .type = PM_INDEX_TARGET_NODE,
@@ -3510,22 +3561,27 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node
3510
3561
  * Allocate and initialize a new ConstantPathNode node.
3511
3562
  */
3512
3563
  static pm_constant_path_node_t *
3513
- pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, pm_node_t *child) {
3564
+ pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3514
3565
  pm_assert_value_expression(parser, parent);
3515
-
3516
3566
  pm_constant_path_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_node_t);
3517
3567
 
3568
+ pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3569
+ if (name_token->type == PM_TOKEN_CONSTANT) {
3570
+ name = pm_parser_constant_id_token(parser, name_token);
3571
+ }
3572
+
3518
3573
  *node = (pm_constant_path_node_t) {
3519
3574
  {
3520
3575
  .type = PM_CONSTANT_PATH_NODE,
3521
3576
  .location = {
3522
3577
  .start = parent == NULL ? delimiter->start : parent->location.start,
3523
- .end = child->location.end
3578
+ .end = name_token->end
3524
3579
  },
3525
3580
  },
3526
3581
  .parent = parent,
3527
- .child = child,
3528
- .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter)
3582
+ .name = name,
3583
+ .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3584
+ .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3529
3585
  };
3530
3586
 
3531
3587
  return node;
@@ -3716,6 +3772,113 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3716
3772
  }
3717
3773
  }
3718
3774
 
3775
+ /**
3776
+ * When a method body is created, we want to check if the last statement is a
3777
+ * return or a statement that houses a return. If it is, then we want to mark
3778
+ * that return as being redundant so that we can compile it differently but also
3779
+ * so that we can indicate that to the user.
3780
+ */
3781
+ static void
3782
+ pm_def_node_body_redundant_return(pm_node_t *node) {
3783
+ switch (PM_NODE_TYPE(node)) {
3784
+ case PM_RETURN_NODE:
3785
+ node->flags |= PM_RETURN_NODE_FLAGS_REDUNDANT;
3786
+ break;
3787
+ case PM_BEGIN_NODE: {
3788
+ pm_begin_node_t *cast = (pm_begin_node_t *) node;
3789
+
3790
+ if (cast->statements != NULL && cast->else_clause == NULL) {
3791
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3792
+ }
3793
+ break;
3794
+ }
3795
+ case PM_STATEMENTS_NODE: {
3796
+ pm_statements_node_t *cast = (pm_statements_node_t *) node;
3797
+
3798
+ if (cast->body.size > 0) {
3799
+ pm_def_node_body_redundant_return(cast->body.nodes[cast->body.size - 1]);
3800
+ }
3801
+ break;
3802
+ }
3803
+ case PM_IF_NODE: {
3804
+ pm_if_node_t *cast = (pm_if_node_t *) node;
3805
+
3806
+ if (cast->statements != NULL) {
3807
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3808
+ }
3809
+
3810
+ if (cast->consequent != NULL) {
3811
+ pm_def_node_body_redundant_return(cast->consequent);
3812
+ }
3813
+ break;
3814
+ }
3815
+ case PM_UNLESS_NODE: {
3816
+ pm_unless_node_t *cast = (pm_unless_node_t *) node;
3817
+
3818
+ if (cast->statements != NULL) {
3819
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3820
+ }
3821
+
3822
+ if (cast->consequent != NULL) {
3823
+ pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
3824
+ }
3825
+ break;
3826
+ }
3827
+ case PM_ELSE_NODE: {
3828
+ pm_else_node_t *cast = (pm_else_node_t *) node;
3829
+
3830
+ if (cast->statements != NULL) {
3831
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3832
+ }
3833
+ break;
3834
+ }
3835
+ case PM_CASE_NODE: {
3836
+ pm_case_node_t *cast = (pm_case_node_t *) node;
3837
+ pm_node_t *condition;
3838
+
3839
+ PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
3840
+ pm_def_node_body_redundant_return(condition);
3841
+ }
3842
+
3843
+ if (cast->consequent != NULL) {
3844
+ pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
3845
+ }
3846
+ break;
3847
+ }
3848
+ case PM_WHEN_NODE: {
3849
+ pm_when_node_t *cast = (pm_when_node_t *) node;
3850
+
3851
+ if (cast->statements != NULL) {
3852
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3853
+ }
3854
+ break;
3855
+ }
3856
+ case PM_CASE_MATCH_NODE: {
3857
+ pm_case_match_node_t *cast = (pm_case_match_node_t *) node;
3858
+ pm_node_t *condition;
3859
+
3860
+ PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
3861
+ pm_def_node_body_redundant_return(condition);
3862
+ }
3863
+
3864
+ if (cast->consequent != NULL) {
3865
+ pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
3866
+ }
3867
+ break;
3868
+ }
3869
+ case PM_IN_NODE: {
3870
+ pm_in_node_t *cast = (pm_in_node_t *) node;
3871
+
3872
+ if (cast->statements != NULL) {
3873
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3874
+ }
3875
+ break;
3876
+ }
3877
+ default:
3878
+ break;
3879
+ }
3880
+ }
3881
+
3719
3882
  /**
3720
3883
  * Allocate and initialize a new DefNode node.
3721
3884
  */
@@ -3748,6 +3911,10 @@ pm_def_node_create(
3748
3911
  pm_def_node_receiver_check(parser, receiver);
3749
3912
  }
3750
3913
 
3914
+ if (body != NULL) {
3915
+ pm_def_node_body_redundant_return(body);
3916
+ }
3917
+
3751
3918
  *node = (pm_def_node_t) {
3752
3919
  {
3753
3920
  .type = PM_DEF_NODE,
@@ -4922,6 +5089,50 @@ pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable
4922
5089
  return node;
4923
5090
  }
4924
5091
 
5092
+ /**
5093
+ * Append a part into a list of string parts. Importantly this handles nested
5094
+ * interpolated strings by not necessarily removing the marker for static
5095
+ * literals.
5096
+ */
5097
+ static void
5098
+ pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
5099
+ switch (PM_NODE_TYPE(part)) {
5100
+ case PM_STRING_NODE:
5101
+ pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5102
+ break;
5103
+ case PM_EMBEDDED_STATEMENTS_NODE: {
5104
+ pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5105
+ pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5106
+
5107
+ if (embedded == NULL) {
5108
+ // If there are no statements or more than one statement, then
5109
+ // we lose the static literal flag.
5110
+ pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5111
+ } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5112
+ // If the embedded statement is a string, then we can keep the
5113
+ // static literal flag and mark the string as frozen.
5114
+ pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5115
+ } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5116
+ // If the embedded statement is an interpolated string and it's
5117
+ // a static literal, then we can keep the static literal flag.
5118
+ } else {
5119
+ // Otherwise we lose the static literal flag.
5120
+ pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5121
+ }
5122
+
5123
+ break;
5124
+ }
5125
+ case PM_EMBEDDED_VARIABLE_NODE:
5126
+ pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5127
+ break;
5128
+ default:
5129
+ assert(false && "unexpected node type");
5130
+ break;
5131
+ }
5132
+
5133
+ pm_node_list_append(parts, part);
5134
+ }
5135
+
4925
5136
  /**
4926
5137
  * Allocate a new InterpolatedRegularExpressionNode node.
4927
5138
  */
@@ -4955,54 +5166,113 @@ pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expressio
4955
5166
  node->base.location.end = part->location.end;
4956
5167
  }
4957
5168
 
4958
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
4959
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4960
- }
4961
-
4962
- if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
4963
- pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
4964
- }
4965
-
4966
- pm_node_list_append(&node->parts, part);
5169
+ pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
4967
5170
  }
4968
5171
 
4969
5172
  static inline void
4970
5173
  pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
4971
5174
  node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
4972
5175
  node->base.location.end = closing->end;
4973
- pm_node_flag_set((pm_node_t *)node, pm_regular_expression_flags_create(parser, closing));
5176
+ pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
4974
5177
  }
4975
5178
 
4976
5179
  /**
4977
5180
  * Append a part to an InterpolatedStringNode node.
5181
+ *
5182
+ * This has somewhat complicated semantics, because we need to update
5183
+ * multiple flags that have somewhat confusing interactions.
5184
+ *
5185
+ * PM_NODE_FLAG_STATIC_LITERAL indicates that the node should be treated as a
5186
+ * single static literal string that can be pushed onto the stack on its own.
5187
+ * Note that this alone doesn't determine whether the string will be frozen or
5188
+ * not; the instructions in CRuby will be either putobject or putstring,
5189
+ * depending on the combination of `--enable-frozen-string-literal`,
5190
+ * `# frozen_string_literal: true`, and whether or not there is interpolation.
5191
+ *
5192
+ * PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN indicates that the string should be
5193
+ * explicitly frozen. This will only happen if the string is comprised entirely
5194
+ * of string parts that are themselves static literals and frozen.
5195
+ *
5196
+ * PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE indicates that the string should
5197
+ * be explicitly marked as mutable. This will happen from
5198
+ * `--disable-frozen-string-literal` or `# frozen_string_literal: false`. This
5199
+ * is necessary to indicate that the string should be left up to the runtime,
5200
+ * which could potentially use a chilled string otherwise.
4978
5201
  */
4979
5202
  static inline void
4980
- pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_node_t *node, pm_node_t *part) {
5203
+ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
5204
+ #define CLEAR_FLAGS(node) \
5205
+ node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5206
+
5207
+ #define MUTABLE_FLAGS(node) \
5208
+ node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5209
+
4981
5210
  if (node->parts.size == 0 && node->opening_loc.start == NULL) {
4982
5211
  node->base.location.start = part->location.start;
4983
5212
  }
4984
5213
 
4985
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
4986
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4987
- }
5214
+ node->base.location.end = MAX(node->base.location.end, part->location.end);
5215
+
5216
+ switch (PM_NODE_TYPE(part)) {
5217
+ case PM_STRING_NODE:
5218
+ pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5219
+ break;
5220
+ case PM_INTERPOLATED_STRING_NODE:
5221
+ if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5222
+ // If the string that we're concatenating is a static literal,
5223
+ // then we can keep the static literal flag for this string.
5224
+ } else {
5225
+ // Otherwise, we lose the static literal flag here and we should
5226
+ // also clear the mutability flags.
5227
+ CLEAR_FLAGS(node);
5228
+ }
5229
+ break;
5230
+ case PM_EMBEDDED_STATEMENTS_NODE: {
5231
+ pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5232
+ pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5233
+
5234
+ if (embedded == NULL) {
5235
+ // If we're embedding multiple statements or no statements, then
5236
+ // the string is no longer a static literal.
5237
+ CLEAR_FLAGS(node);
5238
+ } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5239
+ // If the embedded statement is a string, then we can mark that
5240
+ // string as frozen and static literal, and not touch the static
5241
+ // literal status of this string.
5242
+ pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5243
+
5244
+ if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5245
+ MUTABLE_FLAGS(node);
5246
+ }
5247
+ } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5248
+ // If the embedded statement is an interpolated string, but that
5249
+ // string is marked as static literal, then we can keep our
5250
+ // static literal status for this string.
5251
+ if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5252
+ MUTABLE_FLAGS(node);
5253
+ }
5254
+ } else {
5255
+ // In all other cases, we lose the static literal flag here and
5256
+ // clear both the frozen and mutable flags.
5257
+ CLEAR_FLAGS(node);
5258
+ }
4988
5259
 
4989
- if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
4990
- pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE);
5260
+ break;
5261
+ }
5262
+ case PM_EMBEDDED_VARIABLE_NODE:
5263
+ // Embedded variables clear static literal, which means we also
5264
+ // should clear the mutability flags.
5265
+ CLEAR_FLAGS(node);
5266
+ break;
5267
+ default:
5268
+ assert(false && "unexpected node type");
5269
+ break;
4991
5270
  }
4992
5271
 
4993
5272
  pm_node_list_append(&node->parts, part);
4994
- node->base.location.end = MAX(node->base.location.end, part->location.end);
4995
5273
 
4996
- if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4997
- switch (parser->frozen_string_literal) {
4998
- case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
4999
- pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE);
5000
- break;
5001
- case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5002
- pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5003
- break;
5004
- }
5005
- }
5274
+ #undef CLEAR_FLAGS
5275
+ #undef MUTABLE_FLAGS
5006
5276
  }
5007
5277
 
5008
5278
  /**
@@ -5011,11 +5281,21 @@ pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_n
5011
5281
  static pm_interpolated_string_node_t *
5012
5282
  pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5013
5283
  pm_interpolated_string_node_t *node = PM_ALLOC_NODE(parser, pm_interpolated_string_node_t);
5284
+ pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5285
+
5286
+ switch (parser->frozen_string_literal) {
5287
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5288
+ flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5289
+ break;
5290
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5291
+ flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5292
+ break;
5293
+ }
5014
5294
 
5015
5295
  *node = (pm_interpolated_string_node_t) {
5016
5296
  {
5017
5297
  .type = PM_INTERPOLATED_STRING_NODE,
5018
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
5298
+ .flags = flags,
5019
5299
  .location = {
5020
5300
  .start = opening->start,
5021
5301
  .end = closing->end,
@@ -5029,7 +5309,7 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin
5029
5309
  if (parts != NULL) {
5030
5310
  pm_node_t *part;
5031
5311
  PM_NODE_LIST_FOREACH(parts, index, part) {
5032
- pm_interpolated_string_node_append(parser, node, part);
5312
+ pm_interpolated_string_node_append(node, part);
5033
5313
  }
5034
5314
  }
5035
5315
 
@@ -5051,15 +5331,7 @@ pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_
5051
5331
  node->base.location.start = part->location.start;
5052
5332
  }
5053
5333
 
5054
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
5055
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5056
- }
5057
-
5058
- if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5059
- pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5060
- }
5061
-
5062
- pm_node_list_append(&node->parts, part);
5334
+ pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5063
5335
  node->base.location.end = MAX(node->base.location.end, part->location.end);
5064
5336
  }
5065
5337
 
@@ -5125,11 +5397,7 @@ pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *openi
5125
5397
 
5126
5398
  static inline void
5127
5399
  pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5128
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
5129
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5130
- }
5131
-
5132
- pm_node_list_append(&node->parts, part);
5400
+ pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5133
5401
  node->base.location.end = part->location.end;
5134
5402
  }
5135
5403
 
@@ -6397,6 +6665,7 @@ pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argumen
6397
6665
  *node = (pm_return_node_t) {
6398
6666
  {
6399
6667
  .type = PM_RETURN_NODE,
6668
+ .flags = 0,
6400
6669
  .location = {
6401
6670
  .start = keyword->start,
6402
6671
  .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
@@ -6729,7 +6998,8 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
6729
6998
  }
6730
6999
 
6731
7000
  /**
6732
- * Read through the contents of a string and check if it consists solely of US ASCII code points.
7001
+ * Read through the contents of a string and check if it consists solely of
7002
+ * US-ASCII code points.
6733
7003
  */
6734
7004
  static bool
6735
7005
  pm_ascii_only_p(const pm_string_t *contents) {
@@ -6743,27 +7013,72 @@ pm_ascii_only_p(const pm_string_t *contents) {
6743
7013
  return true;
6744
7014
  }
6745
7015
 
7016
+ /**
7017
+ * Validate that the contents of the given symbol are all valid UTF-8.
7018
+ */
7019
+ static void
7020
+ parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7021
+ for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7022
+ size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
7023
+
7024
+ if (width == 0) {
7025
+ pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7026
+ break;
7027
+ }
7028
+
7029
+ cursor += width;
7030
+ }
7031
+ }
7032
+
7033
+ /**
7034
+ * Validate that the contents of the given symbol are all valid in the encoding
7035
+ * of the parser.
7036
+ */
7037
+ static void
7038
+ parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7039
+ const pm_encoding_t *encoding = parser->encoding;
7040
+
7041
+ for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7042
+ size_t width = encoding->char_width(cursor, end - cursor);
7043
+
7044
+ if (width == 0) {
7045
+ pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7046
+ break;
7047
+ }
7048
+
7049
+ cursor += width;
7050
+ }
7051
+ }
7052
+
6746
7053
  /**
6747
7054
  * Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
6748
7055
  * encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
6749
7056
  * points. Otherwise, the encoding may be explicitly set with an escape
6750
7057
  * sequence.
7058
+ *
7059
+ * If the validate flag is set, then it will check the contents of the symbol
7060
+ * to ensure that all characters are valid in the encoding.
6751
7061
  */
6752
7062
  static inline pm_node_flags_t
6753
- parse_symbol_encoding(const pm_parser_t *parser, const pm_string_t *contents) {
7063
+ parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
6754
7064
  if (parser->explicit_encoding != NULL) {
6755
7065
  // A Symbol may optionally have its encoding explicitly set. This will
6756
7066
  // happen if an escape sequence results in a non-ASCII code point.
6757
7067
  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7068
+ if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
6758
7069
  return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
6759
7070
  } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6760
7071
  return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
7072
+ } else if (validate) {
7073
+ parse_symbol_encoding_validate_other(parser, location, contents);
6761
7074
  }
6762
7075
  } else if (pm_ascii_only_p(contents)) {
6763
7076
  // Ruby stipulates that all source files must use an ASCII-compatible
6764
7077
  // encoding. Thus, all symbols appearing in source are eligible for
6765
7078
  // "downgrading" to US-ASCII.
6766
7079
  return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
7080
+ } else if (validate) {
7081
+ parse_symbol_encoding_validate_other(parser, location, contents);
6767
7082
  }
6768
7083
 
6769
7084
  return 0;
@@ -6931,7 +7246,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
6931
7246
  */
6932
7247
  static pm_symbol_node_t *
6933
7248
  pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6934
- pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, &parser->current_string));
7249
+ pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
6935
7250
  parser->current_string = PM_STRING_EMPTY;
6936
7251
  return node;
6937
7252
  }
@@ -6953,7 +7268,7 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
6953
7268
 
6954
7269
  assert((label.end - label.start) >= 0);
6955
7270
  pm_string_shared_init(&node->unescaped, label.start, label.end);
6956
- pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &node->unescaped));
7271
+ pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
6957
7272
 
6958
7273
  break;
6959
7274
  }
@@ -7038,7 +7353,8 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
7038
7353
  .unescaped = node->unescaped
7039
7354
  };
7040
7355
 
7041
- pm_node_flag_set((pm_node_t *)new_node, parse_symbol_encoding(parser, &node->unescaped));
7356
+ pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
7357
+ pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
7042
7358
 
7043
7359
  // We are explicitly _not_ using pm_node_destroy here because we don't want
7044
7360
  // to trash the unescaped string. We could instead copy the string if we
@@ -7574,7 +7890,7 @@ pm_local_variable_read_node_create_it(pm_parser_t *parser, const pm_token_t *nam
7574
7890
  static pm_node_t *
7575
7891
  pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
7576
7892
  if (
7577
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
7893
+ (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
7578
7894
  !parser->current_scope->closed &&
7579
7895
  (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
7580
7896
  pm_node_is_it(parser, node)
@@ -8298,10 +8614,11 @@ context_human(pm_context_t context) {
8298
8614
  /* Specific token lexers */
8299
8615
  /******************************************************************************/
8300
8616
 
8301
- static void
8302
- pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *invalid) {
8617
+ static inline void
8618
+ pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8303
8619
  if (invalid != NULL) {
8304
- pm_parser_err(parser, invalid, invalid + 1, PM_ERR_INVALID_NUMBER_UNDERSCORE);
8620
+ pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8621
+ pm_parser_err(parser, invalid, invalid + 1, diag_id);
8305
8622
  }
8306
8623
  }
8307
8624
 
@@ -8309,7 +8626,7 @@ static size_t
8309
8626
  pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8310
8627
  const uint8_t *invalid = NULL;
8311
8628
  size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8312
- pm_strspn_number_validate(parser, invalid);
8629
+ pm_strspn_number_validate(parser, string, length, invalid);
8313
8630
  return length;
8314
8631
  }
8315
8632
 
@@ -8317,7 +8634,7 @@ static size_t
8317
8634
  pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8318
8635
  const uint8_t *invalid = NULL;
8319
8636
  size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8320
- pm_strspn_number_validate(parser, invalid);
8637
+ pm_strspn_number_validate(parser, string, length, invalid);
8321
8638
  return length;
8322
8639
  }
8323
8640
 
@@ -8325,7 +8642,7 @@ static size_t
8325
8642
  pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8326
8643
  const uint8_t *invalid = NULL;
8327
8644
  size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8328
- pm_strspn_number_validate(parser, invalid);
8645
+ pm_strspn_number_validate(parser, string, length, invalid);
8329
8646
  return length;
8330
8647
  }
8331
8648
 
@@ -8333,7 +8650,7 @@ static size_t
8333
8650
  pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8334
8651
  const uint8_t *invalid = NULL;
8335
8652
  size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8336
- pm_strspn_number_validate(parser, invalid);
8653
+ pm_strspn_number_validate(parser, string, length, invalid);
8337
8654
  return length;
8338
8655
  }
8339
8656
 
@@ -8395,6 +8712,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8395
8712
  if (pm_char_is_decimal_digit(peek(parser))) {
8396
8713
  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8397
8714
  } else {
8715
+ match(parser, '_');
8398
8716
  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8399
8717
  }
8400
8718
 
@@ -8407,6 +8725,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8407
8725
  if (pm_char_is_binary_digit(peek(parser))) {
8408
8726
  parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8409
8727
  } else {
8728
+ match(parser, '_');
8410
8729
  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8411
8730
  }
8412
8731
 
@@ -8420,6 +8739,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8420
8739
  if (pm_char_is_octal_digit(peek(parser))) {
8421
8740
  parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8422
8741
  } else {
8742
+ match(parser, '_');
8423
8743
  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8424
8744
  }
8425
8745
 
@@ -8447,6 +8767,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8447
8767
  if (pm_char_is_hexadecimal_digit(peek(parser))) {
8448
8768
  parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8449
8769
  } else {
8770
+ match(parser, '_');
8450
8771
  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8451
8772
  }
8452
8773
 
@@ -8567,7 +8888,7 @@ lex_global_variable(pm_parser_t *parser) {
8567
8888
  } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
8568
8889
 
8569
8890
  // $0 isn't allowed to be followed by anything.
8570
- pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3_0 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8891
+ pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8571
8892
  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
8572
8893
  }
8573
8894
 
@@ -8603,7 +8924,7 @@ lex_global_variable(pm_parser_t *parser) {
8603
8924
  } else {
8604
8925
  // If we get here, then we have a $ followed by something that
8605
8926
  // isn't recognized as a global variable.
8606
- pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3_0 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8927
+ pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8607
8928
  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8608
8929
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
8609
8930
  }
@@ -9241,22 +9562,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9241
9562
  const uint8_t *start = parser->current.end - 1;
9242
9563
  parser->current.end++;
9243
9564
 
9244
- if (
9245
- (parser->current.end + 4 <= parser->end) &&
9246
- pm_char_is_hexadecimal_digit(parser->current.end[0]) &&
9247
- pm_char_is_hexadecimal_digit(parser->current.end[1]) &&
9248
- pm_char_is_hexadecimal_digit(parser->current.end[2]) &&
9249
- pm_char_is_hexadecimal_digit(parser->current.end[3])
9250
- ) {
9251
- uint32_t value = escape_unicode(parser->current.end, 4);
9252
-
9253
- if (flags & PM_ESCAPE_FLAG_REGEXP) {
9254
- pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9255
- }
9256
- escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9257
-
9258
- parser->current.end += 4;
9259
- } else if (peek(parser) == '{') {
9565
+ if (peek(parser) == '{') {
9260
9566
  const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9261
9567
 
9262
9568
  parser->current.end++;
@@ -9306,7 +9612,21 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9306
9612
  pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9307
9613
  }
9308
9614
  } else {
9309
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9615
+ size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9616
+
9617
+ if (length == 4) {
9618
+ uint32_t value = escape_unicode(parser->current.end, 4);
9619
+
9620
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
9621
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9622
+ }
9623
+
9624
+ escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9625
+ parser->current.end += 4;
9626
+ } else {
9627
+ parser->current.end += length;
9628
+ pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9629
+ }
9310
9630
  }
9311
9631
 
9312
9632
  return;
@@ -9560,8 +9880,8 @@ lex_at_variable(pm_parser_t *parser) {
9560
9880
  }
9561
9881
  } else if (parser->current.end < parser->end && pm_char_is_decimal_digit(*parser->current.end)) {
9562
9882
  pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
9563
- if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0) {
9564
- diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3_0 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3_0;
9883
+ if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
9884
+ diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
9565
9885
  }
9566
9886
 
9567
9887
  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
@@ -10545,8 +10865,11 @@ parser_lex(pm_parser_t *parser) {
10545
10865
  }
10546
10866
 
10547
10867
  size_t ident_length = (size_t) (parser->current.end - ident_start);
10868
+ bool ident_error = false;
10869
+
10548
10870
  if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
10549
- // TODO: handle unterminated heredoc
10871
+ pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
10872
+ ident_error = true;
10550
10873
  }
10551
10874
 
10552
10875
  parser->explicit_encoding = NULL;
@@ -10571,7 +10894,7 @@ parser_lex(pm_parser_t *parser) {
10571
10894
  // this is not a valid heredoc declaration. In this case we
10572
10895
  // will add an error, but we will still return a heredoc
10573
10896
  // start.
10574
- pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
10897
+ if (!ident_error) pm_parser_err_heredoc_term(parser, parser->lex_modes.current);
10575
10898
  body_start = parser->end;
10576
10899
  } else {
10577
10900
  // Otherwise, we want to indicate that the body of the
@@ -11898,7 +12221,7 @@ parser_lex(pm_parser_t *parser) {
11898
12221
  // terminator) but still continue parsing so that content after the
11899
12222
  // declaration of the heredoc can be parsed.
11900
12223
  if (parser->current.end >= parser->end) {
11901
- pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
12224
+ pm_parser_err_heredoc_term(parser, lex_mode);
11902
12225
  parser->next_start = lex_mode->as.heredoc.next_start;
11903
12226
  parser->heredoc_end = parser->current.end;
11904
12227
  lex_state_set(parser, PM_LEX_STATE_END);
@@ -12537,6 +12860,23 @@ expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_to
12537
12860
  parser->previous.type = PM_TOKEN_MISSING;
12538
12861
  }
12539
12862
 
12863
+ /**
12864
+ * A special expect1 that expects a heredoc terminator and handles popping the
12865
+ * lex mode accordingly.
12866
+ */
12867
+ static void
12868
+ expect1_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
12869
+ if (match1(parser, PM_TOKEN_HEREDOC_END)) {
12870
+ lex_mode_pop(parser);
12871
+ parser_lex(parser);
12872
+ } else {
12873
+ pm_parser_err_heredoc_term(parser, lex_mode);
12874
+ lex_mode_pop(parser);
12875
+ parser->previous.start = parser->previous.end;
12876
+ parser->previous.type = PM_TOKEN_MISSING;
12877
+ }
12878
+ }
12879
+
12540
12880
  static pm_node_t *
12541
12881
  parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id);
12542
12882
 
@@ -12664,25 +13004,72 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
12664
13004
  *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
12665
13005
  }
12666
13006
 
13007
+ /**
13008
+ * Certain expressions are not targetable, but in order to provide a better
13009
+ * experience we give a specific error message. In order to maintain as much
13010
+ * information in the tree as possible, we replace them with local variable
13011
+ * writes.
13012
+ */
13013
+ static pm_node_t *
13014
+ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13015
+ switch (PM_NODE_TYPE(target)) {
13016
+ case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13017
+ case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13018
+ case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13019
+ case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13020
+ case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13021
+ case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13022
+ case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13023
+ default: break;
13024
+ }
13025
+
13026
+ pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
13027
+ pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
13028
+
13029
+ pm_node_destroy(parser, target);
13030
+ return (pm_node_t *) result;
13031
+ }
13032
+
12667
13033
  /**
12668
13034
  * Convert the given node into a valid target node.
12669
13035
  */
12670
13036
  static pm_node_t *
12671
- parse_target(pm_parser_t *parser, pm_node_t *target) {
13037
+ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
12672
13038
  switch (PM_NODE_TYPE(target)) {
12673
13039
  case PM_MISSING_NODE:
12674
13040
  return target;
13041
+ case PM_SOURCE_ENCODING_NODE:
13042
+ case PM_FALSE_NODE:
13043
+ case PM_SOURCE_FILE_NODE:
13044
+ case PM_SOURCE_LINE_NODE:
13045
+ case PM_NIL_NODE:
13046
+ case PM_SELF_NODE:
13047
+ case PM_TRUE_NODE: {
13048
+ // In these special cases, we have specific error messages and we
13049
+ // will replace them with local variable writes.
13050
+ return parse_unwriteable_target(parser, target);
13051
+ }
12675
13052
  case PM_CLASS_VARIABLE_READ_NODE:
12676
13053
  assert(sizeof(pm_class_variable_target_node_t) == sizeof(pm_class_variable_read_node_t));
12677
13054
  target->type = PM_CLASS_VARIABLE_TARGET_NODE;
12678
13055
  return target;
12679
13056
  case PM_CONSTANT_PATH_NODE:
13057
+ if (context_def_p(parser)) {
13058
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13059
+ }
13060
+
12680
13061
  assert(sizeof(pm_constant_path_target_node_t) == sizeof(pm_constant_path_node_t));
12681
13062
  target->type = PM_CONSTANT_PATH_TARGET_NODE;
13063
+
12682
13064
  return target;
12683
13065
  case PM_CONSTANT_READ_NODE:
13066
+ if (context_def_p(parser)) {
13067
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13068
+ }
13069
+
12684
13070
  assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
12685
13071
  target->type = PM_CONSTANT_TARGET_NODE;
13072
+
12686
13073
  return target;
12687
13074
  case PM_BACK_REFERENCE_READ_NODE:
12688
13075
  case PM_NUMBERED_REFERENCE_READ_NODE:
@@ -12715,7 +13102,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
12715
13102
  pm_splat_node_t *splat = (pm_splat_node_t *) target;
12716
13103
 
12717
13104
  if (splat->expression != NULL) {
12718
- splat->expression = parse_target(parser, splat->expression);
13105
+ splat->expression = parse_target(parser, splat->expression, multiple);
12719
13106
  }
12720
13107
 
12721
13108
  return (pm_node_t *) splat;
@@ -12753,6 +13140,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
12753
13140
  }
12754
13141
 
12755
13142
  if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
13143
+ if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13144
+ pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13145
+ }
13146
+
12756
13147
  parse_write_name(parser, &call->name);
12757
13148
  return (pm_node_t *) pm_call_target_node_create(parser, call);
12758
13149
  }
@@ -12780,8 +13171,8 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
12780
13171
  * assignment.
12781
13172
  */
12782
13173
  static pm_node_t *
12783
- parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
12784
- pm_node_t *result = parse_target(parser, target);
13174
+ parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13175
+ pm_node_t *result = parse_target(parser, target, multiple);
12785
13176
 
12786
13177
  // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in parens after the targets.
12787
13178
  if (
@@ -12826,13 +13217,20 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
12826
13217
  }
12827
13218
  case PM_CONSTANT_PATH_NODE: {
12828
13219
  pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
13220
+
13221
+ if (context_def_p(parser)) {
13222
+ pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13223
+ }
13224
+
12829
13225
  return parse_shareable_constant_write(parser, node);
12830
13226
  }
12831
13227
  case PM_CONSTANT_READ_NODE: {
12832
13228
  pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
13229
+
12833
13230
  if (context_def_p(parser)) {
12834
13231
  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12835
13232
  }
13233
+
12836
13234
  pm_node_destroy(parser, target);
12837
13235
  return parse_shareable_constant_write(parser, node);
12838
13236
  }
@@ -13011,7 +13409,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13011
13409
  bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13012
13410
 
13013
13411
  pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13014
- pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target));
13412
+ pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true));
13015
13413
 
13016
13414
  while (accept1(parser, PM_TOKEN_COMMA)) {
13017
13415
  if (accept1(parser, PM_TOKEN_USTAR)) {
@@ -13027,7 +13425,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13027
13425
 
13028
13426
  if (token_begins_expression_p(parser->current.type)) {
13029
13427
  name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
13030
- name = parse_target(parser, name);
13428
+ name = parse_target(parser, name, true);
13031
13429
  }
13032
13430
 
13033
13431
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
@@ -13035,7 +13433,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13035
13433
  has_rest = true;
13036
13434
  } else if (token_begins_expression_p(parser->current.type)) {
13037
13435
  pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
13038
- target = parse_target(parser, target);
13436
+ target = parse_target(parser, target, true);
13039
13437
 
13040
13438
  pm_multi_target_node_targets_append(parser, result, target);
13041
13439
  } else if (!match1(parser, PM_TOKEN_EOF)) {
@@ -13152,11 +13550,11 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
13152
13550
  */
13153
13551
  static void
13154
13552
  pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13155
- const pm_node_t *duplicated = pm_static_literals_add(parser, literals, node);
13553
+ const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
13156
13554
 
13157
13555
  if (duplicated != NULL) {
13158
13556
  pm_buffer_t buffer = { 0 };
13159
- pm_static_literal_inspect(&buffer, parser, duplicated);
13557
+ pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
13160
13558
 
13161
13559
  pm_diagnostic_list_append_format(
13162
13560
  &parser->warning_list,
@@ -13178,7 +13576,7 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
13178
13576
  */
13179
13577
  static void
13180
13578
  pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13181
- if (pm_static_literals_add(parser, literals, node) != NULL) {
13579
+ if (pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node) != NULL) {
13182
13580
  pm_diagnostic_list_append_format(
13183
13581
  &parser->warning_list,
13184
13582
  node->location.start,
@@ -13206,10 +13604,16 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
13206
13604
  pm_token_t operator = parser->previous;
13207
13605
  pm_node_t *value = NULL;
13208
13606
 
13209
- if (token_begins_expression_p(parser->current.type)) {
13607
+ if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
13608
+ // If we're about to parse a nested hash that is being
13609
+ // pushed into this hash directly with **, then we want the
13610
+ // inner hash to share the static literals with the outer
13611
+ // hash.
13612
+ parser->current_hash_keys = literals;
13210
13613
  value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
13211
- }
13212
- else {
13614
+ } else if (token_begins_expression_p(parser->current.type)) {
13615
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
13616
+ } else {
13213
13617
  pm_parser_scope_forwarding_keywords_check(parser, &operator);
13214
13618
  }
13215
13619
 
@@ -13234,9 +13638,15 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
13234
13638
  pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
13235
13639
  value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
13236
13640
  } else {
13237
- int depth = pm_parser_local_depth(parser, &((pm_token_t) { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 }));
13641
+ int depth = -1;
13238
13642
  pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
13239
13643
 
13644
+ if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
13645
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
13646
+ } else {
13647
+ depth = pm_parser_local_depth(parser, &identifier);
13648
+ }
13649
+
13240
13650
  if (depth == -1) {
13241
13651
  value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
13242
13652
  } else {
@@ -13354,15 +13764,16 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13354
13764
  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
13355
13765
  argument = (pm_node_t *) hash;
13356
13766
 
13357
- pm_static_literals_t literals = { 0 };
13358
- bool contains_keyword_splat = parse_assocs(parser, &literals, (pm_node_t *) hash);
13767
+ pm_static_literals_t hash_keys = { 0 };
13768
+ bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash);
13359
13769
 
13360
13770
  parse_arguments_append(parser, arguments, argument);
13361
- if (contains_keyword_splat) {
13362
- pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
13363
- }
13364
13771
 
13365
- pm_static_literals_free(&literals);
13772
+ pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13773
+ if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13774
+ pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
13775
+
13776
+ pm_static_literals_free(&hash_keys);
13366
13777
  parsed_bare_hash = true;
13367
13778
 
13368
13779
  break;
@@ -13438,7 +13849,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13438
13849
  argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, PM_ERR_EXPECT_ARGUMENT);
13439
13850
  }
13440
13851
 
13852
+ bool contains_keywords = false;
13441
13853
  bool contains_keyword_splat = false;
13854
+
13442
13855
  if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
13443
13856
  if (parsed_bare_hash) {
13444
13857
  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
@@ -13452,10 +13865,11 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13452
13865
  }
13453
13866
 
13454
13867
  pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
13868
+ contains_keywords = true;
13455
13869
 
13456
13870
  // Create the set of static literals for this hash.
13457
- pm_static_literals_t literals = { 0 };
13458
- pm_hash_key_static_literals_add(parser, &literals, argument);
13871
+ pm_static_literals_t hash_keys = { 0 };
13872
+ pm_hash_key_static_literals_add(parser, &hash_keys, argument);
13459
13873
 
13460
13874
  // Finish parsing the one we are part way through.
13461
13875
  pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
@@ -13469,10 +13883,10 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13469
13883
  token_begins_expression_p(parser->current.type) ||
13470
13884
  match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
13471
13885
  )) {
13472
- contains_keyword_splat = parse_assocs(parser, &literals, (pm_node_t *) bare_hash);
13886
+ contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash);
13473
13887
  }
13474
13888
 
13475
- pm_static_literals_free(&literals);
13889
+ pm_static_literals_free(&hash_keys);
13476
13890
  parsed_bare_hash = true;
13477
13891
  } else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
13478
13892
  // TODO: Could we solve this with binding powers instead?
@@ -13480,9 +13894,12 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13480
13894
  }
13481
13895
 
13482
13896
  parse_arguments_append(parser, arguments, argument);
13483
- if (contains_keyword_splat) {
13484
- pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
13485
- }
13897
+
13898
+ pm_node_flags_t flags = 0;
13899
+ if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13900
+ if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13901
+ pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
13902
+
13486
13903
  break;
13487
13904
  }
13488
13905
  }
@@ -13595,7 +14012,6 @@ typedef enum {
13595
14012
  PM_PARAMETERS_ORDER_OPTIONAL,
13596
14013
  PM_PARAMETERS_ORDER_NAMED,
13597
14014
  PM_PARAMETERS_ORDER_NONE,
13598
-
13599
14015
  } pm_parameters_order_t;
13600
14016
 
13601
14017
  /**
@@ -13903,6 +14319,7 @@ parse_parameters(
13903
14319
  pm_token_t operator = parser->previous;
13904
14320
  pm_token_t name;
13905
14321
  bool repeated = false;
14322
+
13906
14323
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13907
14324
  name = parser->previous;
13908
14325
  repeated = pm_parser_parameter_name_check(parser, &name);
@@ -13916,6 +14333,7 @@ parse_parameters(
13916
14333
  if (repeated) {
13917
14334
  pm_node_flag_set_repeated_parameter(param);
13918
14335
  }
14336
+
13919
14337
  if (params->rest == NULL) {
13920
14338
  pm_parameters_node_rest_set(params, param);
13921
14339
  } else {
@@ -13927,6 +14345,7 @@ parse_parameters(
13927
14345
  }
13928
14346
  case PM_TOKEN_STAR_STAR:
13929
14347
  case PM_TOKEN_USTAR_STAR: {
14348
+ pm_parameters_order_t previous_order = order;
13930
14349
  update_parameter_state(parser, &parser->current, &order);
13931
14350
  parser_lex(parser);
13932
14351
 
@@ -13934,6 +14353,10 @@ parse_parameters(
13934
14353
  pm_node_t *param;
13935
14354
 
13936
14355
  if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14356
+ if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14357
+ pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14358
+ }
14359
+
13937
14360
  param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
13938
14361
  } else {
13939
14362
  pm_token_t name;
@@ -14031,7 +14454,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
14031
14454
  pm_rescue_node_operator_set(rescue, &parser->previous);
14032
14455
 
14033
14456
  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
14034
- reference = parse_target(parser, reference);
14457
+ reference = parse_target(parser, reference, false);
14035
14458
 
14036
14459
  pm_rescue_node_reference_set(rescue, reference);
14037
14460
  break;
@@ -14061,7 +14484,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
14061
14484
  pm_rescue_node_operator_set(rescue, &parser->previous);
14062
14485
 
14063
14486
  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
14064
- reference = parse_target(parser, reference);
14487
+ reference = parse_target(parser, reference, false);
14065
14488
 
14066
14489
  pm_rescue_node_reference_set(rescue, reference);
14067
14490
  break;
@@ -15030,7 +15453,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
15030
15453
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15031
15454
 
15032
15455
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15033
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
15456
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15034
15457
 
15035
15458
  return (pm_node_t *) symbol;
15036
15459
  }
@@ -15130,7 +15553,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
15130
15553
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15131
15554
  }
15132
15555
 
15133
- return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
15556
+ return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
15134
15557
  }
15135
15558
 
15136
15559
  /**
@@ -15155,7 +15578,7 @@ parse_undef_argument(pm_parser_t *parser) {
15155
15578
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15156
15579
 
15157
15580
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15158
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
15581
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15159
15582
 
15160
15583
  return (pm_node_t *) symbol;
15161
15584
  }
@@ -15196,7 +15619,7 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
15196
15619
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15197
15620
 
15198
15621
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15199
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
15622
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15200
15623
 
15201
15624
  return (pm_node_t *) symbol;
15202
15625
  }
@@ -15453,9 +15876,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
15453
15876
  while (accept1(parser, PM_TOKEN_COLON_COLON)) {
15454
15877
  pm_token_t delimiter = parser->previous;
15455
15878
  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
15456
-
15457
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
15458
- node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, child);
15879
+ node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
15459
15880
  }
15460
15881
 
15461
15882
  // If there is a [ or ( that follows, then this is part of a larger pattern
@@ -15643,8 +16064,15 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
15643
16064
  const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
15644
16065
  pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
15645
16066
 
15646
- int depth;
15647
- if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16067
+ int depth = -1;
16068
+ if (value_loc->end[-1] == '!' || value_loc->end[-1] == '?') {
16069
+ pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
16070
+ PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
16071
+ } else {
16072
+ depth = pm_parser_local_depth_constant_id(parser, constant_id);
16073
+ }
16074
+
16075
+ if (depth == -1) {
15648
16076
  pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
15649
16077
  }
15650
16078
 
@@ -15665,7 +16093,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
15665
16093
  */
15666
16094
  static void
15667
16095
  parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
15668
- if (pm_static_literals_add(parser, keys, node) != NULL) {
16096
+ if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
15669
16097
  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
15670
16098
  }
15671
16099
  }
@@ -15953,7 +16381,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
15953
16381
 
15954
16382
  if (variable == NULL) {
15955
16383
  if (
15956
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
16384
+ (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
15957
16385
  !parser->current_scope->closed &&
15958
16386
  (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
15959
16387
  pm_token_is_it(parser->previous.start, parser->previous.end)
@@ -16027,8 +16455,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
16027
16455
  parser_lex(parser);
16028
16456
 
16029
16457
  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16030
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
16031
- pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, child);
16458
+ pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
16032
16459
 
16033
16460
  return parse_pattern_constant_path(parser, captures, (pm_node_t *) node);
16034
16461
  }
@@ -16354,7 +16781,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
16354
16781
 
16355
16782
  pm_node_list_free(&parts);
16356
16783
  } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16357
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
16784
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16358
16785
  } else if (match1(parser, PM_TOKEN_EOF)) {
16359
16786
  pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16360
16787
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
@@ -16380,7 +16807,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
16380
16807
  pm_node_flag_set(node, parse_unescaped_encoding(parser));
16381
16808
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16382
16809
  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16383
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
16810
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16384
16811
  } else {
16385
16812
  // If we get here, then we have interpolation so we'll need
16386
16813
  // to create a string or symbol node with interpolation.
@@ -16462,11 +16889,11 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
16462
16889
  pm_token_t bounds = not_provided(parser);
16463
16890
 
16464
16891
  pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16465
- pm_interpolated_string_node_append(parser, container, current);
16892
+ pm_interpolated_string_node_append(container, current);
16466
16893
  current = (pm_node_t *) container;
16467
16894
  }
16468
16895
 
16469
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, node);
16896
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16470
16897
  }
16471
16898
  }
16472
16899
 
@@ -16711,13 +17138,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16711
17138
  }
16712
17139
 
16713
17140
  element = (pm_node_t *) pm_keyword_hash_node_create(parser);
16714
- pm_static_literals_t literals = { 0 };
17141
+ pm_static_literals_t hash_keys = { 0 };
16715
17142
 
16716
17143
  if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
16717
- parse_assocs(parser, &literals, element);
17144
+ parse_assocs(parser, &hash_keys, element);
16718
17145
  }
16719
17146
 
16720
- pm_static_literals_free(&literals);
17147
+ pm_static_literals_free(&hash_keys);
16721
17148
  parsed_bare_hash = true;
16722
17149
  } else {
16723
17150
  element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION);
@@ -16728,8 +17155,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16728
17155
  }
16729
17156
 
16730
17157
  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
16731
- pm_static_literals_t literals = { 0 };
16732
- pm_hash_key_static_literals_add(parser, &literals, element);
17158
+ pm_static_literals_t hash_keys = { 0 };
17159
+ pm_hash_key_static_literals_add(parser, &hash_keys, element);
16733
17160
 
16734
17161
  pm_token_t operator;
16735
17162
  if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
@@ -16744,10 +17171,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16744
17171
 
16745
17172
  element = (pm_node_t *) hash;
16746
17173
  if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16747
- parse_assocs(parser, &literals, element);
17174
+ parse_assocs(parser, &hash_keys, element);
16748
17175
  }
16749
17176
 
16750
- pm_static_literals_free(&literals);
17177
+ pm_static_literals_free(&hash_keys);
16751
17178
  parsed_bare_hash = true;
16752
17179
  }
16753
17180
  }
@@ -16841,7 +17268,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16841
17268
  return (pm_node_t *) multi_target;
16842
17269
  }
16843
17270
 
16844
- return parse_target_validate(parser, (pm_node_t *) multi_target);
17271
+ return parse_target_validate(parser, (pm_node_t *) multi_target, false);
16845
17272
  }
16846
17273
 
16847
17274
  // If we have a single statement and are ending on a right parenthesis
@@ -16907,14 +17334,30 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16907
17334
  return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
16908
17335
  }
16909
17336
  case PM_TOKEN_BRACE_LEFT: {
17337
+ // If we were passed a current_hash_keys via the parser, then that
17338
+ // means we're already parsing a hash and we want to share the set
17339
+ // of hash keys with this inner hash we're about to parse for the
17340
+ // sake of warnings. We'll set it to NULL after we grab it to make
17341
+ // sure subsequent expressions don't use it. Effectively this is a
17342
+ // way of getting around passing it to every call to
17343
+ // parse_expression.
17344
+ pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
17345
+ parser->current_hash_keys = NULL;
17346
+
16910
17347
  pm_accepts_block_stack_push(parser, true);
16911
17348
  parser_lex(parser);
16912
17349
 
16913
17350
  pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
16914
- pm_static_literals_t literals = { 0 };
16915
17351
 
16916
17352
  if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
16917
- parse_assocs(parser, &literals, (pm_node_t *) node);
17353
+ if (current_hash_keys != NULL) {
17354
+ parse_assocs(parser, current_hash_keys, (pm_node_t *) node);
17355
+ } else {
17356
+ pm_static_literals_t hash_keys = { 0 };
17357
+ parse_assocs(parser, &hash_keys, (pm_node_t *) node);
17358
+ pm_static_literals_free(&hash_keys);
17359
+ }
17360
+
16918
17361
  accept1(parser, PM_TOKEN_NEWLINE);
16919
17362
  }
16920
17363
 
@@ -16922,7 +17365,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16922
17365
  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
16923
17366
  pm_hash_node_closing_loc_set(node, &parser->previous);
16924
17367
 
16925
- pm_static_literals_free(&literals);
16926
17368
  return (pm_node_t *) node;
16927
17369
  }
16928
17370
  case PM_TOKEN_CHARACTER_LITERAL: {
@@ -16987,12 +17429,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16987
17429
  }
16988
17430
  case PM_TOKEN_UCOLON_COLON: {
16989
17431
  parser_lex(parser);
16990
-
16991
17432
  pm_token_t delimiter = parser->previous;
16992
- expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16993
17433
 
16994
- pm_node_t *constant = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
16995
- pm_node_t *node = (pm_node_t *)pm_constant_path_node_create(parser, NULL, &delimiter, constant);
17434
+ expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17435
+ pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
16996
17436
 
16997
17437
  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
16998
17438
  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
@@ -17152,8 +17592,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17152
17592
  if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
17153
17593
  // If we get here, then we have an empty heredoc. We'll create
17154
17594
  // an empty content token and return an empty string node.
17155
- lex_mode_pop(parser);
17156
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
17595
+ expect1_heredoc_term(parser, lex_mode);
17157
17596
  pm_token_t content = parse_strings_empty_content(parser->previous.start);
17158
17597
 
17159
17598
  if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
@@ -17194,8 +17633,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17194
17633
  }
17195
17634
 
17196
17635
  node = (pm_node_t *) cast;
17197
- lex_mode_pop(parser);
17198
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
17636
+ expect1_heredoc_term(parser, lex_mode);
17199
17637
  } else {
17200
17638
  // If we get here, then we have multiple parts in the heredoc,
17201
17639
  // so we'll need to create an interpolated string node to hold
@@ -17217,20 +17655,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17217
17655
  pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
17218
17656
  cast->parts = parts;
17219
17657
 
17220
- lex_mode_pop(parser);
17221
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
17222
-
17658
+ expect1_heredoc_term(parser, lex_mode);
17223
17659
  pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
17660
+
17224
17661
  cast->base.location = cast->opening_loc;
17225
17662
  node = (pm_node_t *) cast;
17226
17663
  } else {
17227
17664
  pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
17228
17665
  pm_node_list_free(&parts);
17229
17666
 
17230
- lex_mode_pop(parser);
17231
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
17232
-
17667
+ expect1_heredoc_term(parser, lex_mode);
17233
17668
  pm_interpolated_string_node_closing_set(cast, &parser->previous);
17669
+
17234
17670
  cast->base.location = cast->opening_loc;
17235
17671
  node = (pm_node_t *) cast;
17236
17672
  }
@@ -18132,7 +18568,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18132
18568
  if (match1(parser, PM_TOKEN_COMMA)) {
18133
18569
  index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
18134
18570
  } else {
18135
- index = parse_target(parser, index);
18571
+ index = parse_target(parser, index, false);
18136
18572
  }
18137
18573
 
18138
18574
  context_pop(parser);
@@ -18254,9 +18690,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18254
18690
  pm_token_t double_colon = parser->previous;
18255
18691
 
18256
18692
  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18257
- pm_node_t *constant = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
18258
-
18259
- constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, constant);
18693
+ constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
18260
18694
  }
18261
18695
 
18262
18696
  // Here we retrieve the name of the module. If it wasn't a constant,
@@ -18636,15 +19070,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18636
19070
  // If we hit string content and the current node is
18637
19071
  // an interpolated string, then we need to append
18638
19072
  // the string content to the list of child nodes.
18639
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
19073
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
18640
19074
  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
18641
19075
  // If we hit string content and the current node is
18642
19076
  // a string node, then we need to convert the
18643
19077
  // current node into an interpolated string and add
18644
19078
  // the string content to the list of child nodes.
18645
19079
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
18646
- pm_interpolated_string_node_append(parser, interpolated, current);
18647
- pm_interpolated_string_node_append(parser, interpolated, string);
19080
+ pm_interpolated_string_node_append(interpolated, current);
19081
+ pm_interpolated_string_node_append(interpolated, string);
18648
19082
  current = (pm_node_t *) interpolated;
18649
19083
  } else {
18650
19084
  assert(false && "unreachable");
@@ -18669,7 +19103,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18669
19103
  pm_token_t opening = not_provided(parser);
18670
19104
  pm_token_t closing = not_provided(parser);
18671
19105
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
18672
- pm_interpolated_string_node_append(parser, interpolated, current);
19106
+ pm_interpolated_string_node_append(interpolated, current);
18673
19107
  current = (pm_node_t *) interpolated;
18674
19108
  } else {
18675
19109
  // If we hit an embedded variable and the current
@@ -18678,7 +19112,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18678
19112
  }
18679
19113
 
18680
19114
  pm_node_t *part = parse_string_part(parser);
18681
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
19115
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
18682
19116
  break;
18683
19117
  }
18684
19118
  case PM_TOKEN_EMBEXPR_BEGIN: {
@@ -18698,7 +19132,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18698
19132
  pm_token_t opening = not_provided(parser);
18699
19133
  pm_token_t closing = not_provided(parser);
18700
19134
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
18701
- pm_interpolated_string_node_append(parser, interpolated, current);
19135
+ pm_interpolated_string_node_append(interpolated, current);
18702
19136
  current = (pm_node_t *) interpolated;
18703
19137
  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
18704
19138
  // If we hit an embedded expression and the current
@@ -18709,7 +19143,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18709
19143
  }
18710
19144
 
18711
19145
  pm_node_t *part = parse_string_part(parser);
18712
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
19146
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
18713
19147
  break;
18714
19148
  }
18715
19149
  default:
@@ -18913,7 +19347,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18913
19347
  if (match1(parser, PM_TOKEN_COMMA)) {
18914
19348
  return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX);
18915
19349
  } else {
18916
- return parse_target_validate(parser, splat);
19350
+ return parse_target_validate(parser, splat, true);
18917
19351
  }
18918
19352
  }
18919
19353
  case PM_TOKEN_BANG: {
@@ -20046,8 +20480,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20046
20480
  path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
20047
20481
  } else {
20048
20482
  // Otherwise, this is a constant path. That would look like Foo::Bar.
20049
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
20050
- path = (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
20483
+ path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
20051
20484
  }
20052
20485
 
20053
20486
  // If this is followed by a comma then it is a multiple assignment.
@@ -20086,9 +20519,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20086
20519
  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
20087
20520
  }
20088
20521
  default: {
20089
- pm_parser_err_token(parser, &delimiter, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
20090
- pm_node_t *child = (pm_node_t *) pm_missing_node_create(parser, delimiter.start, delimiter.end);
20091
- return (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
20522
+ expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
20523
+ return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
20092
20524
  }
20093
20525
  }
20094
20526
  }
@@ -21233,25 +21665,28 @@ pm_parser_errors_format(const pm_parser_t *parser, const pm_list_t *error_list,
21233
21665
  pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
21234
21666
 
21235
21667
  size_t column = 0;
21236
- while (column < error->column_end) {
21237
- if (column < error->column_start) {
21238
- pm_buffer_append_byte(buffer, ' ');
21239
- } else {
21240
- const uint8_t caret = column == error->column_start ? '^' : '~';
21668
+ while (column < error->column_start) {
21669
+ pm_buffer_append_byte(buffer, ' ');
21241
21670
 
21242
- if (colorize) {
21243
- pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
21244
- pm_buffer_append_byte(buffer, caret);
21245
- pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
21246
- } else {
21247
- pm_buffer_append_byte(buffer, caret);
21248
- }
21249
- }
21671
+ size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21672
+ column += (char_width == 0 ? 1 : char_width);
21673
+ }
21674
+
21675
+ if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
21676
+ pm_buffer_append_byte(buffer, '^');
21677
+
21678
+ size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21679
+ column += (char_width == 0 ? 1 : char_width);
21680
+
21681
+ while (column < error->column_end) {
21682
+ pm_buffer_append_byte(buffer, '~');
21250
21683
 
21251
21684
  size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21252
21685
  column += (char_width == 0 ? 1 : char_width);
21253
21686
  }
21254
21687
 
21688
+ if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
21689
+
21255
21690
  if (inline_messages) {
21256
21691
  pm_buffer_append_byte(buffer, ' ');
21257
21692
  assert(error->error != NULL);