prism 0.27.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +45 -1
  3. data/config.yml +68 -44
  4. data/docs/configuration.md +1 -0
  5. data/ext/prism/api_node.c +854 -847
  6. data/ext/prism/extconf.rb +27 -23
  7. data/ext/prism/extension.c +5 -3
  8. data/ext/prism/extension.h +1 -1
  9. data/include/prism/ast.h +70 -48
  10. data/include/prism/diagnostic.h +23 -6
  11. data/include/prism/options.h +2 -2
  12. data/include/prism/parser.h +10 -0
  13. data/include/prism/static_literals.h +8 -6
  14. data/include/prism/version.h +2 -2
  15. data/lib/prism/desugar_compiler.rb +4 -4
  16. data/lib/prism/dot_visitor.rb +54 -38
  17. data/lib/prism/dsl.rb +24 -24
  18. data/lib/prism/ffi.rb +4 -4
  19. data/lib/prism/inspect_visitor.rb +2156 -0
  20. data/lib/prism/lex_compat.rb +1 -1
  21. data/lib/prism/mutation_compiler.rb +2 -2
  22. data/lib/prism/node.rb +737 -1863
  23. data/lib/prism/node_ext.rb +176 -5
  24. data/lib/prism/parse_result/comments.rb +1 -1
  25. data/lib/prism/parse_result/newlines.rb +1 -1
  26. data/lib/prism/parse_result.rb +78 -0
  27. data/lib/prism/pattern.rb +12 -6
  28. data/lib/prism/polyfill/byteindex.rb +13 -0
  29. data/lib/prism/polyfill/unpack1.rb +14 -0
  30. data/lib/prism/reflection.rb +20 -20
  31. data/lib/prism/serialize.rb +32 -15
  32. data/lib/prism/translation/parser/compiler.rb +156 -26
  33. data/lib/prism/translation/parser.rb +7 -7
  34. data/lib/prism/translation/ripper.rb +29 -25
  35. data/lib/prism/translation/ruby_parser.rb +13 -13
  36. data/lib/prism.rb +2 -1
  37. data/prism.gemspec +37 -38
  38. data/rbi/prism/compiler.rbi +3 -5
  39. data/rbi/prism/inspect_visitor.rbi +12 -0
  40. data/rbi/prism/node.rbi +405 -370
  41. data/rbi/prism/node_ext.rbi +5 -0
  42. data/rbi/prism/parse_result.rbi +23 -0
  43. data/rbi/prism/translation/ripper.rbi +1 -11
  44. data/sig/prism/dsl.rbs +12 -12
  45. data/sig/prism/inspect_visitor.rbs +22 -0
  46. data/sig/prism/lex_compat.rbs +10 -0
  47. data/sig/prism/node.rbs +108 -91
  48. data/sig/prism/node_ext.rbs +4 -0
  49. data/sig/prism/parse_result.rbs +12 -0
  50. data/src/diagnostic.c +66 -33
  51. data/src/node.c +89 -64
  52. data/src/options.c +2 -2
  53. data/src/prettyprint.c +109 -66
  54. data/src/prism.c +862 -317
  55. data/src/serialize.c +21 -18
  56. data/src/static_literals.c +120 -34
  57. data/src/token_type.c +6 -6
  58. metadata +8 -9
  59. data/lib/prism/node_inspector.rb +0 -68
  60. data/lib/prism/polyfill/string.rb +0 -12
  61. data/rbi/prism/desugar_compiler.rbi +0 -5
  62. data/rbi/prism/mutation_compiler.rbi +0 -5
  63. data/rbi/prism/translation/parser/compiler.rbi +0 -13
  64. data/rbi/prism/translation/ripper/ripper_compiler.rbi +0 -5
  65. data/rbi/prism/translation/ruby_parser.rbi +0 -11
data/src/prism.c CHANGED
@@ -672,6 +672,26 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
672
672
  #define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
673
673
  PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
674
674
 
675
+ /**
676
+ * Add an error for an expected heredoc terminator. This is a special function
677
+ * only because it grabs its location off of a lex mode instead of a node or a
678
+ * token.
679
+ */
680
+ static void
681
+ pm_parser_err_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
682
+ const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
683
+ size_t ident_length = lex_mode->as.heredoc.ident_length;
684
+
685
+ PM_PARSER_ERR_FORMAT(
686
+ parser,
687
+ ident_start,
688
+ ident_start + ident_length,
689
+ PM_ERR_HEREDOC_TERM,
690
+ (int) ident_length,
691
+ (const char *) ident_start
692
+ );
693
+ }
694
+
675
695
  /******************************************************************************/
676
696
  /* Scope-related functions */
677
697
  /******************************************************************************/
@@ -729,42 +749,97 @@ pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
729
749
  return scope;
730
750
  }
731
751
 
732
- static void
733
- pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const pm_token_t * token, const uint8_t mask, pm_diagnostic_id_t diag) {
752
+ typedef enum {
753
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,
754
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,
755
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
756
+ } pm_scope_forwarding_param_check_result_t;
757
+
758
+ static pm_scope_forwarding_param_check_result_t
759
+ pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
734
760
  pm_scope_t *scope = parser->current_scope;
735
- while (scope) {
761
+ bool conflict = false;
762
+
763
+ while (scope != NULL) {
736
764
  if (scope->parameters & mask) {
737
- if (!scope->closed) {
738
- pm_parser_err_token(parser, token, diag);
739
- return;
765
+ if (scope->closed) {
766
+ if (conflict) {
767
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
768
+ } else {
769
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
770
+ }
740
771
  }
741
- return;
772
+
773
+ conflict = true;
742
774
  }
775
+
743
776
  if (scope->closed) break;
744
777
  scope = scope->previous;
745
778
  }
746
779
 
747
- pm_parser_err_token(parser, token, diag);
780
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
748
781
  }
749
782
 
750
- static inline void
783
+ static void
751
784
  pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
752
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
785
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
786
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
787
+ // Pass.
788
+ break;
789
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
790
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
791
+ break;
792
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
793
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
794
+ break;
795
+ }
753
796
  }
754
797
 
755
- static inline void
798
+ static void
756
799
  pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
757
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
800
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
801
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
802
+ // Pass.
803
+ break;
804
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
805
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
806
+ break;
807
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
808
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
809
+ break;
810
+ }
758
811
  }
759
812
 
760
- static inline void
761
- pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t * token) {
762
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_ALL, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
813
+ static void
814
+ pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
815
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
816
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
817
+ // Pass.
818
+ break;
819
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
820
+ // This shouldn't happen, because ... is not allowed in the
821
+ // declaration of blocks. If we get here, we assume we already have
822
+ // an error for this.
823
+ break;
824
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
825
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
826
+ break;
827
+ }
763
828
  }
764
829
 
765
- static inline void
830
+ static void
766
831
  pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
767
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
832
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
833
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
834
+ // Pass.
835
+ break;
836
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
837
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
838
+ break;
839
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
840
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
841
+ break;
842
+ }
768
843
  }
769
844
 
770
845
  /**
@@ -1405,7 +1480,7 @@ pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1405
1480
  static inline void
1406
1481
  pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1407
1482
  if (pm_conditional_predicate_warn_write_literal_p(node)) {
1408
- pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3_0 : PM_WARN_EQUAL_IN_CONDITIONAL);
1483
+ pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1409
1484
  }
1410
1485
  }
1411
1486
 
@@ -1683,7 +1758,7 @@ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
1683
1758
  * it's important that it be as fast as possible.
1684
1759
  */
1685
1760
  static inline size_t
1686
- char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
1761
+ char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
1687
1762
  if (parser->encoding_changed) {
1688
1763
  size_t width;
1689
1764
  if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
@@ -2923,6 +2998,29 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
2923
2998
  return node;
2924
2999
  }
2925
3000
 
3001
+ /**
3002
+ * Validate that index expressions do not have keywords or blocks if we are
3003
+ * parsing as Ruby 3.4+.
3004
+ */
3005
+ static void
3006
+ pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
3007
+ if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
3008
+ if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
3009
+ pm_node_t *node;
3010
+ PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
3011
+ if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
3012
+ pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
3013
+ break;
3014
+ }
3015
+ }
3016
+ }
3017
+
3018
+ if (block != NULL) {
3019
+ pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
3020
+ }
3021
+ }
3022
+ }
3023
+
2926
3024
  /**
2927
3025
  * Allocate and initialize a new IndexAndWriteNode node.
2928
3026
  */
@@ -2931,6 +3029,8 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons
2931
3029
  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2932
3030
  pm_index_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_and_write_node_t);
2933
3031
 
3032
+ pm_index_arguments_check(parser, target->arguments, target->block);
3033
+
2934
3034
  *node = (pm_index_and_write_node_t) {
2935
3035
  {
2936
3036
  .type = PM_INDEX_AND_WRITE_NODE,
@@ -2980,8 +3080,8 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
2980
3080
  .message_loc = target->message_loc,
2981
3081
  .read_name = 0,
2982
3082
  .write_name = target->name,
2983
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
2984
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3083
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3084
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2985
3085
  .value = value
2986
3086
  };
2987
3087
 
@@ -3002,6 +3102,8 @@ static pm_index_operator_write_node_t *
3002
3102
  pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3003
3103
  pm_index_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_operator_write_node_t);
3004
3104
 
3105
+ pm_index_arguments_check(parser, target->arguments, target->block);
3106
+
3005
3107
  *node = (pm_index_operator_write_node_t) {
3006
3108
  {
3007
3109
  .type = PM_INDEX_OPERATOR_WRITE_NODE,
@@ -3017,8 +3119,8 @@ pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
3017
3119
  .arguments = target->arguments,
3018
3120
  .closing_loc = target->closing_loc,
3019
3121
  .block = target->block,
3020
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3021
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3122
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3123
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3022
3124
  .value = value
3023
3125
  };
3024
3126
 
@@ -3075,6 +3177,8 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
3075
3177
  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3076
3178
  pm_index_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_or_write_node_t);
3077
3179
 
3180
+ pm_index_arguments_check(parser, target->arguments, target->block);
3181
+
3078
3182
  *node = (pm_index_or_write_node_t) {
3079
3183
  {
3080
3184
  .type = PM_INDEX_OR_WRITE_NODE,
@@ -3139,6 +3243,8 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3139
3243
  pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
3140
3244
  pm_node_flags_t flags = target->base.flags;
3141
3245
 
3246
+ pm_index_arguments_check(parser, target->arguments, target->block);
3247
+
3142
3248
  *node = (pm_index_target_node_t) {
3143
3249
  {
3144
3250
  .type = PM_INDEX_TARGET_NODE,
@@ -3358,9 +3464,9 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia
3358
3464
  },
3359
3465
  .name = target->name,
3360
3466
  .name_loc = target->base.location,
3361
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3467
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3362
3468
  .value = value,
3363
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3469
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3364
3470
  };
3365
3471
 
3366
3472
  return node;
@@ -3474,9 +3580,9 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat
3474
3580
  }
3475
3581
  },
3476
3582
  .target = target,
3477
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3583
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3478
3584
  .value = value,
3479
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3585
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3480
3586
  };
3481
3587
 
3482
3588
  return node;
@@ -3510,22 +3616,27 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node
3510
3616
  * Allocate and initialize a new ConstantPathNode node.
3511
3617
  */
3512
3618
  static pm_constant_path_node_t *
3513
- pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, pm_node_t *child) {
3619
+ pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3514
3620
  pm_assert_value_expression(parser, parent);
3515
-
3516
3621
  pm_constant_path_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_node_t);
3517
3622
 
3623
+ pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3624
+ if (name_token->type == PM_TOKEN_CONSTANT) {
3625
+ name = pm_parser_constant_id_token(parser, name_token);
3626
+ }
3627
+
3518
3628
  *node = (pm_constant_path_node_t) {
3519
3629
  {
3520
3630
  .type = PM_CONSTANT_PATH_NODE,
3521
3631
  .location = {
3522
3632
  .start = parent == NULL ? delimiter->start : parent->location.start,
3523
- .end = child->location.end
3633
+ .end = name_token->end
3524
3634
  },
3525
3635
  },
3526
3636
  .parent = parent,
3527
- .child = child,
3528
- .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter)
3637
+ .name = name,
3638
+ .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3639
+ .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3529
3640
  };
3530
3641
 
3531
3642
  return node;
@@ -3596,9 +3707,9 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod
3596
3707
  },
3597
3708
  .name = target->name,
3598
3709
  .name_loc = target->base.location,
3599
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3710
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3600
3711
  .value = value,
3601
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3712
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3602
3713
  };
3603
3714
 
3604
3715
  return node;
@@ -3716,6 +3827,113 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3716
3827
  }
3717
3828
  }
3718
3829
 
3830
+ /**
3831
+ * When a method body is created, we want to check if the last statement is a
3832
+ * return or a statement that houses a return. If it is, then we want to mark
3833
+ * that return as being redundant so that we can compile it differently but also
3834
+ * so that we can indicate that to the user.
3835
+ */
3836
+ static void
3837
+ pm_def_node_body_redundant_return(pm_node_t *node) {
3838
+ switch (PM_NODE_TYPE(node)) {
3839
+ case PM_RETURN_NODE:
3840
+ node->flags |= PM_RETURN_NODE_FLAGS_REDUNDANT;
3841
+ break;
3842
+ case PM_BEGIN_NODE: {
3843
+ pm_begin_node_t *cast = (pm_begin_node_t *) node;
3844
+
3845
+ if (cast->statements != NULL && cast->else_clause == NULL) {
3846
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3847
+ }
3848
+ break;
3849
+ }
3850
+ case PM_STATEMENTS_NODE: {
3851
+ pm_statements_node_t *cast = (pm_statements_node_t *) node;
3852
+
3853
+ if (cast->body.size > 0) {
3854
+ pm_def_node_body_redundant_return(cast->body.nodes[cast->body.size - 1]);
3855
+ }
3856
+ break;
3857
+ }
3858
+ case PM_IF_NODE: {
3859
+ pm_if_node_t *cast = (pm_if_node_t *) node;
3860
+
3861
+ if (cast->statements != NULL) {
3862
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3863
+ }
3864
+
3865
+ if (cast->consequent != NULL) {
3866
+ pm_def_node_body_redundant_return(cast->consequent);
3867
+ }
3868
+ break;
3869
+ }
3870
+ case PM_UNLESS_NODE: {
3871
+ pm_unless_node_t *cast = (pm_unless_node_t *) node;
3872
+
3873
+ if (cast->statements != NULL) {
3874
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3875
+ }
3876
+
3877
+ if (cast->consequent != NULL) {
3878
+ pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
3879
+ }
3880
+ break;
3881
+ }
3882
+ case PM_ELSE_NODE: {
3883
+ pm_else_node_t *cast = (pm_else_node_t *) node;
3884
+
3885
+ if (cast->statements != NULL) {
3886
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3887
+ }
3888
+ break;
3889
+ }
3890
+ case PM_CASE_NODE: {
3891
+ pm_case_node_t *cast = (pm_case_node_t *) node;
3892
+ pm_node_t *condition;
3893
+
3894
+ PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
3895
+ pm_def_node_body_redundant_return(condition);
3896
+ }
3897
+
3898
+ if (cast->consequent != NULL) {
3899
+ pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
3900
+ }
3901
+ break;
3902
+ }
3903
+ case PM_WHEN_NODE: {
3904
+ pm_when_node_t *cast = (pm_when_node_t *) node;
3905
+
3906
+ if (cast->statements != NULL) {
3907
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3908
+ }
3909
+ break;
3910
+ }
3911
+ case PM_CASE_MATCH_NODE: {
3912
+ pm_case_match_node_t *cast = (pm_case_match_node_t *) node;
3913
+ pm_node_t *condition;
3914
+
3915
+ PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
3916
+ pm_def_node_body_redundant_return(condition);
3917
+ }
3918
+
3919
+ if (cast->consequent != NULL) {
3920
+ pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
3921
+ }
3922
+ break;
3923
+ }
3924
+ case PM_IN_NODE: {
3925
+ pm_in_node_t *cast = (pm_in_node_t *) node;
3926
+
3927
+ if (cast->statements != NULL) {
3928
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3929
+ }
3930
+ break;
3931
+ }
3932
+ default:
3933
+ break;
3934
+ }
3935
+ }
3936
+
3719
3937
  /**
3720
3938
  * Allocate and initialize a new DefNode node.
3721
3939
  */
@@ -3748,6 +3966,10 @@ pm_def_node_create(
3748
3966
  pm_def_node_receiver_check(parser, receiver);
3749
3967
  }
3750
3968
 
3969
+ if (body != NULL) {
3970
+ pm_def_node_body_redundant_return(body);
3971
+ }
3972
+
3751
3973
  *node = (pm_def_node_t) {
3752
3974
  {
3753
3975
  .type = PM_DEF_NODE,
@@ -4338,9 +4560,9 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta
4338
4560
  },
4339
4561
  .name = pm_global_variable_write_name(parser, target),
4340
4562
  .name_loc = target->location,
4341
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4563
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4342
4564
  .value = value,
4343
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4565
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4344
4566
  };
4345
4567
 
4346
4568
  return node;
@@ -4846,9 +5068,9 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance
4846
5068
  },
4847
5069
  .name = target->name,
4848
5070
  .name_loc = target->base.location,
4849
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5071
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4850
5072
  .value = value,
4851
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5073
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4852
5074
  };
4853
5075
 
4854
5076
  return node;
@@ -4922,6 +5144,50 @@ pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable
4922
5144
  return node;
4923
5145
  }
4924
5146
 
5147
+ /**
5148
+ * Append a part into a list of string parts. Importantly this handles nested
5149
+ * interpolated strings by not necessarily removing the marker for static
5150
+ * literals.
5151
+ */
5152
+ static void
5153
+ pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
5154
+ switch (PM_NODE_TYPE(part)) {
5155
+ case PM_STRING_NODE:
5156
+ pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5157
+ break;
5158
+ case PM_EMBEDDED_STATEMENTS_NODE: {
5159
+ pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5160
+ pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5161
+
5162
+ if (embedded == NULL) {
5163
+ // If there are no statements or more than one statement, then
5164
+ // we lose the static literal flag.
5165
+ pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5166
+ } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5167
+ // If the embedded statement is a string, then we can keep the
5168
+ // static literal flag and mark the string as frozen.
5169
+ pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5170
+ } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5171
+ // If the embedded statement is an interpolated string and it's
5172
+ // a static literal, then we can keep the static literal flag.
5173
+ } else {
5174
+ // Otherwise we lose the static literal flag.
5175
+ pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5176
+ }
5177
+
5178
+ break;
5179
+ }
5180
+ case PM_EMBEDDED_VARIABLE_NODE:
5181
+ pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5182
+ break;
5183
+ default:
5184
+ assert(false && "unexpected node type");
5185
+ break;
5186
+ }
5187
+
5188
+ pm_node_list_append(parts, part);
5189
+ }
5190
+
4925
5191
  /**
4926
5192
  * Allocate a new InterpolatedRegularExpressionNode node.
4927
5193
  */
@@ -4955,54 +5221,113 @@ pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expressio
4955
5221
  node->base.location.end = part->location.end;
4956
5222
  }
4957
5223
 
4958
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
4959
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4960
- }
4961
-
4962
- if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
4963
- pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
4964
- }
4965
-
4966
- pm_node_list_append(&node->parts, part);
5224
+ pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
4967
5225
  }
4968
5226
 
4969
5227
  static inline void
4970
5228
  pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
4971
5229
  node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
4972
5230
  node->base.location.end = closing->end;
4973
- pm_node_flag_set((pm_node_t *)node, pm_regular_expression_flags_create(parser, closing));
5231
+ pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
4974
5232
  }
4975
5233
 
4976
5234
  /**
4977
5235
  * Append a part to an InterpolatedStringNode node.
5236
+ *
5237
+ * This has somewhat complicated semantics, because we need to update
5238
+ * multiple flags that have somewhat confusing interactions.
5239
+ *
5240
+ * PM_NODE_FLAG_STATIC_LITERAL indicates that the node should be treated as a
5241
+ * single static literal string that can be pushed onto the stack on its own.
5242
+ * Note that this doesn't necessarily mean that the string will be frozen or
5243
+ * not; the instructions in CRuby will be either putobject or putstring,
5244
+ * depending on the combination of `--enable-frozen-string-literal`,
5245
+ * `# frozen_string_literal: true`, and whether or not there is interpolation.
5246
+ *
5247
+ * PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN indicates that the string should be
5248
+ * explicitly frozen. This will only happen if the string is comprised entirely
5249
+ * of string parts that are themselves static literals and frozen.
5250
+ *
5251
+ * PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE indicates that the string should
5252
+ * be explicitly marked as mutable. This will happen from
5253
+ * `--disable-frozen-string-literal` or `# frozen_string_literal: false`. This
5254
+ * is necessary to indicate that the string should be left up to the runtime,
5255
+ * which could potentially use a chilled string otherwise.
4978
5256
  */
4979
5257
  static inline void
4980
- pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_node_t *node, pm_node_t *part) {
5258
+ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
5259
+ #define CLEAR_FLAGS(node) \
5260
+ node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5261
+
5262
+ #define MUTABLE_FLAGS(node) \
5263
+ node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5264
+
4981
5265
  if (node->parts.size == 0 && node->opening_loc.start == NULL) {
4982
5266
  node->base.location.start = part->location.start;
4983
5267
  }
4984
5268
 
4985
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
4986
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4987
- }
5269
+ node->base.location.end = MAX(node->base.location.end, part->location.end);
5270
+
5271
+ switch (PM_NODE_TYPE(part)) {
5272
+ case PM_STRING_NODE:
5273
+ pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5274
+ break;
5275
+ case PM_INTERPOLATED_STRING_NODE:
5276
+ if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5277
+ // If the string that we're concatenating is a static literal,
5278
+ // then we can keep the static literal flag for this string.
5279
+ } else {
5280
+ // Otherwise, we lose the static literal flag here and we should
5281
+ // also clear the mutability flags.
5282
+ CLEAR_FLAGS(node);
5283
+ }
5284
+ break;
5285
+ case PM_EMBEDDED_STATEMENTS_NODE: {
5286
+ pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5287
+ pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5288
+
5289
+ if (embedded == NULL) {
5290
+ // If we're embedding multiple statements or no statements, then
5291
+ // the string is no longer a static literal.
5292
+ CLEAR_FLAGS(node);
5293
+ } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5294
+ // If the embedded statement is a string, then we can mark that
5295
+ // string as frozen and static literal, and not touch the static
5296
+ // literal status of this string.
5297
+ pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5298
+
5299
+ if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5300
+ MUTABLE_FLAGS(node);
5301
+ }
5302
+ } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5303
+ // If the embedded statement is an interpolated string, but that
5304
+ // string is marked as static literal, then we can keep our
5305
+ // static literal status for this string.
5306
+ if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5307
+ MUTABLE_FLAGS(node);
5308
+ }
5309
+ } else {
5310
+ // In all other cases, we lose the static literal flag here and
5311
+ // also clear the mutability flags.
5312
+ CLEAR_FLAGS(node);
5313
+ }
4988
5314
 
4989
- if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
4990
- pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE);
5315
+ break;
5316
+ }
5317
+ case PM_EMBEDDED_VARIABLE_NODE:
5318
+ // Embedded variables clear static literal, which means we also
5319
+ // should clear the mutability flags.
5320
+ CLEAR_FLAGS(node);
5321
+ break;
5322
+ default:
5323
+ assert(false && "unexpected node type");
5324
+ break;
4991
5325
  }
4992
5326
 
4993
5327
  pm_node_list_append(&node->parts, part);
4994
- node->base.location.end = MAX(node->base.location.end, part->location.end);
4995
5328
 
4996
- if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4997
- switch (parser->frozen_string_literal) {
4998
- case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
4999
- pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE);
5000
- break;
5001
- case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5002
- pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5003
- break;
5004
- }
5005
- }
5329
+ #undef CLEAR_FLAGS
5330
+ #undef MUTABLE_FLAGS
5006
5331
  }
5007
5332
 
5008
5333
  /**
@@ -5011,11 +5336,21 @@ pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_n
5011
5336
  static pm_interpolated_string_node_t *
5012
5337
  pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5013
5338
  pm_interpolated_string_node_t *node = PM_ALLOC_NODE(parser, pm_interpolated_string_node_t);
5339
+ pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5340
+
5341
+ switch (parser->frozen_string_literal) {
5342
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5343
+ flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5344
+ break;
5345
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5346
+ flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5347
+ break;
5348
+ }
5014
5349
 
5015
5350
  *node = (pm_interpolated_string_node_t) {
5016
5351
  {
5017
5352
  .type = PM_INTERPOLATED_STRING_NODE,
5018
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
5353
+ .flags = flags,
5019
5354
  .location = {
5020
5355
  .start = opening->start,
5021
5356
  .end = closing->end,
@@ -5029,7 +5364,7 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin
5029
5364
  if (parts != NULL) {
5030
5365
  pm_node_t *part;
5031
5366
  PM_NODE_LIST_FOREACH(parts, index, part) {
5032
- pm_interpolated_string_node_append(parser, node, part);
5367
+ pm_interpolated_string_node_append(node, part);
5033
5368
  }
5034
5369
  }
5035
5370
 
@@ -5051,15 +5386,7 @@ pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_
5051
5386
  node->base.location.start = part->location.start;
5052
5387
  }
5053
5388
 
5054
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
5055
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5056
- }
5057
-
5058
- if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5059
- pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5060
- }
5061
-
5062
- pm_node_list_append(&node->parts, part);
5389
+ pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5063
5390
  node->base.location.end = MAX(node->base.location.end, part->location.end);
5064
5391
  }
5065
5392
 
@@ -5125,11 +5452,7 @@ pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *openi
5125
5452
 
5126
5453
  static inline void
5127
5454
  pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5128
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
5129
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5130
- }
5131
-
5132
- pm_node_list_append(&node->parts, part);
5455
+ pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5133
5456
  node->base.location.end = part->location.end;
5134
5457
  }
5135
5458
 
@@ -5341,10 +5664,10 @@ pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *tar
5341
5664
  }
5342
5665
  },
5343
5666
  .name_loc = target->location,
5344
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5667
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5345
5668
  .value = value,
5346
5669
  .name = name,
5347
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5670
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5348
5671
  .depth = depth
5349
5672
  };
5350
5673
 
@@ -6397,6 +6720,7 @@ pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argumen
6397
6720
  *node = (pm_return_node_t) {
6398
6721
  {
6399
6722
  .type = PM_RETURN_NODE,
6723
+ .flags = 0,
6400
6724
  .location = {
6401
6725
  .start = keyword->start,
6402
6726
  .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
@@ -6622,7 +6946,7 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node,
6622
6946
  case PM_REDO_NODE:
6623
6947
  case PM_RETRY_NODE:
6624
6948
  case PM_RETURN_NODE:
6625
- pm_parser_warn_node(parser, previous, PM_WARN_UNREACHABLE_STATEMENT);
6949
+ pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
6626
6950
  break;
6627
6951
  default:
6628
6952
  break;
@@ -6729,7 +7053,8 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
6729
7053
  }
6730
7054
 
6731
7055
  /**
6732
- * Read through the contents of a string and check if it consists solely of US ASCII code points.
7056
+ * Read through the contents of a string and check if it consists solely of
7057
+ * US-ASCII code points.
6733
7058
  */
6734
7059
  static bool
6735
7060
  pm_ascii_only_p(const pm_string_t *contents) {
@@ -6743,27 +7068,72 @@ pm_ascii_only_p(const pm_string_t *contents) {
6743
7068
  return true;
6744
7069
  }
6745
7070
 
7071
+ /**
7072
+ * Validate that the contents of the given symbol are all valid UTF-8.
7073
+ */
7074
+ static void
7075
+ parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7076
+ for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7077
+ size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
7078
+
7079
+ if (width == 0) {
7080
+ pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7081
+ break;
7082
+ }
7083
+
7084
+ cursor += width;
7085
+ }
7086
+ }
7087
+
7088
+ /**
7089
+ * Validate that the contents of the given symbol are all valid in the encoding
7090
+ * of the parser.
7091
+ */
7092
+ static void
7093
+ parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7094
+ const pm_encoding_t *encoding = parser->encoding;
7095
+
7096
+ for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7097
+ size_t width = encoding->char_width(cursor, end - cursor);
7098
+
7099
+ if (width == 0) {
7100
+ pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7101
+ break;
7102
+ }
7103
+
7104
+ cursor += width;
7105
+ }
7106
+ }
7107
+
6746
7108
  /**
6747
7109
  * Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
6748
7110
  * encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
6749
7111
  * points. Otherwise, the encoding may be explicitly set with an escape
6750
7112
  * sequence.
7113
+ *
7114
+ * If the validate flag is set, then it will check the contents of the symbol
7115
+ * to ensure that all characters are valid in the encoding.
6751
7116
  */
6752
7117
  static inline pm_node_flags_t
6753
- parse_symbol_encoding(const pm_parser_t *parser, const pm_string_t *contents) {
7118
+ parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
6754
7119
  if (parser->explicit_encoding != NULL) {
6755
7120
  // A Symbol may optionally have its encoding explicitly set. This will
6756
7121
  // happen if an escape sequence results in a non-ASCII code point.
6757
7122
  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7123
+ if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
6758
7124
  return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
6759
7125
  } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6760
7126
  return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
7127
+ } else if (validate) {
7128
+ parse_symbol_encoding_validate_other(parser, location, contents);
6761
7129
  }
6762
7130
  } else if (pm_ascii_only_p(contents)) {
6763
7131
  // Ruby stipulates that all source files must use an ASCII-compatible
6764
7132
  // encoding. Thus, all symbols appearing in source are eligible for
6765
7133
  // "downgrading" to US-ASCII.
6766
7134
  return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
7135
+ } else if (validate) {
7136
+ parse_symbol_encoding_validate_other(parser, location, contents);
6767
7137
  }
6768
7138
 
6769
7139
  return 0;
@@ -6931,7 +7301,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
6931
7301
  */
6932
7302
  static pm_symbol_node_t *
6933
7303
  pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6934
- pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, &parser->current_string));
7304
+ pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
6935
7305
  parser->current_string = PM_STRING_EMPTY;
6936
7306
  return node;
6937
7307
  }
@@ -6953,7 +7323,7 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
6953
7323
 
6954
7324
  assert((label.end - label.start) >= 0);
6955
7325
  pm_string_shared_init(&node->unescaped, label.start, label.end);
6956
- pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &node->unescaped));
7326
+ pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
6957
7327
 
6958
7328
  break;
6959
7329
  }
@@ -7038,7 +7408,8 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
7038
7408
  .unescaped = node->unescaped
7039
7409
  };
7040
7410
 
7041
- pm_node_flag_set((pm_node_t *)new_node, parse_symbol_encoding(parser, &node->unescaped));
7411
+ pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
7412
+ pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
7042
7413
 
7043
7414
  // We are explicitly _not_ using pm_node_destroy here because we don't want
7044
7415
  // to trash the unescaped string. We could instead copy the string if we
@@ -7574,7 +7945,7 @@ pm_local_variable_read_node_create_it(pm_parser_t *parser, const pm_token_t *nam
7574
7945
  static pm_node_t *
7575
7946
  pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
7576
7947
  if (
7577
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
7948
+ (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
7578
7949
  !parser->current_scope->closed &&
7579
7950
  (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
7580
7951
  pm_node_is_it(parser, node)
@@ -8023,7 +8394,12 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8023
8394
  // If we have hit a ractor pragma, attempt to lex that.
8024
8395
  uint32_t value_length = (uint32_t) (value_end - value_start);
8025
8396
  if (key_length == 24 && pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
8026
- if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8397
+ const uint8_t *cursor = parser->current.start;
8398
+ while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8399
+
8400
+ if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8401
+ pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8402
+ } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8027
8403
  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8028
8404
  } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8029
8405
  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
@@ -8298,10 +8674,11 @@ context_human(pm_context_t context) {
8298
8674
  /* Specific token lexers */
8299
8675
  /******************************************************************************/
8300
8676
 
8301
- static void
8302
- pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *invalid) {
8677
+ static inline void
8678
+ pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8303
8679
  if (invalid != NULL) {
8304
- pm_parser_err(parser, invalid, invalid + 1, PM_ERR_INVALID_NUMBER_UNDERSCORE);
8680
+ pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8681
+ pm_parser_err(parser, invalid, invalid + 1, diag_id);
8305
8682
  }
8306
8683
  }
8307
8684
 
@@ -8309,7 +8686,7 @@ static size_t
8309
8686
  pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8310
8687
  const uint8_t *invalid = NULL;
8311
8688
  size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8312
- pm_strspn_number_validate(parser, invalid);
8689
+ pm_strspn_number_validate(parser, string, length, invalid);
8313
8690
  return length;
8314
8691
  }
8315
8692
 
@@ -8317,7 +8694,7 @@ static size_t
8317
8694
  pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8318
8695
  const uint8_t *invalid = NULL;
8319
8696
  size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8320
- pm_strspn_number_validate(parser, invalid);
8697
+ pm_strspn_number_validate(parser, string, length, invalid);
8321
8698
  return length;
8322
8699
  }
8323
8700
 
@@ -8325,7 +8702,7 @@ static size_t
8325
8702
  pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8326
8703
  const uint8_t *invalid = NULL;
8327
8704
  size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8328
- pm_strspn_number_validate(parser, invalid);
8705
+ pm_strspn_number_validate(parser, string, length, invalid);
8329
8706
  return length;
8330
8707
  }
8331
8708
 
@@ -8333,7 +8710,7 @@ static size_t
8333
8710
  pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8334
8711
  const uint8_t *invalid = NULL;
8335
8712
  size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8336
- pm_strspn_number_validate(parser, invalid);
8713
+ pm_strspn_number_validate(parser, string, length, invalid);
8337
8714
  return length;
8338
8715
  }
8339
8716
 
@@ -8395,6 +8772,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8395
8772
  if (pm_char_is_decimal_digit(peek(parser))) {
8396
8773
  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8397
8774
  } else {
8775
+ match(parser, '_');
8398
8776
  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8399
8777
  }
8400
8778
 
@@ -8407,6 +8785,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8407
8785
  if (pm_char_is_binary_digit(peek(parser))) {
8408
8786
  parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8409
8787
  } else {
8788
+ match(parser, '_');
8410
8789
  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8411
8790
  }
8412
8791
 
@@ -8420,6 +8799,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8420
8799
  if (pm_char_is_octal_digit(peek(parser))) {
8421
8800
  parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8422
8801
  } else {
8802
+ match(parser, '_');
8423
8803
  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8424
8804
  }
8425
8805
 
@@ -8447,6 +8827,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8447
8827
  if (pm_char_is_hexadecimal_digit(peek(parser))) {
8448
8828
  parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8449
8829
  } else {
8830
+ match(parser, '_');
8450
8831
  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8451
8832
  }
8452
8833
 
@@ -8475,6 +8856,16 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8475
8856
  type = lex_optional_float_suffix(parser, seen_e);
8476
8857
  }
8477
8858
 
8859
+ // At this point we have a completed number, but we want to provide the user
8860
+ // with a good experience if they put an additional .xxx fractional
8861
+ // component on the end, so we'll check for that here.
8862
+ if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8863
+ const uint8_t *fraction_start = parser->current.end;
8864
+ const uint8_t *fraction_end = parser->current.end + 2;
8865
+ fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8866
+ pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
8867
+ }
8868
+
8478
8869
  return type;
8479
8870
  }
8480
8871
 
@@ -8567,7 +8958,7 @@ lex_global_variable(pm_parser_t *parser) {
8567
8958
  } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
8568
8959
 
8569
8960
  // $0 isn't allowed to be followed by anything.
8570
- pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3_0 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8961
+ pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8571
8962
  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
8572
8963
  }
8573
8964
 
@@ -8603,7 +8994,7 @@ lex_global_variable(pm_parser_t *parser) {
8603
8994
  } else {
8604
8995
  // If we get here, then we have a $ followed by something that
8605
8996
  // isn't recognized as a global variable.
8606
- pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3_0 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8997
+ pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8607
8998
  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8608
8999
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
8609
9000
  }
@@ -8976,12 +9367,20 @@ escape_hexadecimal_digit(const uint8_t value) {
8976
9367
  * validated.
8977
9368
  */
8978
9369
  static inline uint32_t
8979
- escape_unicode(const uint8_t *string, size_t length) {
9370
+ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
8980
9371
  uint32_t value = 0;
8981
9372
  for (size_t index = 0; index < length; index++) {
8982
9373
  if (index != 0) value <<= 4;
8983
9374
  value |= escape_hexadecimal_digit(string[index]);
8984
9375
  }
9376
+
9377
+ // Here we're going to verify that the value is actually a valid Unicode
9378
+ // codepoint and not a surrogate pair.
9379
+ if (value >= 0xD800 && value <= 0xDFFF) {
9380
+ pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9381
+ return 0xFFFD;
9382
+ }
9383
+
8985
9384
  return value;
8986
9385
  }
8987
9386
 
@@ -9230,7 +9629,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9230
9629
  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9231
9630
  }
9232
9631
 
9233
- escape_write_byte_encoded(parser, buffer, value);
9632
+ escape_write_byte_encoded(parser, buffer, escape_byte(value, flags));
9234
9633
  } else {
9235
9634
  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9236
9635
  }
@@ -9241,22 +9640,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9241
9640
  const uint8_t *start = parser->current.end - 1;
9242
9641
  parser->current.end++;
9243
9642
 
9244
- if (
9245
- (parser->current.end + 4 <= parser->end) &&
9246
- pm_char_is_hexadecimal_digit(parser->current.end[0]) &&
9247
- pm_char_is_hexadecimal_digit(parser->current.end[1]) &&
9248
- pm_char_is_hexadecimal_digit(parser->current.end[2]) &&
9249
- pm_char_is_hexadecimal_digit(parser->current.end[3])
9250
- ) {
9251
- uint32_t value = escape_unicode(parser->current.end, 4);
9252
-
9253
- if (flags & PM_ESCAPE_FLAG_REGEXP) {
9254
- pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9255
- }
9256
- escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9257
-
9258
- parser->current.end += 4;
9259
- } else if (peek(parser) == '{') {
9643
+ if (peek(parser) == '{') {
9260
9644
  const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9261
9645
 
9262
9646
  parser->current.end++;
@@ -9284,7 +9668,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9284
9668
  extra_codepoints_start = unicode_start;
9285
9669
  }
9286
9670
 
9287
- uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
9671
+ uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9288
9672
  escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9289
9673
 
9290
9674
  parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
@@ -9306,7 +9690,25 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9306
9690
  pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9307
9691
  }
9308
9692
  } else {
9309
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9693
+ size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9694
+
9695
+ if (length == 4) {
9696
+ uint32_t value = escape_unicode(parser, parser->current.end, 4);
9697
+
9698
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
9699
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9700
+ }
9701
+
9702
+ escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9703
+ parser->current.end += 4;
9704
+ } else {
9705
+ parser->current.end += length;
9706
+ pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9707
+ }
9708
+ }
9709
+
9710
+ if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9711
+ pm_parser_err(parser, start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9310
9712
  }
9311
9713
 
9312
9714
  return;
@@ -9560,8 +9962,8 @@ lex_at_variable(pm_parser_t *parser) {
9560
9962
  }
9561
9963
  } else if (parser->current.end < parser->end && pm_char_is_decimal_digit(*parser->current.end)) {
9562
9964
  pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
9563
- if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0) {
9564
- diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3_0 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3_0;
9965
+ if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
9966
+ diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
9565
9967
  }
9566
9968
 
9567
9969
  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
@@ -10545,8 +10947,11 @@ parser_lex(pm_parser_t *parser) {
10545
10947
  }
10546
10948
 
10547
10949
  size_t ident_length = (size_t) (parser->current.end - ident_start);
10950
+ bool ident_error = false;
10951
+
10548
10952
  if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
10549
- // TODO: handle unterminated heredoc
10953
+ pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
10954
+ ident_error = true;
10550
10955
  }
10551
10956
 
10552
10957
  parser->explicit_encoding = NULL;
@@ -10571,7 +10976,7 @@ parser_lex(pm_parser_t *parser) {
10571
10976
  // this is not a valid heredoc declaration. In this case we
10572
10977
  // will add an error, but we will still return a heredoc
10573
10978
  // start.
10574
- pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
10979
+ if (!ident_error) pm_parser_err_heredoc_term(parser, parser->lex_modes.current);
10575
10980
  body_start = parser->end;
10576
10981
  } else {
10577
10982
  // Otherwise, we want to indicate that the body of the
@@ -11898,7 +12303,7 @@ parser_lex(pm_parser_t *parser) {
11898
12303
  // terminator) but still continue parsing so that content after the
11899
12304
  // declaration of the heredoc can be parsed.
11900
12305
  if (parser->current.end >= parser->end) {
11901
- pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
12306
+ pm_parser_err_heredoc_term(parser, lex_mode);
11902
12307
  parser->next_start = lex_mode->as.heredoc.next_start;
11903
12308
  parser->heredoc_end = parser->current.end;
11904
12309
  lex_state_set(parser, PM_LEX_STATE_END);
@@ -12537,6 +12942,23 @@ expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_to
12537
12942
  parser->previous.type = PM_TOKEN_MISSING;
12538
12943
  }
12539
12944
 
12945
+ /**
12946
+ * A special expect1 that expects a heredoc terminator and handles popping the
12947
+ * lex mode accordingly.
12948
+ */
12949
+ static void
12950
+ expect1_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
12951
+ if (match1(parser, PM_TOKEN_HEREDOC_END)) {
12952
+ lex_mode_pop(parser);
12953
+ parser_lex(parser);
12954
+ } else {
12955
+ pm_parser_err_heredoc_term(parser, lex_mode);
12956
+ lex_mode_pop(parser);
12957
+ parser->previous.start = parser->previous.end;
12958
+ parser->previous.type = PM_TOKEN_MISSING;
12959
+ }
12960
+ }
12961
+
12540
12962
  static pm_node_t *
12541
12963
  parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id);
12542
12964
 
@@ -12664,25 +13086,72 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
12664
13086
  *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
12665
13087
  }
12666
13088
 
13089
+ /**
13090
+ * Certain expressions are not targetable, but in order to provide a better
13091
+ * experience we give a specific error message. In order to maintain as much
13092
+ * information in the tree as possible, we replace them with local variable
13093
+ * writes.
13094
+ */
13095
+ static pm_node_t *
13096
+ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13097
+ switch (PM_NODE_TYPE(target)) {
13098
+ case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13099
+ case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13100
+ case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13101
+ case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13102
+ case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13103
+ case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13104
+ case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13105
+ default: break;
13106
+ }
13107
+
13108
+ pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
13109
+ pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
13110
+
13111
+ pm_node_destroy(parser, target);
13112
+ return (pm_node_t *) result;
13113
+ }
13114
+
12667
13115
  /**
12668
13116
  * Convert the given node into a valid target node.
12669
13117
  */
12670
13118
  static pm_node_t *
12671
- parse_target(pm_parser_t *parser, pm_node_t *target) {
13119
+ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
12672
13120
  switch (PM_NODE_TYPE(target)) {
12673
13121
  case PM_MISSING_NODE:
12674
13122
  return target;
13123
+ case PM_SOURCE_ENCODING_NODE:
13124
+ case PM_FALSE_NODE:
13125
+ case PM_SOURCE_FILE_NODE:
13126
+ case PM_SOURCE_LINE_NODE:
13127
+ case PM_NIL_NODE:
13128
+ case PM_SELF_NODE:
13129
+ case PM_TRUE_NODE: {
13130
+ // In these special cases, we have specific error messages and we
13131
+ // will replace them with local variable writes.
13132
+ return parse_unwriteable_target(parser, target);
13133
+ }
12675
13134
  case PM_CLASS_VARIABLE_READ_NODE:
12676
13135
  assert(sizeof(pm_class_variable_target_node_t) == sizeof(pm_class_variable_read_node_t));
12677
13136
  target->type = PM_CLASS_VARIABLE_TARGET_NODE;
12678
13137
  return target;
12679
13138
  case PM_CONSTANT_PATH_NODE:
13139
+ if (context_def_p(parser)) {
13140
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13141
+ }
13142
+
12680
13143
  assert(sizeof(pm_constant_path_target_node_t) == sizeof(pm_constant_path_node_t));
12681
13144
  target->type = PM_CONSTANT_PATH_TARGET_NODE;
13145
+
12682
13146
  return target;
12683
13147
  case PM_CONSTANT_READ_NODE:
13148
+ if (context_def_p(parser)) {
13149
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13150
+ }
13151
+
12684
13152
  assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
12685
13153
  target->type = PM_CONSTANT_TARGET_NODE;
13154
+
12686
13155
  return target;
12687
13156
  case PM_BACK_REFERENCE_READ_NODE:
12688
13157
  case PM_NUMBERED_REFERENCE_READ_NODE:
@@ -12715,7 +13184,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
12715
13184
  pm_splat_node_t *splat = (pm_splat_node_t *) target;
12716
13185
 
12717
13186
  if (splat->expression != NULL) {
12718
- splat->expression = parse_target(parser, splat->expression);
13187
+ splat->expression = parse_target(parser, splat->expression, multiple);
12719
13188
  }
12720
13189
 
12721
13190
  return (pm_node_t *) splat;
@@ -12753,6 +13222,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
12753
13222
  }
12754
13223
 
12755
13224
  if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
13225
+ if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13226
+ pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13227
+ }
13228
+
12756
13229
  parse_write_name(parser, &call->name);
12757
13230
  return (pm_node_t *) pm_call_target_node_create(parser, call);
12758
13231
  }
@@ -12780,8 +13253,8 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
12780
13253
  * assignment.
12781
13254
  */
12782
13255
  static pm_node_t *
12783
- parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
12784
- pm_node_t *result = parse_target(parser, target);
13256
+ parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13257
+ pm_node_t *result = parse_target(parser, target, multiple);
12785
13258
 
12786
13259
  // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in parens after the targets.
12787
13260
  if (
@@ -12826,13 +13299,20 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
12826
13299
  }
12827
13300
  case PM_CONSTANT_PATH_NODE: {
12828
13301
  pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
13302
+
13303
+ if (context_def_p(parser)) {
13304
+ pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13305
+ }
13306
+
12829
13307
  return parse_shareable_constant_write(parser, node);
12830
13308
  }
12831
13309
  case PM_CONSTANT_READ_NODE: {
12832
13310
  pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
13311
+
12833
13312
  if (context_def_p(parser)) {
12834
13313
  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12835
13314
  }
13315
+
12836
13316
  pm_node_destroy(parser, target);
12837
13317
  return parse_shareable_constant_write(parser, node);
12838
13318
  }
@@ -13011,7 +13491,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13011
13491
  bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13012
13492
 
13013
13493
  pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13014
- pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target));
13494
+ pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true));
13015
13495
 
13016
13496
  while (accept1(parser, PM_TOKEN_COMMA)) {
13017
13497
  if (accept1(parser, PM_TOKEN_USTAR)) {
@@ -13027,7 +13507,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13027
13507
 
13028
13508
  if (token_begins_expression_p(parser->current.type)) {
13029
13509
  name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
13030
- name = parse_target(parser, name);
13510
+ name = parse_target(parser, name, true);
13031
13511
  }
13032
13512
 
13033
13513
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
@@ -13035,7 +13515,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13035
13515
  has_rest = true;
13036
13516
  } else if (token_begins_expression_p(parser->current.type)) {
13037
13517
  pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
13038
- target = parse_target(parser, target);
13518
+ target = parse_target(parser, target, true);
13039
13519
 
13040
13520
  pm_multi_target_node_targets_append(parser, result, target);
13041
13521
  } else if (!match1(parser, PM_TOKEN_EOF)) {
@@ -13152,11 +13632,11 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
13152
13632
  */
13153
13633
  static void
13154
13634
  pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13155
- const pm_node_t *duplicated = pm_static_literals_add(parser, literals, node);
13635
+ const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
13156
13636
 
13157
13637
  if (duplicated != NULL) {
13158
13638
  pm_buffer_t buffer = { 0 };
13159
- pm_static_literal_inspect(&buffer, parser, duplicated);
13639
+ pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
13160
13640
 
13161
13641
  pm_diagnostic_list_append_format(
13162
13642
  &parser->warning_list,
@@ -13178,7 +13658,7 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
13178
13658
  */
13179
13659
  static void
13180
13660
  pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13181
- if (pm_static_literals_add(parser, literals, node) != NULL) {
13661
+ if (pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node) != NULL) {
13182
13662
  pm_diagnostic_list_append_format(
13183
13663
  &parser->warning_list,
13184
13664
  node->location.start,
@@ -13206,10 +13686,16 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
13206
13686
  pm_token_t operator = parser->previous;
13207
13687
  pm_node_t *value = NULL;
13208
13688
 
13209
- if (token_begins_expression_p(parser->current.type)) {
13689
+ if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
13690
+ // If we're about to parse a nested hash that is being
13691
+ // pushed into this hash directly with **, then we want the
13692
+ // inner hash to share the static literals with the outer
13693
+ // hash.
13694
+ parser->current_hash_keys = literals;
13210
13695
  value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
13211
- }
13212
- else {
13696
+ } else if (token_begins_expression_p(parser->current.type)) {
13697
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
13698
+ } else {
13213
13699
  pm_parser_scope_forwarding_keywords_check(parser, &operator);
13214
13700
  }
13215
13701
 
@@ -13360,15 +13846,16 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13360
13846
  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
13361
13847
  argument = (pm_node_t *) hash;
13362
13848
 
13363
- pm_static_literals_t literals = { 0 };
13364
- bool contains_keyword_splat = parse_assocs(parser, &literals, (pm_node_t *) hash);
13849
+ pm_static_literals_t hash_keys = { 0 };
13850
+ bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash);
13365
13851
 
13366
13852
  parse_arguments_append(parser, arguments, argument);
13367
- if (contains_keyword_splat) {
13368
- pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
13369
- }
13370
13853
 
13371
- pm_static_literals_free(&literals);
13854
+ pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13855
+ if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13856
+ pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
13857
+
13858
+ pm_static_literals_free(&hash_keys);
13372
13859
  parsed_bare_hash = true;
13373
13860
 
13374
13861
  break;
@@ -13444,7 +13931,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13444
13931
  argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, PM_ERR_EXPECT_ARGUMENT);
13445
13932
  }
13446
13933
 
13934
+ bool contains_keywords = false;
13447
13935
  bool contains_keyword_splat = false;
13936
+
13448
13937
  if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
13449
13938
  if (parsed_bare_hash) {
13450
13939
  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
@@ -13458,10 +13947,11 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13458
13947
  }
13459
13948
 
13460
13949
  pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
13950
+ contains_keywords = true;
13461
13951
 
13462
13952
  // Create the set of static literals for this hash.
13463
- pm_static_literals_t literals = { 0 };
13464
- pm_hash_key_static_literals_add(parser, &literals, argument);
13953
+ pm_static_literals_t hash_keys = { 0 };
13954
+ pm_hash_key_static_literals_add(parser, &hash_keys, argument);
13465
13955
 
13466
13956
  // Finish parsing the one we are part way through.
13467
13957
  pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
@@ -13475,10 +13965,10 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13475
13965
  token_begins_expression_p(parser->current.type) ||
13476
13966
  match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
13477
13967
  )) {
13478
- contains_keyword_splat = parse_assocs(parser, &literals, (pm_node_t *) bare_hash);
13968
+ contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash);
13479
13969
  }
13480
13970
 
13481
- pm_static_literals_free(&literals);
13971
+ pm_static_literals_free(&hash_keys);
13482
13972
  parsed_bare_hash = true;
13483
13973
  } else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
13484
13974
  // TODO: Could we solve this with binding powers instead?
@@ -13486,9 +13976,12 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13486
13976
  }
13487
13977
 
13488
13978
  parse_arguments_append(parser, arguments, argument);
13489
- if (contains_keyword_splat) {
13490
- pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
13491
- }
13979
+
13980
+ pm_node_flags_t flags = 0;
13981
+ if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13982
+ if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13983
+ pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
13984
+
13492
13985
  break;
13493
13986
  }
13494
13987
  }
@@ -13601,7 +14094,6 @@ typedef enum {
13601
14094
  PM_PARAMETERS_ORDER_OPTIONAL,
13602
14095
  PM_PARAMETERS_ORDER_NAMED,
13603
14096
  PM_PARAMETERS_ORDER_NONE,
13604
-
13605
14097
  } pm_parameters_order_t;
13606
14098
 
13607
14099
  /**
@@ -13626,31 +14118,37 @@ static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
13626
14118
  * Check if current parameter follows valid parameters ordering. If not it adds
13627
14119
  * an error to the list without stopping the parsing, otherwise sets the
13628
14120
  * parameters state to the one corresponding to the current parameter.
14121
+ *
14122
+ * It returns true if it was successful, and false otherwise.
13629
14123
  */
13630
- static void
14124
+ static bool
13631
14125
  update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
13632
14126
  pm_parameters_order_t state = parameters_ordering[token->type];
13633
- if (state == PM_PARAMETERS_NO_CHANGE) return;
14127
+ if (state == PM_PARAMETERS_NO_CHANGE) return true;
13634
14128
 
13635
14129
  // If we see another ordered argument after a optional argument
13636
14130
  // we only continue parsing ordered arguments until we stop seeing ordered arguments.
13637
14131
  if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13638
14132
  *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
13639
- return;
14133
+ return true;
13640
14134
  } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13641
- return;
14135
+ return true;
13642
14136
  }
13643
14137
 
13644
14138
  if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13645
14139
  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
13646
- }
13647
-
13648
- if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14140
+ return false;
14141
+ } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14142
+ pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14143
+ return false;
14144
+ } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
13649
14145
  // We know what transition we failed on, so we can provide a better error here.
13650
14146
  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
13651
- } else if (state < *current) {
13652
- *current = state;
14147
+ return false;
13653
14148
  }
14149
+
14150
+ if (state < *current) *current = state;
14151
+ return true;
13654
14152
  }
13655
14153
 
13656
14154
  /**
@@ -13719,27 +14217,22 @@ parse_parameters(
13719
14217
  pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
13720
14218
  }
13721
14219
 
13722
- if (order > PM_PARAMETERS_ORDER_NOTHING_AFTER) {
13723
- update_parameter_state(parser, &parser->current, &order);
13724
- parser_lex(parser);
14220
+ bool succeeded = update_parameter_state(parser, &parser->current, &order);
14221
+ parser_lex(parser);
13725
14222
 
13726
- parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14223
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14224
+ pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
13727
14225
 
13728
- pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
13729
- if (params->keyword_rest != NULL) {
13730
- // If we already have a keyword rest parameter, then we replace it with the
13731
- // forwarding parameter and move the keyword rest parameter to the posts list.
13732
- pm_node_t *keyword_rest = params->keyword_rest;
13733
- pm_parameters_node_posts_append(params, keyword_rest);
13734
- pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
13735
- params->keyword_rest = NULL;
13736
- }
13737
- pm_parameters_node_keyword_rest_set(params, (pm_node_t *)param);
13738
- } else {
13739
- update_parameter_state(parser, &parser->current, &order);
13740
- parser_lex(parser);
14226
+ if (params->keyword_rest != NULL) {
14227
+ // If we already have a keyword rest parameter, then we replace it with the
14228
+ // forwarding parameter and move the keyword rest parameter to the posts list.
14229
+ pm_node_t *keyword_rest = params->keyword_rest;
14230
+ pm_parameters_node_posts_append(params, keyword_rest);
14231
+ if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14232
+ params->keyword_rest = NULL;
13741
14233
  }
13742
14234
 
14235
+ pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
13743
14236
  break;
13744
14237
  }
13745
14238
  case PM_TOKEN_CLASS_VARIABLE:
@@ -13834,6 +14327,12 @@ parse_parameters(
13834
14327
  pm_token_t local = name;
13835
14328
  local.end -= 1;
13836
14329
 
14330
+ if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14331
+ pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14332
+ } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14333
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14334
+ }
14335
+
13837
14336
  bool repeated = pm_parser_parameter_name_check(parser, &local);
13838
14337
  pm_parser_local_add_token(parser, &local, 1);
13839
14338
 
@@ -13909,6 +14408,7 @@ parse_parameters(
13909
14408
  pm_token_t operator = parser->previous;
13910
14409
  pm_token_t name;
13911
14410
  bool repeated = false;
14411
+
13912
14412
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13913
14413
  name = parser->previous;
13914
14414
  repeated = pm_parser_parameter_name_check(parser, &name);
@@ -13922,6 +14422,7 @@ parse_parameters(
13922
14422
  if (repeated) {
13923
14423
  pm_node_flag_set_repeated_parameter(param);
13924
14424
  }
14425
+
13925
14426
  if (params->rest == NULL) {
13926
14427
  pm_parameters_node_rest_set(params, param);
13927
14428
  } else {
@@ -13933,6 +14434,7 @@ parse_parameters(
13933
14434
  }
13934
14435
  case PM_TOKEN_STAR_STAR:
13935
14436
  case PM_TOKEN_USTAR_STAR: {
14437
+ pm_parameters_order_t previous_order = order;
13936
14438
  update_parameter_state(parser, &parser->current, &order);
13937
14439
  parser_lex(parser);
13938
14440
 
@@ -13940,6 +14442,10 @@ parse_parameters(
13940
14442
  pm_node_t *param;
13941
14443
 
13942
14444
  if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14445
+ if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14446
+ pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14447
+ }
14448
+
13943
14449
  param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
13944
14450
  } else {
13945
14451
  pm_token_t name;
@@ -14037,7 +14543,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
14037
14543
  pm_rescue_node_operator_set(rescue, &parser->previous);
14038
14544
 
14039
14545
  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
14040
- reference = parse_target(parser, reference);
14546
+ reference = parse_target(parser, reference, false);
14041
14547
 
14042
14548
  pm_rescue_node_reference_set(rescue, reference);
14043
14549
  break;
@@ -14067,7 +14573,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
14067
14573
  pm_rescue_node_operator_set(rescue, &parser->previous);
14068
14574
 
14069
14575
  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
14070
- reference = parse_target(parser, reference);
14576
+ reference = parse_target(parser, reference, false);
14071
14577
 
14072
14578
  pm_rescue_node_reference_set(rescue, reference);
14073
14579
  break;
@@ -14391,7 +14897,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14391
14897
  arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14392
14898
  } else {
14393
14899
  pm_accepts_block_stack_push(parser, true);
14394
- parse_arguments(parser, arguments, true, PM_TOKEN_PARENTHESIS_RIGHT);
14900
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT);
14395
14901
 
14396
14902
  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14397
14903
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
@@ -14409,7 +14915,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14409
14915
  // If we get here, then the subsequent token cannot be used as an infix
14410
14916
  // operator. In this case we assume the subsequent token is part of an
14411
14917
  // argument to this method call.
14412
- parse_arguments(parser, arguments, true, PM_TOKEN_EOF);
14918
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF);
14413
14919
 
14414
14920
  // If we have done with the arguments and still not consumed the comma,
14415
14921
  // then we have a trailing comma where we need to check whether it is
@@ -14440,11 +14946,8 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14440
14946
  if (arguments->block == NULL && !arguments->has_forwarding) {
14441
14947
  arguments->block = (pm_node_t *) block;
14442
14948
  } else {
14443
- if (arguments->has_forwarding) {
14444
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_FORWARDING);
14445
- } else {
14446
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
14447
- }
14949
+ pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
14950
+
14448
14951
  if (arguments->block != NULL) {
14449
14952
  if (arguments->arguments == NULL) {
14450
14953
  arguments->arguments = pm_arguments_node_create(parser);
@@ -15036,7 +15539,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
15036
15539
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15037
15540
 
15038
15541
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15039
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
15542
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15040
15543
 
15041
15544
  return (pm_node_t *) symbol;
15042
15545
  }
@@ -15136,7 +15639,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
15136
15639
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15137
15640
  }
15138
15641
 
15139
- return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
15642
+ return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
15140
15643
  }
15141
15644
 
15142
15645
  /**
@@ -15161,7 +15664,7 @@ parse_undef_argument(pm_parser_t *parser) {
15161
15664
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15162
15665
 
15163
15666
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15164
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
15667
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15165
15668
 
15166
15669
  return (pm_node_t *) symbol;
15167
15670
  }
@@ -15202,7 +15705,7 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
15202
15705
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15203
15706
 
15204
15707
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15205
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
15708
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15206
15709
 
15207
15710
  return (pm_node_t *) symbol;
15208
15711
  }
@@ -15429,8 +15932,12 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
15429
15932
  nodes->size = write_index;
15430
15933
  }
15431
15934
 
15935
+ #define PM_PARSE_PATTERN_SINGLE 0
15936
+ #define PM_PARSE_PATTERN_TOP 1
15937
+ #define PM_PARSE_PATTERN_MULTI 2
15938
+
15432
15939
  static pm_node_t *
15433
- parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id);
15940
+ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id);
15434
15941
 
15435
15942
  /**
15436
15943
  * Add the newly created local to the list of captures for this pattern matching
@@ -15459,9 +15966,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
15459
15966
  while (accept1(parser, PM_TOKEN_COLON_COLON)) {
15460
15967
  pm_token_t delimiter = parser->previous;
15461
15968
  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
15462
-
15463
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
15464
- node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, child);
15969
+ node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
15465
15970
  }
15466
15971
 
15467
15972
  // If there is a [ or ( that follows, then this is part of a larger pattern
@@ -15480,7 +15985,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
15480
15985
  accept1(parser, PM_TOKEN_NEWLINE);
15481
15986
 
15482
15987
  if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
15483
- inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
15988
+ inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
15484
15989
  accept1(parser, PM_TOKEN_NEWLINE);
15485
15990
  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
15486
15991
  }
@@ -15492,7 +15997,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
15492
15997
  accept1(parser, PM_TOKEN_NEWLINE);
15493
15998
 
15494
15999
  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15495
- inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
16000
+ inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
15496
16001
  accept1(parser, PM_TOKEN_NEWLINE);
15497
16002
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
15498
16003
  }
@@ -15640,6 +16145,33 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
15640
16145
  return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
15641
16146
  }
15642
16147
 
16148
+ /**
16149
+ * Check that the slice of the source given by the bounds parameters constitutes
16150
+ * a valid local variable name.
16151
+ */
16152
+ static bool
16153
+ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16154
+ ptrdiff_t length = end - start;
16155
+ if (length == 0) return false;
16156
+
16157
+ // First ensure that it starts with a valid identifier starting character.
16158
+ size_t width = char_is_identifier_start(parser, start);
16159
+ if (width == 0) return false;
16160
+
16161
+ // Next, ensure that it's not an uppercase character.
16162
+ if (parser->encoding_changed) {
16163
+ if (parser->encoding->isupper_char(start, length)) return false;
16164
+ } else {
16165
+ if (pm_encoding_utf_8_isupper_char(start, length)) return false;
16166
+ }
16167
+
16168
+ // Next, iterate through all of the bytes of the string to ensure that they
16169
+ // are all valid identifier characters.
16170
+ const uint8_t *cursor = start + width;
16171
+ while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
16172
+ return cursor == end;
16173
+ }
16174
+
15643
16175
  /**
15644
16176
  * Create an implicit node for the value of a hash pattern that has omitted the
15645
16177
  * value. This will use an implicit local variable target.
@@ -15647,14 +16179,18 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
15647
16179
  static pm_node_t *
15648
16180
  parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
15649
16181
  const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
15650
- pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
15651
16182
 
16183
+ pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
15652
16184
  int depth = -1;
15653
- if (value_loc->end[-1] == '!' || value_loc->end[-1] == '?') {
15654
- pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
15655
- PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
15656
- } else {
16185
+
16186
+ if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
15657
16187
  depth = pm_parser_local_depth_constant_id(parser, constant_id);
16188
+ } else {
16189
+ pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
16190
+
16191
+ if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
16192
+ PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
16193
+ }
15658
16194
  }
15659
16195
 
15660
16196
  if (depth == -1) {
@@ -15678,7 +16214,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
15678
16214
  */
15679
16215
  static void
15680
16216
  parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
15681
- if (pm_static_literals_add(parser, keys, node) != NULL) {
16217
+ if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
15682
16218
  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
15683
16219
  }
15684
16220
  }
@@ -15709,7 +16245,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
15709
16245
  } else {
15710
16246
  // Here we have a value for the first assoc in the list, so
15711
16247
  // we will parse it now.
15712
- value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16248
+ value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
15713
16249
  }
15714
16250
 
15715
16251
  pm_token_t operator = not_provided(parser);
@@ -15724,7 +16260,8 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
15724
16260
  // If we get anything else, then this is an error. For this we'll
15725
16261
  // create a missing node for the value and create an assoc node for
15726
16262
  // the first node in the list.
15727
- pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_LABEL);
16263
+ pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
16264
+ pm_parser_err_node(parser, first_node, diag_id);
15728
16265
 
15729
16266
  pm_token_t operator = not_provided(parser);
15730
16267
  pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
@@ -15761,7 +16298,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
15761
16298
  if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
15762
16299
  value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
15763
16300
  } else {
15764
- value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16301
+ value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
15765
16302
  }
15766
16303
 
15767
16304
  pm_token_t operator = not_provided(parser);
@@ -15818,7 +16355,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
15818
16355
 
15819
16356
  // Otherwise, we'll parse the inner pattern, then deal with it depending
15820
16357
  // on the type it returns.
15821
- pm_node_t *inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
16358
+ pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
15822
16359
 
15823
16360
  accept1(parser, PM_TOKEN_NEWLINE);
15824
16361
  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
@@ -15885,11 +16422,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
15885
16422
  first_node = parse_pattern_keyword_rest(parser, captures);
15886
16423
  break;
15887
16424
  case PM_TOKEN_STRING_BEGIN:
15888
- first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
16425
+ first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY_LABEL);
15889
16426
  break;
15890
16427
  default: {
16428
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
15891
16429
  parser_lex(parser);
15892
- pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
15893
16430
 
15894
16431
  first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
15895
16432
  break;
@@ -15966,7 +16503,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
15966
16503
 
15967
16504
  if (variable == NULL) {
15968
16505
  if (
15969
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
16506
+ (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
15970
16507
  !parser->current_scope->closed &&
15971
16508
  (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
15972
16509
  pm_token_is_it(parser->previous.start, parser->previous.end)
@@ -16040,8 +16577,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
16040
16577
  parser_lex(parser);
16041
16578
 
16042
16579
  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16043
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
16044
- pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, child);
16580
+ pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
16045
16581
 
16046
16582
  return parse_pattern_constant_path(parser, captures, (pm_node_t *) node);
16047
16583
  }
@@ -16092,7 +16628,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
16092
16628
  pm_token_t opening = parser->current;
16093
16629
  parser_lex(parser);
16094
16630
 
16095
- pm_node_t *body = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
16631
+ pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
16096
16632
  accept1(parser, PM_TOKEN_NEWLINE);
16097
16633
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16098
16634
  pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
@@ -16151,7 +16687,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
16151
16687
  * Parse a pattern matching expression.
16152
16688
  */
16153
16689
  static pm_node_t *
16154
- parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id) {
16690
+ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id) {
16155
16691
  pm_node_t *node = NULL;
16156
16692
 
16157
16693
  bool leading_rest = false;
@@ -16161,14 +16697,26 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
16161
16697
  case PM_TOKEN_LABEL: {
16162
16698
  parser_lex(parser);
16163
16699
  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
16164
- return (pm_node_t *) parse_pattern_hash(parser, captures, key);
16700
+ node = (pm_node_t *) parse_pattern_hash(parser, captures, key);
16701
+
16702
+ if (!(flags & PM_PARSE_PATTERN_TOP)) {
16703
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16704
+ }
16705
+
16706
+ return node;
16165
16707
  }
16166
16708
  case PM_TOKEN_USTAR_STAR: {
16167
16709
  node = parse_pattern_keyword_rest(parser, captures);
16168
- return (pm_node_t *) parse_pattern_hash(parser, captures, node);
16710
+ node = (pm_node_t *) parse_pattern_hash(parser, captures, node);
16711
+
16712
+ if (!(flags & PM_PARSE_PATTERN_TOP)) {
16713
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16714
+ }
16715
+
16716
+ return node;
16169
16717
  }
16170
16718
  case PM_TOKEN_USTAR: {
16171
- if (top_pattern) {
16719
+ if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
16172
16720
  parser_lex(parser);
16173
16721
  node = (pm_node_t *) parse_pattern_rest(parser, captures);
16174
16722
  leading_rest = true;
@@ -16187,7 +16735,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
16187
16735
  return (pm_node_t *) parse_pattern_hash(parser, captures, node);
16188
16736
  }
16189
16737
 
16190
- if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
16738
+ if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
16191
16739
  // If we have a comma, then we are now parsing either an array pattern or a
16192
16740
  // find pattern. We need to parse all of the patterns, put them into a big
16193
16741
  // list, and then determine which type of node we have.
@@ -16367,7 +16915,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
16367
16915
 
16368
16916
  pm_node_list_free(&parts);
16369
16917
  } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16370
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
16918
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16371
16919
  } else if (match1(parser, PM_TOKEN_EOF)) {
16372
16920
  pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16373
16921
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
@@ -16393,7 +16941,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
16393
16941
  pm_node_flag_set(node, parse_unescaped_encoding(parser));
16394
16942
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16395
16943
  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16396
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
16944
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16397
16945
  } else {
16398
16946
  // If we get here, then we have interpolation so we'll need
16399
16947
  // to create a string or symbol node with interpolation.
@@ -16475,11 +17023,11 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
16475
17023
  pm_token_t bounds = not_provided(parser);
16476
17024
 
16477
17025
  pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16478
- pm_interpolated_string_node_append(parser, container, current);
17026
+ pm_interpolated_string_node_append(container, current);
16479
17027
  current = (pm_node_t *) container;
16480
17028
  }
16481
17029
 
16482
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, node);
17030
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16483
17031
  }
16484
17032
  }
16485
17033
 
@@ -16498,6 +17046,11 @@ pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
16498
17046
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
16499
17047
  break;
16500
17048
  }
17049
+ case PM_ERR_HASH_VALUE:
17050
+ case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17051
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17052
+ break;
17053
+ }
16501
17054
  case PM_ERR_UNARY_RECEIVER: {
16502
17055
  const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
16503
17056
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
@@ -16724,13 +17277,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16724
17277
  }
16725
17278
 
16726
17279
  element = (pm_node_t *) pm_keyword_hash_node_create(parser);
16727
- pm_static_literals_t literals = { 0 };
17280
+ pm_static_literals_t hash_keys = { 0 };
16728
17281
 
16729
17282
  if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
16730
- parse_assocs(parser, &literals, element);
17283
+ parse_assocs(parser, &hash_keys, element);
16731
17284
  }
16732
17285
 
16733
- pm_static_literals_free(&literals);
17286
+ pm_static_literals_free(&hash_keys);
16734
17287
  parsed_bare_hash = true;
16735
17288
  } else {
16736
17289
  element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION);
@@ -16741,8 +17294,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16741
17294
  }
16742
17295
 
16743
17296
  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
16744
- pm_static_literals_t literals = { 0 };
16745
- pm_hash_key_static_literals_add(parser, &literals, element);
17297
+ pm_static_literals_t hash_keys = { 0 };
17298
+ pm_hash_key_static_literals_add(parser, &hash_keys, element);
16746
17299
 
16747
17300
  pm_token_t operator;
16748
17301
  if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
@@ -16757,10 +17310,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16757
17310
 
16758
17311
  element = (pm_node_t *) hash;
16759
17312
  if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16760
- parse_assocs(parser, &literals, element);
17313
+ parse_assocs(parser, &hash_keys, element);
16761
17314
  }
16762
17315
 
16763
- pm_static_literals_free(&literals);
17316
+ pm_static_literals_free(&hash_keys);
16764
17317
  parsed_bare_hash = true;
16765
17318
  }
16766
17319
  }
@@ -16854,7 +17407,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16854
17407
  return (pm_node_t *) multi_target;
16855
17408
  }
16856
17409
 
16857
- return parse_target_validate(parser, (pm_node_t *) multi_target);
17410
+ return parse_target_validate(parser, (pm_node_t *) multi_target, false);
16858
17411
  }
16859
17412
 
16860
17413
  // If we have a single statement and are ending on a right parenthesis
@@ -16920,14 +17473,30 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16920
17473
  return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
16921
17474
  }
16922
17475
  case PM_TOKEN_BRACE_LEFT: {
17476
+ // If we were passed a current_hash_keys via the parser, then that
17477
+ // means we're already parsing a hash and we want to share the set
17478
+ // of hash keys with this inner hash we're about to parse for the
17479
+ // sake of warnings. We'll set it to NULL after we grab it to make
17480
+ // sure subsequent expressions don't use it. Effectively this is a
17481
+ // way of getting around passing it to every call to
17482
+ // parse_expression.
17483
+ pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
17484
+ parser->current_hash_keys = NULL;
17485
+
16923
17486
  pm_accepts_block_stack_push(parser, true);
16924
17487
  parser_lex(parser);
16925
17488
 
16926
17489
  pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
16927
- pm_static_literals_t literals = { 0 };
16928
17490
 
16929
17491
  if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
16930
- parse_assocs(parser, &literals, (pm_node_t *) node);
17492
+ if (current_hash_keys != NULL) {
17493
+ parse_assocs(parser, current_hash_keys, (pm_node_t *) node);
17494
+ } else {
17495
+ pm_static_literals_t hash_keys = { 0 };
17496
+ parse_assocs(parser, &hash_keys, (pm_node_t *) node);
17497
+ pm_static_literals_free(&hash_keys);
17498
+ }
17499
+
16931
17500
  accept1(parser, PM_TOKEN_NEWLINE);
16932
17501
  }
16933
17502
 
@@ -16935,7 +17504,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16935
17504
  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
16936
17505
  pm_hash_node_closing_loc_set(node, &parser->previous);
16937
17506
 
16938
- pm_static_literals_free(&literals);
16939
17507
  return (pm_node_t *) node;
16940
17508
  }
16941
17509
  case PM_TOKEN_CHARACTER_LITERAL: {
@@ -17000,12 +17568,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17000
17568
  }
17001
17569
  case PM_TOKEN_UCOLON_COLON: {
17002
17570
  parser_lex(parser);
17003
-
17004
17571
  pm_token_t delimiter = parser->previous;
17005
- expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17006
17572
 
17007
- pm_node_t *constant = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
17008
- pm_node_t *node = (pm_node_t *)pm_constant_path_node_create(parser, NULL, &delimiter, constant);
17573
+ expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17574
+ pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17009
17575
 
17010
17576
  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17011
17577
  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
@@ -17165,8 +17731,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17165
17731
  if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
17166
17732
  // If we get here, then we have an empty heredoc. We'll create
17167
17733
  // an empty content token and return an empty string node.
17168
- lex_mode_pop(parser);
17169
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
17734
+ expect1_heredoc_term(parser, lex_mode);
17170
17735
  pm_token_t content = parse_strings_empty_content(parser->previous.start);
17171
17736
 
17172
17737
  if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
@@ -17207,8 +17772,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17207
17772
  }
17208
17773
 
17209
17774
  node = (pm_node_t *) cast;
17210
- lex_mode_pop(parser);
17211
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
17775
+ expect1_heredoc_term(parser, lex_mode);
17212
17776
  } else {
17213
17777
  // If we get here, then we have multiple parts in the heredoc,
17214
17778
  // so we'll need to create an interpolated string node to hold
@@ -17230,20 +17794,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17230
17794
  pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
17231
17795
  cast->parts = parts;
17232
17796
 
17233
- lex_mode_pop(parser);
17234
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
17235
-
17797
+ expect1_heredoc_term(parser, lex_mode);
17236
17798
  pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
17799
+
17237
17800
  cast->base.location = cast->opening_loc;
17238
17801
  node = (pm_node_t *) cast;
17239
17802
  } else {
17240
17803
  pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
17241
17804
  pm_node_list_free(&parts);
17242
17805
 
17243
- lex_mode_pop(parser);
17244
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
17245
-
17806
+ expect1_heredoc_term(parser, lex_mode);
17246
17807
  pm_interpolated_string_node_closing_set(cast, &parser->previous);
17808
+
17247
17809
  cast->base.location = cast->opening_loc;
17248
17810
  node = (pm_node_t *) cast;
17249
17811
  }
@@ -17464,7 +18026,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17464
18026
  pm_token_t in_keyword = parser->previous;
17465
18027
 
17466
18028
  pm_constant_id_list_t captures = { 0 };
17467
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
18029
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
17468
18030
 
17469
18031
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17470
18032
  pm_constant_id_list_free(&captures);
@@ -17493,7 +18055,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17493
18055
  then_keyword = not_provided(parser);
17494
18056
  }
17495
18057
  } else {
17496
- expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18058
+ expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
17497
18059
  then_keyword = parser->previous;
17498
18060
  }
17499
18061
 
@@ -17947,7 +18509,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17947
18509
  lex_state_set(parser, PM_LEX_STATE_BEG);
17948
18510
  parser->command_start = true;
17949
18511
 
17950
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_DEF_PARAMS_TERM_PAREN);
18512
+ if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18513
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
18514
+ parser->previous.start = parser->previous.end;
18515
+ parser->previous.type = PM_TOKEN_MISSING;
18516
+ }
18517
+
17951
18518
  rparen = parser->previous;
17952
18519
  break;
17953
18520
  }
@@ -18145,7 +18712,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18145
18712
  if (match1(parser, PM_TOKEN_COMMA)) {
18146
18713
  index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
18147
18714
  } else {
18148
- index = parse_target(parser, index);
18715
+ index = parse_target(parser, index, false);
18149
18716
  }
18150
18717
 
18151
18718
  context_pop(parser);
@@ -18267,9 +18834,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18267
18834
  pm_token_t double_colon = parser->previous;
18268
18835
 
18269
18836
  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18270
- pm_node_t *constant = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
18271
-
18272
- constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, constant);
18837
+ constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
18273
18838
  }
18274
18839
 
18275
18840
  // Here we retrieve the name of the module. If it wasn't a constant,
@@ -18649,15 +19214,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18649
19214
  // If we hit string content and the current node is
18650
19215
  // an interpolated string, then we need to append
18651
19216
  // the string content to the list of child nodes.
18652
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
19217
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
18653
19218
  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
18654
19219
  // If we hit string content and the current node is
18655
19220
  // a string node, then we need to convert the
18656
19221
  // current node into an interpolated string and add
18657
19222
  // the string content to the list of child nodes.
18658
19223
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
18659
- pm_interpolated_string_node_append(parser, interpolated, current);
18660
- pm_interpolated_string_node_append(parser, interpolated, string);
19224
+ pm_interpolated_string_node_append(interpolated, current);
19225
+ pm_interpolated_string_node_append(interpolated, string);
18661
19226
  current = (pm_node_t *) interpolated;
18662
19227
  } else {
18663
19228
  assert(false && "unreachable");
@@ -18682,7 +19247,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18682
19247
  pm_token_t opening = not_provided(parser);
18683
19248
  pm_token_t closing = not_provided(parser);
18684
19249
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
18685
- pm_interpolated_string_node_append(parser, interpolated, current);
19250
+ pm_interpolated_string_node_append(interpolated, current);
18686
19251
  current = (pm_node_t *) interpolated;
18687
19252
  } else {
18688
19253
  // If we hit an embedded variable and the current
@@ -18691,7 +19256,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18691
19256
  }
18692
19257
 
18693
19258
  pm_node_t *part = parse_string_part(parser);
18694
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
19259
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
18695
19260
  break;
18696
19261
  }
18697
19262
  case PM_TOKEN_EMBEXPR_BEGIN: {
@@ -18711,7 +19276,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18711
19276
  pm_token_t opening = not_provided(parser);
18712
19277
  pm_token_t closing = not_provided(parser);
18713
19278
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
18714
- pm_interpolated_string_node_append(parser, interpolated, current);
19279
+ pm_interpolated_string_node_append(interpolated, current);
18715
19280
  current = (pm_node_t *) interpolated;
18716
19281
  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
18717
19282
  // If we hit an embedded expression and the current
@@ -18722,7 +19287,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18722
19287
  }
18723
19288
 
18724
19289
  pm_node_t *part = parse_string_part(parser);
18725
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
19290
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
18726
19291
  break;
18727
19292
  }
18728
19293
  default:
@@ -18798,6 +19363,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18798
19363
  pm_token_t opening = not_provided(parser);
18799
19364
  pm_token_t closing = not_provided(parser);
18800
19365
  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
19366
+
19367
+ if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
19368
+ // This is extremely strange, but the first string part of a
19369
+ // regular expression will always be tagged as binary if we
19370
+ // are in a US-ASCII file, no matter its contents.
19371
+ pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
19372
+ }
19373
+
18801
19374
  pm_interpolated_regular_expression_node_append(interpolated, part);
18802
19375
  } else {
18803
19376
  // If the first part of the body of the regular expression is not a
@@ -18926,7 +19499,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18926
19499
  if (match1(parser, PM_TOKEN_COMMA)) {
18927
19500
  return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX);
18928
19501
  } else {
18929
- return parse_target_validate(parser, splat);
19502
+ return parse_target_validate(parser, splat, true);
18930
19503
  }
18931
19504
  }
18932
19505
  case PM_TOKEN_BANG: {
@@ -19271,39 +19844,6 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
19271
19844
  }
19272
19845
  }
19273
19846
 
19274
- /**
19275
- * Returns true if the name of the capture group is a valid local variable that
19276
- * can be written to.
19277
- */
19278
- static bool
19279
- parse_regular_expression_named_capture(pm_parser_t *parser, const uint8_t *source, size_t length) {
19280
- if (length == 0) {
19281
- return false;
19282
- }
19283
-
19284
- // First ensure that it starts with a valid identifier starting character.
19285
- size_t width = char_is_identifier_start(parser, source);
19286
- if (!width) {
19287
- return false;
19288
- }
19289
-
19290
- // Next, ensure that it's not an uppercase character.
19291
- if (parser->encoding_changed) {
19292
- if (parser->encoding->isupper_char(source, (ptrdiff_t) length)) return false;
19293
- } else {
19294
- if (pm_encoding_utf_8_isupper_char(source, (ptrdiff_t) length)) return false;
19295
- }
19296
-
19297
- // Next, iterate through all of the bytes of the string to ensure that they
19298
- // are all valid identifier characters.
19299
- const uint8_t *cursor = source + width;
19300
- while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
19301
- cursor += width;
19302
- }
19303
-
19304
- return cursor == source + length;
19305
- }
19306
-
19307
19847
  /**
19308
19848
  * Potentially change a =~ with a regular expression with named captures into a
19309
19849
  * match write node.
@@ -19330,7 +19870,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
19330
19870
 
19331
19871
  // If the name of the capture group isn't a valid identifier, we do
19332
19872
  // not add it to the local table.
19333
- if (!parse_regular_expression_named_capture(parser, source, length)) continue;
19873
+ if (!pm_slice_is_valid_local(parser, source, source + length)) continue;
19334
19874
 
19335
19875
  if (content->type == PM_STRING_SHARED) {
19336
19876
  // If the unescaped string is a slice of the source, then we can
@@ -19788,7 +20328,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
19788
20328
  // In this case we have an operator but we don't know what it's for.
19789
20329
  // We need to treat it as an error. For now, we'll mark it as an error
19790
20330
  // and just skip right past it.
19791
- pm_parser_err_previous(parser, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
20331
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
19792
20332
  return node;
19793
20333
  }
19794
20334
  }
@@ -20059,8 +20599,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20059
20599
  path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
20060
20600
  } else {
20061
20601
  // Otherwise, this is a constant path. That would look like Foo::Bar.
20062
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
20063
- path = (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
20602
+ path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
20064
20603
  }
20065
20604
 
20066
20605
  // If this is followed by a comma then it is a multiple assignment.
@@ -20099,9 +20638,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20099
20638
  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
20100
20639
  }
20101
20640
  default: {
20102
- pm_parser_err_token(parser, &delimiter, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
20103
- pm_node_t *child = (pm_node_t *) pm_missing_node_create(parser, delimiter.start, delimiter.end);
20104
- return (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
20641
+ expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
20642
+ return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
20105
20643
  }
20106
20644
  }
20107
20645
  }
@@ -20172,7 +20710,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20172
20710
  parser_lex(parser);
20173
20711
 
20174
20712
  pm_constant_id_list_t captures = { 0 };
20175
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
20713
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
20176
20714
 
20177
20715
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
20178
20716
  pm_constant_id_list_free(&captures);
@@ -20189,7 +20727,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20189
20727
  parser_lex(parser);
20190
20728
 
20191
20729
  pm_constant_id_list_t captures = { 0 };
20192
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
20730
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
20193
20731
 
20194
20732
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
20195
20733
  pm_constant_id_list_free(&captures);
@@ -20202,6 +20740,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20202
20740
  }
20203
20741
  }
20204
20742
 
20743
+ #undef PM_PARSE_PATTERN_SINGLE
20744
+ #undef PM_PARSE_PATTERN_TOP
20745
+ #undef PM_PARSE_PATTERN_MULTI
20746
+
20205
20747
  /**
20206
20748
  * Parse an expression at the given point of the parser using the given binding
20207
20749
  * power to parse subsequent chains. If this function finds a syntax error, it
@@ -21246,25 +21788,28 @@ pm_parser_errors_format(const pm_parser_t *parser, const pm_list_t *error_list,
21246
21788
  pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
21247
21789
 
21248
21790
  size_t column = 0;
21249
- while (column < error->column_end) {
21250
- if (column < error->column_start) {
21251
- pm_buffer_append_byte(buffer, ' ');
21252
- } else {
21253
- const uint8_t caret = column == error->column_start ? '^' : '~';
21791
+ while (column < error->column_start) {
21792
+ pm_buffer_append_byte(buffer, ' ');
21254
21793
 
21255
- if (colorize) {
21256
- pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
21257
- pm_buffer_append_byte(buffer, caret);
21258
- pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
21259
- } else {
21260
- pm_buffer_append_byte(buffer, caret);
21261
- }
21262
- }
21794
+ size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21795
+ column += (char_width == 0 ? 1 : char_width);
21796
+ }
21797
+
21798
+ if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
21799
+ pm_buffer_append_byte(buffer, '^');
21800
+
21801
+ size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21802
+ column += (char_width == 0 ? 1 : char_width);
21803
+
21804
+ while (column < error->column_end) {
21805
+ pm_buffer_append_byte(buffer, '~');
21263
21806
 
21264
21807
  size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21265
21808
  column += (char_width == 0 ? 1 : char_width);
21266
21809
  }
21267
21810
 
21811
+ if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
21812
+
21268
21813
  if (inline_messages) {
21269
21814
  pm_buffer_append_byte(buffer, ' ');
21270
21815
  assert(error->error != NULL);