prism 0.27.0 → 0.29.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +45 -1
  3. data/config.yml +68 -44
  4. data/docs/configuration.md +1 -0
  5. data/ext/prism/api_node.c +854 -847
  6. data/ext/prism/extconf.rb +27 -23
  7. data/ext/prism/extension.c +5 -3
  8. data/ext/prism/extension.h +1 -1
  9. data/include/prism/ast.h +70 -48
  10. data/include/prism/diagnostic.h +23 -6
  11. data/include/prism/options.h +2 -2
  12. data/include/prism/parser.h +10 -0
  13. data/include/prism/static_literals.h +8 -6
  14. data/include/prism/version.h +2 -2
  15. data/lib/prism/desugar_compiler.rb +4 -4
  16. data/lib/prism/dot_visitor.rb +54 -38
  17. data/lib/prism/dsl.rb +24 -24
  18. data/lib/prism/ffi.rb +4 -4
  19. data/lib/prism/inspect_visitor.rb +2156 -0
  20. data/lib/prism/lex_compat.rb +1 -1
  21. data/lib/prism/mutation_compiler.rb +2 -2
  22. data/lib/prism/node.rb +737 -1863
  23. data/lib/prism/node_ext.rb +176 -5
  24. data/lib/prism/parse_result/comments.rb +1 -1
  25. data/lib/prism/parse_result/newlines.rb +1 -1
  26. data/lib/prism/parse_result.rb +78 -0
  27. data/lib/prism/pattern.rb +12 -6
  28. data/lib/prism/polyfill/byteindex.rb +13 -0
  29. data/lib/prism/polyfill/unpack1.rb +14 -0
  30. data/lib/prism/reflection.rb +20 -20
  31. data/lib/prism/serialize.rb +32 -15
  32. data/lib/prism/translation/parser/compiler.rb +156 -26
  33. data/lib/prism/translation/parser.rb +7 -7
  34. data/lib/prism/translation/ripper.rb +29 -25
  35. data/lib/prism/translation/ruby_parser.rb +13 -13
  36. data/lib/prism.rb +2 -1
  37. data/prism.gemspec +37 -38
  38. data/rbi/prism/compiler.rbi +3 -5
  39. data/rbi/prism/inspect_visitor.rbi +12 -0
  40. data/rbi/prism/node.rbi +405 -370
  41. data/rbi/prism/node_ext.rbi +5 -0
  42. data/rbi/prism/parse_result.rbi +23 -0
  43. data/rbi/prism/translation/ripper.rbi +1 -11
  44. data/sig/prism/dsl.rbs +12 -12
  45. data/sig/prism/inspect_visitor.rbs +22 -0
  46. data/sig/prism/lex_compat.rbs +10 -0
  47. data/sig/prism/node.rbs +108 -91
  48. data/sig/prism/node_ext.rbs +4 -0
  49. data/sig/prism/parse_result.rbs +12 -0
  50. data/src/diagnostic.c +66 -33
  51. data/src/node.c +89 -64
  52. data/src/options.c +2 -2
  53. data/src/prettyprint.c +109 -66
  54. data/src/prism.c +862 -317
  55. data/src/serialize.c +21 -18
  56. data/src/static_literals.c +120 -34
  57. data/src/token_type.c +6 -6
  58. metadata +8 -9
  59. data/lib/prism/node_inspector.rb +0 -68
  60. data/lib/prism/polyfill/string.rb +0 -12
  61. data/rbi/prism/desugar_compiler.rbi +0 -5
  62. data/rbi/prism/mutation_compiler.rbi +0 -5
  63. data/rbi/prism/translation/parser/compiler.rbi +0 -13
  64. data/rbi/prism/translation/ripper/ripper_compiler.rbi +0 -5
  65. data/rbi/prism/translation/ruby_parser.rbi +0 -11
data/src/prism.c CHANGED
@@ -672,6 +672,26 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
672
672
  #define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
673
673
  PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
674
674
 
675
+ /**
676
+ * Add an error for an expected heredoc terminator. This is a special function
677
+ * only because it grabs its location off of a lex mode instead of a node or a
678
+ * token.
679
+ */
680
+ static void
681
+ pm_parser_err_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
682
+ const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
683
+ size_t ident_length = lex_mode->as.heredoc.ident_length;
684
+
685
+ PM_PARSER_ERR_FORMAT(
686
+ parser,
687
+ ident_start,
688
+ ident_start + ident_length,
689
+ PM_ERR_HEREDOC_TERM,
690
+ (int) ident_length,
691
+ (const char *) ident_start
692
+ );
693
+ }
694
+
675
695
  /******************************************************************************/
676
696
  /* Scope-related functions */
677
697
  /******************************************************************************/
@@ -729,42 +749,97 @@ pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
729
749
  return scope;
730
750
  }
731
751
 
732
- static void
733
- pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const pm_token_t * token, const uint8_t mask, pm_diagnostic_id_t diag) {
752
+ typedef enum {
753
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,
754
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,
755
+ PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
756
+ } pm_scope_forwarding_param_check_result_t;
757
+
758
+ static pm_scope_forwarding_param_check_result_t
759
+ pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
734
760
  pm_scope_t *scope = parser->current_scope;
735
- while (scope) {
761
+ bool conflict = false;
762
+
763
+ while (scope != NULL) {
736
764
  if (scope->parameters & mask) {
737
- if (!scope->closed) {
738
- pm_parser_err_token(parser, token, diag);
739
- return;
765
+ if (scope->closed) {
766
+ if (conflict) {
767
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
768
+ } else {
769
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
770
+ }
740
771
  }
741
- return;
772
+
773
+ conflict = true;
742
774
  }
775
+
743
776
  if (scope->closed) break;
744
777
  scope = scope->previous;
745
778
  }
746
779
 
747
- pm_parser_err_token(parser, token, diag);
780
+ return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
748
781
  }
749
782
 
750
- static inline void
783
+ static void
751
784
  pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
752
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
785
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
786
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
787
+ // Pass.
788
+ break;
789
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
790
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
791
+ break;
792
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
793
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
794
+ break;
795
+ }
753
796
  }
754
797
 
755
- static inline void
798
+ static void
756
799
  pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
757
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
800
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
801
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
802
+ // Pass.
803
+ break;
804
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
805
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
806
+ break;
807
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
808
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
809
+ break;
810
+ }
758
811
  }
759
812
 
760
- static inline void
761
- pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t * token) {
762
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_ALL, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
813
+ static void
814
+ pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
815
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
816
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
817
+ // Pass.
818
+ break;
819
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
820
+ // This shouldn't happen, because ... is not allowed in the
821
+ // declaration of blocks. If we get here, we assume we already have
822
+ // an error for this.
823
+ break;
824
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
825
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
826
+ break;
827
+ }
763
828
  }
764
829
 
765
- static inline void
830
+ static void
766
831
  pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
767
- pm_parser_scope_forwarding_param_check(parser, token, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
832
+ switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
833
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
834
+ // Pass.
835
+ break;
836
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
837
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
838
+ break;
839
+ case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
840
+ pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
841
+ break;
842
+ }
768
843
  }
769
844
 
770
845
  /**
@@ -1405,7 +1480,7 @@ pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1405
1480
  static inline void
1406
1481
  pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1407
1482
  if (pm_conditional_predicate_warn_write_literal_p(node)) {
1408
- pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3_0 : PM_WARN_EQUAL_IN_CONDITIONAL);
1483
+ pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1409
1484
  }
1410
1485
  }
1411
1486
 
@@ -1683,7 +1758,7 @@ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
1683
1758
  * it's important that it be as fast as possible.
1684
1759
  */
1685
1760
  static inline size_t
1686
- char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
1761
+ char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
1687
1762
  if (parser->encoding_changed) {
1688
1763
  size_t width;
1689
1764
  if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
@@ -2923,6 +2998,29 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
2923
2998
  return node;
2924
2999
  }
2925
3000
 
3001
+ /**
3002
+ * Validate that index expressions do not have keywords or blocks if we are
3003
+ * parsing as Ruby 3.4+.
3004
+ */
3005
+ static void
3006
+ pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
3007
+ if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
3008
+ if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
3009
+ pm_node_t *node;
3010
+ PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
3011
+ if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
3012
+ pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
3013
+ break;
3014
+ }
3015
+ }
3016
+ }
3017
+
3018
+ if (block != NULL) {
3019
+ pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
3020
+ }
3021
+ }
3022
+ }
3023
+
2926
3024
  /**
2927
3025
  * Allocate and initialize a new IndexAndWriteNode node.
2928
3026
  */
@@ -2931,6 +3029,8 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons
2931
3029
  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2932
3030
  pm_index_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_and_write_node_t);
2933
3031
 
3032
+ pm_index_arguments_check(parser, target->arguments, target->block);
3033
+
2934
3034
  *node = (pm_index_and_write_node_t) {
2935
3035
  {
2936
3036
  .type = PM_INDEX_AND_WRITE_NODE,
@@ -2980,8 +3080,8 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
2980
3080
  .message_loc = target->message_loc,
2981
3081
  .read_name = 0,
2982
3082
  .write_name = target->name,
2983
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
2984
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3083
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3084
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2985
3085
  .value = value
2986
3086
  };
2987
3087
 
@@ -3002,6 +3102,8 @@ static pm_index_operator_write_node_t *
3002
3102
  pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3003
3103
  pm_index_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_operator_write_node_t);
3004
3104
 
3105
+ pm_index_arguments_check(parser, target->arguments, target->block);
3106
+
3005
3107
  *node = (pm_index_operator_write_node_t) {
3006
3108
  {
3007
3109
  .type = PM_INDEX_OPERATOR_WRITE_NODE,
@@ -3017,8 +3119,8 @@ pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
3017
3119
  .arguments = target->arguments,
3018
3120
  .closing_loc = target->closing_loc,
3019
3121
  .block = target->block,
3020
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3021
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3122
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3123
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3022
3124
  .value = value
3023
3125
  };
3024
3126
 
@@ -3075,6 +3177,8 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
3075
3177
  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3076
3178
  pm_index_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_or_write_node_t);
3077
3179
 
3180
+ pm_index_arguments_check(parser, target->arguments, target->block);
3181
+
3078
3182
  *node = (pm_index_or_write_node_t) {
3079
3183
  {
3080
3184
  .type = PM_INDEX_OR_WRITE_NODE,
@@ -3139,6 +3243,8 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3139
3243
  pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
3140
3244
  pm_node_flags_t flags = target->base.flags;
3141
3245
 
3246
+ pm_index_arguments_check(parser, target->arguments, target->block);
3247
+
3142
3248
  *node = (pm_index_target_node_t) {
3143
3249
  {
3144
3250
  .type = PM_INDEX_TARGET_NODE,
@@ -3358,9 +3464,9 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia
3358
3464
  },
3359
3465
  .name = target->name,
3360
3466
  .name_loc = target->base.location,
3361
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3467
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3362
3468
  .value = value,
3363
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3469
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3364
3470
  };
3365
3471
 
3366
3472
  return node;
@@ -3474,9 +3580,9 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat
3474
3580
  }
3475
3581
  },
3476
3582
  .target = target,
3477
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3583
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3478
3584
  .value = value,
3479
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3585
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3480
3586
  };
3481
3587
 
3482
3588
  return node;
@@ -3510,22 +3616,27 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node
3510
3616
  * Allocate and initialize a new ConstantPathNode node.
3511
3617
  */
3512
3618
  static pm_constant_path_node_t *
3513
- pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, pm_node_t *child) {
3619
+ pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3514
3620
  pm_assert_value_expression(parser, parent);
3515
-
3516
3621
  pm_constant_path_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_node_t);
3517
3622
 
3623
+ pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3624
+ if (name_token->type == PM_TOKEN_CONSTANT) {
3625
+ name = pm_parser_constant_id_token(parser, name_token);
3626
+ }
3627
+
3518
3628
  *node = (pm_constant_path_node_t) {
3519
3629
  {
3520
3630
  .type = PM_CONSTANT_PATH_NODE,
3521
3631
  .location = {
3522
3632
  .start = parent == NULL ? delimiter->start : parent->location.start,
3523
- .end = child->location.end
3633
+ .end = name_token->end
3524
3634
  },
3525
3635
  },
3526
3636
  .parent = parent,
3527
- .child = child,
3528
- .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter)
3637
+ .name = name,
3638
+ .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3639
+ .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3529
3640
  };
3530
3641
 
3531
3642
  return node;
@@ -3596,9 +3707,9 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod
3596
3707
  },
3597
3708
  .name = target->name,
3598
3709
  .name_loc = target->base.location,
3599
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3710
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3600
3711
  .value = value,
3601
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3712
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3602
3713
  };
3603
3714
 
3604
3715
  return node;
@@ -3716,6 +3827,113 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3716
3827
  }
3717
3828
  }
3718
3829
 
3830
+ /**
3831
+ * When a method body is created, we want to check if the last statement is a
3832
+ * return or a statement that houses a return. If it is, then we want to mark
3833
+ * that return as being redundant so that we can compile it differently but also
3834
+ * so that we can indicate that to the user.
3835
+ */
3836
+ static void
3837
+ pm_def_node_body_redundant_return(pm_node_t *node) {
3838
+ switch (PM_NODE_TYPE(node)) {
3839
+ case PM_RETURN_NODE:
3840
+ node->flags |= PM_RETURN_NODE_FLAGS_REDUNDANT;
3841
+ break;
3842
+ case PM_BEGIN_NODE: {
3843
+ pm_begin_node_t *cast = (pm_begin_node_t *) node;
3844
+
3845
+ if (cast->statements != NULL && cast->else_clause == NULL) {
3846
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3847
+ }
3848
+ break;
3849
+ }
3850
+ case PM_STATEMENTS_NODE: {
3851
+ pm_statements_node_t *cast = (pm_statements_node_t *) node;
3852
+
3853
+ if (cast->body.size > 0) {
3854
+ pm_def_node_body_redundant_return(cast->body.nodes[cast->body.size - 1]);
3855
+ }
3856
+ break;
3857
+ }
3858
+ case PM_IF_NODE: {
3859
+ pm_if_node_t *cast = (pm_if_node_t *) node;
3860
+
3861
+ if (cast->statements != NULL) {
3862
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3863
+ }
3864
+
3865
+ if (cast->consequent != NULL) {
3866
+ pm_def_node_body_redundant_return(cast->consequent);
3867
+ }
3868
+ break;
3869
+ }
3870
+ case PM_UNLESS_NODE: {
3871
+ pm_unless_node_t *cast = (pm_unless_node_t *) node;
3872
+
3873
+ if (cast->statements != NULL) {
3874
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3875
+ }
3876
+
3877
+ if (cast->consequent != NULL) {
3878
+ pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
3879
+ }
3880
+ break;
3881
+ }
3882
+ case PM_ELSE_NODE: {
3883
+ pm_else_node_t *cast = (pm_else_node_t *) node;
3884
+
3885
+ if (cast->statements != NULL) {
3886
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3887
+ }
3888
+ break;
3889
+ }
3890
+ case PM_CASE_NODE: {
3891
+ pm_case_node_t *cast = (pm_case_node_t *) node;
3892
+ pm_node_t *condition;
3893
+
3894
+ PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
3895
+ pm_def_node_body_redundant_return(condition);
3896
+ }
3897
+
3898
+ if (cast->consequent != NULL) {
3899
+ pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
3900
+ }
3901
+ break;
3902
+ }
3903
+ case PM_WHEN_NODE: {
3904
+ pm_when_node_t *cast = (pm_when_node_t *) node;
3905
+
3906
+ if (cast->statements != NULL) {
3907
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3908
+ }
3909
+ break;
3910
+ }
3911
+ case PM_CASE_MATCH_NODE: {
3912
+ pm_case_match_node_t *cast = (pm_case_match_node_t *) node;
3913
+ pm_node_t *condition;
3914
+
3915
+ PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
3916
+ pm_def_node_body_redundant_return(condition);
3917
+ }
3918
+
3919
+ if (cast->consequent != NULL) {
3920
+ pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
3921
+ }
3922
+ break;
3923
+ }
3924
+ case PM_IN_NODE: {
3925
+ pm_in_node_t *cast = (pm_in_node_t *) node;
3926
+
3927
+ if (cast->statements != NULL) {
3928
+ pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
3929
+ }
3930
+ break;
3931
+ }
3932
+ default:
3933
+ break;
3934
+ }
3935
+ }
3936
+
3719
3937
  /**
3720
3938
  * Allocate and initialize a new DefNode node.
3721
3939
  */
@@ -3748,6 +3966,10 @@ pm_def_node_create(
3748
3966
  pm_def_node_receiver_check(parser, receiver);
3749
3967
  }
3750
3968
 
3969
+ if (body != NULL) {
3970
+ pm_def_node_body_redundant_return(body);
3971
+ }
3972
+
3751
3973
  *node = (pm_def_node_t) {
3752
3974
  {
3753
3975
  .type = PM_DEF_NODE,
@@ -4338,9 +4560,9 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta
4338
4560
  },
4339
4561
  .name = pm_global_variable_write_name(parser, target),
4340
4562
  .name_loc = target->location,
4341
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4563
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4342
4564
  .value = value,
4343
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4565
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4344
4566
  };
4345
4567
 
4346
4568
  return node;
@@ -4846,9 +5068,9 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance
4846
5068
  },
4847
5069
  .name = target->name,
4848
5070
  .name_loc = target->base.location,
4849
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5071
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4850
5072
  .value = value,
4851
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5073
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4852
5074
  };
4853
5075
 
4854
5076
  return node;
@@ -4922,6 +5144,50 @@ pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable
4922
5144
  return node;
4923
5145
  }
4924
5146
 
5147
+ /**
5148
+ * Append a part into a list of string parts. Importantly this handles nested
5149
+ * interpolated strings by not necessarily removing the marker for static
5150
+ * literals.
5151
+ */
5152
+ static void
5153
+ pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
5154
+ switch (PM_NODE_TYPE(part)) {
5155
+ case PM_STRING_NODE:
5156
+ pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5157
+ break;
5158
+ case PM_EMBEDDED_STATEMENTS_NODE: {
5159
+ pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5160
+ pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5161
+
5162
+ if (embedded == NULL) {
5163
+ // If there are no statements or more than one statement, then
5164
+ // we lose the static literal flag.
5165
+ pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5166
+ } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5167
+ // If the embedded statement is a string, then we can keep the
5168
+ // static literal flag and mark the string as frozen.
5169
+ pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5170
+ } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5171
+ // If the embedded statement is an interpolated string and it's
5172
+ // a static literal, then we can keep the static literal flag.
5173
+ } else {
5174
+ // Otherwise we lose the static literal flag.
5175
+ pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5176
+ }
5177
+
5178
+ break;
5179
+ }
5180
+ case PM_EMBEDDED_VARIABLE_NODE:
5181
+ pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5182
+ break;
5183
+ default:
5184
+ assert(false && "unexpected node type");
5185
+ break;
5186
+ }
5187
+
5188
+ pm_node_list_append(parts, part);
5189
+ }
5190
+
4925
5191
  /**
4926
5192
  * Allocate a new InterpolatedRegularExpressionNode node.
4927
5193
  */
@@ -4955,54 +5221,113 @@ pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expressio
4955
5221
  node->base.location.end = part->location.end;
4956
5222
  }
4957
5223
 
4958
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
4959
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4960
- }
4961
-
4962
- if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
4963
- pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
4964
- }
4965
-
4966
- pm_node_list_append(&node->parts, part);
5224
+ pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
4967
5225
  }
4968
5226
 
4969
5227
  static inline void
4970
5228
  pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
4971
5229
  node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
4972
5230
  node->base.location.end = closing->end;
4973
- pm_node_flag_set((pm_node_t *)node, pm_regular_expression_flags_create(parser, closing));
5231
+ pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
4974
5232
  }
4975
5233
 
4976
5234
  /**
4977
5235
  * Append a part to an InterpolatedStringNode node.
5236
+ *
5237
+ * This has somewhat complicated semantics, because we need to update
5238
+ * multiple flags that have somewhat confusing interactions.
5239
+ *
5240
+ * PM_NODE_FLAG_STATIC_LITERAL indicates that the node should be treated as a
5241
+ * single static literal string that can be pushed onto the stack on its own.
5242
+ * Note that this doesn't necessarily mean that the string will be frozen or
5243
+ * not; the instructions in CRuby will be either putobject or putstring,
5244
+ * depending on the combination of `--enable-frozen-string-literal`,
5245
+ * `# frozen_string_literal: true`, and whether or not there is interpolation.
5246
+ *
5247
+ * PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN indicates that the string should be
5248
+ * explicitly frozen. This will only happen if the string is comprised entirely
5249
+ * of string parts that are themselves static literals and frozen.
5250
+ *
5251
+ * PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE indicates that the string should
5252
+ * be explicitly marked as mutable. This will happen from
5253
+ * `--disable-frozen-string-literal` or `# frozen_string_literal: false`. This
5254
+ * is necessary to indicate that the string should be left up to the runtime,
5255
+ * which could potentially use a chilled string otherwise.
4978
5256
  */
4979
5257
  static inline void
4980
- pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_node_t *node, pm_node_t *part) {
5258
+ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
5259
+ #define CLEAR_FLAGS(node) \
5260
+ node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5261
+
5262
+ #define MUTABLE_FLAGS(node) \
5263
+ node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5264
+
4981
5265
  if (node->parts.size == 0 && node->opening_loc.start == NULL) {
4982
5266
  node->base.location.start = part->location.start;
4983
5267
  }
4984
5268
 
4985
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
4986
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4987
- }
5269
+ node->base.location.end = MAX(node->base.location.end, part->location.end);
5270
+
5271
+ switch (PM_NODE_TYPE(part)) {
5272
+ case PM_STRING_NODE:
5273
+ pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5274
+ break;
5275
+ case PM_INTERPOLATED_STRING_NODE:
5276
+ if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5277
+ // If the string that we're concatenating is a static literal,
5278
+ // then we can keep the static literal flag for this string.
5279
+ } else {
5280
+ // Otherwise, we lose the static literal flag here and we should
5281
+ // also clear the mutability flags.
5282
+ CLEAR_FLAGS(node);
5283
+ }
5284
+ break;
5285
+ case PM_EMBEDDED_STATEMENTS_NODE: {
5286
+ pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5287
+ pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5288
+
5289
+ if (embedded == NULL) {
5290
+ // If we're embedding multiple statements or no statements, then
5291
+ // the string is no longer a static literal.
5292
+ CLEAR_FLAGS(node);
5293
+ } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5294
+ // If the embedded statement is a string, then we can mark that
5295
+ // string as frozen and static literal, and not touch the static
5296
+ // literal status of this string.
5297
+ pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5298
+
5299
+ if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5300
+ MUTABLE_FLAGS(node);
5301
+ }
5302
+ } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5303
+ // If the embedded statement is an interpolated string, but that
5304
+ // string is marked as static literal, then we can keep our
5305
+ // static literal status for this string.
5306
+ if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5307
+ MUTABLE_FLAGS(node);
5308
+ }
5309
+ } else {
5310
+ // In all other cases, we lose the static literal flag here and
5311
+ // become mutable.
5312
+ CLEAR_FLAGS(node);
5313
+ }
4988
5314
 
4989
- if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
4990
- pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE);
5315
+ break;
5316
+ }
5317
+ case PM_EMBEDDED_VARIABLE_NODE:
5318
+ // Embedded variables clear static literal, which means we also
5319
+ // should clear the mutability flags.
5320
+ CLEAR_FLAGS(node);
5321
+ break;
5322
+ default:
5323
+ assert(false && "unexpected node type");
5324
+ break;
4991
5325
  }
4992
5326
 
4993
5327
  pm_node_list_append(&node->parts, part);
4994
- node->base.location.end = MAX(node->base.location.end, part->location.end);
4995
5328
 
4996
- if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4997
- switch (parser->frozen_string_literal) {
4998
- case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
4999
- pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE);
5000
- break;
5001
- case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5002
- pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5003
- break;
5004
- }
5005
- }
5329
+ #undef CLEAR_FLAGS
5330
+ #undef MUTABLE_FLAGS
5006
5331
  }
5007
5332
 
5008
5333
  /**
@@ -5011,11 +5336,21 @@ pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_n
5011
5336
  static pm_interpolated_string_node_t *
5012
5337
  pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5013
5338
  pm_interpolated_string_node_t *node = PM_ALLOC_NODE(parser, pm_interpolated_string_node_t);
5339
+ pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5340
+
5341
+ switch (parser->frozen_string_literal) {
5342
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5343
+ flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5344
+ break;
5345
+ case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5346
+ flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5347
+ break;
5348
+ }
5014
5349
 
5015
5350
  *node = (pm_interpolated_string_node_t) {
5016
5351
  {
5017
5352
  .type = PM_INTERPOLATED_STRING_NODE,
5018
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
5353
+ .flags = flags,
5019
5354
  .location = {
5020
5355
  .start = opening->start,
5021
5356
  .end = closing->end,
@@ -5029,7 +5364,7 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin
5029
5364
  if (parts != NULL) {
5030
5365
  pm_node_t *part;
5031
5366
  PM_NODE_LIST_FOREACH(parts, index, part) {
5032
- pm_interpolated_string_node_append(parser, node, part);
5367
+ pm_interpolated_string_node_append(node, part);
5033
5368
  }
5034
5369
  }
5035
5370
 
@@ -5051,15 +5386,7 @@ pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_
5051
5386
  node->base.location.start = part->location.start;
5052
5387
  }
5053
5388
 
5054
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
5055
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5056
- }
5057
-
5058
- if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5059
- pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5060
- }
5061
-
5062
- pm_node_list_append(&node->parts, part);
5389
+ pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5063
5390
  node->base.location.end = MAX(node->base.location.end, part->location.end);
5064
5391
  }
5065
5392
 
@@ -5125,11 +5452,7 @@ pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *openi
5125
5452
 
5126
5453
  static inline void
5127
5454
  pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5128
- if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
5129
- pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5130
- }
5131
-
5132
- pm_node_list_append(&node->parts, part);
5455
+ pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5133
5456
  node->base.location.end = part->location.end;
5134
5457
  }
5135
5458
 
@@ -5341,10 +5664,10 @@ pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *tar
5341
5664
  }
5342
5665
  },
5343
5666
  .name_loc = target->location,
5344
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5667
+ .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5345
5668
  .value = value,
5346
5669
  .name = name,
5347
- .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5670
+ .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5348
5671
  .depth = depth
5349
5672
  };
5350
5673
 
@@ -6397,6 +6720,7 @@ pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argumen
6397
6720
  *node = (pm_return_node_t) {
6398
6721
  {
6399
6722
  .type = PM_RETURN_NODE,
6723
+ .flags = 0,
6400
6724
  .location = {
6401
6725
  .start = keyword->start,
6402
6726
  .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
@@ -6622,7 +6946,7 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node,
6622
6946
  case PM_REDO_NODE:
6623
6947
  case PM_RETRY_NODE:
6624
6948
  case PM_RETURN_NODE:
6625
- pm_parser_warn_node(parser, previous, PM_WARN_UNREACHABLE_STATEMENT);
6949
+ pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
6626
6950
  break;
6627
6951
  default:
6628
6952
  break;
@@ -6729,7 +7053,8 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
6729
7053
  }
6730
7054
 
6731
7055
  /**
6732
- * Read through the contents of a string and check if it consists solely of US ASCII code points.
7056
+ * Read through the contents of a string and check if it consists solely of
7057
+ * US-ASCII code points.
6733
7058
  */
6734
7059
  static bool
6735
7060
  pm_ascii_only_p(const pm_string_t *contents) {
@@ -6743,27 +7068,72 @@ pm_ascii_only_p(const pm_string_t *contents) {
6743
7068
  return true;
6744
7069
  }
6745
7070
 
7071
+ /**
7072
+ * Validate that the contents of the given symbol are all valid UTF-8.
7073
+ */
7074
+ static void
7075
+ parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7076
+ for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7077
+ size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
7078
+
7079
+ if (width == 0) {
7080
+ pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7081
+ break;
7082
+ }
7083
+
7084
+ cursor += width;
7085
+ }
7086
+ }
7087
+
7088
+ /**
7089
+ * Validate that the contents of the given symbol are all valid in the encoding
7090
+ * of the parser.
7091
+ */
7092
+ static void
7093
+ parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7094
+ const pm_encoding_t *encoding = parser->encoding;
7095
+
7096
+ for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7097
+ size_t width = encoding->char_width(cursor, end - cursor);
7098
+
7099
+ if (width == 0) {
7100
+ pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7101
+ break;
7102
+ }
7103
+
7104
+ cursor += width;
7105
+ }
7106
+ }
7107
+
6746
7108
  /**
6747
7109
  * Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
6748
7110
  * encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
6749
7111
  * points. Otherwise, the encoding may be explicitly set with an escape
6750
7112
  * sequence.
7113
+ *
7114
+ * If the validate flag is set, then it will check the contents of the symbol
7115
+ * to ensure that all characters are valid in the encoding.
6751
7116
  */
6752
7117
  static inline pm_node_flags_t
6753
- parse_symbol_encoding(const pm_parser_t *parser, const pm_string_t *contents) {
7118
+ parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
6754
7119
  if (parser->explicit_encoding != NULL) {
6755
7120
  // A Symbol may optionally have its encoding explicitly set. This will
6756
7121
  // happen if an escape sequence results in a non-ASCII code point.
6757
7122
  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7123
+ if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
6758
7124
  return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
6759
7125
  } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6760
7126
  return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
7127
+ } else if (validate) {
7128
+ parse_symbol_encoding_validate_other(parser, location, contents);
6761
7129
  }
6762
7130
  } else if (pm_ascii_only_p(contents)) {
6763
7131
  // Ruby stipulates that all source files must use an ASCII-compatible
6764
7132
  // encoding. Thus, all symbols appearing in source are eligible for
6765
7133
  // "downgrading" to US-ASCII.
6766
7134
  return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
7135
+ } else if (validate) {
7136
+ parse_symbol_encoding_validate_other(parser, location, contents);
6767
7137
  }
6768
7138
 
6769
7139
  return 0;
@@ -6931,7 +7301,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
6931
7301
  */
6932
7302
  static pm_symbol_node_t *
6933
7303
  pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6934
- pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, &parser->current_string));
7304
+ pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
6935
7305
  parser->current_string = PM_STRING_EMPTY;
6936
7306
  return node;
6937
7307
  }
@@ -6953,7 +7323,7 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
6953
7323
 
6954
7324
  assert((label.end - label.start) >= 0);
6955
7325
  pm_string_shared_init(&node->unescaped, label.start, label.end);
6956
- pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &node->unescaped));
7326
+ pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
6957
7327
 
6958
7328
  break;
6959
7329
  }
@@ -7038,7 +7408,8 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
7038
7408
  .unescaped = node->unescaped
7039
7409
  };
7040
7410
 
7041
- pm_node_flag_set((pm_node_t *)new_node, parse_symbol_encoding(parser, &node->unescaped));
7411
+ pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
7412
+ pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
7042
7413
 
7043
7414
  // We are explicitly _not_ using pm_node_destroy here because we don't want
7044
7415
  // to trash the unescaped string. We could instead copy the string if we
@@ -7574,7 +7945,7 @@ pm_local_variable_read_node_create_it(pm_parser_t *parser, const pm_token_t *nam
7574
7945
  static pm_node_t *
7575
7946
  pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
7576
7947
  if (
7577
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
7948
+ (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
7578
7949
  !parser->current_scope->closed &&
7579
7950
  (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
7580
7951
  pm_node_is_it(parser, node)
@@ -8023,7 +8394,12 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8023
8394
  // If we have hit a ractor pragma, attempt to lex that.
8024
8395
  uint32_t value_length = (uint32_t) (value_end - value_start);
8025
8396
  if (key_length == 24 && pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
8026
- if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8397
+ const uint8_t *cursor = parser->current.start;
8398
+ while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8399
+
8400
+ if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8401
+ pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8402
+ } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8027
8403
  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8028
8404
  } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8029
8405
  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
@@ -8298,10 +8674,11 @@ context_human(pm_context_t context) {
8298
8674
  /* Specific token lexers */
8299
8675
  /******************************************************************************/
8300
8676
 
8301
- static void
8302
- pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *invalid) {
8677
+ static inline void
8678
+ pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8303
8679
  if (invalid != NULL) {
8304
- pm_parser_err(parser, invalid, invalid + 1, PM_ERR_INVALID_NUMBER_UNDERSCORE);
8680
+ pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8681
+ pm_parser_err(parser, invalid, invalid + 1, diag_id);
8305
8682
  }
8306
8683
  }
8307
8684
 
@@ -8309,7 +8686,7 @@ static size_t
8309
8686
  pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8310
8687
  const uint8_t *invalid = NULL;
8311
8688
  size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8312
- pm_strspn_number_validate(parser, invalid);
8689
+ pm_strspn_number_validate(parser, string, length, invalid);
8313
8690
  return length;
8314
8691
  }
8315
8692
 
@@ -8317,7 +8694,7 @@ static size_t
8317
8694
  pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8318
8695
  const uint8_t *invalid = NULL;
8319
8696
  size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8320
- pm_strspn_number_validate(parser, invalid);
8697
+ pm_strspn_number_validate(parser, string, length, invalid);
8321
8698
  return length;
8322
8699
  }
8323
8700
 
@@ -8325,7 +8702,7 @@ static size_t
8325
8702
  pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8326
8703
  const uint8_t *invalid = NULL;
8327
8704
  size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8328
- pm_strspn_number_validate(parser, invalid);
8705
+ pm_strspn_number_validate(parser, string, length, invalid);
8329
8706
  return length;
8330
8707
  }
8331
8708
 
@@ -8333,7 +8710,7 @@ static size_t
8333
8710
  pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8334
8711
  const uint8_t *invalid = NULL;
8335
8712
  size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8336
- pm_strspn_number_validate(parser, invalid);
8713
+ pm_strspn_number_validate(parser, string, length, invalid);
8337
8714
  return length;
8338
8715
  }
8339
8716
 
@@ -8395,6 +8772,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8395
8772
  if (pm_char_is_decimal_digit(peek(parser))) {
8396
8773
  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8397
8774
  } else {
8775
+ match(parser, '_');
8398
8776
  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8399
8777
  }
8400
8778
 
@@ -8407,6 +8785,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8407
8785
  if (pm_char_is_binary_digit(peek(parser))) {
8408
8786
  parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8409
8787
  } else {
8788
+ match(parser, '_');
8410
8789
  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8411
8790
  }
8412
8791
 
@@ -8420,6 +8799,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8420
8799
  if (pm_char_is_octal_digit(peek(parser))) {
8421
8800
  parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8422
8801
  } else {
8802
+ match(parser, '_');
8423
8803
  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8424
8804
  }
8425
8805
 
@@ -8447,6 +8827,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8447
8827
  if (pm_char_is_hexadecimal_digit(peek(parser))) {
8448
8828
  parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8449
8829
  } else {
8830
+ match(parser, '_');
8450
8831
  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8451
8832
  }
8452
8833
 
@@ -8475,6 +8856,16 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8475
8856
  type = lex_optional_float_suffix(parser, seen_e);
8476
8857
  }
8477
8858
 
8859
+ // At this point we have a completed number, but we want to provide the user
8860
+ // with a good experience if they put an additional .xxx fractional
8861
+ // component on the end, so we'll check for that here.
8862
+ if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8863
+ const uint8_t *fraction_start = parser->current.end;
8864
+ const uint8_t *fraction_end = parser->current.end + 2;
8865
+ fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8866
+ pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
8867
+ }
8868
+
8478
8869
  return type;
8479
8870
  }
8480
8871
 
@@ -8567,7 +8958,7 @@ lex_global_variable(pm_parser_t *parser) {
8567
8958
  } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
8568
8959
 
8569
8960
  // $0 isn't allowed to be followed by anything.
8570
- pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3_0 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8961
+ pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8571
8962
  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
8572
8963
  }
8573
8964
 
@@ -8603,7 +8994,7 @@ lex_global_variable(pm_parser_t *parser) {
8603
8994
  } else {
8604
8995
  // If we get here, then we have a $ followed by something that
8605
8996
  // isn't recognized as a global variable.
8606
- pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3_0 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8997
+ pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8607
8998
  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8608
8999
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
8609
9000
  }
@@ -8976,12 +9367,20 @@ escape_hexadecimal_digit(const uint8_t value) {
8976
9367
  * validated.
8977
9368
  */
8978
9369
  static inline uint32_t
8979
- escape_unicode(const uint8_t *string, size_t length) {
9370
+ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
8980
9371
  uint32_t value = 0;
8981
9372
  for (size_t index = 0; index < length; index++) {
8982
9373
  if (index != 0) value <<= 4;
8983
9374
  value |= escape_hexadecimal_digit(string[index]);
8984
9375
  }
9376
+
9377
+ // Here we're going to verify that the value is actually a valid Unicode
9378
+ // codepoint and not a surrogate pair.
9379
+ if (value >= 0xD800 && value <= 0xDFFF) {
9380
+ pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9381
+ return 0xFFFD;
9382
+ }
9383
+
8985
9384
  return value;
8986
9385
  }
8987
9386
 
@@ -9230,7 +9629,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9230
9629
  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9231
9630
  }
9232
9631
 
9233
- escape_write_byte_encoded(parser, buffer, value);
9632
+ escape_write_byte_encoded(parser, buffer, escape_byte(value, flags));
9234
9633
  } else {
9235
9634
  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9236
9635
  }
@@ -9241,22 +9640,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9241
9640
  const uint8_t *start = parser->current.end - 1;
9242
9641
  parser->current.end++;
9243
9642
 
9244
- if (
9245
- (parser->current.end + 4 <= parser->end) &&
9246
- pm_char_is_hexadecimal_digit(parser->current.end[0]) &&
9247
- pm_char_is_hexadecimal_digit(parser->current.end[1]) &&
9248
- pm_char_is_hexadecimal_digit(parser->current.end[2]) &&
9249
- pm_char_is_hexadecimal_digit(parser->current.end[3])
9250
- ) {
9251
- uint32_t value = escape_unicode(parser->current.end, 4);
9252
-
9253
- if (flags & PM_ESCAPE_FLAG_REGEXP) {
9254
- pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9255
- }
9256
- escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9257
-
9258
- parser->current.end += 4;
9259
- } else if (peek(parser) == '{') {
9643
+ if (peek(parser) == '{') {
9260
9644
  const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9261
9645
 
9262
9646
  parser->current.end++;
@@ -9284,7 +9668,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9284
9668
  extra_codepoints_start = unicode_start;
9285
9669
  }
9286
9670
 
9287
- uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
9671
+ uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9288
9672
  escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9289
9673
 
9290
9674
  parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
@@ -9306,7 +9690,25 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
9306
9690
  pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9307
9691
  }
9308
9692
  } else {
9309
- pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9693
+ size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9694
+
9695
+ if (length == 4) {
9696
+ uint32_t value = escape_unicode(parser, parser->current.end, 4);
9697
+
9698
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
9699
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9700
+ }
9701
+
9702
+ escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9703
+ parser->current.end += 4;
9704
+ } else {
9705
+ parser->current.end += length;
9706
+ pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9707
+ }
9708
+ }
9709
+
9710
+ if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9711
+ pm_parser_err(parser, start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9310
9712
  }
9311
9713
 
9312
9714
  return;
@@ -9560,8 +9962,8 @@ lex_at_variable(pm_parser_t *parser) {
9560
9962
  }
9561
9963
  } else if (parser->current.end < parser->end && pm_char_is_decimal_digit(*parser->current.end)) {
9562
9964
  pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
9563
- if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3_0) {
9564
- diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3_0 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3_0;
9965
+ if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
9966
+ diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
9565
9967
  }
9566
9968
 
9567
9969
  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
@@ -10545,8 +10947,11 @@ parser_lex(pm_parser_t *parser) {
10545
10947
  }
10546
10948
 
10547
10949
  size_t ident_length = (size_t) (parser->current.end - ident_start);
10950
+ bool ident_error = false;
10951
+
10548
10952
  if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
10549
- // TODO: handle unterminated heredoc
10953
+ pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
10954
+ ident_error = true;
10550
10955
  }
10551
10956
 
10552
10957
  parser->explicit_encoding = NULL;
@@ -10571,7 +10976,7 @@ parser_lex(pm_parser_t *parser) {
10571
10976
  // this is not a valid heredoc declaration. In this case we
10572
10977
  // will add an error, but we will still return a heredoc
10573
10978
  // start.
10574
- pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
10979
+ if (!ident_error) pm_parser_err_heredoc_term(parser, parser->lex_modes.current);
10575
10980
  body_start = parser->end;
10576
10981
  } else {
10577
10982
  // Otherwise, we want to indicate that the body of the
@@ -11898,7 +12303,7 @@ parser_lex(pm_parser_t *parser) {
11898
12303
  // terminator) but still continue parsing so that content after the
11899
12304
  // declaration of the heredoc can be parsed.
11900
12305
  if (parser->current.end >= parser->end) {
11901
- pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
12306
+ pm_parser_err_heredoc_term(parser, lex_mode);
11902
12307
  parser->next_start = lex_mode->as.heredoc.next_start;
11903
12308
  parser->heredoc_end = parser->current.end;
11904
12309
  lex_state_set(parser, PM_LEX_STATE_END);
@@ -12537,6 +12942,23 @@ expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_to
12537
12942
  parser->previous.type = PM_TOKEN_MISSING;
12538
12943
  }
12539
12944
 
12945
+ /**
12946
+ * A special expect1 that expects a heredoc terminator and handles popping the
12947
+ * lex mode accordingly.
12948
+ */
12949
+ static void
12950
+ expect1_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
12951
+ if (match1(parser, PM_TOKEN_HEREDOC_END)) {
12952
+ lex_mode_pop(parser);
12953
+ parser_lex(parser);
12954
+ } else {
12955
+ pm_parser_err_heredoc_term(parser, lex_mode);
12956
+ lex_mode_pop(parser);
12957
+ parser->previous.start = parser->previous.end;
12958
+ parser->previous.type = PM_TOKEN_MISSING;
12959
+ }
12960
+ }
12961
+
12540
12962
  static pm_node_t *
12541
12963
  parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id);
12542
12964
 
@@ -12664,25 +13086,72 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
12664
13086
  *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
12665
13087
  }
12666
13088
 
13089
+ /**
13090
+ * Certain expressions are not targetable, but in order to provide a better
13091
+ * experience we give a specific error message. In order to maintain as much
13092
+ * information in the tree as possible, we replace them with local variable
13093
+ * writes.
13094
+ */
13095
+ static pm_node_t *
13096
+ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13097
+ switch (PM_NODE_TYPE(target)) {
13098
+ case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13099
+ case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13100
+ case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13101
+ case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13102
+ case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13103
+ case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13104
+ case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13105
+ default: break;
13106
+ }
13107
+
13108
+ pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
13109
+ pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
13110
+
13111
+ pm_node_destroy(parser, target);
13112
+ return (pm_node_t *) result;
13113
+ }
13114
+
12667
13115
  /**
12668
13116
  * Convert the given node into a valid target node.
12669
13117
  */
12670
13118
  static pm_node_t *
12671
- parse_target(pm_parser_t *parser, pm_node_t *target) {
13119
+ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
12672
13120
  switch (PM_NODE_TYPE(target)) {
12673
13121
  case PM_MISSING_NODE:
12674
13122
  return target;
13123
+ case PM_SOURCE_ENCODING_NODE:
13124
+ case PM_FALSE_NODE:
13125
+ case PM_SOURCE_FILE_NODE:
13126
+ case PM_SOURCE_LINE_NODE:
13127
+ case PM_NIL_NODE:
13128
+ case PM_SELF_NODE:
13129
+ case PM_TRUE_NODE: {
13130
+ // In these special cases, we have specific error messages and we
13131
+ // will replace them with local variable writes.
13132
+ return parse_unwriteable_target(parser, target);
13133
+ }
12675
13134
  case PM_CLASS_VARIABLE_READ_NODE:
12676
13135
  assert(sizeof(pm_class_variable_target_node_t) == sizeof(pm_class_variable_read_node_t));
12677
13136
  target->type = PM_CLASS_VARIABLE_TARGET_NODE;
12678
13137
  return target;
12679
13138
  case PM_CONSTANT_PATH_NODE:
13139
+ if (context_def_p(parser)) {
13140
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13141
+ }
13142
+
12680
13143
  assert(sizeof(pm_constant_path_target_node_t) == sizeof(pm_constant_path_node_t));
12681
13144
  target->type = PM_CONSTANT_PATH_TARGET_NODE;
13145
+
12682
13146
  return target;
12683
13147
  case PM_CONSTANT_READ_NODE:
13148
+ if (context_def_p(parser)) {
13149
+ pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13150
+ }
13151
+
12684
13152
  assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
12685
13153
  target->type = PM_CONSTANT_TARGET_NODE;
13154
+
12686
13155
  return target;
12687
13156
  case PM_BACK_REFERENCE_READ_NODE:
12688
13157
  case PM_NUMBERED_REFERENCE_READ_NODE:
@@ -12715,7 +13184,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
12715
13184
  pm_splat_node_t *splat = (pm_splat_node_t *) target;
12716
13185
 
12717
13186
  if (splat->expression != NULL) {
12718
- splat->expression = parse_target(parser, splat->expression);
13187
+ splat->expression = parse_target(parser, splat->expression, multiple);
12719
13188
  }
12720
13189
 
12721
13190
  return (pm_node_t *) splat;
@@ -12753,6 +13222,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
12753
13222
  }
12754
13223
 
12755
13224
  if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
13225
+ if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13226
+ pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13227
+ }
13228
+
12756
13229
  parse_write_name(parser, &call->name);
12757
13230
  return (pm_node_t *) pm_call_target_node_create(parser, call);
12758
13231
  }
@@ -12780,8 +13253,8 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
12780
13253
  * assignment.
12781
13254
  */
12782
13255
  static pm_node_t *
12783
- parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
12784
- pm_node_t *result = parse_target(parser, target);
13256
+ parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13257
+ pm_node_t *result = parse_target(parser, target, multiple);
12785
13258
 
12786
13259
  // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in parens after the targets.
12787
13260
  if (
@@ -12826,13 +13299,20 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
12826
13299
  }
12827
13300
  case PM_CONSTANT_PATH_NODE: {
12828
13301
  pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
13302
+
13303
+ if (context_def_p(parser)) {
13304
+ pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13305
+ }
13306
+
12829
13307
  return parse_shareable_constant_write(parser, node);
12830
13308
  }
12831
13309
  case PM_CONSTANT_READ_NODE: {
12832
13310
  pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
13311
+
12833
13312
  if (context_def_p(parser)) {
12834
13313
  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12835
13314
  }
13315
+
12836
13316
  pm_node_destroy(parser, target);
12837
13317
  return parse_shareable_constant_write(parser, node);
12838
13318
  }
@@ -13011,7 +13491,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13011
13491
  bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13012
13492
 
13013
13493
  pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13014
- pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target));
13494
+ pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true));
13015
13495
 
13016
13496
  while (accept1(parser, PM_TOKEN_COMMA)) {
13017
13497
  if (accept1(parser, PM_TOKEN_USTAR)) {
@@ -13027,7 +13507,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13027
13507
 
13028
13508
  if (token_begins_expression_p(parser->current.type)) {
13029
13509
  name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
13030
- name = parse_target(parser, name);
13510
+ name = parse_target(parser, name, true);
13031
13511
  }
13032
13512
 
13033
13513
  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
@@ -13035,7 +13515,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
13035
13515
  has_rest = true;
13036
13516
  } else if (token_begins_expression_p(parser->current.type)) {
13037
13517
  pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
13038
- target = parse_target(parser, target);
13518
+ target = parse_target(parser, target, true);
13039
13519
 
13040
13520
  pm_multi_target_node_targets_append(parser, result, target);
13041
13521
  } else if (!match1(parser, PM_TOKEN_EOF)) {
@@ -13152,11 +13632,11 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
13152
13632
  */
13153
13633
  static void
13154
13634
  pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13155
- const pm_node_t *duplicated = pm_static_literals_add(parser, literals, node);
13635
+ const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
13156
13636
 
13157
13637
  if (duplicated != NULL) {
13158
13638
  pm_buffer_t buffer = { 0 };
13159
- pm_static_literal_inspect(&buffer, parser, duplicated);
13639
+ pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
13160
13640
 
13161
13641
  pm_diagnostic_list_append_format(
13162
13642
  &parser->warning_list,
@@ -13178,7 +13658,7 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
13178
13658
  */
13179
13659
  static void
13180
13660
  pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13181
- if (pm_static_literals_add(parser, literals, node) != NULL) {
13661
+ if (pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node) != NULL) {
13182
13662
  pm_diagnostic_list_append_format(
13183
13663
  &parser->warning_list,
13184
13664
  node->location.start,
@@ -13206,10 +13686,16 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
13206
13686
  pm_token_t operator = parser->previous;
13207
13687
  pm_node_t *value = NULL;
13208
13688
 
13209
- if (token_begins_expression_p(parser->current.type)) {
13689
+ if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
13690
+ // If we're about to parse a nested hash that is being
13691
+ // pushed into this hash directly with **, then we want the
13692
+ // inner hash to share the static literals with the outer
13693
+ // hash.
13694
+ parser->current_hash_keys = literals;
13210
13695
  value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
13211
- }
13212
- else {
13696
+ } else if (token_begins_expression_p(parser->current.type)) {
13697
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
13698
+ } else {
13213
13699
  pm_parser_scope_forwarding_keywords_check(parser, &operator);
13214
13700
  }
13215
13701
 
@@ -13360,15 +13846,16 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13360
13846
  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
13361
13847
  argument = (pm_node_t *) hash;
13362
13848
 
13363
- pm_static_literals_t literals = { 0 };
13364
- bool contains_keyword_splat = parse_assocs(parser, &literals, (pm_node_t *) hash);
13849
+ pm_static_literals_t hash_keys = { 0 };
13850
+ bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash);
13365
13851
 
13366
13852
  parse_arguments_append(parser, arguments, argument);
13367
- if (contains_keyword_splat) {
13368
- pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
13369
- }
13370
13853
 
13371
- pm_static_literals_free(&literals);
13854
+ pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13855
+ if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13856
+ pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
13857
+
13858
+ pm_static_literals_free(&hash_keys);
13372
13859
  parsed_bare_hash = true;
13373
13860
 
13374
13861
  break;
@@ -13444,7 +13931,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13444
13931
  argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, PM_ERR_EXPECT_ARGUMENT);
13445
13932
  }
13446
13933
 
13934
+ bool contains_keywords = false;
13447
13935
  bool contains_keyword_splat = false;
13936
+
13448
13937
  if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
13449
13938
  if (parsed_bare_hash) {
13450
13939
  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
@@ -13458,10 +13947,11 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13458
13947
  }
13459
13948
 
13460
13949
  pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
13950
+ contains_keywords = true;
13461
13951
 
13462
13952
  // Create the set of static literals for this hash.
13463
- pm_static_literals_t literals = { 0 };
13464
- pm_hash_key_static_literals_add(parser, &literals, argument);
13953
+ pm_static_literals_t hash_keys = { 0 };
13954
+ pm_hash_key_static_literals_add(parser, &hash_keys, argument);
13465
13955
 
13466
13956
  // Finish parsing the one we are part way through.
13467
13957
  pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
@@ -13475,10 +13965,10 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13475
13965
  token_begins_expression_p(parser->current.type) ||
13476
13966
  match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
13477
13967
  )) {
13478
- contains_keyword_splat = parse_assocs(parser, &literals, (pm_node_t *) bare_hash);
13968
+ contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash);
13479
13969
  }
13480
13970
 
13481
- pm_static_literals_free(&literals);
13971
+ pm_static_literals_free(&hash_keys);
13482
13972
  parsed_bare_hash = true;
13483
13973
  } else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
13484
13974
  // TODO: Could we solve this with binding powers instead?
@@ -13486,9 +13976,12 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
13486
13976
  }
13487
13977
 
13488
13978
  parse_arguments_append(parser, arguments, argument);
13489
- if (contains_keyword_splat) {
13490
- pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
13491
- }
13979
+
13980
+ pm_node_flags_t flags = 0;
13981
+ if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13982
+ if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13983
+ pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
13984
+
13492
13985
  break;
13493
13986
  }
13494
13987
  }
@@ -13601,7 +14094,6 @@ typedef enum {
13601
14094
  PM_PARAMETERS_ORDER_OPTIONAL,
13602
14095
  PM_PARAMETERS_ORDER_NAMED,
13603
14096
  PM_PARAMETERS_ORDER_NONE,
13604
-
13605
14097
  } pm_parameters_order_t;
13606
14098
 
13607
14099
  /**
@@ -13626,31 +14118,37 @@ static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
13626
14118
  * Check if current parameter follows valid parameters ordering. If not it adds
13627
14119
  * an error to the list without stopping the parsing, otherwise sets the
13628
14120
  * parameters state to the one corresponding to the current parameter.
14121
+ *
14122
+ * It returns true if it was successful, and false otherwise.
13629
14123
  */
13630
- static void
14124
+ static bool
13631
14125
  update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
13632
14126
  pm_parameters_order_t state = parameters_ordering[token->type];
13633
- if (state == PM_PARAMETERS_NO_CHANGE) return;
14127
+ if (state == PM_PARAMETERS_NO_CHANGE) return true;
13634
14128
 
13635
14129
  // If we see another ordered argument after a optional argument
13636
14130
  // we only continue parsing ordered arguments until we stop seeing ordered arguments.
13637
14131
  if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13638
14132
  *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
13639
- return;
14133
+ return true;
13640
14134
  } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13641
- return;
14135
+ return true;
13642
14136
  }
13643
14137
 
13644
14138
  if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13645
14139
  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
13646
- }
13647
-
13648
- if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14140
+ return false;
14141
+ } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14142
+ pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14143
+ return false;
14144
+ } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
13649
14145
  // We know what transition we failed on, so we can provide a better error here.
13650
14146
  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
13651
- } else if (state < *current) {
13652
- *current = state;
14147
+ return false;
13653
14148
  }
14149
+
14150
+ if (state < *current) *current = state;
14151
+ return true;
13654
14152
  }
13655
14153
 
13656
14154
  /**
@@ -13719,27 +14217,22 @@ parse_parameters(
13719
14217
  pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
13720
14218
  }
13721
14219
 
13722
- if (order > PM_PARAMETERS_ORDER_NOTHING_AFTER) {
13723
- update_parameter_state(parser, &parser->current, &order);
13724
- parser_lex(parser);
14220
+ bool succeeded = update_parameter_state(parser, &parser->current, &order);
14221
+ parser_lex(parser);
13725
14222
 
13726
- parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14223
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14224
+ pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
13727
14225
 
13728
- pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
13729
- if (params->keyword_rest != NULL) {
13730
- // If we already have a keyword rest parameter, then we replace it with the
13731
- // forwarding parameter and move the keyword rest parameter to the posts list.
13732
- pm_node_t *keyword_rest = params->keyword_rest;
13733
- pm_parameters_node_posts_append(params, keyword_rest);
13734
- pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
13735
- params->keyword_rest = NULL;
13736
- }
13737
- pm_parameters_node_keyword_rest_set(params, (pm_node_t *)param);
13738
- } else {
13739
- update_parameter_state(parser, &parser->current, &order);
13740
- parser_lex(parser);
14226
+ if (params->keyword_rest != NULL) {
14227
+ // If we already have a keyword rest parameter, then we replace it with the
14228
+ // forwarding parameter and move the keyword rest parameter to the posts list.
14229
+ pm_node_t *keyword_rest = params->keyword_rest;
14230
+ pm_parameters_node_posts_append(params, keyword_rest);
14231
+ if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14232
+ params->keyword_rest = NULL;
13741
14233
  }
13742
14234
 
14235
+ pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
13743
14236
  break;
13744
14237
  }
13745
14238
  case PM_TOKEN_CLASS_VARIABLE:
@@ -13834,6 +14327,12 @@ parse_parameters(
13834
14327
  pm_token_t local = name;
13835
14328
  local.end -= 1;
13836
14329
 
14330
+ if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14331
+ pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14332
+ } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14333
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14334
+ }
14335
+
13837
14336
  bool repeated = pm_parser_parameter_name_check(parser, &local);
13838
14337
  pm_parser_local_add_token(parser, &local, 1);
13839
14338
 
@@ -13909,6 +14408,7 @@ parse_parameters(
13909
14408
  pm_token_t operator = parser->previous;
13910
14409
  pm_token_t name;
13911
14410
  bool repeated = false;
14411
+
13912
14412
  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13913
14413
  name = parser->previous;
13914
14414
  repeated = pm_parser_parameter_name_check(parser, &name);
@@ -13922,6 +14422,7 @@ parse_parameters(
13922
14422
  if (repeated) {
13923
14423
  pm_node_flag_set_repeated_parameter(param);
13924
14424
  }
14425
+
13925
14426
  if (params->rest == NULL) {
13926
14427
  pm_parameters_node_rest_set(params, param);
13927
14428
  } else {
@@ -13933,6 +14434,7 @@ parse_parameters(
13933
14434
  }
13934
14435
  case PM_TOKEN_STAR_STAR:
13935
14436
  case PM_TOKEN_USTAR_STAR: {
14437
+ pm_parameters_order_t previous_order = order;
13936
14438
  update_parameter_state(parser, &parser->current, &order);
13937
14439
  parser_lex(parser);
13938
14440
 
@@ -13940,6 +14442,10 @@ parse_parameters(
13940
14442
  pm_node_t *param;
13941
14443
 
13942
14444
  if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14445
+ if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14446
+ pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14447
+ }
14448
+
13943
14449
  param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
13944
14450
  } else {
13945
14451
  pm_token_t name;
@@ -14037,7 +14543,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
14037
14543
  pm_rescue_node_operator_set(rescue, &parser->previous);
14038
14544
 
14039
14545
  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
14040
- reference = parse_target(parser, reference);
14546
+ reference = parse_target(parser, reference, false);
14041
14547
 
14042
14548
  pm_rescue_node_reference_set(rescue, reference);
14043
14549
  break;
@@ -14067,7 +14573,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
14067
14573
  pm_rescue_node_operator_set(rescue, &parser->previous);
14068
14574
 
14069
14575
  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
14070
- reference = parse_target(parser, reference);
14576
+ reference = parse_target(parser, reference, false);
14071
14577
 
14072
14578
  pm_rescue_node_reference_set(rescue, reference);
14073
14579
  break;
@@ -14391,7 +14897,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14391
14897
  arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
14392
14898
  } else {
14393
14899
  pm_accepts_block_stack_push(parser, true);
14394
- parse_arguments(parser, arguments, true, PM_TOKEN_PARENTHESIS_RIGHT);
14900
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT);
14395
14901
 
14396
14902
  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14397
14903
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
@@ -14409,7 +14915,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14409
14915
  // If we get here, then the subsequent token cannot be used as an infix
14410
14916
  // operator. In this case we assume the subsequent token is part of an
14411
14917
  // argument to this method call.
14412
- parse_arguments(parser, arguments, true, PM_TOKEN_EOF);
14918
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF);
14413
14919
 
14414
14920
  // If we have done with the arguments and still not consumed the comma,
14415
14921
  // then we have a trailing comma where we need to check whether it is
@@ -14440,11 +14946,8 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
14440
14946
  if (arguments->block == NULL && !arguments->has_forwarding) {
14441
14947
  arguments->block = (pm_node_t *) block;
14442
14948
  } else {
14443
- if (arguments->has_forwarding) {
14444
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_FORWARDING);
14445
- } else {
14446
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
14447
- }
14949
+ pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
14950
+
14448
14951
  if (arguments->block != NULL) {
14449
14952
  if (arguments->arguments == NULL) {
14450
14953
  arguments->arguments = pm_arguments_node_create(parser);
@@ -15036,7 +15539,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
15036
15539
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15037
15540
 
15038
15541
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15039
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
15542
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15040
15543
 
15041
15544
  return (pm_node_t *) symbol;
15042
15545
  }
@@ -15136,7 +15639,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
15136
15639
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15137
15640
  }
15138
15641
 
15139
- return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
15642
+ return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
15140
15643
  }
15141
15644
 
15142
15645
  /**
@@ -15161,7 +15664,7 @@ parse_undef_argument(pm_parser_t *parser) {
15161
15664
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15162
15665
 
15163
15666
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15164
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
15667
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15165
15668
 
15166
15669
  return (pm_node_t *) symbol;
15167
15670
  }
@@ -15202,7 +15705,7 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
15202
15705
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
15203
15706
 
15204
15707
  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15205
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
15708
+ pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15206
15709
 
15207
15710
  return (pm_node_t *) symbol;
15208
15711
  }
@@ -15429,8 +15932,12 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
15429
15932
  nodes->size = write_index;
15430
15933
  }
15431
15934
 
15935
+ #define PM_PARSE_PATTERN_SINGLE 0
15936
+ #define PM_PARSE_PATTERN_TOP 1
15937
+ #define PM_PARSE_PATTERN_MULTI 2
15938
+
15432
15939
  static pm_node_t *
15433
- parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id);
15940
+ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id);
15434
15941
 
15435
15942
  /**
15436
15943
  * Add the newly created local to the list of captures for this pattern matching
@@ -15459,9 +15966,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
15459
15966
  while (accept1(parser, PM_TOKEN_COLON_COLON)) {
15460
15967
  pm_token_t delimiter = parser->previous;
15461
15968
  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
15462
-
15463
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
15464
- node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, child);
15969
+ node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
15465
15970
  }
15466
15971
 
15467
15972
  // If there is a [ or ( that follows, then this is part of a larger pattern
@@ -15480,7 +15985,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
15480
15985
  accept1(parser, PM_TOKEN_NEWLINE);
15481
15986
 
15482
15987
  if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
15483
- inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
15988
+ inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
15484
15989
  accept1(parser, PM_TOKEN_NEWLINE);
15485
15990
  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
15486
15991
  }
@@ -15492,7 +15997,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
15492
15997
  accept1(parser, PM_TOKEN_NEWLINE);
15493
15998
 
15494
15999
  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15495
- inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
16000
+ inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
15496
16001
  accept1(parser, PM_TOKEN_NEWLINE);
15497
16002
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
15498
16003
  }
@@ -15640,6 +16145,33 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
15640
16145
  return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
15641
16146
  }
15642
16147
 
16148
+ /**
16149
+ * Check that the slice of the source given by the bounds parameters constitutes
16150
+ * a valid local variable name.
16151
+ */
16152
+ static bool
16153
+ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16154
+ ptrdiff_t length = end - start;
16155
+ if (length == 0) return false;
16156
+
16157
+ // First ensure that it starts with a valid identifier starting character.
16158
+ size_t width = char_is_identifier_start(parser, start);
16159
+ if (width == 0) return false;
16160
+
16161
+ // Next, ensure that it's not an uppercase character.
16162
+ if (parser->encoding_changed) {
16163
+ if (parser->encoding->isupper_char(start, length)) return false;
16164
+ } else {
16165
+ if (pm_encoding_utf_8_isupper_char(start, length)) return false;
16166
+ }
16167
+
16168
+ // Next, iterate through all of the bytes of the string to ensure that they
16169
+ // are all valid identifier characters.
16170
+ const uint8_t *cursor = start + width;
16171
+ while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
16172
+ return cursor == end;
16173
+ }
16174
+
15643
16175
  /**
15644
16176
  * Create an implicit node for the value of a hash pattern that has omitted the
15645
16177
  * value. This will use an implicit local variable target.
@@ -15647,14 +16179,18 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
15647
16179
  static pm_node_t *
15648
16180
  parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
15649
16181
  const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
15650
- pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
15651
16182
 
16183
+ pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
15652
16184
  int depth = -1;
15653
- if (value_loc->end[-1] == '!' || value_loc->end[-1] == '?') {
15654
- pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
15655
- PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
15656
- } else {
16185
+
16186
+ if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
15657
16187
  depth = pm_parser_local_depth_constant_id(parser, constant_id);
16188
+ } else {
16189
+ pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
16190
+
16191
+ if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
16192
+ PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
16193
+ }
15658
16194
  }
15659
16195
 
15660
16196
  if (depth == -1) {
@@ -15678,7 +16214,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
15678
16214
  */
15679
16215
  static void
15680
16216
  parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
15681
- if (pm_static_literals_add(parser, keys, node) != NULL) {
16217
+ if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
15682
16218
  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
15683
16219
  }
15684
16220
  }
@@ -15709,7 +16245,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
15709
16245
  } else {
15710
16246
  // Here we have a value for the first assoc in the list, so
15711
16247
  // we will parse it now.
15712
- value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16248
+ value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
15713
16249
  }
15714
16250
 
15715
16251
  pm_token_t operator = not_provided(parser);
@@ -15724,7 +16260,8 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
15724
16260
  // If we get anything else, then this is an error. For this we'll
15725
16261
  // create a missing node for the value and create an assoc node for
15726
16262
  // the first node in the list.
15727
- pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_LABEL);
16263
+ pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
16264
+ pm_parser_err_node(parser, first_node, diag_id);
15728
16265
 
15729
16266
  pm_token_t operator = not_provided(parser);
15730
16267
  pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
@@ -15761,7 +16298,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
15761
16298
  if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
15762
16299
  value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
15763
16300
  } else {
15764
- value = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
16301
+ value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
15765
16302
  }
15766
16303
 
15767
16304
  pm_token_t operator = not_provided(parser);
@@ -15818,7 +16355,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
15818
16355
 
15819
16356
  // Otherwise, we'll parse the inner pattern, then deal with it depending
15820
16357
  // on the type it returns.
15821
- pm_node_t *inner = parse_pattern(parser, captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
16358
+ pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
15822
16359
 
15823
16360
  accept1(parser, PM_TOKEN_NEWLINE);
15824
16361
  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
@@ -15885,11 +16422,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
15885
16422
  first_node = parse_pattern_keyword_rest(parser, captures);
15886
16423
  break;
15887
16424
  case PM_TOKEN_STRING_BEGIN:
15888
- first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
16425
+ first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY_LABEL);
15889
16426
  break;
15890
16427
  default: {
16428
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
15891
16429
  parser_lex(parser);
15892
- pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
15893
16430
 
15894
16431
  first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
15895
16432
  break;
@@ -15966,7 +16503,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
15966
16503
 
15967
16504
  if (variable == NULL) {
15968
16505
  if (
15969
- (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
16506
+ (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
15970
16507
  !parser->current_scope->closed &&
15971
16508
  (parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
15972
16509
  pm_token_is_it(parser->previous.start, parser->previous.end)
@@ -16040,8 +16577,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
16040
16577
  parser_lex(parser);
16041
16578
 
16042
16579
  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16043
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
16044
- pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, child);
16580
+ pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
16045
16581
 
16046
16582
  return parse_pattern_constant_path(parser, captures, (pm_node_t *) node);
16047
16583
  }
@@ -16092,7 +16628,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
16092
16628
  pm_token_t opening = parser->current;
16093
16629
  parser_lex(parser);
16094
16630
 
16095
- pm_node_t *body = parse_pattern(parser, captures, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
16631
+ pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
16096
16632
  accept1(parser, PM_TOKEN_NEWLINE);
16097
16633
  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16098
16634
  pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
@@ -16151,7 +16687,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
16151
16687
  * Parse a pattern matching expression.
16152
16688
  */
16153
16689
  static pm_node_t *
16154
- parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pattern, pm_diagnostic_id_t diag_id) {
16690
+ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id) {
16155
16691
  pm_node_t *node = NULL;
16156
16692
 
16157
16693
  bool leading_rest = false;
@@ -16161,14 +16697,26 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
16161
16697
  case PM_TOKEN_LABEL: {
16162
16698
  parser_lex(parser);
16163
16699
  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
16164
- return (pm_node_t *) parse_pattern_hash(parser, captures, key);
16700
+ node = (pm_node_t *) parse_pattern_hash(parser, captures, key);
16701
+
16702
+ if (!(flags & PM_PARSE_PATTERN_TOP)) {
16703
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16704
+ }
16705
+
16706
+ return node;
16165
16707
  }
16166
16708
  case PM_TOKEN_USTAR_STAR: {
16167
16709
  node = parse_pattern_keyword_rest(parser, captures);
16168
- return (pm_node_t *) parse_pattern_hash(parser, captures, node);
16710
+ node = (pm_node_t *) parse_pattern_hash(parser, captures, node);
16711
+
16712
+ if (!(flags & PM_PARSE_PATTERN_TOP)) {
16713
+ pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16714
+ }
16715
+
16716
+ return node;
16169
16717
  }
16170
16718
  case PM_TOKEN_USTAR: {
16171
- if (top_pattern) {
16719
+ if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
16172
16720
  parser_lex(parser);
16173
16721
  node = (pm_node_t *) parse_pattern_rest(parser, captures);
16174
16722
  leading_rest = true;
@@ -16187,7 +16735,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
16187
16735
  return (pm_node_t *) parse_pattern_hash(parser, captures, node);
16188
16736
  }
16189
16737
 
16190
- if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
16738
+ if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
16191
16739
  // If we have a comma, then we are now parsing either an array pattern or a
16192
16740
  // find pattern. We need to parse all of the patterns, put them into a big
16193
16741
  // list, and then determine which type of node we have.
@@ -16367,7 +16915,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
16367
16915
 
16368
16916
  pm_node_list_free(&parts);
16369
16917
  } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
16370
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
16918
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16371
16919
  } else if (match1(parser, PM_TOKEN_EOF)) {
16372
16920
  pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16373
16921
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
@@ -16393,7 +16941,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
16393
16941
  pm_node_flag_set(node, parse_unescaped_encoding(parser));
16394
16942
  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16395
16943
  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16396
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
16944
+ node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16397
16945
  } else {
16398
16946
  // If we get here, then we have interpolation so we'll need
16399
16947
  // to create a string or symbol node with interpolation.
@@ -16475,11 +17023,11 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
16475
17023
  pm_token_t bounds = not_provided(parser);
16476
17024
 
16477
17025
  pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16478
- pm_interpolated_string_node_append(parser, container, current);
17026
+ pm_interpolated_string_node_append(container, current);
16479
17027
  current = (pm_node_t *) container;
16480
17028
  }
16481
17029
 
16482
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, node);
17030
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16483
17031
  }
16484
17032
  }
16485
17033
 
@@ -16498,6 +17046,11 @@ pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
16498
17046
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
16499
17047
  break;
16500
17048
  }
17049
+ case PM_ERR_HASH_VALUE:
17050
+ case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17051
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17052
+ break;
17053
+ }
16501
17054
  case PM_ERR_UNARY_RECEIVER: {
16502
17055
  const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
16503
17056
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
@@ -16724,13 +17277,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16724
17277
  }
16725
17278
 
16726
17279
  element = (pm_node_t *) pm_keyword_hash_node_create(parser);
16727
- pm_static_literals_t literals = { 0 };
17280
+ pm_static_literals_t hash_keys = { 0 };
16728
17281
 
16729
17282
  if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
16730
- parse_assocs(parser, &literals, element);
17283
+ parse_assocs(parser, &hash_keys, element);
16731
17284
  }
16732
17285
 
16733
- pm_static_literals_free(&literals);
17286
+ pm_static_literals_free(&hash_keys);
16734
17287
  parsed_bare_hash = true;
16735
17288
  } else {
16736
17289
  element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION);
@@ -16741,8 +17294,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16741
17294
  }
16742
17295
 
16743
17296
  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
16744
- pm_static_literals_t literals = { 0 };
16745
- pm_hash_key_static_literals_add(parser, &literals, element);
17297
+ pm_static_literals_t hash_keys = { 0 };
17298
+ pm_hash_key_static_literals_add(parser, &hash_keys, element);
16746
17299
 
16747
17300
  pm_token_t operator;
16748
17301
  if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
@@ -16757,10 +17310,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16757
17310
 
16758
17311
  element = (pm_node_t *) hash;
16759
17312
  if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16760
- parse_assocs(parser, &literals, element);
17313
+ parse_assocs(parser, &hash_keys, element);
16761
17314
  }
16762
17315
 
16763
- pm_static_literals_free(&literals);
17316
+ pm_static_literals_free(&hash_keys);
16764
17317
  parsed_bare_hash = true;
16765
17318
  }
16766
17319
  }
@@ -16854,7 +17407,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16854
17407
  return (pm_node_t *) multi_target;
16855
17408
  }
16856
17409
 
16857
- return parse_target_validate(parser, (pm_node_t *) multi_target);
17410
+ return parse_target_validate(parser, (pm_node_t *) multi_target, false);
16858
17411
  }
16859
17412
 
16860
17413
  // If we have a single statement and are ending on a right parenthesis
@@ -16920,14 +17473,30 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16920
17473
  return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
16921
17474
  }
16922
17475
  case PM_TOKEN_BRACE_LEFT: {
17476
+ // If we were passed a current_hash_keys via the parser, then that
17477
+ // means we're already parsing a hash and we want to share the set
17478
+ // of hash keys with this inner hash we're about to parse for the
17479
+ // sake of warnings. We'll set it to NULL after we grab it to make
17480
+ // sure subsequent expressions don't use it. Effectively this is a
17481
+ // way of getting around passing it to every call to
17482
+ // parse_expression.
17483
+ pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
17484
+ parser->current_hash_keys = NULL;
17485
+
16923
17486
  pm_accepts_block_stack_push(parser, true);
16924
17487
  parser_lex(parser);
16925
17488
 
16926
17489
  pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
16927
- pm_static_literals_t literals = { 0 };
16928
17490
 
16929
17491
  if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
16930
- parse_assocs(parser, &literals, (pm_node_t *) node);
17492
+ if (current_hash_keys != NULL) {
17493
+ parse_assocs(parser, current_hash_keys, (pm_node_t *) node);
17494
+ } else {
17495
+ pm_static_literals_t hash_keys = { 0 };
17496
+ parse_assocs(parser, &hash_keys, (pm_node_t *) node);
17497
+ pm_static_literals_free(&hash_keys);
17498
+ }
17499
+
16931
17500
  accept1(parser, PM_TOKEN_NEWLINE);
16932
17501
  }
16933
17502
 
@@ -16935,7 +17504,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16935
17504
  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
16936
17505
  pm_hash_node_closing_loc_set(node, &parser->previous);
16937
17506
 
16938
- pm_static_literals_free(&literals);
16939
17507
  return (pm_node_t *) node;
16940
17508
  }
16941
17509
  case PM_TOKEN_CHARACTER_LITERAL: {
@@ -17000,12 +17568,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17000
17568
  }
17001
17569
  case PM_TOKEN_UCOLON_COLON: {
17002
17570
  parser_lex(parser);
17003
-
17004
17571
  pm_token_t delimiter = parser->previous;
17005
- expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17006
17572
 
17007
- pm_node_t *constant = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
17008
- pm_node_t *node = (pm_node_t *)pm_constant_path_node_create(parser, NULL, &delimiter, constant);
17573
+ expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17574
+ pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17009
17575
 
17010
17576
  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17011
17577
  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
@@ -17165,8 +17731,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17165
17731
  if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
17166
17732
  // If we get here, then we have an empty heredoc. We'll create
17167
17733
  // an empty content token and return an empty string node.
17168
- lex_mode_pop(parser);
17169
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
17734
+ expect1_heredoc_term(parser, lex_mode);
17170
17735
  pm_token_t content = parse_strings_empty_content(parser->previous.start);
17171
17736
 
17172
17737
  if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
@@ -17207,8 +17772,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17207
17772
  }
17208
17773
 
17209
17774
  node = (pm_node_t *) cast;
17210
- lex_mode_pop(parser);
17211
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
17775
+ expect1_heredoc_term(parser, lex_mode);
17212
17776
  } else {
17213
17777
  // If we get here, then we have multiple parts in the heredoc,
17214
17778
  // so we'll need to create an interpolated string node to hold
@@ -17230,20 +17794,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17230
17794
  pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
17231
17795
  cast->parts = parts;
17232
17796
 
17233
- lex_mode_pop(parser);
17234
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
17235
-
17797
+ expect1_heredoc_term(parser, lex_mode);
17236
17798
  pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
17799
+
17237
17800
  cast->base.location = cast->opening_loc;
17238
17801
  node = (pm_node_t *) cast;
17239
17802
  } else {
17240
17803
  pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
17241
17804
  pm_node_list_free(&parts);
17242
17805
 
17243
- lex_mode_pop(parser);
17244
- expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
17245
-
17806
+ expect1_heredoc_term(parser, lex_mode);
17246
17807
  pm_interpolated_string_node_closing_set(cast, &parser->previous);
17808
+
17247
17809
  cast->base.location = cast->opening_loc;
17248
17810
  node = (pm_node_t *) cast;
17249
17811
  }
@@ -17464,7 +18026,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17464
18026
  pm_token_t in_keyword = parser->previous;
17465
18027
 
17466
18028
  pm_constant_id_list_t captures = { 0 };
17467
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
18029
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
17468
18030
 
17469
18031
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17470
18032
  pm_constant_id_list_free(&captures);
@@ -17493,7 +18055,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17493
18055
  then_keyword = not_provided(parser);
17494
18056
  }
17495
18057
  } else {
17496
- expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18058
+ expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
17497
18059
  then_keyword = parser->previous;
17498
18060
  }
17499
18061
 
@@ -17947,7 +18509,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
17947
18509
  lex_state_set(parser, PM_LEX_STATE_BEG);
17948
18510
  parser->command_start = true;
17949
18511
 
17950
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_DEF_PARAMS_TERM_PAREN);
18512
+ if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18513
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
18514
+ parser->previous.start = parser->previous.end;
18515
+ parser->previous.type = PM_TOKEN_MISSING;
18516
+ }
18517
+
17951
18518
  rparen = parser->previous;
17952
18519
  break;
17953
18520
  }
@@ -18145,7 +18712,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18145
18712
  if (match1(parser, PM_TOKEN_COMMA)) {
18146
18713
  index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
18147
18714
  } else {
18148
- index = parse_target(parser, index);
18715
+ index = parse_target(parser, index, false);
18149
18716
  }
18150
18717
 
18151
18718
  context_pop(parser);
@@ -18267,9 +18834,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18267
18834
  pm_token_t double_colon = parser->previous;
18268
18835
 
18269
18836
  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18270
- pm_node_t *constant = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
18271
-
18272
- constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, constant);
18837
+ constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
18273
18838
  }
18274
18839
 
18275
18840
  // Here we retrieve the name of the module. If it wasn't a constant,
@@ -18649,15 +19214,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18649
19214
  // If we hit string content and the current node is
18650
19215
  // an interpolated string, then we need to append
18651
19216
  // the string content to the list of child nodes.
18652
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
19217
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
18653
19218
  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
18654
19219
  // If we hit string content and the current node is
18655
19220
  // a string node, then we need to convert the
18656
19221
  // current node into an interpolated string and add
18657
19222
  // the string content to the list of child nodes.
18658
19223
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
18659
- pm_interpolated_string_node_append(parser, interpolated, current);
18660
- pm_interpolated_string_node_append(parser, interpolated, string);
19224
+ pm_interpolated_string_node_append(interpolated, current);
19225
+ pm_interpolated_string_node_append(interpolated, string);
18661
19226
  current = (pm_node_t *) interpolated;
18662
19227
  } else {
18663
19228
  assert(false && "unreachable");
@@ -18682,7 +19247,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18682
19247
  pm_token_t opening = not_provided(parser);
18683
19248
  pm_token_t closing = not_provided(parser);
18684
19249
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
18685
- pm_interpolated_string_node_append(parser, interpolated, current);
19250
+ pm_interpolated_string_node_append(interpolated, current);
18686
19251
  current = (pm_node_t *) interpolated;
18687
19252
  } else {
18688
19253
  // If we hit an embedded variable and the current
@@ -18691,7 +19256,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18691
19256
  }
18692
19257
 
18693
19258
  pm_node_t *part = parse_string_part(parser);
18694
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
19259
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
18695
19260
  break;
18696
19261
  }
18697
19262
  case PM_TOKEN_EMBEXPR_BEGIN: {
@@ -18711,7 +19276,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18711
19276
  pm_token_t opening = not_provided(parser);
18712
19277
  pm_token_t closing = not_provided(parser);
18713
19278
  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
18714
- pm_interpolated_string_node_append(parser, interpolated, current);
19279
+ pm_interpolated_string_node_append(interpolated, current);
18715
19280
  current = (pm_node_t *) interpolated;
18716
19281
  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
18717
19282
  // If we hit an embedded expression and the current
@@ -18722,7 +19287,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18722
19287
  }
18723
19288
 
18724
19289
  pm_node_t *part = parse_string_part(parser);
18725
- pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
19290
+ pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
18726
19291
  break;
18727
19292
  }
18728
19293
  default:
@@ -18798,6 +19363,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18798
19363
  pm_token_t opening = not_provided(parser);
18799
19364
  pm_token_t closing = not_provided(parser);
18800
19365
  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
19366
+
19367
+ if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
19368
+ // This is extremely strange, but the first string part of a
19369
+ // regular expression will always be tagged as binary if we
19370
+ // are in a US-ASCII file, no matter its contents.
19371
+ pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
19372
+ }
19373
+
18801
19374
  pm_interpolated_regular_expression_node_append(interpolated, part);
18802
19375
  } else {
18803
19376
  // If the first part of the body of the regular expression is not a
@@ -18926,7 +19499,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18926
19499
  if (match1(parser, PM_TOKEN_COMMA)) {
18927
19500
  return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX);
18928
19501
  } else {
18929
- return parse_target_validate(parser, splat);
19502
+ return parse_target_validate(parser, splat, true);
18930
19503
  }
18931
19504
  }
18932
19505
  case PM_TOKEN_BANG: {
@@ -19271,39 +19844,6 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
19271
19844
  }
19272
19845
  }
19273
19846
 
19274
- /**
19275
- * Returns true if the name of the capture group is a valid local variable that
19276
- * can be written to.
19277
- */
19278
- static bool
19279
- parse_regular_expression_named_capture(pm_parser_t *parser, const uint8_t *source, size_t length) {
19280
- if (length == 0) {
19281
- return false;
19282
- }
19283
-
19284
- // First ensure that it starts with a valid identifier starting character.
19285
- size_t width = char_is_identifier_start(parser, source);
19286
- if (!width) {
19287
- return false;
19288
- }
19289
-
19290
- // Next, ensure that it's not an uppercase character.
19291
- if (parser->encoding_changed) {
19292
- if (parser->encoding->isupper_char(source, (ptrdiff_t) length)) return false;
19293
- } else {
19294
- if (pm_encoding_utf_8_isupper_char(source, (ptrdiff_t) length)) return false;
19295
- }
19296
-
19297
- // Next, iterate through all of the bytes of the string to ensure that they
19298
- // are all valid identifier characters.
19299
- const uint8_t *cursor = source + width;
19300
- while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
19301
- cursor += width;
19302
- }
19303
-
19304
- return cursor == source + length;
19305
- }
19306
-
19307
19847
  /**
19308
19848
  * Potentially change a =~ with a regular expression with named captures into a
19309
19849
  * match write node.
@@ -19330,7 +19870,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
19330
19870
 
19331
19871
  // If the name of the capture group isn't a valid identifier, we do
19332
19872
  // not add it to the local table.
19333
- if (!parse_regular_expression_named_capture(parser, source, length)) continue;
19873
+ if (!pm_slice_is_valid_local(parser, source, source + length)) continue;
19334
19874
 
19335
19875
  if (content->type == PM_STRING_SHARED) {
19336
19876
  // If the unescaped string is a slice of the source, then we can
@@ -19788,7 +20328,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
19788
20328
  // In this case we have an operator but we don't know what it's for.
19789
20329
  // We need to treat it as an error. For now, we'll mark it as an error
19790
20330
  // and just skip right past it.
19791
- pm_parser_err_previous(parser, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
20331
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
19792
20332
  return node;
19793
20333
  }
19794
20334
  }
@@ -20059,8 +20599,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20059
20599
  path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
20060
20600
  } else {
20061
20601
  // Otherwise, this is a constant path. That would look like Foo::Bar.
20062
- pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
20063
- path = (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
20602
+ path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
20064
20603
  }
20065
20604
 
20066
20605
  // If this is followed by a comma then it is a multiple assignment.
@@ -20099,9 +20638,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20099
20638
  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
20100
20639
  }
20101
20640
  default: {
20102
- pm_parser_err_token(parser, &delimiter, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
20103
- pm_node_t *child = (pm_node_t *) pm_missing_node_create(parser, delimiter.start, delimiter.end);
20104
- return (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
20641
+ expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
20642
+ return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
20105
20643
  }
20106
20644
  }
20107
20645
  }
@@ -20172,7 +20710,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20172
20710
  parser_lex(parser);
20173
20711
 
20174
20712
  pm_constant_id_list_t captures = { 0 };
20175
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
20713
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
20176
20714
 
20177
20715
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
20178
20716
  pm_constant_id_list_free(&captures);
@@ -20189,7 +20727,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20189
20727
  parser_lex(parser);
20190
20728
 
20191
20729
  pm_constant_id_list_t captures = { 0 };
20192
- pm_node_t *pattern = parse_pattern(parser, &captures, true, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
20730
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
20193
20731
 
20194
20732
  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
20195
20733
  pm_constant_id_list_free(&captures);
@@ -20202,6 +20740,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20202
20740
  }
20203
20741
  }
20204
20742
 
20743
+ #undef PM_PARSE_PATTERN_SINGLE
20744
+ #undef PM_PARSE_PATTERN_TOP
20745
+ #undef PM_PARSE_PATTERN_MULTI
20746
+
20205
20747
  /**
20206
20748
  * Parse an expression at the given point of the parser using the given binding
20207
20749
  * power to parse subsequent chains. If this function finds a syntax error, it
@@ -21246,25 +21788,28 @@ pm_parser_errors_format(const pm_parser_t *parser, const pm_list_t *error_list,
21246
21788
  pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
21247
21789
 
21248
21790
  size_t column = 0;
21249
- while (column < error->column_end) {
21250
- if (column < error->column_start) {
21251
- pm_buffer_append_byte(buffer, ' ');
21252
- } else {
21253
- const uint8_t caret = column == error->column_start ? '^' : '~';
21791
+ while (column < error->column_start) {
21792
+ pm_buffer_append_byte(buffer, ' ');
21254
21793
 
21255
- if (colorize) {
21256
- pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
21257
- pm_buffer_append_byte(buffer, caret);
21258
- pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
21259
- } else {
21260
- pm_buffer_append_byte(buffer, caret);
21261
- }
21262
- }
21794
+ size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21795
+ column += (char_width == 0 ? 1 : char_width);
21796
+ }
21797
+
21798
+ if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
21799
+ pm_buffer_append_byte(buffer, '^');
21800
+
21801
+ size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21802
+ column += (char_width == 0 ? 1 : char_width);
21803
+
21804
+ while (column < error->column_end) {
21805
+ pm_buffer_append_byte(buffer, '~');
21263
21806
 
21264
21807
  size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
21265
21808
  column += (char_width == 0 ? 1 : char_width);
21266
21809
  }
21267
21810
 
21811
+ if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
21812
+
21268
21813
  if (inline_messages) {
21269
21814
  pm_buffer_append_byte(buffer, ' ');
21270
21815
  assert(error->error != NULL);