prism 0.26.0 → 0.28.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +45 -1
- data/Makefile +3 -2
- data/config.yml +305 -20
- data/docs/configuration.md +1 -0
- data/ext/prism/api_node.c +884 -879
- data/ext/prism/extconf.rb +23 -4
- data/ext/prism/extension.c +16 -9
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +298 -9
- data/include/prism/diagnostic.h +15 -5
- data/include/prism/options.h +2 -2
- data/include/prism/parser.h +10 -0
- data/include/prism/static_literals.h +8 -6
- data/include/prism/version.h +2 -2
- data/lib/prism/dot_visitor.rb +22 -6
- data/lib/prism/dsl.rb +8 -8
- data/lib/prism/ffi.rb +4 -4
- data/lib/prism/inspect_visitor.rb +2156 -0
- data/lib/prism/lex_compat.rb +18 -1
- data/lib/prism/mutation_compiler.rb +2 -2
- data/lib/prism/node.rb +2345 -1964
- data/lib/prism/node_ext.rb +34 -5
- data/lib/prism/parse_result/newlines.rb +0 -2
- data/lib/prism/parse_result.rb +137 -13
- data/lib/prism/pattern.rb +12 -6
- data/lib/prism/polyfill/byteindex.rb +13 -0
- data/lib/prism/polyfill/unpack1.rb +14 -0
- data/lib/prism/reflection.rb +21 -31
- data/lib/prism/serialize.rb +27 -17
- data/lib/prism/translation/parser/compiler.rb +34 -15
- data/lib/prism/translation/parser.rb +6 -6
- data/lib/prism/translation/ripper.rb +72 -68
- data/lib/prism/translation/ruby_parser.rb +69 -31
- data/lib/prism.rb +3 -2
- data/prism.gemspec +36 -38
- data/rbi/prism/compiler.rbi +3 -5
- data/rbi/prism/inspect_visitor.rbi +12 -0
- data/rbi/prism/node.rbi +359 -321
- data/rbi/prism/parse_result.rbi +85 -34
- data/rbi/prism/reflection.rbi +7 -13
- data/rbi/prism/translation/ripper.rbi +1 -11
- data/rbi/prism.rbi +9 -9
- data/sig/prism/dsl.rbs +3 -3
- data/sig/prism/inspect_visitor.rbs +22 -0
- data/sig/prism/node.rbs +68 -48
- data/sig/prism/parse_result.rbs +42 -10
- data/sig/prism/reflection.rbs +2 -8
- data/sig/prism/serialize.rbs +2 -3
- data/sig/prism.rbs +9 -9
- data/src/diagnostic.c +44 -24
- data/src/node.c +41 -16
- data/src/options.c +2 -2
- data/src/prettyprint.c +61 -18
- data/src/prism.c +623 -188
- data/src/serialize.c +5 -2
- data/src/static_literals.c +120 -34
- data/src/token_type.c +4 -4
- data/src/util/pm_integer.c +9 -2
- metadata +7 -9
- data/lib/prism/node_inspector.rb +0 -68
- data/lib/prism/polyfill/string.rb +0 -12
- data/rbi/prism/desugar_compiler.rbi +0 -5
- data/rbi/prism/mutation_compiler.rbi +0 -5
- data/rbi/prism/translation/parser/compiler.rbi +0 -13
- data/rbi/prism/translation/ripper/ripper_compiler.rbi +0 -5
- data/rbi/prism/translation/ruby_parser.rbi +0 -11
data/src/prism.c
CHANGED
@@ -672,6 +672,26 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
|
|
672
672
|
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
|
673
673
|
PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
|
674
674
|
|
675
|
+
/**
|
676
|
+
* Add an error for an expected heredoc terminator. This is a special function
|
677
|
+
* only because it grabs its location off of a lex mode instead of a node or a
|
678
|
+
* token.
|
679
|
+
*/
|
680
|
+
static void
|
681
|
+
pm_parser_err_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
|
682
|
+
const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
|
683
|
+
size_t ident_length = lex_mode->as.heredoc.ident_length;
|
684
|
+
|
685
|
+
PM_PARSER_ERR_FORMAT(
|
686
|
+
parser,
|
687
|
+
ident_start,
|
688
|
+
ident_start + ident_length,
|
689
|
+
PM_ERR_HEREDOC_TERM,
|
690
|
+
(int) ident_length,
|
691
|
+
(const char *) ident_start
|
692
|
+
);
|
693
|
+
}
|
694
|
+
|
675
695
|
/******************************************************************************/
|
676
696
|
/* Scope-related functions */
|
677
697
|
/******************************************************************************/
|
@@ -1405,7 +1425,7 @@ pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
|
|
1405
1425
|
static inline void
|
1406
1426
|
pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
|
1407
1427
|
if (pm_conditional_predicate_warn_write_literal_p(node)) {
|
1408
|
-
pm_parser_warn_node(parser, node, parser->version ==
|
1428
|
+
pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
|
1409
1429
|
}
|
1410
1430
|
}
|
1411
1431
|
|
@@ -2923,6 +2943,29 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
|
|
2923
2943
|
return node;
|
2924
2944
|
}
|
2925
2945
|
|
2946
|
+
/**
|
2947
|
+
* Validate that index expressions do not have keywords or blocks if we are
|
2948
|
+
* parsing as Ruby 3.4+.
|
2949
|
+
*/
|
2950
|
+
static void
|
2951
|
+
pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
|
2952
|
+
if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
|
2953
|
+
if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
|
2954
|
+
pm_node_t *node;
|
2955
|
+
PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
|
2956
|
+
if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
|
2957
|
+
pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
|
2958
|
+
break;
|
2959
|
+
}
|
2960
|
+
}
|
2961
|
+
}
|
2962
|
+
|
2963
|
+
if (block != NULL) {
|
2964
|
+
pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
|
2965
|
+
}
|
2966
|
+
}
|
2967
|
+
}
|
2968
|
+
|
2926
2969
|
/**
|
2927
2970
|
* Allocate and initialize a new IndexAndWriteNode node.
|
2928
2971
|
*/
|
@@ -2931,6 +2974,8 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons
|
|
2931
2974
|
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
|
2932
2975
|
pm_index_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_and_write_node_t);
|
2933
2976
|
|
2977
|
+
pm_index_arguments_check(parser, target->arguments, target->block);
|
2978
|
+
|
2934
2979
|
*node = (pm_index_and_write_node_t) {
|
2935
2980
|
{
|
2936
2981
|
.type = PM_INDEX_AND_WRITE_NODE,
|
@@ -3002,6 +3047,8 @@ static pm_index_operator_write_node_t *
|
|
3002
3047
|
pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
|
3003
3048
|
pm_index_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_operator_write_node_t);
|
3004
3049
|
|
3050
|
+
pm_index_arguments_check(parser, target->arguments, target->block);
|
3051
|
+
|
3005
3052
|
*node = (pm_index_operator_write_node_t) {
|
3006
3053
|
{
|
3007
3054
|
.type = PM_INDEX_OPERATOR_WRITE_NODE,
|
@@ -3075,6 +3122,8 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
|
|
3075
3122
|
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
|
3076
3123
|
pm_index_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_or_write_node_t);
|
3077
3124
|
|
3125
|
+
pm_index_arguments_check(parser, target->arguments, target->block);
|
3126
|
+
|
3078
3127
|
*node = (pm_index_or_write_node_t) {
|
3079
3128
|
{
|
3080
3129
|
.type = PM_INDEX_OR_WRITE_NODE,
|
@@ -3139,6 +3188,8 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
|
|
3139
3188
|
pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
|
3140
3189
|
pm_node_flags_t flags = target->base.flags;
|
3141
3190
|
|
3191
|
+
pm_index_arguments_check(parser, target->arguments, target->block);
|
3192
|
+
|
3142
3193
|
*node = (pm_index_target_node_t) {
|
3143
3194
|
{
|
3144
3195
|
.type = PM_INDEX_TARGET_NODE,
|
@@ -3510,22 +3561,27 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node
|
|
3510
3561
|
* Allocate and initialize a new ConstantPathNode node.
|
3511
3562
|
*/
|
3512
3563
|
static pm_constant_path_node_t *
|
3513
|
-
pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter,
|
3564
|
+
pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
|
3514
3565
|
pm_assert_value_expression(parser, parent);
|
3515
|
-
|
3516
3566
|
pm_constant_path_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_node_t);
|
3517
3567
|
|
3568
|
+
pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
|
3569
|
+
if (name_token->type == PM_TOKEN_CONSTANT) {
|
3570
|
+
name = pm_parser_constant_id_token(parser, name_token);
|
3571
|
+
}
|
3572
|
+
|
3518
3573
|
*node = (pm_constant_path_node_t) {
|
3519
3574
|
{
|
3520
3575
|
.type = PM_CONSTANT_PATH_NODE,
|
3521
3576
|
.location = {
|
3522
3577
|
.start = parent == NULL ? delimiter->start : parent->location.start,
|
3523
|
-
.end =
|
3578
|
+
.end = name_token->end
|
3524
3579
|
},
|
3525
3580
|
},
|
3526
3581
|
.parent = parent,
|
3527
|
-
.
|
3528
|
-
.delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter)
|
3582
|
+
.name = name,
|
3583
|
+
.delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
|
3584
|
+
.name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
|
3529
3585
|
};
|
3530
3586
|
|
3531
3587
|
return node;
|
@@ -3716,6 +3772,113 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
|
|
3716
3772
|
}
|
3717
3773
|
}
|
3718
3774
|
|
3775
|
+
/**
|
3776
|
+
* When a method body is created, we want to check if the last statement is a
|
3777
|
+
* return or a statement that houses a return. If it is, then we want to mark
|
3778
|
+
* that return as being redundant so that we can compile it differently but also
|
3779
|
+
* so that we can indicate that to the user.
|
3780
|
+
*/
|
3781
|
+
static void
|
3782
|
+
pm_def_node_body_redundant_return(pm_node_t *node) {
|
3783
|
+
switch (PM_NODE_TYPE(node)) {
|
3784
|
+
case PM_RETURN_NODE:
|
3785
|
+
node->flags |= PM_RETURN_NODE_FLAGS_REDUNDANT;
|
3786
|
+
break;
|
3787
|
+
case PM_BEGIN_NODE: {
|
3788
|
+
pm_begin_node_t *cast = (pm_begin_node_t *) node;
|
3789
|
+
|
3790
|
+
if (cast->statements != NULL && cast->else_clause == NULL) {
|
3791
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3792
|
+
}
|
3793
|
+
break;
|
3794
|
+
}
|
3795
|
+
case PM_STATEMENTS_NODE: {
|
3796
|
+
pm_statements_node_t *cast = (pm_statements_node_t *) node;
|
3797
|
+
|
3798
|
+
if (cast->body.size > 0) {
|
3799
|
+
pm_def_node_body_redundant_return(cast->body.nodes[cast->body.size - 1]);
|
3800
|
+
}
|
3801
|
+
break;
|
3802
|
+
}
|
3803
|
+
case PM_IF_NODE: {
|
3804
|
+
pm_if_node_t *cast = (pm_if_node_t *) node;
|
3805
|
+
|
3806
|
+
if (cast->statements != NULL) {
|
3807
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3808
|
+
}
|
3809
|
+
|
3810
|
+
if (cast->consequent != NULL) {
|
3811
|
+
pm_def_node_body_redundant_return(cast->consequent);
|
3812
|
+
}
|
3813
|
+
break;
|
3814
|
+
}
|
3815
|
+
case PM_UNLESS_NODE: {
|
3816
|
+
pm_unless_node_t *cast = (pm_unless_node_t *) node;
|
3817
|
+
|
3818
|
+
if (cast->statements != NULL) {
|
3819
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3820
|
+
}
|
3821
|
+
|
3822
|
+
if (cast->consequent != NULL) {
|
3823
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
|
3824
|
+
}
|
3825
|
+
break;
|
3826
|
+
}
|
3827
|
+
case PM_ELSE_NODE: {
|
3828
|
+
pm_else_node_t *cast = (pm_else_node_t *) node;
|
3829
|
+
|
3830
|
+
if (cast->statements != NULL) {
|
3831
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3832
|
+
}
|
3833
|
+
break;
|
3834
|
+
}
|
3835
|
+
case PM_CASE_NODE: {
|
3836
|
+
pm_case_node_t *cast = (pm_case_node_t *) node;
|
3837
|
+
pm_node_t *condition;
|
3838
|
+
|
3839
|
+
PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
|
3840
|
+
pm_def_node_body_redundant_return(condition);
|
3841
|
+
}
|
3842
|
+
|
3843
|
+
if (cast->consequent != NULL) {
|
3844
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
|
3845
|
+
}
|
3846
|
+
break;
|
3847
|
+
}
|
3848
|
+
case PM_WHEN_NODE: {
|
3849
|
+
pm_when_node_t *cast = (pm_when_node_t *) node;
|
3850
|
+
|
3851
|
+
if (cast->statements != NULL) {
|
3852
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3853
|
+
}
|
3854
|
+
break;
|
3855
|
+
}
|
3856
|
+
case PM_CASE_MATCH_NODE: {
|
3857
|
+
pm_case_match_node_t *cast = (pm_case_match_node_t *) node;
|
3858
|
+
pm_node_t *condition;
|
3859
|
+
|
3860
|
+
PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
|
3861
|
+
pm_def_node_body_redundant_return(condition);
|
3862
|
+
}
|
3863
|
+
|
3864
|
+
if (cast->consequent != NULL) {
|
3865
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
|
3866
|
+
}
|
3867
|
+
break;
|
3868
|
+
}
|
3869
|
+
case PM_IN_NODE: {
|
3870
|
+
pm_in_node_t *cast = (pm_in_node_t *) node;
|
3871
|
+
|
3872
|
+
if (cast->statements != NULL) {
|
3873
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3874
|
+
}
|
3875
|
+
break;
|
3876
|
+
}
|
3877
|
+
default:
|
3878
|
+
break;
|
3879
|
+
}
|
3880
|
+
}
|
3881
|
+
|
3719
3882
|
/**
|
3720
3883
|
* Allocate and initialize a new DefNode node.
|
3721
3884
|
*/
|
@@ -3748,6 +3911,10 @@ pm_def_node_create(
|
|
3748
3911
|
pm_def_node_receiver_check(parser, receiver);
|
3749
3912
|
}
|
3750
3913
|
|
3914
|
+
if (body != NULL) {
|
3915
|
+
pm_def_node_body_redundant_return(body);
|
3916
|
+
}
|
3917
|
+
|
3751
3918
|
*node = (pm_def_node_t) {
|
3752
3919
|
{
|
3753
3920
|
.type = PM_DEF_NODE,
|
@@ -4922,6 +5089,50 @@ pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable
|
|
4922
5089
|
return node;
|
4923
5090
|
}
|
4924
5091
|
|
5092
|
+
/**
|
5093
|
+
* Append a part into a list of string parts. Importantly this handles nested
|
5094
|
+
* interpolated strings by not necessarily removing the marker for static
|
5095
|
+
* literals.
|
5096
|
+
*/
|
5097
|
+
static void
|
5098
|
+
pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
|
5099
|
+
switch (PM_NODE_TYPE(part)) {
|
5100
|
+
case PM_STRING_NODE:
|
5101
|
+
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5102
|
+
break;
|
5103
|
+
case PM_EMBEDDED_STATEMENTS_NODE: {
|
5104
|
+
pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
|
5105
|
+
pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
|
5106
|
+
|
5107
|
+
if (embedded == NULL) {
|
5108
|
+
// If there are no statements or more than one statement, then
|
5109
|
+
// we lose the static literal flag.
|
5110
|
+
pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
|
5111
|
+
} else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
|
5112
|
+
// If the embedded statement is a string, then we can keep the
|
5113
|
+
// static literal flag and mark the string as frozen.
|
5114
|
+
pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5115
|
+
} else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5116
|
+
// If the embedded statement is an interpolated string and it's
|
5117
|
+
// a static literal, then we can keep the static literal flag.
|
5118
|
+
} else {
|
5119
|
+
// Otherwise we lose the static literal flag.
|
5120
|
+
pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
|
5121
|
+
}
|
5122
|
+
|
5123
|
+
break;
|
5124
|
+
}
|
5125
|
+
case PM_EMBEDDED_VARIABLE_NODE:
|
5126
|
+
pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
|
5127
|
+
break;
|
5128
|
+
default:
|
5129
|
+
assert(false && "unexpected node type");
|
5130
|
+
break;
|
5131
|
+
}
|
5132
|
+
|
5133
|
+
pm_node_list_append(parts, part);
|
5134
|
+
}
|
5135
|
+
|
4925
5136
|
/**
|
4926
5137
|
* Allocate a new InterpolatedRegularExpressionNode node.
|
4927
5138
|
*/
|
@@ -4955,54 +5166,113 @@ pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expressio
|
|
4955
5166
|
node->base.location.end = part->location.end;
|
4956
5167
|
}
|
4957
5168
|
|
4958
|
-
|
4959
|
-
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
4960
|
-
}
|
4961
|
-
|
4962
|
-
if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
|
4963
|
-
pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
|
4964
|
-
}
|
4965
|
-
|
4966
|
-
pm_node_list_append(&node->parts, part);
|
5169
|
+
pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
|
4967
5170
|
}
|
4968
5171
|
|
4969
5172
|
static inline void
|
4970
5173
|
pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
|
4971
5174
|
node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
|
4972
5175
|
node->base.location.end = closing->end;
|
4973
|
-
pm_node_flag_set((pm_node_t *)node, pm_regular_expression_flags_create(parser, closing));
|
5176
|
+
pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
|
4974
5177
|
}
|
4975
5178
|
|
4976
5179
|
/**
|
4977
5180
|
* Append a part to an InterpolatedStringNode node.
|
5181
|
+
*
|
5182
|
+
* This has somewhat complicated semantics, because we need to update
|
5183
|
+
* multiple flags that have somewhat confusing interactions.
|
5184
|
+
*
|
5185
|
+
* PM_NODE_FLAG_STATIC_LITERAL indicates that the node should be treated as a
|
5186
|
+
* single static literal string that can be pushed onto the stack on its own.
|
5187
|
+
* Note that this doesn't necessarily mean that the string will be frozen or
|
5188
|
+
* not; the instructions in CRuby will be either putobject or putstring,
|
5189
|
+
* depending on the combination of `--enable-frozen-string-literal`,
|
5190
|
+
* `# frozen_string_literal: true`, and whether or not there is interpolation.
|
5191
|
+
*
|
5192
|
+
* PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN indicates that the string should be
|
5193
|
+
* explicitly frozen. This will only happen if the string is comprised entirely
|
5194
|
+
* of string parts that are themselves static literals and frozen.
|
5195
|
+
*
|
5196
|
+
* PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE indicates that the string should
|
5197
|
+
* be explicitly marked as mutable. This will happen from
|
5198
|
+
* `--disable-frozen-string-literal` or `# frozen_string_literal: false`. This
|
5199
|
+
* is necessary to indicate that the string should be left up to the runtime,
|
5200
|
+
* which could potentially use a chilled string otherwise.
|
4978
5201
|
*/
|
4979
5202
|
static inline void
|
4980
|
-
pm_interpolated_string_node_append(
|
5203
|
+
pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
|
5204
|
+
#define CLEAR_FLAGS(node) \
|
5205
|
+
node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
|
5206
|
+
|
5207
|
+
#define MUTABLE_FLAGS(node) \
|
5208
|
+
node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
|
5209
|
+
|
4981
5210
|
if (node->parts.size == 0 && node->opening_loc.start == NULL) {
|
4982
5211
|
node->base.location.start = part->location.start;
|
4983
5212
|
}
|
4984
5213
|
|
4985
|
-
|
4986
|
-
|
4987
|
-
|
5214
|
+
node->base.location.end = MAX(node->base.location.end, part->location.end);
|
5215
|
+
|
5216
|
+
switch (PM_NODE_TYPE(part)) {
|
5217
|
+
case PM_STRING_NODE:
|
5218
|
+
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5219
|
+
break;
|
5220
|
+
case PM_INTERPOLATED_STRING_NODE:
|
5221
|
+
if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5222
|
+
// If the string that we're concatenating is a static literal,
|
5223
|
+
// then we can keep the static literal flag for this string.
|
5224
|
+
} else {
|
5225
|
+
// Otherwise, we lose the static literal flag here and we should
|
5226
|
+
// also clear the mutability flags.
|
5227
|
+
CLEAR_FLAGS(node);
|
5228
|
+
}
|
5229
|
+
break;
|
5230
|
+
case PM_EMBEDDED_STATEMENTS_NODE: {
|
5231
|
+
pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
|
5232
|
+
pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
|
5233
|
+
|
5234
|
+
if (embedded == NULL) {
|
5235
|
+
// If we're embedding multiple statements or no statements, then
|
5236
|
+
// the string is no longer a static literal.
|
5237
|
+
CLEAR_FLAGS(node);
|
5238
|
+
} else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
|
5239
|
+
// If the embedded statement is a string, then we can mark that
|
5240
|
+
// string as frozen and static literal, and not touch the static
|
5241
|
+
// literal status of this string.
|
5242
|
+
pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5243
|
+
|
5244
|
+
if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5245
|
+
MUTABLE_FLAGS(node);
|
5246
|
+
}
|
5247
|
+
} else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5248
|
+
// If the embedded statement is an interpolated string, but that
|
5249
|
+
// string is marked as static literal, then we can keep our
|
5250
|
+
// static literal status for this string.
|
5251
|
+
if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5252
|
+
MUTABLE_FLAGS(node);
|
5253
|
+
}
|
5254
|
+
} else {
|
5255
|
+
// In all other cases, we lose the static literal flag here and
|
5256
|
+
// clear the frozen and mutable flags as well.
|
5257
|
+
CLEAR_FLAGS(node);
|
5258
|
+
}
|
4988
5259
|
|
4989
|
-
|
4990
|
-
|
5260
|
+
break;
|
5261
|
+
}
|
5262
|
+
case PM_EMBEDDED_VARIABLE_NODE:
|
5263
|
+
// Embedded variables clear static literal, which means we also
|
5264
|
+
// should clear the mutability flags.
|
5265
|
+
CLEAR_FLAGS(node);
|
5266
|
+
break;
|
5267
|
+
default:
|
5268
|
+
assert(false && "unexpected node type");
|
5269
|
+
break;
|
4991
5270
|
}
|
4992
5271
|
|
4993
5272
|
pm_node_list_append(&node->parts, part);
|
4994
|
-
node->base.location.end = MAX(node->base.location.end, part->location.end);
|
4995
5273
|
|
4996
|
-
|
4997
|
-
|
4998
|
-
case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
|
4999
|
-
pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE);
|
5000
|
-
break;
|
5001
|
-
case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
|
5002
|
-
pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
|
5003
|
-
break;
|
5004
|
-
}
|
5005
|
-
}
|
5274
|
+
#undef CLEAR_FLAGS
|
5275
|
+
#undef MUTABLE_FLAGS
|
5006
5276
|
}
|
5007
5277
|
|
5008
5278
|
/**
|
@@ -5011,11 +5281,21 @@ pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_n
|
|
5011
5281
|
static pm_interpolated_string_node_t *
|
5012
5282
|
pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
|
5013
5283
|
pm_interpolated_string_node_t *node = PM_ALLOC_NODE(parser, pm_interpolated_string_node_t);
|
5284
|
+
pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
|
5285
|
+
|
5286
|
+
switch (parser->frozen_string_literal) {
|
5287
|
+
case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
|
5288
|
+
flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
|
5289
|
+
break;
|
5290
|
+
case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
|
5291
|
+
flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
|
5292
|
+
break;
|
5293
|
+
}
|
5014
5294
|
|
5015
5295
|
*node = (pm_interpolated_string_node_t) {
|
5016
5296
|
{
|
5017
5297
|
.type = PM_INTERPOLATED_STRING_NODE,
|
5018
|
-
.flags =
|
5298
|
+
.flags = flags,
|
5019
5299
|
.location = {
|
5020
5300
|
.start = opening->start,
|
5021
5301
|
.end = closing->end,
|
@@ -5029,7 +5309,7 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin
|
|
5029
5309
|
if (parts != NULL) {
|
5030
5310
|
pm_node_t *part;
|
5031
5311
|
PM_NODE_LIST_FOREACH(parts, index, part) {
|
5032
|
-
pm_interpolated_string_node_append(
|
5312
|
+
pm_interpolated_string_node_append(node, part);
|
5033
5313
|
}
|
5034
5314
|
}
|
5035
5315
|
|
@@ -5051,15 +5331,7 @@ pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_
|
|
5051
5331
|
node->base.location.start = part->location.start;
|
5052
5332
|
}
|
5053
5333
|
|
5054
|
-
|
5055
|
-
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5056
|
-
}
|
5057
|
-
|
5058
|
-
if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5059
|
-
pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
|
5060
|
-
}
|
5061
|
-
|
5062
|
-
pm_node_list_append(&node->parts, part);
|
5334
|
+
pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
|
5063
5335
|
node->base.location.end = MAX(node->base.location.end, part->location.end);
|
5064
5336
|
}
|
5065
5337
|
|
@@ -5125,11 +5397,7 @@ pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *openi
|
|
5125
5397
|
|
5126
5398
|
static inline void
|
5127
5399
|
pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
|
5128
|
-
|
5129
|
-
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5130
|
-
}
|
5131
|
-
|
5132
|
-
pm_node_list_append(&node->parts, part);
|
5400
|
+
pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
|
5133
5401
|
node->base.location.end = part->location.end;
|
5134
5402
|
}
|
5135
5403
|
|
@@ -6397,6 +6665,7 @@ pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argumen
|
|
6397
6665
|
*node = (pm_return_node_t) {
|
6398
6666
|
{
|
6399
6667
|
.type = PM_RETURN_NODE,
|
6668
|
+
.flags = 0,
|
6400
6669
|
.location = {
|
6401
6670
|
.start = keyword->start,
|
6402
6671
|
.end = (arguments == NULL ? keyword->end : arguments->base.location.end)
|
@@ -6729,7 +6998,8 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
|
|
6729
6998
|
}
|
6730
6999
|
|
6731
7000
|
/**
|
6732
|
-
* Read through the contents of a string and check if it consists solely of
|
7001
|
+
* Read through the contents of a string and check if it consists solely of
|
7002
|
+
* US-ASCII code points.
|
6733
7003
|
*/
|
6734
7004
|
static bool
|
6735
7005
|
pm_ascii_only_p(const pm_string_t *contents) {
|
@@ -6743,27 +7013,72 @@ pm_ascii_only_p(const pm_string_t *contents) {
|
|
6743
7013
|
return true;
|
6744
7014
|
}
|
6745
7015
|
|
7016
|
+
/**
|
7017
|
+
* Validate that the contents of the given symbol are all valid UTF-8.
|
7018
|
+
*/
|
7019
|
+
static void
|
7020
|
+
parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
|
7021
|
+
for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
|
7022
|
+
size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
|
7023
|
+
|
7024
|
+
if (width == 0) {
|
7025
|
+
pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
|
7026
|
+
break;
|
7027
|
+
}
|
7028
|
+
|
7029
|
+
cursor += width;
|
7030
|
+
}
|
7031
|
+
}
|
7032
|
+
|
7033
|
+
/**
|
7034
|
+
* Validate that the contents of the given symbol are all valid in the encoding
|
7035
|
+
* of the parser.
|
7036
|
+
*/
|
7037
|
+
static void
|
7038
|
+
parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
|
7039
|
+
const pm_encoding_t *encoding = parser->encoding;
|
7040
|
+
|
7041
|
+
for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
|
7042
|
+
size_t width = encoding->char_width(cursor, end - cursor);
|
7043
|
+
|
7044
|
+
if (width == 0) {
|
7045
|
+
pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
|
7046
|
+
break;
|
7047
|
+
}
|
7048
|
+
|
7049
|
+
cursor += width;
|
7050
|
+
}
|
7051
|
+
}
|
7052
|
+
|
6746
7053
|
/**
|
6747
7054
|
* Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
|
6748
7055
|
* encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
|
6749
7056
|
* points. Otherwise, the encoding may be explicitly set with an escape
|
6750
7057
|
* sequence.
|
7058
|
+
*
|
7059
|
+
* If the validate flag is set, then it will check the contents of the symbol
|
7060
|
+
* to ensure that all characters are valid in the encoding.
|
6751
7061
|
*/
|
6752
7062
|
static inline pm_node_flags_t
|
6753
|
-
parse_symbol_encoding(
|
7063
|
+
parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
|
6754
7064
|
if (parser->explicit_encoding != NULL) {
|
6755
7065
|
// A Symbol may optionally have its encoding explicitly set. This will
|
6756
7066
|
// happen if an escape sequence results in a non-ASCII code point.
|
6757
7067
|
if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
|
7068
|
+
if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
|
6758
7069
|
return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
|
6759
7070
|
} else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
|
6760
7071
|
return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
|
7072
|
+
} else if (validate) {
|
7073
|
+
parse_symbol_encoding_validate_other(parser, location, contents);
|
6761
7074
|
}
|
6762
7075
|
} else if (pm_ascii_only_p(contents)) {
|
6763
7076
|
// Ruby stipulates that all source files must use an ASCII-compatible
|
6764
7077
|
// encoding. Thus, all symbols appearing in source are eligible for
|
6765
7078
|
// "downgrading" to US-ASCII.
|
6766
7079
|
return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
|
7080
|
+
} else if (validate) {
|
7081
|
+
parse_symbol_encoding_validate_other(parser, location, contents);
|
6767
7082
|
}
|
6768
7083
|
|
6769
7084
|
return 0;
|
@@ -6931,7 +7246,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
|
|
6931
7246
|
*/
|
6932
7247
|
static pm_symbol_node_t *
|
6933
7248
|
pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
|
6934
|
-
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, &parser->current_string));
|
7249
|
+
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
|
6935
7250
|
parser->current_string = PM_STRING_EMPTY;
|
6936
7251
|
return node;
|
6937
7252
|
}
|
@@ -6953,7 +7268,7 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
6953
7268
|
|
6954
7269
|
assert((label.end - label.start) >= 0);
|
6955
7270
|
pm_string_shared_init(&node->unescaped, label.start, label.end);
|
6956
|
-
pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &node->unescaped));
|
7271
|
+
pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
|
6957
7272
|
|
6958
7273
|
break;
|
6959
7274
|
}
|
@@ -7038,7 +7353,8 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
|
|
7038
7353
|
.unescaped = node->unescaped
|
7039
7354
|
};
|
7040
7355
|
|
7041
|
-
|
7356
|
+
pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
|
7357
|
+
pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
|
7042
7358
|
|
7043
7359
|
// We are explicitly _not_ using pm_node_destroy here because we don't want
|
7044
7360
|
// to trash the unescaped string. We could instead copy the string if we
|
@@ -7574,7 +7890,7 @@ pm_local_variable_read_node_create_it(pm_parser_t *parser, const pm_token_t *nam
|
|
7574
7890
|
static pm_node_t *
|
7575
7891
|
pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
|
7576
7892
|
if (
|
7577
|
-
(parser->version !=
|
7893
|
+
(parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
|
7578
7894
|
!parser->current_scope->closed &&
|
7579
7895
|
(parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
|
7580
7896
|
pm_node_is_it(parser, node)
|
@@ -8298,10 +8614,11 @@ context_human(pm_context_t context) {
|
|
8298
8614
|
/* Specific token lexers */
|
8299
8615
|
/******************************************************************************/
|
8300
8616
|
|
8301
|
-
static void
|
8302
|
-
pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *invalid) {
|
8617
|
+
static inline void
|
8618
|
+
pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
|
8303
8619
|
if (invalid != NULL) {
|
8304
|
-
|
8620
|
+
pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
|
8621
|
+
pm_parser_err(parser, invalid, invalid + 1, diag_id);
|
8305
8622
|
}
|
8306
8623
|
}
|
8307
8624
|
|
@@ -8309,7 +8626,7 @@ static size_t
|
|
8309
8626
|
pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
|
8310
8627
|
const uint8_t *invalid = NULL;
|
8311
8628
|
size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
|
8312
|
-
pm_strspn_number_validate(parser, invalid);
|
8629
|
+
pm_strspn_number_validate(parser, string, length, invalid);
|
8313
8630
|
return length;
|
8314
8631
|
}
|
8315
8632
|
|
@@ -8317,7 +8634,7 @@ static size_t
|
|
8317
8634
|
pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
|
8318
8635
|
const uint8_t *invalid = NULL;
|
8319
8636
|
size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
|
8320
|
-
pm_strspn_number_validate(parser, invalid);
|
8637
|
+
pm_strspn_number_validate(parser, string, length, invalid);
|
8321
8638
|
return length;
|
8322
8639
|
}
|
8323
8640
|
|
@@ -8325,7 +8642,7 @@ static size_t
|
|
8325
8642
|
pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
|
8326
8643
|
const uint8_t *invalid = NULL;
|
8327
8644
|
size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
|
8328
|
-
pm_strspn_number_validate(parser, invalid);
|
8645
|
+
pm_strspn_number_validate(parser, string, length, invalid);
|
8329
8646
|
return length;
|
8330
8647
|
}
|
8331
8648
|
|
@@ -8333,7 +8650,7 @@ static size_t
|
|
8333
8650
|
pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
|
8334
8651
|
const uint8_t *invalid = NULL;
|
8335
8652
|
size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
|
8336
|
-
pm_strspn_number_validate(parser, invalid);
|
8653
|
+
pm_strspn_number_validate(parser, string, length, invalid);
|
8337
8654
|
return length;
|
8338
8655
|
}
|
8339
8656
|
|
@@ -8395,6 +8712,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8395
8712
|
if (pm_char_is_decimal_digit(peek(parser))) {
|
8396
8713
|
parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
|
8397
8714
|
} else {
|
8715
|
+
match(parser, '_');
|
8398
8716
|
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
|
8399
8717
|
}
|
8400
8718
|
|
@@ -8407,6 +8725,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8407
8725
|
if (pm_char_is_binary_digit(peek(parser))) {
|
8408
8726
|
parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
|
8409
8727
|
} else {
|
8728
|
+
match(parser, '_');
|
8410
8729
|
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
|
8411
8730
|
}
|
8412
8731
|
|
@@ -8420,6 +8739,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8420
8739
|
if (pm_char_is_octal_digit(peek(parser))) {
|
8421
8740
|
parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
|
8422
8741
|
} else {
|
8742
|
+
match(parser, '_');
|
8423
8743
|
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
|
8424
8744
|
}
|
8425
8745
|
|
@@ -8447,6 +8767,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8447
8767
|
if (pm_char_is_hexadecimal_digit(peek(parser))) {
|
8448
8768
|
parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
|
8449
8769
|
} else {
|
8770
|
+
match(parser, '_');
|
8450
8771
|
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
|
8451
8772
|
}
|
8452
8773
|
|
@@ -8567,7 +8888,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
8567
8888
|
} while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
|
8568
8889
|
|
8569
8890
|
// $0 isn't allowed to be followed by anything.
|
8570
|
-
pm_diagnostic_id_t diag_id = parser->version ==
|
8891
|
+
pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
|
8571
8892
|
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
|
8572
8893
|
}
|
8573
8894
|
|
@@ -8603,7 +8924,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
8603
8924
|
} else {
|
8604
8925
|
// If we get here, then we have a $ followed by something that
|
8605
8926
|
// isn't recognized as a global variable.
|
8606
|
-
pm_diagnostic_id_t diag_id = parser->version ==
|
8927
|
+
pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
|
8607
8928
|
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
8608
8929
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
|
8609
8930
|
}
|
@@ -9241,22 +9562,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9241
9562
|
const uint8_t *start = parser->current.end - 1;
|
9242
9563
|
parser->current.end++;
|
9243
9564
|
|
9244
|
-
if (
|
9245
|
-
(parser->current.end + 4 <= parser->end) &&
|
9246
|
-
pm_char_is_hexadecimal_digit(parser->current.end[0]) &&
|
9247
|
-
pm_char_is_hexadecimal_digit(parser->current.end[1]) &&
|
9248
|
-
pm_char_is_hexadecimal_digit(parser->current.end[2]) &&
|
9249
|
-
pm_char_is_hexadecimal_digit(parser->current.end[3])
|
9250
|
-
) {
|
9251
|
-
uint32_t value = escape_unicode(parser->current.end, 4);
|
9252
|
-
|
9253
|
-
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9254
|
-
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
|
9255
|
-
}
|
9256
|
-
escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
|
9257
|
-
|
9258
|
-
parser->current.end += 4;
|
9259
|
-
} else if (peek(parser) == '{') {
|
9565
|
+
if (peek(parser) == '{') {
|
9260
9566
|
const uint8_t *unicode_codepoints_start = parser->current.end - 2;
|
9261
9567
|
|
9262
9568
|
parser->current.end++;
|
@@ -9306,7 +9612,21 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9306
9612
|
pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
|
9307
9613
|
}
|
9308
9614
|
} else {
|
9309
|
-
|
9615
|
+
size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
|
9616
|
+
|
9617
|
+
if (length == 4) {
|
9618
|
+
uint32_t value = escape_unicode(parser->current.end, 4);
|
9619
|
+
|
9620
|
+
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9621
|
+
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
|
9622
|
+
}
|
9623
|
+
|
9624
|
+
escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
|
9625
|
+
parser->current.end += 4;
|
9626
|
+
} else {
|
9627
|
+
parser->current.end += length;
|
9628
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
|
9629
|
+
}
|
9310
9630
|
}
|
9311
9631
|
|
9312
9632
|
return;
|
@@ -9560,8 +9880,8 @@ lex_at_variable(pm_parser_t *parser) {
|
|
9560
9880
|
}
|
9561
9881
|
} else if (parser->current.end < parser->end && pm_char_is_decimal_digit(*parser->current.end)) {
|
9562
9882
|
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
|
9563
|
-
if (parser->version ==
|
9564
|
-
diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ?
|
9883
|
+
if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
|
9884
|
+
diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
|
9565
9885
|
}
|
9566
9886
|
|
9567
9887
|
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
@@ -10545,8 +10865,11 @@ parser_lex(pm_parser_t *parser) {
|
|
10545
10865
|
}
|
10546
10866
|
|
10547
10867
|
size_t ident_length = (size_t) (parser->current.end - ident_start);
|
10868
|
+
bool ident_error = false;
|
10869
|
+
|
10548
10870
|
if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
|
10549
|
-
|
10871
|
+
pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
|
10872
|
+
ident_error = true;
|
10550
10873
|
}
|
10551
10874
|
|
10552
10875
|
parser->explicit_encoding = NULL;
|
@@ -10571,7 +10894,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10571
10894
|
// this is not a valid heredoc declaration. In this case we
|
10572
10895
|
// will add an error, but we will still return a heredoc
|
10573
10896
|
// start.
|
10574
|
-
|
10897
|
+
if (!ident_error) pm_parser_err_heredoc_term(parser, parser->lex_modes.current);
|
10575
10898
|
body_start = parser->end;
|
10576
10899
|
} else {
|
10577
10900
|
// Otherwise, we want to indicate that the body of the
|
@@ -11898,7 +12221,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11898
12221
|
// terminator) but still continue parsing so that content after the
|
11899
12222
|
// declaration of the heredoc can be parsed.
|
11900
12223
|
if (parser->current.end >= parser->end) {
|
11901
|
-
|
12224
|
+
pm_parser_err_heredoc_term(parser, lex_mode);
|
11902
12225
|
parser->next_start = lex_mode->as.heredoc.next_start;
|
11903
12226
|
parser->heredoc_end = parser->current.end;
|
11904
12227
|
lex_state_set(parser, PM_LEX_STATE_END);
|
@@ -12537,6 +12860,23 @@ expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_to
|
|
12537
12860
|
parser->previous.type = PM_TOKEN_MISSING;
|
12538
12861
|
}
|
12539
12862
|
|
12863
|
+
/**
|
12864
|
+
* A special expect1 that expects a heredoc terminator and handles popping the
|
12865
|
+
* lex mode accordingly.
|
12866
|
+
*/
|
12867
|
+
static void
|
12868
|
+
expect1_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
|
12869
|
+
if (match1(parser, PM_TOKEN_HEREDOC_END)) {
|
12870
|
+
lex_mode_pop(parser);
|
12871
|
+
parser_lex(parser);
|
12872
|
+
} else {
|
12873
|
+
pm_parser_err_heredoc_term(parser, lex_mode);
|
12874
|
+
lex_mode_pop(parser);
|
12875
|
+
parser->previous.start = parser->previous.end;
|
12876
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
12877
|
+
}
|
12878
|
+
}
|
12879
|
+
|
12540
12880
|
static pm_node_t *
|
12541
12881
|
parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id);
|
12542
12882
|
|
@@ -12664,25 +13004,72 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
|
|
12664
13004
|
*name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
|
12665
13005
|
}
|
12666
13006
|
|
13007
|
+
/**
|
13008
|
+
* Certain expressions are not targetable, but in order to provide a better
|
13009
|
+
* experience we give a specific error message. In order to maintain as much
|
13010
|
+
* information in the tree as possible, we replace them with local variable
|
13011
|
+
* writes.
|
13012
|
+
*/
|
13013
|
+
static pm_node_t *
|
13014
|
+
parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
|
13015
|
+
switch (PM_NODE_TYPE(target)) {
|
13016
|
+
case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
|
13017
|
+
case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
|
13018
|
+
case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
|
13019
|
+
case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
|
13020
|
+
case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
|
13021
|
+
case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
|
13022
|
+
case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
|
13023
|
+
default: break;
|
13024
|
+
}
|
13025
|
+
|
13026
|
+
pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
|
13027
|
+
pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
|
13028
|
+
|
13029
|
+
pm_node_destroy(parser, target);
|
13030
|
+
return (pm_node_t *) result;
|
13031
|
+
}
|
13032
|
+
|
12667
13033
|
/**
|
12668
13034
|
* Convert the given node into a valid target node.
|
12669
13035
|
*/
|
12670
13036
|
static pm_node_t *
|
12671
|
-
parse_target(pm_parser_t *parser, pm_node_t *target) {
|
13037
|
+
parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
12672
13038
|
switch (PM_NODE_TYPE(target)) {
|
12673
13039
|
case PM_MISSING_NODE:
|
12674
13040
|
return target;
|
13041
|
+
case PM_SOURCE_ENCODING_NODE:
|
13042
|
+
case PM_FALSE_NODE:
|
13043
|
+
case PM_SOURCE_FILE_NODE:
|
13044
|
+
case PM_SOURCE_LINE_NODE:
|
13045
|
+
case PM_NIL_NODE:
|
13046
|
+
case PM_SELF_NODE:
|
13047
|
+
case PM_TRUE_NODE: {
|
13048
|
+
// In these special cases, we have specific error messages and we
|
13049
|
+
// will replace them with local variable writes.
|
13050
|
+
return parse_unwriteable_target(parser, target);
|
13051
|
+
}
|
12675
13052
|
case PM_CLASS_VARIABLE_READ_NODE:
|
12676
13053
|
assert(sizeof(pm_class_variable_target_node_t) == sizeof(pm_class_variable_read_node_t));
|
12677
13054
|
target->type = PM_CLASS_VARIABLE_TARGET_NODE;
|
12678
13055
|
return target;
|
12679
13056
|
case PM_CONSTANT_PATH_NODE:
|
13057
|
+
if (context_def_p(parser)) {
|
13058
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
|
13059
|
+
}
|
13060
|
+
|
12680
13061
|
assert(sizeof(pm_constant_path_target_node_t) == sizeof(pm_constant_path_node_t));
|
12681
13062
|
target->type = PM_CONSTANT_PATH_TARGET_NODE;
|
13063
|
+
|
12682
13064
|
return target;
|
12683
13065
|
case PM_CONSTANT_READ_NODE:
|
13066
|
+
if (context_def_p(parser)) {
|
13067
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
|
13068
|
+
}
|
13069
|
+
|
12684
13070
|
assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
|
12685
13071
|
target->type = PM_CONSTANT_TARGET_NODE;
|
13072
|
+
|
12686
13073
|
return target;
|
12687
13074
|
case PM_BACK_REFERENCE_READ_NODE:
|
12688
13075
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
@@ -12715,7 +13102,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
12715
13102
|
pm_splat_node_t *splat = (pm_splat_node_t *) target;
|
12716
13103
|
|
12717
13104
|
if (splat->expression != NULL) {
|
12718
|
-
splat->expression = parse_target(parser, splat->expression);
|
13105
|
+
splat->expression = parse_target(parser, splat->expression, multiple);
|
12719
13106
|
}
|
12720
13107
|
|
12721
13108
|
return (pm_node_t *) splat;
|
@@ -12753,6 +13140,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
12753
13140
|
}
|
12754
13141
|
|
12755
13142
|
if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
|
13143
|
+
if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
|
13144
|
+
pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
|
13145
|
+
}
|
13146
|
+
|
12756
13147
|
parse_write_name(parser, &call->name);
|
12757
13148
|
return (pm_node_t *) pm_call_target_node_create(parser, call);
|
12758
13149
|
}
|
@@ -12780,8 +13171,8 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
12780
13171
|
* assignment.
|
12781
13172
|
*/
|
12782
13173
|
static pm_node_t *
|
12783
|
-
parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
|
12784
|
-
pm_node_t *result = parse_target(parser, target);
|
13174
|
+
parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
13175
|
+
pm_node_t *result = parse_target(parser, target, multiple);
|
12785
13176
|
|
12786
13177
|
// Ensure that we have one of an =, an 'in' in for indexes, and a ')' in parens after the targets.
|
12787
13178
|
if (
|
@@ -12826,13 +13217,20 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
12826
13217
|
}
|
12827
13218
|
case PM_CONSTANT_PATH_NODE: {
|
12828
13219
|
pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
|
13220
|
+
|
13221
|
+
if (context_def_p(parser)) {
|
13222
|
+
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
|
13223
|
+
}
|
13224
|
+
|
12829
13225
|
return parse_shareable_constant_write(parser, node);
|
12830
13226
|
}
|
12831
13227
|
case PM_CONSTANT_READ_NODE: {
|
12832
13228
|
pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
|
13229
|
+
|
12833
13230
|
if (context_def_p(parser)) {
|
12834
13231
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
|
12835
13232
|
}
|
13233
|
+
|
12836
13234
|
pm_node_destroy(parser, target);
|
12837
13235
|
return parse_shareable_constant_write(parser, node);
|
12838
13236
|
}
|
@@ -13011,7 +13409,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13011
13409
|
bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
|
13012
13410
|
|
13013
13411
|
pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
|
13014
|
-
pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target));
|
13412
|
+
pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true));
|
13015
13413
|
|
13016
13414
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
13017
13415
|
if (accept1(parser, PM_TOKEN_USTAR)) {
|
@@ -13027,7 +13425,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13027
13425
|
|
13028
13426
|
if (token_begins_expression_p(parser->current.type)) {
|
13029
13427
|
name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
|
13030
|
-
name = parse_target(parser, name);
|
13428
|
+
name = parse_target(parser, name, true);
|
13031
13429
|
}
|
13032
13430
|
|
13033
13431
|
pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
|
@@ -13035,7 +13433,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13035
13433
|
has_rest = true;
|
13036
13434
|
} else if (token_begins_expression_p(parser->current.type)) {
|
13037
13435
|
pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
|
13038
|
-
target = parse_target(parser, target);
|
13436
|
+
target = parse_target(parser, target, true);
|
13039
13437
|
|
13040
13438
|
pm_multi_target_node_targets_append(parser, result, target);
|
13041
13439
|
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
@@ -13152,11 +13550,11 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
13152
13550
|
*/
|
13153
13551
|
static void
|
13154
13552
|
pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
|
13155
|
-
const pm_node_t *duplicated = pm_static_literals_add(parser, literals, node);
|
13553
|
+
const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
|
13156
13554
|
|
13157
13555
|
if (duplicated != NULL) {
|
13158
13556
|
pm_buffer_t buffer = { 0 };
|
13159
|
-
pm_static_literal_inspect(&buffer, parser, duplicated);
|
13557
|
+
pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
|
13160
13558
|
|
13161
13559
|
pm_diagnostic_list_append_format(
|
13162
13560
|
&parser->warning_list,
|
@@ -13178,7 +13576,7 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
|
|
13178
13576
|
*/
|
13179
13577
|
static void
|
13180
13578
|
pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
|
13181
|
-
if (pm_static_literals_add(parser, literals, node) != NULL) {
|
13579
|
+
if (pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node) != NULL) {
|
13182
13580
|
pm_diagnostic_list_append_format(
|
13183
13581
|
&parser->warning_list,
|
13184
13582
|
node->location.start,
|
@@ -13206,10 +13604,16 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
|
|
13206
13604
|
pm_token_t operator = parser->previous;
|
13207
13605
|
pm_node_t *value = NULL;
|
13208
13606
|
|
13209
|
-
if (
|
13607
|
+
if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
|
13608
|
+
// If we're about to parse a nested hash that is being
|
13609
|
+
// pushed into this hash directly with **, then we want the
|
13610
|
+
// inner hash to share the static literals with the outer
|
13611
|
+
// hash.
|
13612
|
+
parser->current_hash_keys = literals;
|
13210
13613
|
value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
13211
|
-
}
|
13212
|
-
|
13614
|
+
} else if (token_begins_expression_p(parser->current.type)) {
|
13615
|
+
value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
13616
|
+
} else {
|
13213
13617
|
pm_parser_scope_forwarding_keywords_check(parser, &operator);
|
13214
13618
|
}
|
13215
13619
|
|
@@ -13234,9 +13638,15 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
|
|
13234
13638
|
pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
|
13235
13639
|
value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
|
13236
13640
|
} else {
|
13237
|
-
int depth =
|
13641
|
+
int depth = -1;
|
13238
13642
|
pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
|
13239
13643
|
|
13644
|
+
if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
|
13645
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
|
13646
|
+
} else {
|
13647
|
+
depth = pm_parser_local_depth(parser, &identifier);
|
13648
|
+
}
|
13649
|
+
|
13240
13650
|
if (depth == -1) {
|
13241
13651
|
value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
|
13242
13652
|
} else {
|
@@ -13354,15 +13764,16 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13354
13764
|
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
|
13355
13765
|
argument = (pm_node_t *) hash;
|
13356
13766
|
|
13357
|
-
pm_static_literals_t
|
13358
|
-
bool contains_keyword_splat = parse_assocs(parser, &
|
13767
|
+
pm_static_literals_t hash_keys = { 0 };
|
13768
|
+
bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash);
|
13359
13769
|
|
13360
13770
|
parse_arguments_append(parser, arguments, argument);
|
13361
|
-
if (contains_keyword_splat) {
|
13362
|
-
pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
|
13363
|
-
}
|
13364
13771
|
|
13365
|
-
|
13772
|
+
pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
|
13773
|
+
if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
|
13774
|
+
pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
|
13775
|
+
|
13776
|
+
pm_static_literals_free(&hash_keys);
|
13366
13777
|
parsed_bare_hash = true;
|
13367
13778
|
|
13368
13779
|
break;
|
@@ -13438,7 +13849,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13438
13849
|
argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, PM_ERR_EXPECT_ARGUMENT);
|
13439
13850
|
}
|
13440
13851
|
|
13852
|
+
bool contains_keywords = false;
|
13441
13853
|
bool contains_keyword_splat = false;
|
13854
|
+
|
13442
13855
|
if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
|
13443
13856
|
if (parsed_bare_hash) {
|
13444
13857
|
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
|
@@ -13452,10 +13865,11 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13452
13865
|
}
|
13453
13866
|
|
13454
13867
|
pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
|
13868
|
+
contains_keywords = true;
|
13455
13869
|
|
13456
13870
|
// Create the set of static literals for this hash.
|
13457
|
-
pm_static_literals_t
|
13458
|
-
pm_hash_key_static_literals_add(parser, &
|
13871
|
+
pm_static_literals_t hash_keys = { 0 };
|
13872
|
+
pm_hash_key_static_literals_add(parser, &hash_keys, argument);
|
13459
13873
|
|
13460
13874
|
// Finish parsing the one we are part way through.
|
13461
13875
|
pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
|
@@ -13469,10 +13883,10 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13469
13883
|
token_begins_expression_p(parser->current.type) ||
|
13470
13884
|
match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
|
13471
13885
|
)) {
|
13472
|
-
contains_keyword_splat = parse_assocs(parser, &
|
13886
|
+
contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash);
|
13473
13887
|
}
|
13474
13888
|
|
13475
|
-
pm_static_literals_free(&
|
13889
|
+
pm_static_literals_free(&hash_keys);
|
13476
13890
|
parsed_bare_hash = true;
|
13477
13891
|
} else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
|
13478
13892
|
// TODO: Could we solve this with binding powers instead?
|
@@ -13480,9 +13894,12 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13480
13894
|
}
|
13481
13895
|
|
13482
13896
|
parse_arguments_append(parser, arguments, argument);
|
13483
|
-
|
13484
|
-
|
13485
|
-
|
13897
|
+
|
13898
|
+
pm_node_flags_t flags = 0;
|
13899
|
+
if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
|
13900
|
+
if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
|
13901
|
+
pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
|
13902
|
+
|
13486
13903
|
break;
|
13487
13904
|
}
|
13488
13905
|
}
|
@@ -13595,7 +14012,6 @@ typedef enum {
|
|
13595
14012
|
PM_PARAMETERS_ORDER_OPTIONAL,
|
13596
14013
|
PM_PARAMETERS_ORDER_NAMED,
|
13597
14014
|
PM_PARAMETERS_ORDER_NONE,
|
13598
|
-
|
13599
14015
|
} pm_parameters_order_t;
|
13600
14016
|
|
13601
14017
|
/**
|
@@ -13903,6 +14319,7 @@ parse_parameters(
|
|
13903
14319
|
pm_token_t operator = parser->previous;
|
13904
14320
|
pm_token_t name;
|
13905
14321
|
bool repeated = false;
|
14322
|
+
|
13906
14323
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
13907
14324
|
name = parser->previous;
|
13908
14325
|
repeated = pm_parser_parameter_name_check(parser, &name);
|
@@ -13916,6 +14333,7 @@ parse_parameters(
|
|
13916
14333
|
if (repeated) {
|
13917
14334
|
pm_node_flag_set_repeated_parameter(param);
|
13918
14335
|
}
|
14336
|
+
|
13919
14337
|
if (params->rest == NULL) {
|
13920
14338
|
pm_parameters_node_rest_set(params, param);
|
13921
14339
|
} else {
|
@@ -13927,6 +14345,7 @@ parse_parameters(
|
|
13927
14345
|
}
|
13928
14346
|
case PM_TOKEN_STAR_STAR:
|
13929
14347
|
case PM_TOKEN_USTAR_STAR: {
|
14348
|
+
pm_parameters_order_t previous_order = order;
|
13930
14349
|
update_parameter_state(parser, &parser->current, &order);
|
13931
14350
|
parser_lex(parser);
|
13932
14351
|
|
@@ -13934,6 +14353,10 @@ parse_parameters(
|
|
13934
14353
|
pm_node_t *param;
|
13935
14354
|
|
13936
14355
|
if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
|
14356
|
+
if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
|
14357
|
+
pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
|
14358
|
+
}
|
14359
|
+
|
13937
14360
|
param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
|
13938
14361
|
} else {
|
13939
14362
|
pm_token_t name;
|
@@ -14031,7 +14454,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
|
|
14031
14454
|
pm_rescue_node_operator_set(rescue, &parser->previous);
|
14032
14455
|
|
14033
14456
|
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
|
14034
|
-
reference = parse_target(parser, reference);
|
14457
|
+
reference = parse_target(parser, reference, false);
|
14035
14458
|
|
14036
14459
|
pm_rescue_node_reference_set(rescue, reference);
|
14037
14460
|
break;
|
@@ -14061,7 +14484,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
|
|
14061
14484
|
pm_rescue_node_operator_set(rescue, &parser->previous);
|
14062
14485
|
|
14063
14486
|
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
|
14064
|
-
reference = parse_target(parser, reference);
|
14487
|
+
reference = parse_target(parser, reference, false);
|
14065
14488
|
|
14066
14489
|
pm_rescue_node_reference_set(rescue, reference);
|
14067
14490
|
break;
|
@@ -15030,7 +15453,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
15030
15453
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
15031
15454
|
|
15032
15455
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
15033
|
-
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
15456
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
|
15034
15457
|
|
15035
15458
|
return (pm_node_t *) symbol;
|
15036
15459
|
}
|
@@ -15130,7 +15553,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
15130
15553
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
|
15131
15554
|
}
|
15132
15555
|
|
15133
|
-
return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
15556
|
+
return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
|
15134
15557
|
}
|
15135
15558
|
|
15136
15559
|
/**
|
@@ -15155,7 +15578,7 @@ parse_undef_argument(pm_parser_t *parser) {
|
|
15155
15578
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
15156
15579
|
|
15157
15580
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
15158
|
-
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
15581
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
|
15159
15582
|
|
15160
15583
|
return (pm_node_t *) symbol;
|
15161
15584
|
}
|
@@ -15196,7 +15619,7 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
|
|
15196
15619
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
15197
15620
|
|
15198
15621
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
15199
|
-
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
15622
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
|
15200
15623
|
|
15201
15624
|
return (pm_node_t *) symbol;
|
15202
15625
|
}
|
@@ -15453,9 +15876,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
|
|
15453
15876
|
while (accept1(parser, PM_TOKEN_COLON_COLON)) {
|
15454
15877
|
pm_token_t delimiter = parser->previous;
|
15455
15878
|
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
15456
|
-
|
15457
|
-
pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
|
15458
|
-
node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, child);
|
15879
|
+
node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
|
15459
15880
|
}
|
15460
15881
|
|
15461
15882
|
// If there is a [ or ( that follows, then this is part of a larger pattern
|
@@ -15643,8 +16064,15 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
|
|
15643
16064
|
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
|
15644
16065
|
pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
|
15645
16066
|
|
15646
|
-
int depth;
|
15647
|
-
if (
|
16067
|
+
int depth = -1;
|
16068
|
+
if (value_loc->end[-1] == '!' || value_loc->end[-1] == '?') {
|
16069
|
+
pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
|
16070
|
+
PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
|
16071
|
+
} else {
|
16072
|
+
depth = pm_parser_local_depth_constant_id(parser, constant_id);
|
16073
|
+
}
|
16074
|
+
|
16075
|
+
if (depth == -1) {
|
15648
16076
|
pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
|
15649
16077
|
}
|
15650
16078
|
|
@@ -15665,7 +16093,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
|
|
15665
16093
|
*/
|
15666
16094
|
static void
|
15667
16095
|
parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
|
15668
|
-
if (pm_static_literals_add(parser, keys, node) != NULL) {
|
16096
|
+
if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
|
15669
16097
|
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
|
15670
16098
|
}
|
15671
16099
|
}
|
@@ -15953,7 +16381,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
15953
16381
|
|
15954
16382
|
if (variable == NULL) {
|
15955
16383
|
if (
|
15956
|
-
(parser->version !=
|
16384
|
+
(parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
|
15957
16385
|
!parser->current_scope->closed &&
|
15958
16386
|
(parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
|
15959
16387
|
pm_token_is_it(parser->previous.start, parser->previous.end)
|
@@ -16027,8 +16455,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
16027
16455
|
parser_lex(parser);
|
16028
16456
|
|
16029
16457
|
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
16030
|
-
|
16031
|
-
pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, child);
|
16458
|
+
pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
|
16032
16459
|
|
16033
16460
|
return parse_pattern_constant_path(parser, captures, (pm_node_t *) node);
|
16034
16461
|
}
|
@@ -16354,7 +16781,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
16354
16781
|
|
16355
16782
|
pm_node_list_free(&parts);
|
16356
16783
|
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16357
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
16784
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16358
16785
|
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16359
16786
|
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
16360
16787
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
@@ -16380,7 +16807,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
16380
16807
|
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
16381
16808
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16382
16809
|
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16383
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
16810
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16384
16811
|
} else {
|
16385
16812
|
// If we get here, then we have interpolation so we'll need
|
16386
16813
|
// to create a string or symbol node with interpolation.
|
@@ -16462,11 +16889,11 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
16462
16889
|
pm_token_t bounds = not_provided(parser);
|
16463
16890
|
|
16464
16891
|
pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
|
16465
|
-
pm_interpolated_string_node_append(
|
16892
|
+
pm_interpolated_string_node_append(container, current);
|
16466
16893
|
current = (pm_node_t *) container;
|
16467
16894
|
}
|
16468
16895
|
|
16469
|
-
pm_interpolated_string_node_append(
|
16896
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
|
16470
16897
|
}
|
16471
16898
|
}
|
16472
16899
|
|
@@ -16711,13 +17138,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16711
17138
|
}
|
16712
17139
|
|
16713
17140
|
element = (pm_node_t *) pm_keyword_hash_node_create(parser);
|
16714
|
-
pm_static_literals_t
|
17141
|
+
pm_static_literals_t hash_keys = { 0 };
|
16715
17142
|
|
16716
17143
|
if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
16717
|
-
parse_assocs(parser, &
|
17144
|
+
parse_assocs(parser, &hash_keys, element);
|
16718
17145
|
}
|
16719
17146
|
|
16720
|
-
pm_static_literals_free(&
|
17147
|
+
pm_static_literals_free(&hash_keys);
|
16721
17148
|
parsed_bare_hash = true;
|
16722
17149
|
} else {
|
16723
17150
|
element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION);
|
@@ -16728,8 +17155,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16728
17155
|
}
|
16729
17156
|
|
16730
17157
|
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
|
16731
|
-
pm_static_literals_t
|
16732
|
-
pm_hash_key_static_literals_add(parser, &
|
17158
|
+
pm_static_literals_t hash_keys = { 0 };
|
17159
|
+
pm_hash_key_static_literals_add(parser, &hash_keys, element);
|
16733
17160
|
|
16734
17161
|
pm_token_t operator;
|
16735
17162
|
if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
|
@@ -16744,10 +17171,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16744
17171
|
|
16745
17172
|
element = (pm_node_t *) hash;
|
16746
17173
|
if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
|
16747
|
-
parse_assocs(parser, &
|
17174
|
+
parse_assocs(parser, &hash_keys, element);
|
16748
17175
|
}
|
16749
17176
|
|
16750
|
-
pm_static_literals_free(&
|
17177
|
+
pm_static_literals_free(&hash_keys);
|
16751
17178
|
parsed_bare_hash = true;
|
16752
17179
|
}
|
16753
17180
|
}
|
@@ -16841,7 +17268,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16841
17268
|
return (pm_node_t *) multi_target;
|
16842
17269
|
}
|
16843
17270
|
|
16844
|
-
return parse_target_validate(parser, (pm_node_t *) multi_target);
|
17271
|
+
return parse_target_validate(parser, (pm_node_t *) multi_target, false);
|
16845
17272
|
}
|
16846
17273
|
|
16847
17274
|
// If we have a single statement and are ending on a right parenthesis
|
@@ -16907,14 +17334,30 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16907
17334
|
return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
|
16908
17335
|
}
|
16909
17336
|
case PM_TOKEN_BRACE_LEFT: {
|
17337
|
+
// If we were passed a current_hash_keys via the parser, then that
|
17338
|
+
// means we're already parsing a hash and we want to share the set
|
17339
|
+
// of hash keys with this inner hash we're about to parse for the
|
17340
|
+
// sake of warnings. We'll set it to NULL after we grab it to make
|
17341
|
+
// sure subsequent expressions don't use it. Effectively this is a
|
17342
|
+
// way of getting around passing it to every call to
|
17343
|
+
// parse_expression.
|
17344
|
+
pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
|
17345
|
+
parser->current_hash_keys = NULL;
|
17346
|
+
|
16910
17347
|
pm_accepts_block_stack_push(parser, true);
|
16911
17348
|
parser_lex(parser);
|
16912
17349
|
|
16913
17350
|
pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
|
16914
|
-
pm_static_literals_t literals = { 0 };
|
16915
17351
|
|
16916
17352
|
if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
|
16917
|
-
|
17353
|
+
if (current_hash_keys != NULL) {
|
17354
|
+
parse_assocs(parser, current_hash_keys, (pm_node_t *) node);
|
17355
|
+
} else {
|
17356
|
+
pm_static_literals_t hash_keys = { 0 };
|
17357
|
+
parse_assocs(parser, &hash_keys, (pm_node_t *) node);
|
17358
|
+
pm_static_literals_free(&hash_keys);
|
17359
|
+
}
|
17360
|
+
|
16918
17361
|
accept1(parser, PM_TOKEN_NEWLINE);
|
16919
17362
|
}
|
16920
17363
|
|
@@ -16922,7 +17365,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16922
17365
|
expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
|
16923
17366
|
pm_hash_node_closing_loc_set(node, &parser->previous);
|
16924
17367
|
|
16925
|
-
pm_static_literals_free(&literals);
|
16926
17368
|
return (pm_node_t *) node;
|
16927
17369
|
}
|
16928
17370
|
case PM_TOKEN_CHARACTER_LITERAL: {
|
@@ -16987,12 +17429,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16987
17429
|
}
|
16988
17430
|
case PM_TOKEN_UCOLON_COLON: {
|
16989
17431
|
parser_lex(parser);
|
16990
|
-
|
16991
17432
|
pm_token_t delimiter = parser->previous;
|
16992
|
-
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
16993
17433
|
|
16994
|
-
|
16995
|
-
pm_node_t *node = (pm_node_t *)pm_constant_path_node_create(parser, NULL, &delimiter,
|
17434
|
+
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
17435
|
+
pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
|
16996
17436
|
|
16997
17437
|
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
|
16998
17438
|
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
|
@@ -17152,8 +17592,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17152
17592
|
if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
|
17153
17593
|
// If we get here, then we have an empty heredoc. We'll create
|
17154
17594
|
// an empty content token and return an empty string node.
|
17155
|
-
|
17156
|
-
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
17595
|
+
expect1_heredoc_term(parser, lex_mode);
|
17157
17596
|
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
17158
17597
|
|
17159
17598
|
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
@@ -17194,8 +17633,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17194
17633
|
}
|
17195
17634
|
|
17196
17635
|
node = (pm_node_t *) cast;
|
17197
|
-
|
17198
|
-
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
17636
|
+
expect1_heredoc_term(parser, lex_mode);
|
17199
17637
|
} else {
|
17200
17638
|
// If we get here, then we have multiple parts in the heredoc,
|
17201
17639
|
// so we'll need to create an interpolated string node to hold
|
@@ -17217,20 +17655,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17217
17655
|
pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
|
17218
17656
|
cast->parts = parts;
|
17219
17657
|
|
17220
|
-
|
17221
|
-
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
17222
|
-
|
17658
|
+
expect1_heredoc_term(parser, lex_mode);
|
17223
17659
|
pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
|
17660
|
+
|
17224
17661
|
cast->base.location = cast->opening_loc;
|
17225
17662
|
node = (pm_node_t *) cast;
|
17226
17663
|
} else {
|
17227
17664
|
pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
|
17228
17665
|
pm_node_list_free(&parts);
|
17229
17666
|
|
17230
|
-
|
17231
|
-
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
17232
|
-
|
17667
|
+
expect1_heredoc_term(parser, lex_mode);
|
17233
17668
|
pm_interpolated_string_node_closing_set(cast, &parser->previous);
|
17669
|
+
|
17234
17670
|
cast->base.location = cast->opening_loc;
|
17235
17671
|
node = (pm_node_t *) cast;
|
17236
17672
|
}
|
@@ -18132,7 +18568,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18132
18568
|
if (match1(parser, PM_TOKEN_COMMA)) {
|
18133
18569
|
index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
|
18134
18570
|
} else {
|
18135
|
-
index = parse_target(parser, index);
|
18571
|
+
index = parse_target(parser, index, false);
|
18136
18572
|
}
|
18137
18573
|
|
18138
18574
|
context_pop(parser);
|
@@ -18254,9 +18690,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18254
18690
|
pm_token_t double_colon = parser->previous;
|
18255
18691
|
|
18256
18692
|
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
18257
|
-
|
18258
|
-
|
18259
|
-
constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, constant);
|
18693
|
+
constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
|
18260
18694
|
}
|
18261
18695
|
|
18262
18696
|
// Here we retrieve the name of the module. If it wasn't a constant,
|
@@ -18636,15 +19070,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18636
19070
|
// If we hit string content and the current node is
|
18637
19071
|
// an interpolated string, then we need to append
|
18638
19072
|
// the string content to the list of child nodes.
|
18639
|
-
pm_interpolated_string_node_append(
|
19073
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
|
18640
19074
|
} else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
|
18641
19075
|
// If we hit string content and the current node is
|
18642
19076
|
// a string node, then we need to convert the
|
18643
19077
|
// current node into an interpolated string and add
|
18644
19078
|
// the string content to the list of child nodes.
|
18645
19079
|
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
18646
|
-
pm_interpolated_string_node_append(
|
18647
|
-
pm_interpolated_string_node_append(
|
19080
|
+
pm_interpolated_string_node_append(interpolated, current);
|
19081
|
+
pm_interpolated_string_node_append(interpolated, string);
|
18648
19082
|
current = (pm_node_t *) interpolated;
|
18649
19083
|
} else {
|
18650
19084
|
assert(false && "unreachable");
|
@@ -18669,7 +19103,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18669
19103
|
pm_token_t opening = not_provided(parser);
|
18670
19104
|
pm_token_t closing = not_provided(parser);
|
18671
19105
|
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
18672
|
-
pm_interpolated_string_node_append(
|
19106
|
+
pm_interpolated_string_node_append(interpolated, current);
|
18673
19107
|
current = (pm_node_t *) interpolated;
|
18674
19108
|
} else {
|
18675
19109
|
// If we hit an embedded variable and the current
|
@@ -18678,7 +19112,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18678
19112
|
}
|
18679
19113
|
|
18680
19114
|
pm_node_t *part = parse_string_part(parser);
|
18681
|
-
pm_interpolated_string_node_append(
|
19115
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
|
18682
19116
|
break;
|
18683
19117
|
}
|
18684
19118
|
case PM_TOKEN_EMBEXPR_BEGIN: {
|
@@ -18698,7 +19132,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18698
19132
|
pm_token_t opening = not_provided(parser);
|
18699
19133
|
pm_token_t closing = not_provided(parser);
|
18700
19134
|
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
18701
|
-
pm_interpolated_string_node_append(
|
19135
|
+
pm_interpolated_string_node_append(interpolated, current);
|
18702
19136
|
current = (pm_node_t *) interpolated;
|
18703
19137
|
} else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
|
18704
19138
|
// If we hit an embedded expression and the current
|
@@ -18709,7 +19143,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18709
19143
|
}
|
18710
19144
|
|
18711
19145
|
pm_node_t *part = parse_string_part(parser);
|
18712
|
-
pm_interpolated_string_node_append(
|
19146
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
|
18713
19147
|
break;
|
18714
19148
|
}
|
18715
19149
|
default:
|
@@ -18913,7 +19347,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18913
19347
|
if (match1(parser, PM_TOKEN_COMMA)) {
|
18914
19348
|
return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX);
|
18915
19349
|
} else {
|
18916
|
-
return parse_target_validate(parser, splat);
|
19350
|
+
return parse_target_validate(parser, splat, true);
|
18917
19351
|
}
|
18918
19352
|
}
|
18919
19353
|
case PM_TOKEN_BANG: {
|
@@ -20046,8 +20480,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20046
20480
|
path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
|
20047
20481
|
} else {
|
20048
20482
|
// Otherwise, this is a constant path. That would look like Foo::Bar.
|
20049
|
-
|
20050
|
-
path = (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
|
20483
|
+
path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
|
20051
20484
|
}
|
20052
20485
|
|
20053
20486
|
// If this is followed by a comma then it is a multiple assignment.
|
@@ -20086,9 +20519,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20086
20519
|
return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
|
20087
20520
|
}
|
20088
20521
|
default: {
|
20089
|
-
|
20090
|
-
|
20091
|
-
return (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
|
20522
|
+
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
20523
|
+
return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
|
20092
20524
|
}
|
20093
20525
|
}
|
20094
20526
|
}
|
@@ -21233,25 +21665,28 @@ pm_parser_errors_format(const pm_parser_t *parser, const pm_list_t *error_list,
|
|
21233
21665
|
pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
|
21234
21666
|
|
21235
21667
|
size_t column = 0;
|
21236
|
-
while (column < error->
|
21237
|
-
|
21238
|
-
pm_buffer_append_byte(buffer, ' ');
|
21239
|
-
} else {
|
21240
|
-
const uint8_t caret = column == error->column_start ? '^' : '~';
|
21668
|
+
while (column < error->column_start) {
|
21669
|
+
pm_buffer_append_byte(buffer, ' ');
|
21241
21670
|
|
21242
|
-
|
21243
|
-
|
21244
|
-
|
21245
|
-
|
21246
|
-
|
21247
|
-
|
21248
|
-
|
21249
|
-
|
21671
|
+
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21672
|
+
column += (char_width == 0 ? 1 : char_width);
|
21673
|
+
}
|
21674
|
+
|
21675
|
+
if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
|
21676
|
+
pm_buffer_append_byte(buffer, '^');
|
21677
|
+
|
21678
|
+
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21679
|
+
column += (char_width == 0 ? 1 : char_width);
|
21680
|
+
|
21681
|
+
while (column < error->column_end) {
|
21682
|
+
pm_buffer_append_byte(buffer, '~');
|
21250
21683
|
|
21251
21684
|
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21252
21685
|
column += (char_width == 0 ? 1 : char_width);
|
21253
21686
|
}
|
21254
21687
|
|
21688
|
+
if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
|
21689
|
+
|
21255
21690
|
if (inline_messages) {
|
21256
21691
|
pm_buffer_append_byte(buffer, ' ');
|
21257
21692
|
assert(error->error != NULL);
|