prism 0.26.0 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +45 -1
- data/Makefile +3 -2
- data/config.yml +305 -20
- data/docs/configuration.md +1 -0
- data/ext/prism/api_node.c +884 -879
- data/ext/prism/extconf.rb +23 -4
- data/ext/prism/extension.c +16 -9
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +298 -9
- data/include/prism/diagnostic.h +15 -5
- data/include/prism/options.h +2 -2
- data/include/prism/parser.h +10 -0
- data/include/prism/static_literals.h +8 -6
- data/include/prism/version.h +2 -2
- data/lib/prism/dot_visitor.rb +22 -6
- data/lib/prism/dsl.rb +8 -8
- data/lib/prism/ffi.rb +4 -4
- data/lib/prism/inspect_visitor.rb +2156 -0
- data/lib/prism/lex_compat.rb +18 -1
- data/lib/prism/mutation_compiler.rb +2 -2
- data/lib/prism/node.rb +2345 -1964
- data/lib/prism/node_ext.rb +34 -5
- data/lib/prism/parse_result/newlines.rb +0 -2
- data/lib/prism/parse_result.rb +137 -13
- data/lib/prism/pattern.rb +12 -6
- data/lib/prism/polyfill/byteindex.rb +13 -0
- data/lib/prism/polyfill/unpack1.rb +14 -0
- data/lib/prism/reflection.rb +21 -31
- data/lib/prism/serialize.rb +27 -17
- data/lib/prism/translation/parser/compiler.rb +34 -15
- data/lib/prism/translation/parser.rb +6 -6
- data/lib/prism/translation/ripper.rb +72 -68
- data/lib/prism/translation/ruby_parser.rb +69 -31
- data/lib/prism.rb +3 -2
- data/prism.gemspec +36 -38
- data/rbi/prism/compiler.rbi +3 -5
- data/rbi/prism/inspect_visitor.rbi +12 -0
- data/rbi/prism/node.rbi +359 -321
- data/rbi/prism/parse_result.rbi +85 -34
- data/rbi/prism/reflection.rbi +7 -13
- data/rbi/prism/translation/ripper.rbi +1 -11
- data/rbi/prism.rbi +9 -9
- data/sig/prism/dsl.rbs +3 -3
- data/sig/prism/inspect_visitor.rbs +22 -0
- data/sig/prism/node.rbs +68 -48
- data/sig/prism/parse_result.rbs +42 -10
- data/sig/prism/reflection.rbs +2 -8
- data/sig/prism/serialize.rbs +2 -3
- data/sig/prism.rbs +9 -9
- data/src/diagnostic.c +44 -24
- data/src/node.c +41 -16
- data/src/options.c +2 -2
- data/src/prettyprint.c +61 -18
- data/src/prism.c +623 -188
- data/src/serialize.c +5 -2
- data/src/static_literals.c +120 -34
- data/src/token_type.c +4 -4
- data/src/util/pm_integer.c +9 -2
- metadata +7 -9
- data/lib/prism/node_inspector.rb +0 -68
- data/lib/prism/polyfill/string.rb +0 -12
- data/rbi/prism/desugar_compiler.rbi +0 -5
- data/rbi/prism/mutation_compiler.rbi +0 -5
- data/rbi/prism/translation/parser/compiler.rbi +0 -13
- data/rbi/prism/translation/ripper/ripper_compiler.rbi +0 -5
- data/rbi/prism/translation/ruby_parser.rbi +0 -11
data/src/prism.c
CHANGED
@@ -672,6 +672,26 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
|
|
672
672
|
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
|
673
673
|
PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
|
674
674
|
|
675
|
+
/**
|
676
|
+
* Add an error for an expected heredoc terminator. This is a special function
|
677
|
+
* only because it grabs its location off of a lex mode instead of a node or a
|
678
|
+
* token.
|
679
|
+
*/
|
680
|
+
static void
|
681
|
+
pm_parser_err_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
|
682
|
+
const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
|
683
|
+
size_t ident_length = lex_mode->as.heredoc.ident_length;
|
684
|
+
|
685
|
+
PM_PARSER_ERR_FORMAT(
|
686
|
+
parser,
|
687
|
+
ident_start,
|
688
|
+
ident_start + ident_length,
|
689
|
+
PM_ERR_HEREDOC_TERM,
|
690
|
+
(int) ident_length,
|
691
|
+
(const char *) ident_start
|
692
|
+
);
|
693
|
+
}
|
694
|
+
|
675
695
|
/******************************************************************************/
|
676
696
|
/* Scope-related functions */
|
677
697
|
/******************************************************************************/
|
@@ -1405,7 +1425,7 @@ pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
|
|
1405
1425
|
static inline void
|
1406
1426
|
pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
|
1407
1427
|
if (pm_conditional_predicate_warn_write_literal_p(node)) {
|
1408
|
-
pm_parser_warn_node(parser, node, parser->version ==
|
1428
|
+
pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
|
1409
1429
|
}
|
1410
1430
|
}
|
1411
1431
|
|
@@ -2923,6 +2943,29 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
|
|
2923
2943
|
return node;
|
2924
2944
|
}
|
2925
2945
|
|
2946
|
+
/**
|
2947
|
+
* Validate that index expressions do not have keywords or blocks if we are
|
2948
|
+
* parsing as Ruby 3.4+.
|
2949
|
+
*/
|
2950
|
+
static void
|
2951
|
+
pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
|
2952
|
+
if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
|
2953
|
+
if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
|
2954
|
+
pm_node_t *node;
|
2955
|
+
PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
|
2956
|
+
if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
|
2957
|
+
pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
|
2958
|
+
break;
|
2959
|
+
}
|
2960
|
+
}
|
2961
|
+
}
|
2962
|
+
|
2963
|
+
if (block != NULL) {
|
2964
|
+
pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
|
2965
|
+
}
|
2966
|
+
}
|
2967
|
+
}
|
2968
|
+
|
2926
2969
|
/**
|
2927
2970
|
* Allocate and initialize a new IndexAndWriteNode node.
|
2928
2971
|
*/
|
@@ -2931,6 +2974,8 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons
|
|
2931
2974
|
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
|
2932
2975
|
pm_index_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_and_write_node_t);
|
2933
2976
|
|
2977
|
+
pm_index_arguments_check(parser, target->arguments, target->block);
|
2978
|
+
|
2934
2979
|
*node = (pm_index_and_write_node_t) {
|
2935
2980
|
{
|
2936
2981
|
.type = PM_INDEX_AND_WRITE_NODE,
|
@@ -3002,6 +3047,8 @@ static pm_index_operator_write_node_t *
|
|
3002
3047
|
pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
|
3003
3048
|
pm_index_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_operator_write_node_t);
|
3004
3049
|
|
3050
|
+
pm_index_arguments_check(parser, target->arguments, target->block);
|
3051
|
+
|
3005
3052
|
*node = (pm_index_operator_write_node_t) {
|
3006
3053
|
{
|
3007
3054
|
.type = PM_INDEX_OPERATOR_WRITE_NODE,
|
@@ -3075,6 +3122,8 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
|
|
3075
3122
|
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
|
3076
3123
|
pm_index_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_or_write_node_t);
|
3077
3124
|
|
3125
|
+
pm_index_arguments_check(parser, target->arguments, target->block);
|
3126
|
+
|
3078
3127
|
*node = (pm_index_or_write_node_t) {
|
3079
3128
|
{
|
3080
3129
|
.type = PM_INDEX_OR_WRITE_NODE,
|
@@ -3139,6 +3188,8 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
|
|
3139
3188
|
pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
|
3140
3189
|
pm_node_flags_t flags = target->base.flags;
|
3141
3190
|
|
3191
|
+
pm_index_arguments_check(parser, target->arguments, target->block);
|
3192
|
+
|
3142
3193
|
*node = (pm_index_target_node_t) {
|
3143
3194
|
{
|
3144
3195
|
.type = PM_INDEX_TARGET_NODE,
|
@@ -3510,22 +3561,27 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node
|
|
3510
3561
|
* Allocate and initialize a new ConstantPathNode node.
|
3511
3562
|
*/
|
3512
3563
|
static pm_constant_path_node_t *
|
3513
|
-
pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter,
|
3564
|
+
pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
|
3514
3565
|
pm_assert_value_expression(parser, parent);
|
3515
|
-
|
3516
3566
|
pm_constant_path_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_node_t);
|
3517
3567
|
|
3568
|
+
pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
|
3569
|
+
if (name_token->type == PM_TOKEN_CONSTANT) {
|
3570
|
+
name = pm_parser_constant_id_token(parser, name_token);
|
3571
|
+
}
|
3572
|
+
|
3518
3573
|
*node = (pm_constant_path_node_t) {
|
3519
3574
|
{
|
3520
3575
|
.type = PM_CONSTANT_PATH_NODE,
|
3521
3576
|
.location = {
|
3522
3577
|
.start = parent == NULL ? delimiter->start : parent->location.start,
|
3523
|
-
.end =
|
3578
|
+
.end = name_token->end
|
3524
3579
|
},
|
3525
3580
|
},
|
3526
3581
|
.parent = parent,
|
3527
|
-
.
|
3528
|
-
.delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter)
|
3582
|
+
.name = name,
|
3583
|
+
.delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
|
3584
|
+
.name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
|
3529
3585
|
};
|
3530
3586
|
|
3531
3587
|
return node;
|
@@ -3716,6 +3772,113 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
|
|
3716
3772
|
}
|
3717
3773
|
}
|
3718
3774
|
|
3775
|
+
/**
|
3776
|
+
* When a method body is created, we want to check if the last statement is a
|
3777
|
+
* return or a statement that houses a return. If it is, then we want to mark
|
3778
|
+
* that return as being redundant so that we can compile it differently but also
|
3779
|
+
* so that we can indicate that to the user.
|
3780
|
+
*/
|
3781
|
+
static void
|
3782
|
+
pm_def_node_body_redundant_return(pm_node_t *node) {
|
3783
|
+
switch (PM_NODE_TYPE(node)) {
|
3784
|
+
case PM_RETURN_NODE:
|
3785
|
+
node->flags |= PM_RETURN_NODE_FLAGS_REDUNDANT;
|
3786
|
+
break;
|
3787
|
+
case PM_BEGIN_NODE: {
|
3788
|
+
pm_begin_node_t *cast = (pm_begin_node_t *) node;
|
3789
|
+
|
3790
|
+
if (cast->statements != NULL && cast->else_clause == NULL) {
|
3791
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3792
|
+
}
|
3793
|
+
break;
|
3794
|
+
}
|
3795
|
+
case PM_STATEMENTS_NODE: {
|
3796
|
+
pm_statements_node_t *cast = (pm_statements_node_t *) node;
|
3797
|
+
|
3798
|
+
if (cast->body.size > 0) {
|
3799
|
+
pm_def_node_body_redundant_return(cast->body.nodes[cast->body.size - 1]);
|
3800
|
+
}
|
3801
|
+
break;
|
3802
|
+
}
|
3803
|
+
case PM_IF_NODE: {
|
3804
|
+
pm_if_node_t *cast = (pm_if_node_t *) node;
|
3805
|
+
|
3806
|
+
if (cast->statements != NULL) {
|
3807
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3808
|
+
}
|
3809
|
+
|
3810
|
+
if (cast->consequent != NULL) {
|
3811
|
+
pm_def_node_body_redundant_return(cast->consequent);
|
3812
|
+
}
|
3813
|
+
break;
|
3814
|
+
}
|
3815
|
+
case PM_UNLESS_NODE: {
|
3816
|
+
pm_unless_node_t *cast = (pm_unless_node_t *) node;
|
3817
|
+
|
3818
|
+
if (cast->statements != NULL) {
|
3819
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3820
|
+
}
|
3821
|
+
|
3822
|
+
if (cast->consequent != NULL) {
|
3823
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
|
3824
|
+
}
|
3825
|
+
break;
|
3826
|
+
}
|
3827
|
+
case PM_ELSE_NODE: {
|
3828
|
+
pm_else_node_t *cast = (pm_else_node_t *) node;
|
3829
|
+
|
3830
|
+
if (cast->statements != NULL) {
|
3831
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3832
|
+
}
|
3833
|
+
break;
|
3834
|
+
}
|
3835
|
+
case PM_CASE_NODE: {
|
3836
|
+
pm_case_node_t *cast = (pm_case_node_t *) node;
|
3837
|
+
pm_node_t *condition;
|
3838
|
+
|
3839
|
+
PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
|
3840
|
+
pm_def_node_body_redundant_return(condition);
|
3841
|
+
}
|
3842
|
+
|
3843
|
+
if (cast->consequent != NULL) {
|
3844
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
|
3845
|
+
}
|
3846
|
+
break;
|
3847
|
+
}
|
3848
|
+
case PM_WHEN_NODE: {
|
3849
|
+
pm_when_node_t *cast = (pm_when_node_t *) node;
|
3850
|
+
|
3851
|
+
if (cast->statements != NULL) {
|
3852
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3853
|
+
}
|
3854
|
+
break;
|
3855
|
+
}
|
3856
|
+
case PM_CASE_MATCH_NODE: {
|
3857
|
+
pm_case_match_node_t *cast = (pm_case_match_node_t *) node;
|
3858
|
+
pm_node_t *condition;
|
3859
|
+
|
3860
|
+
PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
|
3861
|
+
pm_def_node_body_redundant_return(condition);
|
3862
|
+
}
|
3863
|
+
|
3864
|
+
if (cast->consequent != NULL) {
|
3865
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
|
3866
|
+
}
|
3867
|
+
break;
|
3868
|
+
}
|
3869
|
+
case PM_IN_NODE: {
|
3870
|
+
pm_in_node_t *cast = (pm_in_node_t *) node;
|
3871
|
+
|
3872
|
+
if (cast->statements != NULL) {
|
3873
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3874
|
+
}
|
3875
|
+
break;
|
3876
|
+
}
|
3877
|
+
default:
|
3878
|
+
break;
|
3879
|
+
}
|
3880
|
+
}
|
3881
|
+
|
3719
3882
|
/**
|
3720
3883
|
* Allocate and initialize a new DefNode node.
|
3721
3884
|
*/
|
@@ -3748,6 +3911,10 @@ pm_def_node_create(
|
|
3748
3911
|
pm_def_node_receiver_check(parser, receiver);
|
3749
3912
|
}
|
3750
3913
|
|
3914
|
+
if (body != NULL) {
|
3915
|
+
pm_def_node_body_redundant_return(body);
|
3916
|
+
}
|
3917
|
+
|
3751
3918
|
*node = (pm_def_node_t) {
|
3752
3919
|
{
|
3753
3920
|
.type = PM_DEF_NODE,
|
@@ -4922,6 +5089,50 @@ pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable
|
|
4922
5089
|
return node;
|
4923
5090
|
}
|
4924
5091
|
|
5092
|
+
/**
|
5093
|
+
* Append a part into a list of string parts. Importantly this handles nested
|
5094
|
+
* interpolated strings by not necessarily removing the marker for static
|
5095
|
+
* literals.
|
5096
|
+
*/
|
5097
|
+
static void
|
5098
|
+
pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
|
5099
|
+
switch (PM_NODE_TYPE(part)) {
|
5100
|
+
case PM_STRING_NODE:
|
5101
|
+
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5102
|
+
break;
|
5103
|
+
case PM_EMBEDDED_STATEMENTS_NODE: {
|
5104
|
+
pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
|
5105
|
+
pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
|
5106
|
+
|
5107
|
+
if (embedded == NULL) {
|
5108
|
+
// If there are no statements or more than one statement, then
|
5109
|
+
// we lose the static literal flag.
|
5110
|
+
pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
|
5111
|
+
} else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
|
5112
|
+
// If the embedded statement is a string, then we can keep the
|
5113
|
+
// static literal flag and mark the string as frozen.
|
5114
|
+
pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5115
|
+
} else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5116
|
+
// If the embedded statement is an interpolated string and it's
|
5117
|
+
// a static literal, then we can keep the static literal flag.
|
5118
|
+
} else {
|
5119
|
+
// Otherwise we lose the static literal flag.
|
5120
|
+
pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
|
5121
|
+
}
|
5122
|
+
|
5123
|
+
break;
|
5124
|
+
}
|
5125
|
+
case PM_EMBEDDED_VARIABLE_NODE:
|
5126
|
+
pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
|
5127
|
+
break;
|
5128
|
+
default:
|
5129
|
+
assert(false && "unexpected node type");
|
5130
|
+
break;
|
5131
|
+
}
|
5132
|
+
|
5133
|
+
pm_node_list_append(parts, part);
|
5134
|
+
}
|
5135
|
+
|
4925
5136
|
/**
|
4926
5137
|
* Allocate a new InterpolatedRegularExpressionNode node.
|
4927
5138
|
*/
|
@@ -4955,54 +5166,113 @@ pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expressio
|
|
4955
5166
|
node->base.location.end = part->location.end;
|
4956
5167
|
}
|
4957
5168
|
|
4958
|
-
|
4959
|
-
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
4960
|
-
}
|
4961
|
-
|
4962
|
-
if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
|
4963
|
-
pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
|
4964
|
-
}
|
4965
|
-
|
4966
|
-
pm_node_list_append(&node->parts, part);
|
5169
|
+
pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
|
4967
5170
|
}
|
4968
5171
|
|
4969
5172
|
static inline void
|
4970
5173
|
pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
|
4971
5174
|
node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
|
4972
5175
|
node->base.location.end = closing->end;
|
4973
|
-
pm_node_flag_set((pm_node_t *)node, pm_regular_expression_flags_create(parser, closing));
|
5176
|
+
pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
|
4974
5177
|
}
|
4975
5178
|
|
4976
5179
|
/**
|
4977
5180
|
* Append a part to an InterpolatedStringNode node.
|
5181
|
+
*
|
5182
|
+
* This has some somewhat complicated semantics, because we need to update
|
5183
|
+
* multiple flags that have somewhat confusing interactions.
|
5184
|
+
*
|
5185
|
+
* PM_NODE_FLAG_STATIC_LITERAL indicates that the node should be treated as a
|
5186
|
+
* single static literal string that can be pushed onto the stack on its own.
|
5187
|
+
* Note that this doesn't necessarily mean that the string will be frozen or
|
5188
|
+
* not; the instructions in CRuby will be either putobject or putstring,
|
5189
|
+
* depending on the combination of `--enable-frozen-string-literal`,
|
5190
|
+
* `# frozen_string_literal: true`, and whether or not there is interpolation.
|
5191
|
+
*
|
5192
|
+
* PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN indicates that the string should be
|
5193
|
+
* explicitly frozen. This will only happen if the string is comprised entirely
|
5194
|
+
* of string parts that are themselves static literals and frozen.
|
5195
|
+
*
|
5196
|
+
* PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE indicates that the string should
|
5197
|
+
* be explicitly marked as mutable. This will happen from
|
5198
|
+
* `--disable-frozen-string-literal` or `# frozen_string_literal: false`. This
|
5199
|
+
* is necessary to indicate that the string should be left up to the runtime,
|
5200
|
+
* which could potentially use a chilled string otherwise.
|
4978
5201
|
*/
|
4979
5202
|
static inline void
|
4980
|
-
pm_interpolated_string_node_append(
|
5203
|
+
pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
|
5204
|
+
#define CLEAR_FLAGS(node) \
|
5205
|
+
node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
|
5206
|
+
|
5207
|
+
#define MUTABLE_FLAGS(node) \
|
5208
|
+
node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
|
5209
|
+
|
4981
5210
|
if (node->parts.size == 0 && node->opening_loc.start == NULL) {
|
4982
5211
|
node->base.location.start = part->location.start;
|
4983
5212
|
}
|
4984
5213
|
|
4985
|
-
|
4986
|
-
|
4987
|
-
|
5214
|
+
node->base.location.end = MAX(node->base.location.end, part->location.end);
|
5215
|
+
|
5216
|
+
switch (PM_NODE_TYPE(part)) {
|
5217
|
+
case PM_STRING_NODE:
|
5218
|
+
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5219
|
+
break;
|
5220
|
+
case PM_INTERPOLATED_STRING_NODE:
|
5221
|
+
if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5222
|
+
// If the string that we're concatenating is a static literal,
|
5223
|
+
// then we can keep the static literal flag for this string.
|
5224
|
+
} else {
|
5225
|
+
// Otherwise, we lose the static literal flag here and we should
|
5226
|
+
// also clear the mutability flags.
|
5227
|
+
CLEAR_FLAGS(node);
|
5228
|
+
}
|
5229
|
+
break;
|
5230
|
+
case PM_EMBEDDED_STATEMENTS_NODE: {
|
5231
|
+
pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
|
5232
|
+
pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
|
5233
|
+
|
5234
|
+
if (embedded == NULL) {
|
5235
|
+
// If we're embedding multiple statements or no statements, then
|
5236
|
+
// the string is not longer a static literal.
|
5237
|
+
CLEAR_FLAGS(node);
|
5238
|
+
} else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
|
5239
|
+
// If the embedded statement is a string, then we can make that
|
5240
|
+
// string as frozen and static literal, and not touch the static
|
5241
|
+
// literal status of this string.
|
5242
|
+
pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5243
|
+
|
5244
|
+
if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5245
|
+
MUTABLE_FLAGS(node);
|
5246
|
+
}
|
5247
|
+
} else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5248
|
+
// If the embedded statement is an interpolated string, but that
|
5249
|
+
// string is marked as static literal, then we can keep our
|
5250
|
+
// static literal status for this string.
|
5251
|
+
if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5252
|
+
MUTABLE_FLAGS(node);
|
5253
|
+
}
|
5254
|
+
} else {
|
5255
|
+
// In all other cases, we lose the static literal flag here and
|
5256
|
+
// become mutable.
|
5257
|
+
CLEAR_FLAGS(node);
|
5258
|
+
}
|
4988
5259
|
|
4989
|
-
|
4990
|
-
|
5260
|
+
break;
|
5261
|
+
}
|
5262
|
+
case PM_EMBEDDED_VARIABLE_NODE:
|
5263
|
+
// Embedded variables clear static literal, which means we also
|
5264
|
+
// should clear the mutability flags.
|
5265
|
+
CLEAR_FLAGS(node);
|
5266
|
+
break;
|
5267
|
+
default:
|
5268
|
+
assert(false && "unexpected node type");
|
5269
|
+
break;
|
4991
5270
|
}
|
4992
5271
|
|
4993
5272
|
pm_node_list_append(&node->parts, part);
|
4994
|
-
node->base.location.end = MAX(node->base.location.end, part->location.end);
|
4995
5273
|
|
4996
|
-
|
4997
|
-
|
4998
|
-
case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
|
4999
|
-
pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE);
|
5000
|
-
break;
|
5001
|
-
case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
|
5002
|
-
pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
|
5003
|
-
break;
|
5004
|
-
}
|
5005
|
-
}
|
5274
|
+
#undef CLEAR_FLAGS
|
5275
|
+
#undef MUTABLE_FLAGS
|
5006
5276
|
}
|
5007
5277
|
|
5008
5278
|
/**
|
@@ -5011,11 +5281,21 @@ pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_n
|
|
5011
5281
|
static pm_interpolated_string_node_t *
|
5012
5282
|
pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
|
5013
5283
|
pm_interpolated_string_node_t *node = PM_ALLOC_NODE(parser, pm_interpolated_string_node_t);
|
5284
|
+
pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
|
5285
|
+
|
5286
|
+
switch (parser->frozen_string_literal) {
|
5287
|
+
case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
|
5288
|
+
flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
|
5289
|
+
break;
|
5290
|
+
case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
|
5291
|
+
flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
|
5292
|
+
break;
|
5293
|
+
}
|
5014
5294
|
|
5015
5295
|
*node = (pm_interpolated_string_node_t) {
|
5016
5296
|
{
|
5017
5297
|
.type = PM_INTERPOLATED_STRING_NODE,
|
5018
|
-
.flags =
|
5298
|
+
.flags = flags,
|
5019
5299
|
.location = {
|
5020
5300
|
.start = opening->start,
|
5021
5301
|
.end = closing->end,
|
@@ -5029,7 +5309,7 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin
|
|
5029
5309
|
if (parts != NULL) {
|
5030
5310
|
pm_node_t *part;
|
5031
5311
|
PM_NODE_LIST_FOREACH(parts, index, part) {
|
5032
|
-
pm_interpolated_string_node_append(
|
5312
|
+
pm_interpolated_string_node_append(node, part);
|
5033
5313
|
}
|
5034
5314
|
}
|
5035
5315
|
|
@@ -5051,15 +5331,7 @@ pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_
|
|
5051
5331
|
node->base.location.start = part->location.start;
|
5052
5332
|
}
|
5053
5333
|
|
5054
|
-
|
5055
|
-
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5056
|
-
}
|
5057
|
-
|
5058
|
-
if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5059
|
-
pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
|
5060
|
-
}
|
5061
|
-
|
5062
|
-
pm_node_list_append(&node->parts, part);
|
5334
|
+
pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
|
5063
5335
|
node->base.location.end = MAX(node->base.location.end, part->location.end);
|
5064
5336
|
}
|
5065
5337
|
|
@@ -5125,11 +5397,7 @@ pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *openi
|
|
5125
5397
|
|
5126
5398
|
static inline void
|
5127
5399
|
pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
|
5128
|
-
|
5129
|
-
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5130
|
-
}
|
5131
|
-
|
5132
|
-
pm_node_list_append(&node->parts, part);
|
5400
|
+
pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
|
5133
5401
|
node->base.location.end = part->location.end;
|
5134
5402
|
}
|
5135
5403
|
|
@@ -6397,6 +6665,7 @@ pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argumen
|
|
6397
6665
|
*node = (pm_return_node_t) {
|
6398
6666
|
{
|
6399
6667
|
.type = PM_RETURN_NODE,
|
6668
|
+
.flags = 0,
|
6400
6669
|
.location = {
|
6401
6670
|
.start = keyword->start,
|
6402
6671
|
.end = (arguments == NULL ? keyword->end : arguments->base.location.end)
|
@@ -6729,7 +6998,8 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
|
|
6729
6998
|
}
|
6730
6999
|
|
6731
7000
|
/**
|
6732
|
-
* Read through the contents of a string and check if it consists solely of
|
7001
|
+
* Read through the contents of a string and check if it consists solely of
|
7002
|
+
* US-ASCII code points.
|
6733
7003
|
*/
|
6734
7004
|
static bool
|
6735
7005
|
pm_ascii_only_p(const pm_string_t *contents) {
|
@@ -6743,27 +7013,72 @@ pm_ascii_only_p(const pm_string_t *contents) {
|
|
6743
7013
|
return true;
|
6744
7014
|
}
|
6745
7015
|
|
7016
|
+
/**
|
7017
|
+
* Validate that the contents of the given symbol are all valid UTF-8.
|
7018
|
+
*/
|
7019
|
+
static void
|
7020
|
+
parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
|
7021
|
+
for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
|
7022
|
+
size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
|
7023
|
+
|
7024
|
+
if (width == 0) {
|
7025
|
+
pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
|
7026
|
+
break;
|
7027
|
+
}
|
7028
|
+
|
7029
|
+
cursor += width;
|
7030
|
+
}
|
7031
|
+
}
|
7032
|
+
|
7033
|
+
/**
|
7034
|
+
* Validate that the contents of the given symbol are all valid in the encoding
|
7035
|
+
* of the parser.
|
7036
|
+
*/
|
7037
|
+
static void
|
7038
|
+
parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
|
7039
|
+
const pm_encoding_t *encoding = parser->encoding;
|
7040
|
+
|
7041
|
+
for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
|
7042
|
+
size_t width = encoding->char_width(cursor, end - cursor);
|
7043
|
+
|
7044
|
+
if (width == 0) {
|
7045
|
+
pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
|
7046
|
+
break;
|
7047
|
+
}
|
7048
|
+
|
7049
|
+
cursor += width;
|
7050
|
+
}
|
7051
|
+
}
|
7052
|
+
|
6746
7053
|
/**
|
6747
7054
|
* Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
|
6748
7055
|
* encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
|
6749
7056
|
* points. Otherwise, the encoding may be explicitly set with an escape
|
6750
7057
|
* sequence.
|
7058
|
+
*
|
7059
|
+
* If the validate flag is set, then it will check the contents of the symbol
|
7060
|
+
* to ensure that all characters are valid in the encoding.
|
6751
7061
|
*/
|
6752
7062
|
static inline pm_node_flags_t
|
6753
|
-
parse_symbol_encoding(
|
7063
|
+
parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
|
6754
7064
|
if (parser->explicit_encoding != NULL) {
|
6755
7065
|
// A Symbol may optionally have its encoding explicitly set. This will
|
6756
7066
|
// happen if an escape sequence results in a non-ASCII code point.
|
6757
7067
|
if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
|
7068
|
+
if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
|
6758
7069
|
return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
|
6759
7070
|
} else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
|
6760
7071
|
return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
|
7072
|
+
} else if (validate) {
|
7073
|
+
parse_symbol_encoding_validate_other(parser, location, contents);
|
6761
7074
|
}
|
6762
7075
|
} else if (pm_ascii_only_p(contents)) {
|
6763
7076
|
// Ruby stipulates that all source files must use an ASCII-compatible
|
6764
7077
|
// encoding. Thus, all symbols appearing in source are eligible for
|
6765
7078
|
// "downgrading" to US-ASCII.
|
6766
7079
|
return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
|
7080
|
+
} else if (validate) {
|
7081
|
+
parse_symbol_encoding_validate_other(parser, location, contents);
|
6767
7082
|
}
|
6768
7083
|
|
6769
7084
|
return 0;
|
@@ -6931,7 +7246,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
|
|
6931
7246
|
*/
|
6932
7247
|
static pm_symbol_node_t *
|
6933
7248
|
pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
|
6934
|
-
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, &parser->current_string));
|
7249
|
+
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
|
6935
7250
|
parser->current_string = PM_STRING_EMPTY;
|
6936
7251
|
return node;
|
6937
7252
|
}
|
@@ -6953,7 +7268,7 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
6953
7268
|
|
6954
7269
|
assert((label.end - label.start) >= 0);
|
6955
7270
|
pm_string_shared_init(&node->unescaped, label.start, label.end);
|
6956
|
-
pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &node->unescaped));
|
7271
|
+
pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
|
6957
7272
|
|
6958
7273
|
break;
|
6959
7274
|
}
|
@@ -7038,7 +7353,8 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
|
|
7038
7353
|
.unescaped = node->unescaped
|
7039
7354
|
};
|
7040
7355
|
|
7041
|
-
|
7356
|
+
pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
|
7357
|
+
pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
|
7042
7358
|
|
7043
7359
|
// We are explicitly _not_ using pm_node_destroy here because we don't want
|
7044
7360
|
// to trash the unescaped string. We could instead copy the string if we
|
@@ -7574,7 +7890,7 @@ pm_local_variable_read_node_create_it(pm_parser_t *parser, const pm_token_t *nam
|
|
7574
7890
|
static pm_node_t *
|
7575
7891
|
pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
|
7576
7892
|
if (
|
7577
|
-
(parser->version !=
|
7893
|
+
(parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
|
7578
7894
|
!parser->current_scope->closed &&
|
7579
7895
|
(parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
|
7580
7896
|
pm_node_is_it(parser, node)
|
@@ -8298,10 +8614,11 @@ context_human(pm_context_t context) {
|
|
8298
8614
|
/* Specific token lexers */
|
8299
8615
|
/******************************************************************************/
|
8300
8616
|
|
8301
|
-
static void
|
8302
|
-
pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *invalid) {
|
8617
|
+
static inline void
|
8618
|
+
pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
|
8303
8619
|
if (invalid != NULL) {
|
8304
|
-
|
8620
|
+
pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
|
8621
|
+
pm_parser_err(parser, invalid, invalid + 1, diag_id);
|
8305
8622
|
}
|
8306
8623
|
}
|
8307
8624
|
|
@@ -8309,7 +8626,7 @@ static size_t
|
|
8309
8626
|
pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
|
8310
8627
|
const uint8_t *invalid = NULL;
|
8311
8628
|
size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
|
8312
|
-
pm_strspn_number_validate(parser, invalid);
|
8629
|
+
pm_strspn_number_validate(parser, string, length, invalid);
|
8313
8630
|
return length;
|
8314
8631
|
}
|
8315
8632
|
|
@@ -8317,7 +8634,7 @@ static size_t
|
|
8317
8634
|
pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
|
8318
8635
|
const uint8_t *invalid = NULL;
|
8319
8636
|
size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
|
8320
|
-
pm_strspn_number_validate(parser, invalid);
|
8637
|
+
pm_strspn_number_validate(parser, string, length, invalid);
|
8321
8638
|
return length;
|
8322
8639
|
}
|
8323
8640
|
|
@@ -8325,7 +8642,7 @@ static size_t
|
|
8325
8642
|
pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
|
8326
8643
|
const uint8_t *invalid = NULL;
|
8327
8644
|
size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
|
8328
|
-
pm_strspn_number_validate(parser, invalid);
|
8645
|
+
pm_strspn_number_validate(parser, string, length, invalid);
|
8329
8646
|
return length;
|
8330
8647
|
}
|
8331
8648
|
|
@@ -8333,7 +8650,7 @@ static size_t
|
|
8333
8650
|
pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
|
8334
8651
|
const uint8_t *invalid = NULL;
|
8335
8652
|
size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
|
8336
|
-
pm_strspn_number_validate(parser, invalid);
|
8653
|
+
pm_strspn_number_validate(parser, string, length, invalid);
|
8337
8654
|
return length;
|
8338
8655
|
}
|
8339
8656
|
|
@@ -8395,6 +8712,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8395
8712
|
if (pm_char_is_decimal_digit(peek(parser))) {
|
8396
8713
|
parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
|
8397
8714
|
} else {
|
8715
|
+
match(parser, '_');
|
8398
8716
|
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
|
8399
8717
|
}
|
8400
8718
|
|
@@ -8407,6 +8725,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8407
8725
|
if (pm_char_is_binary_digit(peek(parser))) {
|
8408
8726
|
parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
|
8409
8727
|
} else {
|
8728
|
+
match(parser, '_');
|
8410
8729
|
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
|
8411
8730
|
}
|
8412
8731
|
|
@@ -8420,6 +8739,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8420
8739
|
if (pm_char_is_octal_digit(peek(parser))) {
|
8421
8740
|
parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
|
8422
8741
|
} else {
|
8742
|
+
match(parser, '_');
|
8423
8743
|
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
|
8424
8744
|
}
|
8425
8745
|
|
@@ -8447,6 +8767,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8447
8767
|
if (pm_char_is_hexadecimal_digit(peek(parser))) {
|
8448
8768
|
parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
|
8449
8769
|
} else {
|
8770
|
+
match(parser, '_');
|
8450
8771
|
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
|
8451
8772
|
}
|
8452
8773
|
|
@@ -8567,7 +8888,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
8567
8888
|
} while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
|
8568
8889
|
|
8569
8890
|
// $0 isn't allowed to be followed by anything.
|
8570
|
-
pm_diagnostic_id_t diag_id = parser->version ==
|
8891
|
+
pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
|
8571
8892
|
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
|
8572
8893
|
}
|
8573
8894
|
|
@@ -8603,7 +8924,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
8603
8924
|
} else {
|
8604
8925
|
// If we get here, then we have a $ followed by something that
|
8605
8926
|
// isn't recognized as a global variable.
|
8606
|
-
pm_diagnostic_id_t diag_id = parser->version ==
|
8927
|
+
pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
|
8607
8928
|
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
8608
8929
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
|
8609
8930
|
}
|
@@ -9241,22 +9562,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9241
9562
|
const uint8_t *start = parser->current.end - 1;
|
9242
9563
|
parser->current.end++;
|
9243
9564
|
|
9244
|
-
if (
|
9245
|
-
(parser->current.end + 4 <= parser->end) &&
|
9246
|
-
pm_char_is_hexadecimal_digit(parser->current.end[0]) &&
|
9247
|
-
pm_char_is_hexadecimal_digit(parser->current.end[1]) &&
|
9248
|
-
pm_char_is_hexadecimal_digit(parser->current.end[2]) &&
|
9249
|
-
pm_char_is_hexadecimal_digit(parser->current.end[3])
|
9250
|
-
) {
|
9251
|
-
uint32_t value = escape_unicode(parser->current.end, 4);
|
9252
|
-
|
9253
|
-
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9254
|
-
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
|
9255
|
-
}
|
9256
|
-
escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
|
9257
|
-
|
9258
|
-
parser->current.end += 4;
|
9259
|
-
} else if (peek(parser) == '{') {
|
9565
|
+
if (peek(parser) == '{') {
|
9260
9566
|
const uint8_t *unicode_codepoints_start = parser->current.end - 2;
|
9261
9567
|
|
9262
9568
|
parser->current.end++;
|
@@ -9306,7 +9612,21 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9306
9612
|
pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
|
9307
9613
|
}
|
9308
9614
|
} else {
|
9309
|
-
|
9615
|
+
size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
|
9616
|
+
|
9617
|
+
if (length == 4) {
|
9618
|
+
uint32_t value = escape_unicode(parser->current.end, 4);
|
9619
|
+
|
9620
|
+
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9621
|
+
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
|
9622
|
+
}
|
9623
|
+
|
9624
|
+
escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
|
9625
|
+
parser->current.end += 4;
|
9626
|
+
} else {
|
9627
|
+
parser->current.end += length;
|
9628
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
|
9629
|
+
}
|
9310
9630
|
}
|
9311
9631
|
|
9312
9632
|
return;
|
@@ -9560,8 +9880,8 @@ lex_at_variable(pm_parser_t *parser) {
|
|
9560
9880
|
}
|
9561
9881
|
} else if (parser->current.end < parser->end && pm_char_is_decimal_digit(*parser->current.end)) {
|
9562
9882
|
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
|
9563
|
-
if (parser->version ==
|
9564
|
-
diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ?
|
9883
|
+
if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
|
9884
|
+
diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
|
9565
9885
|
}
|
9566
9886
|
|
9567
9887
|
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
@@ -10545,8 +10865,11 @@ parser_lex(pm_parser_t *parser) {
|
|
10545
10865
|
}
|
10546
10866
|
|
10547
10867
|
size_t ident_length = (size_t) (parser->current.end - ident_start);
|
10868
|
+
bool ident_error = false;
|
10869
|
+
|
10548
10870
|
if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
|
10549
|
-
|
10871
|
+
pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
|
10872
|
+
ident_error = true;
|
10550
10873
|
}
|
10551
10874
|
|
10552
10875
|
parser->explicit_encoding = NULL;
|
@@ -10571,7 +10894,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10571
10894
|
// this is not a valid heredoc declaration. In this case we
|
10572
10895
|
// will add an error, but we will still return a heredoc
|
10573
10896
|
// start.
|
10574
|
-
|
10897
|
+
if (!ident_error) pm_parser_err_heredoc_term(parser, parser->lex_modes.current);
|
10575
10898
|
body_start = parser->end;
|
10576
10899
|
} else {
|
10577
10900
|
// Otherwise, we want to indicate that the body of the
|
@@ -11898,7 +12221,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11898
12221
|
// terminator) but still continue parsing so that content after the
|
11899
12222
|
// declaration of the heredoc can be parsed.
|
11900
12223
|
if (parser->current.end >= parser->end) {
|
11901
|
-
|
12224
|
+
pm_parser_err_heredoc_term(parser, lex_mode);
|
11902
12225
|
parser->next_start = lex_mode->as.heredoc.next_start;
|
11903
12226
|
parser->heredoc_end = parser->current.end;
|
11904
12227
|
lex_state_set(parser, PM_LEX_STATE_END);
|
@@ -12537,6 +12860,23 @@ expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_to
|
|
12537
12860
|
parser->previous.type = PM_TOKEN_MISSING;
|
12538
12861
|
}
|
12539
12862
|
|
12863
|
+
/**
|
12864
|
+
* A special expect1 that expects a heredoc terminator and handles popping the
|
12865
|
+
* lex mode accordingly.
|
12866
|
+
*/
|
12867
|
+
static void
|
12868
|
+
expect1_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
|
12869
|
+
if (match1(parser, PM_TOKEN_HEREDOC_END)) {
|
12870
|
+
lex_mode_pop(parser);
|
12871
|
+
parser_lex(parser);
|
12872
|
+
} else {
|
12873
|
+
pm_parser_err_heredoc_term(parser, lex_mode);
|
12874
|
+
lex_mode_pop(parser);
|
12875
|
+
parser->previous.start = parser->previous.end;
|
12876
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
12877
|
+
}
|
12878
|
+
}
|
12879
|
+
|
12540
12880
|
static pm_node_t *
|
12541
12881
|
parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id);
|
12542
12882
|
|
@@ -12664,25 +13004,72 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
|
|
12664
13004
|
*name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
|
12665
13005
|
}
|
12666
13006
|
|
13007
|
+
/**
|
13008
|
+
* Certain expressions are not targetable, but in order to provide a better
|
13009
|
+
* experience we give a specific error message. In order to maintain as much
|
13010
|
+
* information in the tree as possible, we replace them with local variable
|
13011
|
+
* writes.
|
13012
|
+
*/
|
13013
|
+
static pm_node_t *
|
13014
|
+
parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
|
13015
|
+
switch (PM_NODE_TYPE(target)) {
|
13016
|
+
case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
|
13017
|
+
case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
|
13018
|
+
case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
|
13019
|
+
case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
|
13020
|
+
case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
|
13021
|
+
case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
|
13022
|
+
case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
|
13023
|
+
default: break;
|
13024
|
+
}
|
13025
|
+
|
13026
|
+
pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
|
13027
|
+
pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
|
13028
|
+
|
13029
|
+
pm_node_destroy(parser, target);
|
13030
|
+
return (pm_node_t *) result;
|
13031
|
+
}
|
13032
|
+
|
12667
13033
|
/**
|
12668
13034
|
* Convert the given node into a valid target node.
|
12669
13035
|
*/
|
12670
13036
|
static pm_node_t *
|
12671
|
-
parse_target(pm_parser_t *parser, pm_node_t *target) {
|
13037
|
+
parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
12672
13038
|
switch (PM_NODE_TYPE(target)) {
|
12673
13039
|
case PM_MISSING_NODE:
|
12674
13040
|
return target;
|
13041
|
+
case PM_SOURCE_ENCODING_NODE:
|
13042
|
+
case PM_FALSE_NODE:
|
13043
|
+
case PM_SOURCE_FILE_NODE:
|
13044
|
+
case PM_SOURCE_LINE_NODE:
|
13045
|
+
case PM_NIL_NODE:
|
13046
|
+
case PM_SELF_NODE:
|
13047
|
+
case PM_TRUE_NODE: {
|
13048
|
+
// In these special cases, we have specific error messages and we
|
13049
|
+
// will replace them with local variable writes.
|
13050
|
+
return parse_unwriteable_target(parser, target);
|
13051
|
+
}
|
12675
13052
|
case PM_CLASS_VARIABLE_READ_NODE:
|
12676
13053
|
assert(sizeof(pm_class_variable_target_node_t) == sizeof(pm_class_variable_read_node_t));
|
12677
13054
|
target->type = PM_CLASS_VARIABLE_TARGET_NODE;
|
12678
13055
|
return target;
|
12679
13056
|
case PM_CONSTANT_PATH_NODE:
|
13057
|
+
if (context_def_p(parser)) {
|
13058
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
|
13059
|
+
}
|
13060
|
+
|
12680
13061
|
assert(sizeof(pm_constant_path_target_node_t) == sizeof(pm_constant_path_node_t));
|
12681
13062
|
target->type = PM_CONSTANT_PATH_TARGET_NODE;
|
13063
|
+
|
12682
13064
|
return target;
|
12683
13065
|
case PM_CONSTANT_READ_NODE:
|
13066
|
+
if (context_def_p(parser)) {
|
13067
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
|
13068
|
+
}
|
13069
|
+
|
12684
13070
|
assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
|
12685
13071
|
target->type = PM_CONSTANT_TARGET_NODE;
|
13072
|
+
|
12686
13073
|
return target;
|
12687
13074
|
case PM_BACK_REFERENCE_READ_NODE:
|
12688
13075
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
@@ -12715,7 +13102,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
12715
13102
|
pm_splat_node_t *splat = (pm_splat_node_t *) target;
|
12716
13103
|
|
12717
13104
|
if (splat->expression != NULL) {
|
12718
|
-
splat->expression = parse_target(parser, splat->expression);
|
13105
|
+
splat->expression = parse_target(parser, splat->expression, multiple);
|
12719
13106
|
}
|
12720
13107
|
|
12721
13108
|
return (pm_node_t *) splat;
|
@@ -12753,6 +13140,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
12753
13140
|
}
|
12754
13141
|
|
12755
13142
|
if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
|
13143
|
+
if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
|
13144
|
+
pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
|
13145
|
+
}
|
13146
|
+
|
12756
13147
|
parse_write_name(parser, &call->name);
|
12757
13148
|
return (pm_node_t *) pm_call_target_node_create(parser, call);
|
12758
13149
|
}
|
@@ -12780,8 +13171,8 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
12780
13171
|
* assignment.
|
12781
13172
|
*/
|
12782
13173
|
static pm_node_t *
|
12783
|
-
parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
|
12784
|
-
pm_node_t *result = parse_target(parser, target);
|
13174
|
+
parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
13175
|
+
pm_node_t *result = parse_target(parser, target, multiple);
|
12785
13176
|
|
12786
13177
|
// Ensure that we have one of an =, an 'in' in for indexes, and a ')' in parens after the targets.
|
12787
13178
|
if (
|
@@ -12826,13 +13217,20 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
12826
13217
|
}
|
12827
13218
|
case PM_CONSTANT_PATH_NODE: {
|
12828
13219
|
pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
|
13220
|
+
|
13221
|
+
if (context_def_p(parser)) {
|
13222
|
+
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
|
13223
|
+
}
|
13224
|
+
|
12829
13225
|
return parse_shareable_constant_write(parser, node);
|
12830
13226
|
}
|
12831
13227
|
case PM_CONSTANT_READ_NODE: {
|
12832
13228
|
pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
|
13229
|
+
|
12833
13230
|
if (context_def_p(parser)) {
|
12834
13231
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
|
12835
13232
|
}
|
13233
|
+
|
12836
13234
|
pm_node_destroy(parser, target);
|
12837
13235
|
return parse_shareable_constant_write(parser, node);
|
12838
13236
|
}
|
@@ -13011,7 +13409,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13011
13409
|
bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
|
13012
13410
|
|
13013
13411
|
pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
|
13014
|
-
pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target));
|
13412
|
+
pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true));
|
13015
13413
|
|
13016
13414
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
13017
13415
|
if (accept1(parser, PM_TOKEN_USTAR)) {
|
@@ -13027,7 +13425,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13027
13425
|
|
13028
13426
|
if (token_begins_expression_p(parser->current.type)) {
|
13029
13427
|
name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
|
13030
|
-
name = parse_target(parser, name);
|
13428
|
+
name = parse_target(parser, name, true);
|
13031
13429
|
}
|
13032
13430
|
|
13033
13431
|
pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
|
@@ -13035,7 +13433,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13035
13433
|
has_rest = true;
|
13036
13434
|
} else if (token_begins_expression_p(parser->current.type)) {
|
13037
13435
|
pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
|
13038
|
-
target = parse_target(parser, target);
|
13436
|
+
target = parse_target(parser, target, true);
|
13039
13437
|
|
13040
13438
|
pm_multi_target_node_targets_append(parser, result, target);
|
13041
13439
|
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
@@ -13152,11 +13550,11 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
13152
13550
|
*/
|
13153
13551
|
static void
|
13154
13552
|
pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
|
13155
|
-
const pm_node_t *duplicated = pm_static_literals_add(parser, literals, node);
|
13553
|
+
const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
|
13156
13554
|
|
13157
13555
|
if (duplicated != NULL) {
|
13158
13556
|
pm_buffer_t buffer = { 0 };
|
13159
|
-
pm_static_literal_inspect(&buffer, parser, duplicated);
|
13557
|
+
pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
|
13160
13558
|
|
13161
13559
|
pm_diagnostic_list_append_format(
|
13162
13560
|
&parser->warning_list,
|
@@ -13178,7 +13576,7 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
|
|
13178
13576
|
*/
|
13179
13577
|
static void
|
13180
13578
|
pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
|
13181
|
-
if (pm_static_literals_add(parser, literals, node) != NULL) {
|
13579
|
+
if (pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node) != NULL) {
|
13182
13580
|
pm_diagnostic_list_append_format(
|
13183
13581
|
&parser->warning_list,
|
13184
13582
|
node->location.start,
|
@@ -13206,10 +13604,16 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
|
|
13206
13604
|
pm_token_t operator = parser->previous;
|
13207
13605
|
pm_node_t *value = NULL;
|
13208
13606
|
|
13209
|
-
if (
|
13607
|
+
if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
|
13608
|
+
// If we're about to parse a nested hash that is being
|
13609
|
+
// pushed into this hash directly with **, then we want the
|
13610
|
+
// inner hash to share the static literals with the outer
|
13611
|
+
// hash.
|
13612
|
+
parser->current_hash_keys = literals;
|
13210
13613
|
value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
13211
|
-
}
|
13212
|
-
|
13614
|
+
} else if (token_begins_expression_p(parser->current.type)) {
|
13615
|
+
value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
13616
|
+
} else {
|
13213
13617
|
pm_parser_scope_forwarding_keywords_check(parser, &operator);
|
13214
13618
|
}
|
13215
13619
|
|
@@ -13234,9 +13638,15 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
|
|
13234
13638
|
pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
|
13235
13639
|
value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
|
13236
13640
|
} else {
|
13237
|
-
int depth =
|
13641
|
+
int depth = -1;
|
13238
13642
|
pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
|
13239
13643
|
|
13644
|
+
if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
|
13645
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
|
13646
|
+
} else {
|
13647
|
+
depth = pm_parser_local_depth(parser, &identifier);
|
13648
|
+
}
|
13649
|
+
|
13240
13650
|
if (depth == -1) {
|
13241
13651
|
value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
|
13242
13652
|
} else {
|
@@ -13354,15 +13764,16 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13354
13764
|
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
|
13355
13765
|
argument = (pm_node_t *) hash;
|
13356
13766
|
|
13357
|
-
pm_static_literals_t
|
13358
|
-
bool contains_keyword_splat = parse_assocs(parser, &
|
13767
|
+
pm_static_literals_t hash_keys = { 0 };
|
13768
|
+
bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash);
|
13359
13769
|
|
13360
13770
|
parse_arguments_append(parser, arguments, argument);
|
13361
|
-
if (contains_keyword_splat) {
|
13362
|
-
pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
|
13363
|
-
}
|
13364
13771
|
|
13365
|
-
|
13772
|
+
pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
|
13773
|
+
if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
|
13774
|
+
pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
|
13775
|
+
|
13776
|
+
pm_static_literals_free(&hash_keys);
|
13366
13777
|
parsed_bare_hash = true;
|
13367
13778
|
|
13368
13779
|
break;
|
@@ -13438,7 +13849,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13438
13849
|
argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, PM_ERR_EXPECT_ARGUMENT);
|
13439
13850
|
}
|
13440
13851
|
|
13852
|
+
bool contains_keywords = false;
|
13441
13853
|
bool contains_keyword_splat = false;
|
13854
|
+
|
13442
13855
|
if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
|
13443
13856
|
if (parsed_bare_hash) {
|
13444
13857
|
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
|
@@ -13452,10 +13865,11 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13452
13865
|
}
|
13453
13866
|
|
13454
13867
|
pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
|
13868
|
+
contains_keywords = true;
|
13455
13869
|
|
13456
13870
|
// Create the set of static literals for this hash.
|
13457
|
-
pm_static_literals_t
|
13458
|
-
pm_hash_key_static_literals_add(parser, &
|
13871
|
+
pm_static_literals_t hash_keys = { 0 };
|
13872
|
+
pm_hash_key_static_literals_add(parser, &hash_keys, argument);
|
13459
13873
|
|
13460
13874
|
// Finish parsing the one we are part way through.
|
13461
13875
|
pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
|
@@ -13469,10 +13883,10 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13469
13883
|
token_begins_expression_p(parser->current.type) ||
|
13470
13884
|
match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
|
13471
13885
|
)) {
|
13472
|
-
contains_keyword_splat = parse_assocs(parser, &
|
13886
|
+
contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash);
|
13473
13887
|
}
|
13474
13888
|
|
13475
|
-
pm_static_literals_free(&
|
13889
|
+
pm_static_literals_free(&hash_keys);
|
13476
13890
|
parsed_bare_hash = true;
|
13477
13891
|
} else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
|
13478
13892
|
// TODO: Could we solve this with binding powers instead?
|
@@ -13480,9 +13894,12 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13480
13894
|
}
|
13481
13895
|
|
13482
13896
|
parse_arguments_append(parser, arguments, argument);
|
13483
|
-
|
13484
|
-
|
13485
|
-
|
13897
|
+
|
13898
|
+
pm_node_flags_t flags = 0;
|
13899
|
+
if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
|
13900
|
+
if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
|
13901
|
+
pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
|
13902
|
+
|
13486
13903
|
break;
|
13487
13904
|
}
|
13488
13905
|
}
|
@@ -13595,7 +14012,6 @@ typedef enum {
|
|
13595
14012
|
PM_PARAMETERS_ORDER_OPTIONAL,
|
13596
14013
|
PM_PARAMETERS_ORDER_NAMED,
|
13597
14014
|
PM_PARAMETERS_ORDER_NONE,
|
13598
|
-
|
13599
14015
|
} pm_parameters_order_t;
|
13600
14016
|
|
13601
14017
|
/**
|
@@ -13903,6 +14319,7 @@ parse_parameters(
|
|
13903
14319
|
pm_token_t operator = parser->previous;
|
13904
14320
|
pm_token_t name;
|
13905
14321
|
bool repeated = false;
|
14322
|
+
|
13906
14323
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
13907
14324
|
name = parser->previous;
|
13908
14325
|
repeated = pm_parser_parameter_name_check(parser, &name);
|
@@ -13916,6 +14333,7 @@ parse_parameters(
|
|
13916
14333
|
if (repeated) {
|
13917
14334
|
pm_node_flag_set_repeated_parameter(param);
|
13918
14335
|
}
|
14336
|
+
|
13919
14337
|
if (params->rest == NULL) {
|
13920
14338
|
pm_parameters_node_rest_set(params, param);
|
13921
14339
|
} else {
|
@@ -13927,6 +14345,7 @@ parse_parameters(
|
|
13927
14345
|
}
|
13928
14346
|
case PM_TOKEN_STAR_STAR:
|
13929
14347
|
case PM_TOKEN_USTAR_STAR: {
|
14348
|
+
pm_parameters_order_t previous_order = order;
|
13930
14349
|
update_parameter_state(parser, &parser->current, &order);
|
13931
14350
|
parser_lex(parser);
|
13932
14351
|
|
@@ -13934,6 +14353,10 @@ parse_parameters(
|
|
13934
14353
|
pm_node_t *param;
|
13935
14354
|
|
13936
14355
|
if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
|
14356
|
+
if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
|
14357
|
+
pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
|
14358
|
+
}
|
14359
|
+
|
13937
14360
|
param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
|
13938
14361
|
} else {
|
13939
14362
|
pm_token_t name;
|
@@ -14031,7 +14454,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
|
|
14031
14454
|
pm_rescue_node_operator_set(rescue, &parser->previous);
|
14032
14455
|
|
14033
14456
|
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
|
14034
|
-
reference = parse_target(parser, reference);
|
14457
|
+
reference = parse_target(parser, reference, false);
|
14035
14458
|
|
14036
14459
|
pm_rescue_node_reference_set(rescue, reference);
|
14037
14460
|
break;
|
@@ -14061,7 +14484,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
|
|
14061
14484
|
pm_rescue_node_operator_set(rescue, &parser->previous);
|
14062
14485
|
|
14063
14486
|
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
|
14064
|
-
reference = parse_target(parser, reference);
|
14487
|
+
reference = parse_target(parser, reference, false);
|
14065
14488
|
|
14066
14489
|
pm_rescue_node_reference_set(rescue, reference);
|
14067
14490
|
break;
|
@@ -15030,7 +15453,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
15030
15453
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
15031
15454
|
|
15032
15455
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
15033
|
-
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
15456
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
|
15034
15457
|
|
15035
15458
|
return (pm_node_t *) symbol;
|
15036
15459
|
}
|
@@ -15130,7 +15553,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
15130
15553
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
|
15131
15554
|
}
|
15132
15555
|
|
15133
|
-
return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
15556
|
+
return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
|
15134
15557
|
}
|
15135
15558
|
|
15136
15559
|
/**
|
@@ -15155,7 +15578,7 @@ parse_undef_argument(pm_parser_t *parser) {
|
|
15155
15578
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
15156
15579
|
|
15157
15580
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
15158
|
-
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
15581
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
|
15159
15582
|
|
15160
15583
|
return (pm_node_t *) symbol;
|
15161
15584
|
}
|
@@ -15196,7 +15619,7 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
|
|
15196
15619
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
15197
15620
|
|
15198
15621
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
15199
|
-
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
15622
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
|
15200
15623
|
|
15201
15624
|
return (pm_node_t *) symbol;
|
15202
15625
|
}
|
@@ -15453,9 +15876,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
|
|
15453
15876
|
while (accept1(parser, PM_TOKEN_COLON_COLON)) {
|
15454
15877
|
pm_token_t delimiter = parser->previous;
|
15455
15878
|
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
15456
|
-
|
15457
|
-
pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
|
15458
|
-
node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, child);
|
15879
|
+
node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
|
15459
15880
|
}
|
15460
15881
|
|
15461
15882
|
// If there is a [ or ( that follows, then this is part of a larger pattern
|
@@ -15643,8 +16064,15 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
|
|
15643
16064
|
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
|
15644
16065
|
pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
|
15645
16066
|
|
15646
|
-
int depth;
|
15647
|
-
if (
|
16067
|
+
int depth = -1;
|
16068
|
+
if (value_loc->end[-1] == '!' || value_loc->end[-1] == '?') {
|
16069
|
+
pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
|
16070
|
+
PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
|
16071
|
+
} else {
|
16072
|
+
depth = pm_parser_local_depth_constant_id(parser, constant_id);
|
16073
|
+
}
|
16074
|
+
|
16075
|
+
if (depth == -1) {
|
15648
16076
|
pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
|
15649
16077
|
}
|
15650
16078
|
|
@@ -15665,7 +16093,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
|
|
15665
16093
|
*/
|
15666
16094
|
static void
|
15667
16095
|
parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
|
15668
|
-
if (pm_static_literals_add(parser, keys, node) != NULL) {
|
16096
|
+
if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
|
15669
16097
|
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
|
15670
16098
|
}
|
15671
16099
|
}
|
@@ -15953,7 +16381,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
15953
16381
|
|
15954
16382
|
if (variable == NULL) {
|
15955
16383
|
if (
|
15956
|
-
(parser->version !=
|
16384
|
+
(parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
|
15957
16385
|
!parser->current_scope->closed &&
|
15958
16386
|
(parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
|
15959
16387
|
pm_token_is_it(parser->previous.start, parser->previous.end)
|
@@ -16027,8 +16455,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
16027
16455
|
parser_lex(parser);
|
16028
16456
|
|
16029
16457
|
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
16030
|
-
|
16031
|
-
pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, child);
|
16458
|
+
pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
|
16032
16459
|
|
16033
16460
|
return parse_pattern_constant_path(parser, captures, (pm_node_t *) node);
|
16034
16461
|
}
|
@@ -16354,7 +16781,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
16354
16781
|
|
16355
16782
|
pm_node_list_free(&parts);
|
16356
16783
|
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16357
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
16784
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16358
16785
|
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16359
16786
|
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
16360
16787
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
@@ -16380,7 +16807,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
16380
16807
|
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
16381
16808
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16382
16809
|
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16383
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
16810
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16384
16811
|
} else {
|
16385
16812
|
// If we get here, then we have interpolation so we'll need
|
16386
16813
|
// to create a string or symbol node with interpolation.
|
@@ -16462,11 +16889,11 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
16462
16889
|
pm_token_t bounds = not_provided(parser);
|
16463
16890
|
|
16464
16891
|
pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
|
16465
|
-
pm_interpolated_string_node_append(
|
16892
|
+
pm_interpolated_string_node_append(container, current);
|
16466
16893
|
current = (pm_node_t *) container;
|
16467
16894
|
}
|
16468
16895
|
|
16469
|
-
pm_interpolated_string_node_append(
|
16896
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
|
16470
16897
|
}
|
16471
16898
|
}
|
16472
16899
|
|
@@ -16711,13 +17138,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16711
17138
|
}
|
16712
17139
|
|
16713
17140
|
element = (pm_node_t *) pm_keyword_hash_node_create(parser);
|
16714
|
-
pm_static_literals_t
|
17141
|
+
pm_static_literals_t hash_keys = { 0 };
|
16715
17142
|
|
16716
17143
|
if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
16717
|
-
parse_assocs(parser, &
|
17144
|
+
parse_assocs(parser, &hash_keys, element);
|
16718
17145
|
}
|
16719
17146
|
|
16720
|
-
pm_static_literals_free(&
|
17147
|
+
pm_static_literals_free(&hash_keys);
|
16721
17148
|
parsed_bare_hash = true;
|
16722
17149
|
} else {
|
16723
17150
|
element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION);
|
@@ -16728,8 +17155,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16728
17155
|
}
|
16729
17156
|
|
16730
17157
|
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
|
16731
|
-
pm_static_literals_t
|
16732
|
-
pm_hash_key_static_literals_add(parser, &
|
17158
|
+
pm_static_literals_t hash_keys = { 0 };
|
17159
|
+
pm_hash_key_static_literals_add(parser, &hash_keys, element);
|
16733
17160
|
|
16734
17161
|
pm_token_t operator;
|
16735
17162
|
if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
|
@@ -16744,10 +17171,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16744
17171
|
|
16745
17172
|
element = (pm_node_t *) hash;
|
16746
17173
|
if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
|
16747
|
-
parse_assocs(parser, &
|
17174
|
+
parse_assocs(parser, &hash_keys, element);
|
16748
17175
|
}
|
16749
17176
|
|
16750
|
-
pm_static_literals_free(&
|
17177
|
+
pm_static_literals_free(&hash_keys);
|
16751
17178
|
parsed_bare_hash = true;
|
16752
17179
|
}
|
16753
17180
|
}
|
@@ -16841,7 +17268,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16841
17268
|
return (pm_node_t *) multi_target;
|
16842
17269
|
}
|
16843
17270
|
|
16844
|
-
return parse_target_validate(parser, (pm_node_t *) multi_target);
|
17271
|
+
return parse_target_validate(parser, (pm_node_t *) multi_target, false);
|
16845
17272
|
}
|
16846
17273
|
|
16847
17274
|
// If we have a single statement and are ending on a right parenthesis
|
@@ -16907,14 +17334,30 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16907
17334
|
return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
|
16908
17335
|
}
|
16909
17336
|
case PM_TOKEN_BRACE_LEFT: {
|
17337
|
+
// If we were passed a current_hash_keys via the parser, then that
|
17338
|
+
// means we're already parsing a hash and we want to share the set
|
17339
|
+
// of hash keys with this inner hash we're about to parse for the
|
17340
|
+
// sake of warnings. We'll set it to NULL after we grab it to make
|
17341
|
+
// sure subsequent expressions don't use it. Effectively this is a
|
17342
|
+
// way of getting around passing it to every call to
|
17343
|
+
// parse_expression.
|
17344
|
+
pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
|
17345
|
+
parser->current_hash_keys = NULL;
|
17346
|
+
|
16910
17347
|
pm_accepts_block_stack_push(parser, true);
|
16911
17348
|
parser_lex(parser);
|
16912
17349
|
|
16913
17350
|
pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
|
16914
|
-
pm_static_literals_t literals = { 0 };
|
16915
17351
|
|
16916
17352
|
if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
|
16917
|
-
|
17353
|
+
if (current_hash_keys != NULL) {
|
17354
|
+
parse_assocs(parser, current_hash_keys, (pm_node_t *) node);
|
17355
|
+
} else {
|
17356
|
+
pm_static_literals_t hash_keys = { 0 };
|
17357
|
+
parse_assocs(parser, &hash_keys, (pm_node_t *) node);
|
17358
|
+
pm_static_literals_free(&hash_keys);
|
17359
|
+
}
|
17360
|
+
|
16918
17361
|
accept1(parser, PM_TOKEN_NEWLINE);
|
16919
17362
|
}
|
16920
17363
|
|
@@ -16922,7 +17365,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16922
17365
|
expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
|
16923
17366
|
pm_hash_node_closing_loc_set(node, &parser->previous);
|
16924
17367
|
|
16925
|
-
pm_static_literals_free(&literals);
|
16926
17368
|
return (pm_node_t *) node;
|
16927
17369
|
}
|
16928
17370
|
case PM_TOKEN_CHARACTER_LITERAL: {
|
@@ -16987,12 +17429,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16987
17429
|
}
|
16988
17430
|
case PM_TOKEN_UCOLON_COLON: {
|
16989
17431
|
parser_lex(parser);
|
16990
|
-
|
16991
17432
|
pm_token_t delimiter = parser->previous;
|
16992
|
-
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
16993
17433
|
|
16994
|
-
|
16995
|
-
pm_node_t *node = (pm_node_t *)pm_constant_path_node_create(parser, NULL, &delimiter,
|
17434
|
+
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
17435
|
+
pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
|
16996
17436
|
|
16997
17437
|
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
|
16998
17438
|
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
|
@@ -17152,8 +17592,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17152
17592
|
if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
|
17153
17593
|
// If we get here, then we have an empty heredoc. We'll create
|
17154
17594
|
// an empty content token and return an empty string node.
|
17155
|
-
|
17156
|
-
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
17595
|
+
expect1_heredoc_term(parser, lex_mode);
|
17157
17596
|
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
17158
17597
|
|
17159
17598
|
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
@@ -17194,8 +17633,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17194
17633
|
}
|
17195
17634
|
|
17196
17635
|
node = (pm_node_t *) cast;
|
17197
|
-
|
17198
|
-
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
17636
|
+
expect1_heredoc_term(parser, lex_mode);
|
17199
17637
|
} else {
|
17200
17638
|
// If we get here, then we have multiple parts in the heredoc,
|
17201
17639
|
// so we'll need to create an interpolated string node to hold
|
@@ -17217,20 +17655,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17217
17655
|
pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
|
17218
17656
|
cast->parts = parts;
|
17219
17657
|
|
17220
|
-
|
17221
|
-
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
17222
|
-
|
17658
|
+
expect1_heredoc_term(parser, lex_mode);
|
17223
17659
|
pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
|
17660
|
+
|
17224
17661
|
cast->base.location = cast->opening_loc;
|
17225
17662
|
node = (pm_node_t *) cast;
|
17226
17663
|
} else {
|
17227
17664
|
pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
|
17228
17665
|
pm_node_list_free(&parts);
|
17229
17666
|
|
17230
|
-
|
17231
|
-
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
17232
|
-
|
17667
|
+
expect1_heredoc_term(parser, lex_mode);
|
17233
17668
|
pm_interpolated_string_node_closing_set(cast, &parser->previous);
|
17669
|
+
|
17234
17670
|
cast->base.location = cast->opening_loc;
|
17235
17671
|
node = (pm_node_t *) cast;
|
17236
17672
|
}
|
@@ -18132,7 +18568,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18132
18568
|
if (match1(parser, PM_TOKEN_COMMA)) {
|
18133
18569
|
index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
|
18134
18570
|
} else {
|
18135
|
-
index = parse_target(parser, index);
|
18571
|
+
index = parse_target(parser, index, false);
|
18136
18572
|
}
|
18137
18573
|
|
18138
18574
|
context_pop(parser);
|
@@ -18254,9 +18690,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18254
18690
|
pm_token_t double_colon = parser->previous;
|
18255
18691
|
|
18256
18692
|
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
18257
|
-
|
18258
|
-
|
18259
|
-
constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, constant);
|
18693
|
+
constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
|
18260
18694
|
}
|
18261
18695
|
|
18262
18696
|
// Here we retrieve the name of the module. If it wasn't a constant,
|
@@ -18636,15 +19070,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18636
19070
|
// If we hit string content and the current node is
|
18637
19071
|
// an interpolated string, then we need to append
|
18638
19072
|
// the string content to the list of child nodes.
|
18639
|
-
pm_interpolated_string_node_append(
|
19073
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
|
18640
19074
|
} else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
|
18641
19075
|
// If we hit string content and the current node is
|
18642
19076
|
// a string node, then we need to convert the
|
18643
19077
|
// current node into an interpolated string and add
|
18644
19078
|
// the string content to the list of child nodes.
|
18645
19079
|
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
18646
|
-
pm_interpolated_string_node_append(
|
18647
|
-
pm_interpolated_string_node_append(
|
19080
|
+
pm_interpolated_string_node_append(interpolated, current);
|
19081
|
+
pm_interpolated_string_node_append(interpolated, string);
|
18648
19082
|
current = (pm_node_t *) interpolated;
|
18649
19083
|
} else {
|
18650
19084
|
assert(false && "unreachable");
|
@@ -18669,7 +19103,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18669
19103
|
pm_token_t opening = not_provided(parser);
|
18670
19104
|
pm_token_t closing = not_provided(parser);
|
18671
19105
|
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
18672
|
-
pm_interpolated_string_node_append(
|
19106
|
+
pm_interpolated_string_node_append(interpolated, current);
|
18673
19107
|
current = (pm_node_t *) interpolated;
|
18674
19108
|
} else {
|
18675
19109
|
// If we hit an embedded variable and the current
|
@@ -18678,7 +19112,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18678
19112
|
}
|
18679
19113
|
|
18680
19114
|
pm_node_t *part = parse_string_part(parser);
|
18681
|
-
pm_interpolated_string_node_append(
|
19115
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
|
18682
19116
|
break;
|
18683
19117
|
}
|
18684
19118
|
case PM_TOKEN_EMBEXPR_BEGIN: {
|
@@ -18698,7 +19132,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18698
19132
|
pm_token_t opening = not_provided(parser);
|
18699
19133
|
pm_token_t closing = not_provided(parser);
|
18700
19134
|
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
18701
|
-
pm_interpolated_string_node_append(
|
19135
|
+
pm_interpolated_string_node_append(interpolated, current);
|
18702
19136
|
current = (pm_node_t *) interpolated;
|
18703
19137
|
} else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
|
18704
19138
|
// If we hit an embedded expression and the current
|
@@ -18709,7 +19143,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18709
19143
|
}
|
18710
19144
|
|
18711
19145
|
pm_node_t *part = parse_string_part(parser);
|
18712
|
-
pm_interpolated_string_node_append(
|
19146
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
|
18713
19147
|
break;
|
18714
19148
|
}
|
18715
19149
|
default:
|
@@ -18913,7 +19347,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18913
19347
|
if (match1(parser, PM_TOKEN_COMMA)) {
|
18914
19348
|
return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX);
|
18915
19349
|
} else {
|
18916
|
-
return parse_target_validate(parser, splat);
|
19350
|
+
return parse_target_validate(parser, splat, true);
|
18917
19351
|
}
|
18918
19352
|
}
|
18919
19353
|
case PM_TOKEN_BANG: {
|
@@ -20046,8 +20480,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20046
20480
|
path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
|
20047
20481
|
} else {
|
20048
20482
|
// Otherwise, this is a constant path. That would look like Foo::Bar.
|
20049
|
-
|
20050
|
-
path = (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
|
20483
|
+
path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
|
20051
20484
|
}
|
20052
20485
|
|
20053
20486
|
// If this is followed by a comma then it is a multiple assignment.
|
@@ -20086,9 +20519,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20086
20519
|
return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
|
20087
20520
|
}
|
20088
20521
|
default: {
|
20089
|
-
|
20090
|
-
|
20091
|
-
return (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
|
20522
|
+
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
20523
|
+
return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
|
20092
20524
|
}
|
20093
20525
|
}
|
20094
20526
|
}
|
@@ -21233,25 +21665,28 @@ pm_parser_errors_format(const pm_parser_t *parser, const pm_list_t *error_list,
|
|
21233
21665
|
pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
|
21234
21666
|
|
21235
21667
|
size_t column = 0;
|
21236
|
-
while (column < error->
|
21237
|
-
|
21238
|
-
pm_buffer_append_byte(buffer, ' ');
|
21239
|
-
} else {
|
21240
|
-
const uint8_t caret = column == error->column_start ? '^' : '~';
|
21668
|
+
while (column < error->column_start) {
|
21669
|
+
pm_buffer_append_byte(buffer, ' ');
|
21241
21670
|
|
21242
|
-
|
21243
|
-
|
21244
|
-
|
21245
|
-
|
21246
|
-
|
21247
|
-
|
21248
|
-
|
21249
|
-
|
21671
|
+
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21672
|
+
column += (char_width == 0 ? 1 : char_width);
|
21673
|
+
}
|
21674
|
+
|
21675
|
+
if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
|
21676
|
+
pm_buffer_append_byte(buffer, '^');
|
21677
|
+
|
21678
|
+
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21679
|
+
column += (char_width == 0 ? 1 : char_width);
|
21680
|
+
|
21681
|
+
while (column < error->column_end) {
|
21682
|
+
pm_buffer_append_byte(buffer, '~');
|
21250
21683
|
|
21251
21684
|
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21252
21685
|
column += (char_width == 0 ? 1 : char_width);
|
21253
21686
|
}
|
21254
21687
|
|
21688
|
+
if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
|
21689
|
+
|
21255
21690
|
if (inline_messages) {
|
21256
21691
|
pm_buffer_append_byte(buffer, ' ');
|
21257
21692
|
assert(error->error != NULL);
|