prism 0.27.0 → 0.29.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +45 -1
- data/config.yml +68 -44
- data/docs/configuration.md +1 -0
- data/ext/prism/api_node.c +854 -847
- data/ext/prism/extconf.rb +27 -23
- data/ext/prism/extension.c +5 -3
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +70 -48
- data/include/prism/diagnostic.h +23 -6
- data/include/prism/options.h +2 -2
- data/include/prism/parser.h +10 -0
- data/include/prism/static_literals.h +8 -6
- data/include/prism/version.h +2 -2
- data/lib/prism/desugar_compiler.rb +4 -4
- data/lib/prism/dot_visitor.rb +54 -38
- data/lib/prism/dsl.rb +24 -24
- data/lib/prism/ffi.rb +4 -4
- data/lib/prism/inspect_visitor.rb +2156 -0
- data/lib/prism/lex_compat.rb +1 -1
- data/lib/prism/mutation_compiler.rb +2 -2
- data/lib/prism/node.rb +737 -1863
- data/lib/prism/node_ext.rb +176 -5
- data/lib/prism/parse_result/comments.rb +1 -1
- data/lib/prism/parse_result/newlines.rb +1 -1
- data/lib/prism/parse_result.rb +78 -0
- data/lib/prism/pattern.rb +12 -6
- data/lib/prism/polyfill/byteindex.rb +13 -0
- data/lib/prism/polyfill/unpack1.rb +14 -0
- data/lib/prism/reflection.rb +20 -20
- data/lib/prism/serialize.rb +32 -15
- data/lib/prism/translation/parser/compiler.rb +156 -26
- data/lib/prism/translation/parser.rb +7 -7
- data/lib/prism/translation/ripper.rb +29 -25
- data/lib/prism/translation/ruby_parser.rb +13 -13
- data/lib/prism.rb +2 -1
- data/prism.gemspec +37 -38
- data/rbi/prism/compiler.rbi +3 -5
- data/rbi/prism/inspect_visitor.rbi +12 -0
- data/rbi/prism/node.rbi +405 -370
- data/rbi/prism/node_ext.rbi +5 -0
- data/rbi/prism/parse_result.rbi +23 -0
- data/rbi/prism/translation/ripper.rbi +1 -11
- data/sig/prism/dsl.rbs +12 -12
- data/sig/prism/inspect_visitor.rbs +22 -0
- data/sig/prism/lex_compat.rbs +10 -0
- data/sig/prism/node.rbs +108 -91
- data/sig/prism/node_ext.rbs +4 -0
- data/sig/prism/parse_result.rbs +12 -0
- data/src/diagnostic.c +66 -33
- data/src/node.c +89 -64
- data/src/options.c +2 -2
- data/src/prettyprint.c +109 -66
- data/src/prism.c +862 -317
- data/src/serialize.c +21 -18
- data/src/static_literals.c +120 -34
- data/src/token_type.c +6 -6
- metadata +8 -9
- data/lib/prism/node_inspector.rb +0 -68
- data/lib/prism/polyfill/string.rb +0 -12
- data/rbi/prism/desugar_compiler.rbi +0 -5
- data/rbi/prism/mutation_compiler.rbi +0 -5
- data/rbi/prism/translation/parser/compiler.rbi +0 -13
- data/rbi/prism/translation/ripper/ripper_compiler.rbi +0 -5
- data/rbi/prism/translation/ruby_parser.rbi +0 -11
data/src/prism.c
CHANGED
@@ -672,6 +672,26 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
|
|
672
672
|
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
|
673
673
|
PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
|
674
674
|
|
675
|
+
/**
|
676
|
+
* Add an error for an expected heredoc terminator. This is a special function
|
677
|
+
* only because it grabs its location off of a lex mode instead of a node or a
|
678
|
+
* token.
|
679
|
+
*/
|
680
|
+
static void
|
681
|
+
pm_parser_err_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
|
682
|
+
const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
|
683
|
+
size_t ident_length = lex_mode->as.heredoc.ident_length;
|
684
|
+
|
685
|
+
PM_PARSER_ERR_FORMAT(
|
686
|
+
parser,
|
687
|
+
ident_start,
|
688
|
+
ident_start + ident_length,
|
689
|
+
PM_ERR_HEREDOC_TERM,
|
690
|
+
(int) ident_length,
|
691
|
+
(const char *) ident_start
|
692
|
+
);
|
693
|
+
}
|
694
|
+
|
675
695
|
/******************************************************************************/
|
676
696
|
/* Scope-related functions */
|
677
697
|
/******************************************************************************/
|
@@ -729,42 +749,97 @@ pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
|
|
729
749
|
return scope;
|
730
750
|
}
|
731
751
|
|
732
|
-
|
733
|
-
|
752
|
+
typedef enum {
|
753
|
+
PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,
|
754
|
+
PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,
|
755
|
+
PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
|
756
|
+
} pm_scope_forwarding_param_check_result_t;
|
757
|
+
|
758
|
+
static pm_scope_forwarding_param_check_result_t
|
759
|
+
pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
|
734
760
|
pm_scope_t *scope = parser->current_scope;
|
735
|
-
|
761
|
+
bool conflict = false;
|
762
|
+
|
763
|
+
while (scope != NULL) {
|
736
764
|
if (scope->parameters & mask) {
|
737
|
-
if (
|
738
|
-
|
739
|
-
|
765
|
+
if (scope->closed) {
|
766
|
+
if (conflict) {
|
767
|
+
return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
|
768
|
+
} else {
|
769
|
+
return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
|
770
|
+
}
|
740
771
|
}
|
741
|
-
|
772
|
+
|
773
|
+
conflict = true;
|
742
774
|
}
|
775
|
+
|
743
776
|
if (scope->closed) break;
|
744
777
|
scope = scope->previous;
|
745
778
|
}
|
746
779
|
|
747
|
-
|
780
|
+
return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
|
748
781
|
}
|
749
782
|
|
750
|
-
static
|
783
|
+
static void
|
751
784
|
pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
|
752
|
-
pm_parser_scope_forwarding_param_check(parser,
|
785
|
+
switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
|
786
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
|
787
|
+
// Pass.
|
788
|
+
break;
|
789
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
|
790
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
|
791
|
+
break;
|
792
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
|
793
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
|
794
|
+
break;
|
795
|
+
}
|
753
796
|
}
|
754
797
|
|
755
|
-
static
|
798
|
+
static void
|
756
799
|
pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
|
757
|
-
pm_parser_scope_forwarding_param_check(parser,
|
800
|
+
switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
|
801
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
|
802
|
+
// Pass.
|
803
|
+
break;
|
804
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
|
805
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
|
806
|
+
break;
|
807
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
|
808
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
809
|
+
break;
|
810
|
+
}
|
758
811
|
}
|
759
812
|
|
760
|
-
static
|
761
|
-
pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *
|
762
|
-
pm_parser_scope_forwarding_param_check(parser,
|
813
|
+
static void
|
814
|
+
pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
|
815
|
+
switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
|
816
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
|
817
|
+
// Pass.
|
818
|
+
break;
|
819
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
|
820
|
+
// This shouldn't happen, because ... is not allowed in the
|
821
|
+
// declaration of blocks. If we get here, we assume we already have
|
822
|
+
// an error for this.
|
823
|
+
break;
|
824
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
|
825
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
826
|
+
break;
|
827
|
+
}
|
763
828
|
}
|
764
829
|
|
765
|
-
static
|
830
|
+
static void
|
766
831
|
pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
|
767
|
-
pm_parser_scope_forwarding_param_check(parser,
|
832
|
+
switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
|
833
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
|
834
|
+
// Pass.
|
835
|
+
break;
|
836
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
|
837
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
|
838
|
+
break;
|
839
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
|
840
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
|
841
|
+
break;
|
842
|
+
}
|
768
843
|
}
|
769
844
|
|
770
845
|
/**
|
@@ -1405,7 +1480,7 @@ pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
|
|
1405
1480
|
static inline void
|
1406
1481
|
pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
|
1407
1482
|
if (pm_conditional_predicate_warn_write_literal_p(node)) {
|
1408
|
-
pm_parser_warn_node(parser, node, parser->version ==
|
1483
|
+
pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
|
1409
1484
|
}
|
1410
1485
|
}
|
1411
1486
|
|
@@ -1683,7 +1758,7 @@ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
|
1683
1758
|
* it's important that it be as fast as possible.
|
1684
1759
|
*/
|
1685
1760
|
static inline size_t
|
1686
|
-
char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
1761
|
+
char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
|
1687
1762
|
if (parser->encoding_changed) {
|
1688
1763
|
size_t width;
|
1689
1764
|
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
|
@@ -2923,6 +2998,29 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
|
|
2923
2998
|
return node;
|
2924
2999
|
}
|
2925
3000
|
|
3001
|
+
/**
|
3002
|
+
* Validate that index expressions do not have keywords or blocks if we are
|
3003
|
+
* parsing as Ruby 3.4+.
|
3004
|
+
*/
|
3005
|
+
static void
|
3006
|
+
pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
|
3007
|
+
if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
|
3008
|
+
if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
|
3009
|
+
pm_node_t *node;
|
3010
|
+
PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
|
3011
|
+
if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
|
3012
|
+
pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
|
3013
|
+
break;
|
3014
|
+
}
|
3015
|
+
}
|
3016
|
+
}
|
3017
|
+
|
3018
|
+
if (block != NULL) {
|
3019
|
+
pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
|
3020
|
+
}
|
3021
|
+
}
|
3022
|
+
}
|
3023
|
+
|
2926
3024
|
/**
|
2927
3025
|
* Allocate and initialize a new IndexAndWriteNode node.
|
2928
3026
|
*/
|
@@ -2931,6 +3029,8 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons
|
|
2931
3029
|
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
|
2932
3030
|
pm_index_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_and_write_node_t);
|
2933
3031
|
|
3032
|
+
pm_index_arguments_check(parser, target->arguments, target->block);
|
3033
|
+
|
2934
3034
|
*node = (pm_index_and_write_node_t) {
|
2935
3035
|
{
|
2936
3036
|
.type = PM_INDEX_AND_WRITE_NODE,
|
@@ -2980,8 +3080,8 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
|
|
2980
3080
|
.message_loc = target->message_loc,
|
2981
3081
|
.read_name = 0,
|
2982
3082
|
.write_name = target->name,
|
2983
|
-
.
|
2984
|
-
.
|
3083
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
|
3084
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
2985
3085
|
.value = value
|
2986
3086
|
};
|
2987
3087
|
|
@@ -3002,6 +3102,8 @@ static pm_index_operator_write_node_t *
|
|
3002
3102
|
pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
|
3003
3103
|
pm_index_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_operator_write_node_t);
|
3004
3104
|
|
3105
|
+
pm_index_arguments_check(parser, target->arguments, target->block);
|
3106
|
+
|
3005
3107
|
*node = (pm_index_operator_write_node_t) {
|
3006
3108
|
{
|
3007
3109
|
.type = PM_INDEX_OPERATOR_WRITE_NODE,
|
@@ -3017,8 +3119,8 @@ pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
|
|
3017
3119
|
.arguments = target->arguments,
|
3018
3120
|
.closing_loc = target->closing_loc,
|
3019
3121
|
.block = target->block,
|
3020
|
-
.
|
3021
|
-
.
|
3122
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
|
3123
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3022
3124
|
.value = value
|
3023
3125
|
};
|
3024
3126
|
|
@@ -3075,6 +3177,8 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
|
|
3075
3177
|
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
|
3076
3178
|
pm_index_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_or_write_node_t);
|
3077
3179
|
|
3180
|
+
pm_index_arguments_check(parser, target->arguments, target->block);
|
3181
|
+
|
3078
3182
|
*node = (pm_index_or_write_node_t) {
|
3079
3183
|
{
|
3080
3184
|
.type = PM_INDEX_OR_WRITE_NODE,
|
@@ -3139,6 +3243,8 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
|
|
3139
3243
|
pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
|
3140
3244
|
pm_node_flags_t flags = target->base.flags;
|
3141
3245
|
|
3246
|
+
pm_index_arguments_check(parser, target->arguments, target->block);
|
3247
|
+
|
3142
3248
|
*node = (pm_index_target_node_t) {
|
3143
3249
|
{
|
3144
3250
|
.type = PM_INDEX_TARGET_NODE,
|
@@ -3358,9 +3464,9 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia
|
|
3358
3464
|
},
|
3359
3465
|
.name = target->name,
|
3360
3466
|
.name_loc = target->base.location,
|
3361
|
-
.
|
3467
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3362
3468
|
.value = value,
|
3363
|
-
.
|
3469
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
3364
3470
|
};
|
3365
3471
|
|
3366
3472
|
return node;
|
@@ -3474,9 +3580,9 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat
|
|
3474
3580
|
}
|
3475
3581
|
},
|
3476
3582
|
.target = target,
|
3477
|
-
.
|
3583
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3478
3584
|
.value = value,
|
3479
|
-
.
|
3585
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
3480
3586
|
};
|
3481
3587
|
|
3482
3588
|
return node;
|
@@ -3510,22 +3616,27 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node
|
|
3510
3616
|
* Allocate and initialize a new ConstantPathNode node.
|
3511
3617
|
*/
|
3512
3618
|
static pm_constant_path_node_t *
|
3513
|
-
pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter,
|
3619
|
+
pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
|
3514
3620
|
pm_assert_value_expression(parser, parent);
|
3515
|
-
|
3516
3621
|
pm_constant_path_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_node_t);
|
3517
3622
|
|
3623
|
+
pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
|
3624
|
+
if (name_token->type == PM_TOKEN_CONSTANT) {
|
3625
|
+
name = pm_parser_constant_id_token(parser, name_token);
|
3626
|
+
}
|
3627
|
+
|
3518
3628
|
*node = (pm_constant_path_node_t) {
|
3519
3629
|
{
|
3520
3630
|
.type = PM_CONSTANT_PATH_NODE,
|
3521
3631
|
.location = {
|
3522
3632
|
.start = parent == NULL ? delimiter->start : parent->location.start,
|
3523
|
-
.end =
|
3633
|
+
.end = name_token->end
|
3524
3634
|
},
|
3525
3635
|
},
|
3526
3636
|
.parent = parent,
|
3527
|
-
.
|
3528
|
-
.delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter)
|
3637
|
+
.name = name,
|
3638
|
+
.delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
|
3639
|
+
.name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
|
3529
3640
|
};
|
3530
3641
|
|
3531
3642
|
return node;
|
@@ -3596,9 +3707,9 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod
|
|
3596
3707
|
},
|
3597
3708
|
.name = target->name,
|
3598
3709
|
.name_loc = target->base.location,
|
3599
|
-
.
|
3710
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3600
3711
|
.value = value,
|
3601
|
-
.
|
3712
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
3602
3713
|
};
|
3603
3714
|
|
3604
3715
|
return node;
|
@@ -3716,6 +3827,113 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
|
|
3716
3827
|
}
|
3717
3828
|
}
|
3718
3829
|
|
3830
|
+
/**
|
3831
|
+
* When a method body is created, we want to check if the last statement is a
|
3832
|
+
* return or a statement that houses a return. If it is, then we want to mark
|
3833
|
+
* that return as being redundant so that we can compile it differently but also
|
3834
|
+
* so that we can indicate that to the user.
|
3835
|
+
*/
|
3836
|
+
static void
|
3837
|
+
pm_def_node_body_redundant_return(pm_node_t *node) {
|
3838
|
+
switch (PM_NODE_TYPE(node)) {
|
3839
|
+
case PM_RETURN_NODE:
|
3840
|
+
node->flags |= PM_RETURN_NODE_FLAGS_REDUNDANT;
|
3841
|
+
break;
|
3842
|
+
case PM_BEGIN_NODE: {
|
3843
|
+
pm_begin_node_t *cast = (pm_begin_node_t *) node;
|
3844
|
+
|
3845
|
+
if (cast->statements != NULL && cast->else_clause == NULL) {
|
3846
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3847
|
+
}
|
3848
|
+
break;
|
3849
|
+
}
|
3850
|
+
case PM_STATEMENTS_NODE: {
|
3851
|
+
pm_statements_node_t *cast = (pm_statements_node_t *) node;
|
3852
|
+
|
3853
|
+
if (cast->body.size > 0) {
|
3854
|
+
pm_def_node_body_redundant_return(cast->body.nodes[cast->body.size - 1]);
|
3855
|
+
}
|
3856
|
+
break;
|
3857
|
+
}
|
3858
|
+
case PM_IF_NODE: {
|
3859
|
+
pm_if_node_t *cast = (pm_if_node_t *) node;
|
3860
|
+
|
3861
|
+
if (cast->statements != NULL) {
|
3862
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3863
|
+
}
|
3864
|
+
|
3865
|
+
if (cast->consequent != NULL) {
|
3866
|
+
pm_def_node_body_redundant_return(cast->consequent);
|
3867
|
+
}
|
3868
|
+
break;
|
3869
|
+
}
|
3870
|
+
case PM_UNLESS_NODE: {
|
3871
|
+
pm_unless_node_t *cast = (pm_unless_node_t *) node;
|
3872
|
+
|
3873
|
+
if (cast->statements != NULL) {
|
3874
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3875
|
+
}
|
3876
|
+
|
3877
|
+
if (cast->consequent != NULL) {
|
3878
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
|
3879
|
+
}
|
3880
|
+
break;
|
3881
|
+
}
|
3882
|
+
case PM_ELSE_NODE: {
|
3883
|
+
pm_else_node_t *cast = (pm_else_node_t *) node;
|
3884
|
+
|
3885
|
+
if (cast->statements != NULL) {
|
3886
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3887
|
+
}
|
3888
|
+
break;
|
3889
|
+
}
|
3890
|
+
case PM_CASE_NODE: {
|
3891
|
+
pm_case_node_t *cast = (pm_case_node_t *) node;
|
3892
|
+
pm_node_t *condition;
|
3893
|
+
|
3894
|
+
PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
|
3895
|
+
pm_def_node_body_redundant_return(condition);
|
3896
|
+
}
|
3897
|
+
|
3898
|
+
if (cast->consequent != NULL) {
|
3899
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
|
3900
|
+
}
|
3901
|
+
break;
|
3902
|
+
}
|
3903
|
+
case PM_WHEN_NODE: {
|
3904
|
+
pm_when_node_t *cast = (pm_when_node_t *) node;
|
3905
|
+
|
3906
|
+
if (cast->statements != NULL) {
|
3907
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3908
|
+
}
|
3909
|
+
break;
|
3910
|
+
}
|
3911
|
+
case PM_CASE_MATCH_NODE: {
|
3912
|
+
pm_case_match_node_t *cast = (pm_case_match_node_t *) node;
|
3913
|
+
pm_node_t *condition;
|
3914
|
+
|
3915
|
+
PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
|
3916
|
+
pm_def_node_body_redundant_return(condition);
|
3917
|
+
}
|
3918
|
+
|
3919
|
+
if (cast->consequent != NULL) {
|
3920
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
|
3921
|
+
}
|
3922
|
+
break;
|
3923
|
+
}
|
3924
|
+
case PM_IN_NODE: {
|
3925
|
+
pm_in_node_t *cast = (pm_in_node_t *) node;
|
3926
|
+
|
3927
|
+
if (cast->statements != NULL) {
|
3928
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3929
|
+
}
|
3930
|
+
break;
|
3931
|
+
}
|
3932
|
+
default:
|
3933
|
+
break;
|
3934
|
+
}
|
3935
|
+
}
|
3936
|
+
|
3719
3937
|
/**
|
3720
3938
|
* Allocate and initialize a new DefNode node.
|
3721
3939
|
*/
|
@@ -3748,6 +3966,10 @@ pm_def_node_create(
|
|
3748
3966
|
pm_def_node_receiver_check(parser, receiver);
|
3749
3967
|
}
|
3750
3968
|
|
3969
|
+
if (body != NULL) {
|
3970
|
+
pm_def_node_body_redundant_return(body);
|
3971
|
+
}
|
3972
|
+
|
3751
3973
|
*node = (pm_def_node_t) {
|
3752
3974
|
{
|
3753
3975
|
.type = PM_DEF_NODE,
|
@@ -4338,9 +4560,9 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta
|
|
4338
4560
|
},
|
4339
4561
|
.name = pm_global_variable_write_name(parser, target),
|
4340
4562
|
.name_loc = target->location,
|
4341
|
-
.
|
4563
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
4342
4564
|
.value = value,
|
4343
|
-
.
|
4565
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
4344
4566
|
};
|
4345
4567
|
|
4346
4568
|
return node;
|
@@ -4846,9 +5068,9 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance
|
|
4846
5068
|
},
|
4847
5069
|
.name = target->name,
|
4848
5070
|
.name_loc = target->base.location,
|
4849
|
-
.
|
5071
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
4850
5072
|
.value = value,
|
4851
|
-
.
|
5073
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
4852
5074
|
};
|
4853
5075
|
|
4854
5076
|
return node;
|
@@ -4922,6 +5144,50 @@ pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable
|
|
4922
5144
|
return node;
|
4923
5145
|
}
|
4924
5146
|
|
5147
|
+
/**
|
5148
|
+
* Append a part into a list of string parts. Importantly this handles nested
|
5149
|
+
* interpolated strings by not necessarily removing the marker for static
|
5150
|
+
* literals.
|
5151
|
+
*/
|
5152
|
+
static void
|
5153
|
+
pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
|
5154
|
+
switch (PM_NODE_TYPE(part)) {
|
5155
|
+
case PM_STRING_NODE:
|
5156
|
+
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5157
|
+
break;
|
5158
|
+
case PM_EMBEDDED_STATEMENTS_NODE: {
|
5159
|
+
pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
|
5160
|
+
pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
|
5161
|
+
|
5162
|
+
if (embedded == NULL) {
|
5163
|
+
// If there are no statements or more than one statement, then
|
5164
|
+
// we lose the static literal flag.
|
5165
|
+
pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
|
5166
|
+
} else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
|
5167
|
+
// If the embedded statement is a string, then we can keep the
|
5168
|
+
// static literal flag and mark the string as frozen.
|
5169
|
+
pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5170
|
+
} else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5171
|
+
// If the embedded statement is an interpolated string and it's
|
5172
|
+
// a static literal, then we can keep the static literal flag.
|
5173
|
+
} else {
|
5174
|
+
// Otherwise we lose the static literal flag.
|
5175
|
+
pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
|
5176
|
+
}
|
5177
|
+
|
5178
|
+
break;
|
5179
|
+
}
|
5180
|
+
case PM_EMBEDDED_VARIABLE_NODE:
|
5181
|
+
pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
|
5182
|
+
break;
|
5183
|
+
default:
|
5184
|
+
assert(false && "unexpected node type");
|
5185
|
+
break;
|
5186
|
+
}
|
5187
|
+
|
5188
|
+
pm_node_list_append(parts, part);
|
5189
|
+
}
|
5190
|
+
|
4925
5191
|
/**
|
4926
5192
|
* Allocate a new InterpolatedRegularExpressionNode node.
|
4927
5193
|
*/
|
@@ -4955,54 +5221,113 @@ pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expressio
|
|
4955
5221
|
node->base.location.end = part->location.end;
|
4956
5222
|
}
|
4957
5223
|
|
4958
|
-
|
4959
|
-
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
4960
|
-
}
|
4961
|
-
|
4962
|
-
if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
|
4963
|
-
pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
|
4964
|
-
}
|
4965
|
-
|
4966
|
-
pm_node_list_append(&node->parts, part);
|
5224
|
+
pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
|
4967
5225
|
}
|
4968
5226
|
|
4969
5227
|
static inline void
|
4970
5228
|
pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
|
4971
5229
|
node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
|
4972
5230
|
node->base.location.end = closing->end;
|
4973
|
-
pm_node_flag_set((pm_node_t *)node, pm_regular_expression_flags_create(parser, closing));
|
5231
|
+
pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
|
4974
5232
|
}
|
4975
5233
|
|
4976
5234
|
/**
|
4977
5235
|
* Append a part to an InterpolatedStringNode node.
|
5236
|
+
*
|
5237
|
+
* This has somewhat complicated semantics, because we need to update
|
5238
|
+
* multiple flags that have somewhat confusing interactions.
|
5239
|
+
*
|
5240
|
+
* PM_NODE_FLAG_STATIC_LITERAL indicates that the node should be treated as a
|
5241
|
+
* single static literal string that can be pushed onto the stack on its own.
|
5242
|
+
* Note that this doesn't necessarily mean that the string will be frozen or
|
5243
|
+
* not; the instructions in CRuby will be either putobject or putstring,
|
5244
|
+
* depending on the combination of `--enable-frozen-string-literal`,
|
5245
|
+
* `# frozen_string_literal: true`, and whether or not there is interpolation.
|
5246
|
+
*
|
5247
|
+
* PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN indicates that the string should be
|
5248
|
+
* explicitly frozen. This will only happen if the string is comprised entirely
|
5249
|
+
* of string parts that are themselves static literals and frozen.
|
5250
|
+
*
|
5251
|
+
* PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE indicates that the string should
|
5252
|
+
* be explicitly marked as mutable. This will happen from
|
5253
|
+
* `--disable-frozen-string-literal` or `# frozen_string_literal: false`. This
|
5254
|
+
* is necessary to indicate that the string should be left up to the runtime,
|
5255
|
+
* which could potentially use a chilled string otherwise.
|
4978
5256
|
*/
|
4979
5257
|
static inline void
|
4980
|
-
pm_interpolated_string_node_append(
|
5258
|
+
pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
|
5259
|
+
#define CLEAR_FLAGS(node) \
|
5260
|
+
node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
|
5261
|
+
|
5262
|
+
#define MUTABLE_FLAGS(node) \
|
5263
|
+
node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
|
5264
|
+
|
4981
5265
|
if (node->parts.size == 0 && node->opening_loc.start == NULL) {
|
4982
5266
|
node->base.location.start = part->location.start;
|
4983
5267
|
}
|
4984
5268
|
|
4985
|
-
|
4986
|
-
|
4987
|
-
|
5269
|
+
node->base.location.end = MAX(node->base.location.end, part->location.end);
|
5270
|
+
|
5271
|
+
switch (PM_NODE_TYPE(part)) {
|
5272
|
+
case PM_STRING_NODE:
|
5273
|
+
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5274
|
+
break;
|
5275
|
+
case PM_INTERPOLATED_STRING_NODE:
|
5276
|
+
if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5277
|
+
// If the string that we're concatenating is a static literal,
|
5278
|
+
// then we can keep the static literal flag for this string.
|
5279
|
+
} else {
|
5280
|
+
// Otherwise, we lose the static literal flag here and we should
|
5281
|
+
// also clear the mutability flags.
|
5282
|
+
CLEAR_FLAGS(node);
|
5283
|
+
}
|
5284
|
+
break;
|
5285
|
+
case PM_EMBEDDED_STATEMENTS_NODE: {
|
5286
|
+
pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
|
5287
|
+
pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
|
5288
|
+
|
5289
|
+
if (embedded == NULL) {
|
5290
|
+
// If we're embedding multiple statements or no statements, then
|
5291
|
+
// the string is no longer a static literal.
|
5292
|
+
CLEAR_FLAGS(node);
|
5293
|
+
} else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
|
5294
|
+
// If the embedded statement is a string, then we can mark that
|
5295
|
+
// string as frozen and static literal, and not touch the static
|
5296
|
+
// literal status of this string.
|
5297
|
+
pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5298
|
+
|
5299
|
+
if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5300
|
+
MUTABLE_FLAGS(node);
|
5301
|
+
}
|
5302
|
+
} else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5303
|
+
// If the embedded statement is an interpolated string, but that
|
5304
|
+
// string is marked as static literal, then we can keep our
|
5305
|
+
// static literal status for this string.
|
5306
|
+
if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5307
|
+
MUTABLE_FLAGS(node);
|
5308
|
+
}
|
5309
|
+
} else {
|
5310
|
+
// In all other cases, we lose the static literal flag here and
|
5311
|
+
// clear the mutability flags.
|
5312
|
+
CLEAR_FLAGS(node);
|
5313
|
+
}
|
4988
5314
|
|
4989
|
-
|
4990
|
-
|
5315
|
+
break;
|
5316
|
+
}
|
5317
|
+
case PM_EMBEDDED_VARIABLE_NODE:
|
5318
|
+
// Embedded variables clear static literal, which means we also
|
5319
|
+
// should clear the mutability flags.
|
5320
|
+
CLEAR_FLAGS(node);
|
5321
|
+
break;
|
5322
|
+
default:
|
5323
|
+
assert(false && "unexpected node type");
|
5324
|
+
break;
|
4991
5325
|
}
|
4992
5326
|
|
4993
5327
|
pm_node_list_append(&node->parts, part);
|
4994
|
-
node->base.location.end = MAX(node->base.location.end, part->location.end);
|
4995
5328
|
|
4996
|
-
|
4997
|
-
|
4998
|
-
case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
|
4999
|
-
pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE);
|
5000
|
-
break;
|
5001
|
-
case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
|
5002
|
-
pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
|
5003
|
-
break;
|
5004
|
-
}
|
5005
|
-
}
|
5329
|
+
#undef CLEAR_FLAGS
|
5330
|
+
#undef MUTABLE_FLAGS
|
5006
5331
|
}
|
5007
5332
|
|
5008
5333
|
/**
|
@@ -5011,11 +5336,21 @@ pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_n
|
|
5011
5336
|
static pm_interpolated_string_node_t *
|
5012
5337
|
pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
|
5013
5338
|
pm_interpolated_string_node_t *node = PM_ALLOC_NODE(parser, pm_interpolated_string_node_t);
|
5339
|
+
pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
|
5340
|
+
|
5341
|
+
switch (parser->frozen_string_literal) {
|
5342
|
+
case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
|
5343
|
+
flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
|
5344
|
+
break;
|
5345
|
+
case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
|
5346
|
+
flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
|
5347
|
+
break;
|
5348
|
+
}
|
5014
5349
|
|
5015
5350
|
*node = (pm_interpolated_string_node_t) {
|
5016
5351
|
{
|
5017
5352
|
.type = PM_INTERPOLATED_STRING_NODE,
|
5018
|
-
.flags =
|
5353
|
+
.flags = flags,
|
5019
5354
|
.location = {
|
5020
5355
|
.start = opening->start,
|
5021
5356
|
.end = closing->end,
|
@@ -5029,7 +5364,7 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin
|
|
5029
5364
|
if (parts != NULL) {
|
5030
5365
|
pm_node_t *part;
|
5031
5366
|
PM_NODE_LIST_FOREACH(parts, index, part) {
|
5032
|
-
pm_interpolated_string_node_append(
|
5367
|
+
pm_interpolated_string_node_append(node, part);
|
5033
5368
|
}
|
5034
5369
|
}
|
5035
5370
|
|
@@ -5051,15 +5386,7 @@ pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_
|
|
5051
5386
|
node->base.location.start = part->location.start;
|
5052
5387
|
}
|
5053
5388
|
|
5054
|
-
|
5055
|
-
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5056
|
-
}
|
5057
|
-
|
5058
|
-
if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5059
|
-
pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
|
5060
|
-
}
|
5061
|
-
|
5062
|
-
pm_node_list_append(&node->parts, part);
|
5389
|
+
pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
|
5063
5390
|
node->base.location.end = MAX(node->base.location.end, part->location.end);
|
5064
5391
|
}
|
5065
5392
|
|
@@ -5125,11 +5452,7 @@ pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *openi
|
|
5125
5452
|
|
5126
5453
|
static inline void
|
5127
5454
|
pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
|
5128
|
-
|
5129
|
-
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5130
|
-
}
|
5131
|
-
|
5132
|
-
pm_node_list_append(&node->parts, part);
|
5455
|
+
pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
|
5133
5456
|
node->base.location.end = part->location.end;
|
5134
5457
|
}
|
5135
5458
|
|
@@ -5341,10 +5664,10 @@ pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *tar
|
|
5341
5664
|
}
|
5342
5665
|
},
|
5343
5666
|
.name_loc = target->location,
|
5344
|
-
.
|
5667
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
5345
5668
|
.value = value,
|
5346
5669
|
.name = name,
|
5347
|
-
.
|
5670
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
|
5348
5671
|
.depth = depth
|
5349
5672
|
};
|
5350
5673
|
|
@@ -6397,6 +6720,7 @@ pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argumen
|
|
6397
6720
|
*node = (pm_return_node_t) {
|
6398
6721
|
{
|
6399
6722
|
.type = PM_RETURN_NODE,
|
6723
|
+
.flags = 0,
|
6400
6724
|
.location = {
|
6401
6725
|
.start = keyword->start,
|
6402
6726
|
.end = (arguments == NULL ? keyword->end : arguments->base.location.end)
|
@@ -6622,7 +6946,7 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node,
|
|
6622
6946
|
case PM_REDO_NODE:
|
6623
6947
|
case PM_RETRY_NODE:
|
6624
6948
|
case PM_RETURN_NODE:
|
6625
|
-
pm_parser_warn_node(parser,
|
6949
|
+
pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
|
6626
6950
|
break;
|
6627
6951
|
default:
|
6628
6952
|
break;
|
@@ -6729,7 +7053,8 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
|
|
6729
7053
|
}
|
6730
7054
|
|
6731
7055
|
/**
|
6732
|
-
* Read through the contents of a string and check if it consists solely of
|
7056
|
+
* Read through the contents of a string and check if it consists solely of
|
7057
|
+
* US-ASCII code points.
|
6733
7058
|
*/
|
6734
7059
|
static bool
|
6735
7060
|
pm_ascii_only_p(const pm_string_t *contents) {
|
@@ -6743,27 +7068,72 @@ pm_ascii_only_p(const pm_string_t *contents) {
|
|
6743
7068
|
return true;
|
6744
7069
|
}
|
6745
7070
|
|
7071
|
+
/**
|
7072
|
+
* Validate that the contents of the given symbol are all valid UTF-8.
|
7073
|
+
*/
|
7074
|
+
static void
|
7075
|
+
parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
|
7076
|
+
for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
|
7077
|
+
size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
|
7078
|
+
|
7079
|
+
if (width == 0) {
|
7080
|
+
pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
|
7081
|
+
break;
|
7082
|
+
}
|
7083
|
+
|
7084
|
+
cursor += width;
|
7085
|
+
}
|
7086
|
+
}
|
7087
|
+
|
7088
|
+
/**
|
7089
|
+
* Validate that the contents of the given symbol are all valid in the encoding
|
7090
|
+
* of the parser.
|
7091
|
+
*/
|
7092
|
+
static void
|
7093
|
+
parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
|
7094
|
+
const pm_encoding_t *encoding = parser->encoding;
|
7095
|
+
|
7096
|
+
for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
|
7097
|
+
size_t width = encoding->char_width(cursor, end - cursor);
|
7098
|
+
|
7099
|
+
if (width == 0) {
|
7100
|
+
pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
|
7101
|
+
break;
|
7102
|
+
}
|
7103
|
+
|
7104
|
+
cursor += width;
|
7105
|
+
}
|
7106
|
+
}
|
7107
|
+
|
6746
7108
|
/**
|
6747
7109
|
* Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
|
6748
7110
|
* encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
|
6749
7111
|
* points. Otherwise, the encoding may be explicitly set with an escape
|
6750
7112
|
* sequence.
|
7113
|
+
*
|
7114
|
+
* If the validate flag is set, then it will check the contents of the symbol
|
7115
|
+
* to ensure that all characters are valid in the encoding.
|
6751
7116
|
*/
|
6752
7117
|
static inline pm_node_flags_t
|
6753
|
-
parse_symbol_encoding(
|
7118
|
+
parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
|
6754
7119
|
if (parser->explicit_encoding != NULL) {
|
6755
7120
|
// A Symbol may optionally have its encoding explicitly set. This will
|
6756
7121
|
// happen if an escape sequence results in a non-ASCII code point.
|
6757
7122
|
if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
|
7123
|
+
if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
|
6758
7124
|
return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
|
6759
7125
|
} else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
|
6760
7126
|
return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
|
7127
|
+
} else if (validate) {
|
7128
|
+
parse_symbol_encoding_validate_other(parser, location, contents);
|
6761
7129
|
}
|
6762
7130
|
} else if (pm_ascii_only_p(contents)) {
|
6763
7131
|
// Ruby stipulates that all source files must use an ASCII-compatible
|
6764
7132
|
// encoding. Thus, all symbols appearing in source are eligible for
|
6765
7133
|
// "downgrading" to US-ASCII.
|
6766
7134
|
return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
|
7135
|
+
} else if (validate) {
|
7136
|
+
parse_symbol_encoding_validate_other(parser, location, contents);
|
6767
7137
|
}
|
6768
7138
|
|
6769
7139
|
return 0;
|
@@ -6931,7 +7301,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
|
|
6931
7301
|
*/
|
6932
7302
|
static pm_symbol_node_t *
|
6933
7303
|
pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
|
6934
|
-
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, &parser->current_string));
|
7304
|
+
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
|
6935
7305
|
parser->current_string = PM_STRING_EMPTY;
|
6936
7306
|
return node;
|
6937
7307
|
}
|
@@ -6953,7 +7323,7 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
6953
7323
|
|
6954
7324
|
assert((label.end - label.start) >= 0);
|
6955
7325
|
pm_string_shared_init(&node->unescaped, label.start, label.end);
|
6956
|
-
pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &node->unescaped));
|
7326
|
+
pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
|
6957
7327
|
|
6958
7328
|
break;
|
6959
7329
|
}
|
@@ -7038,7 +7408,8 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
|
|
7038
7408
|
.unescaped = node->unescaped
|
7039
7409
|
};
|
7040
7410
|
|
7041
|
-
|
7411
|
+
pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
|
7412
|
+
pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
|
7042
7413
|
|
7043
7414
|
// We are explicitly _not_ using pm_node_destroy here because we don't want
|
7044
7415
|
// to trash the unescaped string. We could instead copy the string if we
|
@@ -7574,7 +7945,7 @@ pm_local_variable_read_node_create_it(pm_parser_t *parser, const pm_token_t *nam
|
|
7574
7945
|
static pm_node_t *
|
7575
7946
|
pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
|
7576
7947
|
if (
|
7577
|
-
(parser->version !=
|
7948
|
+
(parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
|
7578
7949
|
!parser->current_scope->closed &&
|
7579
7950
|
(parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
|
7580
7951
|
pm_node_is_it(parser, node)
|
@@ -8023,7 +8394,12 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
|
|
8023
8394
|
// If we have hit a ractor pragma, attempt to lex that.
|
8024
8395
|
uint32_t value_length = (uint32_t) (value_end - value_start);
|
8025
8396
|
if (key_length == 24 && pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
|
8026
|
-
|
8397
|
+
const uint8_t *cursor = parser->current.start;
|
8398
|
+
while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
|
8399
|
+
|
8400
|
+
if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
|
8401
|
+
pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
|
8402
|
+
} else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
|
8027
8403
|
pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
|
8028
8404
|
} else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
|
8029
8405
|
pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
|
@@ -8298,10 +8674,11 @@ context_human(pm_context_t context) {
|
|
8298
8674
|
/* Specific token lexers */
|
8299
8675
|
/******************************************************************************/
|
8300
8676
|
|
8301
|
-
static void
|
8302
|
-
pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *invalid) {
|
8677
|
+
static inline void
|
8678
|
+
pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
|
8303
8679
|
if (invalid != NULL) {
|
8304
|
-
|
8680
|
+
pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
|
8681
|
+
pm_parser_err(parser, invalid, invalid + 1, diag_id);
|
8305
8682
|
}
|
8306
8683
|
}
|
8307
8684
|
|
@@ -8309,7 +8686,7 @@ static size_t
|
|
8309
8686
|
pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
|
8310
8687
|
const uint8_t *invalid = NULL;
|
8311
8688
|
size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
|
8312
|
-
pm_strspn_number_validate(parser, invalid);
|
8689
|
+
pm_strspn_number_validate(parser, string, length, invalid);
|
8313
8690
|
return length;
|
8314
8691
|
}
|
8315
8692
|
|
@@ -8317,7 +8694,7 @@ static size_t
|
|
8317
8694
|
pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
|
8318
8695
|
const uint8_t *invalid = NULL;
|
8319
8696
|
size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
|
8320
|
-
pm_strspn_number_validate(parser, invalid);
|
8697
|
+
pm_strspn_number_validate(parser, string, length, invalid);
|
8321
8698
|
return length;
|
8322
8699
|
}
|
8323
8700
|
|
@@ -8325,7 +8702,7 @@ static size_t
|
|
8325
8702
|
pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
|
8326
8703
|
const uint8_t *invalid = NULL;
|
8327
8704
|
size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
|
8328
|
-
pm_strspn_number_validate(parser, invalid);
|
8705
|
+
pm_strspn_number_validate(parser, string, length, invalid);
|
8329
8706
|
return length;
|
8330
8707
|
}
|
8331
8708
|
|
@@ -8333,7 +8710,7 @@ static size_t
|
|
8333
8710
|
pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
|
8334
8711
|
const uint8_t *invalid = NULL;
|
8335
8712
|
size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
|
8336
|
-
pm_strspn_number_validate(parser, invalid);
|
8713
|
+
pm_strspn_number_validate(parser, string, length, invalid);
|
8337
8714
|
return length;
|
8338
8715
|
}
|
8339
8716
|
|
@@ -8395,6 +8772,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8395
8772
|
if (pm_char_is_decimal_digit(peek(parser))) {
|
8396
8773
|
parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
|
8397
8774
|
} else {
|
8775
|
+
match(parser, '_');
|
8398
8776
|
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
|
8399
8777
|
}
|
8400
8778
|
|
@@ -8407,6 +8785,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8407
8785
|
if (pm_char_is_binary_digit(peek(parser))) {
|
8408
8786
|
parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
|
8409
8787
|
} else {
|
8788
|
+
match(parser, '_');
|
8410
8789
|
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
|
8411
8790
|
}
|
8412
8791
|
|
@@ -8420,6 +8799,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8420
8799
|
if (pm_char_is_octal_digit(peek(parser))) {
|
8421
8800
|
parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
|
8422
8801
|
} else {
|
8802
|
+
match(parser, '_');
|
8423
8803
|
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
|
8424
8804
|
}
|
8425
8805
|
|
@@ -8447,6 +8827,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8447
8827
|
if (pm_char_is_hexadecimal_digit(peek(parser))) {
|
8448
8828
|
parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
|
8449
8829
|
} else {
|
8830
|
+
match(parser, '_');
|
8450
8831
|
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
|
8451
8832
|
}
|
8452
8833
|
|
@@ -8475,6 +8856,16 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8475
8856
|
type = lex_optional_float_suffix(parser, seen_e);
|
8476
8857
|
}
|
8477
8858
|
|
8859
|
+
// At this point we have a completed number, but we want to provide the user
|
8860
|
+
// with a good experience if they put an additional .xxx fractional
|
8861
|
+
// component on the end, so we'll check for that here.
|
8862
|
+
if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
|
8863
|
+
const uint8_t *fraction_start = parser->current.end;
|
8864
|
+
const uint8_t *fraction_end = parser->current.end + 2;
|
8865
|
+
fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
|
8866
|
+
pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
|
8867
|
+
}
|
8868
|
+
|
8478
8869
|
return type;
|
8479
8870
|
}
|
8480
8871
|
|
@@ -8567,7 +8958,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
8567
8958
|
} while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
|
8568
8959
|
|
8569
8960
|
// $0 isn't allowed to be followed by anything.
|
8570
|
-
pm_diagnostic_id_t diag_id = parser->version ==
|
8961
|
+
pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
|
8571
8962
|
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
|
8572
8963
|
}
|
8573
8964
|
|
@@ -8603,7 +8994,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
8603
8994
|
} else {
|
8604
8995
|
// If we get here, then we have a $ followed by something that
|
8605
8996
|
// isn't recognized as a global variable.
|
8606
|
-
pm_diagnostic_id_t diag_id = parser->version ==
|
8997
|
+
pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
|
8607
8998
|
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
8608
8999
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
|
8609
9000
|
}
|
@@ -8976,12 +9367,20 @@ escape_hexadecimal_digit(const uint8_t value) {
|
|
8976
9367
|
* validated.
|
8977
9368
|
*/
|
8978
9369
|
static inline uint32_t
|
8979
|
-
escape_unicode(const uint8_t *string, size_t length) {
|
9370
|
+
escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
|
8980
9371
|
uint32_t value = 0;
|
8981
9372
|
for (size_t index = 0; index < length; index++) {
|
8982
9373
|
if (index != 0) value <<= 4;
|
8983
9374
|
value |= escape_hexadecimal_digit(string[index]);
|
8984
9375
|
}
|
9376
|
+
|
9377
|
+
// Here we're going to verify that the value is actually a valid Unicode
|
9378
|
+
// codepoint and not a surrogate pair.
|
9379
|
+
if (value >= 0xD800 && value <= 0xDFFF) {
|
9380
|
+
pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
|
9381
|
+
return 0xFFFD;
|
9382
|
+
}
|
9383
|
+
|
8985
9384
|
return value;
|
8986
9385
|
}
|
8987
9386
|
|
@@ -9230,7 +9629,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9230
9629
|
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
|
9231
9630
|
}
|
9232
9631
|
|
9233
|
-
escape_write_byte_encoded(parser, buffer, value);
|
9632
|
+
escape_write_byte_encoded(parser, buffer, escape_byte(value, flags));
|
9234
9633
|
} else {
|
9235
9634
|
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
|
9236
9635
|
}
|
@@ -9241,22 +9640,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9241
9640
|
const uint8_t *start = parser->current.end - 1;
|
9242
9641
|
parser->current.end++;
|
9243
9642
|
|
9244
|
-
if (
|
9245
|
-
(parser->current.end + 4 <= parser->end) &&
|
9246
|
-
pm_char_is_hexadecimal_digit(parser->current.end[0]) &&
|
9247
|
-
pm_char_is_hexadecimal_digit(parser->current.end[1]) &&
|
9248
|
-
pm_char_is_hexadecimal_digit(parser->current.end[2]) &&
|
9249
|
-
pm_char_is_hexadecimal_digit(parser->current.end[3])
|
9250
|
-
) {
|
9251
|
-
uint32_t value = escape_unicode(parser->current.end, 4);
|
9252
|
-
|
9253
|
-
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9254
|
-
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
|
9255
|
-
}
|
9256
|
-
escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
|
9257
|
-
|
9258
|
-
parser->current.end += 4;
|
9259
|
-
} else if (peek(parser) == '{') {
|
9643
|
+
if (peek(parser) == '{') {
|
9260
9644
|
const uint8_t *unicode_codepoints_start = parser->current.end - 2;
|
9261
9645
|
|
9262
9646
|
parser->current.end++;
|
@@ -9284,7 +9668,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9284
9668
|
extra_codepoints_start = unicode_start;
|
9285
9669
|
}
|
9286
9670
|
|
9287
|
-
uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
|
9671
|
+
uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
|
9288
9672
|
escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
|
9289
9673
|
|
9290
9674
|
parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
|
@@ -9306,7 +9690,25 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9306
9690
|
pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
|
9307
9691
|
}
|
9308
9692
|
} else {
|
9309
|
-
|
9693
|
+
size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
|
9694
|
+
|
9695
|
+
if (length == 4) {
|
9696
|
+
uint32_t value = escape_unicode(parser, parser->current.end, 4);
|
9697
|
+
|
9698
|
+
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9699
|
+
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
|
9700
|
+
}
|
9701
|
+
|
9702
|
+
escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
|
9703
|
+
parser->current.end += 4;
|
9704
|
+
} else {
|
9705
|
+
parser->current.end += length;
|
9706
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
|
9707
|
+
}
|
9708
|
+
}
|
9709
|
+
|
9710
|
+
if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
|
9711
|
+
pm_parser_err(parser, start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
9310
9712
|
}
|
9311
9713
|
|
9312
9714
|
return;
|
@@ -9560,8 +9962,8 @@ lex_at_variable(pm_parser_t *parser) {
|
|
9560
9962
|
}
|
9561
9963
|
} else if (parser->current.end < parser->end && pm_char_is_decimal_digit(*parser->current.end)) {
|
9562
9964
|
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
|
9563
|
-
if (parser->version ==
|
9564
|
-
diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ?
|
9965
|
+
if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
|
9966
|
+
diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
|
9565
9967
|
}
|
9566
9968
|
|
9567
9969
|
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
@@ -10545,8 +10947,11 @@ parser_lex(pm_parser_t *parser) {
|
|
10545
10947
|
}
|
10546
10948
|
|
10547
10949
|
size_t ident_length = (size_t) (parser->current.end - ident_start);
|
10950
|
+
bool ident_error = false;
|
10951
|
+
|
10548
10952
|
if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
|
10549
|
-
|
10953
|
+
pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
|
10954
|
+
ident_error = true;
|
10550
10955
|
}
|
10551
10956
|
|
10552
10957
|
parser->explicit_encoding = NULL;
|
@@ -10571,7 +10976,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10571
10976
|
// this is not a valid heredoc declaration. In this case we
|
10572
10977
|
// will add an error, but we will still return a heredoc
|
10573
10978
|
// start.
|
10574
|
-
|
10979
|
+
if (!ident_error) pm_parser_err_heredoc_term(parser, parser->lex_modes.current);
|
10575
10980
|
body_start = parser->end;
|
10576
10981
|
} else {
|
10577
10982
|
// Otherwise, we want to indicate that the body of the
|
@@ -11898,7 +12303,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11898
12303
|
// terminator) but still continue parsing so that content after the
|
11899
12304
|
// declaration of the heredoc can be parsed.
|
11900
12305
|
if (parser->current.end >= parser->end) {
|
11901
|
-
|
12306
|
+
pm_parser_err_heredoc_term(parser, lex_mode);
|
11902
12307
|
parser->next_start = lex_mode->as.heredoc.next_start;
|
11903
12308
|
parser->heredoc_end = parser->current.end;
|
11904
12309
|
lex_state_set(parser, PM_LEX_STATE_END);
|
@@ -12537,6 +12942,23 @@ expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_to
|
|
12537
12942
|
parser->previous.type = PM_TOKEN_MISSING;
|
12538
12943
|
}
|
12539
12944
|
|
12945
|
+
/**
|
12946
|
+
* A special expect1 that expects a heredoc terminator and handles popping the
|
12947
|
+
* lex mode accordingly.
|
12948
|
+
*/
|
12949
|
+
static void
|
12950
|
+
expect1_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
|
12951
|
+
if (match1(parser, PM_TOKEN_HEREDOC_END)) {
|
12952
|
+
lex_mode_pop(parser);
|
12953
|
+
parser_lex(parser);
|
12954
|
+
} else {
|
12955
|
+
pm_parser_err_heredoc_term(parser, lex_mode);
|
12956
|
+
lex_mode_pop(parser);
|
12957
|
+
parser->previous.start = parser->previous.end;
|
12958
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
12959
|
+
}
|
12960
|
+
}
|
12961
|
+
|
12540
12962
|
static pm_node_t *
|
12541
12963
|
parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id);
|
12542
12964
|
|
@@ -12664,25 +13086,72 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
|
|
12664
13086
|
*name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
|
12665
13087
|
}
|
12666
13088
|
|
13089
|
+
/**
|
13090
|
+
* Certain expressions are not targetable, but in order to provide a better
|
13091
|
+
* experience we give a specific error message. In order to maintain as much
|
13092
|
+
* information in the tree as possible, we replace them with local variable
|
13093
|
+
* writes.
|
13094
|
+
*/
|
13095
|
+
static pm_node_t *
|
13096
|
+
parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
|
13097
|
+
switch (PM_NODE_TYPE(target)) {
|
13098
|
+
case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
|
13099
|
+
case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
|
13100
|
+
case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
|
13101
|
+
case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
|
13102
|
+
case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
|
13103
|
+
case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
|
13104
|
+
case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
|
13105
|
+
default: break;
|
13106
|
+
}
|
13107
|
+
|
13108
|
+
pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
|
13109
|
+
pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
|
13110
|
+
|
13111
|
+
pm_node_destroy(parser, target);
|
13112
|
+
return (pm_node_t *) result;
|
13113
|
+
}
|
13114
|
+
|
12667
13115
|
/**
|
12668
13116
|
* Convert the given node into a valid target node.
|
12669
13117
|
*/
|
12670
13118
|
static pm_node_t *
|
12671
|
-
parse_target(pm_parser_t *parser, pm_node_t *target) {
|
13119
|
+
parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
12672
13120
|
switch (PM_NODE_TYPE(target)) {
|
12673
13121
|
case PM_MISSING_NODE:
|
12674
13122
|
return target;
|
13123
|
+
case PM_SOURCE_ENCODING_NODE:
|
13124
|
+
case PM_FALSE_NODE:
|
13125
|
+
case PM_SOURCE_FILE_NODE:
|
13126
|
+
case PM_SOURCE_LINE_NODE:
|
13127
|
+
case PM_NIL_NODE:
|
13128
|
+
case PM_SELF_NODE:
|
13129
|
+
case PM_TRUE_NODE: {
|
13130
|
+
// In these special cases, we have specific error messages and we
|
13131
|
+
// will replace them with local variable writes.
|
13132
|
+
return parse_unwriteable_target(parser, target);
|
13133
|
+
}
|
12675
13134
|
case PM_CLASS_VARIABLE_READ_NODE:
|
12676
13135
|
assert(sizeof(pm_class_variable_target_node_t) == sizeof(pm_class_variable_read_node_t));
|
12677
13136
|
target->type = PM_CLASS_VARIABLE_TARGET_NODE;
|
12678
13137
|
return target;
|
12679
13138
|
case PM_CONSTANT_PATH_NODE:
|
13139
|
+
if (context_def_p(parser)) {
|
13140
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
|
13141
|
+
}
|
13142
|
+
|
12680
13143
|
assert(sizeof(pm_constant_path_target_node_t) == sizeof(pm_constant_path_node_t));
|
12681
13144
|
target->type = PM_CONSTANT_PATH_TARGET_NODE;
|
13145
|
+
|
12682
13146
|
return target;
|
12683
13147
|
case PM_CONSTANT_READ_NODE:
|
13148
|
+
if (context_def_p(parser)) {
|
13149
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
|
13150
|
+
}
|
13151
|
+
|
12684
13152
|
assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
|
12685
13153
|
target->type = PM_CONSTANT_TARGET_NODE;
|
13154
|
+
|
12686
13155
|
return target;
|
12687
13156
|
case PM_BACK_REFERENCE_READ_NODE:
|
12688
13157
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
@@ -12715,7 +13184,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
12715
13184
|
pm_splat_node_t *splat = (pm_splat_node_t *) target;
|
12716
13185
|
|
12717
13186
|
if (splat->expression != NULL) {
|
12718
|
-
splat->expression = parse_target(parser, splat->expression);
|
13187
|
+
splat->expression = parse_target(parser, splat->expression, multiple);
|
12719
13188
|
}
|
12720
13189
|
|
12721
13190
|
return (pm_node_t *) splat;
|
@@ -12753,6 +13222,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
12753
13222
|
}
|
12754
13223
|
|
12755
13224
|
if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
|
13225
|
+
if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
|
13226
|
+
pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
|
13227
|
+
}
|
13228
|
+
|
12756
13229
|
parse_write_name(parser, &call->name);
|
12757
13230
|
return (pm_node_t *) pm_call_target_node_create(parser, call);
|
12758
13231
|
}
|
@@ -12780,8 +13253,8 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
12780
13253
|
* assignment.
|
12781
13254
|
*/
|
12782
13255
|
static pm_node_t *
|
12783
|
-
parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
|
12784
|
-
pm_node_t *result = parse_target(parser, target);
|
13256
|
+
parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
13257
|
+
pm_node_t *result = parse_target(parser, target, multiple);
|
12785
13258
|
|
12786
13259
|
// Ensure that we have one of an =, an 'in' in for indexes, and a ')' in parens after the targets.
|
12787
13260
|
if (
|
@@ -12826,13 +13299,20 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
12826
13299
|
}
|
12827
13300
|
case PM_CONSTANT_PATH_NODE: {
|
12828
13301
|
pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
|
13302
|
+
|
13303
|
+
if (context_def_p(parser)) {
|
13304
|
+
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
|
13305
|
+
}
|
13306
|
+
|
12829
13307
|
return parse_shareable_constant_write(parser, node);
|
12830
13308
|
}
|
12831
13309
|
case PM_CONSTANT_READ_NODE: {
|
12832
13310
|
pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
|
13311
|
+
|
12833
13312
|
if (context_def_p(parser)) {
|
12834
13313
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
|
12835
13314
|
}
|
13315
|
+
|
12836
13316
|
pm_node_destroy(parser, target);
|
12837
13317
|
return parse_shareable_constant_write(parser, node);
|
12838
13318
|
}
|
@@ -13011,7 +13491,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13011
13491
|
bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
|
13012
13492
|
|
13013
13493
|
pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
|
13014
|
-
pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target));
|
13494
|
+
pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true));
|
13015
13495
|
|
13016
13496
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
13017
13497
|
if (accept1(parser, PM_TOKEN_USTAR)) {
|
@@ -13027,7 +13507,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13027
13507
|
|
13028
13508
|
if (token_begins_expression_p(parser->current.type)) {
|
13029
13509
|
name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
|
13030
|
-
name = parse_target(parser, name);
|
13510
|
+
name = parse_target(parser, name, true);
|
13031
13511
|
}
|
13032
13512
|
|
13033
13513
|
pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
|
@@ -13035,7 +13515,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13035
13515
|
has_rest = true;
|
13036
13516
|
} else if (token_begins_expression_p(parser->current.type)) {
|
13037
13517
|
pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
|
13038
|
-
target = parse_target(parser, target);
|
13518
|
+
target = parse_target(parser, target, true);
|
13039
13519
|
|
13040
13520
|
pm_multi_target_node_targets_append(parser, result, target);
|
13041
13521
|
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
@@ -13152,11 +13632,11 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
13152
13632
|
*/
|
13153
13633
|
static void
|
13154
13634
|
pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
|
13155
|
-
const pm_node_t *duplicated = pm_static_literals_add(parser, literals, node);
|
13635
|
+
const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
|
13156
13636
|
|
13157
13637
|
if (duplicated != NULL) {
|
13158
13638
|
pm_buffer_t buffer = { 0 };
|
13159
|
-
pm_static_literal_inspect(&buffer, parser, duplicated);
|
13639
|
+
pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
|
13160
13640
|
|
13161
13641
|
pm_diagnostic_list_append_format(
|
13162
13642
|
&parser->warning_list,
|
@@ -13178,7 +13658,7 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
|
|
13178
13658
|
*/
|
13179
13659
|
static void
|
13180
13660
|
pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
|
13181
|
-
if (pm_static_literals_add(parser, literals, node) != NULL) {
|
13661
|
+
if (pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node) != NULL) {
|
13182
13662
|
pm_diagnostic_list_append_format(
|
13183
13663
|
&parser->warning_list,
|
13184
13664
|
node->location.start,
|
@@ -13206,10 +13686,16 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
|
|
13206
13686
|
pm_token_t operator = parser->previous;
|
13207
13687
|
pm_node_t *value = NULL;
|
13208
13688
|
|
13209
|
-
if (
|
13689
|
+
if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
|
13690
|
+
// If we're about to parse a nested hash that is being
|
13691
|
+
// pushed into this hash directly with **, then we want the
|
13692
|
+
// inner hash to share the static literals with the outer
|
13693
|
+
// hash.
|
13694
|
+
parser->current_hash_keys = literals;
|
13210
13695
|
value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
13211
|
-
}
|
13212
|
-
|
13696
|
+
} else if (token_begins_expression_p(parser->current.type)) {
|
13697
|
+
value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
13698
|
+
} else {
|
13213
13699
|
pm_parser_scope_forwarding_keywords_check(parser, &operator);
|
13214
13700
|
}
|
13215
13701
|
|
@@ -13360,15 +13846,16 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13360
13846
|
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
|
13361
13847
|
argument = (pm_node_t *) hash;
|
13362
13848
|
|
13363
|
-
pm_static_literals_t
|
13364
|
-
bool contains_keyword_splat = parse_assocs(parser, &
|
13849
|
+
pm_static_literals_t hash_keys = { 0 };
|
13850
|
+
bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash);
|
13365
13851
|
|
13366
13852
|
parse_arguments_append(parser, arguments, argument);
|
13367
|
-
if (contains_keyword_splat) {
|
13368
|
-
pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
|
13369
|
-
}
|
13370
13853
|
|
13371
|
-
|
13854
|
+
pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
|
13855
|
+
if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
|
13856
|
+
pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
|
13857
|
+
|
13858
|
+
pm_static_literals_free(&hash_keys);
|
13372
13859
|
parsed_bare_hash = true;
|
13373
13860
|
|
13374
13861
|
break;
|
@@ -13444,7 +13931,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13444
13931
|
argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, PM_ERR_EXPECT_ARGUMENT);
|
13445
13932
|
}
|
13446
13933
|
|
13934
|
+
bool contains_keywords = false;
|
13447
13935
|
bool contains_keyword_splat = false;
|
13936
|
+
|
13448
13937
|
if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
|
13449
13938
|
if (parsed_bare_hash) {
|
13450
13939
|
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
|
@@ -13458,10 +13947,11 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13458
13947
|
}
|
13459
13948
|
|
13460
13949
|
pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
|
13950
|
+
contains_keywords = true;
|
13461
13951
|
|
13462
13952
|
// Create the set of static literals for this hash.
|
13463
|
-
pm_static_literals_t
|
13464
|
-
pm_hash_key_static_literals_add(parser, &
|
13953
|
+
pm_static_literals_t hash_keys = { 0 };
|
13954
|
+
pm_hash_key_static_literals_add(parser, &hash_keys, argument);
|
13465
13955
|
|
13466
13956
|
// Finish parsing the one we are part way through.
|
13467
13957
|
pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
|
@@ -13475,10 +13965,10 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13475
13965
|
token_begins_expression_p(parser->current.type) ||
|
13476
13966
|
match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
|
13477
13967
|
)) {
|
13478
|
-
contains_keyword_splat = parse_assocs(parser, &
|
13968
|
+
contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash);
|
13479
13969
|
}
|
13480
13970
|
|
13481
|
-
pm_static_literals_free(&
|
13971
|
+
pm_static_literals_free(&hash_keys);
|
13482
13972
|
parsed_bare_hash = true;
|
13483
13973
|
} else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
|
13484
13974
|
// TODO: Could we solve this with binding powers instead?
|
@@ -13486,9 +13976,12 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13486
13976
|
}
|
13487
13977
|
|
13488
13978
|
parse_arguments_append(parser, arguments, argument);
|
13489
|
-
|
13490
|
-
|
13491
|
-
|
13979
|
+
|
13980
|
+
pm_node_flags_t flags = 0;
|
13981
|
+
if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
|
13982
|
+
if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
|
13983
|
+
pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
|
13984
|
+
|
13492
13985
|
break;
|
13493
13986
|
}
|
13494
13987
|
}
|
@@ -13601,7 +14094,6 @@ typedef enum {
|
|
13601
14094
|
PM_PARAMETERS_ORDER_OPTIONAL,
|
13602
14095
|
PM_PARAMETERS_ORDER_NAMED,
|
13603
14096
|
PM_PARAMETERS_ORDER_NONE,
|
13604
|
-
|
13605
14097
|
} pm_parameters_order_t;
|
13606
14098
|
|
13607
14099
|
/**
|
@@ -13626,31 +14118,37 @@ static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
|
|
13626
14118
|
* Check if current parameter follows valid parameters ordering. If not it adds
|
13627
14119
|
* an error to the list without stopping the parsing, otherwise sets the
|
13628
14120
|
* parameters state to the one corresponding to the current parameter.
|
14121
|
+
*
|
14122
|
+
* It returns true if it was successful, and false otherwise.
|
13629
14123
|
*/
|
13630
|
-
static
|
14124
|
+
static bool
|
13631
14125
|
update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
|
13632
14126
|
pm_parameters_order_t state = parameters_ordering[token->type];
|
13633
|
-
if (state == PM_PARAMETERS_NO_CHANGE) return;
|
14127
|
+
if (state == PM_PARAMETERS_NO_CHANGE) return true;
|
13634
14128
|
|
13635
14129
|
// If we see another ordered argument after a optional argument
|
13636
14130
|
// we only continue parsing ordered arguments until we stop seeing ordered arguments.
|
13637
14131
|
if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
|
13638
14132
|
*current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
|
13639
|
-
return;
|
14133
|
+
return true;
|
13640
14134
|
} else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
|
13641
|
-
return;
|
14135
|
+
return true;
|
13642
14136
|
}
|
13643
14137
|
|
13644
14138
|
if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
|
13645
14139
|
pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
|
13646
|
-
|
13647
|
-
|
13648
|
-
|
14140
|
+
return false;
|
14141
|
+
} else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
|
14142
|
+
pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
|
14143
|
+
return false;
|
14144
|
+
} else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
|
13649
14145
|
// We know what transition we failed on, so we can provide a better error here.
|
13650
14146
|
pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
|
13651
|
-
|
13652
|
-
*current = state;
|
14147
|
+
return false;
|
13653
14148
|
}
|
14149
|
+
|
14150
|
+
if (state < *current) *current = state;
|
14151
|
+
return true;
|
13654
14152
|
}
|
13655
14153
|
|
13656
14154
|
/**
|
@@ -13719,27 +14217,22 @@ parse_parameters(
|
|
13719
14217
|
pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
13720
14218
|
}
|
13721
14219
|
|
13722
|
-
|
13723
|
-
|
13724
|
-
parser_lex(parser);
|
14220
|
+
bool succeeded = update_parameter_state(parser, &parser->current, &order);
|
14221
|
+
parser_lex(parser);
|
13725
14222
|
|
13726
|
-
|
14223
|
+
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
|
14224
|
+
pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
|
13727
14225
|
|
13728
|
-
|
13729
|
-
|
13730
|
-
|
13731
|
-
|
13732
|
-
|
13733
|
-
|
13734
|
-
|
13735
|
-
params->keyword_rest = NULL;
|
13736
|
-
}
|
13737
|
-
pm_parameters_node_keyword_rest_set(params, (pm_node_t *)param);
|
13738
|
-
} else {
|
13739
|
-
update_parameter_state(parser, &parser->current, &order);
|
13740
|
-
parser_lex(parser);
|
14226
|
+
if (params->keyword_rest != NULL) {
|
14227
|
+
// If we already have a keyword rest parameter, then we replace it with the
|
14228
|
+
// forwarding parameter and move the keyword rest parameter to the posts list.
|
14229
|
+
pm_node_t *keyword_rest = params->keyword_rest;
|
14230
|
+
pm_parameters_node_posts_append(params, keyword_rest);
|
14231
|
+
if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
|
14232
|
+
params->keyword_rest = NULL;
|
13741
14233
|
}
|
13742
14234
|
|
14235
|
+
pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
|
13743
14236
|
break;
|
13744
14237
|
}
|
13745
14238
|
case PM_TOKEN_CLASS_VARIABLE:
|
@@ -13834,6 +14327,12 @@ parse_parameters(
|
|
13834
14327
|
pm_token_t local = name;
|
13835
14328
|
local.end -= 1;
|
13836
14329
|
|
14330
|
+
if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
|
14331
|
+
pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
|
14332
|
+
} else if (local.end[-1] == '!' || local.end[-1] == '?') {
|
14333
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
|
14334
|
+
}
|
14335
|
+
|
13837
14336
|
bool repeated = pm_parser_parameter_name_check(parser, &local);
|
13838
14337
|
pm_parser_local_add_token(parser, &local, 1);
|
13839
14338
|
|
@@ -13909,6 +14408,7 @@ parse_parameters(
|
|
13909
14408
|
pm_token_t operator = parser->previous;
|
13910
14409
|
pm_token_t name;
|
13911
14410
|
bool repeated = false;
|
14411
|
+
|
13912
14412
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
13913
14413
|
name = parser->previous;
|
13914
14414
|
repeated = pm_parser_parameter_name_check(parser, &name);
|
@@ -13922,6 +14422,7 @@ parse_parameters(
|
|
13922
14422
|
if (repeated) {
|
13923
14423
|
pm_node_flag_set_repeated_parameter(param);
|
13924
14424
|
}
|
14425
|
+
|
13925
14426
|
if (params->rest == NULL) {
|
13926
14427
|
pm_parameters_node_rest_set(params, param);
|
13927
14428
|
} else {
|
@@ -13933,6 +14434,7 @@ parse_parameters(
|
|
13933
14434
|
}
|
13934
14435
|
case PM_TOKEN_STAR_STAR:
|
13935
14436
|
case PM_TOKEN_USTAR_STAR: {
|
14437
|
+
pm_parameters_order_t previous_order = order;
|
13936
14438
|
update_parameter_state(parser, &parser->current, &order);
|
13937
14439
|
parser_lex(parser);
|
13938
14440
|
|
@@ -13940,6 +14442,10 @@ parse_parameters(
|
|
13940
14442
|
pm_node_t *param;
|
13941
14443
|
|
13942
14444
|
if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
|
14445
|
+
if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
|
14446
|
+
pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
|
14447
|
+
}
|
14448
|
+
|
13943
14449
|
param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
|
13944
14450
|
} else {
|
13945
14451
|
pm_token_t name;
|
@@ -14037,7 +14543,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
|
|
14037
14543
|
pm_rescue_node_operator_set(rescue, &parser->previous);
|
14038
14544
|
|
14039
14545
|
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
|
14040
|
-
reference = parse_target(parser, reference);
|
14546
|
+
reference = parse_target(parser, reference, false);
|
14041
14547
|
|
14042
14548
|
pm_rescue_node_reference_set(rescue, reference);
|
14043
14549
|
break;
|
@@ -14067,7 +14573,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
|
|
14067
14573
|
pm_rescue_node_operator_set(rescue, &parser->previous);
|
14068
14574
|
|
14069
14575
|
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
|
14070
|
-
reference = parse_target(parser, reference);
|
14576
|
+
reference = parse_target(parser, reference, false);
|
14071
14577
|
|
14072
14578
|
pm_rescue_node_reference_set(rescue, reference);
|
14073
14579
|
break;
|
@@ -14391,7 +14897,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
14391
14897
|
arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
|
14392
14898
|
} else {
|
14393
14899
|
pm_accepts_block_stack_push(parser, true);
|
14394
|
-
parse_arguments(parser, arguments,
|
14900
|
+
parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT);
|
14395
14901
|
|
14396
14902
|
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
14397
14903
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
|
@@ -14409,7 +14915,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
14409
14915
|
// If we get here, then the subsequent token cannot be used as an infix
|
14410
14916
|
// operator. In this case we assume the subsequent token is part of an
|
14411
14917
|
// argument to this method call.
|
14412
|
-
parse_arguments(parser, arguments,
|
14918
|
+
parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF);
|
14413
14919
|
|
14414
14920
|
// If we have done with the arguments and still not consumed the comma,
|
14415
14921
|
// then we have a trailing comma where we need to check whether it is
|
@@ -14440,11 +14946,8 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
14440
14946
|
if (arguments->block == NULL && !arguments->has_forwarding) {
|
14441
14947
|
arguments->block = (pm_node_t *) block;
|
14442
14948
|
} else {
|
14443
|
-
|
14444
|
-
|
14445
|
-
} else {
|
14446
|
-
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
|
14447
|
-
}
|
14949
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
|
14950
|
+
|
14448
14951
|
if (arguments->block != NULL) {
|
14449
14952
|
if (arguments->arguments == NULL) {
|
14450
14953
|
arguments->arguments = pm_arguments_node_create(parser);
|
@@ -15036,7 +15539,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
15036
15539
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
15037
15540
|
|
15038
15541
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
15039
|
-
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
15542
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
|
15040
15543
|
|
15041
15544
|
return (pm_node_t *) symbol;
|
15042
15545
|
}
|
@@ -15136,7 +15639,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
15136
15639
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
|
15137
15640
|
}
|
15138
15641
|
|
15139
|
-
return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
15642
|
+
return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
|
15140
15643
|
}
|
15141
15644
|
|
15142
15645
|
/**
|
@@ -15161,7 +15664,7 @@ parse_undef_argument(pm_parser_t *parser) {
|
|
15161
15664
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
15162
15665
|
|
15163
15666
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
15164
|
-
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
15667
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
|
15165
15668
|
|
15166
15669
|
return (pm_node_t *) symbol;
|
15167
15670
|
}
|
@@ -15202,7 +15705,7 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
|
|
15202
15705
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
15203
15706
|
|
15204
15707
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
15205
|
-
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
15708
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
|
15206
15709
|
|
15207
15710
|
return (pm_node_t *) symbol;
|
15208
15711
|
}
|
@@ -15429,8 +15932,12 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
|
|
15429
15932
|
nodes->size = write_index;
|
15430
15933
|
}
|
15431
15934
|
|
15935
|
+
#define PM_PARSE_PATTERN_SINGLE 0
|
15936
|
+
#define PM_PARSE_PATTERN_TOP 1
|
15937
|
+
#define PM_PARSE_PATTERN_MULTI 2
|
15938
|
+
|
15432
15939
|
static pm_node_t *
|
15433
|
-
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures,
|
15940
|
+
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id);
|
15434
15941
|
|
15435
15942
|
/**
|
15436
15943
|
* Add the newly created local to the list of captures for this pattern matching
|
@@ -15459,9 +15966,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
|
|
15459
15966
|
while (accept1(parser, PM_TOKEN_COLON_COLON)) {
|
15460
15967
|
pm_token_t delimiter = parser->previous;
|
15461
15968
|
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
15462
|
-
|
15463
|
-
pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
|
15464
|
-
node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, child);
|
15969
|
+
node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
|
15465
15970
|
}
|
15466
15971
|
|
15467
15972
|
// If there is a [ or ( that follows, then this is part of a larger pattern
|
@@ -15480,7 +15985,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
|
|
15480
15985
|
accept1(parser, PM_TOKEN_NEWLINE);
|
15481
15986
|
|
15482
15987
|
if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
|
15483
|
-
inner = parse_pattern(parser, captures,
|
15988
|
+
inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
|
15484
15989
|
accept1(parser, PM_TOKEN_NEWLINE);
|
15485
15990
|
expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
|
15486
15991
|
}
|
@@ -15492,7 +15997,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
|
|
15492
15997
|
accept1(parser, PM_TOKEN_NEWLINE);
|
15493
15998
|
|
15494
15999
|
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
15495
|
-
inner = parse_pattern(parser, captures,
|
16000
|
+
inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
|
15496
16001
|
accept1(parser, PM_TOKEN_NEWLINE);
|
15497
16002
|
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
|
15498
16003
|
}
|
@@ -15640,6 +16145,33 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
|
|
15640
16145
|
return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
|
15641
16146
|
}
|
15642
16147
|
|
16148
|
+
/**
|
16149
|
+
* Check that the slice of the source given by the bounds parameters constitutes
|
16150
|
+
* a valid local variable name.
|
16151
|
+
*/
|
16152
|
+
static bool
|
16153
|
+
pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
16154
|
+
ptrdiff_t length = end - start;
|
16155
|
+
if (length == 0) return false;
|
16156
|
+
|
16157
|
+
// First ensure that it starts with a valid identifier starting character.
|
16158
|
+
size_t width = char_is_identifier_start(parser, start);
|
16159
|
+
if (width == 0) return false;
|
16160
|
+
|
16161
|
+
// Next, ensure that it's not an uppercase character.
|
16162
|
+
if (parser->encoding_changed) {
|
16163
|
+
if (parser->encoding->isupper_char(start, length)) return false;
|
16164
|
+
} else {
|
16165
|
+
if (pm_encoding_utf_8_isupper_char(start, length)) return false;
|
16166
|
+
}
|
16167
|
+
|
16168
|
+
// Next, iterate through all of the bytes of the string to ensure that they
|
16169
|
+
// are all valid identifier characters.
|
16170
|
+
const uint8_t *cursor = start + width;
|
16171
|
+
while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
|
16172
|
+
return cursor == end;
|
16173
|
+
}
|
16174
|
+
|
15643
16175
|
/**
|
15644
16176
|
* Create an implicit node for the value of a hash pattern that has omitted the
|
15645
16177
|
* value. This will use an implicit local variable target.
|
@@ -15647,14 +16179,18 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
|
|
15647
16179
|
static pm_node_t *
|
15648
16180
|
parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
|
15649
16181
|
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
|
15650
|
-
pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
|
15651
16182
|
|
16183
|
+
pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
|
15652
16184
|
int depth = -1;
|
15653
|
-
|
15654
|
-
|
15655
|
-
PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
|
15656
|
-
} else {
|
16185
|
+
|
16186
|
+
if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
|
15657
16187
|
depth = pm_parser_local_depth_constant_id(parser, constant_id);
|
16188
|
+
} else {
|
16189
|
+
pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
|
16190
|
+
|
16191
|
+
if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
|
16192
|
+
PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
|
16193
|
+
}
|
15658
16194
|
}
|
15659
16195
|
|
15660
16196
|
if (depth == -1) {
|
@@ -15678,7 +16214,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
|
|
15678
16214
|
*/
|
15679
16215
|
static void
|
15680
16216
|
parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
|
15681
|
-
if (pm_static_literals_add(parser, keys, node) != NULL) {
|
16217
|
+
if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
|
15682
16218
|
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
|
15683
16219
|
}
|
15684
16220
|
}
|
@@ -15709,7 +16245,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
15709
16245
|
} else {
|
15710
16246
|
// Here we have a value for the first assoc in the list, so
|
15711
16247
|
// we will parse it now.
|
15712
|
-
value = parse_pattern(parser, captures,
|
16248
|
+
value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
|
15713
16249
|
}
|
15714
16250
|
|
15715
16251
|
pm_token_t operator = not_provided(parser);
|
@@ -15724,7 +16260,8 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
15724
16260
|
// If we get anything else, then this is an error. For this we'll
|
15725
16261
|
// create a missing node for the value and create an assoc node for
|
15726
16262
|
// the first node in the list.
|
15727
|
-
|
16263
|
+
pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
|
16264
|
+
pm_parser_err_node(parser, first_node, diag_id);
|
15728
16265
|
|
15729
16266
|
pm_token_t operator = not_provided(parser);
|
15730
16267
|
pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
|
@@ -15761,7 +16298,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
15761
16298
|
if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
15762
16299
|
value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
|
15763
16300
|
} else {
|
15764
|
-
value = parse_pattern(parser, captures,
|
16301
|
+
value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
|
15765
16302
|
}
|
15766
16303
|
|
15767
16304
|
pm_token_t operator = not_provided(parser);
|
@@ -15818,7 +16355,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
15818
16355
|
|
15819
16356
|
// Otherwise, we'll parse the inner pattern, then deal with it depending
|
15820
16357
|
// on the type it returns.
|
15821
|
-
pm_node_t *inner = parse_pattern(parser, captures,
|
16358
|
+
pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
|
15822
16359
|
|
15823
16360
|
accept1(parser, PM_TOKEN_NEWLINE);
|
15824
16361
|
expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
|
@@ -15885,11 +16422,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
15885
16422
|
first_node = parse_pattern_keyword_rest(parser, captures);
|
15886
16423
|
break;
|
15887
16424
|
case PM_TOKEN_STRING_BEGIN:
|
15888
|
-
first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false,
|
16425
|
+
first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY_LABEL);
|
15889
16426
|
break;
|
15890
16427
|
default: {
|
16428
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
|
15891
16429
|
parser_lex(parser);
|
15892
|
-
pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
|
15893
16430
|
|
15894
16431
|
first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
15895
16432
|
break;
|
@@ -15966,7 +16503,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
15966
16503
|
|
15967
16504
|
if (variable == NULL) {
|
15968
16505
|
if (
|
15969
|
-
(parser->version !=
|
16506
|
+
(parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
|
15970
16507
|
!parser->current_scope->closed &&
|
15971
16508
|
(parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
|
15972
16509
|
pm_token_is_it(parser->previous.start, parser->previous.end)
|
@@ -16040,8 +16577,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
16040
16577
|
parser_lex(parser);
|
16041
16578
|
|
16042
16579
|
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
16043
|
-
|
16044
|
-
pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, child);
|
16580
|
+
pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
|
16045
16581
|
|
16046
16582
|
return parse_pattern_constant_path(parser, captures, (pm_node_t *) node);
|
16047
16583
|
}
|
@@ -16092,7 +16628,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
|
|
16092
16628
|
pm_token_t opening = parser->current;
|
16093
16629
|
parser_lex(parser);
|
16094
16630
|
|
16095
|
-
pm_node_t *body = parse_pattern(parser, captures,
|
16631
|
+
pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
|
16096
16632
|
accept1(parser, PM_TOKEN_NEWLINE);
|
16097
16633
|
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
|
16098
16634
|
pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
|
@@ -16151,7 +16687,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
|
|
16151
16687
|
* Parse a pattern matching expression.
|
16152
16688
|
*/
|
16153
16689
|
static pm_node_t *
|
16154
|
-
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures,
|
16690
|
+
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id) {
|
16155
16691
|
pm_node_t *node = NULL;
|
16156
16692
|
|
16157
16693
|
bool leading_rest = false;
|
@@ -16161,14 +16697,26 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
|
|
16161
16697
|
case PM_TOKEN_LABEL: {
|
16162
16698
|
parser_lex(parser);
|
16163
16699
|
pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
|
16164
|
-
|
16700
|
+
node = (pm_node_t *) parse_pattern_hash(parser, captures, key);
|
16701
|
+
|
16702
|
+
if (!(flags & PM_PARSE_PATTERN_TOP)) {
|
16703
|
+
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
|
16704
|
+
}
|
16705
|
+
|
16706
|
+
return node;
|
16165
16707
|
}
|
16166
16708
|
case PM_TOKEN_USTAR_STAR: {
|
16167
16709
|
node = parse_pattern_keyword_rest(parser, captures);
|
16168
|
-
|
16710
|
+
node = (pm_node_t *) parse_pattern_hash(parser, captures, node);
|
16711
|
+
|
16712
|
+
if (!(flags & PM_PARSE_PATTERN_TOP)) {
|
16713
|
+
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
|
16714
|
+
}
|
16715
|
+
|
16716
|
+
return node;
|
16169
16717
|
}
|
16170
16718
|
case PM_TOKEN_USTAR: {
|
16171
|
-
if (
|
16719
|
+
if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
|
16172
16720
|
parser_lex(parser);
|
16173
16721
|
node = (pm_node_t *) parse_pattern_rest(parser, captures);
|
16174
16722
|
leading_rest = true;
|
@@ -16187,7 +16735,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
|
|
16187
16735
|
return (pm_node_t *) parse_pattern_hash(parser, captures, node);
|
16188
16736
|
}
|
16189
16737
|
|
16190
|
-
if (
|
16738
|
+
if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
|
16191
16739
|
// If we have a comma, then we are now parsing either an array pattern or a
|
16192
16740
|
// find pattern. We need to parse all of the patterns, put them into a big
|
16193
16741
|
// list, and then determine which type of node we have.
|
@@ -16367,7 +16915,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
16367
16915
|
|
16368
16916
|
pm_node_list_free(&parts);
|
16369
16917
|
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16370
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
16918
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16371
16919
|
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16372
16920
|
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
16373
16921
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
@@ -16393,7 +16941,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
16393
16941
|
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
16394
16942
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16395
16943
|
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16396
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
16944
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16397
16945
|
} else {
|
16398
16946
|
// If we get here, then we have interpolation so we'll need
|
16399
16947
|
// to create a string or symbol node with interpolation.
|
@@ -16475,11 +17023,11 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
16475
17023
|
pm_token_t bounds = not_provided(parser);
|
16476
17024
|
|
16477
17025
|
pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
|
16478
|
-
pm_interpolated_string_node_append(
|
17026
|
+
pm_interpolated_string_node_append(container, current);
|
16479
17027
|
current = (pm_node_t *) container;
|
16480
17028
|
}
|
16481
17029
|
|
16482
|
-
pm_interpolated_string_node_append(
|
17030
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
|
16483
17031
|
}
|
16484
17032
|
}
|
16485
17033
|
|
@@ -16498,6 +17046,11 @@ pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
16498
17046
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
|
16499
17047
|
break;
|
16500
17048
|
}
|
17049
|
+
case PM_ERR_HASH_VALUE:
|
17050
|
+
case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
|
17051
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
|
17052
|
+
break;
|
17053
|
+
}
|
16501
17054
|
case PM_ERR_UNARY_RECEIVER: {
|
16502
17055
|
const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
|
16503
17056
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
|
@@ -16724,13 +17277,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16724
17277
|
}
|
16725
17278
|
|
16726
17279
|
element = (pm_node_t *) pm_keyword_hash_node_create(parser);
|
16727
|
-
pm_static_literals_t
|
17280
|
+
pm_static_literals_t hash_keys = { 0 };
|
16728
17281
|
|
16729
17282
|
if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
16730
|
-
parse_assocs(parser, &
|
17283
|
+
parse_assocs(parser, &hash_keys, element);
|
16731
17284
|
}
|
16732
17285
|
|
16733
|
-
pm_static_literals_free(&
|
17286
|
+
pm_static_literals_free(&hash_keys);
|
16734
17287
|
parsed_bare_hash = true;
|
16735
17288
|
} else {
|
16736
17289
|
element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION);
|
@@ -16741,8 +17294,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16741
17294
|
}
|
16742
17295
|
|
16743
17296
|
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
|
16744
|
-
pm_static_literals_t
|
16745
|
-
pm_hash_key_static_literals_add(parser, &
|
17297
|
+
pm_static_literals_t hash_keys = { 0 };
|
17298
|
+
pm_hash_key_static_literals_add(parser, &hash_keys, element);
|
16746
17299
|
|
16747
17300
|
pm_token_t operator;
|
16748
17301
|
if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
|
@@ -16757,10 +17310,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16757
17310
|
|
16758
17311
|
element = (pm_node_t *) hash;
|
16759
17312
|
if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
|
16760
|
-
parse_assocs(parser, &
|
17313
|
+
parse_assocs(parser, &hash_keys, element);
|
16761
17314
|
}
|
16762
17315
|
|
16763
|
-
pm_static_literals_free(&
|
17316
|
+
pm_static_literals_free(&hash_keys);
|
16764
17317
|
parsed_bare_hash = true;
|
16765
17318
|
}
|
16766
17319
|
}
|
@@ -16854,7 +17407,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16854
17407
|
return (pm_node_t *) multi_target;
|
16855
17408
|
}
|
16856
17409
|
|
16857
|
-
return parse_target_validate(parser, (pm_node_t *) multi_target);
|
17410
|
+
return parse_target_validate(parser, (pm_node_t *) multi_target, false);
|
16858
17411
|
}
|
16859
17412
|
|
16860
17413
|
// If we have a single statement and are ending on a right parenthesis
|
@@ -16920,14 +17473,30 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16920
17473
|
return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
|
16921
17474
|
}
|
16922
17475
|
case PM_TOKEN_BRACE_LEFT: {
|
17476
|
+
// If we were passed a current_hash_keys via the parser, then that
|
17477
|
+
// means we're already parsing a hash and we want to share the set
|
17478
|
+
// of hash keys with this inner hash we're about to parse for the
|
17479
|
+
// sake of warnings. We'll set it to NULL after we grab it to make
|
17480
|
+
// sure subsequent expressions don't use it. Effectively this is a
|
17481
|
+
// way of getting around passing it to every call to
|
17482
|
+
// parse_expression.
|
17483
|
+
pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
|
17484
|
+
parser->current_hash_keys = NULL;
|
17485
|
+
|
16923
17486
|
pm_accepts_block_stack_push(parser, true);
|
16924
17487
|
parser_lex(parser);
|
16925
17488
|
|
16926
17489
|
pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
|
16927
|
-
pm_static_literals_t literals = { 0 };
|
16928
17490
|
|
16929
17491
|
if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
|
16930
|
-
|
17492
|
+
if (current_hash_keys != NULL) {
|
17493
|
+
parse_assocs(parser, current_hash_keys, (pm_node_t *) node);
|
17494
|
+
} else {
|
17495
|
+
pm_static_literals_t hash_keys = { 0 };
|
17496
|
+
parse_assocs(parser, &hash_keys, (pm_node_t *) node);
|
17497
|
+
pm_static_literals_free(&hash_keys);
|
17498
|
+
}
|
17499
|
+
|
16931
17500
|
accept1(parser, PM_TOKEN_NEWLINE);
|
16932
17501
|
}
|
16933
17502
|
|
@@ -16935,7 +17504,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16935
17504
|
expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
|
16936
17505
|
pm_hash_node_closing_loc_set(node, &parser->previous);
|
16937
17506
|
|
16938
|
-
pm_static_literals_free(&literals);
|
16939
17507
|
return (pm_node_t *) node;
|
16940
17508
|
}
|
16941
17509
|
case PM_TOKEN_CHARACTER_LITERAL: {
|
@@ -17000,12 +17568,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17000
17568
|
}
|
17001
17569
|
case PM_TOKEN_UCOLON_COLON: {
|
17002
17570
|
parser_lex(parser);
|
17003
|
-
|
17004
17571
|
pm_token_t delimiter = parser->previous;
|
17005
|
-
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
17006
17572
|
|
17007
|
-
|
17008
|
-
pm_node_t *node = (pm_node_t *)pm_constant_path_node_create(parser, NULL, &delimiter,
|
17573
|
+
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
17574
|
+
pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
|
17009
17575
|
|
17010
17576
|
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
|
17011
17577
|
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
|
@@ -17165,8 +17731,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17165
17731
|
if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
|
17166
17732
|
// If we get here, then we have an empty heredoc. We'll create
|
17167
17733
|
// an empty content token and return an empty string node.
|
17168
|
-
|
17169
|
-
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
17734
|
+
expect1_heredoc_term(parser, lex_mode);
|
17170
17735
|
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
17171
17736
|
|
17172
17737
|
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
@@ -17207,8 +17772,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17207
17772
|
}
|
17208
17773
|
|
17209
17774
|
node = (pm_node_t *) cast;
|
17210
|
-
|
17211
|
-
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
17775
|
+
expect1_heredoc_term(parser, lex_mode);
|
17212
17776
|
} else {
|
17213
17777
|
// If we get here, then we have multiple parts in the heredoc,
|
17214
17778
|
// so we'll need to create an interpolated string node to hold
|
@@ -17230,20 +17794,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17230
17794
|
pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
|
17231
17795
|
cast->parts = parts;
|
17232
17796
|
|
17233
|
-
|
17234
|
-
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
17235
|
-
|
17797
|
+
expect1_heredoc_term(parser, lex_mode);
|
17236
17798
|
pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
|
17799
|
+
|
17237
17800
|
cast->base.location = cast->opening_loc;
|
17238
17801
|
node = (pm_node_t *) cast;
|
17239
17802
|
} else {
|
17240
17803
|
pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
|
17241
17804
|
pm_node_list_free(&parts);
|
17242
17805
|
|
17243
|
-
|
17244
|
-
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
17245
|
-
|
17806
|
+
expect1_heredoc_term(parser, lex_mode);
|
17246
17807
|
pm_interpolated_string_node_closing_set(cast, &parser->previous);
|
17808
|
+
|
17247
17809
|
cast->base.location = cast->opening_loc;
|
17248
17810
|
node = (pm_node_t *) cast;
|
17249
17811
|
}
|
@@ -17464,7 +18026,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17464
18026
|
pm_token_t in_keyword = parser->previous;
|
17465
18027
|
|
17466
18028
|
pm_constant_id_list_t captures = { 0 };
|
17467
|
-
pm_node_t *pattern = parse_pattern(parser, &captures,
|
18029
|
+
pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
|
17468
18030
|
|
17469
18031
|
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
|
17470
18032
|
pm_constant_id_list_free(&captures);
|
@@ -17493,7 +18055,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17493
18055
|
then_keyword = not_provided(parser);
|
17494
18056
|
}
|
17495
18057
|
} else {
|
17496
|
-
expect1(parser, PM_TOKEN_KEYWORD_THEN,
|
18058
|
+
expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
|
17497
18059
|
then_keyword = parser->previous;
|
17498
18060
|
}
|
17499
18061
|
|
@@ -17947,7 +18509,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17947
18509
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
17948
18510
|
parser->command_start = true;
|
17949
18511
|
|
17950
|
-
|
18512
|
+
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
18513
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
|
18514
|
+
parser->previous.start = parser->previous.end;
|
18515
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
18516
|
+
}
|
18517
|
+
|
17951
18518
|
rparen = parser->previous;
|
17952
18519
|
break;
|
17953
18520
|
}
|
@@ -18145,7 +18712,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18145
18712
|
if (match1(parser, PM_TOKEN_COMMA)) {
|
18146
18713
|
index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
|
18147
18714
|
} else {
|
18148
|
-
index = parse_target(parser, index);
|
18715
|
+
index = parse_target(parser, index, false);
|
18149
18716
|
}
|
18150
18717
|
|
18151
18718
|
context_pop(parser);
|
@@ -18267,9 +18834,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18267
18834
|
pm_token_t double_colon = parser->previous;
|
18268
18835
|
|
18269
18836
|
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
18270
|
-
|
18271
|
-
|
18272
|
-
constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, constant);
|
18837
|
+
constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
|
18273
18838
|
}
|
18274
18839
|
|
18275
18840
|
// Here we retrieve the name of the module. If it wasn't a constant,
|
@@ -18649,15 +19214,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18649
19214
|
// If we hit string content and the current node is
|
18650
19215
|
// an interpolated string, then we need to append
|
18651
19216
|
// the string content to the list of child nodes.
|
18652
|
-
pm_interpolated_string_node_append(
|
19217
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
|
18653
19218
|
} else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
|
18654
19219
|
// If we hit string content and the current node is
|
18655
19220
|
// a string node, then we need to convert the
|
18656
19221
|
// current node into an interpolated string and add
|
18657
19222
|
// the string content to the list of child nodes.
|
18658
19223
|
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
18659
|
-
pm_interpolated_string_node_append(
|
18660
|
-
pm_interpolated_string_node_append(
|
19224
|
+
pm_interpolated_string_node_append(interpolated, current);
|
19225
|
+
pm_interpolated_string_node_append(interpolated, string);
|
18661
19226
|
current = (pm_node_t *) interpolated;
|
18662
19227
|
} else {
|
18663
19228
|
assert(false && "unreachable");
|
@@ -18682,7 +19247,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18682
19247
|
pm_token_t opening = not_provided(parser);
|
18683
19248
|
pm_token_t closing = not_provided(parser);
|
18684
19249
|
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
18685
|
-
pm_interpolated_string_node_append(
|
19250
|
+
pm_interpolated_string_node_append(interpolated, current);
|
18686
19251
|
current = (pm_node_t *) interpolated;
|
18687
19252
|
} else {
|
18688
19253
|
// If we hit an embedded variable and the current
|
@@ -18691,7 +19256,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18691
19256
|
}
|
18692
19257
|
|
18693
19258
|
pm_node_t *part = parse_string_part(parser);
|
18694
|
-
pm_interpolated_string_node_append(
|
19259
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
|
18695
19260
|
break;
|
18696
19261
|
}
|
18697
19262
|
case PM_TOKEN_EMBEXPR_BEGIN: {
|
@@ -18711,7 +19276,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18711
19276
|
pm_token_t opening = not_provided(parser);
|
18712
19277
|
pm_token_t closing = not_provided(parser);
|
18713
19278
|
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
18714
|
-
pm_interpolated_string_node_append(
|
19279
|
+
pm_interpolated_string_node_append(interpolated, current);
|
18715
19280
|
current = (pm_node_t *) interpolated;
|
18716
19281
|
} else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
|
18717
19282
|
// If we hit an embedded expression and the current
|
@@ -18722,7 +19287,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18722
19287
|
}
|
18723
19288
|
|
18724
19289
|
pm_node_t *part = parse_string_part(parser);
|
18725
|
-
pm_interpolated_string_node_append(
|
19290
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
|
18726
19291
|
break;
|
18727
19292
|
}
|
18728
19293
|
default:
|
@@ -18798,6 +19363,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18798
19363
|
pm_token_t opening = not_provided(parser);
|
18799
19364
|
pm_token_t closing = not_provided(parser);
|
18800
19365
|
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
|
19366
|
+
|
19367
|
+
if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
|
19368
|
+
// This is extremely strange, but the first string part of a
|
19369
|
+
// regular expression will always be tagged as binary if we
|
19370
|
+
// are in a US-ASCII file, no matter its contents.
|
19371
|
+
pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
|
19372
|
+
}
|
19373
|
+
|
18801
19374
|
pm_interpolated_regular_expression_node_append(interpolated, part);
|
18802
19375
|
} else {
|
18803
19376
|
// If the first part of the body of the regular expression is not a
|
@@ -18926,7 +19499,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18926
19499
|
if (match1(parser, PM_TOKEN_COMMA)) {
|
18927
19500
|
return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX);
|
18928
19501
|
} else {
|
18929
|
-
return parse_target_validate(parser, splat);
|
19502
|
+
return parse_target_validate(parser, splat, true);
|
18930
19503
|
}
|
18931
19504
|
}
|
18932
19505
|
case PM_TOKEN_BANG: {
|
@@ -19271,39 +19844,6 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
|
|
19271
19844
|
}
|
19272
19845
|
}
|
19273
19846
|
|
19274
|
-
/**
|
19275
|
-
* Returns true if the name of the capture group is a valid local variable that
|
19276
|
-
* can be written to.
|
19277
|
-
*/
|
19278
|
-
static bool
|
19279
|
-
parse_regular_expression_named_capture(pm_parser_t *parser, const uint8_t *source, size_t length) {
|
19280
|
-
if (length == 0) {
|
19281
|
-
return false;
|
19282
|
-
}
|
19283
|
-
|
19284
|
-
// First ensure that it starts with a valid identifier starting character.
|
19285
|
-
size_t width = char_is_identifier_start(parser, source);
|
19286
|
-
if (!width) {
|
19287
|
-
return false;
|
19288
|
-
}
|
19289
|
-
|
19290
|
-
// Next, ensure that it's not an uppercase character.
|
19291
|
-
if (parser->encoding_changed) {
|
19292
|
-
if (parser->encoding->isupper_char(source, (ptrdiff_t) length)) return false;
|
19293
|
-
} else {
|
19294
|
-
if (pm_encoding_utf_8_isupper_char(source, (ptrdiff_t) length)) return false;
|
19295
|
-
}
|
19296
|
-
|
19297
|
-
// Next, iterate through all of the bytes of the string to ensure that they
|
19298
|
-
// are all valid identifier characters.
|
19299
|
-
const uint8_t *cursor = source + width;
|
19300
|
-
while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
|
19301
|
-
cursor += width;
|
19302
|
-
}
|
19303
|
-
|
19304
|
-
return cursor == source + length;
|
19305
|
-
}
|
19306
|
-
|
19307
19847
|
/**
|
19308
19848
|
* Potentially change a =~ with a regular expression with named captures into a
|
19309
19849
|
* match write node.
|
@@ -19330,7 +19870,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
|
|
19330
19870
|
|
19331
19871
|
// If the name of the capture group isn't a valid identifier, we do
|
19332
19872
|
// not add it to the local table.
|
19333
|
-
if (!
|
19873
|
+
if (!pm_slice_is_valid_local(parser, source, source + length)) continue;
|
19334
19874
|
|
19335
19875
|
if (content->type == PM_STRING_SHARED) {
|
19336
19876
|
// If the unescaped string is a slice of the source, then we can
|
@@ -19788,7 +20328,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
19788
20328
|
// In this case we have an operator but we don't know what it's for.
|
19789
20329
|
// We need to treat it as an error. For now, we'll mark it as an error
|
19790
20330
|
// and just skip right past it.
|
19791
|
-
|
20331
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
|
19792
20332
|
return node;
|
19793
20333
|
}
|
19794
20334
|
}
|
@@ -20059,8 +20599,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20059
20599
|
path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
|
20060
20600
|
} else {
|
20061
20601
|
// Otherwise, this is a constant path. That would look like Foo::Bar.
|
20062
|
-
|
20063
|
-
path = (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
|
20602
|
+
path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
|
20064
20603
|
}
|
20065
20604
|
|
20066
20605
|
// If this is followed by a comma then it is a multiple assignment.
|
@@ -20099,9 +20638,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20099
20638
|
return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
|
20100
20639
|
}
|
20101
20640
|
default: {
|
20102
|
-
|
20103
|
-
|
20104
|
-
return (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
|
20641
|
+
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
20642
|
+
return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
|
20105
20643
|
}
|
20106
20644
|
}
|
20107
20645
|
}
|
@@ -20172,7 +20710,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20172
20710
|
parser_lex(parser);
|
20173
20711
|
|
20174
20712
|
pm_constant_id_list_t captures = { 0 };
|
20175
|
-
pm_node_t *pattern = parse_pattern(parser, &captures,
|
20713
|
+
pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
|
20176
20714
|
|
20177
20715
|
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
|
20178
20716
|
pm_constant_id_list_free(&captures);
|
@@ -20189,7 +20727,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20189
20727
|
parser_lex(parser);
|
20190
20728
|
|
20191
20729
|
pm_constant_id_list_t captures = { 0 };
|
20192
|
-
pm_node_t *pattern = parse_pattern(parser, &captures,
|
20730
|
+
pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
|
20193
20731
|
|
20194
20732
|
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
|
20195
20733
|
pm_constant_id_list_free(&captures);
|
@@ -20202,6 +20740,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20202
20740
|
}
|
20203
20741
|
}
|
20204
20742
|
|
20743
|
+
#undef PM_PARSE_PATTERN_SINGLE
|
20744
|
+
#undef PM_PARSE_PATTERN_TOP
|
20745
|
+
#undef PM_PARSE_PATTERN_MULTI
|
20746
|
+
|
20205
20747
|
/**
|
20206
20748
|
* Parse an expression at the given point of the parser using the given binding
|
20207
20749
|
* power to parse subsequent chains. If this function finds a syntax error, it
|
@@ -21246,25 +21788,28 @@ pm_parser_errors_format(const pm_parser_t *parser, const pm_list_t *error_list,
|
|
21246
21788
|
pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
|
21247
21789
|
|
21248
21790
|
size_t column = 0;
|
21249
|
-
while (column < error->
|
21250
|
-
|
21251
|
-
pm_buffer_append_byte(buffer, ' ');
|
21252
|
-
} else {
|
21253
|
-
const uint8_t caret = column == error->column_start ? '^' : '~';
|
21791
|
+
while (column < error->column_start) {
|
21792
|
+
pm_buffer_append_byte(buffer, ' ');
|
21254
21793
|
|
21255
|
-
|
21256
|
-
|
21257
|
-
|
21258
|
-
|
21259
|
-
|
21260
|
-
|
21261
|
-
|
21262
|
-
|
21794
|
+
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21795
|
+
column += (char_width == 0 ? 1 : char_width);
|
21796
|
+
}
|
21797
|
+
|
21798
|
+
if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
|
21799
|
+
pm_buffer_append_byte(buffer, '^');
|
21800
|
+
|
21801
|
+
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21802
|
+
column += (char_width == 0 ? 1 : char_width);
|
21803
|
+
|
21804
|
+
while (column < error->column_end) {
|
21805
|
+
pm_buffer_append_byte(buffer, '~');
|
21263
21806
|
|
21264
21807
|
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21265
21808
|
column += (char_width == 0 ? 1 : char_width);
|
21266
21809
|
}
|
21267
21810
|
|
21811
|
+
if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
|
21812
|
+
|
21268
21813
|
if (inline_messages) {
|
21269
21814
|
pm_buffer_append_byte(buffer, ' ');
|
21270
21815
|
assert(error->error != NULL);
|