prism 0.27.0 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +45 -1
- data/config.yml +68 -44
- data/docs/configuration.md +1 -0
- data/ext/prism/api_node.c +854 -847
- data/ext/prism/extconf.rb +27 -23
- data/ext/prism/extension.c +5 -3
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +70 -48
- data/include/prism/diagnostic.h +23 -6
- data/include/prism/options.h +2 -2
- data/include/prism/parser.h +10 -0
- data/include/prism/static_literals.h +8 -6
- data/include/prism/version.h +2 -2
- data/lib/prism/desugar_compiler.rb +4 -4
- data/lib/prism/dot_visitor.rb +54 -38
- data/lib/prism/dsl.rb +24 -24
- data/lib/prism/ffi.rb +4 -4
- data/lib/prism/inspect_visitor.rb +2156 -0
- data/lib/prism/lex_compat.rb +1 -1
- data/lib/prism/mutation_compiler.rb +2 -2
- data/lib/prism/node.rb +737 -1863
- data/lib/prism/node_ext.rb +176 -5
- data/lib/prism/parse_result/comments.rb +1 -1
- data/lib/prism/parse_result/newlines.rb +1 -1
- data/lib/prism/parse_result.rb +78 -0
- data/lib/prism/pattern.rb +12 -6
- data/lib/prism/polyfill/byteindex.rb +13 -0
- data/lib/prism/polyfill/unpack1.rb +14 -0
- data/lib/prism/reflection.rb +20 -20
- data/lib/prism/serialize.rb +32 -15
- data/lib/prism/translation/parser/compiler.rb +156 -26
- data/lib/prism/translation/parser.rb +7 -7
- data/lib/prism/translation/ripper.rb +29 -25
- data/lib/prism/translation/ruby_parser.rb +13 -13
- data/lib/prism.rb +2 -1
- data/prism.gemspec +37 -38
- data/rbi/prism/compiler.rbi +3 -5
- data/rbi/prism/inspect_visitor.rbi +12 -0
- data/rbi/prism/node.rbi +405 -370
- data/rbi/prism/node_ext.rbi +5 -0
- data/rbi/prism/parse_result.rbi +23 -0
- data/rbi/prism/translation/ripper.rbi +1 -11
- data/sig/prism/dsl.rbs +12 -12
- data/sig/prism/inspect_visitor.rbs +22 -0
- data/sig/prism/lex_compat.rbs +10 -0
- data/sig/prism/node.rbs +108 -91
- data/sig/prism/node_ext.rbs +4 -0
- data/sig/prism/parse_result.rbs +12 -0
- data/src/diagnostic.c +66 -33
- data/src/node.c +89 -64
- data/src/options.c +2 -2
- data/src/prettyprint.c +109 -66
- data/src/prism.c +862 -317
- data/src/serialize.c +21 -18
- data/src/static_literals.c +120 -34
- data/src/token_type.c +6 -6
- metadata +8 -9
- data/lib/prism/node_inspector.rb +0 -68
- data/lib/prism/polyfill/string.rb +0 -12
- data/rbi/prism/desugar_compiler.rbi +0 -5
- data/rbi/prism/mutation_compiler.rbi +0 -5
- data/rbi/prism/translation/parser/compiler.rbi +0 -13
- data/rbi/prism/translation/ripper/ripper_compiler.rbi +0 -5
- data/rbi/prism/translation/ruby_parser.rbi +0 -11
data/src/prism.c
CHANGED
@@ -672,6 +672,26 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
|
|
672
672
|
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
|
673
673
|
PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
|
674
674
|
|
675
|
+
/**
|
676
|
+
* Add an error for an expected heredoc terminator. This is a special function
|
677
|
+
* only because it grabs its location off of a lex mode instead of a node or a
|
678
|
+
* token.
|
679
|
+
*/
|
680
|
+
static void
|
681
|
+
pm_parser_err_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
|
682
|
+
const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
|
683
|
+
size_t ident_length = lex_mode->as.heredoc.ident_length;
|
684
|
+
|
685
|
+
PM_PARSER_ERR_FORMAT(
|
686
|
+
parser,
|
687
|
+
ident_start,
|
688
|
+
ident_start + ident_length,
|
689
|
+
PM_ERR_HEREDOC_TERM,
|
690
|
+
(int) ident_length,
|
691
|
+
(const char *) ident_start
|
692
|
+
);
|
693
|
+
}
|
694
|
+
|
675
695
|
/******************************************************************************/
|
676
696
|
/* Scope-related functions */
|
677
697
|
/******************************************************************************/
|
@@ -729,42 +749,97 @@ pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
|
|
729
749
|
return scope;
|
730
750
|
}
|
731
751
|
|
732
|
-
|
733
|
-
|
752
|
+
typedef enum {
|
753
|
+
PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,
|
754
|
+
PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,
|
755
|
+
PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
|
756
|
+
} pm_scope_forwarding_param_check_result_t;
|
757
|
+
|
758
|
+
static pm_scope_forwarding_param_check_result_t
|
759
|
+
pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
|
734
760
|
pm_scope_t *scope = parser->current_scope;
|
735
|
-
|
761
|
+
bool conflict = false;
|
762
|
+
|
763
|
+
while (scope != NULL) {
|
736
764
|
if (scope->parameters & mask) {
|
737
|
-
if (
|
738
|
-
|
739
|
-
|
765
|
+
if (scope->closed) {
|
766
|
+
if (conflict) {
|
767
|
+
return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
|
768
|
+
} else {
|
769
|
+
return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
|
770
|
+
}
|
740
771
|
}
|
741
|
-
|
772
|
+
|
773
|
+
conflict = true;
|
742
774
|
}
|
775
|
+
|
743
776
|
if (scope->closed) break;
|
744
777
|
scope = scope->previous;
|
745
778
|
}
|
746
779
|
|
747
|
-
|
780
|
+
return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
|
748
781
|
}
|
749
782
|
|
750
|
-
static
|
783
|
+
static void
|
751
784
|
pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
|
752
|
-
pm_parser_scope_forwarding_param_check(parser,
|
785
|
+
switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
|
786
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
|
787
|
+
// Pass.
|
788
|
+
break;
|
789
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
|
790
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
|
791
|
+
break;
|
792
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
|
793
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
|
794
|
+
break;
|
795
|
+
}
|
753
796
|
}
|
754
797
|
|
755
|
-
static
|
798
|
+
static void
|
756
799
|
pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
|
757
|
-
pm_parser_scope_forwarding_param_check(parser,
|
800
|
+
switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
|
801
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
|
802
|
+
// Pass.
|
803
|
+
break;
|
804
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
|
805
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
|
806
|
+
break;
|
807
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
|
808
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
809
|
+
break;
|
810
|
+
}
|
758
811
|
}
|
759
812
|
|
760
|
-
static
|
761
|
-
pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *
|
762
|
-
pm_parser_scope_forwarding_param_check(parser,
|
813
|
+
static void
|
814
|
+
pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
|
815
|
+
switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
|
816
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
|
817
|
+
// Pass.
|
818
|
+
break;
|
819
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
|
820
|
+
// This shouldn't happen, because ... is not allowed in the
|
821
|
+
// declaration of blocks. If we get here, we assume we already have
|
822
|
+
// an error for this.
|
823
|
+
break;
|
824
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
|
825
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
826
|
+
break;
|
827
|
+
}
|
763
828
|
}
|
764
829
|
|
765
|
-
static
|
830
|
+
static void
|
766
831
|
pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
|
767
|
-
pm_parser_scope_forwarding_param_check(parser,
|
832
|
+
switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
|
833
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
|
834
|
+
// Pass.
|
835
|
+
break;
|
836
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
|
837
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
|
838
|
+
break;
|
839
|
+
case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
|
840
|
+
pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
|
841
|
+
break;
|
842
|
+
}
|
768
843
|
}
|
769
844
|
|
770
845
|
/**
|
@@ -1405,7 +1480,7 @@ pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
|
|
1405
1480
|
static inline void
|
1406
1481
|
pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
|
1407
1482
|
if (pm_conditional_predicate_warn_write_literal_p(node)) {
|
1408
|
-
pm_parser_warn_node(parser, node, parser->version ==
|
1483
|
+
pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
|
1409
1484
|
}
|
1410
1485
|
}
|
1411
1486
|
|
@@ -1683,7 +1758,7 @@ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
|
1683
1758
|
* it's important that it be as fast as possible.
|
1684
1759
|
*/
|
1685
1760
|
static inline size_t
|
1686
|
-
char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
1761
|
+
char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
|
1687
1762
|
if (parser->encoding_changed) {
|
1688
1763
|
size_t width;
|
1689
1764
|
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
|
@@ -2923,6 +2998,29 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
|
|
2923
2998
|
return node;
|
2924
2999
|
}
|
2925
3000
|
|
3001
|
+
/**
|
3002
|
+
* Validate that index expressions do not have keywords or blocks if we are
|
3003
|
+
* parsing as Ruby 3.4+.
|
3004
|
+
*/
|
3005
|
+
static void
|
3006
|
+
pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
|
3007
|
+
if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
|
3008
|
+
if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
|
3009
|
+
pm_node_t *node;
|
3010
|
+
PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
|
3011
|
+
if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
|
3012
|
+
pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
|
3013
|
+
break;
|
3014
|
+
}
|
3015
|
+
}
|
3016
|
+
}
|
3017
|
+
|
3018
|
+
if (block != NULL) {
|
3019
|
+
pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
|
3020
|
+
}
|
3021
|
+
}
|
3022
|
+
}
|
3023
|
+
|
2926
3024
|
/**
|
2927
3025
|
* Allocate and initialize a new IndexAndWriteNode node.
|
2928
3026
|
*/
|
@@ -2931,6 +3029,8 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons
|
|
2931
3029
|
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
|
2932
3030
|
pm_index_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_and_write_node_t);
|
2933
3031
|
|
3032
|
+
pm_index_arguments_check(parser, target->arguments, target->block);
|
3033
|
+
|
2934
3034
|
*node = (pm_index_and_write_node_t) {
|
2935
3035
|
{
|
2936
3036
|
.type = PM_INDEX_AND_WRITE_NODE,
|
@@ -2980,8 +3080,8 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
|
|
2980
3080
|
.message_loc = target->message_loc,
|
2981
3081
|
.read_name = 0,
|
2982
3082
|
.write_name = target->name,
|
2983
|
-
.
|
2984
|
-
.
|
3083
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
|
3084
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
2985
3085
|
.value = value
|
2986
3086
|
};
|
2987
3087
|
|
@@ -3002,6 +3102,8 @@ static pm_index_operator_write_node_t *
|
|
3002
3102
|
pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
|
3003
3103
|
pm_index_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_operator_write_node_t);
|
3004
3104
|
|
3105
|
+
pm_index_arguments_check(parser, target->arguments, target->block);
|
3106
|
+
|
3005
3107
|
*node = (pm_index_operator_write_node_t) {
|
3006
3108
|
{
|
3007
3109
|
.type = PM_INDEX_OPERATOR_WRITE_NODE,
|
@@ -3017,8 +3119,8 @@ pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
|
|
3017
3119
|
.arguments = target->arguments,
|
3018
3120
|
.closing_loc = target->closing_loc,
|
3019
3121
|
.block = target->block,
|
3020
|
-
.
|
3021
|
-
.
|
3122
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
|
3123
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3022
3124
|
.value = value
|
3023
3125
|
};
|
3024
3126
|
|
@@ -3075,6 +3177,8 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
|
|
3075
3177
|
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
|
3076
3178
|
pm_index_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_or_write_node_t);
|
3077
3179
|
|
3180
|
+
pm_index_arguments_check(parser, target->arguments, target->block);
|
3181
|
+
|
3078
3182
|
*node = (pm_index_or_write_node_t) {
|
3079
3183
|
{
|
3080
3184
|
.type = PM_INDEX_OR_WRITE_NODE,
|
@@ -3139,6 +3243,8 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
|
|
3139
3243
|
pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
|
3140
3244
|
pm_node_flags_t flags = target->base.flags;
|
3141
3245
|
|
3246
|
+
pm_index_arguments_check(parser, target->arguments, target->block);
|
3247
|
+
|
3142
3248
|
*node = (pm_index_target_node_t) {
|
3143
3249
|
{
|
3144
3250
|
.type = PM_INDEX_TARGET_NODE,
|
@@ -3358,9 +3464,9 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia
|
|
3358
3464
|
},
|
3359
3465
|
.name = target->name,
|
3360
3466
|
.name_loc = target->base.location,
|
3361
|
-
.
|
3467
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3362
3468
|
.value = value,
|
3363
|
-
.
|
3469
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
3364
3470
|
};
|
3365
3471
|
|
3366
3472
|
return node;
|
@@ -3474,9 +3580,9 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat
|
|
3474
3580
|
}
|
3475
3581
|
},
|
3476
3582
|
.target = target,
|
3477
|
-
.
|
3583
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3478
3584
|
.value = value,
|
3479
|
-
.
|
3585
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
3480
3586
|
};
|
3481
3587
|
|
3482
3588
|
return node;
|
@@ -3510,22 +3616,27 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node
|
|
3510
3616
|
* Allocate and initialize a new ConstantPathNode node.
|
3511
3617
|
*/
|
3512
3618
|
static pm_constant_path_node_t *
|
3513
|
-
pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter,
|
3619
|
+
pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
|
3514
3620
|
pm_assert_value_expression(parser, parent);
|
3515
|
-
|
3516
3621
|
pm_constant_path_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_node_t);
|
3517
3622
|
|
3623
|
+
pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
|
3624
|
+
if (name_token->type == PM_TOKEN_CONSTANT) {
|
3625
|
+
name = pm_parser_constant_id_token(parser, name_token);
|
3626
|
+
}
|
3627
|
+
|
3518
3628
|
*node = (pm_constant_path_node_t) {
|
3519
3629
|
{
|
3520
3630
|
.type = PM_CONSTANT_PATH_NODE,
|
3521
3631
|
.location = {
|
3522
3632
|
.start = parent == NULL ? delimiter->start : parent->location.start,
|
3523
|
-
.end =
|
3633
|
+
.end = name_token->end
|
3524
3634
|
},
|
3525
3635
|
},
|
3526
3636
|
.parent = parent,
|
3527
|
-
.
|
3528
|
-
.delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter)
|
3637
|
+
.name = name,
|
3638
|
+
.delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
|
3639
|
+
.name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
|
3529
3640
|
};
|
3530
3641
|
|
3531
3642
|
return node;
|
@@ -3596,9 +3707,9 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod
|
|
3596
3707
|
},
|
3597
3708
|
.name = target->name,
|
3598
3709
|
.name_loc = target->base.location,
|
3599
|
-
.
|
3710
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
3600
3711
|
.value = value,
|
3601
|
-
.
|
3712
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
3602
3713
|
};
|
3603
3714
|
|
3604
3715
|
return node;
|
@@ -3716,6 +3827,113 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
|
|
3716
3827
|
}
|
3717
3828
|
}
|
3718
3829
|
|
3830
|
+
/**
|
3831
|
+
* When a method body is created, we want to check if the last statement is a
|
3832
|
+
* return or a statement that houses a return. If it is, then we want to mark
|
3833
|
+
* that return as being redundant so that we can compile it differently but also
|
3834
|
+
* so that we can indicate that to the user.
|
3835
|
+
*/
|
3836
|
+
static void
|
3837
|
+
pm_def_node_body_redundant_return(pm_node_t *node) {
|
3838
|
+
switch (PM_NODE_TYPE(node)) {
|
3839
|
+
case PM_RETURN_NODE:
|
3840
|
+
node->flags |= PM_RETURN_NODE_FLAGS_REDUNDANT;
|
3841
|
+
break;
|
3842
|
+
case PM_BEGIN_NODE: {
|
3843
|
+
pm_begin_node_t *cast = (pm_begin_node_t *) node;
|
3844
|
+
|
3845
|
+
if (cast->statements != NULL && cast->else_clause == NULL) {
|
3846
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3847
|
+
}
|
3848
|
+
break;
|
3849
|
+
}
|
3850
|
+
case PM_STATEMENTS_NODE: {
|
3851
|
+
pm_statements_node_t *cast = (pm_statements_node_t *) node;
|
3852
|
+
|
3853
|
+
if (cast->body.size > 0) {
|
3854
|
+
pm_def_node_body_redundant_return(cast->body.nodes[cast->body.size - 1]);
|
3855
|
+
}
|
3856
|
+
break;
|
3857
|
+
}
|
3858
|
+
case PM_IF_NODE: {
|
3859
|
+
pm_if_node_t *cast = (pm_if_node_t *) node;
|
3860
|
+
|
3861
|
+
if (cast->statements != NULL) {
|
3862
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3863
|
+
}
|
3864
|
+
|
3865
|
+
if (cast->consequent != NULL) {
|
3866
|
+
pm_def_node_body_redundant_return(cast->consequent);
|
3867
|
+
}
|
3868
|
+
break;
|
3869
|
+
}
|
3870
|
+
case PM_UNLESS_NODE: {
|
3871
|
+
pm_unless_node_t *cast = (pm_unless_node_t *) node;
|
3872
|
+
|
3873
|
+
if (cast->statements != NULL) {
|
3874
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3875
|
+
}
|
3876
|
+
|
3877
|
+
if (cast->consequent != NULL) {
|
3878
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
|
3879
|
+
}
|
3880
|
+
break;
|
3881
|
+
}
|
3882
|
+
case PM_ELSE_NODE: {
|
3883
|
+
pm_else_node_t *cast = (pm_else_node_t *) node;
|
3884
|
+
|
3885
|
+
if (cast->statements != NULL) {
|
3886
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3887
|
+
}
|
3888
|
+
break;
|
3889
|
+
}
|
3890
|
+
case PM_CASE_NODE: {
|
3891
|
+
pm_case_node_t *cast = (pm_case_node_t *) node;
|
3892
|
+
pm_node_t *condition;
|
3893
|
+
|
3894
|
+
PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
|
3895
|
+
pm_def_node_body_redundant_return(condition);
|
3896
|
+
}
|
3897
|
+
|
3898
|
+
if (cast->consequent != NULL) {
|
3899
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
|
3900
|
+
}
|
3901
|
+
break;
|
3902
|
+
}
|
3903
|
+
case PM_WHEN_NODE: {
|
3904
|
+
pm_when_node_t *cast = (pm_when_node_t *) node;
|
3905
|
+
|
3906
|
+
if (cast->statements != NULL) {
|
3907
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3908
|
+
}
|
3909
|
+
break;
|
3910
|
+
}
|
3911
|
+
case PM_CASE_MATCH_NODE: {
|
3912
|
+
pm_case_match_node_t *cast = (pm_case_match_node_t *) node;
|
3913
|
+
pm_node_t *condition;
|
3914
|
+
|
3915
|
+
PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
|
3916
|
+
pm_def_node_body_redundant_return(condition);
|
3917
|
+
}
|
3918
|
+
|
3919
|
+
if (cast->consequent != NULL) {
|
3920
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->consequent);
|
3921
|
+
}
|
3922
|
+
break;
|
3923
|
+
}
|
3924
|
+
case PM_IN_NODE: {
|
3925
|
+
pm_in_node_t *cast = (pm_in_node_t *) node;
|
3926
|
+
|
3927
|
+
if (cast->statements != NULL) {
|
3928
|
+
pm_def_node_body_redundant_return((pm_node_t *) cast->statements);
|
3929
|
+
}
|
3930
|
+
break;
|
3931
|
+
}
|
3932
|
+
default:
|
3933
|
+
break;
|
3934
|
+
}
|
3935
|
+
}
|
3936
|
+
|
3719
3937
|
/**
|
3720
3938
|
* Allocate and initialize a new DefNode node.
|
3721
3939
|
*/
|
@@ -3748,6 +3966,10 @@ pm_def_node_create(
|
|
3748
3966
|
pm_def_node_receiver_check(parser, receiver);
|
3749
3967
|
}
|
3750
3968
|
|
3969
|
+
if (body != NULL) {
|
3970
|
+
pm_def_node_body_redundant_return(body);
|
3971
|
+
}
|
3972
|
+
|
3751
3973
|
*node = (pm_def_node_t) {
|
3752
3974
|
{
|
3753
3975
|
.type = PM_DEF_NODE,
|
@@ -4338,9 +4560,9 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta
|
|
4338
4560
|
},
|
4339
4561
|
.name = pm_global_variable_write_name(parser, target),
|
4340
4562
|
.name_loc = target->location,
|
4341
|
-
.
|
4563
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
4342
4564
|
.value = value,
|
4343
|
-
.
|
4565
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
4344
4566
|
};
|
4345
4567
|
|
4346
4568
|
return node;
|
@@ -4846,9 +5068,9 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance
|
|
4846
5068
|
},
|
4847
5069
|
.name = target->name,
|
4848
5070
|
.name_loc = target->base.location,
|
4849
|
-
.
|
5071
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
4850
5072
|
.value = value,
|
4851
|
-
.
|
5073
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
4852
5074
|
};
|
4853
5075
|
|
4854
5076
|
return node;
|
@@ -4922,6 +5144,50 @@ pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable
|
|
4922
5144
|
return node;
|
4923
5145
|
}
|
4924
5146
|
|
5147
|
+
/**
|
5148
|
+
* Append a part into a list of string parts. Importantly this handles nested
|
5149
|
+
* interpolated strings by not necessarily removing the marker for static
|
5150
|
+
* literals.
|
5151
|
+
*/
|
5152
|
+
static void
|
5153
|
+
pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
|
5154
|
+
switch (PM_NODE_TYPE(part)) {
|
5155
|
+
case PM_STRING_NODE:
|
5156
|
+
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5157
|
+
break;
|
5158
|
+
case PM_EMBEDDED_STATEMENTS_NODE: {
|
5159
|
+
pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
|
5160
|
+
pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
|
5161
|
+
|
5162
|
+
if (embedded == NULL) {
|
5163
|
+
// If there are no statements or more than one statement, then
|
5164
|
+
// we lose the static literal flag.
|
5165
|
+
pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
|
5166
|
+
} else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
|
5167
|
+
// If the embedded statement is a string, then we can keep the
|
5168
|
+
// static literal flag and mark the string as frozen.
|
5169
|
+
pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5170
|
+
} else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5171
|
+
// If the embedded statement is an interpolated string and it's
|
5172
|
+
// a static literal, then we can keep the static literal flag.
|
5173
|
+
} else {
|
5174
|
+
// Otherwise we lose the static literal flag.
|
5175
|
+
pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
|
5176
|
+
}
|
5177
|
+
|
5178
|
+
break;
|
5179
|
+
}
|
5180
|
+
case PM_EMBEDDED_VARIABLE_NODE:
|
5181
|
+
pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
|
5182
|
+
break;
|
5183
|
+
default:
|
5184
|
+
assert(false && "unexpected node type");
|
5185
|
+
break;
|
5186
|
+
}
|
5187
|
+
|
5188
|
+
pm_node_list_append(parts, part);
|
5189
|
+
}
|
5190
|
+
|
4925
5191
|
/**
|
4926
5192
|
* Allocate a new InterpolatedRegularExpressionNode node.
|
4927
5193
|
*/
|
@@ -4955,54 +5221,113 @@ pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expressio
|
|
4955
5221
|
node->base.location.end = part->location.end;
|
4956
5222
|
}
|
4957
5223
|
|
4958
|
-
|
4959
|
-
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
4960
|
-
}
|
4961
|
-
|
4962
|
-
if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
|
4963
|
-
pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
|
4964
|
-
}
|
4965
|
-
|
4966
|
-
pm_node_list_append(&node->parts, part);
|
5224
|
+
pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
|
4967
5225
|
}
|
4968
5226
|
|
4969
5227
|
static inline void
|
4970
5228
|
pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
|
4971
5229
|
node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
|
4972
5230
|
node->base.location.end = closing->end;
|
4973
|
-
pm_node_flag_set((pm_node_t *)node, pm_regular_expression_flags_create(parser, closing));
|
5231
|
+
pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
|
4974
5232
|
}
|
4975
5233
|
|
4976
5234
|
/**
|
4977
5235
|
* Append a part to an InterpolatedStringNode node.
|
5236
|
+
*
|
5237
|
+
* This has some somewhat complicated semantics, because we need to update
|
5238
|
+
* multiple flags that have somewhat confusing interactions.
|
5239
|
+
*
|
5240
|
+
* PM_NODE_FLAG_STATIC_LITERAL indicates that the node should be treated as a
|
5241
|
+
* single static literal string that can be pushed onto the stack on its own.
|
5242
|
+
* Note that this doesn't necessarily mean that the string will be frozen or
|
5243
|
+
* not; the instructions in CRuby will be either putobject or putstring,
|
5244
|
+
* depending on the combination of `--enable-frozen-string-literal`,
|
5245
|
+
* `# frozen_string_literal: true`, and whether or not there is interpolation.
|
5246
|
+
*
|
5247
|
+
* PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN indicates that the string should be
|
5248
|
+
* explicitly frozen. This will only happen if the string is comprised entirely
|
5249
|
+
* of string parts that are themselves static literals and frozen.
|
5250
|
+
*
|
5251
|
+
* PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE indicates that the string should
|
5252
|
+
* be explicitly marked as mutable. This will happen from
|
5253
|
+
* `--disable-frozen-string-literal` or `# frozen_string_literal: false`. This
|
5254
|
+
* is necessary to indicate that the string should be left up to the runtime,
|
5255
|
+
* which could potentially use a chilled string otherwise.
|
4978
5256
|
*/
|
4979
5257
|
static inline void
|
4980
|
-
pm_interpolated_string_node_append(
|
5258
|
+
pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
|
5259
|
+
#define CLEAR_FLAGS(node) \
|
5260
|
+
node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
|
5261
|
+
|
5262
|
+
#define MUTABLE_FLAGS(node) \
|
5263
|
+
node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
|
5264
|
+
|
4981
5265
|
if (node->parts.size == 0 && node->opening_loc.start == NULL) {
|
4982
5266
|
node->base.location.start = part->location.start;
|
4983
5267
|
}
|
4984
5268
|
|
4985
|
-
|
4986
|
-
|
4987
|
-
|
5269
|
+
node->base.location.end = MAX(node->base.location.end, part->location.end);
|
5270
|
+
|
5271
|
+
switch (PM_NODE_TYPE(part)) {
|
5272
|
+
case PM_STRING_NODE:
|
5273
|
+
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5274
|
+
break;
|
5275
|
+
case PM_INTERPOLATED_STRING_NODE:
|
5276
|
+
if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5277
|
+
// If the string that we're concatenating is a static literal,
|
5278
|
+
// then we can keep the static literal flag for this string.
|
5279
|
+
} else {
|
5280
|
+
// Otherwise, we lose the static literal flag here and we should
|
5281
|
+
// also clear the mutability flags.
|
5282
|
+
CLEAR_FLAGS(node);
|
5283
|
+
}
|
5284
|
+
break;
|
5285
|
+
case PM_EMBEDDED_STATEMENTS_NODE: {
|
5286
|
+
pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
|
5287
|
+
pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
|
5288
|
+
|
5289
|
+
if (embedded == NULL) {
|
5290
|
+
// If we're embedding multiple statements or no statements, then
|
5291
|
+
// the string is not longer a static literal.
|
5292
|
+
CLEAR_FLAGS(node);
|
5293
|
+
} else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
|
5294
|
+
// If the embedded statement is a string, then we can make that
|
5295
|
+
// string as frozen and static literal, and not touch the static
|
5296
|
+
// literal status of this string.
|
5297
|
+
pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5298
|
+
|
5299
|
+
if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5300
|
+
MUTABLE_FLAGS(node);
|
5301
|
+
}
|
5302
|
+
} else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5303
|
+
// If the embedded statement is an interpolated string, but that
|
5304
|
+
// string is marked as static literal, then we can keep our
|
5305
|
+
// static literal status for this string.
|
5306
|
+
if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5307
|
+
MUTABLE_FLAGS(node);
|
5308
|
+
}
|
5309
|
+
} else {
|
5310
|
+
// In all other cases, we lose the static literal flag here and
|
5311
|
+
// become mutable.
|
5312
|
+
CLEAR_FLAGS(node);
|
5313
|
+
}
|
4988
5314
|
|
4989
|
-
|
4990
|
-
|
5315
|
+
break;
|
5316
|
+
}
|
5317
|
+
case PM_EMBEDDED_VARIABLE_NODE:
|
5318
|
+
// Embedded variables clear static literal, which means we also
|
5319
|
+
// should clear the mutability flags.
|
5320
|
+
CLEAR_FLAGS(node);
|
5321
|
+
break;
|
5322
|
+
default:
|
5323
|
+
assert(false && "unexpected node type");
|
5324
|
+
break;
|
4991
5325
|
}
|
4992
5326
|
|
4993
5327
|
pm_node_list_append(&node->parts, part);
|
4994
|
-
node->base.location.end = MAX(node->base.location.end, part->location.end);
|
4995
5328
|
|
4996
|
-
|
4997
|
-
|
4998
|
-
case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
|
4999
|
-
pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE);
|
5000
|
-
break;
|
5001
|
-
case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
|
5002
|
-
pm_node_flag_set((pm_node_t *) node, PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
|
5003
|
-
break;
|
5004
|
-
}
|
5005
|
-
}
|
5329
|
+
#undef CLEAR_FLAGS
|
5330
|
+
#undef MUTABLE_FLAGS
|
5006
5331
|
}
|
5007
5332
|
|
5008
5333
|
/**
|
@@ -5011,11 +5336,21 @@ pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_n
|
|
5011
5336
|
static pm_interpolated_string_node_t *
|
5012
5337
|
pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
|
5013
5338
|
pm_interpolated_string_node_t *node = PM_ALLOC_NODE(parser, pm_interpolated_string_node_t);
|
5339
|
+
pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
|
5340
|
+
|
5341
|
+
switch (parser->frozen_string_literal) {
|
5342
|
+
case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
|
5343
|
+
flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
|
5344
|
+
break;
|
5345
|
+
case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
|
5346
|
+
flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
|
5347
|
+
break;
|
5348
|
+
}
|
5014
5349
|
|
5015
5350
|
*node = (pm_interpolated_string_node_t) {
|
5016
5351
|
{
|
5017
5352
|
.type = PM_INTERPOLATED_STRING_NODE,
|
5018
|
-
.flags =
|
5353
|
+
.flags = flags,
|
5019
5354
|
.location = {
|
5020
5355
|
.start = opening->start,
|
5021
5356
|
.end = closing->end,
|
@@ -5029,7 +5364,7 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin
|
|
5029
5364
|
if (parts != NULL) {
|
5030
5365
|
pm_node_t *part;
|
5031
5366
|
PM_NODE_LIST_FOREACH(parts, index, part) {
|
5032
|
-
pm_interpolated_string_node_append(
|
5367
|
+
pm_interpolated_string_node_append(node, part);
|
5033
5368
|
}
|
5034
5369
|
}
|
5035
5370
|
|
@@ -5051,15 +5386,7 @@ pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_
|
|
5051
5386
|
node->base.location.start = part->location.start;
|
5052
5387
|
}
|
5053
5388
|
|
5054
|
-
|
5055
|
-
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5056
|
-
}
|
5057
|
-
|
5058
|
-
if (!PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
|
5059
|
-
pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
|
5060
|
-
}
|
5061
|
-
|
5062
|
-
pm_node_list_append(&node->parts, part);
|
5389
|
+
pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
|
5063
5390
|
node->base.location.end = MAX(node->base.location.end, part->location.end);
|
5064
5391
|
}
|
5065
5392
|
|
@@ -5125,11 +5452,7 @@ pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *openi
|
|
5125
5452
|
|
5126
5453
|
static inline void
|
5127
5454
|
pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
|
5128
|
-
|
5129
|
-
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
|
5130
|
-
}
|
5131
|
-
|
5132
|
-
pm_node_list_append(&node->parts, part);
|
5455
|
+
pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
|
5133
5456
|
node->base.location.end = part->location.end;
|
5134
5457
|
}
|
5135
5458
|
|
@@ -5341,10 +5664,10 @@ pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *tar
|
|
5341
5664
|
}
|
5342
5665
|
},
|
5343
5666
|
.name_loc = target->location,
|
5344
|
-
.
|
5667
|
+
.binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
5345
5668
|
.value = value,
|
5346
5669
|
.name = name,
|
5347
|
-
.
|
5670
|
+
.binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
|
5348
5671
|
.depth = depth
|
5349
5672
|
};
|
5350
5673
|
|
@@ -6397,6 +6720,7 @@ pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argumen
|
|
6397
6720
|
*node = (pm_return_node_t) {
|
6398
6721
|
{
|
6399
6722
|
.type = PM_RETURN_NODE,
|
6723
|
+
.flags = 0,
|
6400
6724
|
.location = {
|
6401
6725
|
.start = keyword->start,
|
6402
6726
|
.end = (arguments == NULL ? keyword->end : arguments->base.location.end)
|
@@ -6622,7 +6946,7 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node,
|
|
6622
6946
|
case PM_REDO_NODE:
|
6623
6947
|
case PM_RETRY_NODE:
|
6624
6948
|
case PM_RETURN_NODE:
|
6625
|
-
pm_parser_warn_node(parser,
|
6949
|
+
pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
|
6626
6950
|
break;
|
6627
6951
|
default:
|
6628
6952
|
break;
|
@@ -6729,7 +7053,8 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
|
|
6729
7053
|
}
|
6730
7054
|
|
6731
7055
|
/**
|
6732
|
-
* Read through the contents of a string and check if it consists solely of
|
7056
|
+
* Read through the contents of a string and check if it consists solely of
|
7057
|
+
* US-ASCII code points.
|
6733
7058
|
*/
|
6734
7059
|
static bool
|
6735
7060
|
pm_ascii_only_p(const pm_string_t *contents) {
|
@@ -6743,27 +7068,72 @@ pm_ascii_only_p(const pm_string_t *contents) {
|
|
6743
7068
|
return true;
|
6744
7069
|
}
|
6745
7070
|
|
7071
|
+
/**
|
7072
|
+
* Validate that the contents of the given symbol are all valid UTF-8.
|
7073
|
+
*/
|
7074
|
+
static void
|
7075
|
+
parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
|
7076
|
+
for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
|
7077
|
+
size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
|
7078
|
+
|
7079
|
+
if (width == 0) {
|
7080
|
+
pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
|
7081
|
+
break;
|
7082
|
+
}
|
7083
|
+
|
7084
|
+
cursor += width;
|
7085
|
+
}
|
7086
|
+
}
|
7087
|
+
|
7088
|
+
/**
|
7089
|
+
* Validate that the contents of the given symbol are all valid in the encoding
|
7090
|
+
* of the parser.
|
7091
|
+
*/
|
7092
|
+
static void
|
7093
|
+
parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
|
7094
|
+
const pm_encoding_t *encoding = parser->encoding;
|
7095
|
+
|
7096
|
+
for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
|
7097
|
+
size_t width = encoding->char_width(cursor, end - cursor);
|
7098
|
+
|
7099
|
+
if (width == 0) {
|
7100
|
+
pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
|
7101
|
+
break;
|
7102
|
+
}
|
7103
|
+
|
7104
|
+
cursor += width;
|
7105
|
+
}
|
7106
|
+
}
|
7107
|
+
|
6746
7108
|
/**
|
6747
7109
|
* Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
|
6748
7110
|
* encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
|
6749
7111
|
* points. Otherwise, the encoding may be explicitly set with an escape
|
6750
7112
|
* sequence.
|
7113
|
+
*
|
7114
|
+
* If the validate flag is set, then it will check the contents of the symbol
|
7115
|
+
* to ensure that all characters are valid in the encoding.
|
6751
7116
|
*/
|
6752
7117
|
static inline pm_node_flags_t
|
6753
|
-
parse_symbol_encoding(
|
7118
|
+
parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
|
6754
7119
|
if (parser->explicit_encoding != NULL) {
|
6755
7120
|
// A Symbol may optionally have its encoding explicitly set. This will
|
6756
7121
|
// happen if an escape sequence results in a non-ASCII code point.
|
6757
7122
|
if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
|
7123
|
+
if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
|
6758
7124
|
return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
|
6759
7125
|
} else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
|
6760
7126
|
return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
|
7127
|
+
} else if (validate) {
|
7128
|
+
parse_symbol_encoding_validate_other(parser, location, contents);
|
6761
7129
|
}
|
6762
7130
|
} else if (pm_ascii_only_p(contents)) {
|
6763
7131
|
// Ruby stipulates that all source files must use an ASCII-compatible
|
6764
7132
|
// encoding. Thus, all symbols appearing in source are eligible for
|
6765
7133
|
// "downgrading" to US-ASCII.
|
6766
7134
|
return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
|
7135
|
+
} else if (validate) {
|
7136
|
+
parse_symbol_encoding_validate_other(parser, location, contents);
|
6767
7137
|
}
|
6768
7138
|
|
6769
7139
|
return 0;
|
@@ -6931,7 +7301,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
|
|
6931
7301
|
*/
|
6932
7302
|
static pm_symbol_node_t *
|
6933
7303
|
pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
|
6934
|
-
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, &parser->current_string));
|
7304
|
+
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
|
6935
7305
|
parser->current_string = PM_STRING_EMPTY;
|
6936
7306
|
return node;
|
6937
7307
|
}
|
@@ -6953,7 +7323,7 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
6953
7323
|
|
6954
7324
|
assert((label.end - label.start) >= 0);
|
6955
7325
|
pm_string_shared_init(&node->unescaped, label.start, label.end);
|
6956
|
-
pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &node->unescaped));
|
7326
|
+
pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
|
6957
7327
|
|
6958
7328
|
break;
|
6959
7329
|
}
|
@@ -7038,7 +7408,8 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
|
|
7038
7408
|
.unescaped = node->unescaped
|
7039
7409
|
};
|
7040
7410
|
|
7041
|
-
|
7411
|
+
pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
|
7412
|
+
pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
|
7042
7413
|
|
7043
7414
|
// We are explicitly _not_ using pm_node_destroy here because we don't want
|
7044
7415
|
// to trash the unescaped string. We could instead copy the string if we
|
@@ -7574,7 +7945,7 @@ pm_local_variable_read_node_create_it(pm_parser_t *parser, const pm_token_t *nam
|
|
7574
7945
|
static pm_node_t *
|
7575
7946
|
pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
|
7576
7947
|
if (
|
7577
|
-
(parser->version !=
|
7948
|
+
(parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
|
7578
7949
|
!parser->current_scope->closed &&
|
7579
7950
|
(parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
|
7580
7951
|
pm_node_is_it(parser, node)
|
@@ -8023,7 +8394,12 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
|
|
8023
8394
|
// If we have hit a ractor pragma, attempt to lex that.
|
8024
8395
|
uint32_t value_length = (uint32_t) (value_end - value_start);
|
8025
8396
|
if (key_length == 24 && pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
|
8026
|
-
|
8397
|
+
const uint8_t *cursor = parser->current.start;
|
8398
|
+
while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
|
8399
|
+
|
8400
|
+
if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
|
8401
|
+
pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
|
8402
|
+
} else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
|
8027
8403
|
pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
|
8028
8404
|
} else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
|
8029
8405
|
pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
|
@@ -8298,10 +8674,11 @@ context_human(pm_context_t context) {
|
|
8298
8674
|
/* Specific token lexers */
|
8299
8675
|
/******************************************************************************/
|
8300
8676
|
|
8301
|
-
static void
|
8302
|
-
pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *invalid) {
|
8677
|
+
static inline void
|
8678
|
+
pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
|
8303
8679
|
if (invalid != NULL) {
|
8304
|
-
|
8680
|
+
pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
|
8681
|
+
pm_parser_err(parser, invalid, invalid + 1, diag_id);
|
8305
8682
|
}
|
8306
8683
|
}
|
8307
8684
|
|
@@ -8309,7 +8686,7 @@ static size_t
|
|
8309
8686
|
pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
|
8310
8687
|
const uint8_t *invalid = NULL;
|
8311
8688
|
size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
|
8312
|
-
pm_strspn_number_validate(parser, invalid);
|
8689
|
+
pm_strspn_number_validate(parser, string, length, invalid);
|
8313
8690
|
return length;
|
8314
8691
|
}
|
8315
8692
|
|
@@ -8317,7 +8694,7 @@ static size_t
|
|
8317
8694
|
pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
|
8318
8695
|
const uint8_t *invalid = NULL;
|
8319
8696
|
size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
|
8320
|
-
pm_strspn_number_validate(parser, invalid);
|
8697
|
+
pm_strspn_number_validate(parser, string, length, invalid);
|
8321
8698
|
return length;
|
8322
8699
|
}
|
8323
8700
|
|
@@ -8325,7 +8702,7 @@ static size_t
|
|
8325
8702
|
pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
|
8326
8703
|
const uint8_t *invalid = NULL;
|
8327
8704
|
size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
|
8328
|
-
pm_strspn_number_validate(parser, invalid);
|
8705
|
+
pm_strspn_number_validate(parser, string, length, invalid);
|
8329
8706
|
return length;
|
8330
8707
|
}
|
8331
8708
|
|
@@ -8333,7 +8710,7 @@ static size_t
|
|
8333
8710
|
pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
|
8334
8711
|
const uint8_t *invalid = NULL;
|
8335
8712
|
size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
|
8336
|
-
pm_strspn_number_validate(parser, invalid);
|
8713
|
+
pm_strspn_number_validate(parser, string, length, invalid);
|
8337
8714
|
return length;
|
8338
8715
|
}
|
8339
8716
|
|
@@ -8395,6 +8772,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8395
8772
|
if (pm_char_is_decimal_digit(peek(parser))) {
|
8396
8773
|
parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
|
8397
8774
|
} else {
|
8775
|
+
match(parser, '_');
|
8398
8776
|
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
|
8399
8777
|
}
|
8400
8778
|
|
@@ -8407,6 +8785,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8407
8785
|
if (pm_char_is_binary_digit(peek(parser))) {
|
8408
8786
|
parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
|
8409
8787
|
} else {
|
8788
|
+
match(parser, '_');
|
8410
8789
|
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
|
8411
8790
|
}
|
8412
8791
|
|
@@ -8420,6 +8799,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8420
8799
|
if (pm_char_is_octal_digit(peek(parser))) {
|
8421
8800
|
parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
|
8422
8801
|
} else {
|
8802
|
+
match(parser, '_');
|
8423
8803
|
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
|
8424
8804
|
}
|
8425
8805
|
|
@@ -8447,6 +8827,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8447
8827
|
if (pm_char_is_hexadecimal_digit(peek(parser))) {
|
8448
8828
|
parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
|
8449
8829
|
} else {
|
8830
|
+
match(parser, '_');
|
8450
8831
|
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
|
8451
8832
|
}
|
8452
8833
|
|
@@ -8475,6 +8856,16 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
|
|
8475
8856
|
type = lex_optional_float_suffix(parser, seen_e);
|
8476
8857
|
}
|
8477
8858
|
|
8859
|
+
// At this point we have a completed number, but we want to provide the user
|
8860
|
+
// with a good experience if they put an additional .xxx fractional
|
8861
|
+
// component on the end, so we'll check for that here.
|
8862
|
+
if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
|
8863
|
+
const uint8_t *fraction_start = parser->current.end;
|
8864
|
+
const uint8_t *fraction_end = parser->current.end + 2;
|
8865
|
+
fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
|
8866
|
+
pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
|
8867
|
+
}
|
8868
|
+
|
8478
8869
|
return type;
|
8479
8870
|
}
|
8480
8871
|
|
@@ -8567,7 +8958,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
8567
8958
|
} while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
|
8568
8959
|
|
8569
8960
|
// $0 isn't allowed to be followed by anything.
|
8570
|
-
pm_diagnostic_id_t diag_id = parser->version ==
|
8961
|
+
pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
|
8571
8962
|
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
|
8572
8963
|
}
|
8573
8964
|
|
@@ -8603,7 +8994,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
8603
8994
|
} else {
|
8604
8995
|
// If we get here, then we have a $ followed by something that
|
8605
8996
|
// isn't recognized as a global variable.
|
8606
|
-
pm_diagnostic_id_t diag_id = parser->version ==
|
8997
|
+
pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
|
8607
8998
|
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
8608
8999
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
|
8609
9000
|
}
|
@@ -8976,12 +9367,20 @@ escape_hexadecimal_digit(const uint8_t value) {
|
|
8976
9367
|
* validated.
|
8977
9368
|
*/
|
8978
9369
|
static inline uint32_t
|
8979
|
-
escape_unicode(const uint8_t *string, size_t length) {
|
9370
|
+
escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
|
8980
9371
|
uint32_t value = 0;
|
8981
9372
|
for (size_t index = 0; index < length; index++) {
|
8982
9373
|
if (index != 0) value <<= 4;
|
8983
9374
|
value |= escape_hexadecimal_digit(string[index]);
|
8984
9375
|
}
|
9376
|
+
|
9377
|
+
// Here we're going to verify that the value is actually a valid Unicode
|
9378
|
+
// codepoint and not a surrogate pair.
|
9379
|
+
if (value >= 0xD800 && value <= 0xDFFF) {
|
9380
|
+
pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
|
9381
|
+
return 0xFFFD;
|
9382
|
+
}
|
9383
|
+
|
8985
9384
|
return value;
|
8986
9385
|
}
|
8987
9386
|
|
@@ -9230,7 +9629,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9230
9629
|
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
|
9231
9630
|
}
|
9232
9631
|
|
9233
|
-
escape_write_byte_encoded(parser, buffer, value);
|
9632
|
+
escape_write_byte_encoded(parser, buffer, escape_byte(value, flags));
|
9234
9633
|
} else {
|
9235
9634
|
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
|
9236
9635
|
}
|
@@ -9241,22 +9640,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9241
9640
|
const uint8_t *start = parser->current.end - 1;
|
9242
9641
|
parser->current.end++;
|
9243
9642
|
|
9244
|
-
if (
|
9245
|
-
(parser->current.end + 4 <= parser->end) &&
|
9246
|
-
pm_char_is_hexadecimal_digit(parser->current.end[0]) &&
|
9247
|
-
pm_char_is_hexadecimal_digit(parser->current.end[1]) &&
|
9248
|
-
pm_char_is_hexadecimal_digit(parser->current.end[2]) &&
|
9249
|
-
pm_char_is_hexadecimal_digit(parser->current.end[3])
|
9250
|
-
) {
|
9251
|
-
uint32_t value = escape_unicode(parser->current.end, 4);
|
9252
|
-
|
9253
|
-
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9254
|
-
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
|
9255
|
-
}
|
9256
|
-
escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
|
9257
|
-
|
9258
|
-
parser->current.end += 4;
|
9259
|
-
} else if (peek(parser) == '{') {
|
9643
|
+
if (peek(parser) == '{') {
|
9260
9644
|
const uint8_t *unicode_codepoints_start = parser->current.end - 2;
|
9261
9645
|
|
9262
9646
|
parser->current.end++;
|
@@ -9284,7 +9668,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9284
9668
|
extra_codepoints_start = unicode_start;
|
9285
9669
|
}
|
9286
9670
|
|
9287
|
-
uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
|
9671
|
+
uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
|
9288
9672
|
escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
|
9289
9673
|
|
9290
9674
|
parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
|
@@ -9306,7 +9690,25 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9306
9690
|
pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
|
9307
9691
|
}
|
9308
9692
|
} else {
|
9309
|
-
|
9693
|
+
size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
|
9694
|
+
|
9695
|
+
if (length == 4) {
|
9696
|
+
uint32_t value = escape_unicode(parser, parser->current.end, 4);
|
9697
|
+
|
9698
|
+
if (flags & PM_ESCAPE_FLAG_REGEXP) {
|
9699
|
+
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
|
9700
|
+
}
|
9701
|
+
|
9702
|
+
escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
|
9703
|
+
parser->current.end += 4;
|
9704
|
+
} else {
|
9705
|
+
parser->current.end += length;
|
9706
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
|
9707
|
+
}
|
9708
|
+
}
|
9709
|
+
|
9710
|
+
if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
|
9711
|
+
pm_parser_err(parser, start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
9310
9712
|
}
|
9311
9713
|
|
9312
9714
|
return;
|
@@ -9560,8 +9962,8 @@ lex_at_variable(pm_parser_t *parser) {
|
|
9560
9962
|
}
|
9561
9963
|
} else if (parser->current.end < parser->end && pm_char_is_decimal_digit(*parser->current.end)) {
|
9562
9964
|
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
|
9563
|
-
if (parser->version ==
|
9564
|
-
diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ?
|
9965
|
+
if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
|
9966
|
+
diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
|
9565
9967
|
}
|
9566
9968
|
|
9567
9969
|
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
@@ -10545,8 +10947,11 @@ parser_lex(pm_parser_t *parser) {
|
|
10545
10947
|
}
|
10546
10948
|
|
10547
10949
|
size_t ident_length = (size_t) (parser->current.end - ident_start);
|
10950
|
+
bool ident_error = false;
|
10951
|
+
|
10548
10952
|
if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
|
10549
|
-
|
10953
|
+
pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
|
10954
|
+
ident_error = true;
|
10550
10955
|
}
|
10551
10956
|
|
10552
10957
|
parser->explicit_encoding = NULL;
|
@@ -10571,7 +10976,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10571
10976
|
// this is not a valid heredoc declaration. In this case we
|
10572
10977
|
// will add an error, but we will still return a heredoc
|
10573
10978
|
// start.
|
10574
|
-
|
10979
|
+
if (!ident_error) pm_parser_err_heredoc_term(parser, parser->lex_modes.current);
|
10575
10980
|
body_start = parser->end;
|
10576
10981
|
} else {
|
10577
10982
|
// Otherwise, we want to indicate that the body of the
|
@@ -11898,7 +12303,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11898
12303
|
// terminator) but still continue parsing so that content after the
|
11899
12304
|
// declaration of the heredoc can be parsed.
|
11900
12305
|
if (parser->current.end >= parser->end) {
|
11901
|
-
|
12306
|
+
pm_parser_err_heredoc_term(parser, lex_mode);
|
11902
12307
|
parser->next_start = lex_mode->as.heredoc.next_start;
|
11903
12308
|
parser->heredoc_end = parser->current.end;
|
11904
12309
|
lex_state_set(parser, PM_LEX_STATE_END);
|
@@ -12537,6 +12942,23 @@ expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_to
|
|
12537
12942
|
parser->previous.type = PM_TOKEN_MISSING;
|
12538
12943
|
}
|
12539
12944
|
|
12945
|
+
/**
|
12946
|
+
* A special expect1 that expects a heredoc terminator and handles popping the
|
12947
|
+
* lex mode accordingly.
|
12948
|
+
*/
|
12949
|
+
static void
|
12950
|
+
expect1_heredoc_term(pm_parser_t *parser, pm_lex_mode_t *lex_mode) {
|
12951
|
+
if (match1(parser, PM_TOKEN_HEREDOC_END)) {
|
12952
|
+
lex_mode_pop(parser);
|
12953
|
+
parser_lex(parser);
|
12954
|
+
} else {
|
12955
|
+
pm_parser_err_heredoc_term(parser, lex_mode);
|
12956
|
+
lex_mode_pop(parser);
|
12957
|
+
parser->previous.start = parser->previous.end;
|
12958
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
12959
|
+
}
|
12960
|
+
}
|
12961
|
+
|
12540
12962
|
static pm_node_t *
|
12541
12963
|
parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id);
|
12542
12964
|
|
@@ -12664,25 +13086,72 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
|
|
12664
13086
|
*name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
|
12665
13087
|
}
|
12666
13088
|
|
13089
|
+
/**
|
13090
|
+
* Certain expressions are not targetable, but in order to provide a better
|
13091
|
+
* experience we give a specific error message. In order to maintain as much
|
13092
|
+
* information in the tree as possible, we replace them with local variable
|
13093
|
+
* writes.
|
13094
|
+
*/
|
13095
|
+
static pm_node_t *
|
13096
|
+
parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
|
13097
|
+
switch (PM_NODE_TYPE(target)) {
|
13098
|
+
case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
|
13099
|
+
case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
|
13100
|
+
case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
|
13101
|
+
case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
|
13102
|
+
case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
|
13103
|
+
case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
|
13104
|
+
case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
|
13105
|
+
default: break;
|
13106
|
+
}
|
13107
|
+
|
13108
|
+
pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
|
13109
|
+
pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
|
13110
|
+
|
13111
|
+
pm_node_destroy(parser, target);
|
13112
|
+
return (pm_node_t *) result;
|
13113
|
+
}
|
13114
|
+
|
12667
13115
|
/**
|
12668
13116
|
* Convert the given node into a valid target node.
|
12669
13117
|
*/
|
12670
13118
|
static pm_node_t *
|
12671
|
-
parse_target(pm_parser_t *parser, pm_node_t *target) {
|
13119
|
+
parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
12672
13120
|
switch (PM_NODE_TYPE(target)) {
|
12673
13121
|
case PM_MISSING_NODE:
|
12674
13122
|
return target;
|
13123
|
+
case PM_SOURCE_ENCODING_NODE:
|
13124
|
+
case PM_FALSE_NODE:
|
13125
|
+
case PM_SOURCE_FILE_NODE:
|
13126
|
+
case PM_SOURCE_LINE_NODE:
|
13127
|
+
case PM_NIL_NODE:
|
13128
|
+
case PM_SELF_NODE:
|
13129
|
+
case PM_TRUE_NODE: {
|
13130
|
+
// In these special cases, we have specific error messages and we
|
13131
|
+
// will replace them with local variable writes.
|
13132
|
+
return parse_unwriteable_target(parser, target);
|
13133
|
+
}
|
12675
13134
|
case PM_CLASS_VARIABLE_READ_NODE:
|
12676
13135
|
assert(sizeof(pm_class_variable_target_node_t) == sizeof(pm_class_variable_read_node_t));
|
12677
13136
|
target->type = PM_CLASS_VARIABLE_TARGET_NODE;
|
12678
13137
|
return target;
|
12679
13138
|
case PM_CONSTANT_PATH_NODE:
|
13139
|
+
if (context_def_p(parser)) {
|
13140
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
|
13141
|
+
}
|
13142
|
+
|
12680
13143
|
assert(sizeof(pm_constant_path_target_node_t) == sizeof(pm_constant_path_node_t));
|
12681
13144
|
target->type = PM_CONSTANT_PATH_TARGET_NODE;
|
13145
|
+
|
12682
13146
|
return target;
|
12683
13147
|
case PM_CONSTANT_READ_NODE:
|
13148
|
+
if (context_def_p(parser)) {
|
13149
|
+
pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
|
13150
|
+
}
|
13151
|
+
|
12684
13152
|
assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
|
12685
13153
|
target->type = PM_CONSTANT_TARGET_NODE;
|
13154
|
+
|
12686
13155
|
return target;
|
12687
13156
|
case PM_BACK_REFERENCE_READ_NODE:
|
12688
13157
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
@@ -12715,7 +13184,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
12715
13184
|
pm_splat_node_t *splat = (pm_splat_node_t *) target;
|
12716
13185
|
|
12717
13186
|
if (splat->expression != NULL) {
|
12718
|
-
splat->expression = parse_target(parser, splat->expression);
|
13187
|
+
splat->expression = parse_target(parser, splat->expression, multiple);
|
12719
13188
|
}
|
12720
13189
|
|
12721
13190
|
return (pm_node_t *) splat;
|
@@ -12753,6 +13222,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
12753
13222
|
}
|
12754
13223
|
|
12755
13224
|
if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
|
13225
|
+
if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
|
13226
|
+
pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
|
13227
|
+
}
|
13228
|
+
|
12756
13229
|
parse_write_name(parser, &call->name);
|
12757
13230
|
return (pm_node_t *) pm_call_target_node_create(parser, call);
|
12758
13231
|
}
|
@@ -12780,8 +13253,8 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
12780
13253
|
* assignment.
|
12781
13254
|
*/
|
12782
13255
|
static pm_node_t *
|
12783
|
-
parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
|
12784
|
-
pm_node_t *result = parse_target(parser, target);
|
13256
|
+
parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
|
13257
|
+
pm_node_t *result = parse_target(parser, target, multiple);
|
12785
13258
|
|
12786
13259
|
// Ensure that we have one of an =, an 'in' in for indexes, and a ')' in parens after the targets.
|
12787
13260
|
if (
|
@@ -12826,13 +13299,20 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
12826
13299
|
}
|
12827
13300
|
case PM_CONSTANT_PATH_NODE: {
|
12828
13301
|
pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
|
13302
|
+
|
13303
|
+
if (context_def_p(parser)) {
|
13304
|
+
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
|
13305
|
+
}
|
13306
|
+
|
12829
13307
|
return parse_shareable_constant_write(parser, node);
|
12830
13308
|
}
|
12831
13309
|
case PM_CONSTANT_READ_NODE: {
|
12832
13310
|
pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
|
13311
|
+
|
12833
13312
|
if (context_def_p(parser)) {
|
12834
13313
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
|
12835
13314
|
}
|
13315
|
+
|
12836
13316
|
pm_node_destroy(parser, target);
|
12837
13317
|
return parse_shareable_constant_write(parser, node);
|
12838
13318
|
}
|
@@ -13011,7 +13491,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13011
13491
|
bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
|
13012
13492
|
|
13013
13493
|
pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
|
13014
|
-
pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target));
|
13494
|
+
pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true));
|
13015
13495
|
|
13016
13496
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
13017
13497
|
if (accept1(parser, PM_TOKEN_USTAR)) {
|
@@ -13027,7 +13507,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13027
13507
|
|
13028
13508
|
if (token_begins_expression_p(parser->current.type)) {
|
13029
13509
|
name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
|
13030
|
-
name = parse_target(parser, name);
|
13510
|
+
name = parse_target(parser, name, true);
|
13031
13511
|
}
|
13032
13512
|
|
13033
13513
|
pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
|
@@ -13035,7 +13515,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13035
13515
|
has_rest = true;
|
13036
13516
|
} else if (token_begins_expression_p(parser->current.type)) {
|
13037
13517
|
pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
|
13038
|
-
target = parse_target(parser, target);
|
13518
|
+
target = parse_target(parser, target, true);
|
13039
13519
|
|
13040
13520
|
pm_multi_target_node_targets_append(parser, result, target);
|
13041
13521
|
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
@@ -13152,11 +13632,11 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
13152
13632
|
*/
|
13153
13633
|
static void
|
13154
13634
|
pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
|
13155
|
-
const pm_node_t *duplicated = pm_static_literals_add(parser, literals, node);
|
13635
|
+
const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node);
|
13156
13636
|
|
13157
13637
|
if (duplicated != NULL) {
|
13158
13638
|
pm_buffer_t buffer = { 0 };
|
13159
|
-
pm_static_literal_inspect(&buffer, parser, duplicated);
|
13639
|
+
pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
|
13160
13640
|
|
13161
13641
|
pm_diagnostic_list_append_format(
|
13162
13642
|
&parser->warning_list,
|
@@ -13178,7 +13658,7 @@ pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *liter
|
|
13178
13658
|
*/
|
13179
13659
|
static void
|
13180
13660
|
pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
|
13181
|
-
if (pm_static_literals_add(parser, literals, node) != NULL) {
|
13661
|
+
if (pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node) != NULL) {
|
13182
13662
|
pm_diagnostic_list_append_format(
|
13183
13663
|
&parser->warning_list,
|
13184
13664
|
node->location.start,
|
@@ -13206,10 +13686,16 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
|
|
13206
13686
|
pm_token_t operator = parser->previous;
|
13207
13687
|
pm_node_t *value = NULL;
|
13208
13688
|
|
13209
|
-
if (
|
13689
|
+
if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
|
13690
|
+
// If we're about to parse a nested hash that is being
|
13691
|
+
// pushed into this hash directly with **, then we want the
|
13692
|
+
// inner hash to share the static literals with the outer
|
13693
|
+
// hash.
|
13694
|
+
parser->current_hash_keys = literals;
|
13210
13695
|
value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
13211
|
-
}
|
13212
|
-
|
13696
|
+
} else if (token_begins_expression_p(parser->current.type)) {
|
13697
|
+
value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
13698
|
+
} else {
|
13213
13699
|
pm_parser_scope_forwarding_keywords_check(parser, &operator);
|
13214
13700
|
}
|
13215
13701
|
|
@@ -13360,15 +13846,16 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13360
13846
|
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
|
13361
13847
|
argument = (pm_node_t *) hash;
|
13362
13848
|
|
13363
|
-
pm_static_literals_t
|
13364
|
-
bool contains_keyword_splat = parse_assocs(parser, &
|
13849
|
+
pm_static_literals_t hash_keys = { 0 };
|
13850
|
+
bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash);
|
13365
13851
|
|
13366
13852
|
parse_arguments_append(parser, arguments, argument);
|
13367
|
-
if (contains_keyword_splat) {
|
13368
|
-
pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
|
13369
|
-
}
|
13370
13853
|
|
13371
|
-
|
13854
|
+
pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
|
13855
|
+
if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
|
13856
|
+
pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
|
13857
|
+
|
13858
|
+
pm_static_literals_free(&hash_keys);
|
13372
13859
|
parsed_bare_hash = true;
|
13373
13860
|
|
13374
13861
|
break;
|
@@ -13444,7 +13931,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13444
13931
|
argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, PM_ERR_EXPECT_ARGUMENT);
|
13445
13932
|
}
|
13446
13933
|
|
13934
|
+
bool contains_keywords = false;
|
13447
13935
|
bool contains_keyword_splat = false;
|
13936
|
+
|
13448
13937
|
if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
|
13449
13938
|
if (parsed_bare_hash) {
|
13450
13939
|
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
|
@@ -13458,10 +13947,11 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13458
13947
|
}
|
13459
13948
|
|
13460
13949
|
pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
|
13950
|
+
contains_keywords = true;
|
13461
13951
|
|
13462
13952
|
// Create the set of static literals for this hash.
|
13463
|
-
pm_static_literals_t
|
13464
|
-
pm_hash_key_static_literals_add(parser, &
|
13953
|
+
pm_static_literals_t hash_keys = { 0 };
|
13954
|
+
pm_hash_key_static_literals_add(parser, &hash_keys, argument);
|
13465
13955
|
|
13466
13956
|
// Finish parsing the one we are part way through.
|
13467
13957
|
pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
|
@@ -13475,10 +13965,10 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13475
13965
|
token_begins_expression_p(parser->current.type) ||
|
13476
13966
|
match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
|
13477
13967
|
)) {
|
13478
|
-
contains_keyword_splat = parse_assocs(parser, &
|
13968
|
+
contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash);
|
13479
13969
|
}
|
13480
13970
|
|
13481
|
-
pm_static_literals_free(&
|
13971
|
+
pm_static_literals_free(&hash_keys);
|
13482
13972
|
parsed_bare_hash = true;
|
13483
13973
|
} else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
|
13484
13974
|
// TODO: Could we solve this with binding powers instead?
|
@@ -13486,9 +13976,12 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
13486
13976
|
}
|
13487
13977
|
|
13488
13978
|
parse_arguments_append(parser, arguments, argument);
|
13489
|
-
|
13490
|
-
|
13491
|
-
|
13979
|
+
|
13980
|
+
pm_node_flags_t flags = 0;
|
13981
|
+
if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
|
13982
|
+
if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
|
13983
|
+
pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
|
13984
|
+
|
13492
13985
|
break;
|
13493
13986
|
}
|
13494
13987
|
}
|
@@ -13601,7 +14094,6 @@ typedef enum {
|
|
13601
14094
|
PM_PARAMETERS_ORDER_OPTIONAL,
|
13602
14095
|
PM_PARAMETERS_ORDER_NAMED,
|
13603
14096
|
PM_PARAMETERS_ORDER_NONE,
|
13604
|
-
|
13605
14097
|
} pm_parameters_order_t;
|
13606
14098
|
|
13607
14099
|
/**
|
@@ -13626,31 +14118,37 @@ static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
|
|
13626
14118
|
* Check if current parameter follows valid parameters ordering. If not it adds
|
13627
14119
|
* an error to the list without stopping the parsing, otherwise sets the
|
13628
14120
|
* parameters state to the one corresponding to the current parameter.
|
14121
|
+
*
|
14122
|
+
* It returns true if it was successful, and false otherwise.
|
13629
14123
|
*/
|
13630
|
-
static
|
14124
|
+
static bool
|
13631
14125
|
update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
|
13632
14126
|
pm_parameters_order_t state = parameters_ordering[token->type];
|
13633
|
-
if (state == PM_PARAMETERS_NO_CHANGE) return;
|
14127
|
+
if (state == PM_PARAMETERS_NO_CHANGE) return true;
|
13634
14128
|
|
13635
14129
|
// If we see another ordered argument after a optional argument
|
13636
14130
|
// we only continue parsing ordered arguments until we stop seeing ordered arguments.
|
13637
14131
|
if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
|
13638
14132
|
*current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
|
13639
|
-
return;
|
14133
|
+
return true;
|
13640
14134
|
} else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
|
13641
|
-
return;
|
14135
|
+
return true;
|
13642
14136
|
}
|
13643
14137
|
|
13644
14138
|
if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
|
13645
14139
|
pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
|
13646
|
-
|
13647
|
-
|
13648
|
-
|
14140
|
+
return false;
|
14141
|
+
} else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
|
14142
|
+
pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
|
14143
|
+
return false;
|
14144
|
+
} else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
|
13649
14145
|
// We know what transition we failed on, so we can provide a better error here.
|
13650
14146
|
pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
|
13651
|
-
|
13652
|
-
*current = state;
|
14147
|
+
return false;
|
13653
14148
|
}
|
14149
|
+
|
14150
|
+
if (state < *current) *current = state;
|
14151
|
+
return true;
|
13654
14152
|
}
|
13655
14153
|
|
13656
14154
|
/**
|
@@ -13719,27 +14217,22 @@ parse_parameters(
|
|
13719
14217
|
pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
13720
14218
|
}
|
13721
14219
|
|
13722
|
-
|
13723
|
-
|
13724
|
-
parser_lex(parser);
|
14220
|
+
bool succeeded = update_parameter_state(parser, &parser->current, &order);
|
14221
|
+
parser_lex(parser);
|
13725
14222
|
|
13726
|
-
|
14223
|
+
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
|
14224
|
+
pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
|
13727
14225
|
|
13728
|
-
|
13729
|
-
|
13730
|
-
|
13731
|
-
|
13732
|
-
|
13733
|
-
|
13734
|
-
|
13735
|
-
params->keyword_rest = NULL;
|
13736
|
-
}
|
13737
|
-
pm_parameters_node_keyword_rest_set(params, (pm_node_t *)param);
|
13738
|
-
} else {
|
13739
|
-
update_parameter_state(parser, &parser->current, &order);
|
13740
|
-
parser_lex(parser);
|
14226
|
+
if (params->keyword_rest != NULL) {
|
14227
|
+
// If we already have a keyword rest parameter, then we replace it with the
|
14228
|
+
// forwarding parameter and move the keyword rest parameter to the posts list.
|
14229
|
+
pm_node_t *keyword_rest = params->keyword_rest;
|
14230
|
+
pm_parameters_node_posts_append(params, keyword_rest);
|
14231
|
+
if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
|
14232
|
+
params->keyword_rest = NULL;
|
13741
14233
|
}
|
13742
14234
|
|
14235
|
+
pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
|
13743
14236
|
break;
|
13744
14237
|
}
|
13745
14238
|
case PM_TOKEN_CLASS_VARIABLE:
|
@@ -13834,6 +14327,12 @@ parse_parameters(
|
|
13834
14327
|
pm_token_t local = name;
|
13835
14328
|
local.end -= 1;
|
13836
14329
|
|
14330
|
+
if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
|
14331
|
+
pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
|
14332
|
+
} else if (local.end[-1] == '!' || local.end[-1] == '?') {
|
14333
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
|
14334
|
+
}
|
14335
|
+
|
13837
14336
|
bool repeated = pm_parser_parameter_name_check(parser, &local);
|
13838
14337
|
pm_parser_local_add_token(parser, &local, 1);
|
13839
14338
|
|
@@ -13909,6 +14408,7 @@ parse_parameters(
|
|
13909
14408
|
pm_token_t operator = parser->previous;
|
13910
14409
|
pm_token_t name;
|
13911
14410
|
bool repeated = false;
|
14411
|
+
|
13912
14412
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
13913
14413
|
name = parser->previous;
|
13914
14414
|
repeated = pm_parser_parameter_name_check(parser, &name);
|
@@ -13922,6 +14422,7 @@ parse_parameters(
|
|
13922
14422
|
if (repeated) {
|
13923
14423
|
pm_node_flag_set_repeated_parameter(param);
|
13924
14424
|
}
|
14425
|
+
|
13925
14426
|
if (params->rest == NULL) {
|
13926
14427
|
pm_parameters_node_rest_set(params, param);
|
13927
14428
|
} else {
|
@@ -13933,6 +14434,7 @@ parse_parameters(
|
|
13933
14434
|
}
|
13934
14435
|
case PM_TOKEN_STAR_STAR:
|
13935
14436
|
case PM_TOKEN_USTAR_STAR: {
|
14437
|
+
pm_parameters_order_t previous_order = order;
|
13936
14438
|
update_parameter_state(parser, &parser->current, &order);
|
13937
14439
|
parser_lex(parser);
|
13938
14440
|
|
@@ -13940,6 +14442,10 @@ parse_parameters(
|
|
13940
14442
|
pm_node_t *param;
|
13941
14443
|
|
13942
14444
|
if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
|
14445
|
+
if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
|
14446
|
+
pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
|
14447
|
+
}
|
14448
|
+
|
13943
14449
|
param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
|
13944
14450
|
} else {
|
13945
14451
|
pm_token_t name;
|
@@ -14037,7 +14543,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
|
|
14037
14543
|
pm_rescue_node_operator_set(rescue, &parser->previous);
|
14038
14544
|
|
14039
14545
|
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
|
14040
|
-
reference = parse_target(parser, reference);
|
14546
|
+
reference = parse_target(parser, reference, false);
|
14041
14547
|
|
14042
14548
|
pm_rescue_node_reference_set(rescue, reference);
|
14043
14549
|
break;
|
@@ -14067,7 +14573,7 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, pm_rescues_type
|
|
14067
14573
|
pm_rescue_node_operator_set(rescue, &parser->previous);
|
14068
14574
|
|
14069
14575
|
pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
|
14070
|
-
reference = parse_target(parser, reference);
|
14576
|
+
reference = parse_target(parser, reference, false);
|
14071
14577
|
|
14072
14578
|
pm_rescue_node_reference_set(rescue, reference);
|
14073
14579
|
break;
|
@@ -14391,7 +14897,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
14391
14897
|
arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
|
14392
14898
|
} else {
|
14393
14899
|
pm_accepts_block_stack_push(parser, true);
|
14394
|
-
parse_arguments(parser, arguments,
|
14900
|
+
parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT);
|
14395
14901
|
|
14396
14902
|
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
14397
14903
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
|
@@ -14409,7 +14915,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
14409
14915
|
// If we get here, then the subsequent token cannot be used as an infix
|
14410
14916
|
// operator. In this case we assume the subsequent token is part of an
|
14411
14917
|
// argument to this method call.
|
14412
|
-
parse_arguments(parser, arguments,
|
14918
|
+
parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF);
|
14413
14919
|
|
14414
14920
|
// If we have done with the arguments and still not consumed the comma,
|
14415
14921
|
// then we have a trailing comma where we need to check whether it is
|
@@ -14440,11 +14946,8 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
14440
14946
|
if (arguments->block == NULL && !arguments->has_forwarding) {
|
14441
14947
|
arguments->block = (pm_node_t *) block;
|
14442
14948
|
} else {
|
14443
|
-
|
14444
|
-
|
14445
|
-
} else {
|
14446
|
-
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
|
14447
|
-
}
|
14949
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
|
14950
|
+
|
14448
14951
|
if (arguments->block != NULL) {
|
14449
14952
|
if (arguments->arguments == NULL) {
|
14450
14953
|
arguments->arguments = pm_arguments_node_create(parser);
|
@@ -15036,7 +15539,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
15036
15539
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
15037
15540
|
|
15038
15541
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
15039
|
-
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
15542
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
|
15040
15543
|
|
15041
15544
|
return (pm_node_t *) symbol;
|
15042
15545
|
}
|
@@ -15136,7 +15639,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
15136
15639
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
|
15137
15640
|
}
|
15138
15641
|
|
15139
|
-
return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
15642
|
+
return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
|
15140
15643
|
}
|
15141
15644
|
|
15142
15645
|
/**
|
@@ -15161,7 +15664,7 @@ parse_undef_argument(pm_parser_t *parser) {
|
|
15161
15664
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
15162
15665
|
|
15163
15666
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
15164
|
-
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
15667
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
|
15165
15668
|
|
15166
15669
|
return (pm_node_t *) symbol;
|
15167
15670
|
}
|
@@ -15202,7 +15705,7 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
|
|
15202
15705
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
15203
15706
|
|
15204
15707
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
15205
|
-
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
15708
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
|
15206
15709
|
|
15207
15710
|
return (pm_node_t *) symbol;
|
15208
15711
|
}
|
@@ -15429,8 +15932,12 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
|
|
15429
15932
|
nodes->size = write_index;
|
15430
15933
|
}
|
15431
15934
|
|
15935
|
+
#define PM_PARSE_PATTERN_SINGLE 0
|
15936
|
+
#define PM_PARSE_PATTERN_TOP 1
|
15937
|
+
#define PM_PARSE_PATTERN_MULTI 2
|
15938
|
+
|
15432
15939
|
static pm_node_t *
|
15433
|
-
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures,
|
15940
|
+
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id);
|
15434
15941
|
|
15435
15942
|
/**
|
15436
15943
|
* Add the newly created local to the list of captures for this pattern matching
|
@@ -15459,9 +15966,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
|
|
15459
15966
|
while (accept1(parser, PM_TOKEN_COLON_COLON)) {
|
15460
15967
|
pm_token_t delimiter = parser->previous;
|
15461
15968
|
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
15462
|
-
|
15463
|
-
pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
|
15464
|
-
node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, child);
|
15969
|
+
node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
|
15465
15970
|
}
|
15466
15971
|
|
15467
15972
|
// If there is a [ or ( that follows, then this is part of a larger pattern
|
@@ -15480,7 +15985,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
|
|
15480
15985
|
accept1(parser, PM_TOKEN_NEWLINE);
|
15481
15986
|
|
15482
15987
|
if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
|
15483
|
-
inner = parse_pattern(parser, captures,
|
15988
|
+
inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
|
15484
15989
|
accept1(parser, PM_TOKEN_NEWLINE);
|
15485
15990
|
expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
|
15486
15991
|
}
|
@@ -15492,7 +15997,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
|
|
15492
15997
|
accept1(parser, PM_TOKEN_NEWLINE);
|
15493
15998
|
|
15494
15999
|
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
15495
|
-
inner = parse_pattern(parser, captures,
|
16000
|
+
inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
|
15496
16001
|
accept1(parser, PM_TOKEN_NEWLINE);
|
15497
16002
|
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
|
15498
16003
|
}
|
@@ -15640,6 +16145,33 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
|
|
15640
16145
|
return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
|
15641
16146
|
}
|
15642
16147
|
|
16148
|
+
/**
|
16149
|
+
* Check that the slice of the source given by the bounds parameters constitutes
|
16150
|
+
* a valid local variable name.
|
16151
|
+
*/
|
16152
|
+
static bool
|
16153
|
+
pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
16154
|
+
ptrdiff_t length = end - start;
|
16155
|
+
if (length == 0) return false;
|
16156
|
+
|
16157
|
+
// First ensure that it starts with a valid identifier starting character.
|
16158
|
+
size_t width = char_is_identifier_start(parser, start);
|
16159
|
+
if (width == 0) return false;
|
16160
|
+
|
16161
|
+
// Next, ensure that it's not an uppercase character.
|
16162
|
+
if (parser->encoding_changed) {
|
16163
|
+
if (parser->encoding->isupper_char(start, length)) return false;
|
16164
|
+
} else {
|
16165
|
+
if (pm_encoding_utf_8_isupper_char(start, length)) return false;
|
16166
|
+
}
|
16167
|
+
|
16168
|
+
// Next, iterate through all of the bytes of the string to ensure that they
|
16169
|
+
// are all valid identifier characters.
|
16170
|
+
const uint8_t *cursor = start + width;
|
16171
|
+
while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
|
16172
|
+
return cursor == end;
|
16173
|
+
}
|
16174
|
+
|
15643
16175
|
/**
|
15644
16176
|
* Create an implicit node for the value of a hash pattern that has omitted the
|
15645
16177
|
* value. This will use an implicit local variable target.
|
@@ -15647,14 +16179,18 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
|
|
15647
16179
|
static pm_node_t *
|
15648
16180
|
parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
|
15649
16181
|
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
|
15650
|
-
pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
|
15651
16182
|
|
16183
|
+
pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
|
15652
16184
|
int depth = -1;
|
15653
|
-
|
15654
|
-
|
15655
|
-
PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
|
15656
|
-
} else {
|
16185
|
+
|
16186
|
+
if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
|
15657
16187
|
depth = pm_parser_local_depth_constant_id(parser, constant_id);
|
16188
|
+
} else {
|
16189
|
+
pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
|
16190
|
+
|
16191
|
+
if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
|
16192
|
+
PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
|
16193
|
+
}
|
15658
16194
|
}
|
15659
16195
|
|
15660
16196
|
if (depth == -1) {
|
@@ -15678,7 +16214,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
|
|
15678
16214
|
*/
|
15679
16215
|
static void
|
15680
16216
|
parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
|
15681
|
-
if (pm_static_literals_add(parser, keys, node) != NULL) {
|
16217
|
+
if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node) != NULL) {
|
15682
16218
|
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
|
15683
16219
|
}
|
15684
16220
|
}
|
@@ -15709,7 +16245,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
15709
16245
|
} else {
|
15710
16246
|
// Here we have a value for the first assoc in the list, so
|
15711
16247
|
// we will parse it now.
|
15712
|
-
value = parse_pattern(parser, captures,
|
16248
|
+
value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
|
15713
16249
|
}
|
15714
16250
|
|
15715
16251
|
pm_token_t operator = not_provided(parser);
|
@@ -15724,7 +16260,8 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
15724
16260
|
// If we get anything else, then this is an error. For this we'll
|
15725
16261
|
// create a missing node for the value and create an assoc node for
|
15726
16262
|
// the first node in the list.
|
15727
|
-
|
16263
|
+
pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
|
16264
|
+
pm_parser_err_node(parser, first_node, diag_id);
|
15728
16265
|
|
15729
16266
|
pm_token_t operator = not_provided(parser);
|
15730
16267
|
pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
|
@@ -15761,7 +16298,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
15761
16298
|
if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
15762
16299
|
value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
|
15763
16300
|
} else {
|
15764
|
-
value = parse_pattern(parser, captures,
|
16301
|
+
value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
|
15765
16302
|
}
|
15766
16303
|
|
15767
16304
|
pm_token_t operator = not_provided(parser);
|
@@ -15818,7 +16355,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
15818
16355
|
|
15819
16356
|
// Otherwise, we'll parse the inner pattern, then deal with it depending
|
15820
16357
|
// on the type it returns.
|
15821
|
-
pm_node_t *inner = parse_pattern(parser, captures,
|
16358
|
+
pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
|
15822
16359
|
|
15823
16360
|
accept1(parser, PM_TOKEN_NEWLINE);
|
15824
16361
|
expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
|
@@ -15885,11 +16422,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
15885
16422
|
first_node = parse_pattern_keyword_rest(parser, captures);
|
15886
16423
|
break;
|
15887
16424
|
case PM_TOKEN_STRING_BEGIN:
|
15888
|
-
first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false,
|
16425
|
+
first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY_LABEL);
|
15889
16426
|
break;
|
15890
16427
|
default: {
|
16428
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
|
15891
16429
|
parser_lex(parser);
|
15892
|
-
pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
|
15893
16430
|
|
15894
16431
|
first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
15895
16432
|
break;
|
@@ -15966,7 +16503,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
15966
16503
|
|
15967
16504
|
if (variable == NULL) {
|
15968
16505
|
if (
|
15969
|
-
(parser->version !=
|
16506
|
+
(parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) &&
|
15970
16507
|
!parser->current_scope->closed &&
|
15971
16508
|
(parser->current_scope->numbered_parameters != PM_SCOPE_NUMBERED_PARAMETERS_DISALLOWED) &&
|
15972
16509
|
pm_token_is_it(parser->previous.start, parser->previous.end)
|
@@ -16040,8 +16577,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
16040
16577
|
parser_lex(parser);
|
16041
16578
|
|
16042
16579
|
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
16043
|
-
|
16044
|
-
pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, child);
|
16580
|
+
pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
|
16045
16581
|
|
16046
16582
|
return parse_pattern_constant_path(parser, captures, (pm_node_t *) node);
|
16047
16583
|
}
|
@@ -16092,7 +16628,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
|
|
16092
16628
|
pm_token_t opening = parser->current;
|
16093
16629
|
parser_lex(parser);
|
16094
16630
|
|
16095
|
-
pm_node_t *body = parse_pattern(parser, captures,
|
16631
|
+
pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
|
16096
16632
|
accept1(parser, PM_TOKEN_NEWLINE);
|
16097
16633
|
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
|
16098
16634
|
pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
|
@@ -16151,7 +16687,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
|
|
16151
16687
|
* Parse a pattern matching expression.
|
16152
16688
|
*/
|
16153
16689
|
static pm_node_t *
|
16154
|
-
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures,
|
16690
|
+
parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id) {
|
16155
16691
|
pm_node_t *node = NULL;
|
16156
16692
|
|
16157
16693
|
bool leading_rest = false;
|
@@ -16161,14 +16697,26 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
|
|
16161
16697
|
case PM_TOKEN_LABEL: {
|
16162
16698
|
parser_lex(parser);
|
16163
16699
|
pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
|
16164
|
-
|
16700
|
+
node = (pm_node_t *) parse_pattern_hash(parser, captures, key);
|
16701
|
+
|
16702
|
+
if (!(flags & PM_PARSE_PATTERN_TOP)) {
|
16703
|
+
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
|
16704
|
+
}
|
16705
|
+
|
16706
|
+
return node;
|
16165
16707
|
}
|
16166
16708
|
case PM_TOKEN_USTAR_STAR: {
|
16167
16709
|
node = parse_pattern_keyword_rest(parser, captures);
|
16168
|
-
|
16710
|
+
node = (pm_node_t *) parse_pattern_hash(parser, captures, node);
|
16711
|
+
|
16712
|
+
if (!(flags & PM_PARSE_PATTERN_TOP)) {
|
16713
|
+
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
|
16714
|
+
}
|
16715
|
+
|
16716
|
+
return node;
|
16169
16717
|
}
|
16170
16718
|
case PM_TOKEN_USTAR: {
|
16171
|
-
if (
|
16719
|
+
if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
|
16172
16720
|
parser_lex(parser);
|
16173
16721
|
node = (pm_node_t *) parse_pattern_rest(parser, captures);
|
16174
16722
|
leading_rest = true;
|
@@ -16187,7 +16735,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, bool top_pat
|
|
16187
16735
|
return (pm_node_t *) parse_pattern_hash(parser, captures, node);
|
16188
16736
|
}
|
16189
16737
|
|
16190
|
-
if (
|
16738
|
+
if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
|
16191
16739
|
// If we have a comma, then we are now parsing either an array pattern or a
|
16192
16740
|
// find pattern. We need to parse all of the patterns, put them into a big
|
16193
16741
|
// list, and then determine which type of node we have.
|
@@ -16367,7 +16915,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
16367
16915
|
|
16368
16916
|
pm_node_list_free(&parts);
|
16369
16917
|
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
16370
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
16918
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16371
16919
|
} else if (match1(parser, PM_TOKEN_EOF)) {
|
16372
16920
|
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
16373
16921
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
@@ -16393,7 +16941,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
16393
16941
|
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
16394
16942
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
16395
16943
|
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
16396
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
16944
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
|
16397
16945
|
} else {
|
16398
16946
|
// If we get here, then we have interpolation so we'll need
|
16399
16947
|
// to create a string or symbol node with interpolation.
|
@@ -16475,11 +17023,11 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
16475
17023
|
pm_token_t bounds = not_provided(parser);
|
16476
17024
|
|
16477
17025
|
pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
|
16478
|
-
pm_interpolated_string_node_append(
|
17026
|
+
pm_interpolated_string_node_append(container, current);
|
16479
17027
|
current = (pm_node_t *) container;
|
16480
17028
|
}
|
16481
17029
|
|
16482
|
-
pm_interpolated_string_node_append(
|
17030
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
|
16483
17031
|
}
|
16484
17032
|
}
|
16485
17033
|
|
@@ -16498,6 +17046,11 @@ pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
16498
17046
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
|
16499
17047
|
break;
|
16500
17048
|
}
|
17049
|
+
case PM_ERR_HASH_VALUE:
|
17050
|
+
case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
|
17051
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
|
17052
|
+
break;
|
17053
|
+
}
|
16501
17054
|
case PM_ERR_UNARY_RECEIVER: {
|
16502
17055
|
const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
|
16503
17056
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
|
@@ -16724,13 +17277,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16724
17277
|
}
|
16725
17278
|
|
16726
17279
|
element = (pm_node_t *) pm_keyword_hash_node_create(parser);
|
16727
|
-
pm_static_literals_t
|
17280
|
+
pm_static_literals_t hash_keys = { 0 };
|
16728
17281
|
|
16729
17282
|
if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
16730
|
-
parse_assocs(parser, &
|
17283
|
+
parse_assocs(parser, &hash_keys, element);
|
16731
17284
|
}
|
16732
17285
|
|
16733
|
-
pm_static_literals_free(&
|
17286
|
+
pm_static_literals_free(&hash_keys);
|
16734
17287
|
parsed_bare_hash = true;
|
16735
17288
|
} else {
|
16736
17289
|
element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION);
|
@@ -16741,8 +17294,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16741
17294
|
}
|
16742
17295
|
|
16743
17296
|
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
|
16744
|
-
pm_static_literals_t
|
16745
|
-
pm_hash_key_static_literals_add(parser, &
|
17297
|
+
pm_static_literals_t hash_keys = { 0 };
|
17298
|
+
pm_hash_key_static_literals_add(parser, &hash_keys, element);
|
16746
17299
|
|
16747
17300
|
pm_token_t operator;
|
16748
17301
|
if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
|
@@ -16757,10 +17310,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16757
17310
|
|
16758
17311
|
element = (pm_node_t *) hash;
|
16759
17312
|
if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
|
16760
|
-
parse_assocs(parser, &
|
17313
|
+
parse_assocs(parser, &hash_keys, element);
|
16761
17314
|
}
|
16762
17315
|
|
16763
|
-
pm_static_literals_free(&
|
17316
|
+
pm_static_literals_free(&hash_keys);
|
16764
17317
|
parsed_bare_hash = true;
|
16765
17318
|
}
|
16766
17319
|
}
|
@@ -16854,7 +17407,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16854
17407
|
return (pm_node_t *) multi_target;
|
16855
17408
|
}
|
16856
17409
|
|
16857
|
-
return parse_target_validate(parser, (pm_node_t *) multi_target);
|
17410
|
+
return parse_target_validate(parser, (pm_node_t *) multi_target, false);
|
16858
17411
|
}
|
16859
17412
|
|
16860
17413
|
// If we have a single statement and are ending on a right parenthesis
|
@@ -16920,14 +17473,30 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16920
17473
|
return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
|
16921
17474
|
}
|
16922
17475
|
case PM_TOKEN_BRACE_LEFT: {
|
17476
|
+
// If we were passed a current_hash_keys via the parser, then that
|
17477
|
+
// means we're already parsing a hash and we want to share the set
|
17478
|
+
// of hash keys with this inner hash we're about to parse for the
|
17479
|
+
// sake of warnings. We'll set it to NULL after we grab it to make
|
17480
|
+
// sure subsequent expressions don't use it. Effectively this is a
|
17481
|
+
// way of getting around passing it to every call to
|
17482
|
+
// parse_expression.
|
17483
|
+
pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
|
17484
|
+
parser->current_hash_keys = NULL;
|
17485
|
+
|
16923
17486
|
pm_accepts_block_stack_push(parser, true);
|
16924
17487
|
parser_lex(parser);
|
16925
17488
|
|
16926
17489
|
pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
|
16927
|
-
pm_static_literals_t literals = { 0 };
|
16928
17490
|
|
16929
17491
|
if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
|
16930
|
-
|
17492
|
+
if (current_hash_keys != NULL) {
|
17493
|
+
parse_assocs(parser, current_hash_keys, (pm_node_t *) node);
|
17494
|
+
} else {
|
17495
|
+
pm_static_literals_t hash_keys = { 0 };
|
17496
|
+
parse_assocs(parser, &hash_keys, (pm_node_t *) node);
|
17497
|
+
pm_static_literals_free(&hash_keys);
|
17498
|
+
}
|
17499
|
+
|
16931
17500
|
accept1(parser, PM_TOKEN_NEWLINE);
|
16932
17501
|
}
|
16933
17502
|
|
@@ -16935,7 +17504,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16935
17504
|
expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
|
16936
17505
|
pm_hash_node_closing_loc_set(node, &parser->previous);
|
16937
17506
|
|
16938
|
-
pm_static_literals_free(&literals);
|
16939
17507
|
return (pm_node_t *) node;
|
16940
17508
|
}
|
16941
17509
|
case PM_TOKEN_CHARACTER_LITERAL: {
|
@@ -17000,12 +17568,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17000
17568
|
}
|
17001
17569
|
case PM_TOKEN_UCOLON_COLON: {
|
17002
17570
|
parser_lex(parser);
|
17003
|
-
|
17004
17571
|
pm_token_t delimiter = parser->previous;
|
17005
|
-
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
17006
17572
|
|
17007
|
-
|
17008
|
-
pm_node_t *node = (pm_node_t *)pm_constant_path_node_create(parser, NULL, &delimiter,
|
17573
|
+
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
17574
|
+
pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
|
17009
17575
|
|
17010
17576
|
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
|
17011
17577
|
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
|
@@ -17165,8 +17731,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17165
17731
|
if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
|
17166
17732
|
// If we get here, then we have an empty heredoc. We'll create
|
17167
17733
|
// an empty content token and return an empty string node.
|
17168
|
-
|
17169
|
-
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
17734
|
+
expect1_heredoc_term(parser, lex_mode);
|
17170
17735
|
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
17171
17736
|
|
17172
17737
|
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
@@ -17207,8 +17772,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17207
17772
|
}
|
17208
17773
|
|
17209
17774
|
node = (pm_node_t *) cast;
|
17210
|
-
|
17211
|
-
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
17775
|
+
expect1_heredoc_term(parser, lex_mode);
|
17212
17776
|
} else {
|
17213
17777
|
// If we get here, then we have multiple parts in the heredoc,
|
17214
17778
|
// so we'll need to create an interpolated string node to hold
|
@@ -17230,20 +17794,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17230
17794
|
pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
|
17231
17795
|
cast->parts = parts;
|
17232
17796
|
|
17233
|
-
|
17234
|
-
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
17235
|
-
|
17797
|
+
expect1_heredoc_term(parser, lex_mode);
|
17236
17798
|
pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
|
17799
|
+
|
17237
17800
|
cast->base.location = cast->opening_loc;
|
17238
17801
|
node = (pm_node_t *) cast;
|
17239
17802
|
} else {
|
17240
17803
|
pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
|
17241
17804
|
pm_node_list_free(&parts);
|
17242
17805
|
|
17243
|
-
|
17244
|
-
expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
|
17245
|
-
|
17806
|
+
expect1_heredoc_term(parser, lex_mode);
|
17246
17807
|
pm_interpolated_string_node_closing_set(cast, &parser->previous);
|
17808
|
+
|
17247
17809
|
cast->base.location = cast->opening_loc;
|
17248
17810
|
node = (pm_node_t *) cast;
|
17249
17811
|
}
|
@@ -17464,7 +18026,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17464
18026
|
pm_token_t in_keyword = parser->previous;
|
17465
18027
|
|
17466
18028
|
pm_constant_id_list_t captures = { 0 };
|
17467
|
-
pm_node_t *pattern = parse_pattern(parser, &captures,
|
18029
|
+
pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
|
17468
18030
|
|
17469
18031
|
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
|
17470
18032
|
pm_constant_id_list_free(&captures);
|
@@ -17493,7 +18055,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17493
18055
|
then_keyword = not_provided(parser);
|
17494
18056
|
}
|
17495
18057
|
} else {
|
17496
|
-
expect1(parser, PM_TOKEN_KEYWORD_THEN,
|
18058
|
+
expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
|
17497
18059
|
then_keyword = parser->previous;
|
17498
18060
|
}
|
17499
18061
|
|
@@ -17947,7 +18509,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17947
18509
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
17948
18510
|
parser->command_start = true;
|
17949
18511
|
|
17950
|
-
|
18512
|
+
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
18513
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
|
18514
|
+
parser->previous.start = parser->previous.end;
|
18515
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
18516
|
+
}
|
18517
|
+
|
17951
18518
|
rparen = parser->previous;
|
17952
18519
|
break;
|
17953
18520
|
}
|
@@ -18145,7 +18712,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18145
18712
|
if (match1(parser, PM_TOKEN_COMMA)) {
|
18146
18713
|
index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
|
18147
18714
|
} else {
|
18148
|
-
index = parse_target(parser, index);
|
18715
|
+
index = parse_target(parser, index, false);
|
18149
18716
|
}
|
18150
18717
|
|
18151
18718
|
context_pop(parser);
|
@@ -18267,9 +18834,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18267
18834
|
pm_token_t double_colon = parser->previous;
|
18268
18835
|
|
18269
18836
|
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
18270
|
-
|
18271
|
-
|
18272
|
-
constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, constant);
|
18837
|
+
constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
|
18273
18838
|
}
|
18274
18839
|
|
18275
18840
|
// Here we retrieve the name of the module. If it wasn't a constant,
|
@@ -18649,15 +19214,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18649
19214
|
// If we hit string content and the current node is
|
18650
19215
|
// an interpolated string, then we need to append
|
18651
19216
|
// the string content to the list of child nodes.
|
18652
|
-
pm_interpolated_string_node_append(
|
19217
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
|
18653
19218
|
} else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
|
18654
19219
|
// If we hit string content and the current node is
|
18655
19220
|
// a string node, then we need to convert the
|
18656
19221
|
// current node into an interpolated string and add
|
18657
19222
|
// the string content to the list of child nodes.
|
18658
19223
|
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
18659
|
-
pm_interpolated_string_node_append(
|
18660
|
-
pm_interpolated_string_node_append(
|
19224
|
+
pm_interpolated_string_node_append(interpolated, current);
|
19225
|
+
pm_interpolated_string_node_append(interpolated, string);
|
18661
19226
|
current = (pm_node_t *) interpolated;
|
18662
19227
|
} else {
|
18663
19228
|
assert(false && "unreachable");
|
@@ -18682,7 +19247,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18682
19247
|
pm_token_t opening = not_provided(parser);
|
18683
19248
|
pm_token_t closing = not_provided(parser);
|
18684
19249
|
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
18685
|
-
pm_interpolated_string_node_append(
|
19250
|
+
pm_interpolated_string_node_append(interpolated, current);
|
18686
19251
|
current = (pm_node_t *) interpolated;
|
18687
19252
|
} else {
|
18688
19253
|
// If we hit an embedded variable and the current
|
@@ -18691,7 +19256,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18691
19256
|
}
|
18692
19257
|
|
18693
19258
|
pm_node_t *part = parse_string_part(parser);
|
18694
|
-
pm_interpolated_string_node_append(
|
19259
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
|
18695
19260
|
break;
|
18696
19261
|
}
|
18697
19262
|
case PM_TOKEN_EMBEXPR_BEGIN: {
|
@@ -18711,7 +19276,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18711
19276
|
pm_token_t opening = not_provided(parser);
|
18712
19277
|
pm_token_t closing = not_provided(parser);
|
18713
19278
|
pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
18714
|
-
pm_interpolated_string_node_append(
|
19279
|
+
pm_interpolated_string_node_append(interpolated, current);
|
18715
19280
|
current = (pm_node_t *) interpolated;
|
18716
19281
|
} else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
|
18717
19282
|
// If we hit an embedded expression and the current
|
@@ -18722,7 +19287,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18722
19287
|
}
|
18723
19288
|
|
18724
19289
|
pm_node_t *part = parse_string_part(parser);
|
18725
|
-
pm_interpolated_string_node_append(
|
19290
|
+
pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
|
18726
19291
|
break;
|
18727
19292
|
}
|
18728
19293
|
default:
|
@@ -18798,6 +19363,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18798
19363
|
pm_token_t opening = not_provided(parser);
|
18799
19364
|
pm_token_t closing = not_provided(parser);
|
18800
19365
|
pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
|
19366
|
+
|
19367
|
+
if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
|
19368
|
+
// This is extremely strange, but the first string part of a
|
19369
|
+
// regular expression will always be tagged as binary if we
|
19370
|
+
// are in a US-ASCII file, no matter its contents.
|
19371
|
+
pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
|
19372
|
+
}
|
19373
|
+
|
18801
19374
|
pm_interpolated_regular_expression_node_append(interpolated, part);
|
18802
19375
|
} else {
|
18803
19376
|
// If the first part of the body of the regular expression is not a
|
@@ -18926,7 +19499,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18926
19499
|
if (match1(parser, PM_TOKEN_COMMA)) {
|
18927
19500
|
return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX);
|
18928
19501
|
} else {
|
18929
|
-
return parse_target_validate(parser, splat);
|
19502
|
+
return parse_target_validate(parser, splat, true);
|
18930
19503
|
}
|
18931
19504
|
}
|
18932
19505
|
case PM_TOKEN_BANG: {
|
@@ -19271,39 +19844,6 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
|
|
19271
19844
|
}
|
19272
19845
|
}
|
19273
19846
|
|
19274
|
-
/**
|
19275
|
-
* Returns true if the name of the capture group is a valid local variable that
|
19276
|
-
* can be written to.
|
19277
|
-
*/
|
19278
|
-
static bool
|
19279
|
-
parse_regular_expression_named_capture(pm_parser_t *parser, const uint8_t *source, size_t length) {
|
19280
|
-
if (length == 0) {
|
19281
|
-
return false;
|
19282
|
-
}
|
19283
|
-
|
19284
|
-
// First ensure that it starts with a valid identifier starting character.
|
19285
|
-
size_t width = char_is_identifier_start(parser, source);
|
19286
|
-
if (!width) {
|
19287
|
-
return false;
|
19288
|
-
}
|
19289
|
-
|
19290
|
-
// Next, ensure that it's not an uppercase character.
|
19291
|
-
if (parser->encoding_changed) {
|
19292
|
-
if (parser->encoding->isupper_char(source, (ptrdiff_t) length)) return false;
|
19293
|
-
} else {
|
19294
|
-
if (pm_encoding_utf_8_isupper_char(source, (ptrdiff_t) length)) return false;
|
19295
|
-
}
|
19296
|
-
|
19297
|
-
// Next, iterate through all of the bytes of the string to ensure that they
|
19298
|
-
// are all valid identifier characters.
|
19299
|
-
const uint8_t *cursor = source + width;
|
19300
|
-
while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
|
19301
|
-
cursor += width;
|
19302
|
-
}
|
19303
|
-
|
19304
|
-
return cursor == source + length;
|
19305
|
-
}
|
19306
|
-
|
19307
19847
|
/**
|
19308
19848
|
* Potentially change a =~ with a regular expression with named captures into a
|
19309
19849
|
* match write node.
|
@@ -19330,7 +19870,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
|
|
19330
19870
|
|
19331
19871
|
// If the name of the capture group isn't a valid identifier, we do
|
19332
19872
|
// not add it to the local table.
|
19333
|
-
if (!
|
19873
|
+
if (!pm_slice_is_valid_local(parser, source, source + length)) continue;
|
19334
19874
|
|
19335
19875
|
if (content->type == PM_STRING_SHARED) {
|
19336
19876
|
// If the unescaped string is a slice of the source, then we can
|
@@ -19788,7 +20328,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
19788
20328
|
// In this case we have an operator but we don't know what it's for.
|
19789
20329
|
// We need to treat it as an error. For now, we'll mark it as an error
|
19790
20330
|
// and just skip right past it.
|
19791
|
-
|
20331
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
|
19792
20332
|
return node;
|
19793
20333
|
}
|
19794
20334
|
}
|
@@ -20059,8 +20599,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20059
20599
|
path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
|
20060
20600
|
} else {
|
20061
20601
|
// Otherwise, this is a constant path. That would look like Foo::Bar.
|
20062
|
-
|
20063
|
-
path = (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
|
20602
|
+
path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
|
20064
20603
|
}
|
20065
20604
|
|
20066
20605
|
// If this is followed by a comma then it is a multiple assignment.
|
@@ -20099,9 +20638,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20099
20638
|
return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
|
20100
20639
|
}
|
20101
20640
|
default: {
|
20102
|
-
|
20103
|
-
|
20104
|
-
return (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
|
20641
|
+
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
|
20642
|
+
return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
|
20105
20643
|
}
|
20106
20644
|
}
|
20107
20645
|
}
|
@@ -20172,7 +20710,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20172
20710
|
parser_lex(parser);
|
20173
20711
|
|
20174
20712
|
pm_constant_id_list_t captures = { 0 };
|
20175
|
-
pm_node_t *pattern = parse_pattern(parser, &captures,
|
20713
|
+
pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
|
20176
20714
|
|
20177
20715
|
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
|
20178
20716
|
pm_constant_id_list_free(&captures);
|
@@ -20189,7 +20727,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20189
20727
|
parser_lex(parser);
|
20190
20728
|
|
20191
20729
|
pm_constant_id_list_t captures = { 0 };
|
20192
|
-
pm_node_t *pattern = parse_pattern(parser, &captures,
|
20730
|
+
pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
|
20193
20731
|
|
20194
20732
|
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
|
20195
20733
|
pm_constant_id_list_free(&captures);
|
@@ -20202,6 +20740,10 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20202
20740
|
}
|
20203
20741
|
}
|
20204
20742
|
|
20743
|
+
#undef PM_PARSE_PATTERN_SINGLE
|
20744
|
+
#undef PM_PARSE_PATTERN_TOP
|
20745
|
+
#undef PM_PARSE_PATTERN_MULTI
|
20746
|
+
|
20205
20747
|
/**
|
20206
20748
|
* Parse an expression at the given point of the parser using the given binding
|
20207
20749
|
* power to parse subsequent chains. If this function finds a syntax error, it
|
@@ -21246,25 +21788,28 @@ pm_parser_errors_format(const pm_parser_t *parser, const pm_list_t *error_list,
|
|
21246
21788
|
pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
|
21247
21789
|
|
21248
21790
|
size_t column = 0;
|
21249
|
-
while (column < error->
|
21250
|
-
|
21251
|
-
pm_buffer_append_byte(buffer, ' ');
|
21252
|
-
} else {
|
21253
|
-
const uint8_t caret = column == error->column_start ? '^' : '~';
|
21791
|
+
while (column < error->column_start) {
|
21792
|
+
pm_buffer_append_byte(buffer, ' ');
|
21254
21793
|
|
21255
|
-
|
21256
|
-
|
21257
|
-
|
21258
|
-
|
21259
|
-
|
21260
|
-
|
21261
|
-
|
21262
|
-
|
21794
|
+
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21795
|
+
column += (char_width == 0 ? 1 : char_width);
|
21796
|
+
}
|
21797
|
+
|
21798
|
+
if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
|
21799
|
+
pm_buffer_append_byte(buffer, '^');
|
21800
|
+
|
21801
|
+
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21802
|
+
column += (char_width == 0 ? 1 : char_width);
|
21803
|
+
|
21804
|
+
while (column < error->column_end) {
|
21805
|
+
pm_buffer_append_byte(buffer, '~');
|
21263
21806
|
|
21264
21807
|
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
21265
21808
|
column += (char_width == 0 ? 1 : char_width);
|
21266
21809
|
}
|
21267
21810
|
|
21811
|
+
if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
|
21812
|
+
|
21268
21813
|
if (inline_messages) {
|
21269
21814
|
pm_buffer_append_byte(buffer, ' ');
|
21270
21815
|
assert(error->error != NULL);
|