prism 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +39 -1
- data/Makefile +1 -1
- data/config.yml +422 -3
- data/docs/build_system.md +8 -11
- data/docs/relocation.md +34 -0
- data/ext/prism/api_node.c +18 -10
- data/ext/prism/extconf.rb +13 -36
- data/ext/prism/extension.c +68 -0
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +427 -3
- data/include/prism/defines.h +22 -7
- data/include/prism/diagnostic.h +1 -0
- data/include/prism/parser.h +25 -12
- data/include/prism/version.h +2 -2
- data/include/prism.h +47 -0
- data/lib/prism/dot_visitor.rb +10 -0
- data/lib/prism/dsl.rb +4 -4
- data/lib/prism/ffi.rb +49 -2
- data/lib/prism/inspect_visitor.rb +2 -0
- data/lib/prism/node.rb +1839 -96
- data/lib/prism/parse_result/errors.rb +1 -1
- data/lib/prism/parse_result.rb +140 -3
- data/lib/prism/reflection.rb +2 -2
- data/lib/prism/relocation.rb +504 -0
- data/lib/prism/serialize.rb +17 -5
- data/lib/prism/string_query.rb +30 -0
- data/lib/prism/translation/parser/compiler.rb +36 -26
- data/lib/prism/translation/parser.rb +3 -3
- data/lib/prism/translation/ripper.rb +1 -5
- data/lib/prism/translation/ruby_parser.rb +14 -5
- data/lib/prism.rb +6 -4
- data/prism.gemspec +7 -1
- data/rbi/prism/dsl.rbi +4 -4
- data/rbi/prism/node.rbi +5118 -1030
- data/rbi/prism/parse_result.rbi +29 -0
- data/rbi/prism/string_query.rbi +12 -0
- data/rbi/prism.rbi +34 -34
- data/sig/prism/dsl.rbs +2 -2
- data/sig/prism/node.rbs +13 -98
- data/sig/prism/parse_result.rbs +20 -0
- data/sig/prism/relocation.rbs +185 -0
- data/sig/prism/string_query.rbs +11 -0
- data/src/diagnostic.c +3 -1
- data/src/node.c +18 -0
- data/src/prettyprint.c +32 -0
- data/src/prism.c +586 -195
- data/src/regexp.c +7 -3
- data/src/serialize.c +12 -0
- data/src/static_literals.c +1 -1
- data/src/util/pm_char.c +1 -1
- data/src/util/pm_string.c +1 -0
- metadata +9 -3
data/src/prism.c
CHANGED
@@ -544,10 +544,7 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
|
|
544
544
|
* token.
|
545
545
|
*/
|
546
546
|
static void
|
547
|
-
pm_parser_err_heredoc_term(pm_parser_t *parser,
|
548
|
-
const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
|
549
|
-
size_t ident_length = lex_mode->as.heredoc.ident_length;
|
550
|
-
|
547
|
+
pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
|
551
548
|
PM_PARSER_ERR_FORMAT(
|
552
549
|
parser,
|
553
550
|
ident_start,
|
@@ -964,7 +961,7 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals,
|
|
964
961
|
if (local->name != PM_CONSTANT_ID_UNSET) {
|
965
962
|
pm_constant_id_list_insert(list, (size_t) local->index, local->name);
|
966
963
|
|
967
|
-
if (warn_unused && local->reads == 0) {
|
964
|
+
if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
|
968
965
|
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
|
969
966
|
|
970
967
|
if (constant->length >= 1 && *constant->start != '_') {
|
@@ -2110,14 +2107,6 @@ pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
|
|
2110
2107
|
return node;
|
2111
2108
|
}
|
2112
2109
|
|
2113
|
-
/**
|
2114
|
-
* Return the size of the given array node.
|
2115
|
-
*/
|
2116
|
-
static inline size_t
|
2117
|
-
pm_array_node_size(pm_array_node_t *node) {
|
2118
|
-
return node->elements.size;
|
2119
|
-
}
|
2120
|
-
|
2121
2110
|
/**
|
2122
2111
|
* Append an argument to an array node.
|
2123
2112
|
*/
|
@@ -4153,7 +4142,7 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
|
|
4153
4142
|
|
4154
4143
|
// If errno is set, then it should only be ERANGE. At this point we need to
|
4155
4144
|
// check if it's infinity (it should be).
|
4156
|
-
if (errno == ERANGE &&
|
4145
|
+
if (errno == ERANGE && PRISM_ISINF(value)) {
|
4157
4146
|
int warn_width;
|
4158
4147
|
const char *ellipsis;
|
4159
4148
|
|
@@ -7695,7 +7684,7 @@ pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen
|
|
7695
7684
|
* Allocate a new UntilNode node.
|
7696
7685
|
*/
|
7697
7686
|
static pm_until_node_t *
|
7698
|
-
pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
|
7687
|
+
pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
|
7699
7688
|
pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
|
7700
7689
|
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
|
7701
7690
|
|
@@ -7710,6 +7699,7 @@ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
|
|
7710
7699
|
},
|
7711
7700
|
},
|
7712
7701
|
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
|
7702
|
+
.do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
|
7713
7703
|
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
|
7714
7704
|
.predicate = predicate,
|
7715
7705
|
.statements = statements
|
@@ -7738,6 +7728,7 @@ pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
|
|
7738
7728
|
},
|
7739
7729
|
},
|
7740
7730
|
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
|
7731
|
+
.do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
7741
7732
|
.closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
7742
7733
|
.predicate = predicate,
|
7743
7734
|
.statements = statements
|
@@ -7805,7 +7796,7 @@ pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statemen
|
|
7805
7796
|
* Allocate a new WhileNode node.
|
7806
7797
|
*/
|
7807
7798
|
static pm_while_node_t *
|
7808
|
-
pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
|
7799
|
+
pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
|
7809
7800
|
pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
|
7810
7801
|
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
|
7811
7802
|
|
@@ -7820,6 +7811,7 @@ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
|
|
7820
7811
|
},
|
7821
7812
|
},
|
7822
7813
|
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
|
7814
|
+
.do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
|
7823
7815
|
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
|
7824
7816
|
.predicate = predicate,
|
7825
7817
|
.statements = statements
|
@@ -7848,6 +7840,7 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
|
|
7848
7840
|
},
|
7849
7841
|
},
|
7850
7842
|
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
|
7843
|
+
.do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
7851
7844
|
.closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
7852
7845
|
.predicate = predicate,
|
7853
7846
|
.statements = statements
|
@@ -7870,6 +7863,7 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
|
|
7870
7863
|
.location = PM_LOCATION_NULL_VALUE(parser)
|
7871
7864
|
},
|
7872
7865
|
.keyword_loc = PM_LOCATION_NULL_VALUE(parser),
|
7866
|
+
.do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
|
7873
7867
|
.closing_loc = PM_LOCATION_NULL_VALUE(parser),
|
7874
7868
|
.predicate = predicate,
|
7875
7869
|
.statements = statements
|
@@ -8573,6 +8567,7 @@ context_terminator(pm_context_t context, pm_token_t *token) {
|
|
8573
8567
|
case PM_CONTEXT_MAIN:
|
8574
8568
|
case PM_CONTEXT_DEF_PARAMS:
|
8575
8569
|
case PM_CONTEXT_DEFINED:
|
8570
|
+
case PM_CONTEXT_MULTI_TARGET:
|
8576
8571
|
case PM_CONTEXT_TERNARY:
|
8577
8572
|
case PM_CONTEXT_RESCUE_MODIFIER:
|
8578
8573
|
return token->type == PM_TOKEN_EOF;
|
@@ -8777,6 +8772,7 @@ context_human(pm_context_t context) {
|
|
8777
8772
|
case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
|
8778
8773
|
case PM_CONTEXT_MAIN: return "top level context";
|
8779
8774
|
case PM_CONTEXT_MODULE: return "module definition";
|
8775
|
+
case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
|
8780
8776
|
case PM_CONTEXT_PARENS: return "parentheses";
|
8781
8777
|
case PM_CONTEXT_POSTEXE: return "'END' block";
|
8782
8778
|
case PM_CONTEXT_PREDICATE: return "predicate";
|
@@ -9051,6 +9047,10 @@ lex_global_variable(pm_parser_t *parser) {
|
|
9051
9047
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
9052
9048
|
}
|
9053
9049
|
|
9050
|
+
// True if multiple characters are allowed after the declaration of the
|
9051
|
+
// global variable. Not true when it starts with "$-".
|
9052
|
+
bool allow_multiple = true;
|
9053
|
+
|
9054
9054
|
switch (*parser->current.end) {
|
9055
9055
|
case '~': // $~: match-data
|
9056
9056
|
case '*': // $*: argv
|
@@ -9109,14 +9109,15 @@ lex_global_variable(pm_parser_t *parser) {
|
|
9109
9109
|
|
9110
9110
|
case '-':
|
9111
9111
|
parser->current.end++;
|
9112
|
-
|
9112
|
+
allow_multiple = false;
|
9113
|
+
PRISM_FALLTHROUGH
|
9113
9114
|
default: {
|
9114
9115
|
size_t width;
|
9115
9116
|
|
9116
9117
|
if ((width = char_is_identifier(parser, parser->current.end)) > 0) {
|
9117
9118
|
do {
|
9118
9119
|
parser->current.end += width;
|
9119
|
-
} while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
|
9120
|
+
} while (allow_multiple && parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
|
9120
9121
|
} else if (pm_char_is_whitespace(peek(parser))) {
|
9121
9122
|
// If we get here, then we have a $ followed by whitespace,
|
9122
9123
|
// which is not allowed.
|
@@ -9881,6 +9882,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9881
9882
|
}
|
9882
9883
|
case 'c': {
|
9883
9884
|
parser->current.end++;
|
9885
|
+
if (flags & PM_ESCAPE_FLAG_CONTROL) {
|
9886
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
|
9887
|
+
}
|
9888
|
+
|
9884
9889
|
if (parser->current.end == parser->end) {
|
9885
9890
|
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
|
9886
9891
|
return;
|
@@ -9894,10 +9899,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9894
9899
|
return;
|
9895
9900
|
}
|
9896
9901
|
case '\\':
|
9897
|
-
if (flags & PM_ESCAPE_FLAG_CONTROL) {
|
9898
|
-
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
|
9899
|
-
return;
|
9900
|
-
}
|
9901
9902
|
parser->current.end++;
|
9902
9903
|
|
9903
9904
|
if (match(parser, 'u') || match(parser, 'U')) {
|
@@ -9931,6 +9932,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9931
9932
|
}
|
9932
9933
|
case 'C': {
|
9933
9934
|
parser->current.end++;
|
9935
|
+
if (flags & PM_ESCAPE_FLAG_CONTROL) {
|
9936
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
|
9937
|
+
}
|
9938
|
+
|
9934
9939
|
if (peek(parser) != '-') {
|
9935
9940
|
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9936
9941
|
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
|
@@ -9951,10 +9956,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9951
9956
|
return;
|
9952
9957
|
}
|
9953
9958
|
case '\\':
|
9954
|
-
if (flags & PM_ESCAPE_FLAG_CONTROL) {
|
9955
|
-
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
|
9956
|
-
return;
|
9957
|
-
}
|
9958
9959
|
parser->current.end++;
|
9959
9960
|
|
9960
9961
|
if (match(parser, 'u') || match(parser, 'U')) {
|
@@ -9989,6 +9990,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9989
9990
|
}
|
9990
9991
|
case 'M': {
|
9991
9992
|
parser->current.end++;
|
9993
|
+
if (flags & PM_ESCAPE_FLAG_META) {
|
9994
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
|
9995
|
+
}
|
9996
|
+
|
9992
9997
|
if (peek(parser) != '-') {
|
9993
9998
|
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9994
9999
|
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
|
@@ -10004,10 +10009,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
10004
10009
|
uint8_t peeked = peek(parser);
|
10005
10010
|
switch (peeked) {
|
10006
10011
|
case '\\':
|
10007
|
-
if (flags & PM_ESCAPE_FLAG_META) {
|
10008
|
-
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
|
10009
|
-
return;
|
10010
|
-
}
|
10011
10012
|
parser->current.end++;
|
10012
10013
|
|
10013
10014
|
if (match(parser, 'u') || match(parser, 'U')) {
|
@@ -10045,11 +10046,13 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
10045
10046
|
escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
|
10046
10047
|
return;
|
10047
10048
|
}
|
10049
|
+
PRISM_FALLTHROUGH
|
10048
10050
|
}
|
10049
|
-
/* fallthrough */
|
10050
10051
|
default: {
|
10051
10052
|
if (parser->current.end < parser->end) {
|
10052
10053
|
escape_write_escape_encoded(parser, buffer);
|
10054
|
+
} else {
|
10055
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
10053
10056
|
}
|
10054
10057
|
return;
|
10055
10058
|
}
|
@@ -10498,6 +10501,7 @@ pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
|
10498
10501
|
}
|
10499
10502
|
|
10500
10503
|
const uint8_t *end = parser->current.end - 1;
|
10504
|
+
assert(end >= start);
|
10501
10505
|
pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
|
10502
10506
|
|
10503
10507
|
token_buffer->cursor = end;
|
@@ -10578,9 +10582,15 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
|
|
10578
10582
|
pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
|
10579
10583
|
}
|
10580
10584
|
|
10581
|
-
|
10582
|
-
parser->current.end += eol_length;
|
10585
|
+
uint8_t delimiter = *parser->current.end;
|
10583
10586
|
|
10587
|
+
// If our delimiter is \r\n, we want to treat it as if it's \n.
|
10588
|
+
// For example, %\r\nfoo\r\n should be "foo"
|
10589
|
+
if (eol_length == 2) {
|
10590
|
+
delimiter = *(parser->current.end + 1);
|
10591
|
+
}
|
10592
|
+
|
10593
|
+
parser->current.end += eol_length;
|
10584
10594
|
return delimiter;
|
10585
10595
|
}
|
10586
10596
|
|
@@ -10690,6 +10700,14 @@ parser_lex(pm_parser_t *parser) {
|
|
10690
10700
|
// We'll check if we're at the end of the file. If we are, then we
|
10691
10701
|
// need to return the EOF token.
|
10692
10702
|
if (parser->current.end >= parser->end) {
|
10703
|
+
// If we hit EOF, but the EOF came immediately after a newline,
|
10704
|
+
// set the start of the token to the newline. This way any EOF
|
10705
|
+
// errors will be reported as happening on that line rather than
|
10706
|
+
// a line after. For example "foo(\n" should report an error
|
10707
|
+
// on line 1 even though EOF technically occurs on line 2.
|
10708
|
+
if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
|
10709
|
+
parser->current.start -= 1;
|
10710
|
+
}
|
10693
10711
|
LEX(PM_TOKEN_EOF);
|
10694
10712
|
}
|
10695
10713
|
|
@@ -10732,7 +10750,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10732
10750
|
|
10733
10751
|
lexed_comment = true;
|
10734
10752
|
}
|
10735
|
-
|
10753
|
+
PRISM_FALLTHROUGH
|
10736
10754
|
case '\r':
|
10737
10755
|
case '\n': {
|
10738
10756
|
parser->semantic_token_seen = semantic_token_seen & 0x1;
|
@@ -10774,7 +10792,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10774
10792
|
parser->current.type = PM_TOKEN_NEWLINE;
|
10775
10793
|
return;
|
10776
10794
|
}
|
10777
|
-
|
10795
|
+
PRISM_FALLTHROUGH
|
10778
10796
|
case PM_IGNORED_NEWLINE_ALL:
|
10779
10797
|
if (!lexed_comment) parser_lex_ignored_newline(parser);
|
10780
10798
|
lexed_comment = false;
|
@@ -10871,6 +10889,10 @@ parser_lex(pm_parser_t *parser) {
|
|
10871
10889
|
|
10872
10890
|
// ,
|
10873
10891
|
case ',':
|
10892
|
+
if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
|
10893
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
|
10894
|
+
}
|
10895
|
+
|
10874
10896
|
lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
|
10875
10897
|
LEX(PM_TOKEN_COMMA);
|
10876
10898
|
|
@@ -11153,12 +11175,14 @@ parser_lex(pm_parser_t *parser) {
|
|
11153
11175
|
lex_mode_push(parser, (pm_lex_mode_t) {
|
11154
11176
|
.mode = PM_LEX_HEREDOC,
|
11155
11177
|
.as.heredoc = {
|
11156
|
-
.
|
11157
|
-
|
11178
|
+
.base = {
|
11179
|
+
.ident_start = ident_start,
|
11180
|
+
.ident_length = ident_length,
|
11181
|
+
.quote = quote,
|
11182
|
+
.indent = indent
|
11183
|
+
},
|
11158
11184
|
.next_start = parser->current.end,
|
11159
|
-
.
|
11160
|
-
.indent = indent,
|
11161
|
-
.common_whitespace = (size_t) -1,
|
11185
|
+
.common_whitespace = NULL,
|
11162
11186
|
.line_continuation = false
|
11163
11187
|
}
|
11164
11188
|
});
|
@@ -11171,7 +11195,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11171
11195
|
// this is not a valid heredoc declaration. In this case we
|
11172
11196
|
// will add an error, but we will still return a heredoc
|
11173
11197
|
// start.
|
11174
|
-
if (!ident_error) pm_parser_err_heredoc_term(parser,
|
11198
|
+
if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
|
11175
11199
|
body_start = parser->end;
|
11176
11200
|
} else {
|
11177
11201
|
// Otherwise, we want to indicate that the body of the
|
@@ -11783,7 +11807,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11783
11807
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
|
11784
11808
|
break;
|
11785
11809
|
}
|
11786
|
-
|
11810
|
+
PRISM_FALLTHROUGH
|
11787
11811
|
default:
|
11788
11812
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
|
11789
11813
|
break;
|
@@ -11980,7 +12004,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11980
12004
|
pm_token_buffer_push_byte(&token_buffer, '\r');
|
11981
12005
|
break;
|
11982
12006
|
}
|
11983
|
-
|
12007
|
+
PRISM_FALLTHROUGH
|
11984
12008
|
case '\n':
|
11985
12009
|
pm_token_buffer_push_byte(&token_buffer, '\n');
|
11986
12010
|
|
@@ -12084,9 +12108,28 @@ parser_lex(pm_parser_t *parser) {
|
|
12084
12108
|
pm_regexp_token_buffer_t token_buffer = { 0 };
|
12085
12109
|
|
12086
12110
|
while (breakpoint != NULL) {
|
12111
|
+
uint8_t term = lex_mode->as.regexp.terminator;
|
12112
|
+
bool is_terminator = (*breakpoint == term);
|
12113
|
+
|
12114
|
+
// If the terminator is newline, we need to consider \r\n _also_ a newline
|
12115
|
+
// For example: `%\nfoo\r\n`
|
12116
|
+
// The string should be "foo", not "foo\r"
|
12117
|
+
if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
|
12118
|
+
if (term == '\n') {
|
12119
|
+
is_terminator = true;
|
12120
|
+
}
|
12121
|
+
|
12122
|
+
// If the terminator is a CR, but we see a CRLF, we need to
|
12123
|
+
// treat the CRLF as a newline, meaning this is _not_ the
|
12124
|
+
// terminator
|
12125
|
+
if (term == '\r') {
|
12126
|
+
is_terminator = false;
|
12127
|
+
}
|
12128
|
+
}
|
12129
|
+
|
12087
12130
|
// If we hit the terminator, we need to determine what kind of
|
12088
12131
|
// token to return.
|
12089
|
-
if (
|
12132
|
+
if (is_terminator) {
|
12090
12133
|
if (lex_mode->as.regexp.nesting > 0) {
|
12091
12134
|
parser->current.end = breakpoint + 1;
|
12092
12135
|
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
@@ -12148,7 +12191,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12148
12191
|
pm_regexp_token_buffer_escape(parser, &token_buffer);
|
12149
12192
|
token_buffer.base.cursor = breakpoint;
|
12150
12193
|
|
12151
|
-
|
12194
|
+
PRISM_FALLTHROUGH
|
12152
12195
|
case '\n':
|
12153
12196
|
// If we've hit a newline, then we need to track that in
|
12154
12197
|
// the list of newlines.
|
@@ -12190,7 +12233,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12190
12233
|
pm_token_buffer_push_byte(&token_buffer.base, '\r');
|
12191
12234
|
break;
|
12192
12235
|
}
|
12193
|
-
|
12236
|
+
PRISM_FALLTHROUGH
|
12194
12237
|
case '\n':
|
12195
12238
|
if (parser->heredoc_end) {
|
12196
12239
|
// ... if we are on the same line as a heredoc,
|
@@ -12316,10 +12359,29 @@ parser_lex(pm_parser_t *parser) {
|
|
12316
12359
|
continue;
|
12317
12360
|
}
|
12318
12361
|
|
12362
|
+
uint8_t term = lex_mode->as.string.terminator;
|
12363
|
+
bool is_terminator = (*breakpoint == term);
|
12364
|
+
|
12365
|
+
// If the terminator is newline, we need to consider \r\n _also_ a newline
|
12366
|
+
// For example: `%r\nfoo\r\n`
|
12367
|
+
// The string should be /foo/, not /foo\r/
|
12368
|
+
if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
|
12369
|
+
if (term == '\n') {
|
12370
|
+
is_terminator = true;
|
12371
|
+
}
|
12372
|
+
|
12373
|
+
// If the terminator is a CR, but we see a CRLF, we need to
|
12374
|
+
// treat the CRLF as a newline, meaning this is _not_ the
|
12375
|
+
// terminator
|
12376
|
+
if (term == '\r') {
|
12377
|
+
is_terminator = false;
|
12378
|
+
}
|
12379
|
+
}
|
12380
|
+
|
12319
12381
|
// Note that we have to check the terminator here first because we could
|
12320
12382
|
// potentially be parsing a % string that has a # character as the
|
12321
12383
|
// terminator.
|
12322
|
-
if (
|
12384
|
+
if (is_terminator) {
|
12323
12385
|
// If this terminator doesn't actually close the string, then we need
|
12324
12386
|
// to continue on past it.
|
12325
12387
|
if (lex_mode->as.string.nesting > 0) {
|
@@ -12379,7 +12441,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12379
12441
|
pm_token_buffer_escape(parser, &token_buffer);
|
12380
12442
|
token_buffer.cursor = breakpoint;
|
12381
12443
|
|
12382
|
-
|
12444
|
+
PRISM_FALLTHROUGH
|
12383
12445
|
case '\n':
|
12384
12446
|
// When we hit a newline, we need to flush any potential
|
12385
12447
|
// heredocs. Note that this has to happen after we check
|
@@ -12424,7 +12486,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12424
12486
|
pm_token_buffer_push_byte(&token_buffer, '\r');
|
12425
12487
|
break;
|
12426
12488
|
}
|
12427
|
-
|
12489
|
+
PRISM_FALLTHROUGH
|
12428
12490
|
case '\n':
|
12429
12491
|
if (!lex_mode->as.string.interpolation) {
|
12430
12492
|
pm_token_buffer_push_byte(&token_buffer, '\\');
|
@@ -12514,6 +12576,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12514
12576
|
// Now let's grab the information about the identifier off of the
|
12515
12577
|
// current lex mode.
|
12516
12578
|
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
12579
|
+
pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
|
12517
12580
|
|
12518
12581
|
bool line_continuation = lex_mode->as.heredoc.line_continuation;
|
12519
12582
|
lex_mode->as.heredoc.line_continuation = false;
|
@@ -12523,15 +12586,16 @@ parser_lex(pm_parser_t *parser) {
|
|
12523
12586
|
// terminator) but still continue parsing so that content after the
|
12524
12587
|
// declaration of the heredoc can be parsed.
|
12525
12588
|
if (parser->current.end >= parser->end) {
|
12526
|
-
pm_parser_err_heredoc_term(parser,
|
12589
|
+
pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
|
12527
12590
|
parser->next_start = lex_mode->as.heredoc.next_start;
|
12528
12591
|
parser->heredoc_end = parser->current.end;
|
12529
12592
|
lex_state_set(parser, PM_LEX_STATE_END);
|
12593
|
+
lex_mode_pop(parser);
|
12530
12594
|
LEX(PM_TOKEN_HEREDOC_END);
|
12531
12595
|
}
|
12532
12596
|
|
12533
|
-
const uint8_t *ident_start =
|
12534
|
-
size_t ident_length =
|
12597
|
+
const uint8_t *ident_start = heredoc_lex_mode->ident_start;
|
12598
|
+
size_t ident_length = heredoc_lex_mode->ident_length;
|
12535
12599
|
|
12536
12600
|
// If we are immediately following a newline and we have hit the
|
12537
12601
|
// terminator, then we need to return the ending of the heredoc.
|
@@ -12556,10 +12620,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12556
12620
|
const uint8_t *terminator_start = ident_end - ident_length;
|
12557
12621
|
const uint8_t *cursor = start;
|
12558
12622
|
|
12559
|
-
if (
|
12560
|
-
lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_DASH ||
|
12561
|
-
lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE
|
12562
|
-
) {
|
12623
|
+
if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
|
12563
12624
|
while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
|
12564
12625
|
cursor++;
|
12565
12626
|
}
|
@@ -12582,17 +12643,19 @@ parser_lex(pm_parser_t *parser) {
|
|
12582
12643
|
}
|
12583
12644
|
|
12584
12645
|
lex_state_set(parser, PM_LEX_STATE_END);
|
12646
|
+
lex_mode_pop(parser);
|
12585
12647
|
LEX(PM_TOKEN_HEREDOC_END);
|
12586
12648
|
}
|
12587
12649
|
}
|
12588
12650
|
|
12589
|
-
size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start,
|
12651
|
+
size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
|
12590
12652
|
if (
|
12591
|
-
|
12592
|
-
|
12653
|
+
heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
|
12654
|
+
lex_mode->as.heredoc.common_whitespace != NULL &&
|
12655
|
+
(*lex_mode->as.heredoc.common_whitespace > whitespace) &&
|
12593
12656
|
peek_at(parser, start) != '\n'
|
12594
12657
|
) {
|
12595
|
-
lex_mode->as.heredoc.common_whitespace = whitespace;
|
12658
|
+
*lex_mode->as.heredoc.common_whitespace = whitespace;
|
12596
12659
|
}
|
12597
12660
|
}
|
12598
12661
|
|
@@ -12601,7 +12664,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12601
12664
|
// strpbrk to find the first of these characters.
|
12602
12665
|
uint8_t breakpoints[] = "\r\n\\#";
|
12603
12666
|
|
12604
|
-
pm_heredoc_quote_t quote =
|
12667
|
+
pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
|
12605
12668
|
if (quote == PM_HEREDOC_QUOTE_SINGLE) {
|
12606
12669
|
breakpoints[3] = '\0';
|
12607
12670
|
}
|
@@ -12631,7 +12694,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12631
12694
|
pm_token_buffer_escape(parser, &token_buffer);
|
12632
12695
|
token_buffer.cursor = breakpoint;
|
12633
12696
|
|
12634
|
-
|
12697
|
+
PRISM_FALLTHROUGH
|
12635
12698
|
case '\n': {
|
12636
12699
|
if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
|
12637
12700
|
parser_flush_heredoc_end(parser);
|
@@ -12664,8 +12727,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12664
12727
|
// leading whitespace if we have a - or ~ heredoc.
|
12665
12728
|
const uint8_t *cursor = start;
|
12666
12729
|
|
12667
|
-
if (
|
12668
|
-
lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
|
12730
|
+
if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
|
12669
12731
|
while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
|
12670
12732
|
cursor++;
|
12671
12733
|
}
|
@@ -12681,16 +12743,16 @@ parser_lex(pm_parser_t *parser) {
|
|
12681
12743
|
}
|
12682
12744
|
}
|
12683
12745
|
|
12684
|
-
size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
|
12746
|
+
size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
|
12685
12747
|
|
12686
12748
|
// If we have hit a newline that is followed by a valid
|
12687
12749
|
// terminator, then we need to return the content of the
|
12688
12750
|
// heredoc here as string content. Then, the next time a
|
12689
12751
|
// token is lexed, it will match again and return the
|
12690
12752
|
// end of the heredoc.
|
12691
|
-
if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
|
12692
|
-
if ((lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
|
12693
|
-
lex_mode->as.heredoc.common_whitespace = whitespace;
|
12753
|
+
if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
|
12754
|
+
if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
|
12755
|
+
*lex_mode->as.heredoc.common_whitespace = whitespace;
|
12694
12756
|
}
|
12695
12757
|
|
12696
12758
|
parser->current.end = breakpoint + 1;
|
@@ -12732,7 +12794,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12732
12794
|
pm_token_buffer_push_byte(&token_buffer, '\r');
|
12733
12795
|
break;
|
12734
12796
|
}
|
12735
|
-
|
12797
|
+
PRISM_FALLTHROUGH
|
12736
12798
|
case '\n':
|
12737
12799
|
pm_token_buffer_push_byte(&token_buffer, '\\');
|
12738
12800
|
pm_token_buffer_push_byte(&token_buffer, '\n');
|
@@ -12752,12 +12814,12 @@ parser_lex(pm_parser_t *parser) {
|
|
12752
12814
|
pm_token_buffer_push_byte(&token_buffer, '\r');
|
12753
12815
|
break;
|
12754
12816
|
}
|
12755
|
-
|
12817
|
+
PRISM_FALLTHROUGH
|
12756
12818
|
case '\n':
|
12757
12819
|
// If we are in a tilde here, we should
|
12758
12820
|
// break out of the loop and return the
|
12759
12821
|
// string content.
|
12760
|
-
if (
|
12822
|
+
if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
|
12761
12823
|
const uint8_t *end = parser->current.end;
|
12762
12824
|
pm_newline_list_append(&parser->newline_list, end);
|
12763
12825
|
|
@@ -12983,7 +13045,7 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
|
|
12983
13045
|
[PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
|
12984
13046
|
[PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
|
12985
13047
|
[PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
|
12986
|
-
[PM_TOKEN_USTAR] =
|
13048
|
+
[PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
|
12987
13049
|
|
12988
13050
|
// -@
|
12989
13051
|
[PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
|
@@ -13044,14 +13106,6 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
|
|
13044
13106
|
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
|
13045
13107
|
}
|
13046
13108
|
|
13047
|
-
/**
|
13048
|
-
* Returns true if the current token is any of the six given types.
|
13049
|
-
*/
|
13050
|
-
static inline bool
|
13051
|
-
match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
|
13052
|
-
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
|
13053
|
-
}
|
13054
|
-
|
13055
13109
|
/**
|
13056
13110
|
* Returns true if the current token is any of the seven given types.
|
13057
13111
|
*/
|
@@ -13068,6 +13122,14 @@ match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
|
|
13068
13122
|
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
|
13069
13123
|
}
|
13070
13124
|
|
13125
|
+
/**
|
13126
|
+
* Returns true if the current token is any of the nine given types.
|
13127
|
+
*/
|
13128
|
+
static inline bool
|
13129
|
+
match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
|
13130
|
+
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
|
13131
|
+
}
|
13132
|
+
|
13071
13133
|
/**
|
13072
13134
|
* If the current token is of the specified type, lex forward by one token and
|
13073
13135
|
* return true. Otherwise, return false. For example:
|
@@ -13096,19 +13158,6 @@ accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
|
|
13096
13158
|
return false;
|
13097
13159
|
}
|
13098
13160
|
|
13099
|
-
/**
|
13100
|
-
* If the current token is any of the three given types, lex forward by one
|
13101
|
-
* token and return true. Otherwise return false.
|
13102
|
-
*/
|
13103
|
-
static inline bool
|
13104
|
-
accept3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
|
13105
|
-
if (match3(parser, type1, type2, type3)) {
|
13106
|
-
parser_lex(parser);
|
13107
|
-
return true;
|
13108
|
-
}
|
13109
|
-
return false;
|
13110
|
-
}
|
13111
|
-
|
13112
13161
|
/**
|
13113
13162
|
* This function indicates that the parser expects a token in a specific
|
13114
13163
|
* position. For example, if you're parsing a BEGIN block, you know that a { is
|
@@ -13146,32 +13195,16 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di
|
|
13146
13195
|
parser->previous.type = PM_TOKEN_MISSING;
|
13147
13196
|
}
|
13148
13197
|
|
13149
|
-
/**
|
13150
|
-
* This function is the same as expect2, but it expects one of three token types.
|
13151
|
-
*/
|
13152
|
-
static void
|
13153
|
-
expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_diagnostic_id_t diag_id) {
|
13154
|
-
if (accept3(parser, type1, type2, type3)) return;
|
13155
|
-
|
13156
|
-
const uint8_t *location = parser->previous.end;
|
13157
|
-
pm_parser_err(parser, location, location, diag_id);
|
13158
|
-
|
13159
|
-
parser->previous.start = location;
|
13160
|
-
parser->previous.type = PM_TOKEN_MISSING;
|
13161
|
-
}
|
13162
|
-
|
13163
13198
|
/**
|
13164
13199
|
* A special expect1 that expects a heredoc terminator and handles popping the
|
13165
13200
|
* lex mode accordingly.
|
13166
13201
|
*/
|
13167
13202
|
static void
|
13168
|
-
expect1_heredoc_term(pm_parser_t *parser,
|
13203
|
+
expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
|
13169
13204
|
if (match1(parser, PM_TOKEN_HEREDOC_END)) {
|
13170
|
-
lex_mode_pop(parser);
|
13171
13205
|
parser_lex(parser);
|
13172
13206
|
} else {
|
13173
|
-
pm_parser_err_heredoc_term(parser,
|
13174
|
-
lex_mode_pop(parser);
|
13207
|
+
pm_parser_err_heredoc_term(parser, ident_start, ident_length);
|
13175
13208
|
parser->previous.start = parser->previous.end;
|
13176
13209
|
parser->previous.type = PM_TOKEN_MISSING;
|
13177
13210
|
}
|
@@ -13503,7 +13536,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
|
|
13503
13536
|
return (pm_node_t *) pm_index_target_node_create(parser, call);
|
13504
13537
|
}
|
13505
13538
|
}
|
13506
|
-
|
13539
|
+
PRISM_FALLTHROUGH
|
13507
13540
|
default:
|
13508
13541
|
// In this case we have a node that we don't know how to convert
|
13509
13542
|
// into a target. We need to treat it as an error. For now, we'll
|
@@ -13585,7 +13618,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
13585
13618
|
case PM_BACK_REFERENCE_READ_NODE:
|
13586
13619
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
13587
13620
|
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
13588
|
-
|
13621
|
+
PRISM_FALLTHROUGH
|
13589
13622
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
13590
13623
|
pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
|
13591
13624
|
pm_node_destroy(parser, target);
|
@@ -13712,6 +13745,9 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
13712
13745
|
|
13713
13746
|
// Replace the name with "[]=".
|
13714
13747
|
call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
|
13748
|
+
|
13749
|
+
// Ensure that the arguments for []= don't contain keywords
|
13750
|
+
pm_index_arguments_check(parser, call->arguments, call->block);
|
13715
13751
|
pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
|
13716
13752
|
|
13717
13753
|
return target;
|
@@ -13724,7 +13760,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
13724
13760
|
// is no way for us to attach it to the tree at this point.
|
13725
13761
|
pm_node_destroy(parser, value);
|
13726
13762
|
}
|
13727
|
-
|
13763
|
+
PRISM_FALLTHROUGH
|
13728
13764
|
default:
|
13729
13765
|
// In this case we have a node that we don't know how to convert into a
|
13730
13766
|
// target. We need to treat it as an error. For now, we'll mark it as an
|
@@ -13797,6 +13833,13 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13797
13833
|
pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
|
13798
13834
|
pm_multi_target_node_targets_append(parser, result, splat);
|
13799
13835
|
has_rest = true;
|
13836
|
+
} else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
|
13837
|
+
context_push(parser, PM_CONTEXT_MULTI_TARGET);
|
13838
|
+
pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
|
13839
|
+
target = parse_target(parser, target, true, false);
|
13840
|
+
|
13841
|
+
pm_multi_target_node_targets_append(parser, result, target);
|
13842
|
+
context_pop(parser);
|
13800
13843
|
} else if (token_begins_expression_p(parser->current.type)) {
|
13801
13844
|
pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
|
13802
13845
|
target = parse_target(parser, target, true, false);
|
@@ -14108,8 +14151,8 @@ static void
|
|
14108
14151
|
parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
|
14109
14152
|
pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
|
14110
14153
|
|
14111
|
-
// First we need to check if the next token is one that could be the start
|
14112
|
-
// an argument. If it's not, then we can just return.
|
14154
|
+
// First we need to check if the next token is one that could be the start
|
14155
|
+
// of an argument. If it's not, then we can just return.
|
14113
14156
|
if (
|
14114
14157
|
match2(parser, terminator, PM_TOKEN_EOF) ||
|
14115
14158
|
(binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
|
@@ -14186,6 +14229,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
14186
14229
|
if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
|
14187
14230
|
pm_parser_scope_forwarding_positionals_check(parser, &operator);
|
14188
14231
|
argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
|
14232
|
+
if (parsed_bare_hash) {
|
14233
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
|
14234
|
+
}
|
14189
14235
|
} else {
|
14190
14236
|
pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
|
14191
14237
|
|
@@ -14234,7 +14280,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
14234
14280
|
}
|
14235
14281
|
}
|
14236
14282
|
}
|
14237
|
-
|
14283
|
+
PRISM_FALLTHROUGH
|
14238
14284
|
default: {
|
14239
14285
|
if (argument == NULL) {
|
14240
14286
|
argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
|
@@ -14297,23 +14343,32 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
14297
14343
|
// If parsing the argument failed, we need to stop parsing arguments.
|
14298
14344
|
if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
|
14299
14345
|
|
14300
|
-
// If the terminator of these arguments is not EOF, then we have a
|
14301
|
-
// token we're looking for. In that case we can accept a
|
14302
|
-
// because it is not functioning as a statement terminator.
|
14303
|
-
|
14346
|
+
// If the terminator of these arguments is not EOF, then we have a
|
14347
|
+
// specific token we're looking for. In that case we can accept a
|
14348
|
+
// newline here because it is not functioning as a statement terminator.
|
14349
|
+
bool accepted_newline = false;
|
14350
|
+
if (terminator != PM_TOKEN_EOF) {
|
14351
|
+
accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
|
14352
|
+
}
|
14304
14353
|
|
14305
14354
|
if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
|
14306
|
-
// If we previously were on a comma and we just parsed a bare hash,
|
14307
|
-
// we want to continue parsing arguments. This is because the
|
14308
|
-
// grabbed up by the hash parser.
|
14355
|
+
// If we previously were on a comma and we just parsed a bare hash,
|
14356
|
+
// then we want to continue parsing arguments. This is because the
|
14357
|
+
// comma was grabbed up by the hash parser.
|
14358
|
+
} else if (accept1(parser, PM_TOKEN_COMMA)) {
|
14359
|
+
// If there was a comma, then we need to check if we also accepted a
|
14360
|
+
// newline. If we did, then this is a syntax error.
|
14361
|
+
if (accepted_newline) {
|
14362
|
+
pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
|
14363
|
+
}
|
14309
14364
|
} else {
|
14310
|
-
// If there is no comma at the end of the argument list then we're
|
14311
|
-
// parsing arguments and can break out of this loop.
|
14312
|
-
|
14365
|
+
// If there is no comma at the end of the argument list then we're
|
14366
|
+
// done parsing arguments and can break out of this loop.
|
14367
|
+
break;
|
14313
14368
|
}
|
14314
14369
|
|
14315
|
-
// If we hit the terminator, then that means we have a trailing comma so
|
14316
|
-
// can accept that output as well.
|
14370
|
+
// If we hit the terminator, then that means we have a trailing comma so
|
14371
|
+
// we can accept that output as well.
|
14317
14372
|
if (match1(parser, terminator)) break;
|
14318
14373
|
}
|
14319
14374
|
}
|
@@ -14468,15 +14523,17 @@ parse_parameters(
|
|
14468
14523
|
bool allows_trailing_comma,
|
14469
14524
|
bool allows_forwarding_parameters,
|
14470
14525
|
bool accepts_blocks_in_defaults,
|
14526
|
+
bool in_block,
|
14471
14527
|
uint16_t depth
|
14472
14528
|
) {
|
14473
|
-
pm_parameters_node_t *params = pm_parameters_node_create(parser);
|
14474
|
-
bool looping = true;
|
14475
|
-
|
14476
14529
|
pm_do_loop_stack_push(parser, false);
|
14530
|
+
|
14531
|
+
pm_parameters_node_t *params = pm_parameters_node_create(parser);
|
14477
14532
|
pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
|
14478
14533
|
|
14479
|
-
|
14534
|
+
while (true) {
|
14535
|
+
bool parsing = true;
|
14536
|
+
|
14480
14537
|
switch (parser->current.type) {
|
14481
14538
|
case PM_TOKEN_PARENTHESIS_LEFT: {
|
14482
14539
|
update_parameter_state(parser, &parser->current, &order);
|
@@ -14611,7 +14668,7 @@ parse_parameters(
|
|
14611
14668
|
// then we can put a missing node in its place and stop parsing the
|
14612
14669
|
// parameters entirely now.
|
14613
14670
|
if (parser->recovering) {
|
14614
|
-
|
14671
|
+
parsing = false;
|
14615
14672
|
break;
|
14616
14673
|
}
|
14617
14674
|
} else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
|
@@ -14631,7 +14688,7 @@ parse_parameters(
|
|
14631
14688
|
break;
|
14632
14689
|
}
|
14633
14690
|
case PM_TOKEN_LABEL: {
|
14634
|
-
if (!uses_parentheses) parser->in_keyword_arg = true;
|
14691
|
+
if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
|
14635
14692
|
update_parameter_state(parser, &parser->current, &order);
|
14636
14693
|
|
14637
14694
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
@@ -14669,7 +14726,7 @@ parse_parameters(
|
|
14669
14726
|
context_pop(parser);
|
14670
14727
|
|
14671
14728
|
if (uses_parentheses) {
|
14672
|
-
|
14729
|
+
parsing = false;
|
14673
14730
|
break;
|
14674
14731
|
}
|
14675
14732
|
|
@@ -14713,7 +14770,7 @@ parse_parameters(
|
|
14713
14770
|
// then we can put a missing node in its place and stop parsing the
|
14714
14771
|
// parameters entirely now.
|
14715
14772
|
if (parser->recovering) {
|
14716
|
-
|
14773
|
+
parsing = false;
|
14717
14774
|
break;
|
14718
14775
|
}
|
14719
14776
|
}
|
@@ -14815,14 +14872,31 @@ parse_parameters(
|
|
14815
14872
|
}
|
14816
14873
|
}
|
14817
14874
|
|
14818
|
-
|
14875
|
+
parsing = false;
|
14819
14876
|
break;
|
14820
14877
|
}
|
14821
14878
|
|
14822
|
-
|
14823
|
-
|
14879
|
+
// If we hit some kind of issue while parsing the parameter, this would
|
14880
|
+
// have been set to false. In that case, we need to break out of the
|
14881
|
+
// loop.
|
14882
|
+
if (!parsing) break;
|
14883
|
+
|
14884
|
+
bool accepted_newline = false;
|
14885
|
+
if (uses_parentheses) {
|
14886
|
+
accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
|
14824
14887
|
}
|
14825
|
-
|
14888
|
+
|
14889
|
+
if (accept1(parser, PM_TOKEN_COMMA)) {
|
14890
|
+
// If there was a comma, but we also accepted a newline, then this
|
14891
|
+
// is a syntax error.
|
14892
|
+
if (accepted_newline) {
|
14893
|
+
pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
|
14894
|
+
}
|
14895
|
+
} else {
|
14896
|
+
// If there was no comma, then we're done parsing parameters.
|
14897
|
+
break;
|
14898
|
+
}
|
14899
|
+
}
|
14826
14900
|
|
14827
14901
|
pm_do_loop_stack_pop(parser);
|
14828
14902
|
|
@@ -15083,7 +15157,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
|
|
15083
15157
|
case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
|
15084
15158
|
case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
|
15085
15159
|
case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
|
15086
|
-
default: assert(false && "unreachable"); context =
|
15160
|
+
default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
|
15087
15161
|
}
|
15088
15162
|
|
15089
15163
|
else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
|
@@ -15178,6 +15252,7 @@ parse_block_parameters(
|
|
15178
15252
|
allows_trailing_comma,
|
15179
15253
|
false,
|
15180
15254
|
accepts_blocks_in_defaults,
|
15255
|
+
true,
|
15181
15256
|
(uint16_t) (depth + 1)
|
15182
15257
|
);
|
15183
15258
|
}
|
@@ -15500,6 +15575,7 @@ parse_return(pm_parser_t *parser, pm_node_t *node) {
|
|
15500
15575
|
case PM_CONTEXT_IF:
|
15501
15576
|
case PM_CONTEXT_LOOP_PREDICATE:
|
15502
15577
|
case PM_CONTEXT_MAIN:
|
15578
|
+
case PM_CONTEXT_MULTI_TARGET:
|
15503
15579
|
case PM_CONTEXT_PARENS:
|
15504
15580
|
case PM_CONTEXT_POSTEXE:
|
15505
15581
|
case PM_CONTEXT_PREDICATE:
|
@@ -15628,6 +15704,7 @@ parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
|
|
15628
15704
|
case PM_CONTEXT_MODULE_ENSURE:
|
15629
15705
|
case PM_CONTEXT_MODULE_RESCUE:
|
15630
15706
|
case PM_CONTEXT_MODULE:
|
15707
|
+
case PM_CONTEXT_MULTI_TARGET:
|
15631
15708
|
case PM_CONTEXT_PARENS:
|
15632
15709
|
case PM_CONTEXT_PREDICATE:
|
15633
15710
|
case PM_CONTEXT_RESCUE_MODIFIER:
|
@@ -16091,7 +16168,7 @@ parse_operator_symbol_name(const pm_token_t *name) {
|
|
16091
16168
|
case PM_TOKEN_TILDE:
|
16092
16169
|
case PM_TOKEN_BANG:
|
16093
16170
|
if (name->end[-1] == '@') return name->end - 1;
|
16094
|
-
|
16171
|
+
PRISM_FALLTHROUGH
|
16095
16172
|
default:
|
16096
16173
|
return name->end;
|
16097
16174
|
}
|
@@ -16347,14 +16424,15 @@ static pm_node_t *
|
|
16347
16424
|
parse_variable(pm_parser_t *parser) {
|
16348
16425
|
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
|
16349
16426
|
int depth;
|
16427
|
+
bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
|
16350
16428
|
|
16351
|
-
if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
|
16429
|
+
if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
|
16352
16430
|
return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
|
16353
16431
|
}
|
16354
16432
|
|
16355
16433
|
pm_scope_t *current_scope = parser->current_scope;
|
16356
16434
|
if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
|
16357
|
-
if (
|
16435
|
+
if (is_numbered_param) {
|
16358
16436
|
// When you use a numbered parameter, it implies the existence of
|
16359
16437
|
// all of the locals that exist before it. For example, referencing
|
16360
16438
|
// _2 means that _1 must exist. Therefore here we loop through all
|
@@ -17045,7 +17123,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
17045
17123
|
parse_pattern_hash_key(parser, &keys, first_node);
|
17046
17124
|
pm_node_t *value;
|
17047
17125
|
|
17048
|
-
if (
|
17126
|
+
if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
|
17049
17127
|
// Otherwise, we will create an implicit local variable
|
17050
17128
|
// target for the value.
|
17051
17129
|
value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
|
@@ -17062,7 +17140,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
17062
17140
|
break;
|
17063
17141
|
}
|
17064
17142
|
}
|
17065
|
-
|
17143
|
+
PRISM_FALLTHROUGH
|
17066
17144
|
default: {
|
17067
17145
|
// If we get anything else, then this is an error. For this we'll
|
17068
17146
|
// create a missing node for the value and create an assoc node for
|
@@ -17082,7 +17160,12 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
17082
17160
|
// If there are any other assocs, then we'll parse them now.
|
17083
17161
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
17084
17162
|
// Here we need to break to support trailing commas.
|
17085
|
-
if (
|
17163
|
+
if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
|
17164
|
+
// Trailing commas are not allowed to follow a rest pattern.
|
17165
|
+
if (rest != NULL) {
|
17166
|
+
pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
|
17167
|
+
}
|
17168
|
+
|
17086
17169
|
break;
|
17087
17170
|
}
|
17088
17171
|
|
@@ -17553,7 +17636,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
|
|
17553
17636
|
break;
|
17554
17637
|
}
|
17555
17638
|
}
|
17556
|
-
|
17639
|
+
PRISM_FALLTHROUGH
|
17557
17640
|
default:
|
17558
17641
|
node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
|
17559
17642
|
break;
|
@@ -17575,9 +17658,10 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
|
|
17575
17658
|
// Gather up all of the patterns into the list.
|
17576
17659
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
17577
17660
|
// Break early here in case we have a trailing comma.
|
17578
|
-
if (
|
17661
|
+
if (match9(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF,PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
|
17579
17662
|
node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
|
17580
17663
|
pm_node_list_append(&nodes, node);
|
17664
|
+
trailing_rest = true;
|
17581
17665
|
break;
|
17582
17666
|
}
|
17583
17667
|
|
@@ -17779,6 +17863,7 @@ parse_retry(pm_parser_t *parser, const pm_node_t *node) {
|
|
17779
17863
|
case PM_CONTEXT_LAMBDA_BRACES:
|
17780
17864
|
case PM_CONTEXT_LAMBDA_DO_END:
|
17781
17865
|
case PM_CONTEXT_LOOP_PREDICATE:
|
17866
|
+
case PM_CONTEXT_MULTI_TARGET:
|
17782
17867
|
case PM_CONTEXT_PARENS:
|
17783
17868
|
case PM_CONTEXT_POSTEXE:
|
17784
17869
|
case PM_CONTEXT_PREDICATE:
|
@@ -17862,6 +17947,7 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
|
|
17862
17947
|
case PM_CONTEXT_LAMBDA_ENSURE:
|
17863
17948
|
case PM_CONTEXT_LAMBDA_RESCUE:
|
17864
17949
|
case PM_CONTEXT_LOOP_PREDICATE:
|
17950
|
+
case PM_CONTEXT_MULTI_TARGET:
|
17865
17951
|
case PM_CONTEXT_PARENS:
|
17866
17952
|
case PM_CONTEXT_POSTEXE:
|
17867
17953
|
case PM_CONTEXT_PREDICATE:
|
@@ -17951,19 +18037,31 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17951
18037
|
bool parsed_bare_hash = false;
|
17952
18038
|
|
17953
18039
|
while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
|
18040
|
+
bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
|
18041
|
+
|
17954
18042
|
// Handle the case where we don't have a comma and we have a
|
17955
18043
|
// newline followed by a right bracket.
|
17956
|
-
if (
|
18044
|
+
if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
|
17957
18045
|
break;
|
17958
18046
|
}
|
17959
18047
|
|
17960
18048
|
// Ensure that we have a comma between elements in the array.
|
17961
|
-
if (
|
17962
|
-
|
17963
|
-
|
18049
|
+
if (array->elements.size > 0) {
|
18050
|
+
if (accept1(parser, PM_TOKEN_COMMA)) {
|
18051
|
+
// If there was a comma but we also accepted a newline,
|
18052
|
+
// then this is a syntax error.
|
18053
|
+
if (accepted_newline) {
|
18054
|
+
pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
|
18055
|
+
}
|
18056
|
+
} else {
|
18057
|
+
// If there was no comma, then we need to add a syntax
|
18058
|
+
// error.
|
18059
|
+
const uint8_t *location = parser->previous.end;
|
18060
|
+
PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
|
17964
18061
|
|
17965
|
-
|
17966
|
-
|
18062
|
+
parser->previous.start = location;
|
18063
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
18064
|
+
}
|
17967
18065
|
}
|
17968
18066
|
|
17969
18067
|
// If we have a right bracket immediately following a comma,
|
@@ -18119,14 +18217,32 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18119
18217
|
multi_target->base.location.start = lparen_loc.start;
|
18120
18218
|
multi_target->base.location.end = rparen_loc.end;
|
18121
18219
|
|
18122
|
-
|
18123
|
-
|
18124
|
-
|
18125
|
-
|
18126
|
-
|
18220
|
+
pm_node_t *result;
|
18221
|
+
if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
|
18222
|
+
result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
|
18223
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
18224
|
+
} else {
|
18225
|
+
result = (pm_node_t *) multi_target;
|
18127
18226
|
}
|
18128
18227
|
|
18129
|
-
|
18228
|
+
if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
|
18229
|
+
// All set, this is explicitly allowed by the parent
|
18230
|
+
// context.
|
18231
|
+
} else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
|
18232
|
+
// All set, we're inside a for loop and we're parsing
|
18233
|
+
// multiple targets.
|
18234
|
+
} else if (binding_power != PM_BINDING_POWER_STATEMENT) {
|
18235
|
+
// Multi targets are not allowed when we're not at the
|
18236
|
+
// statement level.
|
18237
|
+
pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
18238
|
+
} else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
18239
|
+
// Multi targets must be followed by an equal sign in
|
18240
|
+
// order to be valid (or a right parenthesis if they are
|
18241
|
+
// nested).
|
18242
|
+
pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
18243
|
+
}
|
18244
|
+
|
18245
|
+
return result;
|
18130
18246
|
}
|
18131
18247
|
|
18132
18248
|
// If we have a single statement and are ending on a right parenthesis
|
@@ -18187,6 +18303,33 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18187
18303
|
pm_accepts_block_stack_pop(parser);
|
18188
18304
|
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
|
18189
18305
|
|
18306
|
+
// When we're parsing multi targets, we allow them to be followed by
|
18307
|
+
// a right parenthesis if they are at the statement level. This is
|
18308
|
+
// only possible if they are the final statement inside parentheses.
|
18309
|
+
// We need to explicitly reject that here.
|
18310
|
+
{
|
18311
|
+
pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
|
18312
|
+
|
18313
|
+
if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
|
18314
|
+
pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
|
18315
|
+
pm_multi_target_node_targets_append(parser, multi_target, statement);
|
18316
|
+
|
18317
|
+
statement = (pm_node_t *) multi_target;
|
18318
|
+
statements->body.nodes[statements->body.size - 1] = statement;
|
18319
|
+
}
|
18320
|
+
|
18321
|
+
if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
|
18322
|
+
const uint8_t *offset = statement->location.end;
|
18323
|
+
pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
|
18324
|
+
pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
|
18325
|
+
|
18326
|
+
statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
|
18327
|
+
statements->body.nodes[statements->body.size - 1] = statement;
|
18328
|
+
|
18329
|
+
pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
18330
|
+
}
|
18331
|
+
}
|
18332
|
+
|
18190
18333
|
pop_block_exits(parser, previous_block_exits);
|
18191
18334
|
pm_node_list_free(&current_block_exits);
|
18192
18335
|
|
@@ -18442,10 +18585,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18442
18585
|
case PM_TOKEN_HEREDOC_START: {
|
18443
18586
|
// Here we have found a heredoc. We'll parse it and add it to the
|
18444
18587
|
// list of strings.
|
18445
|
-
|
18446
|
-
|
18447
|
-
|
18448
|
-
|
18588
|
+
assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
|
18589
|
+
pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
|
18590
|
+
|
18591
|
+
size_t common_whitespace = (size_t) -1;
|
18592
|
+
parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
|
18449
18593
|
|
18450
18594
|
parser_lex(parser);
|
18451
18595
|
pm_token_t opening = parser->previous;
|
@@ -18456,10 +18600,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18456
18600
|
if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
|
18457
18601
|
// If we get here, then we have an empty heredoc. We'll create
|
18458
18602
|
// an empty content token and return an empty string node.
|
18459
|
-
expect1_heredoc_term(parser, lex_mode);
|
18603
|
+
expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
|
18460
18604
|
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
18461
18605
|
|
18462
|
-
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
18606
|
+
if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
18463
18607
|
node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
|
18464
18608
|
} else {
|
18465
18609
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
|
@@ -18486,18 +18630,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18486
18630
|
cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
|
18487
18631
|
cast->base.location = cast->opening_loc;
|
18488
18632
|
|
18489
|
-
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
18633
|
+
if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
18490
18634
|
assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
|
18491
18635
|
cast->base.type = PM_X_STRING_NODE;
|
18492
18636
|
}
|
18493
18637
|
|
18494
|
-
size_t common_whitespace
|
18495
|
-
if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
|
18638
|
+
if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
|
18496
18639
|
parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
|
18497
18640
|
}
|
18498
18641
|
|
18499
18642
|
node = (pm_node_t *) cast;
|
18500
|
-
expect1_heredoc_term(parser, lex_mode);
|
18643
|
+
expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
|
18501
18644
|
} else {
|
18502
18645
|
// If we get here, then we have multiple parts in the heredoc,
|
18503
18646
|
// so we'll need to create an interpolated string node to hold
|
@@ -18511,15 +18654,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18511
18654
|
}
|
18512
18655
|
}
|
18513
18656
|
|
18514
|
-
size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
|
18515
|
-
|
18516
18657
|
// Now that we have all of the parts, create the correct type of
|
18517
18658
|
// interpolated node.
|
18518
|
-
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
18659
|
+
if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
18519
18660
|
pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
|
18520
18661
|
cast->parts = parts;
|
18521
18662
|
|
18522
|
-
expect1_heredoc_term(parser, lex_mode);
|
18663
|
+
expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
|
18523
18664
|
pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
|
18524
18665
|
|
18525
18666
|
cast->base.location = cast->opening_loc;
|
@@ -18528,7 +18669,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18528
18669
|
pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
|
18529
18670
|
pm_node_list_free(&parts);
|
18530
18671
|
|
18531
|
-
expect1_heredoc_term(parser, lex_mode);
|
18672
|
+
expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
|
18532
18673
|
pm_interpolated_string_node_closing_set(cast, &parser->previous);
|
18533
18674
|
|
18534
18675
|
cast->base.location = cast->opening_loc;
|
@@ -18537,9 +18678,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18537
18678
|
|
18538
18679
|
// If this is a heredoc that is indented with a ~, then we need
|
18539
18680
|
// to dedent each line by the common leading whitespace.
|
18540
|
-
if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
|
18681
|
+
if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
|
18541
18682
|
pm_node_list_t *nodes;
|
18542
|
-
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
18683
|
+
if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
18543
18684
|
nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
|
18544
18685
|
} else {
|
18545
18686
|
nodes = &((pm_interpolated_string_node_t *) node)->parts;
|
@@ -18625,7 +18766,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18625
18766
|
pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
|
18626
18767
|
}
|
18627
18768
|
}
|
18628
|
-
|
18769
|
+
PRISM_FALLTHROUGH
|
18629
18770
|
default:
|
18630
18771
|
return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
|
18631
18772
|
}
|
@@ -19116,6 +19257,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19116
19257
|
context_push(parser, PM_CONTEXT_DEF_PARAMS);
|
19117
19258
|
parser_lex(parser);
|
19118
19259
|
|
19260
|
+
// This will be false if the method name is not a valid identifier
|
19261
|
+
// but could be followed by an operator.
|
19262
|
+
bool valid_name = true;
|
19263
|
+
|
19119
19264
|
switch (parser->current.type) {
|
19120
19265
|
case PM_CASE_OPERATOR:
|
19121
19266
|
pm_parser_scope_push(parser, true);
|
@@ -19145,10 +19290,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19145
19290
|
|
19146
19291
|
break;
|
19147
19292
|
}
|
19148
|
-
case PM_TOKEN_CONSTANT:
|
19149
19293
|
case PM_TOKEN_INSTANCE_VARIABLE:
|
19150
19294
|
case PM_TOKEN_CLASS_VARIABLE:
|
19151
19295
|
case PM_TOKEN_GLOBAL_VARIABLE:
|
19296
|
+
valid_name = false;
|
19297
|
+
PRISM_FALLTHROUGH
|
19298
|
+
case PM_TOKEN_CONSTANT:
|
19152
19299
|
case PM_TOKEN_KEYWORD_NIL:
|
19153
19300
|
case PM_TOKEN_KEYWORD_SELF:
|
19154
19301
|
case PM_TOKEN_KEYWORD_TRUE:
|
@@ -19206,6 +19353,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19206
19353
|
|
19207
19354
|
name = parse_method_definition_name(parser);
|
19208
19355
|
} else {
|
19356
|
+
if (!valid_name) {
|
19357
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
|
19358
|
+
}
|
19359
|
+
|
19209
19360
|
name = identifier;
|
19210
19361
|
}
|
19211
19362
|
break;
|
@@ -19256,7 +19407,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19256
19407
|
if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
19257
19408
|
params = NULL;
|
19258
19409
|
} else {
|
19259
|
-
params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, (uint16_t) (depth + 1));
|
19410
|
+
params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
|
19260
19411
|
}
|
19261
19412
|
|
19262
19413
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
@@ -19281,7 +19432,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19281
19432
|
|
19282
19433
|
lparen = not_provided(parser);
|
19283
19434
|
rparen = not_provided(parser);
|
19284
|
-
params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, (uint16_t) (depth + 1));
|
19435
|
+
params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
|
19285
19436
|
|
19286
19437
|
context_pop(parser);
|
19287
19438
|
break;
|
@@ -19690,9 +19841,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19690
19841
|
pm_do_loop_stack_pop(parser);
|
19691
19842
|
context_pop(parser);
|
19692
19843
|
|
19693
|
-
|
19694
|
-
|
19844
|
+
pm_token_t do_keyword;
|
19845
|
+
if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
|
19846
|
+
do_keyword = parser->previous;
|
19847
|
+
} else {
|
19848
|
+
do_keyword = not_provided(parser);
|
19849
|
+
expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
|
19850
|
+
}
|
19695
19851
|
|
19852
|
+
pm_statements_node_t *statements = NULL;
|
19696
19853
|
if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
|
19697
19854
|
pm_accepts_block_stack_push(parser, true);
|
19698
19855
|
statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
|
@@ -19703,7 +19860,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19703
19860
|
parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
|
19704
19861
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
|
19705
19862
|
|
19706
|
-
return (pm_node_t *) pm_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
|
19863
|
+
return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
|
19707
19864
|
}
|
19708
19865
|
case PM_TOKEN_KEYWORD_WHILE: {
|
19709
19866
|
size_t opening_newline_index = token_newline_index(parser);
|
@@ -19718,9 +19875,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19718
19875
|
pm_do_loop_stack_pop(parser);
|
19719
19876
|
context_pop(parser);
|
19720
19877
|
|
19721
|
-
|
19722
|
-
|
19878
|
+
pm_token_t do_keyword;
|
19879
|
+
if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
|
19880
|
+
do_keyword = parser->previous;
|
19881
|
+
} else {
|
19882
|
+
do_keyword = not_provided(parser);
|
19883
|
+
expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
|
19884
|
+
}
|
19723
19885
|
|
19886
|
+
pm_statements_node_t *statements = NULL;
|
19724
19887
|
if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
|
19725
19888
|
pm_accepts_block_stack_push(parser, true);
|
19726
19889
|
statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
|
@@ -19731,7 +19894,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19731
19894
|
parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
|
19732
19895
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
|
19733
19896
|
|
19734
|
-
return (pm_node_t *) pm_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
|
19897
|
+
return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
|
19735
19898
|
}
|
19736
19899
|
case PM_TOKEN_PERCENT_LOWER_I: {
|
19737
19900
|
parser_lex(parser);
|
@@ -20801,7 +20964,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20801
20964
|
pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
|
20802
20965
|
}
|
20803
20966
|
}
|
20804
|
-
|
20967
|
+
PRISM_FALLTHROUGH
|
20805
20968
|
case PM_CASE_WRITABLE: {
|
20806
20969
|
parser_lex(parser);
|
20807
20970
|
pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
|
@@ -20847,7 +21010,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20847
21010
|
case PM_BACK_REFERENCE_READ_NODE:
|
20848
21011
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
20849
21012
|
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
20850
|
-
|
21013
|
+
PRISM_FALLTHROUGH
|
20851
21014
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
20852
21015
|
parser_lex(parser);
|
20853
21016
|
|
@@ -20965,7 +21128,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20965
21128
|
case PM_BACK_REFERENCE_READ_NODE:
|
20966
21129
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
20967
21130
|
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
20968
|
-
|
21131
|
+
PRISM_FALLTHROUGH
|
20969
21132
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
20970
21133
|
parser_lex(parser);
|
20971
21134
|
|
@@ -21093,7 +21256,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
21093
21256
|
case PM_BACK_REFERENCE_READ_NODE:
|
21094
21257
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
21095
21258
|
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
21096
|
-
|
21259
|
+
PRISM_FALLTHROUGH
|
21097
21260
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
21098
21261
|
parser_lex(parser);
|
21099
21262
|
|
@@ -21303,6 +21466,33 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
21303
21466
|
case PM_TOKEN_STAR:
|
21304
21467
|
case PM_TOKEN_STAR_STAR: {
|
21305
21468
|
parser_lex(parser);
|
21469
|
+
pm_token_t operator = parser->previous;
|
21470
|
+
switch (PM_NODE_TYPE(node)) {
|
21471
|
+
case PM_RESCUE_MODIFIER_NODE: {
|
21472
|
+
pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
|
21473
|
+
if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
|
21474
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21475
|
+
}
|
21476
|
+
break;
|
21477
|
+
}
|
21478
|
+
case PM_AND_NODE: {
|
21479
|
+
pm_and_node_t *cast = (pm_and_node_t *) node;
|
21480
|
+
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
|
21481
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21482
|
+
}
|
21483
|
+
break;
|
21484
|
+
}
|
21485
|
+
case PM_OR_NODE: {
|
21486
|
+
pm_or_node_t *cast = (pm_or_node_t *) node;
|
21487
|
+
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
|
21488
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21489
|
+
}
|
21490
|
+
break;
|
21491
|
+
}
|
21492
|
+
default:
|
21493
|
+
break;
|
21494
|
+
}
|
21495
|
+
|
21306
21496
|
pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
|
21307
21497
|
return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
|
21308
21498
|
}
|
@@ -21330,6 +21520,32 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
21330
21520
|
return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
|
21331
21521
|
}
|
21332
21522
|
|
21523
|
+
switch (PM_NODE_TYPE(node)) {
|
21524
|
+
case PM_RESCUE_MODIFIER_NODE: {
|
21525
|
+
pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
|
21526
|
+
if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
|
21527
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21528
|
+
}
|
21529
|
+
break;
|
21530
|
+
}
|
21531
|
+
case PM_AND_NODE: {
|
21532
|
+
pm_and_node_t *cast = (pm_and_node_t *) node;
|
21533
|
+
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
|
21534
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21535
|
+
}
|
21536
|
+
break;
|
21537
|
+
}
|
21538
|
+
case PM_OR_NODE: {
|
21539
|
+
pm_or_node_t *cast = (pm_or_node_t *) node;
|
21540
|
+
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
|
21541
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21542
|
+
}
|
21543
|
+
break;
|
21544
|
+
}
|
21545
|
+
default:
|
21546
|
+
break;
|
21547
|
+
}
|
21548
|
+
|
21333
21549
|
pm_token_t message;
|
21334
21550
|
|
21335
21551
|
switch (parser->current.type) {
|
@@ -21677,6 +21893,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
21677
21893
|
if (pm_symbol_node_label_p(node)) {
|
21678
21894
|
return node;
|
21679
21895
|
}
|
21896
|
+
break;
|
21680
21897
|
default:
|
21681
21898
|
break;
|
21682
21899
|
}
|
@@ -21684,8 +21901,11 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
21684
21901
|
// Otherwise we'll look and see if the next token can be parsed as an infix
|
21685
21902
|
// operator. If it can, then we'll parse it using parse_expression_infix.
|
21686
21903
|
pm_binding_powers_t current_binding_powers;
|
21904
|
+
pm_token_type_t current_token_type;
|
21905
|
+
|
21687
21906
|
while (
|
21688
|
-
|
21907
|
+
current_token_type = parser->current.type,
|
21908
|
+
current_binding_powers = pm_binding_powers[current_token_type],
|
21689
21909
|
binding_power <= current_binding_powers.left &&
|
21690
21910
|
current_binding_powers.binary
|
21691
21911
|
) {
|
@@ -21726,6 +21946,13 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
21726
21946
|
// If the operator is nonassoc and we should not be able to parse the
|
21727
21947
|
// upcoming infix operator, break.
|
21728
21948
|
if (current_binding_powers.nonassoc) {
|
21949
|
+
// If this is a non-assoc operator and we are about to parse the
|
21950
|
+
// exact same operator, then we need to add an error.
|
21951
|
+
if (match1(parser, current_token_type)) {
|
21952
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
|
21953
|
+
break;
|
21954
|
+
}
|
21955
|
+
|
21729
21956
|
// If this is an endless range, then we need to reject a couple of
|
21730
21957
|
// additional operators because it violates the normal operator
|
21731
21958
|
// precedence rules. Those patterns are:
|
@@ -21735,7 +21962,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
21735
21962
|
//
|
21736
21963
|
if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
|
21737
21964
|
if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
|
21738
|
-
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(
|
21965
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
|
21739
21966
|
break;
|
21740
21967
|
}
|
21741
21968
|
|
@@ -21857,6 +22084,7 @@ wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
|
|
21857
22084
|
));
|
21858
22085
|
|
21859
22086
|
pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
|
22087
|
+
pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
|
21860
22088
|
}
|
21861
22089
|
|
21862
22090
|
pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
|
@@ -22535,3 +22763,166 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
|
|
22535
22763
|
}
|
22536
22764
|
|
22537
22765
|
#endif
|
22766
|
+
|
22767
|
+
/******************************************************************************/
|
22768
|
+
/* Slice queries for the Ruby API */
|
22769
|
+
/******************************************************************************/
|
22770
|
+
|
22771
|
+
/**
 * The classification that pm_slice_type assigns to a slice of source. The
 * numeric values are spelled out explicitly; they match the implicit numbering
 * of the enumerators (ERROR is -1 and the rest count up from 0).
 */
typedef enum {
    /** The encoding name passed alongside the slice could not be resolved. */
    PM_SLICE_TYPE_ERROR = -1,

    /** The slice does not fall into any of the categories below. */
    PM_SLICE_TYPE_NONE = 0,

    /** The slice is a valid local variable name. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** The slice is a valid constant name. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** The slice is a valid method name. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
|
22788
|
+
|
22789
|
+
/**
|
22790
|
+
* Check that the slice is a valid local variable name or constant.
|
22791
|
+
*/
|
22792
|
+
pm_slice_type_t
|
22793
|
+
pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
|
22794
|
+
// first, get the right encoding object
|
22795
|
+
const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
|
22796
|
+
if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
|
22797
|
+
|
22798
|
+
// check that there is at least one character
|
22799
|
+
if (length == 0) return PM_SLICE_TYPE_NONE;
|
22800
|
+
|
22801
|
+
size_t width;
|
22802
|
+
if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
|
22803
|
+
// valid because alphabetical
|
22804
|
+
} else if (*source == '_') {
|
22805
|
+
// valid because underscore
|
22806
|
+
width = 1;
|
22807
|
+
} else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
|
22808
|
+
// valid because multibyte
|
22809
|
+
} else {
|
22810
|
+
// invalid because no match
|
22811
|
+
return PM_SLICE_TYPE_NONE;
|
22812
|
+
}
|
22813
|
+
|
22814
|
+
// determine the type of the slice based on the first character
|
22815
|
+
const uint8_t *end = source + length;
|
22816
|
+
pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
|
22817
|
+
|
22818
|
+
// next, iterate through all of the bytes of the string to ensure that they
|
22819
|
+
// are all valid identifier characters
|
22820
|
+
source += width;
|
22821
|
+
|
22822
|
+
while (source < end) {
|
22823
|
+
if ((width = encoding->alnum_char(source, end - source)) != 0) {
|
22824
|
+
// valid because alphanumeric
|
22825
|
+
source += width;
|
22826
|
+
} else if (*source == '_') {
|
22827
|
+
// valid because underscore
|
22828
|
+
source++;
|
22829
|
+
} else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
|
22830
|
+
// valid because multibyte
|
22831
|
+
source += width;
|
22832
|
+
} else {
|
22833
|
+
// invalid because no match
|
22834
|
+
break;
|
22835
|
+
}
|
22836
|
+
}
|
22837
|
+
|
22838
|
+
// accept a ! or ? at the end of the slice as a method name
|
22839
|
+
if (*source == '!' || *source == '?' || *source == '=') {
|
22840
|
+
source++;
|
22841
|
+
result = PM_SLICE_TYPE_METHOD_NAME;
|
22842
|
+
}
|
22843
|
+
|
22844
|
+
// valid if we are at the end of the slice
|
22845
|
+
return source == end ? result : PM_SLICE_TYPE_NONE;
|
22846
|
+
}
|
22847
|
+
|
22848
|
+
/**
|
22849
|
+
* Check that the slice is a valid local variable name.
|
22850
|
+
*/
|
22851
|
+
PRISM_EXPORTED_FUNCTION pm_string_query_t
|
22852
|
+
pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
|
22853
|
+
switch (pm_slice_type(source, length, encoding_name)) {
|
22854
|
+
case PM_SLICE_TYPE_ERROR:
|
22855
|
+
return PM_STRING_QUERY_ERROR;
|
22856
|
+
case PM_SLICE_TYPE_NONE:
|
22857
|
+
case PM_SLICE_TYPE_CONSTANT:
|
22858
|
+
case PM_SLICE_TYPE_METHOD_NAME:
|
22859
|
+
return PM_STRING_QUERY_FALSE;
|
22860
|
+
case PM_SLICE_TYPE_LOCAL:
|
22861
|
+
return PM_STRING_QUERY_TRUE;
|
22862
|
+
}
|
22863
|
+
|
22864
|
+
assert(false && "unreachable");
|
22865
|
+
return PM_STRING_QUERY_FALSE;
|
22866
|
+
}
|
22867
|
+
|
22868
|
+
/**
|
22869
|
+
* Check that the slice is a valid constant name.
|
22870
|
+
*/
|
22871
|
+
PRISM_EXPORTED_FUNCTION pm_string_query_t
|
22872
|
+
pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
|
22873
|
+
switch (pm_slice_type(source, length, encoding_name)) {
|
22874
|
+
case PM_SLICE_TYPE_ERROR:
|
22875
|
+
return PM_STRING_QUERY_ERROR;
|
22876
|
+
case PM_SLICE_TYPE_NONE:
|
22877
|
+
case PM_SLICE_TYPE_LOCAL:
|
22878
|
+
case PM_SLICE_TYPE_METHOD_NAME:
|
22879
|
+
return PM_STRING_QUERY_FALSE;
|
22880
|
+
case PM_SLICE_TYPE_CONSTANT:
|
22881
|
+
return PM_STRING_QUERY_TRUE;
|
22882
|
+
}
|
22883
|
+
|
22884
|
+
assert(false && "unreachable");
|
22885
|
+
return PM_STRING_QUERY_FALSE;
|
22886
|
+
}
|
22887
|
+
|
22888
|
+
/**
|
22889
|
+
* Check that the slice is a valid method name.
|
22890
|
+
*/
|
22891
|
+
PRISM_EXPORTED_FUNCTION pm_string_query_t
|
22892
|
+
pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
|
22893
|
+
#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
|
22894
|
+
#define C1(c) (*source == c)
|
22895
|
+
#define C2(s) (memcmp(source, s, 2) == 0)
|
22896
|
+
#define C3(s) (memcmp(source, s, 3) == 0)
|
22897
|
+
|
22898
|
+
switch (pm_slice_type(source, length, encoding_name)) {
|
22899
|
+
case PM_SLICE_TYPE_ERROR:
|
22900
|
+
return PM_STRING_QUERY_ERROR;
|
22901
|
+
case PM_SLICE_TYPE_NONE:
|
22902
|
+
break;
|
22903
|
+
case PM_SLICE_TYPE_LOCAL:
|
22904
|
+
// numbered parameters are not valid method names
|
22905
|
+
return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
|
22906
|
+
case PM_SLICE_TYPE_CONSTANT:
|
22907
|
+
// all constants are valid method names
|
22908
|
+
case PM_SLICE_TYPE_METHOD_NAME:
|
22909
|
+
// all method names are valid method names
|
22910
|
+
return PM_STRING_QUERY_TRUE;
|
22911
|
+
}
|
22912
|
+
|
22913
|
+
switch (length) {
|
22914
|
+
case 1:
|
22915
|
+
return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
|
22916
|
+
case 2:
|
22917
|
+
return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
|
22918
|
+
case 3:
|
22919
|
+
return B(C3("===") || C3("<=>") || C3("[]="));
|
22920
|
+
default:
|
22921
|
+
return PM_STRING_QUERY_FALSE;
|
22922
|
+
}
|
22923
|
+
|
22924
|
+
#undef B
|
22925
|
+
#undef C1
|
22926
|
+
#undef C2
|
22927
|
+
#undef C3
|
22928
|
+
}
|