prism 1.1.0 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +39 -1
- data/Makefile +1 -1
- data/config.yml +422 -3
- data/docs/build_system.md +8 -11
- data/docs/relocation.md +34 -0
- data/ext/prism/api_node.c +18 -10
- data/ext/prism/extconf.rb +13 -36
- data/ext/prism/extension.c +68 -0
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +427 -3
- data/include/prism/defines.h +22 -7
- data/include/prism/diagnostic.h +1 -0
- data/include/prism/parser.h +25 -12
- data/include/prism/version.h +2 -2
- data/include/prism.h +47 -0
- data/lib/prism/dot_visitor.rb +10 -0
- data/lib/prism/dsl.rb +4 -4
- data/lib/prism/ffi.rb +49 -2
- data/lib/prism/inspect_visitor.rb +2 -0
- data/lib/prism/node.rb +1839 -96
- data/lib/prism/parse_result/errors.rb +1 -1
- data/lib/prism/parse_result.rb +140 -3
- data/lib/prism/reflection.rb +2 -2
- data/lib/prism/relocation.rb +504 -0
- data/lib/prism/serialize.rb +17 -5
- data/lib/prism/string_query.rb +30 -0
- data/lib/prism/translation/parser/compiler.rb +36 -26
- data/lib/prism/translation/parser.rb +3 -3
- data/lib/prism/translation/ripper.rb +1 -5
- data/lib/prism/translation/ruby_parser.rb +14 -5
- data/lib/prism.rb +6 -4
- data/prism.gemspec +7 -1
- data/rbi/prism/dsl.rbi +4 -4
- data/rbi/prism/node.rbi +5118 -1030
- data/rbi/prism/parse_result.rbi +29 -0
- data/rbi/prism/string_query.rbi +12 -0
- data/rbi/prism.rbi +34 -34
- data/sig/prism/dsl.rbs +2 -2
- data/sig/prism/node.rbs +13 -98
- data/sig/prism/parse_result.rbs +20 -0
- data/sig/prism/relocation.rbs +185 -0
- data/sig/prism/string_query.rbs +11 -0
- data/src/diagnostic.c +3 -1
- data/src/node.c +18 -0
- data/src/prettyprint.c +32 -0
- data/src/prism.c +586 -195
- data/src/regexp.c +7 -3
- data/src/serialize.c +12 -0
- data/src/static_literals.c +1 -1
- data/src/util/pm_char.c +1 -1
- data/src/util/pm_string.c +1 -0
- metadata +9 -3
data/src/prism.c
CHANGED
@@ -544,10 +544,7 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
|
|
544
544
|
* token.
|
545
545
|
*/
|
546
546
|
static void
|
547
|
-
pm_parser_err_heredoc_term(pm_parser_t *parser,
|
548
|
-
const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
|
549
|
-
size_t ident_length = lex_mode->as.heredoc.ident_length;
|
550
|
-
|
547
|
+
pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
|
551
548
|
PM_PARSER_ERR_FORMAT(
|
552
549
|
parser,
|
553
550
|
ident_start,
|
@@ -964,7 +961,7 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals,
|
|
964
961
|
if (local->name != PM_CONSTANT_ID_UNSET) {
|
965
962
|
pm_constant_id_list_insert(list, (size_t) local->index, local->name);
|
966
963
|
|
967
|
-
if (warn_unused && local->reads == 0) {
|
964
|
+
if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
|
968
965
|
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
|
969
966
|
|
970
967
|
if (constant->length >= 1 && *constant->start != '_') {
|
@@ -2110,14 +2107,6 @@ pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
|
|
2110
2107
|
return node;
|
2111
2108
|
}
|
2112
2109
|
|
2113
|
-
/**
|
2114
|
-
* Return the size of the given array node.
|
2115
|
-
*/
|
2116
|
-
static inline size_t
|
2117
|
-
pm_array_node_size(pm_array_node_t *node) {
|
2118
|
-
return node->elements.size;
|
2119
|
-
}
|
2120
|
-
|
2121
2110
|
/**
|
2122
2111
|
* Append an argument to an array node.
|
2123
2112
|
*/
|
@@ -4153,7 +4142,7 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
|
|
4153
4142
|
|
4154
4143
|
// If errno is set, then it should only be ERANGE. At this point we need to
|
4155
4144
|
// check if it's infinity (it should be).
|
4156
|
-
if (errno == ERANGE &&
|
4145
|
+
if (errno == ERANGE && PRISM_ISINF(value)) {
|
4157
4146
|
int warn_width;
|
4158
4147
|
const char *ellipsis;
|
4159
4148
|
|
@@ -7695,7 +7684,7 @@ pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen
|
|
7695
7684
|
* Allocate a new UntilNode node.
|
7696
7685
|
*/
|
7697
7686
|
static pm_until_node_t *
|
7698
|
-
pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
|
7687
|
+
pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
|
7699
7688
|
pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
|
7700
7689
|
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
|
7701
7690
|
|
@@ -7710,6 +7699,7 @@ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
|
|
7710
7699
|
},
|
7711
7700
|
},
|
7712
7701
|
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
|
7702
|
+
.do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
|
7713
7703
|
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
|
7714
7704
|
.predicate = predicate,
|
7715
7705
|
.statements = statements
|
@@ -7738,6 +7728,7 @@ pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
|
|
7738
7728
|
},
|
7739
7729
|
},
|
7740
7730
|
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
|
7731
|
+
.do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
7741
7732
|
.closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
7742
7733
|
.predicate = predicate,
|
7743
7734
|
.statements = statements
|
@@ -7805,7 +7796,7 @@ pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statemen
|
|
7805
7796
|
* Allocate a new WhileNode node.
|
7806
7797
|
*/
|
7807
7798
|
static pm_while_node_t *
|
7808
|
-
pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
|
7799
|
+
pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
|
7809
7800
|
pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
|
7810
7801
|
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
|
7811
7802
|
|
@@ -7820,6 +7811,7 @@ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
|
|
7820
7811
|
},
|
7821
7812
|
},
|
7822
7813
|
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
|
7814
|
+
.do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
|
7823
7815
|
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
|
7824
7816
|
.predicate = predicate,
|
7825
7817
|
.statements = statements
|
@@ -7848,6 +7840,7 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
|
|
7848
7840
|
},
|
7849
7841
|
},
|
7850
7842
|
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
|
7843
|
+
.do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
7851
7844
|
.closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
7852
7845
|
.predicate = predicate,
|
7853
7846
|
.statements = statements
|
@@ -7870,6 +7863,7 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
|
|
7870
7863
|
.location = PM_LOCATION_NULL_VALUE(parser)
|
7871
7864
|
},
|
7872
7865
|
.keyword_loc = PM_LOCATION_NULL_VALUE(parser),
|
7866
|
+
.do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
|
7873
7867
|
.closing_loc = PM_LOCATION_NULL_VALUE(parser),
|
7874
7868
|
.predicate = predicate,
|
7875
7869
|
.statements = statements
|
@@ -8573,6 +8567,7 @@ context_terminator(pm_context_t context, pm_token_t *token) {
|
|
8573
8567
|
case PM_CONTEXT_MAIN:
|
8574
8568
|
case PM_CONTEXT_DEF_PARAMS:
|
8575
8569
|
case PM_CONTEXT_DEFINED:
|
8570
|
+
case PM_CONTEXT_MULTI_TARGET:
|
8576
8571
|
case PM_CONTEXT_TERNARY:
|
8577
8572
|
case PM_CONTEXT_RESCUE_MODIFIER:
|
8578
8573
|
return token->type == PM_TOKEN_EOF;
|
@@ -8777,6 +8772,7 @@ context_human(pm_context_t context) {
|
|
8777
8772
|
case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
|
8778
8773
|
case PM_CONTEXT_MAIN: return "top level context";
|
8779
8774
|
case PM_CONTEXT_MODULE: return "module definition";
|
8775
|
+
case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
|
8780
8776
|
case PM_CONTEXT_PARENS: return "parentheses";
|
8781
8777
|
case PM_CONTEXT_POSTEXE: return "'END' block";
|
8782
8778
|
case PM_CONTEXT_PREDICATE: return "predicate";
|
@@ -9051,6 +9047,10 @@ lex_global_variable(pm_parser_t *parser) {
|
|
9051
9047
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
9052
9048
|
}
|
9053
9049
|
|
9050
|
+
// True if multiple characters are allowed after the declaration of the
|
9051
|
+
// global variable. Not true when it starts with "$-".
|
9052
|
+
bool allow_multiple = true;
|
9053
|
+
|
9054
9054
|
switch (*parser->current.end) {
|
9055
9055
|
case '~': // $~: match-data
|
9056
9056
|
case '*': // $*: argv
|
@@ -9109,14 +9109,15 @@ lex_global_variable(pm_parser_t *parser) {
|
|
9109
9109
|
|
9110
9110
|
case '-':
|
9111
9111
|
parser->current.end++;
|
9112
|
-
|
9112
|
+
allow_multiple = false;
|
9113
|
+
PRISM_FALLTHROUGH
|
9113
9114
|
default: {
|
9114
9115
|
size_t width;
|
9115
9116
|
|
9116
9117
|
if ((width = char_is_identifier(parser, parser->current.end)) > 0) {
|
9117
9118
|
do {
|
9118
9119
|
parser->current.end += width;
|
9119
|
-
} while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
|
9120
|
+
} while (allow_multiple && parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
|
9120
9121
|
} else if (pm_char_is_whitespace(peek(parser))) {
|
9121
9122
|
// If we get here, then we have a $ followed by whitespace,
|
9122
9123
|
// which is not allowed.
|
@@ -9881,6 +9882,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9881
9882
|
}
|
9882
9883
|
case 'c': {
|
9883
9884
|
parser->current.end++;
|
9885
|
+
if (flags & PM_ESCAPE_FLAG_CONTROL) {
|
9886
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
|
9887
|
+
}
|
9888
|
+
|
9884
9889
|
if (parser->current.end == parser->end) {
|
9885
9890
|
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
|
9886
9891
|
return;
|
@@ -9894,10 +9899,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9894
9899
|
return;
|
9895
9900
|
}
|
9896
9901
|
case '\\':
|
9897
|
-
if (flags & PM_ESCAPE_FLAG_CONTROL) {
|
9898
|
-
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
|
9899
|
-
return;
|
9900
|
-
}
|
9901
9902
|
parser->current.end++;
|
9902
9903
|
|
9903
9904
|
if (match(parser, 'u') || match(parser, 'U')) {
|
@@ -9931,6 +9932,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9931
9932
|
}
|
9932
9933
|
case 'C': {
|
9933
9934
|
parser->current.end++;
|
9935
|
+
if (flags & PM_ESCAPE_FLAG_CONTROL) {
|
9936
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
|
9937
|
+
}
|
9938
|
+
|
9934
9939
|
if (peek(parser) != '-') {
|
9935
9940
|
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9936
9941
|
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
|
@@ -9951,10 +9956,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9951
9956
|
return;
|
9952
9957
|
}
|
9953
9958
|
case '\\':
|
9954
|
-
if (flags & PM_ESCAPE_FLAG_CONTROL) {
|
9955
|
-
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
|
9956
|
-
return;
|
9957
|
-
}
|
9958
9959
|
parser->current.end++;
|
9959
9960
|
|
9960
9961
|
if (match(parser, 'u') || match(parser, 'U')) {
|
@@ -9989,6 +9990,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9989
9990
|
}
|
9990
9991
|
case 'M': {
|
9991
9992
|
parser->current.end++;
|
9993
|
+
if (flags & PM_ESCAPE_FLAG_META) {
|
9994
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
|
9995
|
+
}
|
9996
|
+
|
9992
9997
|
if (peek(parser) != '-') {
|
9993
9998
|
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9994
9999
|
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
|
@@ -10004,10 +10009,6 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
10004
10009
|
uint8_t peeked = peek(parser);
|
10005
10010
|
switch (peeked) {
|
10006
10011
|
case '\\':
|
10007
|
-
if (flags & PM_ESCAPE_FLAG_META) {
|
10008
|
-
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
|
10009
|
-
return;
|
10010
|
-
}
|
10011
10012
|
parser->current.end++;
|
10012
10013
|
|
10013
10014
|
if (match(parser, 'u') || match(parser, 'U')) {
|
@@ -10045,11 +10046,13 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
10045
10046
|
escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
|
10046
10047
|
return;
|
10047
10048
|
}
|
10049
|
+
PRISM_FALLTHROUGH
|
10048
10050
|
}
|
10049
|
-
/* fallthrough */
|
10050
10051
|
default: {
|
10051
10052
|
if (parser->current.end < parser->end) {
|
10052
10053
|
escape_write_escape_encoded(parser, buffer);
|
10054
|
+
} else {
|
10055
|
+
pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
10053
10056
|
}
|
10054
10057
|
return;
|
10055
10058
|
}
|
@@ -10498,6 +10501,7 @@ pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
|
10498
10501
|
}
|
10499
10502
|
|
10500
10503
|
const uint8_t *end = parser->current.end - 1;
|
10504
|
+
assert(end >= start);
|
10501
10505
|
pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
|
10502
10506
|
|
10503
10507
|
token_buffer->cursor = end;
|
@@ -10578,9 +10582,15 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
|
|
10578
10582
|
pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
|
10579
10583
|
}
|
10580
10584
|
|
10581
|
-
|
10582
|
-
parser->current.end += eol_length;
|
10585
|
+
uint8_t delimiter = *parser->current.end;
|
10583
10586
|
|
10587
|
+
// If our delimiter is \r\n, we want to treat it as if it's \n.
|
10588
|
+
// For example, %\r\nfoo\r\n should be "foo"
|
10589
|
+
if (eol_length == 2) {
|
10590
|
+
delimiter = *(parser->current.end + 1);
|
10591
|
+
}
|
10592
|
+
|
10593
|
+
parser->current.end += eol_length;
|
10584
10594
|
return delimiter;
|
10585
10595
|
}
|
10586
10596
|
|
@@ -10690,6 +10700,14 @@ parser_lex(pm_parser_t *parser) {
|
|
10690
10700
|
// We'll check if we're at the end of the file. If we are, then we
|
10691
10701
|
// need to return the EOF token.
|
10692
10702
|
if (parser->current.end >= parser->end) {
|
10703
|
+
// If we hit EOF, but the EOF came immediately after a newline,
|
10704
|
+
// set the start of the token to the newline. This way any EOF
|
10705
|
+
// errors will be reported as happening on that line rather than
|
10706
|
+
// a line after. For example "foo(\n" should report an error
|
10707
|
+
// on line 1 even though EOF technically occurs on line 2.
|
10708
|
+
if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
|
10709
|
+
parser->current.start -= 1;
|
10710
|
+
}
|
10693
10711
|
LEX(PM_TOKEN_EOF);
|
10694
10712
|
}
|
10695
10713
|
|
@@ -10732,7 +10750,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10732
10750
|
|
10733
10751
|
lexed_comment = true;
|
10734
10752
|
}
|
10735
|
-
|
10753
|
+
PRISM_FALLTHROUGH
|
10736
10754
|
case '\r':
|
10737
10755
|
case '\n': {
|
10738
10756
|
parser->semantic_token_seen = semantic_token_seen & 0x1;
|
@@ -10774,7 +10792,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10774
10792
|
parser->current.type = PM_TOKEN_NEWLINE;
|
10775
10793
|
return;
|
10776
10794
|
}
|
10777
|
-
|
10795
|
+
PRISM_FALLTHROUGH
|
10778
10796
|
case PM_IGNORED_NEWLINE_ALL:
|
10779
10797
|
if (!lexed_comment) parser_lex_ignored_newline(parser);
|
10780
10798
|
lexed_comment = false;
|
@@ -10871,6 +10889,10 @@ parser_lex(pm_parser_t *parser) {
|
|
10871
10889
|
|
10872
10890
|
// ,
|
10873
10891
|
case ',':
|
10892
|
+
if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
|
10893
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
|
10894
|
+
}
|
10895
|
+
|
10874
10896
|
lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
|
10875
10897
|
LEX(PM_TOKEN_COMMA);
|
10876
10898
|
|
@@ -11153,12 +11175,14 @@ parser_lex(pm_parser_t *parser) {
|
|
11153
11175
|
lex_mode_push(parser, (pm_lex_mode_t) {
|
11154
11176
|
.mode = PM_LEX_HEREDOC,
|
11155
11177
|
.as.heredoc = {
|
11156
|
-
.
|
11157
|
-
|
11178
|
+
.base = {
|
11179
|
+
.ident_start = ident_start,
|
11180
|
+
.ident_length = ident_length,
|
11181
|
+
.quote = quote,
|
11182
|
+
.indent = indent
|
11183
|
+
},
|
11158
11184
|
.next_start = parser->current.end,
|
11159
|
-
.
|
11160
|
-
.indent = indent,
|
11161
|
-
.common_whitespace = (size_t) -1,
|
11185
|
+
.common_whitespace = NULL,
|
11162
11186
|
.line_continuation = false
|
11163
11187
|
}
|
11164
11188
|
});
|
@@ -11171,7 +11195,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11171
11195
|
// this is not a valid heredoc declaration. In this case we
|
11172
11196
|
// will add an error, but we will still return a heredoc
|
11173
11197
|
// start.
|
11174
|
-
if (!ident_error) pm_parser_err_heredoc_term(parser,
|
11198
|
+
if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
|
11175
11199
|
body_start = parser->end;
|
11176
11200
|
} else {
|
11177
11201
|
// Otherwise, we want to indicate that the body of the
|
@@ -11783,7 +11807,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11783
11807
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
|
11784
11808
|
break;
|
11785
11809
|
}
|
11786
|
-
|
11810
|
+
PRISM_FALLTHROUGH
|
11787
11811
|
default:
|
11788
11812
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
|
11789
11813
|
break;
|
@@ -11980,7 +12004,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11980
12004
|
pm_token_buffer_push_byte(&token_buffer, '\r');
|
11981
12005
|
break;
|
11982
12006
|
}
|
11983
|
-
|
12007
|
+
PRISM_FALLTHROUGH
|
11984
12008
|
case '\n':
|
11985
12009
|
pm_token_buffer_push_byte(&token_buffer, '\n');
|
11986
12010
|
|
@@ -12084,9 +12108,28 @@ parser_lex(pm_parser_t *parser) {
|
|
12084
12108
|
pm_regexp_token_buffer_t token_buffer = { 0 };
|
12085
12109
|
|
12086
12110
|
while (breakpoint != NULL) {
|
12111
|
+
uint8_t term = lex_mode->as.regexp.terminator;
|
12112
|
+
bool is_terminator = (*breakpoint == term);
|
12113
|
+
|
12114
|
+
// If the terminator is newline, we need to consider \r\n _also_ a newline
|
12115
|
+
// For example: `%\nfoo\r\n`
|
12116
|
+
// The string should be "foo", not "foo\r"
|
12117
|
+
if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
|
12118
|
+
if (term == '\n') {
|
12119
|
+
is_terminator = true;
|
12120
|
+
}
|
12121
|
+
|
12122
|
+
// If the terminator is a CR, but we see a CRLF, we need to
|
12123
|
+
// treat the CRLF as a newline, meaning this is _not_ the
|
12124
|
+
// terminator
|
12125
|
+
if (term == '\r') {
|
12126
|
+
is_terminator = false;
|
12127
|
+
}
|
12128
|
+
}
|
12129
|
+
|
12087
12130
|
// If we hit the terminator, we need to determine what kind of
|
12088
12131
|
// token to return.
|
12089
|
-
if (
|
12132
|
+
if (is_terminator) {
|
12090
12133
|
if (lex_mode->as.regexp.nesting > 0) {
|
12091
12134
|
parser->current.end = breakpoint + 1;
|
12092
12135
|
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
@@ -12148,7 +12191,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12148
12191
|
pm_regexp_token_buffer_escape(parser, &token_buffer);
|
12149
12192
|
token_buffer.base.cursor = breakpoint;
|
12150
12193
|
|
12151
|
-
|
12194
|
+
PRISM_FALLTHROUGH
|
12152
12195
|
case '\n':
|
12153
12196
|
// If we've hit a newline, then we need to track that in
|
12154
12197
|
// the list of newlines.
|
@@ -12190,7 +12233,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12190
12233
|
pm_token_buffer_push_byte(&token_buffer.base, '\r');
|
12191
12234
|
break;
|
12192
12235
|
}
|
12193
|
-
|
12236
|
+
PRISM_FALLTHROUGH
|
12194
12237
|
case '\n':
|
12195
12238
|
if (parser->heredoc_end) {
|
12196
12239
|
// ... if we are on the same line as a heredoc,
|
@@ -12316,10 +12359,29 @@ parser_lex(pm_parser_t *parser) {
|
|
12316
12359
|
continue;
|
12317
12360
|
}
|
12318
12361
|
|
12362
|
+
uint8_t term = lex_mode->as.string.terminator;
|
12363
|
+
bool is_terminator = (*breakpoint == term);
|
12364
|
+
|
12365
|
+
// If the terminator is newline, we need to consider \r\n _also_ a newline
|
12366
|
+
// For example: `%r\nfoo\r\n`
|
12367
|
+
// The string should be /foo/, not /foo\r/
|
12368
|
+
if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
|
12369
|
+
if (term == '\n') {
|
12370
|
+
is_terminator = true;
|
12371
|
+
}
|
12372
|
+
|
12373
|
+
// If the terminator is a CR, but we see a CRLF, we need to
|
12374
|
+
// treat the CRLF as a newline, meaning this is _not_ the
|
12375
|
+
// terminator
|
12376
|
+
if (term == '\r') {
|
12377
|
+
is_terminator = false;
|
12378
|
+
}
|
12379
|
+
}
|
12380
|
+
|
12319
12381
|
// Note that we have to check the terminator here first because we could
|
12320
12382
|
// potentially be parsing a % string that has a # character as the
|
12321
12383
|
// terminator.
|
12322
|
-
if (
|
12384
|
+
if (is_terminator) {
|
12323
12385
|
// If this terminator doesn't actually close the string, then we need
|
12324
12386
|
// to continue on past it.
|
12325
12387
|
if (lex_mode->as.string.nesting > 0) {
|
@@ -12379,7 +12441,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12379
12441
|
pm_token_buffer_escape(parser, &token_buffer);
|
12380
12442
|
token_buffer.cursor = breakpoint;
|
12381
12443
|
|
12382
|
-
|
12444
|
+
PRISM_FALLTHROUGH
|
12383
12445
|
case '\n':
|
12384
12446
|
// When we hit a newline, we need to flush any potential
|
12385
12447
|
// heredocs. Note that this has to happen after we check
|
@@ -12424,7 +12486,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12424
12486
|
pm_token_buffer_push_byte(&token_buffer, '\r');
|
12425
12487
|
break;
|
12426
12488
|
}
|
12427
|
-
|
12489
|
+
PRISM_FALLTHROUGH
|
12428
12490
|
case '\n':
|
12429
12491
|
if (!lex_mode->as.string.interpolation) {
|
12430
12492
|
pm_token_buffer_push_byte(&token_buffer, '\\');
|
@@ -12514,6 +12576,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12514
12576
|
// Now let's grab the information about the identifier off of the
|
12515
12577
|
// current lex mode.
|
12516
12578
|
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
12579
|
+
pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
|
12517
12580
|
|
12518
12581
|
bool line_continuation = lex_mode->as.heredoc.line_continuation;
|
12519
12582
|
lex_mode->as.heredoc.line_continuation = false;
|
@@ -12523,15 +12586,16 @@ parser_lex(pm_parser_t *parser) {
|
|
12523
12586
|
// terminator) but still continue parsing so that content after the
|
12524
12587
|
// declaration of the heredoc can be parsed.
|
12525
12588
|
if (parser->current.end >= parser->end) {
|
12526
|
-
pm_parser_err_heredoc_term(parser,
|
12589
|
+
pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
|
12527
12590
|
parser->next_start = lex_mode->as.heredoc.next_start;
|
12528
12591
|
parser->heredoc_end = parser->current.end;
|
12529
12592
|
lex_state_set(parser, PM_LEX_STATE_END);
|
12593
|
+
lex_mode_pop(parser);
|
12530
12594
|
LEX(PM_TOKEN_HEREDOC_END);
|
12531
12595
|
}
|
12532
12596
|
|
12533
|
-
const uint8_t *ident_start =
|
12534
|
-
size_t ident_length =
|
12597
|
+
const uint8_t *ident_start = heredoc_lex_mode->ident_start;
|
12598
|
+
size_t ident_length = heredoc_lex_mode->ident_length;
|
12535
12599
|
|
12536
12600
|
// If we are immediately following a newline and we have hit the
|
12537
12601
|
// terminator, then we need to return the ending of the heredoc.
|
@@ -12556,10 +12620,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12556
12620
|
const uint8_t *terminator_start = ident_end - ident_length;
|
12557
12621
|
const uint8_t *cursor = start;
|
12558
12622
|
|
12559
|
-
if (
|
12560
|
-
lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_DASH ||
|
12561
|
-
lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE
|
12562
|
-
) {
|
12623
|
+
if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
|
12563
12624
|
while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
|
12564
12625
|
cursor++;
|
12565
12626
|
}
|
@@ -12582,17 +12643,19 @@ parser_lex(pm_parser_t *parser) {
|
|
12582
12643
|
}
|
12583
12644
|
|
12584
12645
|
lex_state_set(parser, PM_LEX_STATE_END);
|
12646
|
+
lex_mode_pop(parser);
|
12585
12647
|
LEX(PM_TOKEN_HEREDOC_END);
|
12586
12648
|
}
|
12587
12649
|
}
|
12588
12650
|
|
12589
|
-
size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start,
|
12651
|
+
size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
|
12590
12652
|
if (
|
12591
|
-
|
12592
|
-
|
12653
|
+
heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
|
12654
|
+
lex_mode->as.heredoc.common_whitespace != NULL &&
|
12655
|
+
(*lex_mode->as.heredoc.common_whitespace > whitespace) &&
|
12593
12656
|
peek_at(parser, start) != '\n'
|
12594
12657
|
) {
|
12595
|
-
lex_mode->as.heredoc.common_whitespace = whitespace;
|
12658
|
+
*lex_mode->as.heredoc.common_whitespace = whitespace;
|
12596
12659
|
}
|
12597
12660
|
}
|
12598
12661
|
|
@@ -12601,7 +12664,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12601
12664
|
// strpbrk to find the first of these characters.
|
12602
12665
|
uint8_t breakpoints[] = "\r\n\\#";
|
12603
12666
|
|
12604
|
-
pm_heredoc_quote_t quote =
|
12667
|
+
pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
|
12605
12668
|
if (quote == PM_HEREDOC_QUOTE_SINGLE) {
|
12606
12669
|
breakpoints[3] = '\0';
|
12607
12670
|
}
|
@@ -12631,7 +12694,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12631
12694
|
pm_token_buffer_escape(parser, &token_buffer);
|
12632
12695
|
token_buffer.cursor = breakpoint;
|
12633
12696
|
|
12634
|
-
|
12697
|
+
PRISM_FALLTHROUGH
|
12635
12698
|
case '\n': {
|
12636
12699
|
if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
|
12637
12700
|
parser_flush_heredoc_end(parser);
|
@@ -12664,8 +12727,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12664
12727
|
// leading whitespace if we have a - or ~ heredoc.
|
12665
12728
|
const uint8_t *cursor = start;
|
12666
12729
|
|
12667
|
-
if (
|
12668
|
-
lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
|
12730
|
+
if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
|
12669
12731
|
while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
|
12670
12732
|
cursor++;
|
12671
12733
|
}
|
@@ -12681,16 +12743,16 @@ parser_lex(pm_parser_t *parser) {
|
|
12681
12743
|
}
|
12682
12744
|
}
|
12683
12745
|
|
12684
|
-
size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
|
12746
|
+
size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
|
12685
12747
|
|
12686
12748
|
// If we have hit a newline that is followed by a valid
|
12687
12749
|
// terminator, then we need to return the content of the
|
12688
12750
|
// heredoc here as string content. Then, the next time a
|
12689
12751
|
// token is lexed, it will match again and return the
|
12690
12752
|
// end of the heredoc.
|
12691
|
-
if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
|
12692
|
-
if ((lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
|
12693
|
-
lex_mode->as.heredoc.common_whitespace = whitespace;
|
12753
|
+
if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
|
12754
|
+
if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
|
12755
|
+
*lex_mode->as.heredoc.common_whitespace = whitespace;
|
12694
12756
|
}
|
12695
12757
|
|
12696
12758
|
parser->current.end = breakpoint + 1;
|
@@ -12732,7 +12794,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12732
12794
|
pm_token_buffer_push_byte(&token_buffer, '\r');
|
12733
12795
|
break;
|
12734
12796
|
}
|
12735
|
-
|
12797
|
+
PRISM_FALLTHROUGH
|
12736
12798
|
case '\n':
|
12737
12799
|
pm_token_buffer_push_byte(&token_buffer, '\\');
|
12738
12800
|
pm_token_buffer_push_byte(&token_buffer, '\n');
|
@@ -12752,12 +12814,12 @@ parser_lex(pm_parser_t *parser) {
|
|
12752
12814
|
pm_token_buffer_push_byte(&token_buffer, '\r');
|
12753
12815
|
break;
|
12754
12816
|
}
|
12755
|
-
|
12817
|
+
PRISM_FALLTHROUGH
|
12756
12818
|
case '\n':
|
12757
12819
|
// If we are in a tilde here, we should
|
12758
12820
|
// break out of the loop and return the
|
12759
12821
|
// string content.
|
12760
|
-
if (
|
12822
|
+
if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
|
12761
12823
|
const uint8_t *end = parser->current.end;
|
12762
12824
|
pm_newline_list_append(&parser->newline_list, end);
|
12763
12825
|
|
@@ -12983,7 +13045,7 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
|
|
12983
13045
|
[PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
|
12984
13046
|
[PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
|
12985
13047
|
[PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
|
12986
|
-
[PM_TOKEN_USTAR] =
|
13048
|
+
[PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
|
12987
13049
|
|
12988
13050
|
// -@
|
12989
13051
|
[PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
|
@@ -13044,14 +13106,6 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
|
|
13044
13106
|
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
|
13045
13107
|
}
|
13046
13108
|
|
13047
|
-
/**
|
13048
|
-
* Returns true if the current token is any of the six given types.
|
13049
|
-
*/
|
13050
|
-
static inline bool
|
13051
|
-
match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
|
13052
|
-
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
|
13053
|
-
}
|
13054
|
-
|
13055
13109
|
/**
|
13056
13110
|
* Returns true if the current token is any of the seven given types.
|
13057
13111
|
*/
|
@@ -13068,6 +13122,14 @@ match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
|
|
13068
13122
|
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
|
13069
13123
|
}
|
13070
13124
|
|
13125
|
+
/**
|
13126
|
+
* Returns true if the current token is any of the nine given types.
|
13127
|
+
*/
|
13128
|
+
static inline bool
|
13129
|
+
match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
|
13130
|
+
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
|
13131
|
+
}
|
13132
|
+
|
13071
13133
|
/**
|
13072
13134
|
* If the current token is of the specified type, lex forward by one token and
|
13073
13135
|
* return true. Otherwise, return false. For example:
|
@@ -13096,19 +13158,6 @@ accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
|
|
13096
13158
|
return false;
|
13097
13159
|
}
|
13098
13160
|
|
13099
|
-
/**
|
13100
|
-
* If the current token is any of the three given types, lex forward by one
|
13101
|
-
* token and return true. Otherwise return false.
|
13102
|
-
*/
|
13103
|
-
static inline bool
|
13104
|
-
accept3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
|
13105
|
-
if (match3(parser, type1, type2, type3)) {
|
13106
|
-
parser_lex(parser);
|
13107
|
-
return true;
|
13108
|
-
}
|
13109
|
-
return false;
|
13110
|
-
}
|
13111
|
-
|
13112
13161
|
/**
|
13113
13162
|
* This function indicates that the parser expects a token in a specific
|
13114
13163
|
* position. For example, if you're parsing a BEGIN block, you know that a { is
|
@@ -13146,32 +13195,16 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di
|
|
13146
13195
|
parser->previous.type = PM_TOKEN_MISSING;
|
13147
13196
|
}
|
13148
13197
|
|
13149
|
-
/**
|
13150
|
-
* This function is the same as expect2, but it expects one of three token types.
|
13151
|
-
*/
|
13152
|
-
static void
|
13153
|
-
expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_diagnostic_id_t diag_id) {
|
13154
|
-
if (accept3(parser, type1, type2, type3)) return;
|
13155
|
-
|
13156
|
-
const uint8_t *location = parser->previous.end;
|
13157
|
-
pm_parser_err(parser, location, location, diag_id);
|
13158
|
-
|
13159
|
-
parser->previous.start = location;
|
13160
|
-
parser->previous.type = PM_TOKEN_MISSING;
|
13161
|
-
}
|
13162
|
-
|
13163
13198
|
/**
|
13164
13199
|
* A special expect1 that expects a heredoc terminator and handles popping the
|
13165
13200
|
* lex mode accordingly.
|
13166
13201
|
*/
|
13167
13202
|
static void
|
13168
|
-
expect1_heredoc_term(pm_parser_t *parser,
|
13203
|
+
expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
|
13169
13204
|
if (match1(parser, PM_TOKEN_HEREDOC_END)) {
|
13170
|
-
lex_mode_pop(parser);
|
13171
13205
|
parser_lex(parser);
|
13172
13206
|
} else {
|
13173
|
-
pm_parser_err_heredoc_term(parser,
|
13174
|
-
lex_mode_pop(parser);
|
13207
|
+
pm_parser_err_heredoc_term(parser, ident_start, ident_length);
|
13175
13208
|
parser->previous.start = parser->previous.end;
|
13176
13209
|
parser->previous.type = PM_TOKEN_MISSING;
|
13177
13210
|
}
|
@@ -13503,7 +13536,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
|
|
13503
13536
|
return (pm_node_t *) pm_index_target_node_create(parser, call);
|
13504
13537
|
}
|
13505
13538
|
}
|
13506
|
-
|
13539
|
+
PRISM_FALLTHROUGH
|
13507
13540
|
default:
|
13508
13541
|
// In this case we have a node that we don't know how to convert
|
13509
13542
|
// into a target. We need to treat it as an error. For now, we'll
|
@@ -13585,7 +13618,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
13585
13618
|
case PM_BACK_REFERENCE_READ_NODE:
|
13586
13619
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
13587
13620
|
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
13588
|
-
|
13621
|
+
PRISM_FALLTHROUGH
|
13589
13622
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
13590
13623
|
pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
|
13591
13624
|
pm_node_destroy(parser, target);
|
@@ -13712,6 +13745,9 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
13712
13745
|
|
13713
13746
|
// Replace the name with "[]=".
|
13714
13747
|
call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
|
13748
|
+
|
13749
|
+
// Ensure that the arguments for []= don't contain keywords
|
13750
|
+
pm_index_arguments_check(parser, call->arguments, call->block);
|
13715
13751
|
pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
|
13716
13752
|
|
13717
13753
|
return target;
|
@@ -13724,7 +13760,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
13724
13760
|
// is no way for us to attach it to the tree at this point.
|
13725
13761
|
pm_node_destroy(parser, value);
|
13726
13762
|
}
|
13727
|
-
|
13763
|
+
PRISM_FALLTHROUGH
|
13728
13764
|
default:
|
13729
13765
|
// In this case we have a node that we don't know how to convert into a
|
13730
13766
|
// target. We need to treat it as an error. For now, we'll mark it as an
|
@@ -13797,6 +13833,13 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
13797
13833
|
pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
|
13798
13834
|
pm_multi_target_node_targets_append(parser, result, splat);
|
13799
13835
|
has_rest = true;
|
13836
|
+
} else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
|
13837
|
+
context_push(parser, PM_CONTEXT_MULTI_TARGET);
|
13838
|
+
pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
|
13839
|
+
target = parse_target(parser, target, true, false);
|
13840
|
+
|
13841
|
+
pm_multi_target_node_targets_append(parser, result, target);
|
13842
|
+
context_pop(parser);
|
13800
13843
|
} else if (token_begins_expression_p(parser->current.type)) {
|
13801
13844
|
pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
|
13802
13845
|
target = parse_target(parser, target, true, false);
|
@@ -14108,8 +14151,8 @@ static void
|
|
14108
14151
|
parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
|
14109
14152
|
pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
|
14110
14153
|
|
14111
|
-
// First we need to check if the next token is one that could be the start
|
14112
|
-
// an argument. If it's not, then we can just return.
|
14154
|
+
// First we need to check if the next token is one that could be the start
|
14155
|
+
// of an argument. If it's not, then we can just return.
|
14113
14156
|
if (
|
14114
14157
|
match2(parser, terminator, PM_TOKEN_EOF) ||
|
14115
14158
|
(binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
|
@@ -14186,6 +14229,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
14186
14229
|
if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
|
14187
14230
|
pm_parser_scope_forwarding_positionals_check(parser, &operator);
|
14188
14231
|
argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
|
14232
|
+
if (parsed_bare_hash) {
|
14233
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
|
14234
|
+
}
|
14189
14235
|
} else {
|
14190
14236
|
pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
|
14191
14237
|
|
@@ -14234,7 +14280,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
14234
14280
|
}
|
14235
14281
|
}
|
14236
14282
|
}
|
14237
|
-
|
14283
|
+
PRISM_FALLTHROUGH
|
14238
14284
|
default: {
|
14239
14285
|
if (argument == NULL) {
|
14240
14286
|
argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
|
@@ -14297,23 +14343,32 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
14297
14343
|
// If parsing the argument failed, we need to stop parsing arguments.
|
14298
14344
|
if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
|
14299
14345
|
|
14300
|
-
// If the terminator of these arguments is not EOF, then we have a
|
14301
|
-
// token we're looking for. In that case we can accept a
|
14302
|
-
// because it is not functioning as a statement terminator.
|
14303
|
-
|
14346
|
+
// If the terminator of these arguments is not EOF, then we have a
|
14347
|
+
// specific token we're looking for. In that case we can accept a
|
14348
|
+
// newline here because it is not functioning as a statement terminator.
|
14349
|
+
bool accepted_newline = false;
|
14350
|
+
if (terminator != PM_TOKEN_EOF) {
|
14351
|
+
accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
|
14352
|
+
}
|
14304
14353
|
|
14305
14354
|
if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
|
14306
|
-
// If we previously were on a comma and we just parsed a bare hash,
|
14307
|
-
// we want to continue parsing arguments. This is because the
|
14308
|
-
// grabbed up by the hash parser.
|
14355
|
+
// If we previously were on a comma and we just parsed a bare hash,
|
14356
|
+
// then we want to continue parsing arguments. This is because the
|
14357
|
+
// comma was grabbed up by the hash parser.
|
14358
|
+
} else if (accept1(parser, PM_TOKEN_COMMA)) {
|
14359
|
+
// If there was a comma, then we need to check if we also accepted a
|
14360
|
+
// newline. If we did, then this is a syntax error.
|
14361
|
+
if (accepted_newline) {
|
14362
|
+
pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
|
14363
|
+
}
|
14309
14364
|
} else {
|
14310
|
-
// If there is no comma at the end of the argument list then we're
|
14311
|
-
// parsing arguments and can break out of this loop.
|
14312
|
-
|
14365
|
+
// If there is no comma at the end of the argument list then we're
|
14366
|
+
// done parsing arguments and can break out of this loop.
|
14367
|
+
break;
|
14313
14368
|
}
|
14314
14369
|
|
14315
|
-
// If we hit the terminator, then that means we have a trailing comma so
|
14316
|
-
// can accept that output as well.
|
14370
|
+
// If we hit the terminator, then that means we have a trailing comma so
|
14371
|
+
// we can accept that output as well.
|
14317
14372
|
if (match1(parser, terminator)) break;
|
14318
14373
|
}
|
14319
14374
|
}
|
@@ -14468,15 +14523,17 @@ parse_parameters(
|
|
14468
14523
|
bool allows_trailing_comma,
|
14469
14524
|
bool allows_forwarding_parameters,
|
14470
14525
|
bool accepts_blocks_in_defaults,
|
14526
|
+
bool in_block,
|
14471
14527
|
uint16_t depth
|
14472
14528
|
) {
|
14473
|
-
pm_parameters_node_t *params = pm_parameters_node_create(parser);
|
14474
|
-
bool looping = true;
|
14475
|
-
|
14476
14529
|
pm_do_loop_stack_push(parser, false);
|
14530
|
+
|
14531
|
+
pm_parameters_node_t *params = pm_parameters_node_create(parser);
|
14477
14532
|
pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
|
14478
14533
|
|
14479
|
-
|
14534
|
+
while (true) {
|
14535
|
+
bool parsing = true;
|
14536
|
+
|
14480
14537
|
switch (parser->current.type) {
|
14481
14538
|
case PM_TOKEN_PARENTHESIS_LEFT: {
|
14482
14539
|
update_parameter_state(parser, &parser->current, &order);
|
@@ -14611,7 +14668,7 @@ parse_parameters(
|
|
14611
14668
|
// then we can put a missing node in its place and stop parsing the
|
14612
14669
|
// parameters entirely now.
|
14613
14670
|
if (parser->recovering) {
|
14614
|
-
|
14671
|
+
parsing = false;
|
14615
14672
|
break;
|
14616
14673
|
}
|
14617
14674
|
} else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
|
@@ -14631,7 +14688,7 @@ parse_parameters(
|
|
14631
14688
|
break;
|
14632
14689
|
}
|
14633
14690
|
case PM_TOKEN_LABEL: {
|
14634
|
-
if (!uses_parentheses) parser->in_keyword_arg = true;
|
14691
|
+
if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
|
14635
14692
|
update_parameter_state(parser, &parser->current, &order);
|
14636
14693
|
|
14637
14694
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
@@ -14669,7 +14726,7 @@ parse_parameters(
|
|
14669
14726
|
context_pop(parser);
|
14670
14727
|
|
14671
14728
|
if (uses_parentheses) {
|
14672
|
-
|
14729
|
+
parsing = false;
|
14673
14730
|
break;
|
14674
14731
|
}
|
14675
14732
|
|
@@ -14713,7 +14770,7 @@ parse_parameters(
|
|
14713
14770
|
// then we can put a missing node in its place and stop parsing the
|
14714
14771
|
// parameters entirely now.
|
14715
14772
|
if (parser->recovering) {
|
14716
|
-
|
14773
|
+
parsing = false;
|
14717
14774
|
break;
|
14718
14775
|
}
|
14719
14776
|
}
|
@@ -14815,14 +14872,31 @@ parse_parameters(
|
|
14815
14872
|
}
|
14816
14873
|
}
|
14817
14874
|
|
14818
|
-
|
14875
|
+
parsing = false;
|
14819
14876
|
break;
|
14820
14877
|
}
|
14821
14878
|
|
14822
|
-
|
14823
|
-
|
14879
|
+
// If we hit some kind of issue while parsing the parameter, this would
|
14880
|
+
// have been set to false. In that case, we need to break out of the
|
14881
|
+
// loop.
|
14882
|
+
if (!parsing) break;
|
14883
|
+
|
14884
|
+
bool accepted_newline = false;
|
14885
|
+
if (uses_parentheses) {
|
14886
|
+
accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
|
14824
14887
|
}
|
14825
|
-
|
14888
|
+
|
14889
|
+
if (accept1(parser, PM_TOKEN_COMMA)) {
|
14890
|
+
// If there was a comma, but we also accepted a newline, then this
|
14891
|
+
// is a syntax error.
|
14892
|
+
if (accepted_newline) {
|
14893
|
+
pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
|
14894
|
+
}
|
14895
|
+
} else {
|
14896
|
+
// If there was no comma, then we're done parsing parameters.
|
14897
|
+
break;
|
14898
|
+
}
|
14899
|
+
}
|
14826
14900
|
|
14827
14901
|
pm_do_loop_stack_pop(parser);
|
14828
14902
|
|
@@ -15083,7 +15157,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
|
|
15083
15157
|
case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
|
15084
15158
|
case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
|
15085
15159
|
case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
|
15086
|
-
default: assert(false && "unreachable"); context =
|
15160
|
+
default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
|
15087
15161
|
}
|
15088
15162
|
|
15089
15163
|
else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
|
@@ -15178,6 +15252,7 @@ parse_block_parameters(
|
|
15178
15252
|
allows_trailing_comma,
|
15179
15253
|
false,
|
15180
15254
|
accepts_blocks_in_defaults,
|
15255
|
+
true,
|
15181
15256
|
(uint16_t) (depth + 1)
|
15182
15257
|
);
|
15183
15258
|
}
|
@@ -15500,6 +15575,7 @@ parse_return(pm_parser_t *parser, pm_node_t *node) {
|
|
15500
15575
|
case PM_CONTEXT_IF:
|
15501
15576
|
case PM_CONTEXT_LOOP_PREDICATE:
|
15502
15577
|
case PM_CONTEXT_MAIN:
|
15578
|
+
case PM_CONTEXT_MULTI_TARGET:
|
15503
15579
|
case PM_CONTEXT_PARENS:
|
15504
15580
|
case PM_CONTEXT_POSTEXE:
|
15505
15581
|
case PM_CONTEXT_PREDICATE:
|
@@ -15628,6 +15704,7 @@ parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
|
|
15628
15704
|
case PM_CONTEXT_MODULE_ENSURE:
|
15629
15705
|
case PM_CONTEXT_MODULE_RESCUE:
|
15630
15706
|
case PM_CONTEXT_MODULE:
|
15707
|
+
case PM_CONTEXT_MULTI_TARGET:
|
15631
15708
|
case PM_CONTEXT_PARENS:
|
15632
15709
|
case PM_CONTEXT_PREDICATE:
|
15633
15710
|
case PM_CONTEXT_RESCUE_MODIFIER:
|
@@ -16091,7 +16168,7 @@ parse_operator_symbol_name(const pm_token_t *name) {
|
|
16091
16168
|
case PM_TOKEN_TILDE:
|
16092
16169
|
case PM_TOKEN_BANG:
|
16093
16170
|
if (name->end[-1] == '@') return name->end - 1;
|
16094
|
-
|
16171
|
+
PRISM_FALLTHROUGH
|
16095
16172
|
default:
|
16096
16173
|
return name->end;
|
16097
16174
|
}
|
@@ -16347,14 +16424,15 @@ static pm_node_t *
|
|
16347
16424
|
parse_variable(pm_parser_t *parser) {
|
16348
16425
|
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
|
16349
16426
|
int depth;
|
16427
|
+
bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
|
16350
16428
|
|
16351
|
-
if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
|
16429
|
+
if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
|
16352
16430
|
return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
|
16353
16431
|
}
|
16354
16432
|
|
16355
16433
|
pm_scope_t *current_scope = parser->current_scope;
|
16356
16434
|
if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
|
16357
|
-
if (
|
16435
|
+
if (is_numbered_param) {
|
16358
16436
|
// When you use a numbered parameter, it implies the existence of
|
16359
16437
|
// all of the locals that exist before it. For example, referencing
|
16360
16438
|
// _2 means that _1 must exist. Therefore here we loop through all
|
@@ -17045,7 +17123,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
17045
17123
|
parse_pattern_hash_key(parser, &keys, first_node);
|
17046
17124
|
pm_node_t *value;
|
17047
17125
|
|
17048
|
-
if (
|
17126
|
+
if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
|
17049
17127
|
// Otherwise, we will create an implicit local variable
|
17050
17128
|
// target for the value.
|
17051
17129
|
value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
|
@@ -17062,7 +17140,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
17062
17140
|
break;
|
17063
17141
|
}
|
17064
17142
|
}
|
17065
|
-
|
17143
|
+
PRISM_FALLTHROUGH
|
17066
17144
|
default: {
|
17067
17145
|
// If we get anything else, then this is an error. For this we'll
|
17068
17146
|
// create a missing node for the value and create an assoc node for
|
@@ -17082,7 +17160,12 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
17082
17160
|
// If there are any other assocs, then we'll parse them now.
|
17083
17161
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
17084
17162
|
// Here we need to break to support trailing commas.
|
17085
|
-
if (
|
17163
|
+
if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
|
17164
|
+
// Trailing commas are not allowed to follow a rest pattern.
|
17165
|
+
if (rest != NULL) {
|
17166
|
+
pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
|
17167
|
+
}
|
17168
|
+
|
17086
17169
|
break;
|
17087
17170
|
}
|
17088
17171
|
|
@@ -17553,7 +17636,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
|
|
17553
17636
|
break;
|
17554
17637
|
}
|
17555
17638
|
}
|
17556
|
-
|
17639
|
+
PRISM_FALLTHROUGH
|
17557
17640
|
default:
|
17558
17641
|
node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
|
17559
17642
|
break;
|
@@ -17575,9 +17658,10 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
|
|
17575
17658
|
// Gather up all of the patterns into the list.
|
17576
17659
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
17577
17660
|
// Break early here in case we have a trailing comma.
|
17578
|
-
if (
|
17661
|
+
if (match9(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF,PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
|
17579
17662
|
node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
|
17580
17663
|
pm_node_list_append(&nodes, node);
|
17664
|
+
trailing_rest = true;
|
17581
17665
|
break;
|
17582
17666
|
}
|
17583
17667
|
|
@@ -17779,6 +17863,7 @@ parse_retry(pm_parser_t *parser, const pm_node_t *node) {
|
|
17779
17863
|
case PM_CONTEXT_LAMBDA_BRACES:
|
17780
17864
|
case PM_CONTEXT_LAMBDA_DO_END:
|
17781
17865
|
case PM_CONTEXT_LOOP_PREDICATE:
|
17866
|
+
case PM_CONTEXT_MULTI_TARGET:
|
17782
17867
|
case PM_CONTEXT_PARENS:
|
17783
17868
|
case PM_CONTEXT_POSTEXE:
|
17784
17869
|
case PM_CONTEXT_PREDICATE:
|
@@ -17862,6 +17947,7 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
|
|
17862
17947
|
case PM_CONTEXT_LAMBDA_ENSURE:
|
17863
17948
|
case PM_CONTEXT_LAMBDA_RESCUE:
|
17864
17949
|
case PM_CONTEXT_LOOP_PREDICATE:
|
17950
|
+
case PM_CONTEXT_MULTI_TARGET:
|
17865
17951
|
case PM_CONTEXT_PARENS:
|
17866
17952
|
case PM_CONTEXT_POSTEXE:
|
17867
17953
|
case PM_CONTEXT_PREDICATE:
|
@@ -17951,19 +18037,31 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
17951
18037
|
bool parsed_bare_hash = false;
|
17952
18038
|
|
17953
18039
|
while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
|
18040
|
+
bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
|
18041
|
+
|
17954
18042
|
// Handle the case where we don't have a comma and we have a
|
17955
18043
|
// newline followed by a right bracket.
|
17956
|
-
if (
|
18044
|
+
if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
|
17957
18045
|
break;
|
17958
18046
|
}
|
17959
18047
|
|
17960
18048
|
// Ensure that we have a comma between elements in the array.
|
17961
|
-
if (
|
17962
|
-
|
17963
|
-
|
18049
|
+
if (array->elements.size > 0) {
|
18050
|
+
if (accept1(parser, PM_TOKEN_COMMA)) {
|
18051
|
+
// If there was a comma but we also accepts a newline,
|
18052
|
+
// then this is a syntax error.
|
18053
|
+
if (accepted_newline) {
|
18054
|
+
pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
|
18055
|
+
}
|
18056
|
+
} else {
|
18057
|
+
// If there was no comma, then we need to add a syntax
|
18058
|
+
// error.
|
18059
|
+
const uint8_t *location = parser->previous.end;
|
18060
|
+
PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
|
17964
18061
|
|
17965
|
-
|
17966
|
-
|
18062
|
+
parser->previous.start = location;
|
18063
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
18064
|
+
}
|
17967
18065
|
}
|
17968
18066
|
|
17969
18067
|
// If we have a right bracket immediately following a comma,
|
@@ -18119,14 +18217,32 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18119
18217
|
multi_target->base.location.start = lparen_loc.start;
|
18120
18218
|
multi_target->base.location.end = rparen_loc.end;
|
18121
18219
|
|
18122
|
-
|
18123
|
-
|
18124
|
-
|
18125
|
-
|
18126
|
-
|
18220
|
+
pm_node_t *result;
|
18221
|
+
if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
|
18222
|
+
result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
|
18223
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
18224
|
+
} else {
|
18225
|
+
result = (pm_node_t *) multi_target;
|
18127
18226
|
}
|
18128
18227
|
|
18129
|
-
|
18228
|
+
if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
|
18229
|
+
// All set, this is explicitly allowed by the parent
|
18230
|
+
// context.
|
18231
|
+
} else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
|
18232
|
+
// All set, we're inside a for loop and we're parsing
|
18233
|
+
// multiple targets.
|
18234
|
+
} else if (binding_power != PM_BINDING_POWER_STATEMENT) {
|
18235
|
+
// Multi targets are not allowed when it's not a
|
18236
|
+
// statement level.
|
18237
|
+
pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
18238
|
+
} else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
18239
|
+
// Multi targets must be followed by an equal sign in
|
18240
|
+
// order to be valid (or a right parenthesis if they are
|
18241
|
+
// nested).
|
18242
|
+
pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
18243
|
+
}
|
18244
|
+
|
18245
|
+
return result;
|
18130
18246
|
}
|
18131
18247
|
|
18132
18248
|
// If we have a single statement and are ending on a right parenthesis
|
@@ -18187,6 +18303,33 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18187
18303
|
pm_accepts_block_stack_pop(parser);
|
18188
18304
|
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
|
18189
18305
|
|
18306
|
+
// When we're parsing multi targets, we allow them to be followed by
|
18307
|
+
// a right parenthesis if they are at the statement level. This is
|
18308
|
+
// only possible if they are the final statement in a parentheses.
|
18309
|
+
// We need to explicitly reject that here.
|
18310
|
+
{
|
18311
|
+
pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
|
18312
|
+
|
18313
|
+
if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
|
18314
|
+
pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
|
18315
|
+
pm_multi_target_node_targets_append(parser, multi_target, statement);
|
18316
|
+
|
18317
|
+
statement = (pm_node_t *) multi_target;
|
18318
|
+
statements->body.nodes[statements->body.size - 1] = statement;
|
18319
|
+
}
|
18320
|
+
|
18321
|
+
if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
|
18322
|
+
const uint8_t *offset = statement->location.end;
|
18323
|
+
pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
|
18324
|
+
pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
|
18325
|
+
|
18326
|
+
statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
|
18327
|
+
statements->body.nodes[statements->body.size - 1] = statement;
|
18328
|
+
|
18329
|
+
pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
18330
|
+
}
|
18331
|
+
}
|
18332
|
+
|
18190
18333
|
pop_block_exits(parser, previous_block_exits);
|
18191
18334
|
pm_node_list_free(&current_block_exits);
|
18192
18335
|
|
@@ -18442,10 +18585,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18442
18585
|
case PM_TOKEN_HEREDOC_START: {
|
18443
18586
|
// Here we have found a heredoc. We'll parse it and add it to the
|
18444
18587
|
// list of strings.
|
18445
|
-
|
18446
|
-
|
18447
|
-
|
18448
|
-
|
18588
|
+
assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
|
18589
|
+
pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
|
18590
|
+
|
18591
|
+
size_t common_whitespace = (size_t) -1;
|
18592
|
+
parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
|
18449
18593
|
|
18450
18594
|
parser_lex(parser);
|
18451
18595
|
pm_token_t opening = parser->previous;
|
@@ -18456,10 +18600,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18456
18600
|
if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
|
18457
18601
|
// If we get here, then we have an empty heredoc. We'll create
|
18458
18602
|
// an empty content token and return an empty string node.
|
18459
|
-
expect1_heredoc_term(parser, lex_mode);
|
18603
|
+
expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
|
18460
18604
|
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
18461
18605
|
|
18462
|
-
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
18606
|
+
if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
18463
18607
|
node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
|
18464
18608
|
} else {
|
18465
18609
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
|
@@ -18486,18 +18630,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18486
18630
|
cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
|
18487
18631
|
cast->base.location = cast->opening_loc;
|
18488
18632
|
|
18489
|
-
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
18633
|
+
if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
18490
18634
|
assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
|
18491
18635
|
cast->base.type = PM_X_STRING_NODE;
|
18492
18636
|
}
|
18493
18637
|
|
18494
|
-
size_t common_whitespace
|
18495
|
-
if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
|
18638
|
+
if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
|
18496
18639
|
parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
|
18497
18640
|
}
|
18498
18641
|
|
18499
18642
|
node = (pm_node_t *) cast;
|
18500
|
-
expect1_heredoc_term(parser, lex_mode);
|
18643
|
+
expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
|
18501
18644
|
} else {
|
18502
18645
|
// If we get here, then we have multiple parts in the heredoc,
|
18503
18646
|
// so we'll need to create an interpolated string node to hold
|
@@ -18511,15 +18654,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18511
18654
|
}
|
18512
18655
|
}
|
18513
18656
|
|
18514
|
-
size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
|
18515
|
-
|
18516
18657
|
// Now that we have all of the parts, create the correct type of
|
18517
18658
|
// interpolated node.
|
18518
|
-
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
18659
|
+
if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
18519
18660
|
pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
|
18520
18661
|
cast->parts = parts;
|
18521
18662
|
|
18522
|
-
expect1_heredoc_term(parser, lex_mode);
|
18663
|
+
expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
|
18523
18664
|
pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
|
18524
18665
|
|
18525
18666
|
cast->base.location = cast->opening_loc;
|
@@ -18528,7 +18669,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18528
18669
|
pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
|
18529
18670
|
pm_node_list_free(&parts);
|
18530
18671
|
|
18531
|
-
expect1_heredoc_term(parser, lex_mode);
|
18672
|
+
expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
|
18532
18673
|
pm_interpolated_string_node_closing_set(cast, &parser->previous);
|
18533
18674
|
|
18534
18675
|
cast->base.location = cast->opening_loc;
|
@@ -18537,9 +18678,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18537
18678
|
|
18538
18679
|
// If this is a heredoc that is indented with a ~, then we need
|
18539
18680
|
// to dedent each line by the common leading whitespace.
|
18540
|
-
if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
|
18681
|
+
if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
|
18541
18682
|
pm_node_list_t *nodes;
|
18542
|
-
if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
18683
|
+
if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
|
18543
18684
|
nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
|
18544
18685
|
} else {
|
18545
18686
|
nodes = &((pm_interpolated_string_node_t *) node)->parts;
|
@@ -18625,7 +18766,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18625
18766
|
pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
|
18626
18767
|
}
|
18627
18768
|
}
|
18628
|
-
|
18769
|
+
PRISM_FALLTHROUGH
|
18629
18770
|
default:
|
18630
18771
|
return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
|
18631
18772
|
}
|
@@ -19116,6 +19257,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19116
19257
|
context_push(parser, PM_CONTEXT_DEF_PARAMS);
|
19117
19258
|
parser_lex(parser);
|
19118
19259
|
|
19260
|
+
// This will be false if the method name is not a valid identifier
|
19261
|
+
// but could be followed by an operator.
|
19262
|
+
bool valid_name = true;
|
19263
|
+
|
19119
19264
|
switch (parser->current.type) {
|
19120
19265
|
case PM_CASE_OPERATOR:
|
19121
19266
|
pm_parser_scope_push(parser, true);
|
@@ -19145,10 +19290,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19145
19290
|
|
19146
19291
|
break;
|
19147
19292
|
}
|
19148
|
-
case PM_TOKEN_CONSTANT:
|
19149
19293
|
case PM_TOKEN_INSTANCE_VARIABLE:
|
19150
19294
|
case PM_TOKEN_CLASS_VARIABLE:
|
19151
19295
|
case PM_TOKEN_GLOBAL_VARIABLE:
|
19296
|
+
valid_name = false;
|
19297
|
+
PRISM_FALLTHROUGH
|
19298
|
+
case PM_TOKEN_CONSTANT:
|
19152
19299
|
case PM_TOKEN_KEYWORD_NIL:
|
19153
19300
|
case PM_TOKEN_KEYWORD_SELF:
|
19154
19301
|
case PM_TOKEN_KEYWORD_TRUE:
|
@@ -19206,6 +19353,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19206
19353
|
|
19207
19354
|
name = parse_method_definition_name(parser);
|
19208
19355
|
} else {
|
19356
|
+
if (!valid_name) {
|
19357
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
|
19358
|
+
}
|
19359
|
+
|
19209
19360
|
name = identifier;
|
19210
19361
|
}
|
19211
19362
|
break;
|
@@ -19256,7 +19407,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19256
19407
|
if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
19257
19408
|
params = NULL;
|
19258
19409
|
} else {
|
19259
|
-
params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, (uint16_t) (depth + 1));
|
19410
|
+
params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
|
19260
19411
|
}
|
19261
19412
|
|
19262
19413
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
@@ -19281,7 +19432,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19281
19432
|
|
19282
19433
|
lparen = not_provided(parser);
|
19283
19434
|
rparen = not_provided(parser);
|
19284
|
-
params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, (uint16_t) (depth + 1));
|
19435
|
+
params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
|
19285
19436
|
|
19286
19437
|
context_pop(parser);
|
19287
19438
|
break;
|
@@ -19690,9 +19841,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19690
19841
|
pm_do_loop_stack_pop(parser);
|
19691
19842
|
context_pop(parser);
|
19692
19843
|
|
19693
|
-
|
19694
|
-
|
19844
|
+
pm_token_t do_keyword;
|
19845
|
+
if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
|
19846
|
+
do_keyword = parser->previous;
|
19847
|
+
} else {
|
19848
|
+
do_keyword = not_provided(parser);
|
19849
|
+
expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
|
19850
|
+
}
|
19695
19851
|
|
19852
|
+
pm_statements_node_t *statements = NULL;
|
19696
19853
|
if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
|
19697
19854
|
pm_accepts_block_stack_push(parser, true);
|
19698
19855
|
statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
|
@@ -19703,7 +19860,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19703
19860
|
parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
|
19704
19861
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
|
19705
19862
|
|
19706
|
-
return (pm_node_t *) pm_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
|
19863
|
+
return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
|
19707
19864
|
}
|
19708
19865
|
case PM_TOKEN_KEYWORD_WHILE: {
|
19709
19866
|
size_t opening_newline_index = token_newline_index(parser);
|
@@ -19718,9 +19875,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19718
19875
|
pm_do_loop_stack_pop(parser);
|
19719
19876
|
context_pop(parser);
|
19720
19877
|
|
19721
|
-
|
19722
|
-
|
19878
|
+
pm_token_t do_keyword;
|
19879
|
+
if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
|
19880
|
+
do_keyword = parser->previous;
|
19881
|
+
} else {
|
19882
|
+
do_keyword = not_provided(parser);
|
19883
|
+
expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
|
19884
|
+
}
|
19723
19885
|
|
19886
|
+
pm_statements_node_t *statements = NULL;
|
19724
19887
|
if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
|
19725
19888
|
pm_accepts_block_stack_push(parser, true);
|
19726
19889
|
statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
|
@@ -19731,7 +19894,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19731
19894
|
parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
|
19732
19895
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
|
19733
19896
|
|
19734
|
-
return (pm_node_t *) pm_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
|
19897
|
+
return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
|
19735
19898
|
}
|
19736
19899
|
case PM_TOKEN_PERCENT_LOWER_I: {
|
19737
19900
|
parser_lex(parser);
|
@@ -20801,7 +20964,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20801
20964
|
pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
|
20802
20965
|
}
|
20803
20966
|
}
|
20804
|
-
|
20967
|
+
PRISM_FALLTHROUGH
|
20805
20968
|
case PM_CASE_WRITABLE: {
|
20806
20969
|
parser_lex(parser);
|
20807
20970
|
pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
|
@@ -20847,7 +21010,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20847
21010
|
case PM_BACK_REFERENCE_READ_NODE:
|
20848
21011
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
20849
21012
|
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
20850
|
-
|
21013
|
+
PRISM_FALLTHROUGH
|
20851
21014
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
20852
21015
|
parser_lex(parser);
|
20853
21016
|
|
@@ -20965,7 +21128,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20965
21128
|
case PM_BACK_REFERENCE_READ_NODE:
|
20966
21129
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
20967
21130
|
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
20968
|
-
|
21131
|
+
PRISM_FALLTHROUGH
|
20969
21132
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
20970
21133
|
parser_lex(parser);
|
20971
21134
|
|
@@ -21093,7 +21256,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
21093
21256
|
case PM_BACK_REFERENCE_READ_NODE:
|
21094
21257
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
21095
21258
|
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
21096
|
-
|
21259
|
+
PRISM_FALLTHROUGH
|
21097
21260
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
21098
21261
|
parser_lex(parser);
|
21099
21262
|
|
@@ -21303,6 +21466,33 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
21303
21466
|
case PM_TOKEN_STAR:
|
21304
21467
|
case PM_TOKEN_STAR_STAR: {
|
21305
21468
|
parser_lex(parser);
|
21469
|
+
pm_token_t operator = parser->previous;
|
21470
|
+
switch (PM_NODE_TYPE(node)) {
|
21471
|
+
case PM_RESCUE_MODIFIER_NODE: {
|
21472
|
+
pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
|
21473
|
+
if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
|
21474
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21475
|
+
}
|
21476
|
+
break;
|
21477
|
+
}
|
21478
|
+
case PM_AND_NODE: {
|
21479
|
+
pm_and_node_t *cast = (pm_and_node_t *) node;
|
21480
|
+
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
|
21481
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21482
|
+
}
|
21483
|
+
break;
|
21484
|
+
}
|
21485
|
+
case PM_OR_NODE: {
|
21486
|
+
pm_or_node_t *cast = (pm_or_node_t *) node;
|
21487
|
+
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
|
21488
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21489
|
+
}
|
21490
|
+
break;
|
21491
|
+
}
|
21492
|
+
default:
|
21493
|
+
break;
|
21494
|
+
}
|
21495
|
+
|
21306
21496
|
pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
|
21307
21497
|
return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
|
21308
21498
|
}
|
@@ -21330,6 +21520,32 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
21330
21520
|
return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
|
21331
21521
|
}
|
21332
21522
|
|
21523
|
+
switch (PM_NODE_TYPE(node)) {
|
21524
|
+
case PM_RESCUE_MODIFIER_NODE: {
|
21525
|
+
pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
|
21526
|
+
if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
|
21527
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21528
|
+
}
|
21529
|
+
break;
|
21530
|
+
}
|
21531
|
+
case PM_AND_NODE: {
|
21532
|
+
pm_and_node_t *cast = (pm_and_node_t *) node;
|
21533
|
+
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
|
21534
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21535
|
+
}
|
21536
|
+
break;
|
21537
|
+
}
|
21538
|
+
case PM_OR_NODE: {
|
21539
|
+
pm_or_node_t *cast = (pm_or_node_t *) node;
|
21540
|
+
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
|
21541
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21542
|
+
}
|
21543
|
+
break;
|
21544
|
+
}
|
21545
|
+
default:
|
21546
|
+
break;
|
21547
|
+
}
|
21548
|
+
|
21333
21549
|
pm_token_t message;
|
21334
21550
|
|
21335
21551
|
switch (parser->current.type) {
|
@@ -21677,6 +21893,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
21677
21893
|
if (pm_symbol_node_label_p(node)) {
|
21678
21894
|
return node;
|
21679
21895
|
}
|
21896
|
+
break;
|
21680
21897
|
default:
|
21681
21898
|
break;
|
21682
21899
|
}
|
@@ -21684,8 +21901,11 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
21684
21901
|
// Otherwise we'll look and see if the next token can be parsed as an infix
|
21685
21902
|
// operator. If it can, then we'll parse it using parse_expression_infix.
|
21686
21903
|
pm_binding_powers_t current_binding_powers;
|
21904
|
+
pm_token_type_t current_token_type;
|
21905
|
+
|
21687
21906
|
while (
|
21688
|
-
|
21907
|
+
current_token_type = parser->current.type,
|
21908
|
+
current_binding_powers = pm_binding_powers[current_token_type],
|
21689
21909
|
binding_power <= current_binding_powers.left &&
|
21690
21910
|
current_binding_powers.binary
|
21691
21911
|
) {
|
@@ -21726,6 +21946,13 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
21726
21946
|
// If the operator is nonassoc and we should not be able to parse the
|
21727
21947
|
// upcoming infix operator, break.
|
21728
21948
|
if (current_binding_powers.nonassoc) {
|
21949
|
+
// If this is a non-assoc operator and we are about to parse the
|
21950
|
+
// exact same operator, then we need to add an error.
|
21951
|
+
if (match1(parser, current_token_type)) {
|
21952
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
|
21953
|
+
break;
|
21954
|
+
}
|
21955
|
+
|
21729
21956
|
// If this is an endless range, then we need to reject a couple of
|
21730
21957
|
// additional operators because it violates the normal operator
|
21731
21958
|
// precedence rules. Those patterns are:
|
@@ -21735,7 +21962,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
21735
21962
|
//
|
21736
21963
|
if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
|
21737
21964
|
if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
|
21738
|
-
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(
|
21965
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
|
21739
21966
|
break;
|
21740
21967
|
}
|
21741
21968
|
|
@@ -21857,6 +22084,7 @@ wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
|
|
21857
22084
|
));
|
21858
22085
|
|
21859
22086
|
pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
|
22087
|
+
pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
|
21860
22088
|
}
|
21861
22089
|
|
21862
22090
|
pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
|
@@ -22535,3 +22763,166 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
|
|
22535
22763
|
}
|
22536
22764
|
|
22537
22765
|
#endif
|
22766
|
+
|
22767
|
+
/******************************************************************************/
|
22768
|
+
/* Slice queries for the Ruby API */
|
22769
|
+
/******************************************************************************/
|
22770
|
+
|
22771
|
+
/** The kinds of slice that pm_slice_type can report. */
typedef enum {
    /** The given encoding name is invalid. */
    PM_SLICE_TYPE_ERROR = -1,

    /** The slice does not fall into any of the other categories. */
    PM_SLICE_TYPE_NONE = 0,

    /** The slice is a valid local variable name. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** The slice is a valid constant name. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** The slice is a valid method name. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
|
22788
|
+
|
22789
|
+
/**
|
22790
|
+
* Check that the slice is a valid local variable name or constant.
|
22791
|
+
*/
|
22792
|
+
pm_slice_type_t
|
22793
|
+
pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
|
22794
|
+
// first, get the right encoding object
|
22795
|
+
const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
|
22796
|
+
if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
|
22797
|
+
|
22798
|
+
// check that there is at least one character
|
22799
|
+
if (length == 0) return PM_SLICE_TYPE_NONE;
|
22800
|
+
|
22801
|
+
size_t width;
|
22802
|
+
if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
|
22803
|
+
// valid because alphabetical
|
22804
|
+
} else if (*source == '_') {
|
22805
|
+
// valid because underscore
|
22806
|
+
width = 1;
|
22807
|
+
} else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
|
22808
|
+
// valid because multibyte
|
22809
|
+
} else {
|
22810
|
+
// invalid because no match
|
22811
|
+
return PM_SLICE_TYPE_NONE;
|
22812
|
+
}
|
22813
|
+
|
22814
|
+
// determine the type of the slice based on the first character
|
22815
|
+
const uint8_t *end = source + length;
|
22816
|
+
pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
|
22817
|
+
|
22818
|
+
// next, iterate through all of the bytes of the string to ensure that they
|
22819
|
+
// are all valid identifier characters
|
22820
|
+
source += width;
|
22821
|
+
|
22822
|
+
while (source < end) {
|
22823
|
+
if ((width = encoding->alnum_char(source, end - source)) != 0) {
|
22824
|
+
// valid because alphanumeric
|
22825
|
+
source += width;
|
22826
|
+
} else if (*source == '_') {
|
22827
|
+
// valid because underscore
|
22828
|
+
source++;
|
22829
|
+
} else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
|
22830
|
+
// valid because multibyte
|
22831
|
+
source += width;
|
22832
|
+
} else {
|
22833
|
+
// invalid because no match
|
22834
|
+
break;
|
22835
|
+
}
|
22836
|
+
}
|
22837
|
+
|
22838
|
+
// accept a ! or ? at the end of the slice as a method name
|
22839
|
+
if (*source == '!' || *source == '?' || *source == '=') {
|
22840
|
+
source++;
|
22841
|
+
result = PM_SLICE_TYPE_METHOD_NAME;
|
22842
|
+
}
|
22843
|
+
|
22844
|
+
// valid if we are at the end of the slice
|
22845
|
+
return source == end ? result : PM_SLICE_TYPE_NONE;
|
22846
|
+
}
|
22847
|
+
|
22848
|
+
/**
|
22849
|
+
* Check that the slice is a valid local variable name.
|
22850
|
+
*/
|
22851
|
+
PRISM_EXPORTED_FUNCTION pm_string_query_t
|
22852
|
+
pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
|
22853
|
+
switch (pm_slice_type(source, length, encoding_name)) {
|
22854
|
+
case PM_SLICE_TYPE_ERROR:
|
22855
|
+
return PM_STRING_QUERY_ERROR;
|
22856
|
+
case PM_SLICE_TYPE_NONE:
|
22857
|
+
case PM_SLICE_TYPE_CONSTANT:
|
22858
|
+
case PM_SLICE_TYPE_METHOD_NAME:
|
22859
|
+
return PM_STRING_QUERY_FALSE;
|
22860
|
+
case PM_SLICE_TYPE_LOCAL:
|
22861
|
+
return PM_STRING_QUERY_TRUE;
|
22862
|
+
}
|
22863
|
+
|
22864
|
+
assert(false && "unreachable");
|
22865
|
+
return PM_STRING_QUERY_FALSE;
|
22866
|
+
}
|
22867
|
+
|
22868
|
+
/**
|
22869
|
+
* Check that the slice is a valid constant name.
|
22870
|
+
*/
|
22871
|
+
PRISM_EXPORTED_FUNCTION pm_string_query_t
|
22872
|
+
pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
|
22873
|
+
switch (pm_slice_type(source, length, encoding_name)) {
|
22874
|
+
case PM_SLICE_TYPE_ERROR:
|
22875
|
+
return PM_STRING_QUERY_ERROR;
|
22876
|
+
case PM_SLICE_TYPE_NONE:
|
22877
|
+
case PM_SLICE_TYPE_LOCAL:
|
22878
|
+
case PM_SLICE_TYPE_METHOD_NAME:
|
22879
|
+
return PM_STRING_QUERY_FALSE;
|
22880
|
+
case PM_SLICE_TYPE_CONSTANT:
|
22881
|
+
return PM_STRING_QUERY_TRUE;
|
22882
|
+
}
|
22883
|
+
|
22884
|
+
assert(false && "unreachable");
|
22885
|
+
return PM_STRING_QUERY_FALSE;
|
22886
|
+
}
|
22887
|
+
|
22888
|
+
/**
|
22889
|
+
* Check that the slice is a valid method name.
|
22890
|
+
*/
|
22891
|
+
PRISM_EXPORTED_FUNCTION pm_string_query_t
|
22892
|
+
pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
|
22893
|
+
#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
|
22894
|
+
#define C1(c) (*source == c)
|
22895
|
+
#define C2(s) (memcmp(source, s, 2) == 0)
|
22896
|
+
#define C3(s) (memcmp(source, s, 3) == 0)
|
22897
|
+
|
22898
|
+
switch (pm_slice_type(source, length, encoding_name)) {
|
22899
|
+
case PM_SLICE_TYPE_ERROR:
|
22900
|
+
return PM_STRING_QUERY_ERROR;
|
22901
|
+
case PM_SLICE_TYPE_NONE:
|
22902
|
+
break;
|
22903
|
+
case PM_SLICE_TYPE_LOCAL:
|
22904
|
+
// numbered parameters are not valid method names
|
22905
|
+
return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
|
22906
|
+
case PM_SLICE_TYPE_CONSTANT:
|
22907
|
+
// all constants are valid method names
|
22908
|
+
case PM_SLICE_TYPE_METHOD_NAME:
|
22909
|
+
// all method names are valid method names
|
22910
|
+
return PM_STRING_QUERY_TRUE;
|
22911
|
+
}
|
22912
|
+
|
22913
|
+
switch (length) {
|
22914
|
+
case 1:
|
22915
|
+
return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
|
22916
|
+
case 2:
|
22917
|
+
return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
|
22918
|
+
case 3:
|
22919
|
+
return B(C3("===") || C3("<=>") || C3("[]="));
|
22920
|
+
default:
|
22921
|
+
return PM_STRING_QUERY_FALSE;
|
22922
|
+
}
|
22923
|
+
|
22924
|
+
#undef B
|
22925
|
+
#undef C1
|
22926
|
+
#undef C2
|
22927
|
+
#undef C3
|
22928
|
+
}
|