prism 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +46 -1
- data/Makefile +1 -1
- data/config.yml +429 -2
- data/docs/build_system.md +8 -11
- data/docs/releasing.md +1 -1
- data/docs/relocation.md +34 -0
- data/docs/ruby_api.md +1 -1
- data/ext/prism/api_node.c +1824 -1305
- data/ext/prism/extconf.rb +13 -36
- data/ext/prism/extension.c +298 -109
- data/ext/prism/extension.h +4 -4
- data/include/prism/ast.h +442 -2
- data/include/prism/defines.h +26 -8
- data/include/prism/options.h +47 -1
- data/include/prism/util/pm_buffer.h +10 -0
- data/include/prism/version.h +2 -2
- data/include/prism.h +51 -4
- data/lib/prism/dot_visitor.rb +26 -0
- data/lib/prism/dsl.rb +14 -6
- data/lib/prism/ffi.rb +93 -28
- data/lib/prism/inspect_visitor.rb +4 -1
- data/lib/prism/node.rb +1886 -105
- data/lib/prism/parse_result/errors.rb +1 -1
- data/lib/prism/parse_result/newlines.rb +1 -1
- data/lib/prism/parse_result.rb +54 -2
- data/lib/prism/polyfill/append_as_bytes.rb +15 -0
- data/lib/prism/reflection.rb +4 -4
- data/lib/prism/relocation.rb +504 -0
- data/lib/prism/serialize.rb +1252 -765
- data/lib/prism/string_query.rb +30 -0
- data/lib/prism/translation/parser/builder.rb +61 -0
- data/lib/prism/translation/parser/compiler.rb +228 -162
- data/lib/prism/translation/parser/lexer.rb +435 -61
- data/lib/prism/translation/parser.rb +51 -3
- data/lib/prism/translation/parser35.rb +12 -0
- data/lib/prism/translation/ripper.rb +13 -3
- data/lib/prism/translation/ruby_parser.rb +17 -7
- data/lib/prism/translation.rb +1 -0
- data/lib/prism.rb +9 -7
- data/prism.gemspec +11 -1
- data/rbi/prism/dsl.rbi +10 -7
- data/rbi/prism/node.rbi +44 -17
- data/rbi/prism/parse_result.rbi +17 -0
- data/rbi/prism/string_query.rbi +12 -0
- data/rbi/prism/translation/parser35.rbi +6 -0
- data/rbi/prism.rbi +39 -36
- data/sig/prism/dsl.rbs +6 -4
- data/sig/prism/node.rbs +29 -15
- data/sig/prism/parse_result.rbs +10 -0
- data/sig/prism/relocation.rbs +185 -0
- data/sig/prism/serialize.rbs +4 -2
- data/sig/prism/string_query.rbs +11 -0
- data/sig/prism.rbs +22 -1
- data/src/diagnostic.c +2 -2
- data/src/node.c +39 -0
- data/src/options.c +31 -0
- data/src/prettyprint.c +62 -0
- data/src/prism.c +738 -199
- data/src/regexp.c +7 -3
- data/src/serialize.c +18 -0
- data/src/static_literals.c +1 -1
- data/src/util/pm_buffer.c +40 -0
- data/src/util/pm_char.c +1 -1
- data/src/util/pm_constant_pool.c +6 -2
- data/src/util/pm_string.c +1 -0
- data/src/util/pm_strncasecmp.c +13 -1
- metadata +13 -7
data/src/prism.c
CHANGED
@@ -1649,22 +1649,25 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
|
|
1649
1649
|
* the function pointer or can just directly use the UTF-8 functions.
|
1650
1650
|
*/
|
1651
1651
|
static inline size_t
|
1652
|
-
char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
|
1652
|
+
char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
|
1653
|
+
if (n <= 0) return 0;
|
1654
|
+
|
1653
1655
|
if (parser->encoding_changed) {
|
1654
1656
|
size_t width;
|
1655
|
-
|
1657
|
+
|
1658
|
+
if ((width = parser->encoding->alpha_char(b, n)) != 0) {
|
1656
1659
|
return width;
|
1657
1660
|
} else if (*b == '_') {
|
1658
1661
|
return 1;
|
1659
1662
|
} else if (*b >= 0x80) {
|
1660
|
-
return parser->encoding->char_width(b,
|
1663
|
+
return parser->encoding->char_width(b, n);
|
1661
1664
|
} else {
|
1662
1665
|
return 0;
|
1663
1666
|
}
|
1664
1667
|
} else if (*b < 0x80) {
|
1665
1668
|
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
|
1666
1669
|
} else {
|
1667
|
-
return pm_encoding_utf_8_char_width(b,
|
1670
|
+
return pm_encoding_utf_8_char_width(b, n);
|
1668
1671
|
}
|
1669
1672
|
}
|
1670
1673
|
|
@@ -1673,11 +1676,13 @@ char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
|
|
1673
1676
|
* has not been changed.
|
1674
1677
|
*/
|
1675
1678
|
static inline size_t
|
1676
|
-
char_is_identifier_utf8(const uint8_t *b,
|
1677
|
-
if (
|
1679
|
+
char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
|
1680
|
+
if (n <= 0) {
|
1681
|
+
return 0;
|
1682
|
+
} else if (*b < 0x80) {
|
1678
1683
|
return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
|
1679
1684
|
} else {
|
1680
|
-
return pm_encoding_utf_8_char_width(b,
|
1685
|
+
return pm_encoding_utf_8_char_width(b, n);
|
1681
1686
|
}
|
1682
1687
|
}
|
1683
1688
|
|
@@ -1687,20 +1692,24 @@ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
|
1687
1692
|
* it's important that it be as fast as possible.
|
1688
1693
|
*/
|
1689
1694
|
static inline size_t
|
1690
|
-
char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
|
1691
|
-
if (
|
1695
|
+
char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
|
1696
|
+
if (n <= 0) {
|
1697
|
+
return 0;
|
1698
|
+
} else if (parser->encoding_changed) {
|
1692
1699
|
size_t width;
|
1693
|
-
|
1700
|
+
|
1701
|
+
if ((width = parser->encoding->alnum_char(b, n)) != 0) {
|
1694
1702
|
return width;
|
1695
1703
|
} else if (*b == '_') {
|
1696
1704
|
return 1;
|
1697
1705
|
} else if (*b >= 0x80) {
|
1698
|
-
return parser->encoding->char_width(b,
|
1706
|
+
return parser->encoding->char_width(b, n);
|
1699
1707
|
} else {
|
1700
1708
|
return 0;
|
1701
1709
|
}
|
1710
|
+
} else {
|
1711
|
+
return char_is_identifier_utf8(b, n);
|
1702
1712
|
}
|
1703
|
-
return char_is_identifier_utf8(b, parser->end);
|
1704
1713
|
}
|
1705
1714
|
|
1706
1715
|
// Here we're defining a perfect hash for the characters that are allowed in
|
@@ -1731,9 +1740,10 @@ char_is_global_name_punctuation(const uint8_t b) {
|
|
1731
1740
|
static inline bool
|
1732
1741
|
token_is_setter_name(pm_token_t *token) {
|
1733
1742
|
return (
|
1734
|
-
(token->type ==
|
1743
|
+
(token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
|
1744
|
+
((token->type == PM_TOKEN_IDENTIFIER) &&
|
1735
1745
|
(token->end - token->start >= 2) &&
|
1736
|
-
(token->end[-1] == '=')
|
1746
|
+
(token->end[-1] == '='))
|
1737
1747
|
);
|
1738
1748
|
}
|
1739
1749
|
|
@@ -2895,7 +2905,7 @@ pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
|
|
2895
2905
|
(node->message_loc.start != NULL) &&
|
2896
2906
|
(node->message_loc.end[-1] != '!') &&
|
2897
2907
|
(node->message_loc.end[-1] != '?') &&
|
2898
|
-
char_is_identifier_start(parser, node->message_loc.start) &&
|
2908
|
+
char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
|
2899
2909
|
(node->opening_loc.start == NULL) &&
|
2900
2910
|
(node->arguments == NULL) &&
|
2901
2911
|
(node->block == NULL)
|
@@ -4142,7 +4152,7 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
|
|
4142
4152
|
|
4143
4153
|
// If errno is set, then it should only be ERANGE. At this point we need to
|
4144
4154
|
// check if it's infinity (it should be).
|
4145
|
-
if (errno == ERANGE &&
|
4155
|
+
if (errno == ERANGE && PRISM_ISINF(value)) {
|
4146
4156
|
int warn_width;
|
4147
4157
|
const char *ellipsis;
|
4148
4158
|
|
@@ -5318,6 +5328,12 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_
|
|
5318
5328
|
// should clear the mutability flags.
|
5319
5329
|
CLEAR_FLAGS(node);
|
5320
5330
|
break;
|
5331
|
+
case PM_X_STRING_NODE:
|
5332
|
+
case PM_INTERPOLATED_X_STRING_NODE:
|
5333
|
+
// If this is an x string, then this is a syntax error. But we want
|
5334
|
+
// to handle it here so that we don't fail the assertion.
|
5335
|
+
CLEAR_FLAGS(node);
|
5336
|
+
break;
|
5321
5337
|
default:
|
5322
5338
|
assert(false && "unexpected node type");
|
5323
5339
|
break;
|
@@ -5652,7 +5668,7 @@ pm_lambda_node_create(
|
|
5652
5668
|
*/
|
5653
5669
|
static pm_local_variable_and_write_node_t *
|
5654
5670
|
pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
|
5655
|
-
assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
|
5671
|
+
assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
|
5656
5672
|
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
|
5657
5673
|
pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
|
5658
5674
|
|
@@ -5707,7 +5723,7 @@ pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *tar
|
|
5707
5723
|
*/
|
5708
5724
|
static pm_local_variable_or_write_node_t *
|
5709
5725
|
pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
|
5710
|
-
assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
|
5726
|
+
assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
|
5711
5727
|
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
|
5712
5728
|
pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
|
5713
5729
|
|
@@ -6159,7 +6175,10 @@ pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *to
|
|
6159
6175
|
const uint8_t *end = token->end;
|
6160
6176
|
|
6161
6177
|
ptrdiff_t diff = end - start;
|
6162
|
-
assert(diff > 0
|
6178
|
+
assert(diff > 0);
|
6179
|
+
#if PTRDIFF_MAX > SIZE_MAX
|
6180
|
+
assert(diff < (ptrdiff_t) SIZE_MAX);
|
6181
|
+
#endif
|
6163
6182
|
size_t length = (size_t) diff;
|
6164
6183
|
|
6165
6184
|
char *digits = xcalloc(length + 1, sizeof(char));
|
@@ -6393,12 +6412,13 @@ pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_st
|
|
6393
6412
|
* Allocate and initialize new ParenthesesNode node.
|
6394
6413
|
*/
|
6395
6414
|
static pm_parentheses_node_t *
|
6396
|
-
pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing) {
|
6415
|
+
pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
|
6397
6416
|
pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
|
6398
6417
|
|
6399
6418
|
*node = (pm_parentheses_node_t) {
|
6400
6419
|
{
|
6401
6420
|
.type = PM_PARENTHESES_NODE,
|
6421
|
+
.flags = flags,
|
6402
6422
|
.node_id = PM_NODE_IDENTIFY(parser),
|
6403
6423
|
.location = {
|
6404
6424
|
.start = opening->start,
|
@@ -6665,6 +6685,7 @@ pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
|
|
6665
6685
|
},
|
6666
6686
|
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
|
6667
6687
|
.operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
6688
|
+
.then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
6668
6689
|
.reference = NULL,
|
6669
6690
|
.statements = NULL,
|
6670
6691
|
.subsequent = NULL,
|
@@ -7684,7 +7705,7 @@ pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen
|
|
7684
7705
|
* Allocate a new UntilNode node.
|
7685
7706
|
*/
|
7686
7707
|
static pm_until_node_t *
|
7687
|
-
pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
|
7708
|
+
pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
|
7688
7709
|
pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
|
7689
7710
|
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
|
7690
7711
|
|
@@ -7699,6 +7720,7 @@ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
|
|
7699
7720
|
},
|
7700
7721
|
},
|
7701
7722
|
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
|
7723
|
+
.do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
|
7702
7724
|
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
|
7703
7725
|
.predicate = predicate,
|
7704
7726
|
.statements = statements
|
@@ -7727,6 +7749,7 @@ pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
|
|
7727
7749
|
},
|
7728
7750
|
},
|
7729
7751
|
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
|
7752
|
+
.do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
7730
7753
|
.closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
7731
7754
|
.predicate = predicate,
|
7732
7755
|
.statements = statements
|
@@ -7794,7 +7817,7 @@ pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statemen
|
|
7794
7817
|
* Allocate a new WhileNode node.
|
7795
7818
|
*/
|
7796
7819
|
static pm_while_node_t *
|
7797
|
-
pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
|
7820
|
+
pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
|
7798
7821
|
pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
|
7799
7822
|
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
|
7800
7823
|
|
@@ -7809,6 +7832,7 @@ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
|
|
7809
7832
|
},
|
7810
7833
|
},
|
7811
7834
|
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
|
7835
|
+
.do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
|
7812
7836
|
.closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
|
7813
7837
|
.predicate = predicate,
|
7814
7838
|
.statements = statements
|
@@ -7837,6 +7861,7 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
|
|
7837
7861
|
},
|
7838
7862
|
},
|
7839
7863
|
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
|
7864
|
+
.do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
7840
7865
|
.closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
7841
7866
|
.predicate = predicate,
|
7842
7867
|
.statements = statements
|
@@ -7859,6 +7884,7 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
|
|
7859
7884
|
.location = PM_LOCATION_NULL_VALUE(parser)
|
7860
7885
|
},
|
7861
7886
|
.keyword_loc = PM_LOCATION_NULL_VALUE(parser),
|
7887
|
+
.do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
|
7862
7888
|
.closing_loc = PM_LOCATION_NULL_VALUE(parser),
|
7863
7889
|
.predicate = predicate,
|
7864
7890
|
.statements = statements
|
@@ -9077,10 +9103,10 @@ lex_global_variable(pm_parser_t *parser) {
|
|
9077
9103
|
parser->current.end++;
|
9078
9104
|
size_t width;
|
9079
9105
|
|
9080
|
-
if (parser->current.end
|
9106
|
+
if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
|
9081
9107
|
do {
|
9082
9108
|
parser->current.end += width;
|
9083
|
-
} while (parser->current.end
|
9109
|
+
} while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
|
9084
9110
|
|
9085
9111
|
// $0 isn't allowed to be followed by anything.
|
9086
9112
|
pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
|
@@ -9105,14 +9131,14 @@ lex_global_variable(pm_parser_t *parser) {
|
|
9105
9131
|
case '-':
|
9106
9132
|
parser->current.end++;
|
9107
9133
|
allow_multiple = false;
|
9108
|
-
|
9134
|
+
PRISM_FALLTHROUGH
|
9109
9135
|
default: {
|
9110
9136
|
size_t width;
|
9111
9137
|
|
9112
|
-
if ((width = char_is_identifier(parser, parser->current.end)) > 0) {
|
9138
|
+
if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
|
9113
9139
|
do {
|
9114
9140
|
parser->current.end += width;
|
9115
|
-
} while (allow_multiple && parser->current.end
|
9141
|
+
} while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
|
9116
9142
|
} else if (pm_char_is_whitespace(peek(parser))) {
|
9117
9143
|
// If we get here, then we have a $ followed by whitespace,
|
9118
9144
|
// which is not allowed.
|
@@ -9177,11 +9203,11 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
|
|
9177
9203
|
bool encoding_changed = parser->encoding_changed;
|
9178
9204
|
|
9179
9205
|
if (encoding_changed) {
|
9180
|
-
while (
|
9206
|
+
while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
|
9181
9207
|
current_end += width;
|
9182
9208
|
}
|
9183
9209
|
} else {
|
9184
|
-
while (
|
9210
|
+
while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
|
9185
9211
|
current_end += width;
|
9186
9212
|
}
|
9187
9213
|
}
|
@@ -9355,7 +9381,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
|
|
9355
9381
|
const uint8_t *variable = pound + 2;
|
9356
9382
|
if (*variable == '@' && pound + 3 < parser->end) variable++;
|
9357
9383
|
|
9358
|
-
if (char_is_identifier_start(parser, variable)) {
|
9384
|
+
if (char_is_identifier_start(parser, variable, parser->end - variable)) {
|
9359
9385
|
// At this point we're sure that we've either hit an embedded instance
|
9360
9386
|
// or class variable. In this case we'll first need to check if we've
|
9361
9387
|
// already consumed content.
|
@@ -9404,7 +9430,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
|
|
9404
9430
|
// or a global name punctuation character, then we've hit an embedded
|
9405
9431
|
// global variable.
|
9406
9432
|
if (
|
9407
|
-
char_is_identifier_start(parser, check) ||
|
9433
|
+
char_is_identifier_start(parser, check, parser->end - check) ||
|
9408
9434
|
(pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
|
9409
9435
|
) {
|
9410
9436
|
// In this case we've hit an embedded global variable. First check to
|
@@ -9536,21 +9562,7 @@ escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t fla
|
|
9536
9562
|
parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY;
|
9537
9563
|
}
|
9538
9564
|
|
9539
|
-
if (value
|
9540
|
-
pm_buffer_append_byte(buffer, (uint8_t) value);
|
9541
|
-
} else if (value <= 0x7FF) { // 110xxxxx 10xxxxxx
|
9542
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0xC0 | (value >> 6)));
|
9543
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
|
9544
|
-
} else if (value <= 0xFFFF) { // 1110xxxx 10xxxxxx 10xxxxxx
|
9545
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0xE0 | (value >> 12)));
|
9546
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
|
9547
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
|
9548
|
-
} else if (value <= 0x10FFFF) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
9549
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0xF0 | (value >> 18)));
|
9550
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 12) & 0x3F)));
|
9551
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
|
9552
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
|
9553
|
-
} else {
|
9565
|
+
if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
|
9554
9566
|
pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
|
9555
9567
|
pm_buffer_append_byte(buffer, 0xEF);
|
9556
9568
|
pm_buffer_append_byte(buffer, 0xBF);
|
@@ -9575,28 +9587,6 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
|
|
9575
9587
|
pm_buffer_append_byte(buffer, byte);
|
9576
9588
|
}
|
9577
9589
|
|
9578
|
-
/**
|
9579
|
-
* Write each byte of the given escaped character into the buffer.
|
9580
|
-
*/
|
9581
|
-
static inline void
|
9582
|
-
escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
|
9583
|
-
size_t width;
|
9584
|
-
if (parser->encoding_changed) {
|
9585
|
-
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9586
|
-
} else {
|
9587
|
-
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
|
9588
|
-
}
|
9589
|
-
|
9590
|
-
// TODO: If the character is invalid in the given encoding, then we'll just
|
9591
|
-
// push one byte into the buffer. This should actually be an error.
|
9592
|
-
width = (width == 0) ? 1 : width;
|
9593
|
-
|
9594
|
-
for (size_t index = 0; index < width; index++) {
|
9595
|
-
escape_write_byte_encoded(parser, buffer, *parser->current.end);
|
9596
|
-
parser->current.end++;
|
9597
|
-
}
|
9598
|
-
}
|
9599
|
-
|
9600
9590
|
/**
|
9601
9591
|
* The regular expression engine doesn't support the same escape sequences as
|
9602
9592
|
* Ruby does. So first we have to read the escape sequence, and then we have to
|
@@ -9621,6 +9611,33 @@ escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular
|
|
9621
9611
|
escape_write_byte_encoded(parser, buffer, byte);
|
9622
9612
|
}
|
9623
9613
|
|
9614
|
+
/**
|
9615
|
+
* Write each byte of the given escaped character into the buffer.
|
9616
|
+
*/
|
9617
|
+
static inline void
|
9618
|
+
escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
|
9619
|
+
size_t width;
|
9620
|
+
if (parser->encoding_changed) {
|
9621
|
+
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9622
|
+
} else {
|
9623
|
+
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
|
9624
|
+
}
|
9625
|
+
|
9626
|
+
if (width == 1) {
|
9627
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
|
9628
|
+
} else if (width > 1) {
|
9629
|
+
// Valid multibyte character. Just ignore escape.
|
9630
|
+
pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
|
9631
|
+
pm_buffer_append_bytes(b, parser->current.end, width);
|
9632
|
+
parser->current.end += width;
|
9633
|
+
} else {
|
9634
|
+
// Assume the next character wasn't meant to be part of this escape
|
9635
|
+
// sequence since it is invalid. Add an error and move on.
|
9636
|
+
parser->current.end++;
|
9637
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
|
9638
|
+
}
|
9639
|
+
}
|
9640
|
+
|
9624
9641
|
/**
|
9625
9642
|
* Warn about using a space or a tab character in an escape, as opposed to using
|
9626
9643
|
* \\s or \\t. Note that we can quite copy the source because the warning
|
@@ -9647,7 +9664,8 @@ escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *t
|
|
9647
9664
|
*/
|
9648
9665
|
static void
|
9649
9666
|
escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
|
9650
|
-
|
9667
|
+
uint8_t peeked = peek(parser);
|
9668
|
+
switch (peeked) {
|
9651
9669
|
case '\\': {
|
9652
9670
|
parser->current.end++;
|
9653
9671
|
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
|
@@ -9717,6 +9735,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9717
9735
|
}
|
9718
9736
|
}
|
9719
9737
|
|
9738
|
+
value = escape_byte(value, flags);
|
9720
9739
|
escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
|
9721
9740
|
return;
|
9722
9741
|
}
|
@@ -9765,7 +9784,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9765
9784
|
|
9766
9785
|
size_t whitespace;
|
9767
9786
|
while (true) {
|
9768
|
-
if ((whitespace =
|
9787
|
+
if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
|
9769
9788
|
parser->current.end += whitespace;
|
9770
9789
|
} else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
|
9771
9790
|
// This is super hacky, but it gets us nicer error
|
@@ -9813,7 +9832,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9813
9832
|
uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
|
9814
9833
|
escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
|
9815
9834
|
|
9816
|
-
parser->current.end +=
|
9835
|
+
parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
|
9817
9836
|
}
|
9818
9837
|
|
9819
9838
|
// ?\u{nnnn} character literal should contain only one codepoint
|
@@ -10041,11 +10060,16 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
10041
10060
|
escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
|
10042
10061
|
return;
|
10043
10062
|
}
|
10063
|
+
PRISM_FALLTHROUGH
|
10044
10064
|
}
|
10045
|
-
/* fallthrough */
|
10046
10065
|
default: {
|
10066
|
+
if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
|
10067
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
10068
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
|
10069
|
+
return;
|
10070
|
+
}
|
10047
10071
|
if (parser->current.end < parser->end) {
|
10048
|
-
escape_write_escape_encoded(parser, buffer);
|
10072
|
+
escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
|
10049
10073
|
} else {
|
10050
10074
|
pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
10051
10075
|
}
|
@@ -10118,7 +10142,7 @@ lex_question_mark(pm_parser_t *parser) {
|
|
10118
10142
|
!(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
|
10119
10143
|
(
|
10120
10144
|
(parser->current.end + encoding_width >= parser->end) ||
|
10121
|
-
!char_is_identifier(parser, parser->current.end + encoding_width)
|
10145
|
+
!char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
|
10122
10146
|
)
|
10123
10147
|
) {
|
10124
10148
|
lex_state_set(parser, PM_LEX_STATE_END);
|
@@ -10138,21 +10162,22 @@ lex_question_mark(pm_parser_t *parser) {
|
|
10138
10162
|
static pm_token_type_t
|
10139
10163
|
lex_at_variable(pm_parser_t *parser) {
|
10140
10164
|
pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
|
10141
|
-
|
10165
|
+
const uint8_t *end = parser->end;
|
10142
10166
|
|
10143
|
-
|
10167
|
+
size_t width;
|
10168
|
+
if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
|
10144
10169
|
parser->current.end += width;
|
10145
10170
|
|
10146
|
-
while (
|
10171
|
+
while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
|
10147
10172
|
parser->current.end += width;
|
10148
10173
|
}
|
10149
|
-
} else if (parser->current.end <
|
10174
|
+
} else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
|
10150
10175
|
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
|
10151
10176
|
if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
|
10152
10177
|
diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
|
10153
10178
|
}
|
10154
10179
|
|
10155
|
-
size_t width = parser->encoding->char_width(parser->current.end,
|
10180
|
+
size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
|
10156
10181
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
|
10157
10182
|
} else {
|
10158
10183
|
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
|
@@ -10496,6 +10521,7 @@ pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
|
10496
10521
|
}
|
10497
10522
|
|
10498
10523
|
const uint8_t *end = parser->current.end - 1;
|
10524
|
+
assert(end >= start);
|
10499
10525
|
pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
|
10500
10526
|
|
10501
10527
|
token_buffer->cursor = end;
|
@@ -10576,9 +10602,15 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
|
|
10576
10602
|
pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
|
10577
10603
|
}
|
10578
10604
|
|
10579
|
-
|
10580
|
-
|
10605
|
+
uint8_t delimiter = *parser->current.end;
|
10606
|
+
|
10607
|
+
// If our delimiter is \r\n, we want to treat it as if it's \n.
|
10608
|
+
// For example, %\r\nfoo\r\n should be "foo"
|
10609
|
+
if (eol_length == 2) {
|
10610
|
+
delimiter = *(parser->current.end + 1);
|
10611
|
+
}
|
10581
10612
|
|
10613
|
+
parser->current.end += eol_length;
|
10582
10614
|
return delimiter;
|
10583
10615
|
}
|
10584
10616
|
|
@@ -10688,6 +10720,14 @@ parser_lex(pm_parser_t *parser) {
|
|
10688
10720
|
// We'll check if we're at the end of the file. If we are, then we
|
10689
10721
|
// need to return the EOF token.
|
10690
10722
|
if (parser->current.end >= parser->end) {
|
10723
|
+
// If we hit EOF, but the EOF came immediately after a newline,
|
10724
|
+
// set the start of the token to the newline. This way any EOF
|
10725
|
+
// errors will be reported as happening on that line rather than
|
10726
|
+
// a line after. For example "foo(\n" should report an error
|
10727
|
+
// on line 1 even though EOF technically occurs on line 2.
|
10728
|
+
if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
|
10729
|
+
parser->current.start -= 1;
|
10730
|
+
}
|
10691
10731
|
LEX(PM_TOKEN_EOF);
|
10692
10732
|
}
|
10693
10733
|
|
@@ -10730,7 +10770,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10730
10770
|
|
10731
10771
|
lexed_comment = true;
|
10732
10772
|
}
|
10733
|
-
|
10773
|
+
PRISM_FALLTHROUGH
|
10734
10774
|
case '\r':
|
10735
10775
|
case '\n': {
|
10736
10776
|
parser->semantic_token_seen = semantic_token_seen & 0x1;
|
@@ -10772,7 +10812,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10772
10812
|
parser->current.type = PM_TOKEN_NEWLINE;
|
10773
10813
|
return;
|
10774
10814
|
}
|
10775
|
-
|
10815
|
+
PRISM_FALLTHROUGH
|
10776
10816
|
case PM_IGNORED_NEWLINE_ALL:
|
10777
10817
|
if (!lexed_comment) parser_lex_ignored_newline(parser);
|
10778
10818
|
lexed_comment = false;
|
@@ -10869,6 +10909,10 @@ parser_lex(pm_parser_t *parser) {
|
|
10869
10909
|
|
10870
10910
|
// ,
|
10871
10911
|
case ',':
|
10912
|
+
if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
|
10913
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
|
10914
|
+
}
|
10915
|
+
|
10872
10916
|
lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
|
10873
10917
|
LEX(PM_TOKEN_COMMA);
|
10874
10918
|
|
@@ -11121,13 +11165,13 @@ parser_lex(pm_parser_t *parser) {
|
|
11121
11165
|
|
11122
11166
|
if (parser->current.end >= parser->end) {
|
11123
11167
|
parser->current.end = end;
|
11124
|
-
} else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
|
11168
|
+
} else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
|
11125
11169
|
parser->current.end = end;
|
11126
11170
|
} else {
|
11127
11171
|
if (quote == PM_HEREDOC_QUOTE_NONE) {
|
11128
11172
|
parser->current.end += width;
|
11129
11173
|
|
11130
|
-
while ((parser->current.end
|
11174
|
+
while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
|
11131
11175
|
parser->current.end += width;
|
11132
11176
|
}
|
11133
11177
|
} else {
|
@@ -11312,7 +11356,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11312
11356
|
} else {
|
11313
11357
|
const uint8_t delim = peek_offset(parser, 1);
|
11314
11358
|
|
11315
|
-
if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1)) {
|
11359
|
+
if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
|
11316
11360
|
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
|
11317
11361
|
}
|
11318
11362
|
}
|
@@ -11750,7 +11794,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11750
11794
|
|
11751
11795
|
default: {
|
11752
11796
|
if (*parser->current.start != '_') {
|
11753
|
-
size_t width = char_is_identifier_start(parser, parser->current.start);
|
11797
|
+
size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
|
11754
11798
|
|
11755
11799
|
// If this isn't the beginning of an identifier, then
|
11756
11800
|
// it's an invalid token as we've exhausted all of the
|
@@ -11783,7 +11827,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11783
11827
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
|
11784
11828
|
break;
|
11785
11829
|
}
|
11786
|
-
|
11830
|
+
PRISM_FALLTHROUGH
|
11787
11831
|
default:
|
11788
11832
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
|
11789
11833
|
break;
|
@@ -11980,7 +12024,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11980
12024
|
pm_token_buffer_push_byte(&token_buffer, '\r');
|
11981
12025
|
break;
|
11982
12026
|
}
|
11983
|
-
|
12027
|
+
PRISM_FALLTHROUGH
|
11984
12028
|
case '\n':
|
11985
12029
|
pm_token_buffer_push_byte(&token_buffer, '\n');
|
11986
12030
|
|
@@ -12084,9 +12128,28 @@ parser_lex(pm_parser_t *parser) {
|
|
12084
12128
|
pm_regexp_token_buffer_t token_buffer = { 0 };
|
12085
12129
|
|
12086
12130
|
while (breakpoint != NULL) {
|
12131
|
+
uint8_t term = lex_mode->as.regexp.terminator;
|
12132
|
+
bool is_terminator = (*breakpoint == term);
|
12133
|
+
|
12134
|
+
// If the terminator is newline, we need to consider \r\n _also_ a newline
|
12135
|
+
// For example: `%\nfoo\r\n`
|
12136
|
+
// The string should be "foo", not "foo\r"
|
12137
|
+
if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
|
12138
|
+
if (term == '\n') {
|
12139
|
+
is_terminator = true;
|
12140
|
+
}
|
12141
|
+
|
12142
|
+
// If the terminator is a CR, but we see a CRLF, we need to
|
12143
|
+
// treat the CRLF as a newline, meaning this is _not_ the
|
12144
|
+
// terminator
|
12145
|
+
if (term == '\r') {
|
12146
|
+
is_terminator = false;
|
12147
|
+
}
|
12148
|
+
}
|
12149
|
+
|
12087
12150
|
// If we hit the terminator, we need to determine what kind of
|
12088
12151
|
// token to return.
|
12089
|
-
if (
|
12152
|
+
if (is_terminator) {
|
12090
12153
|
if (lex_mode->as.regexp.nesting > 0) {
|
12091
12154
|
parser->current.end = breakpoint + 1;
|
12092
12155
|
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
@@ -12148,7 +12211,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12148
12211
|
pm_regexp_token_buffer_escape(parser, &token_buffer);
|
12149
12212
|
token_buffer.base.cursor = breakpoint;
|
12150
12213
|
|
12151
|
-
|
12214
|
+
PRISM_FALLTHROUGH
|
12152
12215
|
case '\n':
|
12153
12216
|
// If we've hit a newline, then we need to track that in
|
12154
12217
|
// the list of newlines.
|
@@ -12190,7 +12253,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12190
12253
|
pm_token_buffer_push_byte(&token_buffer.base, '\r');
|
12191
12254
|
break;
|
12192
12255
|
}
|
12193
|
-
|
12256
|
+
PRISM_FALLTHROUGH
|
12194
12257
|
case '\n':
|
12195
12258
|
if (parser->heredoc_end) {
|
12196
12259
|
// ... if we are on the same line as a heredoc,
|
@@ -12316,10 +12379,29 @@ parser_lex(pm_parser_t *parser) {
|
|
12316
12379
|
continue;
|
12317
12380
|
}
|
12318
12381
|
|
12382
|
+
uint8_t term = lex_mode->as.string.terminator;
|
12383
|
+
bool is_terminator = (*breakpoint == term);
|
12384
|
+
|
12385
|
+
// If the terminator is newline, we need to consider \r\n _also_ a newline
|
12386
|
+
// For example: `%r\nfoo\r\n`
|
12387
|
+
// The string should be /foo/, not /foo\r/
|
12388
|
+
if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
|
12389
|
+
if (term == '\n') {
|
12390
|
+
is_terminator = true;
|
12391
|
+
}
|
12392
|
+
|
12393
|
+
// If the terminator is a CR, but we see a CRLF, we need to
|
12394
|
+
// treat the CRLF as a newline, meaning this is _not_ the
|
12395
|
+
// terminator
|
12396
|
+
if (term == '\r') {
|
12397
|
+
is_terminator = false;
|
12398
|
+
}
|
12399
|
+
}
|
12400
|
+
|
12319
12401
|
// Note that we have to check the terminator here first because we could
|
12320
12402
|
// potentially be parsing a % string that has a # character as the
|
12321
12403
|
// terminator.
|
12322
|
-
if (
|
12404
|
+
if (is_terminator) {
|
12323
12405
|
// If this terminator doesn't actually close the string, then we need
|
12324
12406
|
// to continue on past it.
|
12325
12407
|
if (lex_mode->as.string.nesting > 0) {
|
@@ -12379,7 +12461,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12379
12461
|
pm_token_buffer_escape(parser, &token_buffer);
|
12380
12462
|
token_buffer.cursor = breakpoint;
|
12381
12463
|
|
12382
|
-
|
12464
|
+
PRISM_FALLTHROUGH
|
12383
12465
|
case '\n':
|
12384
12466
|
// When we hit a newline, we need to flush any potential
|
12385
12467
|
// heredocs. Note that this has to happen after we check
|
@@ -12424,7 +12506,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12424
12506
|
pm_token_buffer_push_byte(&token_buffer, '\r');
|
12425
12507
|
break;
|
12426
12508
|
}
|
12427
|
-
|
12509
|
+
PRISM_FALLTHROUGH
|
12428
12510
|
case '\n':
|
12429
12511
|
if (!lex_mode->as.string.interpolation) {
|
12430
12512
|
pm_token_buffer_push_byte(&token_buffer, '\\');
|
@@ -12632,7 +12714,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12632
12714
|
pm_token_buffer_escape(parser, &token_buffer);
|
12633
12715
|
token_buffer.cursor = breakpoint;
|
12634
12716
|
|
12635
|
-
|
12717
|
+
PRISM_FALLTHROUGH
|
12636
12718
|
case '\n': {
|
12637
12719
|
if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
|
12638
12720
|
parser_flush_heredoc_end(parser);
|
@@ -12732,7 +12814,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12732
12814
|
pm_token_buffer_push_byte(&token_buffer, '\r');
|
12733
12815
|
break;
|
12734
12816
|
}
|
12735
|
-
|
12817
|
+
PRISM_FALLTHROUGH
|
12736
12818
|
case '\n':
|
12737
12819
|
pm_token_buffer_push_byte(&token_buffer, '\\');
|
12738
12820
|
pm_token_buffer_push_byte(&token_buffer, '\n');
|
@@ -12752,7 +12834,7 @@ parser_lex(pm_parser_t *parser) {
|
|
12752
12834
|
pm_token_buffer_push_byte(&token_buffer, '\r');
|
12753
12835
|
break;
|
12754
12836
|
}
|
12755
|
-
|
12837
|
+
PRISM_FALLTHROUGH
|
12756
12838
|
case '\n':
|
12757
12839
|
// If we are in a tilde here, we should
|
12758
12840
|
// break out of the loop and return the
|
@@ -12903,7 +12985,7 @@ typedef struct {
|
|
12903
12985
|
|
12904
12986
|
pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
|
12905
12987
|
// rescue
|
12906
|
-
[PM_TOKEN_KEYWORD_RESCUE_MODIFIER] =
|
12988
|
+
[PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
|
12907
12989
|
|
12908
12990
|
// if unless until while
|
12909
12991
|
[PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
@@ -13044,14 +13126,6 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
|
|
13044
13126
|
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
|
13045
13127
|
}
|
13046
13128
|
|
13047
|
-
/**
|
13048
|
-
* Returns true if the current token is any of the six given types.
|
13049
|
-
*/
|
13050
|
-
static inline bool
|
13051
|
-
match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
|
13052
|
-
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
|
13053
|
-
}
|
13054
|
-
|
13055
13129
|
/**
|
13056
13130
|
* Returns true if the current token is any of the seven given types.
|
13057
13131
|
*/
|
@@ -13068,6 +13142,14 @@ match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
|
|
13068
13142
|
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
|
13069
13143
|
}
|
13070
13144
|
|
13145
|
+
/**
|
13146
|
+
* Returns true if the current token is any of the nine given types.
|
13147
|
+
*/
|
13148
|
+
static inline bool
|
13149
|
+
match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
|
13150
|
+
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
|
13151
|
+
}
|
13152
|
+
|
13071
13153
|
/**
|
13072
13154
|
* If the current token is of the specified type, lex forward by one token and
|
13073
13155
|
* return true. Otherwise, return false. For example:
|
@@ -13096,19 +13178,6 @@ accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
|
|
13096
13178
|
return false;
|
13097
13179
|
}
|
13098
13180
|
|
13099
|
-
/**
|
13100
|
-
* If the current token is any of the three given types, lex forward by one
|
13101
|
-
* token and return true. Otherwise return false.
|
13102
|
-
*/
|
13103
|
-
static inline bool
|
13104
|
-
accept3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
|
13105
|
-
if (match3(parser, type1, type2, type3)) {
|
13106
|
-
parser_lex(parser);
|
13107
|
-
return true;
|
13108
|
-
}
|
13109
|
-
return false;
|
13110
|
-
}
|
13111
|
-
|
13112
13181
|
/**
|
13113
13182
|
* This function indicates that the parser expects a token in a specific
|
13114
13183
|
* position. For example, if you're parsing a BEGIN block, you know that a { is
|
@@ -13146,20 +13215,6 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di
|
|
13146
13215
|
parser->previous.type = PM_TOKEN_MISSING;
|
13147
13216
|
}
|
13148
13217
|
|
13149
|
-
/**
|
13150
|
-
* This function is the same as expect2, but it expects one of three token types.
|
13151
|
-
*/
|
13152
|
-
static void
|
13153
|
-
expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_diagnostic_id_t diag_id) {
|
13154
|
-
if (accept3(parser, type1, type2, type3)) return;
|
13155
|
-
|
13156
|
-
const uint8_t *location = parser->previous.end;
|
13157
|
-
pm_parser_err(parser, location, location, diag_id);
|
13158
|
-
|
13159
|
-
parser->previous.start = location;
|
13160
|
-
parser->previous.type = PM_TOKEN_MISSING;
|
13161
|
-
}
|
13162
|
-
|
13163
13218
|
/**
|
13164
13219
|
* A special expect1 that expects a heredoc terminator and handles popping the
|
13165
13220
|
* lex mode accordingly.
|
@@ -13501,7 +13556,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
|
|
13501
13556
|
return (pm_node_t *) pm_index_target_node_create(parser, call);
|
13502
13557
|
}
|
13503
13558
|
}
|
13504
|
-
|
13559
|
+
PRISM_FALLTHROUGH
|
13505
13560
|
default:
|
13506
13561
|
// In this case we have a node that we don't know how to convert
|
13507
13562
|
// into a target. We need to treat it as an error. For now, we'll
|
@@ -13583,7 +13638,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
13583
13638
|
case PM_BACK_REFERENCE_READ_NODE:
|
13584
13639
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
13585
13640
|
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
13586
|
-
|
13641
|
+
PRISM_FALLTHROUGH
|
13587
13642
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
13588
13643
|
pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
|
13589
13644
|
pm_node_destroy(parser, target);
|
@@ -13673,7 +13728,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
13673
13728
|
return target;
|
13674
13729
|
}
|
13675
13730
|
|
13676
|
-
if (char_is_identifier_start(parser, call->message_loc.start)) {
|
13731
|
+
if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
|
13677
13732
|
// When we get here, we have a method call, because it was
|
13678
13733
|
// previously marked as a method call but now we have an =. This
|
13679
13734
|
// looks like:
|
@@ -13710,6 +13765,9 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
13710
13765
|
|
13711
13766
|
// Replace the name with "[]=".
|
13712
13767
|
call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
|
13768
|
+
|
13769
|
+
// Ensure that the arguments for []= don't contain keywords
|
13770
|
+
pm_index_arguments_check(parser, call->arguments, call->block);
|
13713
13771
|
pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
|
13714
13772
|
|
13715
13773
|
return target;
|
@@ -13722,7 +13780,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
13722
13780
|
// is no way for us to attach it to the tree at this point.
|
13723
13781
|
pm_node_destroy(parser, value);
|
13724
13782
|
}
|
13725
|
-
|
13783
|
+
PRISM_FALLTHROUGH
|
13726
13784
|
default:
|
13727
13785
|
// In this case we have a node that we don't know how to convert into a
|
13728
13786
|
// target. We need to treat it as an error. For now, we'll mark it as an
|
@@ -13898,6 +13956,15 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
|
|
13898
13956
|
if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
|
13899
13957
|
parser_lex(parser);
|
13900
13958
|
|
13959
|
+
// If we are at the end of the file, then we need to stop parsing
|
13960
|
+
// the statements entirely at this point. Mark the parser as
|
13961
|
+
// recovering, as we know that EOF closes the top-level context, and
|
13962
|
+
// then break out of the loop.
|
13963
|
+
if (match1(parser, PM_TOKEN_EOF)) {
|
13964
|
+
parser->recovering = true;
|
13965
|
+
break;
|
13966
|
+
}
|
13967
|
+
|
13901
13968
|
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
|
13902
13969
|
if (context_terminator(context, &parser->current)) break;
|
13903
13970
|
} else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
|
@@ -14191,6 +14258,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
14191
14258
|
if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
|
14192
14259
|
pm_parser_scope_forwarding_positionals_check(parser, &operator);
|
14193
14260
|
argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
|
14261
|
+
if (parsed_bare_hash) {
|
14262
|
+
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
|
14263
|
+
}
|
14194
14264
|
} else {
|
14195
14265
|
pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
|
14196
14266
|
|
@@ -14239,7 +14309,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
14239
14309
|
}
|
14240
14310
|
}
|
14241
14311
|
}
|
14242
|
-
|
14312
|
+
PRISM_FALLTHROUGH
|
14243
14313
|
default: {
|
14244
14314
|
if (argument == NULL) {
|
14245
14315
|
argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
|
@@ -14482,6 +14552,7 @@ parse_parameters(
|
|
14482
14552
|
bool allows_trailing_comma,
|
14483
14553
|
bool allows_forwarding_parameters,
|
14484
14554
|
bool accepts_blocks_in_defaults,
|
14555
|
+
bool in_block,
|
14485
14556
|
uint16_t depth
|
14486
14557
|
) {
|
14487
14558
|
pm_do_loop_stack_push(parser, false);
|
@@ -14646,7 +14717,7 @@ parse_parameters(
|
|
14646
14717
|
break;
|
14647
14718
|
}
|
14648
14719
|
case PM_TOKEN_LABEL: {
|
14649
|
-
if (!uses_parentheses) parser->in_keyword_arg = true;
|
14720
|
+
if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
|
14650
14721
|
update_parameter_state(parser, &parser->current, &order);
|
14651
14722
|
|
14652
14723
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
@@ -15009,8 +15080,8 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
|
|
15009
15080
|
case PM_TOKEN_NEWLINE:
|
15010
15081
|
case PM_TOKEN_SEMICOLON:
|
15011
15082
|
case PM_TOKEN_KEYWORD_THEN:
|
15012
|
-
// Here we have a terminator for the rescue keyword, in which
|
15013
|
-
// going to just continue on.
|
15083
|
+
// Here we have a terminator for the rescue keyword, in which
|
15084
|
+
// case we're going to just continue on.
|
15014
15085
|
break;
|
15015
15086
|
default: {
|
15016
15087
|
if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
|
@@ -15042,9 +15113,12 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
|
|
15042
15113
|
}
|
15043
15114
|
|
15044
15115
|
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
15045
|
-
accept1(parser, PM_TOKEN_KEYWORD_THEN)
|
15116
|
+
if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
|
15117
|
+
rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
|
15118
|
+
}
|
15046
15119
|
} else {
|
15047
15120
|
expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
|
15121
|
+
rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
|
15048
15122
|
}
|
15049
15123
|
|
15050
15124
|
if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
|
@@ -15115,7 +15189,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
|
|
15115
15189
|
case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
|
15116
15190
|
case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
|
15117
15191
|
case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
|
15118
|
-
default: assert(false && "unreachable"); context =
|
15192
|
+
default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
|
15119
15193
|
}
|
15120
15194
|
|
15121
15195
|
else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
|
@@ -15210,6 +15284,7 @@ parse_block_parameters(
|
|
15210
15284
|
allows_trailing_comma,
|
15211
15285
|
false,
|
15212
15286
|
accepts_blocks_in_defaults,
|
15287
|
+
true,
|
15213
15288
|
(uint16_t) (depth + 1)
|
15214
15289
|
);
|
15215
15290
|
}
|
@@ -16125,7 +16200,7 @@ parse_operator_symbol_name(const pm_token_t *name) {
|
|
16125
16200
|
case PM_TOKEN_TILDE:
|
16126
16201
|
case PM_TOKEN_BANG:
|
16127
16202
|
if (name->end[-1] == '@') return name->end - 1;
|
16128
|
-
|
16203
|
+
PRISM_FALLTHROUGH
|
16129
16204
|
default:
|
16130
16205
|
return name->end;
|
16131
16206
|
}
|
@@ -16381,14 +16456,15 @@ static pm_node_t *
|
|
16381
16456
|
parse_variable(pm_parser_t *parser) {
|
16382
16457
|
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
|
16383
16458
|
int depth;
|
16459
|
+
bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
|
16384
16460
|
|
16385
|
-
if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
|
16461
|
+
if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
|
16386
16462
|
return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
|
16387
16463
|
}
|
16388
16464
|
|
16389
16465
|
pm_scope_t *current_scope = parser->current_scope;
|
16390
16466
|
if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
|
16391
|
-
if (
|
16467
|
+
if (is_numbered_param) {
|
16392
16468
|
// When you use a numbered parameter, it implies the existence of
|
16393
16469
|
// all of the locals that exist before it. For example, referencing
|
16394
16470
|
// _2 means that _1 must exist. Therefore here we loop through all
|
@@ -16758,6 +16834,10 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
|
|
16758
16834
|
// If we haven't already created our container for concatenation,
|
16759
16835
|
// we'll do that now.
|
16760
16836
|
if (!concating) {
|
16837
|
+
if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
|
16838
|
+
pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
|
16839
|
+
}
|
16840
|
+
|
16761
16841
|
concating = true;
|
16762
16842
|
pm_token_t bounds = not_provided(parser);
|
16763
16843
|
|
@@ -16996,7 +17076,7 @@ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const u
|
|
16996
17076
|
if (length == 0) return false;
|
16997
17077
|
|
16998
17078
|
// First ensure that it starts with a valid identifier starting character.
|
16999
|
-
size_t width = char_is_identifier_start(parser, start);
|
17079
|
+
size_t width = char_is_identifier_start(parser, start, end - start);
|
17000
17080
|
if (width == 0) return false;
|
17001
17081
|
|
17002
17082
|
// Next, ensure that it's not an uppercase character.
|
@@ -17009,7 +17089,7 @@ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const u
|
|
17009
17089
|
// Next, iterate through all of the bytes of the string to ensure that they
|
17010
17090
|
// are all valid identifier characters.
|
17011
17091
|
const uint8_t *cursor = start + width;
|
17012
|
-
while ((
|
17092
|
+
while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
|
17013
17093
|
return cursor == end;
|
17014
17094
|
}
|
17015
17095
|
|
@@ -17096,7 +17176,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
|
|
17096
17176
|
break;
|
17097
17177
|
}
|
17098
17178
|
}
|
17099
|
-
|
17179
|
+
PRISM_FALLTHROUGH
|
17100
17180
|
default: {
|
17101
17181
|
// If we get anything else, then this is an error. For this we'll
|
17102
17182
|
// create a missing node for the value and create an assoc node for
|
@@ -17482,7 +17562,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
|
|
17482
17562
|
pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
|
17483
17563
|
accept1(parser, PM_TOKEN_NEWLINE);
|
17484
17564
|
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
|
17485
|
-
pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
|
17565
|
+
pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0);
|
17486
17566
|
|
17487
17567
|
if (node == NULL) {
|
17488
17568
|
node = right;
|
@@ -17592,7 +17672,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
|
|
17592
17672
|
break;
|
17593
17673
|
}
|
17594
17674
|
}
|
17595
|
-
|
17675
|
+
PRISM_FALLTHROUGH
|
17596
17676
|
default:
|
17597
17677
|
node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
|
17598
17678
|
break;
|
@@ -17614,7 +17694,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
|
|
17614
17694
|
// Gather up all of the patterns into the list.
|
17615
17695
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
17616
17696
|
// Break early here in case we have a trailing comma.
|
17617
|
-
if (
|
17697
|
+
if (match9(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF,PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
|
17618
17698
|
node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
|
17619
17699
|
pm_node_list_append(&nodes, node);
|
17620
17700
|
trailing_rest = true;
|
@@ -18105,12 +18185,19 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18105
18185
|
case PM_TOKEN_PARENTHESIS_LEFT:
|
18106
18186
|
case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
|
18107
18187
|
pm_token_t opening = parser->current;
|
18188
|
+
pm_node_flags_t flags = 0;
|
18108
18189
|
|
18109
18190
|
pm_node_list_t current_block_exits = { 0 };
|
18110
18191
|
pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits);
|
18111
18192
|
|
18112
18193
|
parser_lex(parser);
|
18113
|
-
while (
|
18194
|
+
while (true) {
|
18195
|
+
if (accept1(parser, PM_TOKEN_SEMICOLON)) {
|
18196
|
+
flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
|
18197
|
+
} else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
|
18198
|
+
break;
|
18199
|
+
}
|
18200
|
+
}
|
18114
18201
|
|
18115
18202
|
// If this is the end of the file or we match a right parenthesis, then
|
18116
18203
|
// we have an empty parentheses node, and we can immediately return.
|
@@ -18120,7 +18207,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18120
18207
|
pop_block_exits(parser, previous_block_exits);
|
18121
18208
|
pm_node_list_free(¤t_block_exits);
|
18122
18209
|
|
18123
|
-
return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous);
|
18210
|
+
return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags);
|
18124
18211
|
}
|
18125
18212
|
|
18126
18213
|
// Otherwise, we're going to parse the first statement in the list
|
@@ -18133,9 +18220,23 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18133
18220
|
// Determine if this statement is followed by a terminator. In the
|
18134
18221
|
// case of a single statement, this is fine. But in the case of
|
18135
18222
|
// multiple statements it's required.
|
18136
|
-
bool terminator_found =
|
18223
|
+
bool terminator_found = false;
|
18224
|
+
|
18225
|
+
if (accept1(parser, PM_TOKEN_SEMICOLON)) {
|
18226
|
+
terminator_found = true;
|
18227
|
+
flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
|
18228
|
+
} else if (accept1(parser, PM_TOKEN_NEWLINE)) {
|
18229
|
+
terminator_found = true;
|
18230
|
+
}
|
18231
|
+
|
18137
18232
|
if (terminator_found) {
|
18138
|
-
while (
|
18233
|
+
while (true) {
|
18234
|
+
if (accept1(parser, PM_TOKEN_SEMICOLON)) {
|
18235
|
+
flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
|
18236
|
+
} else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
|
18237
|
+
break;
|
18238
|
+
}
|
18239
|
+
}
|
18139
18240
|
}
|
18140
18241
|
|
18141
18242
|
// If we hit a right parenthesis, then we're done parsing the
|
@@ -18207,13 +18308,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18207
18308
|
pm_statements_node_t *statements = pm_statements_node_create(parser);
|
18208
18309
|
pm_statements_node_body_append(parser, statements, statement, true);
|
18209
18310
|
|
18210
|
-
return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
|
18311
|
+
return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
|
18211
18312
|
}
|
18212
18313
|
|
18213
18314
|
// If we have more than one statement in the set of parentheses,
|
18214
18315
|
// then we are going to parse all of them as a list of statements.
|
18215
18316
|
// We'll do that here.
|
18216
18317
|
context_push(parser, PM_CONTEXT_PARENS);
|
18318
|
+
flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
|
18319
|
+
|
18217
18320
|
pm_statements_node_t *statements = pm_statements_node_create(parser);
|
18218
18321
|
pm_statements_node_body_append(parser, statements, statement, true);
|
18219
18322
|
|
@@ -18290,7 +18393,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18290
18393
|
pm_node_list_free(¤t_block_exits);
|
18291
18394
|
|
18292
18395
|
pm_void_statements_check(parser, statements, true);
|
18293
|
-
return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
|
18396
|
+
return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
|
18294
18397
|
}
|
18295
18398
|
case PM_TOKEN_BRACE_LEFT: {
|
18296
18399
|
// If we were passed a current_hash_keys via the parser, then that
|
@@ -18722,7 +18825,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18722
18825
|
pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
|
18723
18826
|
}
|
18724
18827
|
}
|
18725
|
-
|
18828
|
+
PRISM_FALLTHROUGH
|
18726
18829
|
default:
|
18727
18830
|
return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
|
18728
18831
|
}
|
@@ -19213,6 +19316,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19213
19316
|
context_push(parser, PM_CONTEXT_DEF_PARAMS);
|
19214
19317
|
parser_lex(parser);
|
19215
19318
|
|
19319
|
+
// This will be false if the method name is not a valid identifier
|
19320
|
+
// but could be followed by an operator.
|
19321
|
+
bool valid_name = true;
|
19322
|
+
|
19216
19323
|
switch (parser->current.type) {
|
19217
19324
|
case PM_CASE_OPERATOR:
|
19218
19325
|
pm_parser_scope_push(parser, true);
|
@@ -19242,10 +19349,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19242
19349
|
|
19243
19350
|
break;
|
19244
19351
|
}
|
19245
|
-
case PM_TOKEN_CONSTANT:
|
19246
19352
|
case PM_TOKEN_INSTANCE_VARIABLE:
|
19247
19353
|
case PM_TOKEN_CLASS_VARIABLE:
|
19248
19354
|
case PM_TOKEN_GLOBAL_VARIABLE:
|
19355
|
+
valid_name = false;
|
19356
|
+
PRISM_FALLTHROUGH
|
19357
|
+
case PM_TOKEN_CONSTANT:
|
19249
19358
|
case PM_TOKEN_KEYWORD_NIL:
|
19250
19359
|
case PM_TOKEN_KEYWORD_SELF:
|
19251
19360
|
case PM_TOKEN_KEYWORD_TRUE:
|
@@ -19303,6 +19412,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19303
19412
|
|
19304
19413
|
name = parse_method_definition_name(parser);
|
19305
19414
|
} else {
|
19415
|
+
if (!valid_name) {
|
19416
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
|
19417
|
+
}
|
19418
|
+
|
19306
19419
|
name = identifier;
|
19307
19420
|
}
|
19308
19421
|
break;
|
@@ -19326,7 +19439,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19326
19439
|
expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
|
19327
19440
|
|
19328
19441
|
operator = parser->previous;
|
19329
|
-
receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
|
19442
|
+
receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0);
|
19330
19443
|
|
19331
19444
|
// To push `PM_CONTEXT_DEF_PARAMS` again is for the same
|
19332
19445
|
// reason as described the above.
|
@@ -19353,7 +19466,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19353
19466
|
if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
19354
19467
|
params = NULL;
|
19355
19468
|
} else {
|
19356
|
-
params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, (uint16_t) (depth + 1));
|
19469
|
+
params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
|
19357
19470
|
}
|
19358
19471
|
|
19359
19472
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
@@ -19378,7 +19491,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19378
19491
|
|
19379
19492
|
lparen = not_provided(parser);
|
19380
19493
|
rparen = not_provided(parser);
|
19381
|
-
params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, (uint16_t) (depth + 1));
|
19494
|
+
params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
|
19382
19495
|
|
19383
19496
|
context_pop(parser);
|
19384
19497
|
break;
|
@@ -19413,7 +19526,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19413
19526
|
context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
|
19414
19527
|
|
19415
19528
|
pm_token_t rescue_keyword = parser->previous;
|
19416
|
-
pm_node_t *value = parse_expression(parser,
|
19529
|
+
pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
|
19417
19530
|
context_pop(parser);
|
19418
19531
|
|
19419
19532
|
statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
|
@@ -19656,11 +19769,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19656
19769
|
accept1(parser, PM_TOKEN_NEWLINE);
|
19657
19770
|
|
19658
19771
|
if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
|
19659
|
-
|
19772
|
+
pm_token_t lparen = parser->previous;
|
19660
19773
|
|
19661
19774
|
if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
19662
|
-
|
19775
|
+
receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
|
19663
19776
|
} else {
|
19777
|
+
arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
|
19664
19778
|
receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
|
19665
19779
|
|
19666
19780
|
if (!parser->recovering) {
|
@@ -19787,9 +19901,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19787
19901
|
pm_do_loop_stack_pop(parser);
|
19788
19902
|
context_pop(parser);
|
19789
19903
|
|
19790
|
-
|
19791
|
-
|
19904
|
+
pm_token_t do_keyword;
|
19905
|
+
if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
|
19906
|
+
do_keyword = parser->previous;
|
19907
|
+
} else {
|
19908
|
+
do_keyword = not_provided(parser);
|
19909
|
+
expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
|
19910
|
+
}
|
19792
19911
|
|
19912
|
+
pm_statements_node_t *statements = NULL;
|
19793
19913
|
if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
|
19794
19914
|
pm_accepts_block_stack_push(parser, true);
|
19795
19915
|
statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
|
@@ -19800,7 +19920,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19800
19920
|
parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
|
19801
19921
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
|
19802
19922
|
|
19803
|
-
return (pm_node_t *) pm_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
|
19923
|
+
return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
|
19804
19924
|
}
|
19805
19925
|
case PM_TOKEN_KEYWORD_WHILE: {
|
19806
19926
|
size_t opening_newline_index = token_newline_index(parser);
|
@@ -19815,9 +19935,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19815
19935
|
pm_do_loop_stack_pop(parser);
|
19816
19936
|
context_pop(parser);
|
19817
19937
|
|
19818
|
-
|
19819
|
-
|
19938
|
+
pm_token_t do_keyword;
|
19939
|
+
if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
|
19940
|
+
do_keyword = parser->previous;
|
19941
|
+
} else {
|
19942
|
+
do_keyword = not_provided(parser);
|
19943
|
+
expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
|
19944
|
+
}
|
19820
19945
|
|
19946
|
+
pm_statements_node_t *statements = NULL;
|
19821
19947
|
if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
|
19822
19948
|
pm_accepts_block_stack_push(parser, true);
|
19823
19949
|
statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
|
@@ -19828,7 +19954,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19828
19954
|
parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
|
19829
19955
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
|
19830
19956
|
|
19831
|
-
return (pm_node_t *) pm_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
|
19957
|
+
return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
|
19832
19958
|
}
|
19833
19959
|
case PM_TOKEN_PERCENT_LOWER_I: {
|
19834
19960
|
parser_lex(parser);
|
@@ -20621,7 +20747,7 @@ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_
|
|
20621
20747
|
pm_token_t rescue = parser->current;
|
20622
20748
|
parser_lex(parser);
|
20623
20749
|
|
20624
|
-
pm_node_t *right = parse_expression(parser,
|
20750
|
+
pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
|
20625
20751
|
context_pop(parser);
|
20626
20752
|
|
20627
20753
|
return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
|
@@ -20727,7 +20853,7 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding
|
|
20727
20853
|
}
|
20728
20854
|
}
|
20729
20855
|
|
20730
|
-
pm_node_t *right = parse_expression(parser,
|
20856
|
+
pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
|
20731
20857
|
context_pop(parser);
|
20732
20858
|
|
20733
20859
|
return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
|
@@ -20783,6 +20909,123 @@ typedef struct {
|
|
20783
20909
|
bool shared;
|
20784
20910
|
} parse_regular_expression_named_capture_data_t;
|
20785
20911
|
|
20912
|
+
static inline const uint8_t *
|
20913
|
+
pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
|
20914
|
+
cursor++;
|
20915
|
+
|
20916
|
+
if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
|
20917
|
+
uint8_t value = escape_hexadecimal_digit(*cursor);
|
20918
|
+
cursor++;
|
20919
|
+
|
20920
|
+
if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
|
20921
|
+
value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
|
20922
|
+
cursor++;
|
20923
|
+
}
|
20924
|
+
|
20925
|
+
pm_buffer_append_byte(unescaped, value);
|
20926
|
+
} else {
|
20927
|
+
pm_buffer_append_string(unescaped, "\\x", 2);
|
20928
|
+
}
|
20929
|
+
|
20930
|
+
return cursor;
|
20931
|
+
}
|
20932
|
+
|
20933
|
+
static inline const uint8_t *
|
20934
|
+
pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
|
20935
|
+
uint8_t value = (uint8_t) (*cursor - '0');
|
20936
|
+
cursor++;
|
20937
|
+
|
20938
|
+
if (cursor < end && pm_char_is_octal_digit(*cursor)) {
|
20939
|
+
value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
|
20940
|
+
cursor++;
|
20941
|
+
|
20942
|
+
if (cursor < end && pm_char_is_octal_digit(*cursor)) {
|
20943
|
+
value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
|
20944
|
+
cursor++;
|
20945
|
+
}
|
20946
|
+
}
|
20947
|
+
|
20948
|
+
pm_buffer_append_byte(unescaped, value);
|
20949
|
+
return cursor;
|
20950
|
+
}
|
20951
|
+
|
20952
|
+
static inline const uint8_t *
|
20953
|
+
pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
|
20954
|
+
const uint8_t *start = cursor - 1;
|
20955
|
+
cursor++;
|
20956
|
+
|
20957
|
+
if (cursor >= end) {
|
20958
|
+
pm_buffer_append_string(unescaped, "\\u", 2);
|
20959
|
+
return cursor;
|
20960
|
+
}
|
20961
|
+
|
20962
|
+
if (*cursor != '{') {
|
20963
|
+
size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
|
20964
|
+
uint32_t value = escape_unicode(parser, cursor, length);
|
20965
|
+
|
20966
|
+
if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
|
20967
|
+
pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
|
20968
|
+
}
|
20969
|
+
|
20970
|
+
return cursor + length;
|
20971
|
+
}
|
20972
|
+
|
20973
|
+
cursor++;
|
20974
|
+
for (;;) {
|
20975
|
+
while (cursor < end && *cursor == ' ') cursor++;
|
20976
|
+
|
20977
|
+
if (cursor >= end) break;
|
20978
|
+
if (*cursor == '}') {
|
20979
|
+
cursor++;
|
20980
|
+
break;
|
20981
|
+
}
|
20982
|
+
|
20983
|
+
size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
|
20984
|
+
uint32_t value = escape_unicode(parser, cursor, length);
|
20985
|
+
|
20986
|
+
(void) pm_buffer_append_unicode_codepoint(unescaped, value);
|
20987
|
+
cursor += length;
|
20988
|
+
}
|
20989
|
+
|
20990
|
+
return cursor;
|
20991
|
+
}
|
20992
|
+
|
20993
|
+
static void
|
20994
|
+
pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor) {
|
20995
|
+
const uint8_t *end = source + length;
|
20996
|
+
pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
|
20997
|
+
|
20998
|
+
for (;;) {
|
20999
|
+
if (++cursor >= end) {
|
21000
|
+
pm_buffer_append_byte(unescaped, '\\');
|
21001
|
+
return;
|
21002
|
+
}
|
21003
|
+
|
21004
|
+
switch (*cursor) {
|
21005
|
+
case 'x':
|
21006
|
+
cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
|
21007
|
+
break;
|
21008
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
|
21009
|
+
cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
|
21010
|
+
break;
|
21011
|
+
case 'u':
|
21012
|
+
cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end);
|
21013
|
+
break;
|
21014
|
+
default:
|
21015
|
+
pm_buffer_append_byte(unescaped, '\\');
|
21016
|
+
break;
|
21017
|
+
}
|
21018
|
+
|
21019
|
+
const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
|
21020
|
+
if (next_cursor == NULL) break;
|
21021
|
+
|
21022
|
+
pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
|
21023
|
+
cursor = next_cursor;
|
21024
|
+
}
|
21025
|
+
|
21026
|
+
pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
|
21027
|
+
}
|
21028
|
+
|
20786
21029
|
/**
|
20787
21030
|
* This callback is called when the regular expression parser encounters a named
|
20788
21031
|
* capture group.
|
@@ -20797,13 +21040,32 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
|
|
20797
21040
|
|
20798
21041
|
const uint8_t *source = pm_string_source(capture);
|
20799
21042
|
size_t length = pm_string_length(capture);
|
21043
|
+
pm_buffer_t unescaped = { 0 };
|
21044
|
+
|
21045
|
+
// First, we need to handle escapes within the name of the capture group.
|
21046
|
+
// This is because regular expressions have three different representations
|
21047
|
+
// in prism. The first is the plain source code. The second is the
|
21048
|
+
// representation that will be sent to the regular expression engine, which
|
21049
|
+
// is the value of the "unescaped" field. This is poorly named, because it
|
21050
|
+
// actually still contains escapes, just a subset of them that the regular
|
21051
|
+
// expression engine knows how to handle. The third representation is fully
|
21052
|
+
// unescaped, which is what we need.
|
21053
|
+
const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
|
21054
|
+
if (PRISM_UNLIKELY(cursor != NULL)) {
|
21055
|
+
pm_named_capture_escape(parser, &unescaped, source, length, cursor);
|
21056
|
+
source = (const uint8_t *) pm_buffer_value(&unescaped);
|
21057
|
+
length = pm_buffer_length(&unescaped);
|
21058
|
+
}
|
20800
21059
|
|
20801
21060
|
pm_location_t location;
|
20802
21061
|
pm_constant_id_t name;
|
20803
21062
|
|
20804
21063
|
// If the name of the capture group isn't a valid identifier, we do
|
20805
21064
|
// not add it to the local table.
|
20806
|
-
if (!pm_slice_is_valid_local(parser, source, source + length))
|
21065
|
+
if (!pm_slice_is_valid_local(parser, source, source + length)) {
|
21066
|
+
pm_buffer_free(&unescaped);
|
21067
|
+
return;
|
21068
|
+
}
|
20807
21069
|
|
20808
21070
|
if (callback_data->shared) {
|
20809
21071
|
// If the unescaped string is a slice of the source, then we can
|
@@ -20831,7 +21093,10 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
|
|
20831
21093
|
if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
|
20832
21094
|
// If the local is not already a local but it is a keyword, then we
|
20833
21095
|
// do not want to add a capture for this.
|
20834
|
-
if (pm_local_is_keyword((const char *) source, length))
|
21096
|
+
if (pm_local_is_keyword((const char *) source, length)) {
|
21097
|
+
pm_buffer_free(&unescaped);
|
21098
|
+
return;
|
21099
|
+
}
|
20835
21100
|
|
20836
21101
|
// If the identifier is not already a local, then we will add it to
|
20837
21102
|
// the local table.
|
@@ -20849,6 +21114,8 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
|
|
20849
21114
|
pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
|
20850
21115
|
pm_node_list_append(&callback_data->match->targets, target);
|
20851
21116
|
}
|
21117
|
+
|
21118
|
+
pm_buffer_free(&unescaped);
|
20852
21119
|
}
|
20853
21120
|
|
20854
21121
|
/**
|
@@ -20898,7 +21165,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20898
21165
|
pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
|
20899
21166
|
}
|
20900
21167
|
}
|
20901
|
-
|
21168
|
+
PRISM_FALLTHROUGH
|
20902
21169
|
case PM_CASE_WRITABLE: {
|
20903
21170
|
parser_lex(parser);
|
20904
21171
|
pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
|
@@ -20944,7 +21211,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20944
21211
|
case PM_BACK_REFERENCE_READ_NODE:
|
20945
21212
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
20946
21213
|
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
20947
|
-
|
21214
|
+
PRISM_FALLTHROUGH
|
20948
21215
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
20949
21216
|
parser_lex(parser);
|
20950
21217
|
|
@@ -20989,7 +21256,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20989
21256
|
pm_node_destroy(parser, node);
|
20990
21257
|
return result;
|
20991
21258
|
}
|
21259
|
+
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
|
21260
|
+
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
|
21261
|
+
parser_lex(parser);
|
21262
|
+
|
21263
|
+
pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
|
21264
|
+
pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0);
|
21265
|
+
|
21266
|
+
parse_target_implicit_parameter(parser, node);
|
21267
|
+
pm_node_destroy(parser, node);
|
21268
|
+
return result;
|
21269
|
+
}
|
20992
21270
|
case PM_LOCAL_VARIABLE_READ_NODE: {
|
21271
|
+
if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
|
21272
|
+
PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
|
21273
|
+
parse_target_implicit_parameter(parser, node);
|
21274
|
+
}
|
21275
|
+
|
20993
21276
|
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
|
20994
21277
|
parser_lex(parser);
|
20995
21278
|
|
@@ -21062,7 +21345,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
21062
21345
|
case PM_BACK_REFERENCE_READ_NODE:
|
21063
21346
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
21064
21347
|
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
21065
|
-
|
21348
|
+
PRISM_FALLTHROUGH
|
21066
21349
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
21067
21350
|
parser_lex(parser);
|
21068
21351
|
|
@@ -21107,7 +21390,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
21107
21390
|
pm_node_destroy(parser, node);
|
21108
21391
|
return result;
|
21109
21392
|
}
|
21393
|
+
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
|
21394
|
+
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
|
21395
|
+
parser_lex(parser);
|
21396
|
+
|
21397
|
+
pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
|
21398
|
+
pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0);
|
21399
|
+
|
21400
|
+
parse_target_implicit_parameter(parser, node);
|
21401
|
+
pm_node_destroy(parser, node);
|
21402
|
+
return result;
|
21403
|
+
}
|
21110
21404
|
case PM_LOCAL_VARIABLE_READ_NODE: {
|
21405
|
+
if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
|
21406
|
+
PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
|
21407
|
+
parse_target_implicit_parameter(parser, node);
|
21408
|
+
}
|
21409
|
+
|
21111
21410
|
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
|
21112
21411
|
parser_lex(parser);
|
21113
21412
|
|
@@ -21190,7 +21489,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
21190
21489
|
case PM_BACK_REFERENCE_READ_NODE:
|
21191
21490
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
21192
21491
|
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
21193
|
-
|
21492
|
+
PRISM_FALLTHROUGH
|
21194
21493
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
21195
21494
|
parser_lex(parser);
|
21196
21495
|
|
@@ -21235,7 +21534,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
21235
21534
|
pm_node_destroy(parser, node);
|
21236
21535
|
return result;
|
21237
21536
|
}
|
21537
|
+
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
|
21538
|
+
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
|
21539
|
+
parser_lex(parser);
|
21540
|
+
|
21541
|
+
pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
|
21542
|
+
pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0);
|
21543
|
+
|
21544
|
+
parse_target_implicit_parameter(parser, node);
|
21545
|
+
pm_node_destroy(parser, node);
|
21546
|
+
return result;
|
21547
|
+
}
|
21238
21548
|
case PM_LOCAL_VARIABLE_READ_NODE: {
|
21549
|
+
if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
|
21550
|
+
PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
|
21551
|
+
parse_target_implicit_parameter(parser, node);
|
21552
|
+
}
|
21553
|
+
|
21239
21554
|
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
|
21240
21555
|
parser_lex(parser);
|
21241
21556
|
|
@@ -21400,6 +21715,33 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
21400
21715
|
case PM_TOKEN_STAR:
|
21401
21716
|
case PM_TOKEN_STAR_STAR: {
|
21402
21717
|
parser_lex(parser);
|
21718
|
+
pm_token_t operator = parser->previous;
|
21719
|
+
switch (PM_NODE_TYPE(node)) {
|
21720
|
+
case PM_RESCUE_MODIFIER_NODE: {
|
21721
|
+
pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
|
21722
|
+
if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
|
21723
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21724
|
+
}
|
21725
|
+
break;
|
21726
|
+
}
|
21727
|
+
case PM_AND_NODE: {
|
21728
|
+
pm_and_node_t *cast = (pm_and_node_t *) node;
|
21729
|
+
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
|
21730
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21731
|
+
}
|
21732
|
+
break;
|
21733
|
+
}
|
21734
|
+
case PM_OR_NODE: {
|
21735
|
+
pm_or_node_t *cast = (pm_or_node_t *) node;
|
21736
|
+
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
|
21737
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21738
|
+
}
|
21739
|
+
break;
|
21740
|
+
}
|
21741
|
+
default:
|
21742
|
+
break;
|
21743
|
+
}
|
21744
|
+
|
21403
21745
|
pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
|
21404
21746
|
return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
|
21405
21747
|
}
|
@@ -21427,6 +21769,32 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
21427
21769
|
return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
|
21428
21770
|
}
|
21429
21771
|
|
21772
|
+
switch (PM_NODE_TYPE(node)) {
|
21773
|
+
case PM_RESCUE_MODIFIER_NODE: {
|
21774
|
+
pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
|
21775
|
+
if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
|
21776
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21777
|
+
}
|
21778
|
+
break;
|
21779
|
+
}
|
21780
|
+
case PM_AND_NODE: {
|
21781
|
+
pm_and_node_t *cast = (pm_and_node_t *) node;
|
21782
|
+
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
|
21783
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21784
|
+
}
|
21785
|
+
break;
|
21786
|
+
}
|
21787
|
+
case PM_OR_NODE: {
|
21788
|
+
pm_or_node_t *cast = (pm_or_node_t *) node;
|
21789
|
+
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
|
21790
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
|
21791
|
+
}
|
21792
|
+
break;
|
21793
|
+
}
|
21794
|
+
default:
|
21795
|
+
break;
|
21796
|
+
}
|
21797
|
+
|
21430
21798
|
pm_token_t message;
|
21431
21799
|
|
21432
21800
|
switch (parser->current.type) {
|
@@ -21774,6 +22142,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
21774
22142
|
if (pm_symbol_node_label_p(node)) {
|
21775
22143
|
return node;
|
21776
22144
|
}
|
22145
|
+
break;
|
21777
22146
|
default:
|
21778
22147
|
break;
|
21779
22148
|
}
|
@@ -21915,6 +22284,10 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
21915
22284
|
static pm_statements_node_t *
|
21916
22285
|
wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
|
21917
22286
|
if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
|
22287
|
+
if (statements == NULL) {
|
22288
|
+
statements = pm_statements_node_create(parser);
|
22289
|
+
}
|
22290
|
+
|
21918
22291
|
pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
|
21919
22292
|
pm_arguments_node_arguments_append(
|
21920
22293
|
arguments,
|
@@ -21930,6 +22303,10 @@ wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
|
|
21930
22303
|
|
21931
22304
|
if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
|
21932
22305
|
if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
|
22306
|
+
if (statements == NULL) {
|
22307
|
+
statements = pm_statements_node_create(parser);
|
22308
|
+
}
|
22309
|
+
|
21933
22310
|
pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
|
21934
22311
|
pm_arguments_node_arguments_append(
|
21935
22312
|
arguments,
|
@@ -21998,9 +22375,7 @@ parse_program(pm_parser_t *parser) {
|
|
21998
22375
|
parser_lex(parser);
|
21999
22376
|
pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
|
22000
22377
|
|
22001
|
-
if (statements
|
22002
|
-
statements = pm_statements_node_create(parser);
|
22003
|
-
} else if (!parser->parsing_eval) {
|
22378
|
+
if (statements != NULL && !parser->parsing_eval) {
|
22004
22379
|
// If we have statements, then the top-level statement should be
|
22005
22380
|
// explicitly checked as well. We have to do this here because
|
22006
22381
|
// everywhere else we check all but the last statement.
|
@@ -22012,13 +22387,6 @@ parse_program(pm_parser_t *parser) {
|
|
22012
22387
|
pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
|
22013
22388
|
pm_parser_scope_pop(parser);
|
22014
22389
|
|
22015
|
-
// If this is an empty file, then we're still going to parse all of the
|
22016
|
-
// statements in order to gather up all of the comments and such. Here we'll
|
22017
|
-
// correct the location information.
|
22018
|
-
if (pm_statements_node_body_length(statements) == 0) {
|
22019
|
-
pm_statements_node_location_set(statements, parser->start, parser->start);
|
22020
|
-
}
|
22021
|
-
|
22022
22390
|
// At the top level, see if we need to wrap the statements in a program
|
22023
22391
|
// node with a while loop based on the options.
|
22024
22392
|
if (parser->command_line & (PM_OPTIONS_COMMAND_LINE_P | PM_OPTIONS_COMMAND_LINE_N)) {
|
@@ -22028,6 +22396,14 @@ parse_program(pm_parser_t *parser) {
|
|
22028
22396
|
pm_node_list_free(&current_block_exits);
|
22029
22397
|
}
|
22030
22398
|
|
22399
|
+
// If this is an empty file, then we're still going to parse all of the
|
22400
|
+
// statements in order to gather up all of the comments and such. Here we'll
|
22401
|
+
// correct the location information.
|
22402
|
+
if (statements == NULL) {
|
22403
|
+
statements = pm_statements_node_create(parser);
|
22404
|
+
pm_statements_node_location_set(statements, parser->start, parser->start);
|
22405
|
+
}
|
22406
|
+
|
22031
22407
|
return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
|
22032
22408
|
}
|
22033
22409
|
|
@@ -22221,7 +22597,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
22221
22597
|
|
22222
22598
|
// Scopes given from the outside are not allowed to have numbered
|
22223
22599
|
// parameters.
|
22224
|
-
parser->current_scope->parameters
|
22600
|
+
parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
|
22225
22601
|
|
22226
22602
|
for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
|
22227
22603
|
const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
|
@@ -22431,11 +22807,11 @@ pm_parse(pm_parser_t *parser) {
|
|
22431
22807
|
* otherwise return true.
|
22432
22808
|
*/
|
22433
22809
|
static bool
|
22434
|
-
pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *
|
22810
|
+
pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets) {
|
22435
22811
|
#define LINE_SIZE 4096
|
22436
22812
|
char line[LINE_SIZE];
|
22437
22813
|
|
22438
|
-
while (memset(line, '\n', LINE_SIZE),
|
22814
|
+
while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
|
22439
22815
|
size_t length = LINE_SIZE;
|
22440
22816
|
while (length > 0 && line[length - 1] == '\n') length--;
|
22441
22817
|
|
@@ -22502,16 +22878,16 @@ pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
|
|
22502
22878
|
* can stream stdin in to Ruby so we need to support a streaming API.
|
22503
22879
|
*/
|
22504
22880
|
PRISM_EXPORTED_FUNCTION pm_node_t *
|
22505
|
-
pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *
|
22881
|
+
pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options) {
|
22506
22882
|
pm_buffer_init(buffer);
|
22507
22883
|
|
22508
|
-
bool eof = pm_parse_stream_read(buffer, stream,
|
22884
|
+
bool eof = pm_parse_stream_read(buffer, stream, stream_fgets);
|
22509
22885
|
pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
|
22510
22886
|
pm_node_t *node = pm_parse(parser);
|
22511
22887
|
|
22512
22888
|
while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
|
22513
22889
|
pm_node_destroy(parser, node);
|
22514
|
-
eof = pm_parse_stream_read(buffer, stream,
|
22890
|
+
eof = pm_parse_stream_read(buffer, stream, stream_fgets);
|
22515
22891
|
|
22516
22892
|
pm_parser_free(parser);
|
22517
22893
|
pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
|
@@ -22603,13 +22979,13 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons
|
|
22603
22979
|
* given stream into to the given buffer.
|
22604
22980
|
*/
|
22605
22981
|
PRISM_EXPORTED_FUNCTION void
|
22606
|
-
pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *
|
22982
|
+
pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data) {
|
22607
22983
|
pm_parser_t parser;
|
22608
22984
|
pm_options_t options = { 0 };
|
22609
22985
|
pm_options_read(&options, data);
|
22610
22986
|
|
22611
22987
|
pm_buffer_t parser_buffer;
|
22612
|
-
pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream,
|
22988
|
+
pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, &options);
|
22613
22989
|
pm_serialize_header(buffer);
|
22614
22990
|
pm_serialize_content(&parser, node, buffer);
|
22615
22991
|
pm_buffer_append_byte(buffer, '\0');
|
@@ -22643,3 +23019,166 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
|
|
22643
23019
|
}
|
22644
23020
|
|
22645
23021
|
#endif
|
23022
|
+
|
23023
|
+
/******************************************************************************/
|
23024
|
+
/* Slice queries for the Ruby API */
|
23025
|
+
/******************************************************************************/
|
23026
|
+
|
23027
|
+
/**
 * Classification that pm_slice_type assigns to a slice of source text.
 */
typedef enum {
    /** The encoding name that was supplied could not be resolved. */
    PM_SLICE_TYPE_ERROR = -1,

    /** The slice does not match any of the other categories. */
    PM_SLICE_TYPE_NONE = 0,

    /** The slice is a valid local variable name. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** The slice is a valid constant name. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** The slice is a valid method name. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
|
23044
|
+
|
23045
|
+
/**
|
23046
|
+
* Check that the slice is a valid local variable name or constant.
|
23047
|
+
*/
|
23048
|
+
pm_slice_type_t
|
23049
|
+
pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
|
23050
|
+
// first, get the right encoding object
|
23051
|
+
const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
|
23052
|
+
if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
|
23053
|
+
|
23054
|
+
// check that there is at least one character
|
23055
|
+
if (length == 0) return PM_SLICE_TYPE_NONE;
|
23056
|
+
|
23057
|
+
size_t width;
|
23058
|
+
if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
|
23059
|
+
// valid because alphabetical
|
23060
|
+
} else if (*source == '_') {
|
23061
|
+
// valid because underscore
|
23062
|
+
width = 1;
|
23063
|
+
} else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
|
23064
|
+
// valid because multibyte
|
23065
|
+
} else {
|
23066
|
+
// invalid because no match
|
23067
|
+
return PM_SLICE_TYPE_NONE;
|
23068
|
+
}
|
23069
|
+
|
23070
|
+
// determine the type of the slice based on the first character
|
23071
|
+
const uint8_t *end = source + length;
|
23072
|
+
pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
|
23073
|
+
|
23074
|
+
// next, iterate through all of the bytes of the string to ensure that they
|
23075
|
+
// are all valid identifier characters
|
23076
|
+
source += width;
|
23077
|
+
|
23078
|
+
while (source < end) {
|
23079
|
+
if ((width = encoding->alnum_char(source, end - source)) != 0) {
|
23080
|
+
// valid because alphanumeric
|
23081
|
+
source += width;
|
23082
|
+
} else if (*source == '_') {
|
23083
|
+
// valid because underscore
|
23084
|
+
source++;
|
23085
|
+
} else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
|
23086
|
+
// valid because multibyte
|
23087
|
+
source += width;
|
23088
|
+
} else {
|
23089
|
+
// invalid because no match
|
23090
|
+
break;
|
23091
|
+
}
|
23092
|
+
}
|
23093
|
+
|
23094
|
+
// accept a ! or ? at the end of the slice as a method name
|
23095
|
+
if (*source == '!' || *source == '?' || *source == '=') {
|
23096
|
+
source++;
|
23097
|
+
result = PM_SLICE_TYPE_METHOD_NAME;
|
23098
|
+
}
|
23099
|
+
|
23100
|
+
// valid if we are at the end of the slice
|
23101
|
+
return source == end ? result : PM_SLICE_TYPE_NONE;
|
23102
|
+
}
|
23103
|
+
|
23104
|
+
/**
|
23105
|
+
* Check that the slice is a valid local variable name.
|
23106
|
+
*/
|
23107
|
+
PRISM_EXPORTED_FUNCTION pm_string_query_t
|
23108
|
+
pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
|
23109
|
+
switch (pm_slice_type(source, length, encoding_name)) {
|
23110
|
+
case PM_SLICE_TYPE_ERROR:
|
23111
|
+
return PM_STRING_QUERY_ERROR;
|
23112
|
+
case PM_SLICE_TYPE_NONE:
|
23113
|
+
case PM_SLICE_TYPE_CONSTANT:
|
23114
|
+
case PM_SLICE_TYPE_METHOD_NAME:
|
23115
|
+
return PM_STRING_QUERY_FALSE;
|
23116
|
+
case PM_SLICE_TYPE_LOCAL:
|
23117
|
+
return PM_STRING_QUERY_TRUE;
|
23118
|
+
}
|
23119
|
+
|
23120
|
+
assert(false && "unreachable");
|
23121
|
+
return PM_STRING_QUERY_FALSE;
|
23122
|
+
}
|
23123
|
+
|
23124
|
+
/**
|
23125
|
+
* Check that the slice is a valid constant name.
|
23126
|
+
*/
|
23127
|
+
PRISM_EXPORTED_FUNCTION pm_string_query_t
|
23128
|
+
pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
|
23129
|
+
switch (pm_slice_type(source, length, encoding_name)) {
|
23130
|
+
case PM_SLICE_TYPE_ERROR:
|
23131
|
+
return PM_STRING_QUERY_ERROR;
|
23132
|
+
case PM_SLICE_TYPE_NONE:
|
23133
|
+
case PM_SLICE_TYPE_LOCAL:
|
23134
|
+
case PM_SLICE_TYPE_METHOD_NAME:
|
23135
|
+
return PM_STRING_QUERY_FALSE;
|
23136
|
+
case PM_SLICE_TYPE_CONSTANT:
|
23137
|
+
return PM_STRING_QUERY_TRUE;
|
23138
|
+
}
|
23139
|
+
|
23140
|
+
assert(false && "unreachable");
|
23141
|
+
return PM_STRING_QUERY_FALSE;
|
23142
|
+
}
|
23143
|
+
|
23144
|
+
/**
|
23145
|
+
* Check that the slice is a valid method name.
|
23146
|
+
*/
|
23147
|
+
PRISM_EXPORTED_FUNCTION pm_string_query_t
|
23148
|
+
pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
|
23149
|
+
#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
|
23150
|
+
#define C1(c) (*source == c)
|
23151
|
+
#define C2(s) (memcmp(source, s, 2) == 0)
|
23152
|
+
#define C3(s) (memcmp(source, s, 3) == 0)
|
23153
|
+
|
23154
|
+
switch (pm_slice_type(source, length, encoding_name)) {
|
23155
|
+
case PM_SLICE_TYPE_ERROR:
|
23156
|
+
return PM_STRING_QUERY_ERROR;
|
23157
|
+
case PM_SLICE_TYPE_NONE:
|
23158
|
+
break;
|
23159
|
+
case PM_SLICE_TYPE_LOCAL:
|
23160
|
+
// numbered parameters are not valid method names
|
23161
|
+
return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
|
23162
|
+
case PM_SLICE_TYPE_CONSTANT:
|
23163
|
+
// all constants are valid method names
|
23164
|
+
case PM_SLICE_TYPE_METHOD_NAME:
|
23165
|
+
// all method names are valid method names
|
23166
|
+
return PM_STRING_QUERY_TRUE;
|
23167
|
+
}
|
23168
|
+
|
23169
|
+
switch (length) {
|
23170
|
+
case 1:
|
23171
|
+
return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
|
23172
|
+
case 2:
|
23173
|
+
return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
|
23174
|
+
case 3:
|
23175
|
+
return B(C3("===") || C3("<=>") || C3("[]="));
|
23176
|
+
default:
|
23177
|
+
return PM_STRING_QUERY_FALSE;
|
23178
|
+
}
|
23179
|
+
|
23180
|
+
#undef B
|
23181
|
+
#undef C1
|
23182
|
+
#undef C2
|
23183
|
+
#undef C3
|
23184
|
+
}
|