prism 1.3.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +46 -1
- data/Makefile +2 -1
- data/README.md +1 -0
- data/config.yml +273 -37
- data/docs/parser_translation.md +8 -23
- data/docs/releasing.md +1 -1
- data/docs/ripper_translation.md +1 -1
- data/docs/ruby_api.md +1 -1
- data/ext/prism/api_node.c +1816 -1303
- data/ext/prism/extension.c +244 -110
- data/ext/prism/extension.h +4 -4
- data/include/prism/ast.h +291 -49
- data/include/prism/defines.h +4 -1
- data/include/prism/diagnostic.h +4 -0
- data/include/prism/options.h +89 -3
- data/include/prism/regexp.h +2 -2
- data/include/prism/util/pm_buffer.h +18 -0
- data/include/prism/util/pm_integer.h +4 -0
- data/include/prism/util/pm_list.h +6 -0
- data/include/prism/util/pm_string.h +12 -2
- data/include/prism/version.h +2 -2
- data/include/prism.h +41 -16
- data/lib/prism/compiler.rb +456 -151
- data/lib/prism/desugar_compiler.rb +1 -0
- data/lib/prism/dispatcher.rb +16 -0
- data/lib/prism/dot_visitor.rb +21 -1
- data/lib/prism/dsl.rb +13 -2
- data/lib/prism/ffi.rb +62 -34
- data/lib/prism/inspect_visitor.rb +5 -1
- data/lib/prism/lex_compat.rb +1 -0
- data/lib/prism/mutation_compiler.rb +3 -0
- data/lib/prism/node.rb +554 -345
- data/lib/prism/node_ext.rb +4 -1
- data/lib/prism/pack.rb +2 -0
- data/lib/prism/parse_result/comments.rb +1 -0
- data/lib/prism/parse_result/errors.rb +1 -0
- data/lib/prism/parse_result/newlines.rb +2 -1
- data/lib/prism/parse_result.rb +53 -0
- data/lib/prism/pattern.rb +1 -0
- data/lib/prism/polyfill/append_as_bytes.rb +15 -0
- data/lib/prism/polyfill/scan_byte.rb +14 -0
- data/lib/prism/polyfill/warn.rb +42 -0
- data/lib/prism/reflection.rb +5 -2
- data/lib/prism/relocation.rb +1 -0
- data/lib/prism/serialize.rb +1275 -783
- data/lib/prism/string_query.rb +1 -0
- data/lib/prism/translation/parser/builder.rb +62 -0
- data/lib/prism/translation/parser/compiler.rb +230 -152
- data/lib/prism/translation/parser/lexer.rb +446 -64
- data/lib/prism/translation/parser.rb +64 -4
- data/lib/prism/translation/parser33.rb +1 -0
- data/lib/prism/translation/parser34.rb +1 -0
- data/lib/prism/translation/parser35.rb +13 -0
- data/lib/prism/translation/parser_current.rb +24 -0
- data/lib/prism/translation/ripper/sexp.rb +1 -0
- data/lib/prism/translation/ripper.rb +30 -4
- data/lib/prism/translation/ruby_parser.rb +291 -7
- data/lib/prism/translation.rb +3 -0
- data/lib/prism/visitor.rb +457 -152
- data/lib/prism.rb +5 -3
- data/prism.gemspec +9 -1
- data/rbi/prism/dsl.rbi +9 -6
- data/rbi/prism/node.rbi +43 -16
- data/rbi/prism/parse_result.rbi +17 -0
- data/rbi/prism/translation/parser35.rbi +6 -0
- data/rbi/prism.rbi +39 -36
- data/sig/prism/dispatcher.rbs +3 -0
- data/sig/prism/dsl.rbs +7 -5
- data/sig/prism/node.rbs +461 -37
- data/sig/prism/node_ext.rbs +84 -17
- data/sig/prism/parse_result/comments.rbs +38 -0
- data/sig/prism/parse_result.rbs +14 -0
- data/sig/prism/reflection.rbs +1 -1
- data/sig/prism/serialize.rbs +4 -2
- data/sig/prism.rbs +22 -1
- data/src/diagnostic.c +9 -3
- data/src/node.c +23 -0
- data/src/options.c +33 -2
- data/src/prettyprint.c +32 -0
- data/src/prism.c +620 -242
- data/src/serialize.c +8 -0
- data/src/token_type.c +36 -34
- data/src/util/pm_buffer.c +40 -0
- data/src/util/pm_constant_pool.c +6 -2
- data/src/util/pm_strncasecmp.c +13 -1
- metadata +11 -7
data/src/prism.c
CHANGED
@@ -1409,7 +1409,7 @@ pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
|
|
1409
1409
|
static inline void
|
1410
1410
|
pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
|
1411
1411
|
if (pm_conditional_predicate_warn_write_literal_p(node)) {
|
1412
|
-
pm_parser_warn_node(parser, node, parser->version
|
1412
|
+
pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
|
1413
1413
|
}
|
1414
1414
|
}
|
1415
1415
|
|
@@ -1649,22 +1649,25 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
|
|
1649
1649
|
* the function pointer or can just directly use the UTF-8 functions.
|
1650
1650
|
*/
|
1651
1651
|
static inline size_t
|
1652
|
-
char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
|
1652
|
+
char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
|
1653
|
+
if (n <= 0) return 0;
|
1654
|
+
|
1653
1655
|
if (parser->encoding_changed) {
|
1654
1656
|
size_t width;
|
1655
|
-
|
1657
|
+
|
1658
|
+
if ((width = parser->encoding->alpha_char(b, n)) != 0) {
|
1656
1659
|
return width;
|
1657
1660
|
} else if (*b == '_') {
|
1658
1661
|
return 1;
|
1659
1662
|
} else if (*b >= 0x80) {
|
1660
|
-
return parser->encoding->char_width(b,
|
1663
|
+
return parser->encoding->char_width(b, n);
|
1661
1664
|
} else {
|
1662
1665
|
return 0;
|
1663
1666
|
}
|
1664
1667
|
} else if (*b < 0x80) {
|
1665
1668
|
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
|
1666
1669
|
} else {
|
1667
|
-
return pm_encoding_utf_8_char_width(b,
|
1670
|
+
return pm_encoding_utf_8_char_width(b, n);
|
1668
1671
|
}
|
1669
1672
|
}
|
1670
1673
|
|
@@ -1673,11 +1676,13 @@ char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
|
|
1673
1676
|
* has not been changed.
|
1674
1677
|
*/
|
1675
1678
|
static inline size_t
|
1676
|
-
char_is_identifier_utf8(const uint8_t *b,
|
1677
|
-
if (
|
1679
|
+
char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
|
1680
|
+
if (n <= 0) {
|
1681
|
+
return 0;
|
1682
|
+
} else if (*b < 0x80) {
|
1678
1683
|
return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
|
1679
1684
|
} else {
|
1680
|
-
return pm_encoding_utf_8_char_width(b,
|
1685
|
+
return pm_encoding_utf_8_char_width(b, n);
|
1681
1686
|
}
|
1682
1687
|
}
|
1683
1688
|
|
@@ -1687,20 +1692,24 @@ char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
|
1687
1692
|
* it's important that it be as fast as possible.
|
1688
1693
|
*/
|
1689
1694
|
static inline size_t
|
1690
|
-
char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
|
1691
|
-
if (
|
1695
|
+
char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
|
1696
|
+
if (n <= 0) {
|
1697
|
+
return 0;
|
1698
|
+
} else if (parser->encoding_changed) {
|
1692
1699
|
size_t width;
|
1693
|
-
|
1700
|
+
|
1701
|
+
if ((width = parser->encoding->alnum_char(b, n)) != 0) {
|
1694
1702
|
return width;
|
1695
1703
|
} else if (*b == '_') {
|
1696
1704
|
return 1;
|
1697
1705
|
} else if (*b >= 0x80) {
|
1698
|
-
return parser->encoding->char_width(b,
|
1706
|
+
return parser->encoding->char_width(b, n);
|
1699
1707
|
} else {
|
1700
1708
|
return 0;
|
1701
1709
|
}
|
1710
|
+
} else {
|
1711
|
+
return char_is_identifier_utf8(b, n);
|
1702
1712
|
}
|
1703
|
-
return char_is_identifier_utf8(b, parser->end);
|
1704
1713
|
}
|
1705
1714
|
|
1706
1715
|
// Here we're defining a perfect hash for the characters that are allowed in
|
@@ -1731,9 +1740,10 @@ char_is_global_name_punctuation(const uint8_t b) {
|
|
1731
1740
|
static inline bool
|
1732
1741
|
token_is_setter_name(pm_token_t *token) {
|
1733
1742
|
return (
|
1734
|
-
(token->type ==
|
1743
|
+
(token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
|
1744
|
+
((token->type == PM_TOKEN_IDENTIFIER) &&
|
1735
1745
|
(token->end - token->start >= 2) &&
|
1736
|
-
(token->end[-1] == '=')
|
1746
|
+
(token->end[-1] == '='))
|
1737
1747
|
);
|
1738
1748
|
}
|
1739
1749
|
|
@@ -2895,7 +2905,7 @@ pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
|
|
2895
2905
|
(node->message_loc.start != NULL) &&
|
2896
2906
|
(node->message_loc.end[-1] != '!') &&
|
2897
2907
|
(node->message_loc.end[-1] != '?') &&
|
2898
|
-
char_is_identifier_start(parser, node->message_loc.start) &&
|
2908
|
+
char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
|
2899
2909
|
(node->opening_loc.start == NULL) &&
|
2900
2910
|
(node->arguments == NULL) &&
|
2901
2911
|
(node->block == NULL)
|
@@ -2966,7 +2976,7 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
|
|
2966
2976
|
*/
|
2967
2977
|
static void
|
2968
2978
|
pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
|
2969
|
-
if (parser->version
|
2979
|
+
if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
|
2970
2980
|
if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
|
2971
2981
|
pm_node_t *node;
|
2972
2982
|
PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
|
@@ -3864,7 +3874,7 @@ pm_def_node_create(
|
|
3864
3874
|
end = end_keyword->end;
|
3865
3875
|
}
|
3866
3876
|
|
3867
|
-
if (
|
3877
|
+
if (receiver != NULL) {
|
3868
3878
|
pm_def_node_receiver_check(parser, receiver);
|
3869
3879
|
}
|
3870
3880
|
|
@@ -4243,7 +4253,7 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
4243
4253
|
const uint8_t *point = memchr(start, '.', length);
|
4244
4254
|
assert(point && "should have a decimal point");
|
4245
4255
|
|
4246
|
-
uint8_t *digits =
|
4256
|
+
uint8_t *digits = xmalloc(length);
|
4247
4257
|
if (digits == NULL) {
|
4248
4258
|
fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
|
4249
4259
|
abort();
|
@@ -4256,7 +4266,7 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
4256
4266
|
digits[0] = '1';
|
4257
4267
|
if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
|
4258
4268
|
pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
|
4259
|
-
|
4269
|
+
xfree(digits);
|
4260
4270
|
|
4261
4271
|
pm_integers_reduce(&node->numerator, &node->denominator);
|
4262
4272
|
return node;
|
@@ -5269,6 +5279,10 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_
|
|
5269
5279
|
|
5270
5280
|
switch (PM_NODE_TYPE(part)) {
|
5271
5281
|
case PM_STRING_NODE:
|
5282
|
+
// If inner string is not frozen, clear flags for this string
|
5283
|
+
if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
|
5284
|
+
CLEAR_FLAGS(node);
|
5285
|
+
}
|
5272
5286
|
part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
|
5273
5287
|
break;
|
5274
5288
|
case PM_INTERPOLATED_STRING_NODE:
|
@@ -5318,6 +5332,12 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_
|
|
5318
5332
|
// should clear the mutability flags.
|
5319
5333
|
CLEAR_FLAGS(node);
|
5320
5334
|
break;
|
5335
|
+
case PM_X_STRING_NODE:
|
5336
|
+
case PM_INTERPOLATED_X_STRING_NODE:
|
5337
|
+
// If this is an x string, then this is a syntax error. But we want
|
5338
|
+
// to handle it here so that we don't fail the assertion.
|
5339
|
+
CLEAR_FLAGS(node);
|
5340
|
+
break;
|
5321
5341
|
default:
|
5322
5342
|
assert(false && "unexpected node type");
|
5323
5343
|
break;
|
@@ -5652,7 +5672,7 @@ pm_lambda_node_create(
|
|
5652
5672
|
*/
|
5653
5673
|
static pm_local_variable_and_write_node_t *
|
5654
5674
|
pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
|
5655
|
-
assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
|
5675
|
+
assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
|
5656
5676
|
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
|
5657
5677
|
pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
|
5658
5678
|
|
@@ -5707,7 +5727,7 @@ pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *tar
|
|
5707
5727
|
*/
|
5708
5728
|
static pm_local_variable_or_write_node_t *
|
5709
5729
|
pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
|
5710
|
-
assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
|
5730
|
+
assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
|
5711
5731
|
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
|
5712
5732
|
pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
|
5713
5733
|
|
@@ -6159,7 +6179,10 @@ pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *to
|
|
6159
6179
|
const uint8_t *end = token->end;
|
6160
6180
|
|
6161
6181
|
ptrdiff_t diff = end - start;
|
6162
|
-
assert(diff > 0
|
6182
|
+
assert(diff > 0);
|
6183
|
+
#if PTRDIFF_MAX > SIZE_MAX
|
6184
|
+
assert(diff < (ptrdiff_t) SIZE_MAX);
|
6185
|
+
#endif
|
6163
6186
|
size_t length = (size_t) diff;
|
6164
6187
|
|
6165
6188
|
char *digits = xcalloc(length + 1, sizeof(char));
|
@@ -6393,12 +6416,13 @@ pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_st
|
|
6393
6416
|
* Allocate and initialize new ParenthesesNode node.
|
6394
6417
|
*/
|
6395
6418
|
static pm_parentheses_node_t *
|
6396
|
-
pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing) {
|
6419
|
+
pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
|
6397
6420
|
pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
|
6398
6421
|
|
6399
6422
|
*node = (pm_parentheses_node_t) {
|
6400
6423
|
{
|
6401
6424
|
.type = PM_PARENTHESES_NODE,
|
6425
|
+
.flags = flags,
|
6402
6426
|
.node_id = PM_NODE_IDENTIFY(parser),
|
6403
6427
|
.location = {
|
6404
6428
|
.start = opening->start,
|
@@ -6665,6 +6689,7 @@ pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
|
|
6665
6689
|
},
|
6666
6690
|
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
|
6667
6691
|
.operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
6692
|
+
.then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
6668
6693
|
.reference = NULL,
|
6669
6694
|
.statements = NULL,
|
6670
6695
|
.subsequent = NULL,
|
@@ -8561,85 +8586,66 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
|
|
8561
8586
|
/* Context manipulations */
|
8562
8587
|
/******************************************************************************/
|
8563
8588
|
|
8564
|
-
static
|
8565
|
-
|
8566
|
-
|
8567
|
-
|
8568
|
-
|
8569
|
-
|
8570
|
-
|
8571
|
-
|
8572
|
-
|
8573
|
-
|
8574
|
-
|
8575
|
-
|
8576
|
-
|
8577
|
-
|
8578
|
-
|
8579
|
-
|
8580
|
-
|
8581
|
-
|
8582
|
-
|
8583
|
-
|
8584
|
-
|
8585
|
-
|
8586
|
-
|
8587
|
-
|
8588
|
-
|
8589
|
-
|
8590
|
-
|
8591
|
-
|
8592
|
-
|
8593
|
-
|
8594
|
-
|
8595
|
-
|
8596
|
-
|
8597
|
-
|
8598
|
-
|
8599
|
-
|
8600
|
-
|
8601
|
-
|
8602
|
-
|
8603
|
-
|
8604
|
-
|
8605
|
-
|
8606
|
-
|
8607
|
-
|
8608
|
-
|
8609
|
-
|
8610
|
-
|
8611
|
-
|
8612
|
-
|
8613
|
-
|
8614
|
-
|
8615
|
-
|
8616
|
-
|
8617
|
-
|
8618
|
-
|
8619
|
-
|
8620
|
-
case PM_CONTEXT_CLASS_RESCUE:
|
8621
|
-
case PM_CONTEXT_DEF_RESCUE:
|
8622
|
-
case PM_CONTEXT_LAMBDA_RESCUE:
|
8623
|
-
case PM_CONTEXT_MODULE_RESCUE:
|
8624
|
-
case PM_CONTEXT_SCLASS_RESCUE:
|
8625
|
-
return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
|
8626
|
-
case PM_CONTEXT_BEGIN_ELSE:
|
8627
|
-
case PM_CONTEXT_BLOCK_ELSE:
|
8628
|
-
case PM_CONTEXT_CLASS_ELSE:
|
8629
|
-
case PM_CONTEXT_DEF_ELSE:
|
8630
|
-
case PM_CONTEXT_LAMBDA_ELSE:
|
8631
|
-
case PM_CONTEXT_MODULE_ELSE:
|
8632
|
-
case PM_CONTEXT_SCLASS_ELSE:
|
8633
|
-
return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
|
8634
|
-
case PM_CONTEXT_LAMBDA_BRACES:
|
8635
|
-
return token->type == PM_TOKEN_BRACE_RIGHT;
|
8636
|
-
case PM_CONTEXT_PREDICATE:
|
8637
|
-
return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
|
8638
|
-
case PM_CONTEXT_NONE:
|
8639
|
-
return false;
|
8640
|
-
}
|
8589
|
+
static const uint32_t context_terminators[] = {
|
8590
|
+
[PM_CONTEXT_NONE] = 0,
|
8591
|
+
[PM_CONTEXT_BEGIN] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
|
8592
|
+
[PM_CONTEXT_BEGIN_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
|
8593
|
+
[PM_CONTEXT_BEGIN_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
|
8594
|
+
[PM_CONTEXT_BEGIN_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
|
8595
|
+
[PM_CONTEXT_BLOCK_BRACES] = (1 << PM_TOKEN_BRACE_RIGHT),
|
8596
|
+
[PM_CONTEXT_BLOCK_KEYWORDS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
|
8597
|
+
[PM_CONTEXT_BLOCK_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
|
8598
|
+
[PM_CONTEXT_BLOCK_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
|
8599
|
+
[PM_CONTEXT_BLOCK_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
|
8600
|
+
[PM_CONTEXT_CASE_WHEN] = (1 << PM_TOKEN_KEYWORD_WHEN) | (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_ELSE),
|
8601
|
+
[PM_CONTEXT_CASE_IN] = (1 << PM_TOKEN_KEYWORD_IN) | (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_ELSE),
|
8602
|
+
[PM_CONTEXT_CLASS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
|
8603
|
+
[PM_CONTEXT_CLASS_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
|
8604
|
+
[PM_CONTEXT_CLASS_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
|
8605
|
+
[PM_CONTEXT_CLASS_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
|
8606
|
+
[PM_CONTEXT_DEF] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
|
8607
|
+
[PM_CONTEXT_DEF_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
|
8608
|
+
[PM_CONTEXT_DEF_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
|
8609
|
+
[PM_CONTEXT_DEF_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
|
8610
|
+
[PM_CONTEXT_DEF_PARAMS] = (1 << PM_TOKEN_EOF),
|
8611
|
+
[PM_CONTEXT_DEFINED] = (1 << PM_TOKEN_EOF),
|
8612
|
+
[PM_CONTEXT_DEFAULT_PARAMS] = (1 << PM_TOKEN_COMMA) | (1 << PM_TOKEN_PARENTHESIS_RIGHT),
|
8613
|
+
[PM_CONTEXT_ELSE] = (1 << PM_TOKEN_KEYWORD_END),
|
8614
|
+
[PM_CONTEXT_ELSIF] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_ELSIF) | (1 << PM_TOKEN_KEYWORD_END),
|
8615
|
+
[PM_CONTEXT_EMBEXPR] = (1 << PM_TOKEN_EMBEXPR_END),
|
8616
|
+
[PM_CONTEXT_FOR] = (1 << PM_TOKEN_KEYWORD_END),
|
8617
|
+
[PM_CONTEXT_FOR_INDEX] = (1 << PM_TOKEN_KEYWORD_IN),
|
8618
|
+
[PM_CONTEXT_IF] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_ELSIF) | (1 << PM_TOKEN_KEYWORD_END),
|
8619
|
+
[PM_CONTEXT_LAMBDA_BRACES] = (1 << PM_TOKEN_BRACE_RIGHT),
|
8620
|
+
[PM_CONTEXT_LAMBDA_DO_END] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
|
8621
|
+
[PM_CONTEXT_LAMBDA_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
|
8622
|
+
[PM_CONTEXT_LAMBDA_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
|
8623
|
+
[PM_CONTEXT_LAMBDA_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
|
8624
|
+
[PM_CONTEXT_LOOP_PREDICATE] = (1 << PM_TOKEN_KEYWORD_DO) | (1 << PM_TOKEN_KEYWORD_THEN),
|
8625
|
+
[PM_CONTEXT_MAIN] = (1 << PM_TOKEN_EOF),
|
8626
|
+
[PM_CONTEXT_MODULE] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
|
8627
|
+
[PM_CONTEXT_MODULE_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
|
8628
|
+
[PM_CONTEXT_MODULE_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
|
8629
|
+
[PM_CONTEXT_MODULE_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
|
8630
|
+
[PM_CONTEXT_MULTI_TARGET] = (1 << PM_TOKEN_EOF),
|
8631
|
+
[PM_CONTEXT_PARENS] = (1 << PM_TOKEN_PARENTHESIS_RIGHT),
|
8632
|
+
[PM_CONTEXT_POSTEXE] = (1 << PM_TOKEN_BRACE_RIGHT),
|
8633
|
+
[PM_CONTEXT_PREDICATE] = (1 << PM_TOKEN_KEYWORD_THEN) | (1 << PM_TOKEN_NEWLINE) | (1 << PM_TOKEN_SEMICOLON),
|
8634
|
+
[PM_CONTEXT_PREEXE] = (1 << PM_TOKEN_BRACE_RIGHT),
|
8635
|
+
[PM_CONTEXT_RESCUE_MODIFIER] = (1 << PM_TOKEN_EOF),
|
8636
|
+
[PM_CONTEXT_SCLASS] = (1 << PM_TOKEN_KEYWORD_END) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ENSURE),
|
8637
|
+
[PM_CONTEXT_SCLASS_ENSURE] = (1 << PM_TOKEN_KEYWORD_END),
|
8638
|
+
[PM_CONTEXT_SCLASS_ELSE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_END),
|
8639
|
+
[PM_CONTEXT_SCLASS_RESCUE] = (1 << PM_TOKEN_KEYWORD_ENSURE) | (1 << PM_TOKEN_KEYWORD_RESCUE) | (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
|
8640
|
+
[PM_CONTEXT_TERNARY] = (1 << PM_TOKEN_EOF),
|
8641
|
+
[PM_CONTEXT_UNLESS] = (1 << PM_TOKEN_KEYWORD_ELSE) | (1 << PM_TOKEN_KEYWORD_END),
|
8642
|
+
[PM_CONTEXT_UNTIL] = (1 << PM_TOKEN_KEYWORD_END),
|
8643
|
+
[PM_CONTEXT_WHILE] = (1 << PM_TOKEN_KEYWORD_END),
|
8644
|
+
};
|
8641
8645
|
|
8642
|
-
|
8646
|
+
static inline bool
|
8647
|
+
context_terminator(pm_context_t context, pm_token_t *token) {
|
8648
|
+
return token->type < 32 && (context_terminators[context] & (1 << token->type));
|
8643
8649
|
}
|
8644
8650
|
|
8645
8651
|
/**
|
@@ -9082,13 +9088,13 @@ lex_global_variable(pm_parser_t *parser) {
|
|
9082
9088
|
parser->current.end++;
|
9083
9089
|
size_t width;
|
9084
9090
|
|
9085
|
-
if (parser->current.end
|
9091
|
+
if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
|
9086
9092
|
do {
|
9087
9093
|
parser->current.end += width;
|
9088
|
-
} while (parser->current.end
|
9094
|
+
} while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
|
9089
9095
|
|
9090
9096
|
// $0 isn't allowed to be followed by anything.
|
9091
|
-
pm_diagnostic_id_t diag_id = parser->version
|
9097
|
+
pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
|
9092
9098
|
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
|
9093
9099
|
}
|
9094
9100
|
|
@@ -9114,10 +9120,10 @@ lex_global_variable(pm_parser_t *parser) {
|
|
9114
9120
|
default: {
|
9115
9121
|
size_t width;
|
9116
9122
|
|
9117
|
-
if ((width = char_is_identifier(parser, parser->current.end)) > 0) {
|
9123
|
+
if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
|
9118
9124
|
do {
|
9119
9125
|
parser->current.end += width;
|
9120
|
-
} while (allow_multiple && parser->current.end
|
9126
|
+
} while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
|
9121
9127
|
} else if (pm_char_is_whitespace(peek(parser))) {
|
9122
9128
|
// If we get here, then we have a $ followed by whitespace,
|
9123
9129
|
// which is not allowed.
|
@@ -9125,7 +9131,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
9125
9131
|
} else {
|
9126
9132
|
// If we get here, then we have a $ followed by something that
|
9127
9133
|
// isn't recognized as a global variable.
|
9128
|
-
pm_diagnostic_id_t diag_id = parser->version
|
9134
|
+
pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
|
9129
9135
|
const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9130
9136
|
PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
|
9131
9137
|
}
|
@@ -9182,11 +9188,11 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
|
|
9182
9188
|
bool encoding_changed = parser->encoding_changed;
|
9183
9189
|
|
9184
9190
|
if (encoding_changed) {
|
9185
|
-
while (
|
9191
|
+
while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
|
9186
9192
|
current_end += width;
|
9187
9193
|
}
|
9188
9194
|
} else {
|
9189
|
-
while (
|
9195
|
+
while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
|
9190
9196
|
current_end += width;
|
9191
9197
|
}
|
9192
9198
|
}
|
@@ -9360,7 +9366,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
|
|
9360
9366
|
const uint8_t *variable = pound + 2;
|
9361
9367
|
if (*variable == '@' && pound + 3 < parser->end) variable++;
|
9362
9368
|
|
9363
|
-
if (char_is_identifier_start(parser, variable)) {
|
9369
|
+
if (char_is_identifier_start(parser, variable, parser->end - variable)) {
|
9364
9370
|
// At this point we're sure that we've either hit an embedded instance
|
9365
9371
|
// or class variable. In this case we'll first need to check if we've
|
9366
9372
|
// already consumed content.
|
@@ -9409,7 +9415,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
|
|
9409
9415
|
// or a global name punctuation character, then we've hit an embedded
|
9410
9416
|
// global variable.
|
9411
9417
|
if (
|
9412
|
-
char_is_identifier_start(parser, check) ||
|
9418
|
+
char_is_identifier_start(parser, check, parser->end - check) ||
|
9413
9419
|
(pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
|
9414
9420
|
) {
|
9415
9421
|
// In this case we've hit an embedded global variable. First check to
|
@@ -9541,21 +9547,7 @@ escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t fla
|
|
9541
9547
|
parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY;
|
9542
9548
|
}
|
9543
9549
|
|
9544
|
-
if (value
|
9545
|
-
pm_buffer_append_byte(buffer, (uint8_t) value);
|
9546
|
-
} else if (value <= 0x7FF) { // 110xxxxx 10xxxxxx
|
9547
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0xC0 | (value >> 6)));
|
9548
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
|
9549
|
-
} else if (value <= 0xFFFF) { // 1110xxxx 10xxxxxx 10xxxxxx
|
9550
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0xE0 | (value >> 12)));
|
9551
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
|
9552
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
|
9553
|
-
} else if (value <= 0x10FFFF) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
9554
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0xF0 | (value >> 18)));
|
9555
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 12) & 0x3F)));
|
9556
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
|
9557
|
-
pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
|
9558
|
-
} else {
|
9550
|
+
if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
|
9559
9551
|
pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
|
9560
9552
|
pm_buffer_append_byte(buffer, 0xEF);
|
9561
9553
|
pm_buffer_append_byte(buffer, 0xBF);
|
@@ -9580,28 +9572,6 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
|
|
9580
9572
|
pm_buffer_append_byte(buffer, byte);
|
9581
9573
|
}
|
9582
9574
|
|
9583
|
-
/**
|
9584
|
-
* Write each byte of the given escaped character into the buffer.
|
9585
|
-
*/
|
9586
|
-
static inline void
|
9587
|
-
escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
|
9588
|
-
size_t width;
|
9589
|
-
if (parser->encoding_changed) {
|
9590
|
-
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9591
|
-
} else {
|
9592
|
-
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
|
9593
|
-
}
|
9594
|
-
|
9595
|
-
// TODO: If the character is invalid in the given encoding, then we'll just
|
9596
|
-
// push one byte into the buffer. This should actually be an error.
|
9597
|
-
width = (width == 0) ? 1 : width;
|
9598
|
-
|
9599
|
-
for (size_t index = 0; index < width; index++) {
|
9600
|
-
escape_write_byte_encoded(parser, buffer, *parser->current.end);
|
9601
|
-
parser->current.end++;
|
9602
|
-
}
|
9603
|
-
}
|
9604
|
-
|
9605
9575
|
/**
|
9606
9576
|
* The regular expression engine doesn't support the same escape sequences as
|
9607
9577
|
* Ruby does. So first we have to read the escape sequence, and then we have to
|
@@ -9626,6 +9596,33 @@ escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular
|
|
9626
9596
|
escape_write_byte_encoded(parser, buffer, byte);
|
9627
9597
|
}
|
9628
9598
|
|
9599
|
+
/**
|
9600
|
+
* Write each byte of the given escaped character into the buffer.
|
9601
|
+
*/
|
9602
|
+
static inline void
|
9603
|
+
escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
|
9604
|
+
size_t width;
|
9605
|
+
if (parser->encoding_changed) {
|
9606
|
+
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
9607
|
+
} else {
|
9608
|
+
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
|
9609
|
+
}
|
9610
|
+
|
9611
|
+
if (width == 1) {
|
9612
|
+
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
|
9613
|
+
} else if (width > 1) {
|
9614
|
+
// Valid multibyte character. Just ignore escape.
|
9615
|
+
pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
|
9616
|
+
pm_buffer_append_bytes(b, parser->current.end, width);
|
9617
|
+
parser->current.end += width;
|
9618
|
+
} else {
|
9619
|
+
// Assume the next character wasn't meant to be part of this escape
|
9620
|
+
// sequence since it is invalid. Add an error and move on.
|
9621
|
+
parser->current.end++;
|
9622
|
+
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
|
9623
|
+
}
|
9624
|
+
}
|
9625
|
+
|
9629
9626
|
/**
|
9630
9627
|
* Warn about using a space or a tab character in an escape, as opposed to using
|
9631
9628
|
* \\s or \\t. Note that we can quite copy the source because the warning
|
@@ -9652,7 +9649,8 @@ escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *t
|
|
9652
9649
|
*/
|
9653
9650
|
static void
|
9654
9651
|
escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
|
9655
|
-
|
9652
|
+
uint8_t peeked = peek(parser);
|
9653
|
+
switch (peeked) {
|
9656
9654
|
case '\\': {
|
9657
9655
|
parser->current.end++;
|
9658
9656
|
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
|
@@ -9722,6 +9720,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9722
9720
|
}
|
9723
9721
|
}
|
9724
9722
|
|
9723
|
+
value = escape_byte(value, flags);
|
9725
9724
|
escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
|
9726
9725
|
return;
|
9727
9726
|
}
|
@@ -9770,7 +9769,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9770
9769
|
|
9771
9770
|
size_t whitespace;
|
9772
9771
|
while (true) {
|
9773
|
-
if ((whitespace =
|
9772
|
+
if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
|
9774
9773
|
parser->current.end += whitespace;
|
9775
9774
|
} else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
|
9776
9775
|
// This is super hacky, but it gets us nicer error
|
@@ -9818,7 +9817,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
9818
9817
|
uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
|
9819
9818
|
escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
|
9820
9819
|
|
9821
|
-
parser->current.end +=
|
9820
|
+
parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
|
9822
9821
|
}
|
9823
9822
|
|
9824
9823
|
// ?\u{nnnn} character literal should contain only one codepoint
|
@@ -10049,8 +10048,13 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
|
|
10049
10048
|
PRISM_FALLTHROUGH
|
10050
10049
|
}
|
10051
10050
|
default: {
|
10051
|
+
if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
|
10052
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
10053
|
+
pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
|
10054
|
+
return;
|
10055
|
+
}
|
10052
10056
|
if (parser->current.end < parser->end) {
|
10053
|
-
escape_write_escape_encoded(parser, buffer);
|
10057
|
+
escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
|
10054
10058
|
} else {
|
10055
10059
|
pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
|
10056
10060
|
}
|
@@ -10123,7 +10127,7 @@ lex_question_mark(pm_parser_t *parser) {
|
|
10123
10127
|
!(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
|
10124
10128
|
(
|
10125
10129
|
(parser->current.end + encoding_width >= parser->end) ||
|
10126
|
-
!char_is_identifier(parser, parser->current.end + encoding_width)
|
10130
|
+
!char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
|
10127
10131
|
)
|
10128
10132
|
) {
|
10129
10133
|
lex_state_set(parser, PM_LEX_STATE_END);
|
@@ -10143,21 +10147,22 @@ lex_question_mark(pm_parser_t *parser) {
|
|
10143
10147
|
static pm_token_type_t
|
10144
10148
|
lex_at_variable(pm_parser_t *parser) {
|
10145
10149
|
pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
|
10146
|
-
|
10150
|
+
const uint8_t *end = parser->end;
|
10147
10151
|
|
10148
|
-
|
10152
|
+
size_t width;
|
10153
|
+
if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
|
10149
10154
|
parser->current.end += width;
|
10150
10155
|
|
10151
|
-
while (
|
10156
|
+
while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
|
10152
10157
|
parser->current.end += width;
|
10153
10158
|
}
|
10154
|
-
} else if (parser->current.end <
|
10159
|
+
} else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
|
10155
10160
|
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
|
10156
|
-
if (parser->version
|
10161
|
+
if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
|
10157
10162
|
diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
|
10158
10163
|
}
|
10159
10164
|
|
10160
|
-
size_t width = parser->encoding->char_width(parser->current.end,
|
10165
|
+
size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
|
10161
10166
|
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
|
10162
10167
|
} else {
|
10163
10168
|
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
|
@@ -10829,14 +10834,37 @@ parser_lex(pm_parser_t *parser) {
|
|
10829
10834
|
following = next_newline(following, parser->end - following);
|
10830
10835
|
}
|
10831
10836
|
|
10832
|
-
// If the lex state was ignored,
|
10833
|
-
//
|
10837
|
+
// If the lex state was ignored, we will lex the
|
10838
|
+
// ignored newline.
|
10839
|
+
if (lex_state_ignored_p(parser)) {
|
10840
|
+
if (!lexed_comment) parser_lex_ignored_newline(parser);
|
10841
|
+
lexed_comment = false;
|
10842
|
+
goto lex_next_token;
|
10843
|
+
}
|
10844
|
+
|
10845
|
+
// If we hit a '.' or a '&.' we will lex the ignored
|
10846
|
+
// newline.
|
10847
|
+
if (following && (
|
10848
|
+
(peek_at(parser, following) == '.') ||
|
10849
|
+
(peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
|
10850
|
+
)) {
|
10851
|
+
if (!lexed_comment) parser_lex_ignored_newline(parser);
|
10852
|
+
lexed_comment = false;
|
10853
|
+
goto lex_next_token;
|
10854
|
+
}
|
10855
|
+
|
10856
|
+
|
10857
|
+
// If we are parsing as CRuby 3.5 or later and we
|
10858
|
+
// hit a '&&' or a '||' then we will lex the ignored
|
10859
|
+
// newline.
|
10834
10860
|
if (
|
10835
|
-
|
10836
|
-
|
10837
|
-
(peek_at(parser, following) == '
|
10838
|
-
(peek_at(parser, following) == '
|
10839
|
-
|
10861
|
+
(parser->version >= PM_OPTIONS_VERSION_CRUBY_3_5) &&
|
10862
|
+
following && (
|
10863
|
+
(peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
|
10864
|
+
(peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
|
10865
|
+
(peek_at(parser, following) == 'a' && peek_at(parser, following + 1) == 'n' && peek_at(parser, following + 2) == 'd' && !char_is_identifier(parser, following + 3, parser->end - (following + 3))) ||
|
10866
|
+
(peek_at(parser, following) == 'o' && peek_at(parser, following + 1) == 'r' && !char_is_identifier(parser, following + 2, parser->end - (following + 2)))
|
10867
|
+
)
|
10840
10868
|
) {
|
10841
10869
|
if (!lexed_comment) parser_lex_ignored_newline(parser);
|
10842
10870
|
lexed_comment = false;
|
@@ -10876,6 +10904,63 @@ parser_lex(pm_parser_t *parser) {
|
|
10876
10904
|
parser->next_start = NULL;
|
10877
10905
|
LEX(PM_TOKEN_AMPERSAND_DOT);
|
10878
10906
|
}
|
10907
|
+
|
10908
|
+
if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_5) {
|
10909
|
+
// If we hit an && then we are in a logical chain
|
10910
|
+
// and we need to return the logical operator.
|
10911
|
+
if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
|
10912
|
+
if (!lexed_comment) parser_lex_ignored_newline(parser);
|
10913
|
+
lex_state_set(parser, PM_LEX_STATE_BEG);
|
10914
|
+
parser->current.start = next_content;
|
10915
|
+
parser->current.end = next_content + 2;
|
10916
|
+
parser->next_start = NULL;
|
10917
|
+
LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
|
10918
|
+
}
|
10919
|
+
|
10920
|
+
// If we hit a || then we are in a logical chain and
|
10921
|
+
// we need to return the logical operator.
|
10922
|
+
if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
|
10923
|
+
if (!lexed_comment) parser_lex_ignored_newline(parser);
|
10924
|
+
lex_state_set(parser, PM_LEX_STATE_BEG);
|
10925
|
+
parser->current.start = next_content;
|
10926
|
+
parser->current.end = next_content + 2;
|
10927
|
+
parser->next_start = NULL;
|
10928
|
+
LEX(PM_TOKEN_PIPE_PIPE);
|
10929
|
+
}
|
10930
|
+
|
10931
|
+
// If we hit an 'and' then we are in a logical chain
|
10932
|
+
// and we need to return the logical operator.
|
10933
|
+
if (
|
10934
|
+
peek_at(parser, next_content) == 'a' &&
|
10935
|
+
peek_at(parser, next_content + 1) == 'n' &&
|
10936
|
+
peek_at(parser, next_content + 2) == 'd' &&
|
10937
|
+
!char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
|
10938
|
+
) {
|
10939
|
+
if (!lexed_comment) parser_lex_ignored_newline(parser);
|
10940
|
+
lex_state_set(parser, PM_LEX_STATE_BEG);
|
10941
|
+
parser->current.start = next_content;
|
10942
|
+
parser->current.end = next_content + 3;
|
10943
|
+
parser->next_start = NULL;
|
10944
|
+
parser->command_start = true;
|
10945
|
+
LEX(PM_TOKEN_KEYWORD_AND);
|
10946
|
+
}
|
10947
|
+
|
10948
|
+
// If we hit an 'or' then we are in a logical chain
|
10949
|
+
// and we need to return the logical operator.
|
10950
|
+
if (
|
10951
|
+
peek_at(parser, next_content) == 'o' &&
|
10952
|
+
peek_at(parser, next_content + 1) == 'r' &&
|
10953
|
+
!char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
|
10954
|
+
) {
|
10955
|
+
if (!lexed_comment) parser_lex_ignored_newline(parser);
|
10956
|
+
lex_state_set(parser, PM_LEX_STATE_BEG);
|
10957
|
+
parser->current.start = next_content;
|
10958
|
+
parser->current.end = next_content + 2;
|
10959
|
+
parser->next_start = NULL;
|
10960
|
+
parser->command_start = true;
|
10961
|
+
LEX(PM_TOKEN_KEYWORD_OR);
|
10962
|
+
}
|
10963
|
+
}
|
10879
10964
|
}
|
10880
10965
|
|
10881
10966
|
// At this point we know this is a regular newline, and we can set the
|
@@ -11145,13 +11230,13 @@ parser_lex(pm_parser_t *parser) {
|
|
11145
11230
|
|
11146
11231
|
if (parser->current.end >= parser->end) {
|
11147
11232
|
parser->current.end = end;
|
11148
|
-
} else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
|
11233
|
+
} else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
|
11149
11234
|
parser->current.end = end;
|
11150
11235
|
} else {
|
11151
11236
|
if (quote == PM_HEREDOC_QUOTE_NONE) {
|
11152
11237
|
parser->current.end += width;
|
11153
11238
|
|
11154
|
-
while ((parser->current.end
|
11239
|
+
while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
|
11155
11240
|
parser->current.end += width;
|
11156
11241
|
}
|
11157
11242
|
} else {
|
@@ -11336,7 +11421,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11336
11421
|
} else {
|
11337
11422
|
const uint8_t delim = peek_offset(parser, 1);
|
11338
11423
|
|
11339
|
-
if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1)) {
|
11424
|
+
if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
|
11340
11425
|
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
|
11341
11426
|
}
|
11342
11427
|
}
|
@@ -11774,7 +11859,7 @@ parser_lex(pm_parser_t *parser) {
|
|
11774
11859
|
|
11775
11860
|
default: {
|
11776
11861
|
if (*parser->current.start != '_') {
|
11777
|
-
size_t width = char_is_identifier_start(parser, parser->current.start);
|
11862
|
+
size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
|
11778
11863
|
|
11779
11864
|
// If this isn't the beginning of an identifier, then
|
11780
11865
|
// it's an invalid token as we've exhausted all of the
|
@@ -12965,7 +13050,7 @@ typedef struct {
|
|
12965
13050
|
|
12966
13051
|
pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
|
12967
13052
|
// rescue
|
12968
|
-
[PM_TOKEN_KEYWORD_RESCUE_MODIFIER] =
|
13053
|
+
[PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
|
12969
13054
|
|
12970
13055
|
// if unless until while
|
12971
13056
|
[PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
@@ -13122,14 +13207,6 @@ match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
|
|
13122
13207
|
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
|
13123
13208
|
}
|
13124
13209
|
|
13125
|
-
/**
|
13126
|
-
* Returns true if the current token is any of the nine given types.
|
13127
|
-
*/
|
13128
|
-
static inline bool
|
13129
|
-
match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
|
13130
|
-
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
|
13131
|
-
}
|
13132
|
-
|
13133
13210
|
/**
|
13134
13211
|
* If the current token is of the specified type, lex forward by one token and
|
13135
13212
|
* return true. Otherwise, return false. For example:
|
@@ -13708,7 +13785,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
13708
13785
|
return target;
|
13709
13786
|
}
|
13710
13787
|
|
13711
|
-
if (char_is_identifier_start(parser, call->message_loc.start)) {
|
13788
|
+
if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
|
13712
13789
|
// When we get here, we have a method call, because it was
|
13713
13790
|
// previously marked as a method call but now we have an =. This
|
13714
13791
|
// looks like:
|
@@ -13936,6 +14013,15 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
|
|
13936
14013
|
if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
|
13937
14014
|
parser_lex(parser);
|
13938
14015
|
|
14016
|
+
// If we are at the end of the file, then we need to stop parsing
|
14017
|
+
// the statements entirely at this point. Mark the parser as
|
14018
|
+
// recovering, as we know that EOF closes the top-level context, and
|
14019
|
+
// then break out of the loop.
|
14020
|
+
if (match1(parser, PM_TOKEN_EOF)) {
|
14021
|
+
parser->recovering = true;
|
14022
|
+
break;
|
14023
|
+
}
|
14024
|
+
|
13939
14025
|
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
|
13940
14026
|
if (context_terminator(context, &parser->current)) break;
|
13941
14027
|
} else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
|
@@ -14642,7 +14728,7 @@ parse_parameters(
|
|
14642
14728
|
parser_lex(parser);
|
14643
14729
|
|
14644
14730
|
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
|
14645
|
-
uint32_t reads = parser->version
|
14731
|
+
uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
|
14646
14732
|
|
14647
14733
|
if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
|
14648
14734
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
|
@@ -14658,7 +14744,7 @@ parse_parameters(
|
|
14658
14744
|
// If the value of the parameter increased the number of
|
14659
14745
|
// reads of that parameter, then we need to warn that we
|
14660
14746
|
// have a circular definition.
|
14661
|
-
if ((parser->version
|
14747
|
+
if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
|
14662
14748
|
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
|
14663
14749
|
}
|
14664
14750
|
|
@@ -14743,13 +14829,13 @@ parse_parameters(
|
|
14743
14829
|
|
14744
14830
|
if (token_begins_expression_p(parser->current.type)) {
|
14745
14831
|
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
|
14746
|
-
uint32_t reads = parser->version
|
14832
|
+
uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
|
14747
14833
|
|
14748
14834
|
if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
|
14749
14835
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
|
14750
14836
|
if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
|
14751
14837
|
|
14752
|
-
if (parser->version
|
14838
|
+
if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
|
14753
14839
|
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
|
14754
14840
|
}
|
14755
14841
|
|
@@ -15051,8 +15137,8 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
|
|
15051
15137
|
case PM_TOKEN_NEWLINE:
|
15052
15138
|
case PM_TOKEN_SEMICOLON:
|
15053
15139
|
case PM_TOKEN_KEYWORD_THEN:
|
15054
|
-
// Here we have a terminator for the rescue keyword, in which
|
15055
|
-
// going to just continue on.
|
15140
|
+
// Here we have a terminator for the rescue keyword, in which
|
15141
|
+
// case we're going to just continue on.
|
15056
15142
|
break;
|
15057
15143
|
default: {
|
15058
15144
|
if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
|
@@ -15084,9 +15170,12 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
|
|
15084
15170
|
}
|
15085
15171
|
|
15086
15172
|
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
15087
|
-
accept1(parser, PM_TOKEN_KEYWORD_THEN)
|
15173
|
+
if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
|
15174
|
+
rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
|
15175
|
+
}
|
15088
15176
|
} else {
|
15089
15177
|
expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
|
15178
|
+
rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
|
15090
15179
|
}
|
15091
15180
|
|
15092
15181
|
if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
|
@@ -16450,7 +16539,7 @@ parse_variable(pm_parser_t *parser) {
|
|
16450
16539
|
pm_node_list_append(&current_scope->implicit_parameters, node);
|
16451
16540
|
|
16452
16541
|
return node;
|
16453
|
-
} else if ((parser->version
|
16542
|
+
} else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
|
16454
16543
|
pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
|
16455
16544
|
pm_node_list_append(&current_scope->implicit_parameters, node);
|
16456
16545
|
|
@@ -16802,6 +16891,10 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
|
|
16802
16891
|
// If we haven't already created our container for concatenation,
|
16803
16892
|
// we'll do that now.
|
16804
16893
|
if (!concating) {
|
16894
|
+
if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
|
16895
|
+
pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
|
16896
|
+
}
|
16897
|
+
|
16805
16898
|
concating = true;
|
16806
16899
|
pm_token_t bounds = not_provided(parser);
|
16807
16900
|
|
@@ -17040,7 +17133,7 @@ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const u
|
|
17040
17133
|
if (length == 0) return false;
|
17041
17134
|
|
17042
17135
|
// First ensure that it starts with a valid identifier starting character.
|
17043
|
-
size_t width = char_is_identifier_start(parser, start);
|
17136
|
+
size_t width = char_is_identifier_start(parser, start, end - start);
|
17044
17137
|
if (width == 0) return false;
|
17045
17138
|
|
17046
17139
|
// Next, ensure that it's not an uppercase character.
|
@@ -17053,7 +17146,7 @@ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const u
|
|
17053
17146
|
// Next, iterate through all of the bytes of the string to ensure that they
|
17054
17147
|
// are all valid identifier characters.
|
17055
17148
|
const uint8_t *cursor = start + width;
|
17056
|
-
while ((
|
17149
|
+
while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
|
17057
17150
|
return cursor == end;
|
17058
17151
|
}
|
17059
17152
|
|
@@ -17376,6 +17469,14 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
|
|
17376
17469
|
// If we found a label, we need to immediately return to the caller.
|
17377
17470
|
if (pm_symbol_node_label_p(node)) return node;
|
17378
17471
|
|
17472
|
+
// Call nodes (arithmetic operations) are not allowed in patterns
|
17473
|
+
if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
|
17474
|
+
pm_parser_err_node(parser, node, diag_id);
|
17475
|
+
pm_missing_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end);
|
17476
|
+
pm_node_destroy(parser, node);
|
17477
|
+
return (pm_node_t *) missing_node;
|
17478
|
+
}
|
17479
|
+
|
17379
17480
|
// Now that we have a primitive, we need to check if it's part of a range.
|
17380
17481
|
if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
|
17381
17482
|
pm_token_t operator = parser->previous;
|
@@ -17526,7 +17627,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
|
|
17526
17627
|
pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
|
17527
17628
|
accept1(parser, PM_TOKEN_NEWLINE);
|
17528
17629
|
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
|
17529
|
-
pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
|
17630
|
+
pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0);
|
17530
17631
|
|
17531
17632
|
if (node == NULL) {
|
17532
17633
|
node = right;
|
@@ -17658,7 +17759,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
|
|
17658
17759
|
// Gather up all of the patterns into the list.
|
17659
17760
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
17660
17761
|
// Break early here in case we have a trailing comma.
|
17661
|
-
if (
|
17762
|
+
if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
|
17662
17763
|
node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
|
17663
17764
|
pm_node_list_append(&nodes, node);
|
17664
17765
|
trailing_rest = true;
|
@@ -18149,12 +18250,19 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18149
18250
|
case PM_TOKEN_PARENTHESIS_LEFT:
|
18150
18251
|
case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
|
18151
18252
|
pm_token_t opening = parser->current;
|
18253
|
+
pm_node_flags_t flags = 0;
|
18152
18254
|
|
18153
18255
|
pm_node_list_t current_block_exits = { 0 };
|
18154
18256
|
pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
|
18155
18257
|
|
18156
18258
|
parser_lex(parser);
|
18157
|
-
while (
|
18259
|
+
while (true) {
|
18260
|
+
if (accept1(parser, PM_TOKEN_SEMICOLON)) {
|
18261
|
+
flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
|
18262
|
+
} else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
|
18263
|
+
break;
|
18264
|
+
}
|
18265
|
+
}
|
18158
18266
|
|
18159
18267
|
// If this is the end of the file or we match a right parenthesis, then
|
18160
18268
|
// we have an empty parentheses node, and we can immediately return.
|
@@ -18164,7 +18272,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18164
18272
|
pop_block_exits(parser, previous_block_exits);
|
18165
18273
|
pm_node_list_free(&current_block_exits);
|
18166
18274
|
|
18167
|
-
return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous);
|
18275
|
+
return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags);
|
18168
18276
|
}
|
18169
18277
|
|
18170
18278
|
// Otherwise, we're going to parse the first statement in the list
|
@@ -18177,9 +18285,23 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18177
18285
|
// Determine if this statement is followed by a terminator. In the
|
18178
18286
|
// case of a single statement, this is fine. But in the case of
|
18179
18287
|
// multiple statements it's required.
|
18180
|
-
bool terminator_found =
|
18288
|
+
bool terminator_found = false;
|
18289
|
+
|
18290
|
+
if (accept1(parser, PM_TOKEN_SEMICOLON)) {
|
18291
|
+
terminator_found = true;
|
18292
|
+
flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
|
18293
|
+
} else if (accept1(parser, PM_TOKEN_NEWLINE)) {
|
18294
|
+
terminator_found = true;
|
18295
|
+
}
|
18296
|
+
|
18181
18297
|
if (terminator_found) {
|
18182
|
-
while (
|
18298
|
+
while (true) {
|
18299
|
+
if (accept1(parser, PM_TOKEN_SEMICOLON)) {
|
18300
|
+
flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
|
18301
|
+
} else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
|
18302
|
+
break;
|
18303
|
+
}
|
18304
|
+
}
|
18183
18305
|
}
|
18184
18306
|
|
18185
18307
|
// If we hit a right parenthesis, then we're done parsing the
|
@@ -18251,13 +18373,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18251
18373
|
pm_statements_node_t *statements = pm_statements_node_create(parser);
|
18252
18374
|
pm_statements_node_body_append(parser, statements, statement, true);
|
18253
18375
|
|
18254
|
-
return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
|
18376
|
+
return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
|
18255
18377
|
}
|
18256
18378
|
|
18257
18379
|
// If we have more than one statement in the set of parentheses,
|
18258
18380
|
// then we are going to parse all of them as a list of statements.
|
18259
18381
|
// We'll do that here.
|
18260
18382
|
context_push(parser, PM_CONTEXT_PARENS);
|
18383
|
+
flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
|
18384
|
+
|
18261
18385
|
pm_statements_node_t *statements = pm_statements_node_create(parser);
|
18262
18386
|
pm_statements_node_body_append(parser, statements, statement, true);
|
18263
18387
|
|
@@ -18334,7 +18458,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18334
18458
|
pm_node_list_free(&current_block_exits);
|
18335
18459
|
|
18336
18460
|
pm_void_statements_check(parser, statements, true);
|
18337
|
-
return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
|
18461
|
+
return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
|
18338
18462
|
}
|
18339
18463
|
case PM_TOKEN_BRACE_LEFT: {
|
18340
18464
|
// If we were passed a current_hash_keys via the parser, then that
|
@@ -18526,17 +18650,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
18526
18650
|
call->closing_loc = arguments.closing_loc;
|
18527
18651
|
call->block = arguments.block;
|
18528
18652
|
|
18529
|
-
|
18530
|
-
|
18531
|
-
|
18532
|
-
if (arguments.arguments != NULL) {
|
18533
|
-
call->base.location.end = arguments.arguments->base.location.end;
|
18534
|
-
} else {
|
18535
|
-
call->base.location.end = call->message_loc.end;
|
18536
|
-
}
|
18537
|
-
} else {
|
18538
|
-
call->base.location.end = arguments.closing_loc.end;
|
18653
|
+
const uint8_t *end = pm_arguments_end(&arguments);
|
18654
|
+
if (!end) {
|
18655
|
+
end = call->message_loc.end;
|
18539
18656
|
}
|
18657
|
+
call->base.location.end = end;
|
18540
18658
|
}
|
18541
18659
|
} else {
|
18542
18660
|
// Otherwise, we know the identifier is in the local table. This
|
@@ -19064,7 +19182,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19064
19182
|
pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
|
19065
19183
|
|
19066
19184
|
if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
|
19185
|
+
pm_token_t next = parser->current;
|
19067
19186
|
parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
|
19187
|
+
|
19188
|
+
// Reject `foo && return bar`.
|
19189
|
+
if (!accepts_command_call && arguments.arguments != NULL) {
|
19190
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type));
|
19191
|
+
}
|
19068
19192
|
}
|
19069
19193
|
}
|
19070
19194
|
|
@@ -19380,7 +19504,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19380
19504
|
expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
|
19381
19505
|
|
19382
19506
|
operator = parser->previous;
|
19383
|
-
receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
|
19507
|
+
receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0);
|
19384
19508
|
|
19385
19509
|
// To push `PM_CONTEXT_DEF_PARAMS` again is for the same
|
19386
19510
|
// reason as described above.
|
@@ -19461,13 +19585,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19461
19585
|
pm_do_loop_stack_push(parser, false);
|
19462
19586
|
statements = (pm_node_t *) pm_statements_node_create(parser);
|
19463
19587
|
|
19464
|
-
|
19588
|
+
bool allow_command_call;
|
19589
|
+
if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_5) {
|
19590
|
+
allow_command_call = accepts_command_call;
|
19591
|
+
} else {
|
19592
|
+
// Allow `def foo = puts "Hello"` but not `private def foo = puts "Hello"`
|
19593
|
+
allow_command_call = binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION;
|
19594
|
+
}
|
19595
|
+
|
19596
|
+
pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_command_call, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
|
19465
19597
|
|
19466
19598
|
if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
|
19467
19599
|
context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
|
19468
19600
|
|
19469
19601
|
pm_token_t rescue_keyword = parser->previous;
|
19470
|
-
pm_node_t *value = parse_expression(parser,
|
19602
|
+
pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
|
19471
19603
|
context_pop(parser);
|
19472
19604
|
|
19473
19605
|
statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
|
@@ -19548,18 +19680,27 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19548
19680
|
pm_token_t lparen;
|
19549
19681
|
pm_token_t rparen;
|
19550
19682
|
pm_node_t *expression;
|
19683
|
+
|
19551
19684
|
context_push(parser, PM_CONTEXT_DEFINED);
|
19685
|
+
bool newline = accept1(parser, PM_TOKEN_NEWLINE);
|
19552
19686
|
|
19553
19687
|
if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
|
19554
19688
|
lparen = parser->previous;
|
19555
|
-
expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
|
19556
19689
|
|
19557
|
-
if (parser
|
19690
|
+
if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
19691
|
+
expression = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
|
19692
|
+
lparen = not_provided(parser);
|
19558
19693
|
rparen = not_provided(parser);
|
19559
19694
|
} else {
|
19560
|
-
|
19561
|
-
|
19562
|
-
|
19695
|
+
expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
|
19696
|
+
|
19697
|
+
if (parser->recovering) {
|
19698
|
+
rparen = not_provided(parser);
|
19699
|
+
} else {
|
19700
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
19701
|
+
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
|
19702
|
+
rparen = parser->previous;
|
19703
|
+
}
|
19563
19704
|
}
|
19564
19705
|
} else {
|
19565
19706
|
lparen = not_provided(parser);
|
@@ -19707,14 +19848,29 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
19707
19848
|
pm_arguments_t arguments = { 0 };
|
19708
19849
|
pm_node_t *receiver = NULL;
|
19709
19850
|
|
19851
|
+
// If we do not accept a command call, then we also do not accept a
|
19852
|
+
// not without parentheses. In this case we need to reject this
|
19853
|
+
// syntax.
|
19854
|
+
if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
|
19855
|
+
if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
|
19856
|
+
pm_parser_err(parser, parser->previous.end, parser->previous.end + 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
|
19857
|
+
} else {
|
19858
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
19859
|
+
pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
|
19860
|
+
}
|
19861
|
+
|
19862
|
+
return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
|
19863
|
+
}
|
19864
|
+
|
19710
19865
|
accept1(parser, PM_TOKEN_NEWLINE);
|
19711
19866
|
|
19712
19867
|
if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
|
19713
|
-
|
19868
|
+
pm_token_t lparen = parser->previous;
|
19714
19869
|
|
19715
19870
|
if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
19716
|
-
|
19871
|
+
receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
|
19717
19872
|
} else {
|
19873
|
+
arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
|
19718
19874
|
receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
|
19719
19875
|
|
19720
19876
|
if (!parser->recovering) {
|
@@ -20687,7 +20843,7 @@ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_
|
|
20687
20843
|
pm_token_t rescue = parser->current;
|
20688
20844
|
parser_lex(parser);
|
20689
20845
|
|
20690
|
-
pm_node_t *right = parse_expression(parser,
|
20846
|
+
pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
|
20691
20847
|
context_pop(parser);
|
20692
20848
|
|
20693
20849
|
return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
|
@@ -20793,7 +20949,7 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding
|
|
20793
20949
|
}
|
20794
20950
|
}
|
20795
20951
|
|
20796
|
-
pm_node_t *right = parse_expression(parser,
|
20952
|
+
pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
|
20797
20953
|
context_pop(parser);
|
20798
20954
|
|
20799
20955
|
return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
|
@@ -20849,6 +21005,123 @@ typedef struct {
|
|
20849
21005
|
bool shared;
|
20850
21006
|
} parse_regular_expression_named_capture_data_t;
|
20851
21007
|
|
21008
|
+
static inline const uint8_t *
|
21009
|
+
pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
|
21010
|
+
cursor++;
|
21011
|
+
|
21012
|
+
if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
|
21013
|
+
uint8_t value = escape_hexadecimal_digit(*cursor);
|
21014
|
+
cursor++;
|
21015
|
+
|
21016
|
+
if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
|
21017
|
+
value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
|
21018
|
+
cursor++;
|
21019
|
+
}
|
21020
|
+
|
21021
|
+
pm_buffer_append_byte(unescaped, value);
|
21022
|
+
} else {
|
21023
|
+
pm_buffer_append_string(unescaped, "\\x", 2);
|
21024
|
+
}
|
21025
|
+
|
21026
|
+
return cursor;
|
21027
|
+
}
|
21028
|
+
|
21029
|
+
static inline const uint8_t *
|
21030
|
+
pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
|
21031
|
+
uint8_t value = (uint8_t) (*cursor - '0');
|
21032
|
+
cursor++;
|
21033
|
+
|
21034
|
+
if (cursor < end && pm_char_is_octal_digit(*cursor)) {
|
21035
|
+
value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
|
21036
|
+
cursor++;
|
21037
|
+
|
21038
|
+
if (cursor < end && pm_char_is_octal_digit(*cursor)) {
|
21039
|
+
value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
|
21040
|
+
cursor++;
|
21041
|
+
}
|
21042
|
+
}
|
21043
|
+
|
21044
|
+
pm_buffer_append_byte(unescaped, value);
|
21045
|
+
return cursor;
|
21046
|
+
}
|
21047
|
+
|
21048
|
+
static inline const uint8_t *
|
21049
|
+
pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
|
21050
|
+
const uint8_t *start = cursor - 1;
|
21051
|
+
cursor++;
|
21052
|
+
|
21053
|
+
if (cursor >= end) {
|
21054
|
+
pm_buffer_append_string(unescaped, "\\u", 2);
|
21055
|
+
return cursor;
|
21056
|
+
}
|
21057
|
+
|
21058
|
+
if (*cursor != '{') {
|
21059
|
+
size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
|
21060
|
+
uint32_t value = escape_unicode(parser, cursor, length);
|
21061
|
+
|
21062
|
+
if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
|
21063
|
+
pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
|
21064
|
+
}
|
21065
|
+
|
21066
|
+
return cursor + length;
|
21067
|
+
}
|
21068
|
+
|
21069
|
+
cursor++;
|
21070
|
+
for (;;) {
|
21071
|
+
while (cursor < end && *cursor == ' ') cursor++;
|
21072
|
+
|
21073
|
+
if (cursor >= end) break;
|
21074
|
+
if (*cursor == '}') {
|
21075
|
+
cursor++;
|
21076
|
+
break;
|
21077
|
+
}
|
21078
|
+
|
21079
|
+
size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
|
21080
|
+
uint32_t value = escape_unicode(parser, cursor, length);
|
21081
|
+
|
21082
|
+
(void) pm_buffer_append_unicode_codepoint(unescaped, value);
|
21083
|
+
cursor += length;
|
21084
|
+
}
|
21085
|
+
|
21086
|
+
return cursor;
|
21087
|
+
}
|
21088
|
+
|
21089
|
+
static void
|
21090
|
+
pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor) {
|
21091
|
+
const uint8_t *end = source + length;
|
21092
|
+
pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
|
21093
|
+
|
21094
|
+
for (;;) {
|
21095
|
+
if (++cursor >= end) {
|
21096
|
+
pm_buffer_append_byte(unescaped, '\\');
|
21097
|
+
return;
|
21098
|
+
}
|
21099
|
+
|
21100
|
+
switch (*cursor) {
|
21101
|
+
case 'x':
|
21102
|
+
cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
|
21103
|
+
break;
|
21104
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
|
21105
|
+
cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
|
21106
|
+
break;
|
21107
|
+
case 'u':
|
21108
|
+
cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end);
|
21109
|
+
break;
|
21110
|
+
default:
|
21111
|
+
pm_buffer_append_byte(unescaped, '\\');
|
21112
|
+
break;
|
21113
|
+
}
|
21114
|
+
|
21115
|
+
const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
|
21116
|
+
if (next_cursor == NULL) break;
|
21117
|
+
|
21118
|
+
pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
|
21119
|
+
cursor = next_cursor;
|
21120
|
+
}
|
21121
|
+
|
21122
|
+
pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
|
21123
|
+
}
|
21124
|
+
|
20852
21125
|
/**
|
20853
21126
|
* This callback is called when the regular expression parser encounters a named
|
20854
21127
|
* capture group.
|
@@ -20863,13 +21136,32 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
|
|
20863
21136
|
|
20864
21137
|
const uint8_t *source = pm_string_source(capture);
|
20865
21138
|
size_t length = pm_string_length(capture);
|
21139
|
+
pm_buffer_t unescaped = { 0 };
|
21140
|
+
|
21141
|
+
// First, we need to handle escapes within the name of the capture group.
|
21142
|
+
// This is because regular expressions have three different representations
|
21143
|
+
// in prism. The first is the plain source code. The second is the
|
21144
|
+
// representation that will be sent to the regular expression engine, which
|
21145
|
+
// is the value of the "unescaped" field. This is poorly named, because it
|
21146
|
+
// actually still contains escapes, just a subset of them that the regular
|
21147
|
+
// expression engine knows how to handle. The third representation is fully
|
21148
|
+
// unescaped, which is what we need.
|
21149
|
+
const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
|
21150
|
+
if (PRISM_UNLIKELY(cursor != NULL)) {
|
21151
|
+
pm_named_capture_escape(parser, &unescaped, source, length, cursor);
|
21152
|
+
source = (const uint8_t *) pm_buffer_value(&unescaped);
|
21153
|
+
length = pm_buffer_length(&unescaped);
|
21154
|
+
}
|
20866
21155
|
|
20867
21156
|
pm_location_t location;
|
20868
21157
|
pm_constant_id_t name;
|
20869
21158
|
|
20870
21159
|
// If the name of the capture group isn't a valid identifier, we do
|
20871
21160
|
// not add it to the local table.
|
20872
|
-
if (!pm_slice_is_valid_local(parser, source, source + length))
|
21161
|
+
if (!pm_slice_is_valid_local(parser, source, source + length)) {
|
21162
|
+
pm_buffer_free(&unescaped);
|
21163
|
+
return;
|
21164
|
+
}
|
20873
21165
|
|
20874
21166
|
if (callback_data->shared) {
|
20875
21167
|
// If the unescaped string is a slice of the source, then we can
|
@@ -20897,7 +21189,10 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
|
|
20897
21189
|
if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
|
20898
21190
|
// If the local is not already a local but it is a keyword, then we
|
20899
21191
|
// do not want to add a capture for this.
|
20900
|
-
if (pm_local_is_keyword((const char *) source, length))
|
21192
|
+
if (pm_local_is_keyword((const char *) source, length)) {
|
21193
|
+
pm_buffer_free(&unescaped);
|
21194
|
+
return;
|
21195
|
+
}
|
20901
21196
|
|
20902
21197
|
// If the identifier is not already a local, then we will add it to
|
20903
21198
|
// the local table.
|
@@ -20915,6 +21210,8 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
|
|
20915
21210
|
pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
|
20916
21211
|
pm_node_list_append(&callback_data->match->targets, target);
|
20917
21212
|
}
|
21213
|
+
|
21214
|
+
pm_buffer_free(&unescaped);
|
20918
21215
|
}
|
20919
21216
|
|
20920
21217
|
/**
|
@@ -20966,6 +21263,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
20966
21263
|
}
|
20967
21264
|
PRISM_FALLTHROUGH
|
20968
21265
|
case PM_CASE_WRITABLE: {
|
21266
|
+
// When we have `it = value`, we need to add `it` as a local
|
21267
|
+
// variable before parsing the value, in case the value
|
21268
|
+
// references the variable.
|
21269
|
+
if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
|
21270
|
+
pm_parser_local_add_location(parser, node->location.start, node->location.end, 0);
|
21271
|
+
}
|
21272
|
+
|
20969
21273
|
parser_lex(parser);
|
20970
21274
|
pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
|
20971
21275
|
|
@@ -21055,7 +21359,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
21055
21359
|
pm_node_destroy(parser, node);
|
21056
21360
|
return result;
|
21057
21361
|
}
|
21362
|
+
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
|
21363
|
+
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
|
21364
|
+
parser_lex(parser);
|
21365
|
+
|
21366
|
+
pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
|
21367
|
+
pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0);
|
21368
|
+
|
21369
|
+
parse_target_implicit_parameter(parser, node);
|
21370
|
+
pm_node_destroy(parser, node);
|
21371
|
+
return result;
|
21372
|
+
}
|
21058
21373
|
case PM_LOCAL_VARIABLE_READ_NODE: {
|
21374
|
+
if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
|
21375
|
+
PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
|
21376
|
+
parse_target_implicit_parameter(parser, node);
|
21377
|
+
}
|
21378
|
+
|
21059
21379
|
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
|
21060
21380
|
parser_lex(parser);
|
21061
21381
|
|
@@ -21173,7 +21493,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
21173
21493
|
pm_node_destroy(parser, node);
|
21174
21494
|
return result;
|
21175
21495
|
}
|
21496
|
+
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
|
21497
|
+
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
|
21498
|
+
parser_lex(parser);
|
21499
|
+
|
21500
|
+
pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
|
21501
|
+
pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0);
|
21502
|
+
|
21503
|
+
parse_target_implicit_parameter(parser, node);
|
21504
|
+
pm_node_destroy(parser, node);
|
21505
|
+
return result;
|
21506
|
+
}
|
21176
21507
|
case PM_LOCAL_VARIABLE_READ_NODE: {
|
21508
|
+
if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
|
21509
|
+
PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
|
21510
|
+
parse_target_implicit_parameter(parser, node);
|
21511
|
+
}
|
21512
|
+
|
21177
21513
|
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
|
21178
21514
|
parser_lex(parser);
|
21179
21515
|
|
@@ -21301,7 +21637,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
21301
21637
|
pm_node_destroy(parser, node);
|
21302
21638
|
return result;
|
21303
21639
|
}
|
21640
|
+
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
|
21641
|
+
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
|
21642
|
+
parser_lex(parser);
|
21643
|
+
|
21644
|
+
pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
|
21645
|
+
pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0);
|
21646
|
+
|
21647
|
+
parse_target_implicit_parameter(parser, node);
|
21648
|
+
pm_node_destroy(parser, node);
|
21649
|
+
return result;
|
21650
|
+
}
|
21304
21651
|
case PM_LOCAL_VARIABLE_READ_NODE: {
|
21652
|
+
if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
|
21653
|
+
PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
|
21654
|
+
parse_target_implicit_parameter(parser, node);
|
21655
|
+
}
|
21656
|
+
|
21305
21657
|
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
|
21306
21658
|
parser_lex(parser);
|
21307
21659
|
|
@@ -21911,6 +22263,12 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
21911
22263
|
) {
|
21912
22264
|
node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
|
21913
22265
|
|
22266
|
+
if (context_terminator(parser->current_context->context, &parser->current)) {
|
22267
|
+
// If this token terminates the current context, then we need to
|
22268
|
+
// stop parsing the expression, as it has become a statement.
|
22269
|
+
return node;
|
22270
|
+
}
|
22271
|
+
|
21914
22272
|
switch (PM_NODE_TYPE(node)) {
|
21915
22273
|
case PM_MULTI_WRITE_NODE:
|
21916
22274
|
// Multi-write nodes are statements, and cannot be followed by
|
@@ -22035,6 +22393,10 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
22035
22393
|
static pm_statements_node_t *
|
22036
22394
|
wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
|
22037
22395
|
if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
|
22396
|
+
if (statements == NULL) {
|
22397
|
+
statements = pm_statements_node_create(parser);
|
22398
|
+
}
|
22399
|
+
|
22038
22400
|
pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
|
22039
22401
|
pm_arguments_node_arguments_append(
|
22040
22402
|
arguments,
|
@@ -22050,6 +22412,10 @@ wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
|
|
22050
22412
|
|
22051
22413
|
if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
|
22052
22414
|
if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
|
22415
|
+
if (statements == NULL) {
|
22416
|
+
statements = pm_statements_node_create(parser);
|
22417
|
+
}
|
22418
|
+
|
22053
22419
|
pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
|
22054
22420
|
pm_arguments_node_arguments_append(
|
22055
22421
|
arguments,
|
@@ -22118,9 +22484,7 @@ parse_program(pm_parser_t *parser) {
|
|
22118
22484
|
parser_lex(parser);
|
22119
22485
|
pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
|
22120
22486
|
|
22121
|
-
if (statements
|
22122
|
-
statements = pm_statements_node_create(parser);
|
22123
|
-
} else if (!parser->parsing_eval) {
|
22487
|
+
if (statements != NULL && !parser->parsing_eval) {
|
22124
22488
|
// If we have statements, then the top-level statement should be
|
22125
22489
|
// explicitly checked as well. We have to do this here because
|
22126
22490
|
// everywhere else we check all but the last statement.
|
@@ -22132,13 +22496,6 @@ parse_program(pm_parser_t *parser) {
|
|
22132
22496
|
pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
|
22133
22497
|
pm_parser_scope_pop(parser);
|
22134
22498
|
|
22135
|
-
// If this is an empty file, then we're still going to parse all of the
|
22136
|
-
// statements in order to gather up all of the comments and such. Here we'll
|
22137
|
-
// correct the location information.
|
22138
|
-
if (pm_statements_node_body_length(statements) == 0) {
|
22139
|
-
pm_statements_node_location_set(statements, parser->start, parser->start);
|
22140
|
-
}
|
22141
|
-
|
22142
22499
|
// At the top level, see if we need to wrap the statements in a program
|
22143
22500
|
// node with a while loop based on the options.
|
22144
22501
|
if (parser->command_line & (PM_OPTIONS_COMMAND_LINE_P | PM_OPTIONS_COMMAND_LINE_N)) {
|
@@ -22148,6 +22505,14 @@ parse_program(pm_parser_t *parser) {
|
|
22148
22505
|
pm_node_list_free(&current_block_exits);
|
22149
22506
|
}
|
22150
22507
|
|
22508
|
+
// If this is an empty file, then we're still going to parse all of the
|
22509
|
+
// statements in order to gather up all of the comments and such. Here we'll
|
22510
|
+
// correct the location information.
|
22511
|
+
if (statements == NULL) {
|
22512
|
+
statements = pm_statements_node_create(parser);
|
22513
|
+
pm_statements_node_location_set(statements, parser->start, parser->start);
|
22514
|
+
}
|
22515
|
+
|
22151
22516
|
return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
|
22152
22517
|
}
|
22153
22518
|
|
@@ -22341,7 +22706,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
22341
22706
|
|
22342
22707
|
// Scopes given from the outside are not allowed to have numbered
|
22343
22708
|
// parameters.
|
22344
|
-
parser->current_scope->parameters
|
22709
|
+
parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
|
22345
22710
|
|
22346
22711
|
for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
|
22347
22712
|
const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
|
@@ -22358,6 +22723,12 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
22358
22723
|
}
|
22359
22724
|
}
|
22360
22725
|
|
22726
|
+
// Now that we have established the user-provided options, check if
|
22727
|
+
// a version was given and parse as the latest version otherwise.
|
22728
|
+
if (parser->version == PM_OPTIONS_VERSION_UNSET) {
|
22729
|
+
parser->version = PM_OPTIONS_VERSION_LATEST;
|
22730
|
+
}
|
22731
|
+
|
22361
22732
|
pm_accepts_block_stack_push(parser, true);
|
22362
22733
|
|
22363
22734
|
// Skip past the UTF-8 BOM if it exists.
|
@@ -22411,7 +22782,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
22411
22782
|
}
|
22412
22783
|
|
22413
22784
|
search_shebang = false;
|
22414
|
-
} else if (options->main_script && !parser->parsing_eval) {
|
22785
|
+
} else if (options != NULL && options->main_script && !parser->parsing_eval) {
|
22415
22786
|
search_shebang = true;
|
22416
22787
|
}
|
22417
22788
|
}
|
@@ -22551,11 +22922,11 @@ pm_parse(pm_parser_t *parser) {
|
|
22551
22922
|
* otherwise return true.
|
22552
22923
|
*/
|
22553
22924
|
static bool
|
22554
|
-
pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *
|
22925
|
+
pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
|
22555
22926
|
#define LINE_SIZE 4096
|
22556
22927
|
char line[LINE_SIZE];
|
22557
22928
|
|
22558
|
-
while (memset(line, '\n', LINE_SIZE),
|
22929
|
+
while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
|
22559
22930
|
size_t length = LINE_SIZE;
|
22560
22931
|
while (length > 0 && line[length - 1] == '\n') length--;
|
22561
22932
|
|
@@ -22587,6 +22958,12 @@ pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t
|
|
22587
22958
|
if (strncmp(line, "__END__\r\n", 9) == 0) return false;
|
22588
22959
|
break;
|
22589
22960
|
}
|
22961
|
+
|
22962
|
+
// All data should be read via gets. If the string returned by gets
|
22963
|
+
// _doesn't_ end with a newline, then we assume we hit EOF condition.
|
22964
|
+
if (stream_feof(stream)) {
|
22965
|
+
break;
|
22966
|
+
}
|
22590
22967
|
}
|
22591
22968
|
|
22592
22969
|
return true;
|
@@ -22622,16 +22999,17 @@ pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
|
|
22622
22999
|
* can stream stdin in to Ruby so we need to support a streaming API.
|
22623
23000
|
*/
|
22624
23001
|
PRISM_EXPORTED_FUNCTION pm_node_t *
|
22625
|
-
pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *
|
23002
|
+
pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
|
22626
23003
|
pm_buffer_init(buffer);
|
22627
23004
|
|
22628
|
-
bool eof = pm_parse_stream_read(buffer, stream,
|
23005
|
+
bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
|
23006
|
+
|
22629
23007
|
pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
|
22630
23008
|
pm_node_t *node = pm_parse(parser);
|
22631
23009
|
|
22632
23010
|
while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
|
22633
23011
|
pm_node_destroy(parser, node);
|
22634
|
-
eof = pm_parse_stream_read(buffer, stream,
|
23012
|
+
eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
|
22635
23013
|
|
22636
23014
|
pm_parser_free(parser);
|
22637
23015
|
pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
|
@@ -22723,13 +23101,13 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons
|
|
22723
23101
|
* given stream into to the given buffer.
|
22724
23102
|
*/
|
22725
23103
|
PRISM_EXPORTED_FUNCTION void
|
22726
|
-
pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *
|
23104
|
+
pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
|
22727
23105
|
pm_parser_t parser;
|
22728
23106
|
pm_options_t options = { 0 };
|
22729
23107
|
pm_options_read(&options, data);
|
22730
23108
|
|
22731
23109
|
pm_buffer_t parser_buffer;
|
22732
|
-
pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream,
|
23110
|
+
pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, stream_feof, &options);
|
22733
23111
|
pm_serialize_header(buffer);
|
22734
23112
|
pm_serialize_content(&parser, node, buffer);
|
22735
23113
|
pm_buffer_append_byte(buffer, '\0');
|