prism 0.19.0 → 0.24.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +102 -1
- data/Makefile +5 -0
- data/README.md +9 -6
- data/config.yml +236 -38
- data/docs/build_system.md +19 -2
- data/docs/cruby_compilation.md +27 -0
- data/docs/parser_translation.md +34 -0
- data/docs/parsing_rules.md +19 -0
- data/docs/releasing.md +84 -16
- data/docs/ruby_api.md +1 -1
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +19 -5
- data/ext/prism/api_node.c +1989 -1525
- data/ext/prism/extension.c +130 -30
- data/ext/prism/extension.h +2 -2
- data/include/prism/ast.h +1700 -505
- data/include/prism/defines.h +8 -0
- data/include/prism/diagnostic.h +49 -7
- data/include/prism/encoding.h +17 -0
- data/include/prism/options.h +40 -14
- data/include/prism/parser.h +34 -18
- data/include/prism/util/pm_buffer.h +9 -0
- data/include/prism/util/pm_constant_pool.h +18 -0
- data/include/prism/util/pm_newline_list.h +4 -14
- data/include/prism/util/pm_strpbrk.h +4 -1
- data/include/prism/version.h +2 -2
- data/include/prism.h +19 -2
- data/lib/prism/debug.rb +11 -5
- data/lib/prism/desugar_compiler.rb +225 -80
- data/lib/prism/dot_visitor.rb +36 -14
- data/lib/prism/dsl.rb +302 -299
- data/lib/prism/ffi.rb +107 -76
- data/lib/prism/lex_compat.rb +17 -1
- data/lib/prism/node.rb +4580 -2607
- data/lib/prism/node_ext.rb +27 -4
- data/lib/prism/parse_result.rb +75 -29
- data/lib/prism/serialize.rb +633 -305
- data/lib/prism/translation/parser/compiler.rb +1838 -0
- data/lib/prism/translation/parser/lexer.rb +335 -0
- data/lib/prism/translation/parser/rubocop.rb +45 -0
- data/lib/prism/translation/parser.rb +190 -0
- data/lib/prism/translation/parser33.rb +12 -0
- data/lib/prism/translation/parser34.rb +12 -0
- data/lib/prism/translation/ripper.rb +696 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +11 -0
- data/lib/prism.rb +1 -1
- data/prism.gemspec +18 -7
- data/rbi/prism.rbi +150 -88
- data/rbi/prism_static.rbi +15 -3
- data/sig/prism.rbs +996 -961
- data/sig/prism_static.rbs +123 -46
- data/src/diagnostic.c +264 -219
- data/src/encoding.c +21 -26
- data/src/node.c +2 -6
- data/src/options.c +29 -5
- data/src/prettyprint.c +176 -44
- data/src/prism.c +1499 -564
- data/src/serialize.c +35 -21
- data/src/token_type.c +353 -4
- data/src/util/pm_buffer.c +11 -0
- data/src/util/pm_constant_pool.c +37 -11
- data/src/util/pm_newline_list.c +6 -15
- data/src/util/pm_string.c +0 -7
- data/src/util/pm_strpbrk.c +122 -14
- metadata +16 -5
- data/docs/building.md +0 -29
- data/lib/prism/ripper_compat.rb +0 -207
data/src/prism.c
CHANGED
@@ -51,6 +51,7 @@ debug_context(pm_context_t context) {
|
|
51
51
|
case PM_CONTEXT_IF: return "IF";
|
52
52
|
case PM_CONTEXT_MAIN: return "MAIN";
|
53
53
|
case PM_CONTEXT_MODULE: return "MODULE";
|
54
|
+
case PM_CONTEXT_NONE: return "NONE";
|
54
55
|
case PM_CONTEXT_PARENS: return "PARENS";
|
55
56
|
case PM_CONTEXT_POSTEXE: return "POSTEXE";
|
56
57
|
case PM_CONTEXT_PREDICATE: return "PREDICATE";
|
@@ -164,7 +165,7 @@ debug_state(pm_parser_t *parser) {
|
|
164
165
|
|
165
166
|
PRISM_ATTRIBUTE_UNUSED static void
|
166
167
|
debug_token(pm_token_t * token) {
|
167
|
-
fprintf(stderr, "%s: \"%.*s\"\n",
|
168
|
+
fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_human(token->type), (int) (token->end - token->start), token->start);
|
168
169
|
}
|
169
170
|
|
170
171
|
#endif
|
@@ -423,6 +424,11 @@ lex_state_beg_p(pm_parser_t *parser) {
|
|
423
424
|
return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
|
424
425
|
}
|
425
426
|
|
427
|
+
static inline bool
|
428
|
+
lex_state_arg_labeled_p(pm_parser_t *parser) {
|
429
|
+
return (parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
|
430
|
+
}
|
431
|
+
|
426
432
|
static inline bool
|
427
433
|
lex_state_arg_p(pm_parser_t *parser) {
|
428
434
|
return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
|
@@ -487,7 +493,8 @@ pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_
|
|
487
493
|
/**
|
488
494
|
* Append an error to the list of errors on the parser using a format string.
|
489
495
|
*/
|
490
|
-
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...)
|
496
|
+
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
|
497
|
+
pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
|
491
498
|
|
492
499
|
/**
|
493
500
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -502,7 +509,8 @@ pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
502
509
|
* Append an error to the list of errors on the parser using the given location
|
503
510
|
* using a format string.
|
504
511
|
*/
|
505
|
-
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...)
|
512
|
+
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
|
513
|
+
PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
|
506
514
|
|
507
515
|
/**
|
508
516
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -517,7 +525,15 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_
|
|
517
525
|
* Append an error to the list of errors on the parser using the location of the
|
518
526
|
* given node and a format string.
|
519
527
|
*/
|
520
|
-
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...)
|
528
|
+
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
|
529
|
+
PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
|
530
|
+
|
531
|
+
/**
 * Append a formatted error spanning the given node's location. The format
 * arguments supplied are the byte length of the node's source range (as an
 * int) followed by a pointer to the start of that range.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (node)->location.start, \
        (node)->location.end, \
        diag_id, \
        (int) ((node)->location.end - (node)->location.start), \
        (const char *) (node)->location.start \
    )
|
521
537
|
|
522
538
|
/**
|
523
539
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -541,16 +557,22 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
|
|
541
557
|
* Append an error to the list of errors on the parser using the location of the
|
542
558
|
* given token and a format string.
|
543
559
|
*/
|
544
|
-
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...)
|
560
|
+
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
|
561
|
+
PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
|
562
|
+
|
563
|
+
/**
 * Append a formatted error spanning the given token (passed by value, not by
 * pointer). The format arguments supplied are the byte length of the token (as
 * an int) followed by a pointer to the token's first byte.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (token).start, \
        (token).end, \
        diag_id, \
        (int) ((token).end - (token).start), \
        (const char *) (token).start \
    )
|
545
569
|
|
546
570
|
/**
|
547
571
|
* Append a warning to the list of warnings on the parser.
|
548
572
|
*/
|
549
573
|
static inline void
|
550
574
|
pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
|
551
|
-
|
552
|
-
pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
|
553
|
-
}
|
575
|
+
pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
|
554
576
|
}
|
555
577
|
|
556
578
|
/**
|
@@ -813,6 +835,9 @@ typedef struct {
|
|
813
835
|
|
814
836
|
/** The optional block attached to the call. */
|
815
837
|
pm_node_t *block;
|
838
|
+
|
839
|
+
/** The flag indicating whether this arguments list has a forwarding argument. */
|
840
|
+
bool has_forwarding;
|
816
841
|
} pm_arguments_t;
|
817
842
|
|
818
843
|
/**
|
@@ -864,6 +889,105 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
|
|
864
889
|
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
|
865
890
|
}
|
866
891
|
|
892
|
+
/******************************************************************************/
|
893
|
+
/* Basic character checks */
|
894
|
+
/******************************************************************************/
|
895
|
+
|
896
|
+
/**
|
897
|
+
* This function is used extremely frequently to lex all of the identifiers in a
|
898
|
+
* source file, so it's important that it be as fast as possible. For this
|
899
|
+
* reason we have the encoding_changed boolean to check if we need to go through
|
900
|
+
* the function pointer or can just directly use the UTF-8 functions.
|
901
|
+
*/
|
902
|
+
static inline size_t
|
903
|
+
char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
|
904
|
+
if (parser->encoding_changed) {
|
905
|
+
size_t width;
|
906
|
+
if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
|
907
|
+
return width;
|
908
|
+
} else if (*b == '_') {
|
909
|
+
return 1;
|
910
|
+
} else if (*b >= 0x80) {
|
911
|
+
return parser->encoding->char_width(b, parser->end - b);
|
912
|
+
} else {
|
913
|
+
return 0;
|
914
|
+
}
|
915
|
+
} else if (*b < 0x80) {
|
916
|
+
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
|
917
|
+
} else {
|
918
|
+
return pm_encoding_utf_8_char_width(b, parser->end - b);
|
919
|
+
}
|
920
|
+
}
|
921
|
+
|
922
|
+
/**
|
923
|
+
* Similar to char_is_identifier but this function assumes that the encoding
|
924
|
+
* has not been changed.
|
925
|
+
*/
|
926
|
+
static inline size_t
|
927
|
+
char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
928
|
+
if (*b < 0x80) {
|
929
|
+
return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
|
930
|
+
} else {
|
931
|
+
return pm_encoding_utf_8_char_width(b, end - b);
|
932
|
+
}
|
933
|
+
}
|
934
|
+
|
935
|
+
/**
|
936
|
+
* Like the above, this function is also used extremely frequently to lex all of
|
937
|
+
* the identifiers in a source file once the first character has been found. So
|
938
|
+
* it's important that it be as fast as possible.
|
939
|
+
*/
|
940
|
+
static inline size_t
|
941
|
+
char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
942
|
+
if (parser->encoding_changed) {
|
943
|
+
size_t width;
|
944
|
+
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
|
945
|
+
return width;
|
946
|
+
} else if (*b == '_') {
|
947
|
+
return 1;
|
948
|
+
} else if (*b >= 0x80) {
|
949
|
+
return parser->encoding->char_width(b, parser->end - b);
|
950
|
+
} else {
|
951
|
+
return 0;
|
952
|
+
}
|
953
|
+
}
|
954
|
+
return char_is_identifier_utf8(b, parser->end);
|
955
|
+
}
|
956
|
+
|
957
|
+
// Bitmap of the punctuation characters that may follow a $ in a global name.
// The printable ASCII range is split into three 32-bit words: word 0 covers
// 0x20-0x3f, word 1 covers 0x40-0x5f and word 2 covers 0x60-0x7e. BIT yields
// the mask for character c when it falls into word idx, and 0 otherwise.
#define BIT(c, idx) (((c) / 32 - 1 == (idx)) ? (1U << ((c) % 32)) : 0)
#define PUNCT(idx) ( \
    BIT('~', idx)  | \
    BIT('*', idx)  | \
    BIT('$', idx)  | \
    BIT('?', idx)  | \
    BIT('!', idx)  | \
    BIT('@', idx)  | \
    BIT('/', idx)  | \
    BIT('\\', idx) | \
    BIT(';', idx)  | \
    BIT(',', idx)  | \
    BIT('.', idx)  | \
    BIT('=', idx)  | \
    BIT(':', idx)  | \
    BIT('<', idx)  | \
    BIT('>', idx)  | \
    BIT('\"', idx) | \
    BIT('&', idx)  | \
    BIT('`', idx)  | \
    BIT('\'', idx) | \
    BIT('+', idx)  | \
    BIT('0', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };

#undef BIT
#undef PUNCT

/**
 * Returns true if the given byte is one of the punctuation characters recorded
 * in pm_global_name_punctuation_hash. Bytes outside 0x21-0x7e are rejected
 * before the table is consulted.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    const unsigned int code = (unsigned int) b;

    if (code > 0x20 && code <= 0x7e) {
        const unsigned int word = pm_global_name_punctuation_hash[(code - 0x20) / 32];
        return ((word >> (code % 32)) & 1) != 0;
    }

    return false;
}
|
981
|
+
|
982
|
+
static inline bool
|
983
|
+
token_is_setter_name(pm_token_t *token) {
|
984
|
+
return (
|
985
|
+
(token->type == PM_TOKEN_IDENTIFIER) &&
|
986
|
+
(token->end - token->start >= 2) &&
|
987
|
+
(token->end[-1] == '=')
|
988
|
+
);
|
989
|
+
}
|
990
|
+
|
867
991
|
/******************************************************************************/
|
868
992
|
/* Node flag handling functions */
|
869
993
|
/******************************************************************************/
|
@@ -884,6 +1008,22 @@ pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
|
|
884
1008
|
node->flags &= (pm_node_flags_t) ~flag;
|
885
1009
|
}
|
886
1010
|
|
1011
|
+
/**
|
1012
|
+
* Set the repeated parameter flag on the given node.
|
1013
|
+
*/
|
1014
|
+
static inline void
|
1015
|
+
pm_node_flag_set_repeated_parameter(pm_node_t *node) {
|
1016
|
+
assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
|
1017
|
+
PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
|
1018
|
+
PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
|
1019
|
+
PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
|
1020
|
+
PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
|
1021
|
+
PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
|
1022
|
+
PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
|
1023
|
+
PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
|
1024
|
+
|
1025
|
+
pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
|
1026
|
+
}
|
887
1027
|
|
888
1028
|
/******************************************************************************/
|
889
1029
|
/* Node creation functions */
|
@@ -977,7 +1117,7 @@ static inline void *
|
|
977
1117
|
pm_alloc_node(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
|
978
1118
|
void *memory = calloc(1, size);
|
979
1119
|
if (memory == NULL) {
|
980
|
-
fprintf(stderr, "Failed to allocate %
|
1120
|
+
fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
|
981
1121
|
abort();
|
982
1122
|
}
|
983
1123
|
return memory;
|
@@ -1325,7 +1465,7 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
|
|
1325
1465
|
pm_assoc_node_t *node = PM_ALLOC_NODE(parser, pm_assoc_node_t);
|
1326
1466
|
const uint8_t *end;
|
1327
1467
|
|
1328
|
-
if (value != NULL) {
|
1468
|
+
if (value != NULL && value->location.end > key->location.end) {
|
1329
1469
|
end = value->location.end;
|
1330
1470
|
} else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
|
1331
1471
|
end = operator->end;
|
@@ -1333,6 +1473,13 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
|
|
1333
1473
|
end = key->location.end;
|
1334
1474
|
}
|
1335
1475
|
|
1476
|
+
// Hash string keys will be frozen, so we can mark them as frozen here so
|
1477
|
+
// that the compiler picks them up and also when we check for static literal
|
1478
|
+
// on the keys it gets factored in.
|
1479
|
+
if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
|
1480
|
+
key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
|
1481
|
+
}
|
1482
|
+
|
1336
1483
|
// If the key and value of this assoc node are both static literals, then
|
1337
1484
|
// we can mark this node as a static literal.
|
1338
1485
|
pm_node_flags_t flags = 0;
|
@@ -1490,7 +1637,7 @@ pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, p
|
|
1490
1637
|
* Allocate and initialize a new BlockNode node.
|
1491
1638
|
*/
|
1492
1639
|
static pm_block_node_t *
|
1493
|
-
pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals,
|
1640
|
+
pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
|
1494
1641
|
pm_block_node_t *node = PM_ALLOC_NODE(parser, pm_block_node_t);
|
1495
1642
|
|
1496
1643
|
*node = (pm_block_node_t) {
|
@@ -1499,7 +1646,6 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_
|
|
1499
1646
|
.location = { .start = opening->start, .end = closing->end },
|
1500
1647
|
},
|
1501
1648
|
.locals = *locals,
|
1502
|
-
.locals_body_index = locals_body_index,
|
1503
1649
|
.parameters = parameters,
|
1504
1650
|
.body = body,
|
1505
1651
|
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
|
@@ -1645,12 +1791,13 @@ pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
|
|
1645
1791
|
* in the various specializations of this function.
|
1646
1792
|
*/
|
1647
1793
|
static pm_call_node_t *
|
1648
|
-
pm_call_node_create(pm_parser_t *parser) {
|
1794
|
+
pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
|
1649
1795
|
pm_call_node_t *node = PM_ALLOC_NODE(parser, pm_call_node_t);
|
1650
1796
|
|
1651
1797
|
*node = (pm_call_node_t) {
|
1652
1798
|
{
|
1653
1799
|
.type = PM_CALL_NODE,
|
1800
|
+
.flags = flags,
|
1654
1801
|
.location = PM_LOCATION_NULL_VALUE(parser),
|
1655
1802
|
},
|
1656
1803
|
.receiver = NULL,
|
@@ -1666,6 +1813,15 @@ pm_call_node_create(pm_parser_t *parser) {
|
|
1666
1813
|
return node;
|
1667
1814
|
}
|
1668
1815
|
|
1816
|
+
/**
|
1817
|
+
* Returns the value that the ignore visibility flag should be set to for the
|
1818
|
+
* given receiver.
|
1819
|
+
*/
|
1820
|
+
static inline pm_node_flags_t
|
1821
|
+
pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
|
1822
|
+
return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
|
1823
|
+
}
|
1824
|
+
|
1669
1825
|
/**
|
1670
1826
|
* Allocate and initialize a new CallNode node from an aref or an aset
|
1671
1827
|
* expression.
|
@@ -1674,7 +1830,7 @@ static pm_call_node_t *
|
|
1674
1830
|
pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
|
1675
1831
|
pm_assert_value_expression(parser, receiver);
|
1676
1832
|
|
1677
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1833
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1678
1834
|
|
1679
1835
|
node->base.location.start = receiver->location.start;
|
1680
1836
|
node->base.location.end = pm_arguments_end(arguments);
|
@@ -1700,7 +1856,7 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t
|
|
1700
1856
|
pm_assert_value_expression(parser, receiver);
|
1701
1857
|
pm_assert_value_expression(parser, argument);
|
1702
1858
|
|
1703
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1859
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1704
1860
|
|
1705
1861
|
node->base.location.start = MIN(receiver->location.start, argument->location.start);
|
1706
1862
|
node->base.location.end = MAX(receiver->location.end, argument->location.end);
|
@@ -1723,7 +1879,7 @@ static pm_call_node_t *
|
|
1723
1879
|
pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
|
1724
1880
|
pm_assert_value_expression(parser, receiver);
|
1725
1881
|
|
1726
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1882
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1727
1883
|
|
1728
1884
|
node->base.location.start = receiver->location.start;
|
1729
1885
|
const uint8_t *end = pm_arguments_end(arguments);
|
@@ -1754,7 +1910,7 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
|
|
1754
1910
|
*/
|
1755
1911
|
static pm_call_node_t *
|
1756
1912
|
pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
|
1757
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1913
|
+
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
|
1758
1914
|
|
1759
1915
|
node->base.location.start = message->start;
|
1760
1916
|
node->base.location.end = pm_arguments_end(arguments);
|
@@ -1776,7 +1932,7 @@ static pm_call_node_t *
|
|
1776
1932
|
pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
|
1777
1933
|
pm_assert_value_expression(parser, receiver);
|
1778
1934
|
|
1779
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1935
|
+
pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
|
1780
1936
|
|
1781
1937
|
node->base.location.start = message->start;
|
1782
1938
|
if (arguments->closing_loc.start != NULL) {
|
@@ -1802,7 +1958,7 @@ static pm_call_node_t *
|
|
1802
1958
|
pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
|
1803
1959
|
pm_assert_value_expression(parser, receiver);
|
1804
1960
|
|
1805
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1961
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1806
1962
|
|
1807
1963
|
node->base.location.start = receiver->location.start;
|
1808
1964
|
node->base.location.end = pm_arguments_end(arguments);
|
@@ -1829,7 +1985,7 @@ static pm_call_node_t *
|
|
1829
1985
|
pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
|
1830
1986
|
pm_assert_value_expression(parser, receiver);
|
1831
1987
|
|
1832
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1988
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1833
1989
|
|
1834
1990
|
node->base.location.start = operator->start;
|
1835
1991
|
node->base.location.end = receiver->location.end;
|
@@ -1847,7 +2003,7 @@ pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *
|
|
1847
2003
|
*/
|
1848
2004
|
static pm_call_node_t *
|
1849
2005
|
pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
|
1850
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
2006
|
+
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
|
1851
2007
|
|
1852
2008
|
node->base.location = PM_LOCATION_TOKEN_VALUE(message);
|
1853
2009
|
node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
|
@@ -1885,11 +2041,12 @@ pm_call_node_index_p(pm_call_node_t *node) {
|
|
1885
2041
|
* operator assignment.
|
1886
2042
|
*/
|
1887
2043
|
static inline bool
|
1888
|
-
pm_call_node_writable_p(pm_call_node_t *node) {
|
2044
|
+
pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
|
1889
2045
|
return (
|
1890
2046
|
(node->message_loc.start != NULL) &&
|
1891
2047
|
(node->message_loc.end[-1] != '!') &&
|
1892
2048
|
(node->message_loc.end[-1] != '?') &&
|
2049
|
+
char_is_identifier_start(parser, node->message_loc.start) &&
|
1893
2050
|
(node->opening_loc.start == NULL) &&
|
1894
2051
|
(node->arguments == NULL) &&
|
1895
2052
|
(node->block == NULL)
|
@@ -2167,11 +2324,12 @@ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
|
|
2167
2324
|
static pm_index_target_node_t *
|
2168
2325
|
pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
|
2169
2326
|
pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
|
2327
|
+
pm_node_flags_t flags = target->base.flags;
|
2170
2328
|
|
2171
2329
|
*node = (pm_index_target_node_t) {
|
2172
2330
|
{
|
2173
2331
|
.type = PM_INDEX_TARGET_NODE,
|
2174
|
-
.flags =
|
2332
|
+
.flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
|
2175
2333
|
.location = target->base.location
|
2176
2334
|
},
|
2177
2335
|
.receiver = target->receiver,
|
@@ -2701,18 +2859,62 @@ pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *targ
|
|
2701
2859
|
return node;
|
2702
2860
|
}
|
2703
2861
|
|
2862
|
+
/**
|
2863
|
+
* Check if the receiver of a `def` node is allowed.
|
2864
|
+
*/
|
2865
|
+
static void
|
2866
|
+
pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
|
2867
|
+
switch (PM_NODE_TYPE(node)) {
|
2868
|
+
case PM_BEGIN_NODE: {
|
2869
|
+
const pm_begin_node_t *cast = (pm_begin_node_t *) node;
|
2870
|
+
if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
|
2871
|
+
break;
|
2872
|
+
}
|
2873
|
+
case PM_PARENTHESES_NODE: {
|
2874
|
+
const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
|
2875
|
+
if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
|
2876
|
+
break;
|
2877
|
+
}
|
2878
|
+
case PM_STATEMENTS_NODE: {
|
2879
|
+
const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
|
2880
|
+
pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
|
2881
|
+
break;
|
2882
|
+
}
|
2883
|
+
case PM_ARRAY_NODE:
|
2884
|
+
case PM_FLOAT_NODE:
|
2885
|
+
case PM_IMAGINARY_NODE:
|
2886
|
+
case PM_INTEGER_NODE:
|
2887
|
+
case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
|
2888
|
+
case PM_INTERPOLATED_STRING_NODE:
|
2889
|
+
case PM_INTERPOLATED_SYMBOL_NODE:
|
2890
|
+
case PM_INTERPOLATED_X_STRING_NODE:
|
2891
|
+
case PM_RATIONAL_NODE:
|
2892
|
+
case PM_REGULAR_EXPRESSION_NODE:
|
2893
|
+
case PM_SOURCE_ENCODING_NODE:
|
2894
|
+
case PM_SOURCE_FILE_NODE:
|
2895
|
+
case PM_SOURCE_LINE_NODE:
|
2896
|
+
case PM_STRING_NODE:
|
2897
|
+
case PM_SYMBOL_NODE:
|
2898
|
+
case PM_X_STRING_NODE:
|
2899
|
+
pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
|
2900
|
+
break;
|
2901
|
+
default:
|
2902
|
+
break;
|
2903
|
+
}
|
2904
|
+
}
|
2905
|
+
|
2704
2906
|
/**
|
2705
2907
|
* Allocate and initialize a new DefNode node.
|
2706
2908
|
*/
|
2707
2909
|
static pm_def_node_t *
|
2708
2910
|
pm_def_node_create(
|
2709
2911
|
pm_parser_t *parser,
|
2710
|
-
|
2912
|
+
pm_constant_id_t name,
|
2913
|
+
const pm_token_t *name_loc,
|
2711
2914
|
pm_node_t *receiver,
|
2712
2915
|
pm_parameters_node_t *parameters,
|
2713
2916
|
pm_node_t *body,
|
2714
2917
|
pm_constant_id_list_t *locals,
|
2715
|
-
uint32_t locals_body_index,
|
2716
2918
|
const pm_token_t *def_keyword,
|
2717
2919
|
const pm_token_t *operator,
|
2718
2920
|
const pm_token_t *lparen,
|
@@ -2729,18 +2931,21 @@ pm_def_node_create(
|
|
2729
2931
|
end = end_keyword->end;
|
2730
2932
|
}
|
2731
2933
|
|
2934
|
+
if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
|
2935
|
+
pm_def_node_receiver_check(parser, receiver);
|
2936
|
+
}
|
2937
|
+
|
2732
2938
|
*node = (pm_def_node_t) {
|
2733
2939
|
{
|
2734
2940
|
.type = PM_DEF_NODE,
|
2735
2941
|
.location = { .start = def_keyword->start, .end = end },
|
2736
2942
|
},
|
2737
|
-
.name =
|
2738
|
-
.name_loc = PM_LOCATION_TOKEN_VALUE(
|
2943
|
+
.name = name,
|
2944
|
+
.name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
|
2739
2945
|
.receiver = receiver,
|
2740
2946
|
.parameters = parameters,
|
2741
2947
|
.body = body,
|
2742
2948
|
.locals = *locals,
|
2743
|
-
.locals_body_index = locals_body_index,
|
2744
2949
|
.def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
|
2745
2950
|
.operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
|
2746
2951
|
.lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
|
@@ -3962,9 +4167,8 @@ pm_keyword_hash_node_create(pm_parser_t *parser) {
|
|
3962
4167
|
*/
|
3963
4168
|
static void
|
3964
4169
|
pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
|
3965
|
-
// If the element being added is not an AssocNode or does not have a symbol
|
3966
|
-
// we want to turn the
|
3967
|
-
// TODO: Rename the flag to SYMBOL_KEYS instead.
|
4170
|
+
// If the element being added is not an AssocNode or does not have a symbol
|
4171
|
+
// key, then we want to turn the SYMBOL_KEYS flag off.
|
3968
4172
|
if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
|
3969
4173
|
pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
|
3970
4174
|
}
|
@@ -4051,7 +4255,6 @@ static pm_lambda_node_t *
|
|
4051
4255
|
pm_lambda_node_create(
|
4052
4256
|
pm_parser_t *parser,
|
4053
4257
|
pm_constant_id_list_t *locals,
|
4054
|
-
uint32_t locals_body_index,
|
4055
4258
|
const pm_token_t *operator,
|
4056
4259
|
const pm_token_t *opening,
|
4057
4260
|
const pm_token_t *closing,
|
@@ -4069,7 +4272,6 @@ pm_lambda_node_create(
|
|
4069
4272
|
},
|
4070
4273
|
},
|
4071
4274
|
.locals = *locals,
|
4072
|
-
.locals_body_index = locals_body_index,
|
4073
4275
|
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
4074
4276
|
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
|
4075
4277
|
.closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
|
@@ -4161,12 +4363,10 @@ pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, c
|
|
4161
4363
|
}
|
4162
4364
|
|
4163
4365
|
/**
|
4164
|
-
* Allocate a new LocalVariableReadNode node.
|
4366
|
+
* Allocate a new LocalVariableReadNode node with constant_id.
|
4165
4367
|
*/
|
4166
4368
|
static pm_local_variable_read_node_t *
|
4167
|
-
|
4168
|
-
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
|
4169
|
-
|
4369
|
+
pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth) {
|
4170
4370
|
if (parser->current_param_name == name_id) {
|
4171
4371
|
pm_parser_err_token(parser, name, PM_ERR_PARAMETER_CIRCULAR);
|
4172
4372
|
}
|
@@ -4185,6 +4385,15 @@ pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name,
|
|
4185
4385
|
return node;
|
4186
4386
|
}
|
4187
4387
|
|
4388
|
+
/**
|
4389
|
+
* Allocate a new LocalVariableReadNode node.
|
4390
|
+
*/
|
4391
|
+
static pm_local_variable_read_node_t *
|
4392
|
+
pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
|
4393
|
+
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
|
4394
|
+
return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth);
|
4395
|
+
}
|
4396
|
+
|
4188
4397
|
/**
|
4189
4398
|
* Allocate and initialize a new LocalVariableWriteNode node.
|
4190
4399
|
*/
|
@@ -4210,6 +4419,57 @@ pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name,
|
|
4210
4419
|
return node;
|
4211
4420
|
}
|
4212
4421
|
|
4422
|
+
/**
 * Returns true if the byte range [start, end) is exactly the two bytes `it`.
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) {
        return false;
    }

    return start[0] == 'i' && start[1] == 't';
}
|
4429
|
+
|
4430
|
+
/**
|
4431
|
+
* Returns true if the given node is `it` default parameter.
|
4432
|
+
*/
|
4433
|
+
static inline bool
|
4434
|
+
pm_node_is_it(pm_parser_t *parser, pm_node_t *node) {
|
4435
|
+
// Check if it's a local variable reference
|
4436
|
+
if (node->type != PM_CALL_NODE) {
|
4437
|
+
return false;
|
4438
|
+
}
|
4439
|
+
|
4440
|
+
// Check if it's a variable call
|
4441
|
+
pm_call_node_t *call_node = (pm_call_node_t *) node;
|
4442
|
+
if (!pm_call_node_variable_call_p(call_node)) {
|
4443
|
+
return false;
|
4444
|
+
}
|
4445
|
+
|
4446
|
+
// Check if it's called `it`
|
4447
|
+
pm_constant_id_t id = ((pm_call_node_t *)node)->name;
|
4448
|
+
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, id);
|
4449
|
+
return pm_token_is_it(constant->start, constant->start + constant->length);
|
4450
|
+
}
|
4451
|
+
|
4452
|
+
/**
|
4453
|
+
* Convert a `it` variable call node to a node for `it` default parameter.
|
4454
|
+
*/
|
4455
|
+
static pm_node_t *
|
4456
|
+
pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
|
4457
|
+
if (
|
4458
|
+
(parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
|
4459
|
+
!parser->current_scope->closed &&
|
4460
|
+
pm_node_is_it(parser, node)
|
4461
|
+
) {
|
4462
|
+
if (parser->current_scope->explicit_params) {
|
4463
|
+
pm_parser_err_previous(parser, PM_ERR_IT_NOT_ALLOWED);
|
4464
|
+
} else {
|
4465
|
+
pm_node_destroy(parser, node);
|
4466
|
+
pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
|
4467
|
+
node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
4468
|
+
}
|
4469
|
+
}
|
4470
|
+
return node;
|
4471
|
+
}
|
4472
|
+
|
4213
4473
|
/**
|
4214
4474
|
* Returns true if the given bounds comprise a numbered parameter (i.e., they
|
4215
4475
|
* are of the form /^_\d$/).
|
@@ -4402,13 +4662,20 @@ pm_multi_target_node_create(pm_parser_t *parser) {
|
|
4402
4662
|
*/
|
4403
4663
|
static void
|
4404
4664
|
pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
|
4405
|
-
if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)
|
4665
|
+
if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
|
4406
4666
|
if (node->rest == NULL) {
|
4407
4667
|
node->rest = target;
|
4408
4668
|
} else {
|
4409
4669
|
pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
|
4410
4670
|
pm_node_list_append(&node->rights, target);
|
4411
4671
|
}
|
4672
|
+
} else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
|
4673
|
+
if (node->rest == NULL) {
|
4674
|
+
node->rest = target;
|
4675
|
+
} else {
|
4676
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
|
4677
|
+
pm_node_list_append(&node->rights, target);
|
4678
|
+
}
|
4412
4679
|
} else if (node->rest == NULL) {
|
4413
4680
|
pm_node_list_append(&node->lefts, target);
|
4414
4681
|
} else {
|
@@ -5195,7 +5462,7 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
|
|
5195
5462
|
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
5196
5463
|
.location = PM_LOCATION_TOKEN_VALUE(file_keyword),
|
5197
5464
|
},
|
5198
|
-
.filepath = parser->
|
5465
|
+
.filepath = parser->filepath
|
5199
5466
|
};
|
5200
5467
|
|
5201
5468
|
return node;
|
@@ -5372,18 +5639,59 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
|
|
5372
5639
|
return node;
|
5373
5640
|
}
|
5374
5641
|
|
5642
|
+
/**
|
5643
|
+
* Read through the contents of a string and check if it consists solely of US ASCII code points.
|
5644
|
+
*/
|
5645
|
+
static bool
|
5646
|
+
pm_ascii_only_p(const pm_string_t *contents) {
|
5647
|
+
const size_t length = pm_string_length(contents);
|
5648
|
+
const uint8_t *source = pm_string_source(contents);
|
5649
|
+
|
5650
|
+
for (size_t index = 0; index < length; index++) {
|
5651
|
+
if (source[index] & 0x80) return false;
|
5652
|
+
}
|
5653
|
+
|
5654
|
+
return true;
|
5655
|
+
}
|
5656
|
+
|
5657
|
+
/**
|
5658
|
+
* Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
|
5659
|
+
* encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
|
5660
|
+
* points. Otherwise, the encoding may be explicitly set with an escape
|
5661
|
+
* sequence.
|
5662
|
+
*/
|
5663
|
+
static inline pm_node_flags_t
|
5664
|
+
parse_symbol_encoding(const pm_parser_t *parser, const pm_string_t *contents) {
|
5665
|
+
if (parser->explicit_encoding != NULL) {
|
5666
|
+
// A Symbol may optionally have its encoding explicitly set. This will
|
5667
|
+
// happen if an escape sequence results in a non-ASCII code point.
|
5668
|
+
if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
|
5669
|
+
return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
|
5670
|
+
} else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
|
5671
|
+
return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
|
5672
|
+
}
|
5673
|
+
} else if (pm_ascii_only_p(contents)) {
|
5674
|
+
// Ruby stipulates that all source files must use an ASCII-compatible
|
5675
|
+
// encoding. Thus, all symbols appearing in source are eligible for
|
5676
|
+
// "downgrading" to US-ASCII.
|
5677
|
+
return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
|
5678
|
+
}
|
5679
|
+
|
5680
|
+
return 0;
|
5681
|
+
}
|
5682
|
+
|
5375
5683
|
/**
|
5376
5684
|
* Allocate and initialize a new SymbolNode node with the given unescaped
|
5377
5685
|
* string.
|
5378
5686
|
*/
|
5379
5687
|
static pm_symbol_node_t *
|
5380
|
-
pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped) {
|
5688
|
+
pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
|
5381
5689
|
pm_symbol_node_t *node = PM_ALLOC_NODE(parser, pm_symbol_node_t);
|
5382
5690
|
|
5383
5691
|
*node = (pm_symbol_node_t) {
|
5384
5692
|
{
|
5385
5693
|
.type = PM_SYMBOL_NODE,
|
5386
|
-
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
5694
|
+
.flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
|
5387
5695
|
.location = {
|
5388
5696
|
.start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
|
5389
5697
|
.end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
|
@@ -5403,7 +5711,7 @@ pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening,
|
|
5403
5711
|
*/
|
5404
5712
|
static inline pm_symbol_node_t *
|
5405
5713
|
pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
|
5406
|
-
return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY);
|
5714
|
+
return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
|
5407
5715
|
}
|
5408
5716
|
|
5409
5717
|
/**
|
@@ -5411,7 +5719,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
|
|
5411
5719
|
*/
|
5412
5720
|
static pm_symbol_node_t *
|
5413
5721
|
pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
|
5414
|
-
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string);
|
5722
|
+
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, &parser->current_string));
|
5415
5723
|
parser->current_string = PM_STRING_EMPTY;
|
5416
5724
|
return node;
|
5417
5725
|
}
|
@@ -5433,6 +5741,8 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
5433
5741
|
|
5434
5742
|
assert((label.end - label.start) >= 0);
|
5435
5743
|
pm_string_shared_init(&node->unescaped, label.start, label.end);
|
5744
|
+
pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &node->unescaped));
|
5745
|
+
|
5436
5746
|
break;
|
5437
5747
|
}
|
5438
5748
|
case PM_TOKEN_MISSING: {
|
@@ -5495,6 +5805,8 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
|
|
5495
5805
|
.unescaped = node->unescaped
|
5496
5806
|
};
|
5497
5807
|
|
5808
|
+
pm_node_flag_set((pm_node_t *)new_node, parse_symbol_encoding(parser, &node->unescaped));
|
5809
|
+
|
5498
5810
|
// We are explicitly _not_ using pm_node_destroy here because we don't want
|
5499
5811
|
// to trash the unescaped string. We could instead copy the string if we
|
5500
5812
|
// know that it is owned, but we're taking the fast path for now.
|
@@ -5885,6 +6197,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
|
|
5885
6197
|
.closed = closed,
|
5886
6198
|
.explicit_params = false,
|
5887
6199
|
.numbered_parameters = 0,
|
6200
|
+
.forwarding_params = 0,
|
5888
6201
|
};
|
5889
6202
|
|
5890
6203
|
pm_constant_id_list_init(&scope->locals);
|
@@ -5893,6 +6206,76 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
|
|
5893
6206
|
return true;
|
5894
6207
|
}
|
5895
6208
|
|
6209
|
+
static void
|
6210
|
+
pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const pm_token_t * token, const uint8_t mask, pm_diagnostic_id_t diag)
|
6211
|
+
{
|
6212
|
+
pm_scope_t *scope = parser->current_scope;
|
6213
|
+
while (scope) {
|
6214
|
+
if (scope->forwarding_params & mask) {
|
6215
|
+
if (!scope->closed) {
|
6216
|
+
pm_parser_err_token(parser, token, diag);
|
6217
|
+
return;
|
6218
|
+
}
|
6219
|
+
return;
|
6220
|
+
}
|
6221
|
+
if (scope->closed) break;
|
6222
|
+
scope = scope->previous;
|
6223
|
+
}
|
6224
|
+
|
6225
|
+
pm_parser_err_token(parser, token, diag);
|
6226
|
+
}
|
6227
|
+
|
6228
|
+
static inline void
|
6229
|
+
pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token)
|
6230
|
+
{
|
6231
|
+
pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_BLOCK, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
|
6232
|
+
}
|
6233
|
+
|
6234
|
+
static void
|
6235
|
+
pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token)
|
6236
|
+
{
|
6237
|
+
pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_POSITIONALS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
6238
|
+
}
|
6239
|
+
|
6240
|
+
static inline void
|
6241
|
+
pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t * token)
|
6242
|
+
{
|
6243
|
+
pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_ALL, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
6244
|
+
}
|
6245
|
+
|
6246
|
+
static inline void
|
6247
|
+
pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token)
|
6248
|
+
{
|
6249
|
+
pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_KEYWORDS, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
6250
|
+
}
|
6251
|
+
|
6252
|
+
/**
|
6253
|
+
* Save the current param name as the return value and set it to the given
|
6254
|
+
* constant id.
|
6255
|
+
*/
|
6256
|
+
static inline pm_constant_id_t
|
6257
|
+
pm_parser_current_param_name_set(pm_parser_t *parser, pm_constant_id_t current_param_name) {
|
6258
|
+
pm_constant_id_t saved_param_name = parser->current_param_name;
|
6259
|
+
parser->current_param_name = current_param_name;
|
6260
|
+
return saved_param_name;
|
6261
|
+
}
|
6262
|
+
|
6263
|
+
/**
|
6264
|
+
* Save the current param name as the return value and clear it.
|
6265
|
+
*/
|
6266
|
+
static inline pm_constant_id_t
|
6267
|
+
pm_parser_current_param_name_unset(pm_parser_t *parser) {
|
6268
|
+
return pm_parser_current_param_name_set(parser, PM_CONSTANT_ID_UNSET);
|
6269
|
+
}
|
6270
|
+
|
6271
|
+
/**
|
6272
|
+
* Restore the current param name from the given value.
|
6273
|
+
*/
|
6274
|
+
static inline void
|
6275
|
+
pm_parser_current_param_name_restore(pm_parser_t *parser, pm_constant_id_t saved_param_name) {
|
6276
|
+
parser->current_param_name = saved_param_name;
|
6277
|
+
}
|
6278
|
+
|
5896
6279
|
/**
|
5897
6280
|
* Check if any of the currently visible scopes contain a local variable
|
5898
6281
|
* described by the given constant id.
|
@@ -5969,26 +6352,41 @@ pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t leng
|
|
5969
6352
|
return constant_id;
|
5970
6353
|
}
|
5971
6354
|
|
6355
|
+
/**
|
6356
|
+
* Add a local variable from a constant string to the current scope.
|
6357
|
+
*/
|
6358
|
+
static pm_constant_id_t
|
6359
|
+
pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
|
6360
|
+
pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
|
6361
|
+
if (constant_id != 0) pm_parser_local_add(parser, constant_id);
|
6362
|
+
return constant_id;
|
6363
|
+
}
|
6364
|
+
|
5972
6365
|
/**
|
5973
6366
|
* Add a parameter name to the current scope and check whether the name of the
|
5974
6367
|
* parameter is unique or not.
|
6368
|
+
*
|
6369
|
+
* Returns `true` if this is a duplicate parameter name, otherwise returns
|
6370
|
+
* false.
|
5975
6371
|
*/
|
5976
|
-
static
|
6372
|
+
static bool
|
5977
6373
|
pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
|
5978
6374
|
// We want to check whether the parameter name is a numbered parameter or
|
5979
6375
|
// not.
|
5980
6376
|
pm_refute_numbered_parameter(parser, name->start, name->end);
|
5981
6377
|
|
5982
|
-
// We want to ignore any parameter name that starts with an underscore.
|
5983
|
-
if ((name->start < name->end) && (*name->start == '_')) return;
|
5984
|
-
|
5985
6378
|
// Otherwise we'll fetch the constant id for the parameter name and check
|
5986
6379
|
// whether it's already in the current scope.
|
5987
6380
|
pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
|
5988
6381
|
|
5989
6382
|
if (pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
|
5990
|
-
|
6383
|
+
// Add an error if the parameter doesn't start with _ and has been seen before
|
6384
|
+
if ((name->start < name->end) && (*name->start != '_')) {
|
6385
|
+
pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_REPEAT);
|
6386
|
+
}
|
6387
|
+
return true;
|
5991
6388
|
}
|
6389
|
+
return false;
|
5992
6390
|
}
|
5993
6391
|
|
5994
6392
|
/**
|
@@ -6003,105 +6401,6 @@ pm_parser_scope_pop(pm_parser_t *parser) {
|
|
6003
6401
|
free(scope);
|
6004
6402
|
}
|
6005
6403
|
|
6006
|
-
/******************************************************************************/
|
6007
|
-
/* Basic character checks */
|
6008
|
-
/******************************************************************************/
|
6009
|
-
|
6010
|
-
/**
|
6011
|
-
* This function is used extremely frequently to lex all of the identifiers in a
|
6012
|
-
* source file, so it's important that it be as fast as possible. For this
|
6013
|
-
* reason we have the encoding_changed boolean to check if we need to go through
|
6014
|
-
* the function pointer or can just directly use the UTF-8 functions.
|
6015
|
-
*/
|
6016
|
-
static inline size_t
|
6017
|
-
char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
|
6018
|
-
if (parser->encoding_changed) {
|
6019
|
-
size_t width;
|
6020
|
-
if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
|
6021
|
-
return width;
|
6022
|
-
} else if (*b == '_') {
|
6023
|
-
return 1;
|
6024
|
-
} else if (*b >= 0x80) {
|
6025
|
-
return parser->encoding->char_width(b, parser->end - b);
|
6026
|
-
} else {
|
6027
|
-
return 0;
|
6028
|
-
}
|
6029
|
-
} else if (*b < 0x80) {
|
6030
|
-
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
|
6031
|
-
} else {
|
6032
|
-
return (size_t) (pm_encoding_utf_8_alpha_char(b, parser->end - b) || 1u);
|
6033
|
-
}
|
6034
|
-
}
|
6035
|
-
|
6036
|
-
/**
|
6037
|
-
* Similar to char_is_identifier but this function assumes that the encoding
|
6038
|
-
* has not been changed.
|
6039
|
-
*/
|
6040
|
-
static inline size_t
|
6041
|
-
char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
6042
|
-
if (*b < 0x80) {
|
6043
|
-
return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
|
6044
|
-
} else {
|
6045
|
-
return (size_t) (pm_encoding_utf_8_alnum_char(b, end - b) || 1u);
|
6046
|
-
}
|
6047
|
-
}
|
6048
|
-
|
6049
|
-
/**
|
6050
|
-
* Like the above, this function is also used extremely frequently to lex all of
|
6051
|
-
* the identifiers in a source file once the first character has been found. So
|
6052
|
-
* it's important that it be as fast as possible.
|
6053
|
-
*/
|
6054
|
-
static inline size_t
|
6055
|
-
char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
6056
|
-
if (parser->encoding_changed) {
|
6057
|
-
size_t width;
|
6058
|
-
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
|
6059
|
-
return width;
|
6060
|
-
} else if (*b == '_') {
|
6061
|
-
return 1;
|
6062
|
-
} else if (*b >= 0x80) {
|
6063
|
-
return parser->encoding->char_width(b, parser->end - b);
|
6064
|
-
} else {
|
6065
|
-
return 0;
|
6066
|
-
}
|
6067
|
-
}
|
6068
|
-
return char_is_identifier_utf8(b, parser->end);
|
6069
|
-
}
|
6070
|
-
|
6071
|
-
// Here we're defining a perfect hash for the characters that are allowed in
|
6072
|
-
// global names. This is used to quickly check the next character after a $ to
|
6073
|
-
// see if it's a valid character for a global name.
|
6074
|
-
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
|
6075
|
-
#define PUNCT(idx) ( \
|
6076
|
-
BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
|
6077
|
-
BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
|
6078
|
-
BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
|
6079
|
-
BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
|
6080
|
-
BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
|
6081
|
-
BIT('0', idx))
|
6082
|
-
|
6083
|
-
const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
|
6084
|
-
|
6085
|
-
#undef BIT
|
6086
|
-
#undef PUNCT
|
6087
|
-
|
6088
|
-
static inline bool
|
6089
|
-
char_is_global_name_punctuation(const uint8_t b) {
|
6090
|
-
const unsigned int i = (const unsigned int) b;
|
6091
|
-
if (i <= 0x20 || 0x7e < i) return false;
|
6092
|
-
|
6093
|
-
return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
|
6094
|
-
}
|
6095
|
-
|
6096
|
-
static inline bool
|
6097
|
-
token_is_setter_name(pm_token_t *token) {
|
6098
|
-
return (
|
6099
|
-
(token->type == PM_TOKEN_IDENTIFIER) &&
|
6100
|
-
(token->end - token->start >= 2) &&
|
6101
|
-
(token->end[-1] == '=')
|
6102
|
-
);
|
6103
|
-
}
|
6104
|
-
|
6105
6404
|
/******************************************************************************/
|
6106
6405
|
/* Stack helpers */
|
6107
6406
|
/******************************************************************************/
|
@@ -6317,8 +6616,10 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) {
|
|
6317
6616
|
*/
|
6318
6617
|
static void
|
6319
6618
|
parser_lex_magic_comment_frozen_string_literal_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
6320
|
-
if (start + 4 <= end && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
|
6619
|
+
if ((start + 4 <= end) && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
|
6321
6620
|
parser->frozen_string_literal = true;
|
6621
|
+
} else if ((start + 5 <= end) && pm_strncasecmp(start, (const uint8_t *) "false", 5) == 0) {
|
6622
|
+
parser->frozen_string_literal = false;
|
6322
6623
|
}
|
6323
6624
|
}
|
6324
6625
|
|
@@ -6541,21 +6842,27 @@ context_terminator(pm_context_t context, pm_token_t *token) {
|
|
6541
6842
|
return token->type == PM_TOKEN_BRACE_RIGHT;
|
6542
6843
|
case PM_CONTEXT_PREDICATE:
|
6543
6844
|
return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
|
6845
|
+
case PM_CONTEXT_NONE:
|
6846
|
+
return false;
|
6544
6847
|
}
|
6545
6848
|
|
6546
6849
|
return false;
|
6547
6850
|
}
|
6548
6851
|
|
6549
|
-
|
6550
|
-
|
6852
|
+
/**
|
6853
|
+
* Returns the context that the given token is found to be terminating, or
|
6854
|
+
* returns PM_CONTEXT_NONE.
|
6855
|
+
*/
|
6856
|
+
static pm_context_t
|
6857
|
+
context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
|
6551
6858
|
pm_context_node_t *context_node = parser->current_context;
|
6552
6859
|
|
6553
6860
|
while (context_node != NULL) {
|
6554
|
-
if (context_terminator(context_node->context, token)) return
|
6861
|
+
if (context_terminator(context_node->context, token)) return context_node->context;
|
6555
6862
|
context_node = context_node->prev;
|
6556
6863
|
}
|
6557
6864
|
|
6558
|
-
return
|
6865
|
+
return PM_CONTEXT_NONE;
|
6559
6866
|
}
|
6560
6867
|
|
6561
6868
|
static bool
|
@@ -6583,7 +6890,7 @@ context_pop(pm_parser_t *parser) {
|
|
6583
6890
|
}
|
6584
6891
|
|
6585
6892
|
static bool
|
6586
|
-
context_p(pm_parser_t *parser, pm_context_t context) {
|
6893
|
+
context_p(const pm_parser_t *parser, pm_context_t context) {
|
6587
6894
|
pm_context_node_t *context_node = parser->current_context;
|
6588
6895
|
|
6589
6896
|
while (context_node != NULL) {
|
@@ -6595,7 +6902,7 @@ context_p(pm_parser_t *parser, pm_context_t context) {
|
|
6595
6902
|
}
|
6596
6903
|
|
6597
6904
|
static bool
|
6598
|
-
context_def_p(pm_parser_t *parser) {
|
6905
|
+
context_def_p(const pm_parser_t *parser) {
|
6599
6906
|
pm_context_node_t *context_node = parser->current_context;
|
6600
6907
|
|
6601
6908
|
while (context_node != NULL) {
|
@@ -6618,6 +6925,55 @@ context_def_p(pm_parser_t *parser) {
|
|
6618
6925
|
return false;
|
6619
6926
|
}
|
6620
6927
|
|
6928
|
+
/**
|
6929
|
+
* Returns a human readable string for the given context, used in error
|
6930
|
+
* messages.
|
6931
|
+
*/
|
6932
|
+
static const char *
|
6933
|
+
context_human(pm_context_t context) {
|
6934
|
+
switch (context) {
|
6935
|
+
case PM_CONTEXT_NONE:
|
6936
|
+
assert(false && "unreachable");
|
6937
|
+
return "";
|
6938
|
+
case PM_CONTEXT_BEGIN: return "begin statement";
|
6939
|
+
case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
|
6940
|
+
case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
|
6941
|
+
case PM_CONTEXT_CASE_WHEN: return "'when' clause";
|
6942
|
+
case PM_CONTEXT_CASE_IN: return "'in' clause";
|
6943
|
+
case PM_CONTEXT_CLASS: return "class definition";
|
6944
|
+
case PM_CONTEXT_DEF: return "method definition";
|
6945
|
+
case PM_CONTEXT_DEF_PARAMS: return "method parameters";
|
6946
|
+
case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
|
6947
|
+
case PM_CONTEXT_ELSE: return "'else' clause";
|
6948
|
+
case PM_CONTEXT_ELSIF: return "'elsif' clause";
|
6949
|
+
case PM_CONTEXT_EMBEXPR: return "embedded expression";
|
6950
|
+
case PM_CONTEXT_ENSURE: return "'ensure' clause";
|
6951
|
+
case PM_CONTEXT_ENSURE_DEF: return "'ensure' clause";
|
6952
|
+
case PM_CONTEXT_FOR: return "for loop";
|
6953
|
+
case PM_CONTEXT_FOR_INDEX: return "for loop index";
|
6954
|
+
case PM_CONTEXT_IF: return "if statement";
|
6955
|
+
case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
|
6956
|
+
case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
|
6957
|
+
case PM_CONTEXT_MAIN: return "top level context";
|
6958
|
+
case PM_CONTEXT_MODULE: return "module definition";
|
6959
|
+
case PM_CONTEXT_PARENS: return "parentheses";
|
6960
|
+
case PM_CONTEXT_POSTEXE: return "'END' block";
|
6961
|
+
case PM_CONTEXT_PREDICATE: return "predicate";
|
6962
|
+
case PM_CONTEXT_PREEXE: return "'BEGIN' block";
|
6963
|
+
case PM_CONTEXT_RESCUE_ELSE: return "'else' clause";
|
6964
|
+
case PM_CONTEXT_RESCUE_ELSE_DEF: return "'else' clause";
|
6965
|
+
case PM_CONTEXT_RESCUE: return "'rescue' clause";
|
6966
|
+
case PM_CONTEXT_RESCUE_DEF: return "'rescue' clause";
|
6967
|
+
case PM_CONTEXT_SCLASS: return "singleton class definition";
|
6968
|
+
case PM_CONTEXT_UNLESS: return "unless statement";
|
6969
|
+
case PM_CONTEXT_UNTIL: return "until statement";
|
6970
|
+
case PM_CONTEXT_WHILE: return "while statement";
|
6971
|
+
}
|
6972
|
+
|
6973
|
+
assert(false && "unreachable");
|
6974
|
+
return "";
|
6975
|
+
}
|
6976
|
+
|
6621
6977
|
/******************************************************************************/
|
6622
6978
|
/* Specific token lexers */
|
6623
6979
|
/******************************************************************************/
|
@@ -6843,7 +7199,7 @@ lex_numeric(pm_parser_t *parser) {
|
|
6843
7199
|
static pm_token_type_t
|
6844
7200
|
lex_global_variable(pm_parser_t *parser) {
|
6845
7201
|
if (parser->current.end >= parser->end) {
|
6846
|
-
|
7202
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
6847
7203
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
6848
7204
|
}
|
6849
7205
|
|
@@ -6884,7 +7240,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
6884
7240
|
} while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
|
6885
7241
|
|
6886
7242
|
// $0 isn't allowed to be followed by anything.
|
6887
|
-
|
7243
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
6888
7244
|
}
|
6889
7245
|
|
6890
7246
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -6915,7 +7271,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
6915
7271
|
} else {
|
6916
7272
|
// If we get here, then we have a $ followed by something that isn't
|
6917
7273
|
// recognized as a global variable.
|
6918
|
-
|
7274
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
6919
7275
|
}
|
6920
7276
|
|
6921
7277
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -7360,6 +7716,28 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
|
|
7360
7716
|
pm_buffer_append_byte(buffer, byte);
|
7361
7717
|
}
|
7362
7718
|
|
7719
|
+
/**
|
7720
|
+
* Write each byte of the given escaped character into the buffer.
|
7721
|
+
*/
|
7722
|
+
static inline void
|
7723
|
+
escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
|
7724
|
+
size_t width;
|
7725
|
+
if (parser->encoding_changed) {
|
7726
|
+
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
7727
|
+
} else {
|
7728
|
+
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
|
7729
|
+
}
|
7730
|
+
|
7731
|
+
// TODO: If the character is invalid in the given encoding, then we'll just
|
7732
|
+
// push one byte into the buffer. This should actually be an error.
|
7733
|
+
width = (width == 0) ? 1 : width;
|
7734
|
+
|
7735
|
+
for (size_t index = 0; index < width; index++) {
|
7736
|
+
escape_write_byte_encoded(parser, buffer, *parser->current.end);
|
7737
|
+
parser->current.end++;
|
7738
|
+
}
|
7739
|
+
}
|
7740
|
+
|
7363
7741
|
/**
|
7364
7742
|
* The regular expression engine doesn't support the same escape sequences as
|
7365
7743
|
* Ruby does. So first we have to read the escape sequence, and then we have to
|
@@ -7698,7 +8076,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
|
|
7698
8076
|
/* fallthrough */
|
7699
8077
|
default: {
|
7700
8078
|
if (parser->current.end < parser->end) {
|
7701
|
-
|
8079
|
+
escape_write_escape_encoded(parser, buffer);
|
7702
8080
|
}
|
7703
8081
|
return;
|
7704
8082
|
}
|
@@ -7797,10 +8175,10 @@ lex_at_variable(pm_parser_t *parser) {
|
|
7797
8175
|
while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
|
7798
8176
|
parser->current.end += width;
|
7799
8177
|
}
|
7800
|
-
} else if (type == PM_TOKEN_CLASS_VARIABLE) {
|
7801
|
-
pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_CLASS);
|
7802
8178
|
} else {
|
7803
|
-
|
8179
|
+
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
|
8180
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
8181
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
|
7804
8182
|
}
|
7805
8183
|
|
7806
8184
|
// If we're lexing an embedded variable, then we need to pop back into the
|
@@ -7975,14 +8353,43 @@ typedef struct {
|
|
7975
8353
|
* Push the given byte into the token buffer.
|
7976
8354
|
*/
|
7977
8355
|
static inline void
|
7978
|
-
|
8356
|
+
pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
|
7979
8357
|
pm_buffer_append_byte(&token_buffer->buffer, byte);
|
7980
8358
|
}
|
7981
8359
|
|
8360
|
+
/**
|
8361
|
+
* Append the given bytes into the token buffer.
|
8362
|
+
*/
|
8363
|
+
static inline void
|
8364
|
+
pm_token_buffer_push_bytes(pm_token_buffer_t *token_buffer, const uint8_t *bytes, size_t length) {
|
8365
|
+
pm_buffer_append_bytes(&token_buffer->buffer, bytes, length);
|
8366
|
+
}
|
8367
|
+
|
8368
|
+
/**
|
8369
|
+
* Push an escaped character into the token buffer.
|
8370
|
+
*/
|
8371
|
+
static inline void
|
8372
|
+
pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
|
8373
|
+
// First, determine the width of the character to be escaped.
|
8374
|
+
size_t width;
|
8375
|
+
if (parser->encoding_changed) {
|
8376
|
+
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
8377
|
+
} else {
|
8378
|
+
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
|
8379
|
+
}
|
8380
|
+
|
8381
|
+
// TODO: If the character is invalid in the given encoding, then we'll just
|
8382
|
+
// push one byte into the buffer. This should actually be an error.
|
8383
|
+
width = (width == 0 ? 1 : width);
|
8384
|
+
|
8385
|
+
// Now, push the bytes into the buffer.
|
8386
|
+
pm_token_buffer_push_bytes(token_buffer, parser->current.end, width);
|
8387
|
+
parser->current.end += width;
|
8388
|
+
}
|
8389
|
+
|
7982
8390
|
/**
|
7983
8391
|
* When we're about to return from lexing the current token and we know for sure
|
7984
8392
|
* that we have found an escape sequence, this function is called to copy the
|
7985
|
-
*
|
7986
8393
|
* contents of the token buffer into the current string on the parser so that it
|
7987
8394
|
* can be attached to the correct node.
|
7988
8395
|
*/
|
@@ -7997,7 +8404,6 @@ pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
|
7997
8404
|
* string. If we haven't pushed anything into the buffer, this means that we
|
7998
8405
|
* never found an escape sequence, so we can directly reference the bounds of
|
7999
8406
|
* the current string. Either way, at the return of this function it is expected
|
8000
|
-
*
|
8001
8407
|
* that parser->current_string is established in such a way that it can be
|
8002
8408
|
* attached to a node.
|
8003
8409
|
*/
|
@@ -8016,7 +8422,6 @@ pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
|
8016
8422
|
* point into the buffer because we're about to provide a string that has
|
8017
8423
|
* different content than a direct slice of the source.
|
8018
8424
|
*
|
8019
|
-
*
|
8020
8425
|
* It is expected that the parser's current token end will be pointing at one
|
8021
8426
|
* byte past the backslash that starts the escape sequence.
|
8022
8427
|
*/
|
@@ -8070,6 +8475,34 @@ pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor,
|
|
8070
8475
|
return whitespace;
|
8071
8476
|
}
|
8072
8477
|
|
8478
|
+
/**
|
8479
|
+
* Lex past the delimiter of a percent literal. Handle newlines and heredocs
|
8480
|
+
* appropriately.
|
8481
|
+
*/
|
8482
|
+
static uint8_t
|
8483
|
+
pm_lex_percent_delimiter(pm_parser_t *parser) {
|
8484
|
+
size_t eol_length = match_eol(parser);
|
8485
|
+
|
8486
|
+
if (eol_length) {
|
8487
|
+
if (parser->heredoc_end) {
|
8488
|
+
// If we have already lexed a heredoc, then the newline has already
|
8489
|
+
// been added to the list. In this case we want to just flush the
|
8490
|
+
// heredoc end.
|
8491
|
+
parser_flush_heredoc_end(parser);
|
8492
|
+
} else {
|
8493
|
+
// Otherwise, we'll add the newline to the list of newlines.
|
8494
|
+
pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
|
8495
|
+
}
|
8496
|
+
|
8497
|
+
const uint8_t delimiter = *parser->current.end;
|
8498
|
+
parser->current.end += eol_length;
|
8499
|
+
|
8500
|
+
return delimiter;
|
8501
|
+
}
|
8502
|
+
|
8503
|
+
return *parser->current.end++;
|
8504
|
+
}
|
8505
|
+
|
8073
8506
|
/**
|
8074
8507
|
* This is a convenience macro that will set the current token type, call the
|
8075
8508
|
* lex callback, and then return from the parser_lex function.
|
@@ -8635,7 +9068,7 @@ parser_lex(pm_parser_t *parser) {
|
|
8635
9068
|
// this is not a valid heredoc declaration. In this case we
|
8636
9069
|
// will add an error, but we will still return a heredoc
|
8637
9070
|
// start.
|
8638
|
-
pm_parser_err_current(parser,
|
9071
|
+
pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
|
8639
9072
|
body_start = parser->end;
|
8640
9073
|
} else {
|
8641
9074
|
// Otherwise, we want to indicate that the body of the
|
@@ -8826,12 +9259,10 @@ parser_lex(pm_parser_t *parser) {
|
|
8826
9259
|
LEX(PM_TOKEN_PLUS_EQUAL);
|
8827
9260
|
}
|
8828
9261
|
|
8829
|
-
|
8830
|
-
|
8831
|
-
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS)
|
8832
|
-
|
8833
|
-
|
8834
|
-
if (lex_state_beg_p(parser) || spcarg) {
|
9262
|
+
if (
|
9263
|
+
lex_state_beg_p(parser) ||
|
9264
|
+
(lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
|
9265
|
+
) {
|
8835
9266
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
8836
9267
|
|
8837
9268
|
if (pm_char_is_decimal_digit(peek(parser))) {
|
@@ -8871,11 +9302,12 @@ parser_lex(pm_parser_t *parser) {
|
|
8871
9302
|
}
|
8872
9303
|
|
8873
9304
|
bool spcarg = lex_state_spcarg_p(parser, space_seen);
|
8874
|
-
|
9305
|
+
bool is_beg = lex_state_beg_p(parser);
|
9306
|
+
if (!is_beg && spcarg) {
|
8875
9307
|
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
|
8876
9308
|
}
|
8877
9309
|
|
8878
|
-
if (
|
9310
|
+
if (is_beg || spcarg) {
|
8879
9311
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
8880
9312
|
LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
|
8881
9313
|
}
|
@@ -9026,15 +9458,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9026
9458
|
pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
|
9027
9459
|
}
|
9028
9460
|
|
9029
|
-
|
9030
|
-
|
9031
|
-
size_t eol_length = match_eol(parser);
|
9032
|
-
if (eol_length) {
|
9033
|
-
parser->current.end += eol_length;
|
9034
|
-
pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
|
9035
|
-
} else {
|
9036
|
-
parser->current.end++;
|
9037
|
-
}
|
9461
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9462
|
+
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9038
9463
|
|
9039
9464
|
if (parser->current.end < parser->end) {
|
9040
9465
|
LEX(PM_TOKEN_STRING_BEGIN);
|
@@ -9054,7 +9479,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9054
9479
|
parser->current.end++;
|
9055
9480
|
|
9056
9481
|
if (parser->current.end < parser->end) {
|
9057
|
-
lex_mode_push_list(parser, false,
|
9482
|
+
lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
|
9058
9483
|
} else {
|
9059
9484
|
lex_mode_push_list_eof(parser);
|
9060
9485
|
}
|
@@ -9065,7 +9490,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9065
9490
|
parser->current.end++;
|
9066
9491
|
|
9067
9492
|
if (parser->current.end < parser->end) {
|
9068
|
-
lex_mode_push_list(parser, true,
|
9493
|
+
lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
|
9069
9494
|
} else {
|
9070
9495
|
lex_mode_push_list_eof(parser);
|
9071
9496
|
}
|
@@ -9076,9 +9501,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9076
9501
|
parser->current.end++;
|
9077
9502
|
|
9078
9503
|
if (parser->current.end < parser->end) {
|
9079
|
-
|
9080
|
-
|
9081
|
-
parser->current.end++;
|
9504
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9505
|
+
lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9082
9506
|
} else {
|
9083
9507
|
lex_mode_push_regexp(parser, '\0', '\0');
|
9084
9508
|
}
|
@@ -9089,9 +9513,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9089
9513
|
parser->current.end++;
|
9090
9514
|
|
9091
9515
|
if (parser->current.end < parser->end) {
|
9092
|
-
|
9093
|
-
|
9094
|
-
parser->current.end++;
|
9516
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9517
|
+
lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9095
9518
|
} else {
|
9096
9519
|
lex_mode_push_string_eof(parser);
|
9097
9520
|
}
|
@@ -9102,9 +9525,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9102
9525
|
parser->current.end++;
|
9103
9526
|
|
9104
9527
|
if (parser->current.end < parser->end) {
|
9105
|
-
|
9106
|
-
|
9107
|
-
parser->current.end++;
|
9528
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9529
|
+
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9108
9530
|
} else {
|
9109
9531
|
lex_mode_push_string_eof(parser);
|
9110
9532
|
}
|
@@ -9115,9 +9537,9 @@ parser_lex(pm_parser_t *parser) {
|
|
9115
9537
|
parser->current.end++;
|
9116
9538
|
|
9117
9539
|
if (parser->current.end < parser->end) {
|
9118
|
-
|
9540
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9541
|
+
lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9119
9542
|
lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
|
9120
|
-
parser->current.end++;
|
9121
9543
|
} else {
|
9122
9544
|
lex_mode_push_string_eof(parser);
|
9123
9545
|
}
|
@@ -9128,7 +9550,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9128
9550
|
parser->current.end++;
|
9129
9551
|
|
9130
9552
|
if (parser->current.end < parser->end) {
|
9131
|
-
lex_mode_push_list(parser, false,
|
9553
|
+
lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
|
9132
9554
|
} else {
|
9133
9555
|
lex_mode_push_list_eof(parser);
|
9134
9556
|
}
|
@@ -9139,7 +9561,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9139
9561
|
parser->current.end++;
|
9140
9562
|
|
9141
9563
|
if (parser->current.end < parser->end) {
|
9142
|
-
lex_mode_push_list(parser, true,
|
9564
|
+
lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
|
9143
9565
|
} else {
|
9144
9566
|
lex_mode_push_list_eof(parser);
|
9145
9567
|
}
|
@@ -9150,8 +9572,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9150
9572
|
parser->current.end++;
|
9151
9573
|
|
9152
9574
|
if (parser->current.end < parser->end) {
|
9153
|
-
|
9154
|
-
parser
|
9575
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9576
|
+
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9155
9577
|
} else {
|
9156
9578
|
lex_mode_push_string_eof(parser);
|
9157
9579
|
}
|
@@ -9195,11 +9617,21 @@ parser_lex(pm_parser_t *parser) {
|
|
9195
9617
|
if (*parser->current.start != '_') {
|
9196
9618
|
size_t width = char_is_identifier_start(parser, parser->current.start);
|
9197
9619
|
|
9198
|
-
// If this isn't the beginning of an identifier, then
|
9199
|
-
// token as we've exhausted all of the
|
9200
|
-
// it and return the next
|
9620
|
+
// If this isn't the beginning of an identifier, then
|
9621
|
+
// it's an invalid token as we've exhausted all of the
|
9622
|
+
// other options. We'll skip past it and return the next
|
9623
|
+
// token after adding an appropriate error message.
|
9201
9624
|
if (!width) {
|
9202
|
-
|
9625
|
+
pm_diagnostic_id_t diag_id;
|
9626
|
+
if (*parser->current.start >= 0x80) {
|
9627
|
+
diag_id = PM_ERR_INVALID_MULTIBYTE_CHARACTER;
|
9628
|
+
} else if (char_is_ascii_printable(*parser->current.start) || (*parser->current.start == '\\')) {
|
9629
|
+
diag_id = PM_ERR_INVALID_PRINTABLE_CHARACTER;
|
9630
|
+
} else {
|
9631
|
+
diag_id = PM_ERR_INVALID_CHARACTER;
|
9632
|
+
}
|
9633
|
+
|
9634
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, *parser->current.start);
|
9203
9635
|
goto lex_next_token;
|
9204
9636
|
}
|
9205
9637
|
|
@@ -9306,7 +9738,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9306
9738
|
// and then find the first one.
|
9307
9739
|
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
9308
9740
|
const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
|
9309
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9741
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9310
9742
|
|
9311
9743
|
// If we haven't found an escape yet, then this buffer will be
|
9312
9744
|
// unallocated since we can refer directly to the source string.
|
@@ -9315,7 +9747,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9315
9747
|
while (breakpoint != NULL) {
|
9316
9748
|
// If we hit a null byte, skip directly past it.
|
9317
9749
|
if (*breakpoint == '\0') {
|
9318
|
-
breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
9750
|
+
breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
|
9319
9751
|
continue;
|
9320
9752
|
}
|
9321
9753
|
|
@@ -9334,7 +9766,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9334
9766
|
// we need to continue on past it.
|
9335
9767
|
if (lex_mode->as.list.nesting > 0) {
|
9336
9768
|
parser->current.end = breakpoint + 1;
|
9337
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9769
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9338
9770
|
lex_mode->as.list.nesting--;
|
9339
9771
|
continue;
|
9340
9772
|
}
|
@@ -9377,18 +9809,18 @@ parser_lex(pm_parser_t *parser) {
|
|
9377
9809
|
case '\t':
|
9378
9810
|
case '\v':
|
9379
9811
|
case '\\':
|
9380
|
-
|
9812
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9381
9813
|
parser->current.end++;
|
9382
9814
|
break;
|
9383
9815
|
case '\r':
|
9384
9816
|
parser->current.end++;
|
9385
9817
|
if (peek(parser) != '\n') {
|
9386
|
-
|
9818
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
9387
9819
|
break;
|
9388
9820
|
}
|
9389
9821
|
/* fallthrough */
|
9390
9822
|
case '\n':
|
9391
|
-
|
9823
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
9392
9824
|
|
9393
9825
|
if (parser->heredoc_end) {
|
9394
9826
|
// ... if we are on the same line as a heredoc,
|
@@ -9406,21 +9838,20 @@ parser_lex(pm_parser_t *parser) {
|
|
9406
9838
|
break;
|
9407
9839
|
default:
|
9408
9840
|
if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
|
9409
|
-
|
9841
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9410
9842
|
parser->current.end++;
|
9411
9843
|
} else if (lex_mode->as.list.interpolation) {
|
9412
9844
|
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
9413
9845
|
} else {
|
9414
|
-
|
9415
|
-
|
9416
|
-
parser->current.end++;
|
9846
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9847
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
9417
9848
|
}
|
9418
9849
|
|
9419
9850
|
break;
|
9420
9851
|
}
|
9421
9852
|
|
9422
9853
|
token_buffer.cursor = parser->current.end;
|
9423
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9854
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9424
9855
|
continue;
|
9425
9856
|
}
|
9426
9857
|
|
@@ -9433,7 +9864,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9433
9864
|
// that looked like an interpolated class or instance variable
|
9434
9865
|
// like "#@" but wasn't actually. In this case we'll just skip
|
9435
9866
|
// to the next breakpoint.
|
9436
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9867
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9437
9868
|
continue;
|
9438
9869
|
}
|
9439
9870
|
|
@@ -9448,7 +9879,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9448
9879
|
// and find the next breakpoint.
|
9449
9880
|
assert(*breakpoint == lex_mode->as.list.incrementor);
|
9450
9881
|
parser->current.end = breakpoint + 1;
|
9451
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9882
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9452
9883
|
lex_mode->as.list.nesting++;
|
9453
9884
|
continue;
|
9454
9885
|
}
|
@@ -9487,14 +9918,14 @@ parser_lex(pm_parser_t *parser) {
|
|
9487
9918
|
// regular expression. We'll use strpbrk to find the first of these
|
9488
9919
|
// characters.
|
9489
9920
|
const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
|
9490
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9921
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9491
9922
|
pm_token_buffer_t token_buffer = { { 0 }, 0 };
|
9492
9923
|
|
9493
9924
|
while (breakpoint != NULL) {
|
9494
9925
|
// If we hit a null byte, skip directly past it.
|
9495
9926
|
if (*breakpoint == '\0') {
|
9496
9927
|
parser->current.end = breakpoint + 1;
|
9497
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9928
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9498
9929
|
continue;
|
9499
9930
|
}
|
9500
9931
|
|
@@ -9516,7 +9947,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9516
9947
|
// If the terminator is not a newline, then we can set
|
9517
9948
|
// the next breakpoint and continue.
|
9518
9949
|
parser->current.end = breakpoint + 1;
|
9519
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9950
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9520
9951
|
continue;
|
9521
9952
|
}
|
9522
9953
|
}
|
@@ -9526,7 +9957,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9526
9957
|
if (*breakpoint == lex_mode->as.regexp.terminator) {
|
9527
9958
|
if (lex_mode->as.regexp.nesting > 0) {
|
9528
9959
|
parser->current.end = breakpoint + 1;
|
9529
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9960
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9530
9961
|
lex_mode->as.regexp.nesting--;
|
9531
9962
|
continue;
|
9532
9963
|
}
|
@@ -9571,9 +10002,9 @@ parser_lex(pm_parser_t *parser) {
|
|
9571
10002
|
parser->current.end++;
|
9572
10003
|
if (peek(parser) != '\n') {
|
9573
10004
|
if (lex_mode->as.regexp.terminator != '\r') {
|
9574
|
-
|
10005
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9575
10006
|
}
|
9576
|
-
|
10007
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
9577
10008
|
break;
|
9578
10009
|
}
|
9579
10010
|
/* fallthrough */
|
@@ -9608,25 +10039,24 @@ parser_lex(pm_parser_t *parser) {
|
|
9608
10039
|
case '$': case ')': case '*': case '+':
|
9609
10040
|
case '.': case '>': case '?': case ']':
|
9610
10041
|
case '^': case '|': case '}':
|
9611
|
-
|
10042
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9612
10043
|
break;
|
9613
10044
|
default:
|
9614
10045
|
break;
|
9615
10046
|
}
|
9616
10047
|
|
9617
|
-
|
10048
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9618
10049
|
parser->current.end++;
|
9619
10050
|
break;
|
9620
10051
|
}
|
9621
10052
|
|
9622
|
-
if (peeked < 0x80)
|
9623
|
-
|
9624
|
-
parser->current.end++;
|
10053
|
+
if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer, '\\');
|
10054
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
9625
10055
|
break;
|
9626
10056
|
}
|
9627
10057
|
|
9628
10058
|
token_buffer.cursor = parser->current.end;
|
9629
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10059
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9630
10060
|
continue;
|
9631
10061
|
}
|
9632
10062
|
|
@@ -9639,7 +10069,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9639
10069
|
// something that looked like an interpolated class or
|
9640
10070
|
// instance variable like "#@" but wasn't actually. In
|
9641
10071
|
// this case we'll just skip to the next breakpoint.
|
9642
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10072
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9643
10073
|
continue;
|
9644
10074
|
}
|
9645
10075
|
|
@@ -9654,7 +10084,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9654
10084
|
// and find the next breakpoint.
|
9655
10085
|
assert(*breakpoint == lex_mode->as.regexp.incrementor);
|
9656
10086
|
parser->current.end = breakpoint + 1;
|
9657
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10087
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9658
10088
|
lex_mode->as.regexp.nesting++;
|
9659
10089
|
continue;
|
9660
10090
|
}
|
@@ -9690,7 +10120,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9690
10120
|
// string. We'll use strpbrk to find the first of these characters.
|
9691
10121
|
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
9692
10122
|
const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
|
9693
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10123
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9694
10124
|
|
9695
10125
|
// If we haven't found an escape yet, then this buffer will be
|
9696
10126
|
// unallocated since we can refer directly to the source string.
|
@@ -9702,7 +10132,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9702
10132
|
if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
|
9703
10133
|
lex_mode->as.string.nesting++;
|
9704
10134
|
parser->current.end = breakpoint + 1;
|
9705
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10135
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9706
10136
|
continue;
|
9707
10137
|
}
|
9708
10138
|
|
@@ -9714,7 +10144,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9714
10144
|
// to continue on past it.
|
9715
10145
|
if (lex_mode->as.string.nesting > 0) {
|
9716
10146
|
parser->current.end = breakpoint + 1;
|
9717
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10147
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9718
10148
|
lex_mode->as.string.nesting--;
|
9719
10149
|
continue;
|
9720
10150
|
}
|
@@ -9756,7 +10186,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9756
10186
|
if (parser->heredoc_end == NULL) {
|
9757
10187
|
pm_newline_list_append(&parser->newline_list, breakpoint);
|
9758
10188
|
parser->current.end = breakpoint + 1;
|
9759
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10189
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9760
10190
|
continue;
|
9761
10191
|
} else {
|
9762
10192
|
parser->current.end = breakpoint + 1;
|
@@ -9770,7 +10200,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9770
10200
|
case '\0':
|
9771
10201
|
// Skip directly past the null character.
|
9772
10202
|
parser->current.end = breakpoint + 1;
|
9773
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10203
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9774
10204
|
break;
|
9775
10205
|
case '\\': {
|
9776
10206
|
// Here we hit escapes.
|
@@ -9788,23 +10218,23 @@ parser_lex(pm_parser_t *parser) {
|
|
9788
10218
|
|
9789
10219
|
switch (peeked) {
|
9790
10220
|
case '\\':
|
9791
|
-
|
10221
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9792
10222
|
parser->current.end++;
|
9793
10223
|
break;
|
9794
10224
|
case '\r':
|
9795
10225
|
parser->current.end++;
|
9796
10226
|
if (peek(parser) != '\n') {
|
9797
10227
|
if (!lex_mode->as.string.interpolation) {
|
9798
|
-
|
10228
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9799
10229
|
}
|
9800
|
-
|
10230
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
9801
10231
|
break;
|
9802
10232
|
}
|
9803
10233
|
/* fallthrough */
|
9804
10234
|
case '\n':
|
9805
10235
|
if (!lex_mode->as.string.interpolation) {
|
9806
|
-
|
9807
|
-
|
10236
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10237
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
9808
10238
|
}
|
9809
10239
|
|
9810
10240
|
if (parser->heredoc_end) {
|
@@ -9823,24 +10253,23 @@ parser_lex(pm_parser_t *parser) {
|
|
9823
10253
|
break;
|
9824
10254
|
default:
|
9825
10255
|
if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
|
9826
|
-
|
10256
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9827
10257
|
parser->current.end++;
|
9828
10258
|
} else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
|
9829
|
-
|
10259
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9830
10260
|
parser->current.end++;
|
9831
10261
|
} else if (lex_mode->as.string.interpolation) {
|
9832
10262
|
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
9833
10263
|
} else {
|
9834
|
-
|
9835
|
-
|
9836
|
-
parser->current.end++;
|
10264
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10265
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
9837
10266
|
}
|
9838
10267
|
|
9839
10268
|
break;
|
9840
10269
|
}
|
9841
10270
|
|
9842
10271
|
token_buffer.cursor = parser->current.end;
|
9843
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10272
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9844
10273
|
break;
|
9845
10274
|
}
|
9846
10275
|
case '#': {
|
@@ -9851,7 +10280,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9851
10280
|
// looked like an interpolated class or instance variable like "#@"
|
9852
10281
|
// but wasn't actually. In this case we'll just skip to the next
|
9853
10282
|
// breakpoint.
|
9854
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10283
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9855
10284
|
break;
|
9856
10285
|
}
|
9857
10286
|
|
@@ -9888,15 +10317,22 @@ parser_lex(pm_parser_t *parser) {
|
|
9888
10317
|
parser->next_start = NULL;
|
9889
10318
|
}
|
9890
10319
|
|
9891
|
-
//
|
9892
|
-
//
|
10320
|
+
// Now let's grab the information about the identifier off of the
|
10321
|
+
// current lex mode.
|
10322
|
+
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
10323
|
+
|
10324
|
+
// We'll check if we're at the end of the file. If we are, then we
|
10325
|
+
// will add an error (because we weren't able to find the
|
10326
|
+
// terminator) but still continue parsing so that content after the
|
10327
|
+
// declaration of the heredoc can be parsed.
|
9893
10328
|
if (parser->current.end >= parser->end) {
|
9894
|
-
|
10329
|
+
pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
|
10330
|
+
parser->next_start = lex_mode->as.heredoc.next_start;
|
10331
|
+
parser->heredoc_end = parser->current.end;
|
10332
|
+
lex_state_set(parser, PM_LEX_STATE_END);
|
10333
|
+
LEX(PM_TOKEN_HEREDOC_END);
|
9895
10334
|
}
|
9896
10335
|
|
9897
|
-
// Now let's grab the information about the identifier off of the current
|
9898
|
-
// lex mode.
|
9899
|
-
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
9900
10336
|
const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
|
9901
10337
|
size_t ident_length = lex_mode->as.heredoc.ident_length;
|
9902
10338
|
|
@@ -9972,7 +10408,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9972
10408
|
breakpoints[2] = '\0';
|
9973
10409
|
}
|
9974
10410
|
|
9975
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10411
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9976
10412
|
pm_token_buffer_t token_buffer = { { 0 }, 0 };
|
9977
10413
|
bool was_escaped_newline = false;
|
9978
10414
|
|
@@ -9981,7 +10417,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9981
10417
|
case '\0':
|
9982
10418
|
// Skip directly past the null character.
|
9983
10419
|
parser->current.end = breakpoint + 1;
|
9984
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10420
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9985
10421
|
break;
|
9986
10422
|
case '\n': {
|
9987
10423
|
if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
|
@@ -10056,7 +10492,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10056
10492
|
// Otherwise we hit a newline and it wasn't followed by
|
10057
10493
|
// a terminator, so we can continue parsing.
|
10058
10494
|
parser->current.end = breakpoint + 1;
|
10059
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10495
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10060
10496
|
break;
|
10061
10497
|
}
|
10062
10498
|
case '\\': {
|
@@ -10083,21 +10519,20 @@ parser_lex(pm_parser_t *parser) {
|
|
10083
10519
|
case '\r':
|
10084
10520
|
parser->current.end++;
|
10085
10521
|
if (peek(parser) != '\n') {
|
10086
|
-
|
10087
|
-
|
10522
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10523
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
10088
10524
|
break;
|
10089
10525
|
}
|
10090
10526
|
/* fallthrough */
|
10091
10527
|
case '\n':
|
10092
|
-
|
10093
|
-
|
10528
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10529
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
10094
10530
|
token_buffer.cursor = parser->current.end + 1;
|
10095
10531
|
breakpoint = parser->current.end;
|
10096
10532
|
continue;
|
10097
10533
|
default:
|
10098
|
-
|
10099
|
-
|
10100
|
-
pm_token_buffer_push(&token_buffer, peeked);
|
10534
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10535
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
10101
10536
|
break;
|
10102
10537
|
}
|
10103
10538
|
} else {
|
@@ -10105,7 +10540,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10105
10540
|
case '\r':
|
10106
10541
|
parser->current.end++;
|
10107
10542
|
if (peek(parser) != '\n') {
|
10108
|
-
|
10543
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
10109
10544
|
break;
|
10110
10545
|
}
|
10111
10546
|
/* fallthrough */
|
@@ -10121,7 +10556,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10121
10556
|
}
|
10122
10557
|
|
10123
10558
|
token_buffer.cursor = parser->current.end;
|
10124
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10559
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10125
10560
|
break;
|
10126
10561
|
}
|
10127
10562
|
case '#': {
|
@@ -10133,7 +10568,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10133
10568
|
// or instance variable like "#@" but wasn't
|
10134
10569
|
// actually. In this case we'll just skip to the
|
10135
10570
|
// next breakpoint.
|
10136
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10571
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10137
10572
|
break;
|
10138
10573
|
}
|
10139
10574
|
|
@@ -10184,8 +10619,8 @@ parser_lex(pm_parser_t *parser) {
|
|
10184
10619
|
typedef enum {
|
10185
10620
|
PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
|
10186
10621
|
PM_BINDING_POWER_STATEMENT = 2,
|
10187
|
-
|
10188
|
-
|
10622
|
+
PM_BINDING_POWER_MODIFIER_RESCUE = 4, // rescue
|
10623
|
+
PM_BINDING_POWER_MODIFIER = 6, // if unless until while
|
10189
10624
|
PM_BINDING_POWER_COMPOSITION = 8, // and or
|
10190
10625
|
PM_BINDING_POWER_NOT = 10, // not
|
10191
10626
|
PM_BINDING_POWER_MATCH = 12, // => in
|
@@ -10239,15 +10674,15 @@ typedef struct {
|
|
10239
10674
|
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
|
10240
10675
|
|
10241
10676
|
pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
|
10677
|
+
// rescue
|
10678
|
+
[PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
|
10679
|
+
|
10242
10680
|
// if unless until while
|
10243
10681
|
[PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
10244
10682
|
[PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
10245
10683
|
[PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
10246
10684
|
[PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
10247
10685
|
|
10248
|
-
// rescue
|
10249
|
-
[PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
|
10250
|
-
|
10251
10686
|
// and or
|
10252
10687
|
[PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
|
10253
10688
|
[PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
|
@@ -10381,14 +10816,6 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
|
|
10381
10816
|
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
|
10382
10817
|
}
|
10383
10818
|
|
10384
|
-
/**
|
10385
|
-
* Returns true if the current token is any of the five given types.
|
10386
|
-
*/
|
10387
|
-
static inline bool
|
10388
|
-
match5(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5) {
|
10389
|
-
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5);
|
10390
|
-
}
|
10391
|
-
|
10392
10819
|
/**
|
10393
10820
|
* Returns true if the current token is any of the six given types.
|
10394
10821
|
*/
|
@@ -10654,7 +11081,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
10654
11081
|
return target;
|
10655
11082
|
case PM_BACK_REFERENCE_READ_NODE:
|
10656
11083
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
10657
|
-
|
11084
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
10658
11085
|
return target;
|
10659
11086
|
case PM_GLOBAL_VARIABLE_READ_NODE:
|
10660
11087
|
assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
|
@@ -10792,7 +11219,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
10792
11219
|
}
|
10793
11220
|
case PM_BACK_REFERENCE_READ_NODE:
|
10794
11221
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
10795
|
-
|
11222
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
10796
11223
|
/* fallthrough */
|
10797
11224
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
10798
11225
|
pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
|
@@ -10866,7 +11293,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
10866
11293
|
return target;
|
10867
11294
|
}
|
10868
11295
|
|
10869
|
-
if (
|
11296
|
+
if (char_is_identifier_start(parser, call->message_loc.start)) {
|
10870
11297
|
// When we get here, we have a method call, because it was
|
10871
11298
|
// previously marked as a method call but now we have an =. This
|
10872
11299
|
// looks like:
|
@@ -10967,7 +11394,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
10967
11394
|
pm_multi_target_node_targets_append(parser, result, target);
|
10968
11395
|
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
10969
11396
|
// If we get here, then we have a trailing , in a multi target node.
|
10970
|
-
// We'll
|
11397
|
+
// We'll add an implicit rest node to represent this.
|
10971
11398
|
pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
|
10972
11399
|
pm_multi_target_node_targets_append(parser, result, rest);
|
10973
11400
|
break;
|
@@ -10984,6 +11411,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
10984
11411
|
static pm_node_t *
|
10985
11412
|
parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power) {
|
10986
11413
|
pm_node_t *result = parse_targets(parser, first_target, binding_power);
|
11414
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
10987
11415
|
|
10988
11416
|
// Ensure that we have either an = or a ) after the targets.
|
10989
11417
|
if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
@@ -11024,7 +11452,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
11024
11452
|
break;
|
11025
11453
|
}
|
11026
11454
|
|
11027
|
-
// If we have a terminator, then we will parse all
|
11455
|
+
// If we have a terminator, then we will parse all consecutive terminators
|
11028
11456
|
// and then continue parsing the statements list.
|
11029
11457
|
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
11030
11458
|
// If we have a terminator, then we will continue parsing the statements
|
@@ -11056,8 +11484,13 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
11056
11484
|
|
11057
11485
|
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
|
11058
11486
|
if (context_terminator(context, &parser->current)) break;
|
11059
|
-
} else {
|
11060
|
-
|
11487
|
+
} else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
|
11488
|
+
// This is an inlined version of accept1 because the error that we
|
11489
|
+
// want to add has varargs. If this happens again, we should
|
11490
|
+
// probably extract a helper function.
|
11491
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
11492
|
+
parser->previous.start = parser->previous.end;
|
11493
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
11061
11494
|
}
|
11062
11495
|
}
|
11063
11496
|
|
@@ -11084,8 +11517,9 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
|
|
11084
11517
|
|
11085
11518
|
if (token_begins_expression_p(parser->current.type)) {
|
11086
11519
|
value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
11087
|
-
}
|
11088
|
-
|
11520
|
+
}
|
11521
|
+
else {
|
11522
|
+
pm_parser_scope_forwarding_keywords_check(parser, &operator);
|
11089
11523
|
}
|
11090
11524
|
|
11091
11525
|
element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
|
@@ -11234,13 +11668,8 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
11234
11668
|
if (token_begins_expression_p(parser->current.type)) {
|
11235
11669
|
expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_ARGUMENT);
|
11236
11670
|
} else {
|
11237
|
-
|
11238
|
-
|
11239
|
-
pm_constant_id_t ellipsis_id = pm_parser_constant_id_constant(parser, "...", 3);
|
11240
|
-
if (pm_parser_local_depth_constant_id(parser, ellipsis_id) == -1) {
|
11241
|
-
pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
|
11242
|
-
}
|
11243
|
-
}
|
11671
|
+
// Block forwarding is available in a method that has a `...` parameter (e.g. `def foo(...); bar(&); end`).
|
11672
|
+
pm_parser_scope_forwarding_block_check(parser, &operator);
|
11244
11673
|
}
|
11245
11674
|
|
11246
11675
|
argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
|
@@ -11258,10 +11687,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
11258
11687
|
pm_token_t operator = parser->previous;
|
11259
11688
|
|
11260
11689
|
if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
|
11261
|
-
|
11262
|
-
pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
11263
|
-
}
|
11264
|
-
|
11690
|
+
pm_parser_scope_forwarding_positionals_check(parser, &operator);
|
11265
11691
|
argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
|
11266
11692
|
} else {
|
11267
11693
|
pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
|
@@ -11287,15 +11713,14 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
11287
11713
|
pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
|
11288
11714
|
argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
|
11289
11715
|
} else {
|
11290
|
-
|
11291
|
-
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
11292
|
-
}
|
11716
|
+
pm_parser_scope_forwarding_all_check(parser, &parser->previous);
|
11293
11717
|
if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
|
11294
11718
|
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
|
11295
11719
|
}
|
11296
11720
|
|
11297
11721
|
argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
|
11298
11722
|
parse_arguments_append(parser, arguments, argument);
|
11723
|
+
arguments->has_forwarding = true;
|
11299
11724
|
parsed_forwarding_arguments = true;
|
11300
11725
|
break;
|
11301
11726
|
}
|
@@ -11338,6 +11763,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
11338
11763
|
}
|
11339
11764
|
|
11340
11765
|
parsed_bare_hash = true;
|
11766
|
+
} else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
|
11767
|
+
// TODO: Could we solve this with binding powers instead?
|
11768
|
+
pm_parser_err_current(parser, PM_ERR_ARGUMENT_IN);
|
11341
11769
|
}
|
11342
11770
|
|
11343
11771
|
parse_arguments_append(parser, arguments, argument);
|
@@ -11414,7 +11842,9 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
|
|
11414
11842
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
11415
11843
|
pm_token_t name = parser->previous;
|
11416
11844
|
value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
|
11417
|
-
pm_parser_parameter_name_check(parser, &name)
|
11845
|
+
if (pm_parser_parameter_name_check(parser, &name)) {
|
11846
|
+
pm_node_flag_set_repeated_parameter(value);
|
11847
|
+
}
|
11418
11848
|
pm_parser_local_add_token(parser, &name);
|
11419
11849
|
}
|
11420
11850
|
|
@@ -11424,7 +11854,9 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
|
|
11424
11854
|
pm_token_t name = parser->previous;
|
11425
11855
|
|
11426
11856
|
param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
|
11427
|
-
pm_parser_parameter_name_check(parser, &name)
|
11857
|
+
if (pm_parser_parameter_name_check(parser, &name)) {
|
11858
|
+
pm_node_flag_set_repeated_parameter(param);
|
11859
|
+
}
|
11428
11860
|
pm_parser_local_add_token(parser, &name);
|
11429
11861
|
}
|
11430
11862
|
|
@@ -11541,19 +11973,20 @@ parse_parameters(
|
|
11541
11973
|
pm_token_t operator = parser->previous;
|
11542
11974
|
pm_token_t name;
|
11543
11975
|
|
11976
|
+
bool repeated = false;
|
11544
11977
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
11545
11978
|
name = parser->previous;
|
11546
|
-
pm_parser_parameter_name_check(parser, &name);
|
11979
|
+
repeated = pm_parser_parameter_name_check(parser, &name);
|
11547
11980
|
pm_parser_local_add_token(parser, &name);
|
11548
11981
|
} else {
|
11549
11982
|
name = not_provided(parser);
|
11550
|
-
|
11551
|
-
if (allows_forwarding_parameters) {
|
11552
|
-
pm_parser_local_add_token(parser, &operator);
|
11553
|
-
}
|
11983
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_BLOCK;
|
11554
11984
|
}
|
11555
11985
|
|
11556
11986
|
pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
|
11987
|
+
if (repeated) {
|
11988
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)param);
|
11989
|
+
}
|
11557
11990
|
if (params->block == NULL) {
|
11558
11991
|
pm_parameters_node_block_set(params, param);
|
11559
11992
|
} else {
|
@@ -11572,9 +12005,8 @@ parse_parameters(
|
|
11572
12005
|
update_parameter_state(parser, &parser->current, &order);
|
11573
12006
|
parser_lex(parser);
|
11574
12007
|
|
11575
|
-
|
11576
|
-
|
11577
|
-
}
|
12008
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_BLOCK;
|
12009
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_ALL;
|
11578
12010
|
|
11579
12011
|
pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
|
11580
12012
|
if (params->keyword_rest != NULL) {
|
@@ -11626,20 +12058,23 @@ parse_parameters(
|
|
11626
12058
|
}
|
11627
12059
|
|
11628
12060
|
pm_token_t name = parser->previous;
|
11629
|
-
pm_parser_parameter_name_check(parser, &name);
|
12061
|
+
bool repeated = pm_parser_parameter_name_check(parser, &name);
|
11630
12062
|
pm_parser_local_add_token(parser, &name);
|
11631
12063
|
|
11632
12064
|
if (accept1(parser, PM_TOKEN_EQUAL)) {
|
11633
12065
|
pm_token_t operator = parser->previous;
|
11634
12066
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
11635
|
-
|
11636
|
-
|
12067
|
+
|
12068
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &name));
|
11637
12069
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
|
11638
12070
|
|
11639
12071
|
pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
|
12072
|
+
if (repeated) {
|
12073
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)param);
|
12074
|
+
}
|
11640
12075
|
pm_parameters_node_optionals_append(params, param);
|
11641
12076
|
|
11642
|
-
parser
|
12077
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
11643
12078
|
context_pop(parser);
|
11644
12079
|
|
11645
12080
|
// If parsing the value of the parameter resulted in error recovery,
|
@@ -11651,9 +12086,15 @@ parse_parameters(
|
|
11651
12086
|
}
|
11652
12087
|
} else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
|
11653
12088
|
pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
|
12089
|
+
if (repeated) {
|
12090
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)param);
|
12091
|
+
}
|
11654
12092
|
pm_parameters_node_requireds_append(params, (pm_node_t *) param);
|
11655
12093
|
} else {
|
11656
12094
|
pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
|
12095
|
+
if (repeated) {
|
12096
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)param);
|
12097
|
+
}
|
11657
12098
|
pm_parameters_node_posts_append(params, (pm_node_t *) param);
|
11658
12099
|
}
|
11659
12100
|
|
@@ -11668,7 +12109,7 @@ parse_parameters(
|
|
11668
12109
|
pm_token_t local = name;
|
11669
12110
|
local.end -= 1;
|
11670
12111
|
|
11671
|
-
pm_parser_parameter_name_check(parser, &local);
|
12112
|
+
bool repeated = pm_parser_parameter_name_check(parser, &local);
|
11672
12113
|
pm_parser_local_add_token(parser, &local);
|
11673
12114
|
|
11674
12115
|
switch (parser->current.type) {
|
@@ -11676,6 +12117,9 @@ parse_parameters(
|
|
11676
12117
|
case PM_TOKEN_PARENTHESIS_RIGHT:
|
11677
12118
|
case PM_TOKEN_PIPE: {
|
11678
12119
|
pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
|
12120
|
+
if (repeated) {
|
12121
|
+
pm_node_flag_set_repeated_parameter(param);
|
12122
|
+
}
|
11679
12123
|
pm_parameters_node_keywords_append(params, param);
|
11680
12124
|
break;
|
11681
12125
|
}
|
@@ -11687,6 +12131,9 @@ parse_parameters(
|
|
11687
12131
|
}
|
11688
12132
|
|
11689
12133
|
pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
|
12134
|
+
if (repeated) {
|
12135
|
+
pm_node_flag_set_repeated_parameter(param);
|
12136
|
+
}
|
11690
12137
|
pm_parameters_node_keywords_append(params, param);
|
11691
12138
|
break;
|
11692
12139
|
}
|
@@ -11695,17 +12142,22 @@ parse_parameters(
|
|
11695
12142
|
|
11696
12143
|
if (token_begins_expression_p(parser->current.type)) {
|
11697
12144
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
11698
|
-
|
11699
|
-
|
12145
|
+
|
12146
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &local));
|
11700
12147
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
|
11701
|
-
|
12148
|
+
|
12149
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
11702
12150
|
context_pop(parser);
|
12151
|
+
|
11703
12152
|
param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
|
11704
12153
|
}
|
11705
12154
|
else {
|
11706
12155
|
param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
|
11707
12156
|
}
|
11708
12157
|
|
12158
|
+
if (repeated) {
|
12159
|
+
pm_node_flag_set_repeated_parameter(param);
|
12160
|
+
}
|
11709
12161
|
pm_parameters_node_keywords_append(params, param);
|
11710
12162
|
|
11711
12163
|
// If parsing the value of the parameter resulted in error recovery,
|
@@ -11728,20 +12180,21 @@ parse_parameters(
|
|
11728
12180
|
|
11729
12181
|
pm_token_t operator = parser->previous;
|
11730
12182
|
pm_token_t name;
|
11731
|
-
|
12183
|
+
bool repeated = false;
|
11732
12184
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
11733
12185
|
name = parser->previous;
|
11734
|
-
pm_parser_parameter_name_check(parser, &name);
|
12186
|
+
repeated = pm_parser_parameter_name_check(parser, &name);
|
11735
12187
|
pm_parser_local_add_token(parser, &name);
|
11736
12188
|
} else {
|
11737
12189
|
name = not_provided(parser);
|
11738
12190
|
|
11739
|
-
|
11740
|
-
pm_parser_local_add_token(parser, &operator);
|
11741
|
-
}
|
12191
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_POSITIONALS;
|
11742
12192
|
}
|
11743
12193
|
|
11744
12194
|
pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
|
12195
|
+
if (repeated) {
|
12196
|
+
pm_node_flag_set_repeated_parameter(param);
|
12197
|
+
}
|
11745
12198
|
if (params->rest == NULL) {
|
11746
12199
|
pm_parameters_node_rest_set(params, param);
|
11747
12200
|
} else {
|
@@ -11764,19 +12217,21 @@ parse_parameters(
|
|
11764
12217
|
} else {
|
11765
12218
|
pm_token_t name;
|
11766
12219
|
|
12220
|
+
bool repeated = false;
|
11767
12221
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
11768
12222
|
name = parser->previous;
|
11769
|
-
pm_parser_parameter_name_check(parser, &name);
|
12223
|
+
repeated = pm_parser_parameter_name_check(parser, &name);
|
11770
12224
|
pm_parser_local_add_token(parser, &name);
|
11771
12225
|
} else {
|
11772
12226
|
name = not_provided(parser);
|
11773
12227
|
|
11774
|
-
|
11775
|
-
pm_parser_local_add_token(parser, &operator);
|
11776
|
-
}
|
12228
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_KEYWORDS;
|
11777
12229
|
}
|
11778
12230
|
|
11779
12231
|
param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
|
12232
|
+
if (repeated) {
|
12233
|
+
pm_node_flag_set_repeated_parameter(param);
|
12234
|
+
}
|
11780
12235
|
}
|
11781
12236
|
|
11782
12237
|
if (params->keyword_rest == NULL) {
|
@@ -11964,25 +12419,10 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, bool def_p) {
|
|
11964
12419
|
}
|
11965
12420
|
|
11966
12421
|
static inline pm_begin_node_t *
|
11967
|
-
parse_rescues_as_begin(pm_parser_t *parser, pm_statements_node_t *statements, bool def_p) {
|
12422
|
+
parse_rescues_as_begin(pm_parser_t *parser, const uint8_t *start, pm_statements_node_t *statements, bool def_p) {
|
11968
12423
|
pm_token_t no_begin_token = not_provided(parser);
|
11969
12424
|
pm_begin_node_t *begin_node = pm_begin_node_create(parser, &no_begin_token, statements);
|
11970
12425
|
parse_rescues(parser, begin_node, def_p);
|
11971
|
-
|
11972
|
-
// All nodes within a begin node are optional, so we look
|
11973
|
-
// for the earliest possible node that we can use to set
|
11974
|
-
// the BeginNode's start location
|
11975
|
-
const uint8_t *start = begin_node->base.location.start;
|
11976
|
-
if (begin_node->statements) {
|
11977
|
-
start = begin_node->statements->base.location.start;
|
11978
|
-
} else if (begin_node->rescue_clause) {
|
11979
|
-
start = begin_node->rescue_clause->base.location.start;
|
11980
|
-
} else if (begin_node->else_clause) {
|
11981
|
-
start = begin_node->else_clause->base.location.start;
|
11982
|
-
} else if (begin_node->ensure_clause) {
|
11983
|
-
start = begin_node->ensure_clause->base.location.start;
|
11984
|
-
}
|
11985
|
-
|
11986
12426
|
begin_node->base.location.start = start;
|
11987
12427
|
return begin_node;
|
11988
12428
|
}
|
@@ -12012,10 +12452,13 @@ parse_block_parameters(
|
|
12012
12452
|
if ((opening->type != PM_TOKEN_NOT_PROVIDED) && accept1(parser, PM_TOKEN_SEMICOLON)) {
|
12013
12453
|
do {
|
12014
12454
|
expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
|
12015
|
-
pm_parser_parameter_name_check(parser, &parser->previous);
|
12455
|
+
bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
|
12016
12456
|
pm_parser_local_add_token(parser, &parser->previous);
|
12017
12457
|
|
12018
12458
|
pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
|
12459
|
+
if (repeated) {
|
12460
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)local);
|
12461
|
+
}
|
12019
12462
|
pm_block_parameters_node_append_local(block_parameters, local);
|
12020
12463
|
} while (accept1(parser, PM_TOKEN_COMMA));
|
12021
12464
|
}
|
@@ -12031,8 +12474,10 @@ parse_block(pm_parser_t *parser) {
|
|
12031
12474
|
pm_token_t opening = parser->previous;
|
12032
12475
|
accept1(parser, PM_TOKEN_NEWLINE);
|
12033
12476
|
|
12477
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
12034
12478
|
pm_accepts_block_stack_push(parser, true);
|
12035
12479
|
pm_parser_scope_push(parser, false);
|
12480
|
+
|
12036
12481
|
pm_block_parameters_node_t *block_parameters = NULL;
|
12037
12482
|
|
12038
12483
|
if (accept1(parser, PM_TOKEN_PIPE)) {
|
@@ -12053,12 +12498,6 @@ parse_block(pm_parser_t *parser) {
|
|
12053
12498
|
pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
|
12054
12499
|
}
|
12055
12500
|
|
12056
|
-
uint32_t locals_body_index = 0;
|
12057
|
-
|
12058
|
-
if (block_parameters) {
|
12059
|
-
locals_body_index = (uint32_t) parser->current_scope->locals.size;
|
12060
|
-
}
|
12061
|
-
|
12062
12501
|
accept1(parser, PM_TOKEN_NEWLINE);
|
12063
12502
|
pm_node_t *statements = NULL;
|
12064
12503
|
|
@@ -12078,7 +12517,7 @@ parse_block(pm_parser_t *parser) {
|
|
12078
12517
|
|
12079
12518
|
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
|
12080
12519
|
assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
|
12081
|
-
statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
|
12520
|
+
statements = (pm_node_t *) parse_rescues_as_begin(parser, opening.start, (pm_statements_node_t *) statements, false);
|
12082
12521
|
}
|
12083
12522
|
}
|
12084
12523
|
|
@@ -12090,13 +12529,14 @@ parse_block(pm_parser_t *parser) {
|
|
12090
12529
|
|
12091
12530
|
if (parameters == NULL && (maximum > 0)) {
|
12092
12531
|
parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = opening.start, .end = parser->previous.end }, maximum);
|
12093
|
-
locals_body_index = maximum;
|
12094
12532
|
}
|
12095
12533
|
|
12096
12534
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
12097
12535
|
pm_parser_scope_pop(parser);
|
12098
12536
|
pm_accepts_block_stack_pop(parser);
|
12099
|
-
|
12537
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
12538
|
+
|
12539
|
+
return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
|
12100
12540
|
}
|
12101
12541
|
|
12102
12542
|
/**
|
@@ -12157,14 +12597,20 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
12157
12597
|
}
|
12158
12598
|
|
12159
12599
|
if (block != NULL) {
|
12160
|
-
if (arguments->block == NULL) {
|
12600
|
+
if (arguments->block == NULL && !arguments->has_forwarding) {
|
12161
12601
|
arguments->block = (pm_node_t *) block;
|
12162
12602
|
} else {
|
12163
|
-
|
12164
|
-
|
12165
|
-
|
12603
|
+
if (arguments->has_forwarding) {
|
12604
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_FORWARDING);
|
12605
|
+
} else {
|
12606
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
|
12607
|
+
}
|
12608
|
+
if (arguments->block != NULL) {
|
12609
|
+
if (arguments->arguments == NULL) {
|
12610
|
+
arguments->arguments = pm_arguments_node_create(parser);
|
12611
|
+
}
|
12612
|
+
pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
|
12166
12613
|
}
|
12167
|
-
pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
|
12168
12614
|
arguments->block = (pm_node_t *) block;
|
12169
12615
|
}
|
12170
12616
|
}
|
@@ -12384,8 +12830,14 @@ static inline pm_node_flags_t
|
|
12384
12830
|
parse_unescaped_encoding(const pm_parser_t *parser) {
|
12385
12831
|
if (parser->explicit_encoding != NULL) {
|
12386
12832
|
if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
|
12833
|
+
// If the there's an explicit encoding and it's using a UTF-8 escape
|
12834
|
+
// sequence, then mark the string as UTF-8.
|
12387
12835
|
return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
|
12388
12836
|
} else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
|
12837
|
+
// If there's a non-UTF-8 escape sequence being used, then the
|
12838
|
+
// string uses the source encoding, unless the source is marked as
|
12839
|
+
// US-ASCII. In that case the string is forced as ASCII-8BIT in
|
12840
|
+
// order to keep the string valid.
|
12389
12841
|
return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
|
12390
12842
|
}
|
12391
12843
|
}
|
@@ -12509,14 +12961,54 @@ parse_string_part(pm_parser_t *parser) {
|
|
12509
12961
|
}
|
12510
12962
|
}
|
12511
12963
|
|
12964
|
+
/**
|
12965
|
+
* When creating a symbol, unary operators that cannot be binary operators
|
12966
|
+
* automatically drop trailing `@` characters. This happens at the parser level,
|
12967
|
+
* such that `~@` is parsed as `~` and `!@` is parsed as `!`. We do that here.
|
12968
|
+
*/
|
12969
|
+
static const uint8_t *
|
12970
|
+
parse_operator_symbol_name(const pm_token_t *name) {
|
12971
|
+
switch (name->type) {
|
12972
|
+
case PM_TOKEN_TILDE:
|
12973
|
+
case PM_TOKEN_BANG:
|
12974
|
+
if (name->end[-1] == '@') return name->end - 1;
|
12975
|
+
/* fallthrough */
|
12976
|
+
default:
|
12977
|
+
return name->end;
|
12978
|
+
}
|
12979
|
+
}
|
12980
|
+
|
12981
|
+
static pm_node_t *
|
12982
|
+
parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
|
12983
|
+
pm_token_t closing = not_provided(parser);
|
12984
|
+
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
|
12985
|
+
|
12986
|
+
const uint8_t *end = parse_operator_symbol_name(&parser->current);
|
12987
|
+
|
12988
|
+
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
|
12989
|
+
parser_lex(parser);
|
12990
|
+
|
12991
|
+
pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
|
12992
|
+
pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
|
12993
|
+
|
12994
|
+
return (pm_node_t *) symbol;
|
12995
|
+
}
|
12996
|
+
|
12997
|
+
/**
|
12998
|
+
* Parse a symbol node. This function will get called immediately after finding
|
12999
|
+
* a symbol opening token. This handles parsing bare symbols and interpolated
|
13000
|
+
* symbols.
|
13001
|
+
*/
|
12512
13002
|
static pm_node_t *
|
12513
13003
|
parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state) {
|
12514
|
-
pm_token_t opening = parser->previous;
|
13004
|
+
const pm_token_t opening = parser->previous;
|
12515
13005
|
|
12516
13006
|
if (lex_mode->mode != PM_LEX_STRING) {
|
12517
13007
|
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
|
12518
13008
|
|
12519
13009
|
switch (parser->current.type) {
|
13010
|
+
case PM_CASE_OPERATOR:
|
13011
|
+
return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
|
12520
13012
|
case PM_TOKEN_IDENTIFIER:
|
12521
13013
|
case PM_TOKEN_CONSTANT:
|
12522
13014
|
case PM_TOKEN_INSTANCE_VARIABLE:
|
@@ -12528,10 +13020,6 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
12528
13020
|
case PM_CASE_KEYWORD:
|
12529
13021
|
parser_lex(parser);
|
12530
13022
|
break;
|
12531
|
-
case PM_CASE_OPERATOR:
|
12532
|
-
lex_state_set(parser, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
|
12533
|
-
parser_lex(parser);
|
12534
|
-
break;
|
12535
13023
|
default:
|
12536
13024
|
expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
|
12537
13025
|
break;
|
@@ -12541,6 +13029,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
12541
13029
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
12542
13030
|
|
12543
13031
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
13032
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
13033
|
+
|
12544
13034
|
return (pm_node_t *) symbol;
|
12545
13035
|
}
|
12546
13036
|
|
@@ -12637,7 +13127,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
12637
13127
|
} else {
|
12638
13128
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
|
12639
13129
|
}
|
12640
|
-
|
13130
|
+
|
13131
|
+
return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
12641
13132
|
}
|
12642
13133
|
|
12643
13134
|
/**
|
@@ -12647,8 +13138,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
12647
13138
|
static inline pm_node_t *
|
12648
13139
|
parse_undef_argument(pm_parser_t *parser) {
|
12649
13140
|
switch (parser->current.type) {
|
13141
|
+
case PM_CASE_OPERATOR: {
|
13142
|
+
const pm_token_t opening = not_provided(parser);
|
13143
|
+
return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
|
13144
|
+
}
|
12650
13145
|
case PM_CASE_KEYWORD:
|
12651
|
-
case PM_CASE_OPERATOR:
|
12652
13146
|
case PM_TOKEN_CONSTANT:
|
12653
13147
|
case PM_TOKEN_IDENTIFIER:
|
12654
13148
|
case PM_TOKEN_METHOD_NAME: {
|
@@ -12659,6 +13153,8 @@ parse_undef_argument(pm_parser_t *parser) {
|
|
12659
13153
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
12660
13154
|
|
12661
13155
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
13156
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
13157
|
+
|
12662
13158
|
return (pm_node_t *) symbol;
|
12663
13159
|
}
|
12664
13160
|
case PM_TOKEN_SYMBOL_BEGIN: {
|
@@ -12682,21 +13178,24 @@ parse_undef_argument(pm_parser_t *parser) {
|
|
12682
13178
|
static inline pm_node_t *
|
12683
13179
|
parse_alias_argument(pm_parser_t *parser, bool first) {
|
12684
13180
|
switch (parser->current.type) {
|
12685
|
-
case PM_CASE_OPERATOR:
|
13181
|
+
case PM_CASE_OPERATOR: {
|
13182
|
+
const pm_token_t opening = not_provided(parser);
|
13183
|
+
return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
|
13184
|
+
}
|
12686
13185
|
case PM_CASE_KEYWORD:
|
12687
13186
|
case PM_TOKEN_CONSTANT:
|
12688
13187
|
case PM_TOKEN_IDENTIFIER:
|
12689
13188
|
case PM_TOKEN_METHOD_NAME: {
|
12690
|
-
if (first)
|
12691
|
-
lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
|
12692
|
-
}
|
12693
|
-
|
13189
|
+
if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
|
12694
13190
|
parser_lex(parser);
|
13191
|
+
|
12695
13192
|
pm_token_t opening = not_provided(parser);
|
12696
13193
|
pm_token_t closing = not_provided(parser);
|
12697
13194
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
12698
13195
|
|
12699
13196
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
13197
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
13198
|
+
|
12700
13199
|
return (pm_node_t *) symbol;
|
12701
13200
|
}
|
12702
13201
|
case PM_TOKEN_SYMBOL_BEGIN: {
|
@@ -12733,6 +13232,64 @@ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
|
|
12733
13232
|
return false;
|
12734
13233
|
}
|
12735
13234
|
|
13235
|
+
/**
|
13236
|
+
* These are the names of the various numbered parameters. We have them here so
|
13237
|
+
* that when we insert them into the constant pool we can use a constant string
|
13238
|
+
* and not have to allocate.
|
13239
|
+
*/
|
13240
|
+
static const char * const pm_numbered_parameter_names[] = {
|
13241
|
+
"_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
|
13242
|
+
};
|
13243
|
+
|
13244
|
+
/**
|
13245
|
+
* Parse an identifier into a local variable read. If the local variable
|
13246
|
+
* is not found, it returns NULL instead.
|
13247
|
+
*/
|
13248
|
+
static pm_local_variable_read_node_t *
|
13249
|
+
parse_variable(pm_parser_t *parser) {
|
13250
|
+
int depth;
|
13251
|
+
if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
|
13252
|
+
return pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
|
13253
|
+
}
|
13254
|
+
|
13255
|
+
if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
|
13256
|
+
// Now that we know we have a numbered parameter, we need to check
|
13257
|
+
// if it's allowed in this context. If it is, then we will create a
|
13258
|
+
// local variable read. If it's not, then we'll create a normal call
|
13259
|
+
// node but add an error.
|
13260
|
+
if (parser->current_scope->explicit_params) {
|
13261
|
+
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
|
13262
|
+
} else if (outer_scope_using_numbered_parameters_p(parser)) {
|
13263
|
+
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
|
13264
|
+
} else {
|
13265
|
+
// Indicate that this scope is using numbered params so that child
|
13266
|
+
// scopes cannot. We subtract the value for the character '0' to get
|
13267
|
+
// the actual integer value of the number (only _1 through _9 are
|
13268
|
+
// valid).
|
13269
|
+
uint8_t numbered_parameters = (uint8_t) (parser->previous.start[1] - '0');
|
13270
|
+
if (numbered_parameters > parser->current_scope->numbered_parameters) {
|
13271
|
+
parser->current_scope->numbered_parameters = numbered_parameters;
|
13272
|
+
pm_parser_numbered_parameters_set(parser, numbered_parameters);
|
13273
|
+
}
|
13274
|
+
|
13275
|
+
// When you use a numbered parameter, it implies the existence
|
13276
|
+
// of all of the locals that exist before it. For example,
|
13277
|
+
// referencing _2 means that _1 must exist. Therefore here we
|
13278
|
+
// loop through all of the possibilities and add them into the
|
13279
|
+
// constant pool.
|
13280
|
+
for (uint8_t numbered_parameter = 1; numbered_parameter <= numbered_parameters - 1; numbered_parameter++) {
|
13281
|
+
pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameter - 1], 2);
|
13282
|
+
}
|
13283
|
+
|
13284
|
+
// Finally we can create the local variable read node.
|
13285
|
+
pm_constant_id_t name_id = pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameters - 1], 2);
|
13286
|
+
return pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
13287
|
+
}
|
13288
|
+
}
|
13289
|
+
|
13290
|
+
return NULL;
|
13291
|
+
}
|
13292
|
+
|
12736
13293
|
/**
|
12737
13294
|
* Parse an identifier into either a local variable read or a call.
|
12738
13295
|
*/
|
@@ -12741,56 +13298,8 @@ parse_variable_call(pm_parser_t *parser) {
|
|
12741
13298
|
pm_node_flags_t flags = 0;
|
12742
13299
|
|
12743
13300
|
if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
|
12744
|
-
|
12745
|
-
if (
|
12746
|
-
return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
|
12747
|
-
}
|
12748
|
-
|
12749
|
-
if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
|
12750
|
-
// Now that we know we have a numbered parameter, we need to check
|
12751
|
-
// if it's allowed in this context. If it is, then we will create a
|
12752
|
-
// local variable read. If it's not, then we'll create a normal call
|
12753
|
-
// node but add an error.
|
12754
|
-
if (parser->current_scope->explicit_params) {
|
12755
|
-
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
|
12756
|
-
} else if (outer_scope_using_numbered_parameters_p(parser)) {
|
12757
|
-
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
|
12758
|
-
} else {
|
12759
|
-
// Indicate that this scope is using numbered params so that child
|
12760
|
-
// scopes cannot.
|
12761
|
-
uint8_t number = parser->previous.start[1];
|
12762
|
-
|
12763
|
-
// We subtract the value for the character '0' to get the actual
|
12764
|
-
// integer value of the number (only _1 through _9 are valid)
|
12765
|
-
uint8_t numbered_parameters = (uint8_t) (number - '0');
|
12766
|
-
if (numbered_parameters > parser->current_scope->numbered_parameters) {
|
12767
|
-
parser->current_scope->numbered_parameters = numbered_parameters;
|
12768
|
-
pm_parser_numbered_parameters_set(parser, numbered_parameters);
|
12769
|
-
}
|
12770
|
-
|
12771
|
-
// When you use a numbered parameter, it implies the existence
|
12772
|
-
// of all of the locals that exist before it. For example,
|
12773
|
-
// referencing _2 means that _1 must exist. Therefore here we
|
12774
|
-
// loop through all of the possibilities and add them into the
|
12775
|
-
// constant pool.
|
12776
|
-
uint8_t current = '1';
|
12777
|
-
uint8_t *value;
|
12778
|
-
|
12779
|
-
while (current < number) {
|
12780
|
-
value = malloc(2);
|
12781
|
-
value[0] = '_';
|
12782
|
-
value[1] = current++;
|
12783
|
-
pm_parser_local_add_owned(parser, value, 2);
|
12784
|
-
}
|
12785
|
-
|
12786
|
-
// Now we can add the actual token that is being used. For
|
12787
|
-
// this one we can add a shared version since it is directly
|
12788
|
-
// referenced in the source.
|
12789
|
-
pm_parser_local_add_token(parser, &parser->previous);
|
12790
|
-
return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
12791
|
-
}
|
12792
|
-
}
|
12793
|
-
|
13301
|
+
pm_local_variable_read_node_t *node = parse_variable(parser);
|
13302
|
+
if (node != NULL) return (pm_node_t *) node;
|
12794
13303
|
flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
|
12795
13304
|
}
|
12796
13305
|
|
@@ -13076,43 +13585,77 @@ parse_pattern_keyword_rest(pm_parser_t *parser) {
|
|
13076
13585
|
return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
|
13077
13586
|
}
|
13078
13587
|
|
13588
|
+
/**
|
13589
|
+
* Create an implicit node for the value of a hash pattern that has omitted the
|
13590
|
+
* value. This will use an implicit local variable target.
|
13591
|
+
*/
|
13592
|
+
static pm_node_t *
|
13593
|
+
parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_symbol_node_t *key) {
|
13594
|
+
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
|
13595
|
+
pm_constant_id_t name = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
|
13596
|
+
|
13597
|
+
int current_depth = pm_parser_local_depth_constant_id(parser, name);
|
13598
|
+
uint32_t depth;
|
13599
|
+
|
13600
|
+
if (current_depth == -1) {
|
13601
|
+
pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
|
13602
|
+
depth = 0;
|
13603
|
+
} else {
|
13604
|
+
depth = (uint32_t) current_depth;
|
13605
|
+
}
|
13606
|
+
|
13607
|
+
pm_local_variable_target_node_t *target = pm_local_variable_target_node_create_values(parser, value_loc, name, depth);
|
13608
|
+
return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
|
13609
|
+
}
|
13610
|
+
|
13079
13611
|
/**
|
13080
13612
|
* Parse a hash pattern.
|
13081
13613
|
*/
|
13082
13614
|
static pm_hash_pattern_node_t *
|
13083
|
-
parse_pattern_hash(pm_parser_t *parser, pm_node_t *
|
13615
|
+
parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) {
|
13084
13616
|
pm_node_list_t assocs = { 0 };
|
13085
13617
|
pm_node_t *rest = NULL;
|
13086
13618
|
|
13087
|
-
switch (PM_NODE_TYPE(
|
13088
|
-
case
|
13089
|
-
|
13090
|
-
|
13091
|
-
|
13092
|
-
|
13619
|
+
switch (PM_NODE_TYPE(first_node)) {
|
13620
|
+
case PM_ASSOC_SPLAT_NODE:
|
13621
|
+
case PM_NO_KEYWORDS_PARAMETER_NODE:
|
13622
|
+
rest = first_node;
|
13623
|
+
break;
|
13624
|
+
case PM_SYMBOL_NODE: {
|
13625
|
+
if (pm_symbol_node_label_p(first_node)) {
|
13626
|
+
pm_node_t *value;
|
13627
|
+
|
13628
|
+
if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
13629
|
+
// Here we have a value for the first assoc in the list, so
|
13630
|
+
// we will parse it now.
|
13631
|
+
value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
|
13632
|
+
} else {
|
13633
|
+
// Otherwise, we will create an implicit local variable
|
13634
|
+
// target for the value.
|
13635
|
+
value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) first_node);
|
13636
|
+
}
|
13093
13637
|
|
13094
|
-
|
13095
|
-
assoc
|
13096
|
-
assoc->value = value;
|
13097
|
-
} else {
|
13098
|
-
pm_node_t *key = ((pm_assoc_node_t *) first_assoc)->key;
|
13638
|
+
pm_token_t operator = not_provided(parser);
|
13639
|
+
pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
|
13099
13640
|
|
13100
|
-
|
13101
|
-
|
13102
|
-
pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
|
13103
|
-
}
|
13641
|
+
pm_node_list_append(&assocs, assoc);
|
13642
|
+
break;
|
13104
13643
|
}
|
13644
|
+
}
|
13645
|
+
/* fallthrough */
|
13646
|
+
default: {
|
13647
|
+
// If we get anything else, then this is an error. For this we'll
|
13648
|
+
// create a missing node for the value and create an assoc node for
|
13649
|
+
// the first node in the list.
|
13650
|
+
pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_LABEL);
|
13651
|
+
|
13652
|
+
pm_token_t operator = not_provided(parser);
|
13653
|
+
pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
|
13654
|
+
pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
|
13105
13655
|
|
13106
|
-
pm_node_list_append(&assocs,
|
13656
|
+
pm_node_list_append(&assocs, assoc);
|
13107
13657
|
break;
|
13108
13658
|
}
|
13109
|
-
case PM_ASSOC_SPLAT_NODE:
|
13110
|
-
case PM_NO_KEYWORDS_PARAMETER_NODE:
|
13111
|
-
rest = first_assoc;
|
13112
|
-
break;
|
13113
|
-
default:
|
13114
|
-
assert(false);
|
13115
|
-
break;
|
13116
13659
|
}
|
13117
13660
|
|
13118
13661
|
// If there are any other assocs, then we'll parse them now.
|
@@ -13141,6 +13684,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
|
|
13141
13684
|
} else {
|
13142
13685
|
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
|
13143
13686
|
pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
|
13687
|
+
value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) key);
|
13144
13688
|
}
|
13145
13689
|
|
13146
13690
|
pm_token_t operator = not_provided(parser);
|
@@ -13246,45 +13790,29 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
13246
13790
|
// pattern node.
|
13247
13791
|
node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
|
13248
13792
|
} else {
|
13249
|
-
pm_node_t *
|
13793
|
+
pm_node_t *first_node;
|
13250
13794
|
|
13251
13795
|
switch (parser->current.type) {
|
13252
|
-
case PM_TOKEN_LABEL:
|
13796
|
+
case PM_TOKEN_LABEL:
|
13253
13797
|
parser_lex(parser);
|
13254
|
-
|
13255
|
-
pm_symbol_node_t *key = pm_symbol_node_label_create(parser, &parser->previous);
|
13256
|
-
pm_token_t operator = not_provided(parser);
|
13257
|
-
|
13258
|
-
first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
|
13798
|
+
first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
|
13259
13799
|
break;
|
13260
|
-
}
|
13261
13800
|
case PM_TOKEN_USTAR_STAR:
|
13262
|
-
|
13801
|
+
first_node = parse_pattern_keyword_rest(parser);
|
13263
13802
|
break;
|
13264
|
-
case PM_TOKEN_STRING_BEGIN:
|
13265
|
-
|
13266
|
-
pm_token_t operator = not_provided(parser);
|
13267
|
-
|
13268
|
-
if (!pm_symbol_node_label_p(key)) {
|
13269
|
-
pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_LABEL);
|
13270
|
-
}
|
13271
|
-
|
13272
|
-
first_assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL);
|
13803
|
+
case PM_TOKEN_STRING_BEGIN:
|
13804
|
+
first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
|
13273
13805
|
break;
|
13274
|
-
}
|
13275
13806
|
default: {
|
13276
13807
|
parser_lex(parser);
|
13277
13808
|
pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
|
13278
13809
|
|
13279
|
-
|
13280
|
-
pm_token_t operator = not_provided(parser);
|
13281
|
-
|
13282
|
-
first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
|
13810
|
+
first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
13283
13811
|
break;
|
13284
13812
|
}
|
13285
13813
|
}
|
13286
13814
|
|
13287
|
-
node = parse_pattern_hash(parser,
|
13815
|
+
node = parse_pattern_hash(parser, first_node);
|
13288
13816
|
|
13289
13817
|
accept1(parser, PM_TOKEN_NEWLINE);
|
13290
13818
|
expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
|
@@ -13350,7 +13878,16 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
13350
13878
|
switch (parser->current.type) {
|
13351
13879
|
case PM_TOKEN_IDENTIFIER: {
|
13352
13880
|
parser_lex(parser);
|
13353
|
-
pm_node_t *variable = (pm_node_t *)
|
13881
|
+
pm_node_t *variable = (pm_node_t *) parse_variable(parser);
|
13882
|
+
if (variable == NULL) {
|
13883
|
+
if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0 && pm_token_is_it(parser->previous.start, parser->previous.end)) {
|
13884
|
+
pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
|
13885
|
+
variable = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
13886
|
+
} else {
|
13887
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
|
13888
|
+
variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
13889
|
+
}
|
13890
|
+
}
|
13354
13891
|
|
13355
13892
|
return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
|
13356
13893
|
}
|
@@ -13519,9 +14056,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
|
|
13519
14056
|
case PM_TOKEN_LABEL: {
|
13520
14057
|
parser_lex(parser);
|
13521
14058
|
pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
|
13522
|
-
|
13523
|
-
|
13524
|
-
return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL));
|
14059
|
+
return (pm_node_t *) parse_pattern_hash(parser, key);
|
13525
14060
|
}
|
13526
14061
|
case PM_TOKEN_USTAR_STAR: {
|
13527
14062
|
node = parse_pattern_keyword_rest(parser);
|
@@ -13544,8 +14079,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
|
|
13544
14079
|
// If we got a dynamic label symbol, then we need to treat it like the
|
13545
14080
|
// beginning of a hash pattern.
|
13546
14081
|
if (pm_symbol_node_label_p(node)) {
|
13547
|
-
|
13548
|
-
return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, node, &operator, NULL));
|
14082
|
+
return (pm_node_t *) parse_pattern_hash(parser, node);
|
13549
14083
|
}
|
13550
14084
|
|
13551
14085
|
if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
|
@@ -13558,7 +14092,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
|
|
13558
14092
|
// Gather up all of the patterns into the list.
|
13559
14093
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
13560
14094
|
// Break early here in case we have a trailing comma.
|
13561
|
-
if (
|
14095
|
+
if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
|
13562
14096
|
node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
|
13563
14097
|
pm_node_list_append(&nodes, node);
|
13564
14098
|
break;
|
@@ -13644,7 +14178,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13644
14178
|
assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
|
13645
14179
|
|
13646
14180
|
bool concating = false;
|
13647
|
-
bool state_is_arg_labeled =
|
14181
|
+
bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
|
13648
14182
|
|
13649
14183
|
while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
13650
14184
|
pm_node_t *node = NULL;
|
@@ -13659,7 +14193,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13659
14193
|
parser_lex(parser);
|
13660
14194
|
|
13661
14195
|
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
13662
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14196
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
13663
14197
|
// If we get here, then we have an end immediately after a
|
13664
14198
|
// start. In that case we'll create an empty content token and
|
13665
14199
|
// return an uninterpolated string.
|
@@ -13672,7 +14206,6 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13672
14206
|
// If we get here, then we have an end of a label immediately
|
13673
14207
|
// after a start. In that case we'll create an empty symbol
|
13674
14208
|
// node.
|
13675
|
-
pm_token_t opening = not_provided(parser);
|
13676
14209
|
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
13677
14210
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
|
13678
14211
|
|
@@ -13716,15 +14249,19 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13716
14249
|
parser_lex(parser);
|
13717
14250
|
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
|
13718
14251
|
|
13719
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14252
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
13720
14253
|
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
13721
14254
|
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
13722
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
14255
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
13723
14256
|
} else if (match1(parser, PM_TOKEN_EOF)) {
|
13724
|
-
pm_parser_err_token(parser, &opening,
|
14257
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
13725
14258
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
14259
|
+
} else if (accept1(parser, PM_TOKEN_STRING_END)) {
|
14260
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
13726
14261
|
} else {
|
13727
|
-
|
14262
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
|
14263
|
+
parser->previous.start = parser->previous.end;
|
14264
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
13728
14265
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
13729
14266
|
}
|
13730
14267
|
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
@@ -13739,9 +14276,9 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13739
14276
|
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
13740
14277
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
13741
14278
|
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
13742
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14279
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
13743
14280
|
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
13744
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
14281
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
13745
14282
|
} else {
|
13746
14283
|
// If we get here, then we have interpolation so we'll need
|
13747
14284
|
// to create a string or symbol node with interpolation.
|
@@ -13830,11 +14367,34 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13830
14367
|
return current;
|
13831
14368
|
}
|
13832
14369
|
|
14370
|
+
/**
|
14371
|
+
* Append an error to the error list on the parser using the given diagnostic
|
14372
|
+
* ID. This function is a specialization that handles formatting the specific
|
14373
|
+
* kind of error that is being appended.
|
14374
|
+
*/
|
14375
|
+
static void
|
14376
|
+
pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
14377
|
+
switch (diag_id) {
|
14378
|
+
case PM_ERR_HASH_KEY: {
|
14379
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
|
14380
|
+
break;
|
14381
|
+
}
|
14382
|
+
case PM_ERR_UNARY_RECEIVER: {
|
14383
|
+
const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
|
14384
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
|
14385
|
+
break;
|
14386
|
+
}
|
14387
|
+
default:
|
14388
|
+
pm_parser_err_previous(parser, diag_id);
|
14389
|
+
break;
|
14390
|
+
}
|
14391
|
+
}
|
14392
|
+
|
13833
14393
|
/**
|
13834
14394
|
* Parse an expression that begins with the previous node that we just lexed.
|
13835
14395
|
*/
|
13836
14396
|
static inline pm_node_t *
|
13837
|
-
parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call) {
|
14397
|
+
parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
|
13838
14398
|
switch (parser->current.type) {
|
13839
14399
|
case PM_TOKEN_BRACKET_LEFT_ARRAY: {
|
13840
14400
|
parser_lex(parser);
|
@@ -13866,9 +14426,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
13866
14426
|
pm_node_t *expression = NULL;
|
13867
14427
|
|
13868
14428
|
if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
|
13869
|
-
|
13870
|
-
pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
13871
|
-
}
|
14429
|
+
pm_parser_scope_forwarding_positionals_check(parser, &operator);
|
13872
14430
|
} else {
|
13873
14431
|
expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR);
|
13874
14432
|
}
|
@@ -14016,7 +14574,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14016
14574
|
// If we didn't find a terminator and we didn't find a right
|
14017
14575
|
// parenthesis, then this is a syntax error.
|
14018
14576
|
if (!terminator_found) {
|
14019
|
-
|
14577
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
14020
14578
|
}
|
14021
14579
|
|
14022
14580
|
// Parse each statement within the parentheses.
|
@@ -14045,7 +14603,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14045
14603
|
} else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
14046
14604
|
break;
|
14047
14605
|
} else {
|
14048
|
-
|
14606
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
14049
14607
|
}
|
14050
14608
|
}
|
14051
14609
|
|
@@ -14113,7 +14671,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14113
14671
|
if (
|
14114
14672
|
match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
|
14115
14673
|
(accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
|
14116
|
-
(pm_accepts_block_stack_p(parser) &&
|
14674
|
+
(pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
|
14675
|
+
match1(parser, PM_TOKEN_BRACE_LEFT)
|
14117
14676
|
) {
|
14118
14677
|
pm_arguments_t arguments = { 0 };
|
14119
14678
|
parse_arguments_list(parser, &arguments, true, accepts_command_call);
|
@@ -14237,7 +14796,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14237
14796
|
// a block, so we need to check for that here.
|
14238
14797
|
if (
|
14239
14798
|
(accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
|
14240
|
-
(pm_accepts_block_stack_p(parser) &&
|
14799
|
+
(pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
|
14800
|
+
match1(parser, PM_TOKEN_BRACE_LEFT)
|
14241
14801
|
) {
|
14242
14802
|
pm_arguments_t arguments = { 0 };
|
14243
14803
|
parse_arguments_list(parser, &arguments, true, accepts_command_call);
|
@@ -14250,6 +14810,31 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14250
14810
|
|
14251
14811
|
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
|
14252
14812
|
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
|
14813
|
+
} else {
|
14814
|
+
// Check if `it` is not going to be assigned.
|
14815
|
+
switch (parser->current.type) {
|
14816
|
+
case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
|
14817
|
+
case PM_TOKEN_AMPERSAND_EQUAL:
|
14818
|
+
case PM_TOKEN_CARET_EQUAL:
|
14819
|
+
case PM_TOKEN_EQUAL:
|
14820
|
+
case PM_TOKEN_GREATER_GREATER_EQUAL:
|
14821
|
+
case PM_TOKEN_LESS_LESS_EQUAL:
|
14822
|
+
case PM_TOKEN_MINUS_EQUAL:
|
14823
|
+
case PM_TOKEN_PARENTHESIS_RIGHT:
|
14824
|
+
case PM_TOKEN_PERCENT_EQUAL:
|
14825
|
+
case PM_TOKEN_PIPE_EQUAL:
|
14826
|
+
case PM_TOKEN_PIPE_PIPE_EQUAL:
|
14827
|
+
case PM_TOKEN_PLUS_EQUAL:
|
14828
|
+
case PM_TOKEN_SLASH_EQUAL:
|
14829
|
+
case PM_TOKEN_STAR_EQUAL:
|
14830
|
+
case PM_TOKEN_STAR_STAR_EQUAL:
|
14831
|
+
break;
|
14832
|
+
default:
|
14833
|
+
// Once we know it's neither a method call nor an
|
14834
|
+
// assignment, we can finally create `it` default
|
14835
|
+
// parameter.
|
14836
|
+
node = pm_node_check_it(parser, node);
|
14837
|
+
}
|
14253
14838
|
}
|
14254
14839
|
|
14255
14840
|
return node;
|
@@ -14286,6 +14871,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14286
14871
|
// If we get here, then we tried to find something in the
|
14287
14872
|
// heredoc but couldn't actually parse anything, so we'll just
|
14288
14873
|
// return a missing node.
|
14874
|
+
//
|
14875
|
+
// parse_string_part handles its own errors, so there is no need
|
14876
|
+
// for us to add one here.
|
14289
14877
|
node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
14290
14878
|
} else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
|
14291
14879
|
// If we get here, then the part that we parsed was plain string
|
@@ -14549,11 +15137,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14549
15137
|
// for guard clauses in the form of `if` or `unless` statements.
|
14550
15138
|
if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
|
14551
15139
|
pm_token_t keyword = parser->previous;
|
14552
|
-
pm_node_t *predicate = parse_value_expression(parser,
|
15140
|
+
pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
|
14553
15141
|
pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
|
14554
15142
|
} else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
|
14555
15143
|
pm_token_t keyword = parser->previous;
|
14556
|
-
pm_node_t *predicate = parse_value_expression(parser,
|
15144
|
+
pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
|
14557
15145
|
pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
|
14558
15146
|
}
|
14559
15147
|
|
@@ -14742,8 +15330,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14742
15330
|
pm_token_t operator = parser->previous;
|
14743
15331
|
pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_NOT, true, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS);
|
14744
15332
|
|
14745
|
-
pm_constant_id_t
|
14746
|
-
parser->current_param_name = 0;
|
15333
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
14747
15334
|
pm_parser_scope_push(parser, true);
|
14748
15335
|
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
|
14749
15336
|
|
@@ -14756,15 +15343,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14756
15343
|
|
14757
15344
|
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
|
14758
15345
|
assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
|
14759
|
-
statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
|
15346
|
+
statements = (pm_node_t *) parse_rescues_as_begin(parser, class_keyword.start, (pm_statements_node_t *) statements, false);
|
14760
15347
|
}
|
14761
15348
|
|
14762
15349
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
|
14763
|
-
|
14764
15350
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
15351
|
+
|
14765
15352
|
pm_parser_scope_pop(parser);
|
14766
|
-
parser->current_param_name = old_param_name;
|
14767
15353
|
pm_do_loop_stack_pop(parser);
|
15354
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
15355
|
+
|
14768
15356
|
return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
|
14769
15357
|
}
|
14770
15358
|
|
@@ -14790,9 +15378,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14790
15378
|
superclass = NULL;
|
14791
15379
|
}
|
14792
15380
|
|
14793
|
-
pm_constant_id_t
|
14794
|
-
parser->current_param_name = 0;
|
15381
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
14795
15382
|
pm_parser_scope_push(parser, true);
|
15383
|
+
|
14796
15384
|
if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
|
14797
15385
|
expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
|
14798
15386
|
} else {
|
@@ -14808,7 +15396,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14808
15396
|
|
14809
15397
|
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
|
14810
15398
|
assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
|
14811
|
-
statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
|
15399
|
+
statements = (pm_node_t *) parse_rescues_as_begin(parser, class_keyword.start, (pm_statements_node_t *) statements, false);
|
14812
15400
|
}
|
14813
15401
|
|
14814
15402
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
|
@@ -14818,9 +15406,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14818
15406
|
}
|
14819
15407
|
|
14820
15408
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
15409
|
+
|
14821
15410
|
pm_parser_scope_pop(parser);
|
14822
|
-
parser->current_param_name = old_param_name;
|
14823
15411
|
pm_do_loop_stack_pop(parser);
|
15412
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
14824
15413
|
|
14825
15414
|
if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
|
14826
15415
|
pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
|
@@ -14835,18 +15424,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14835
15424
|
pm_token_t operator = not_provided(parser);
|
14836
15425
|
pm_token_t name = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = def_keyword.end, .end = def_keyword.end };
|
14837
15426
|
|
14838
|
-
// This context is necessary for lexing `...` in a bare params
|
14839
|
-
// It must be pushed before lexing the first param, so it
|
15427
|
+
// This context is necessary for lexing `...` in a bare params
|
15428
|
+
// correctly. It must be pushed before lexing the first param, so it
|
15429
|
+
// is here.
|
14840
15430
|
context_push(parser, PM_CONTEXT_DEF_PARAMS);
|
15431
|
+
pm_constant_id_t saved_param_name;
|
15432
|
+
|
14841
15433
|
parser_lex(parser);
|
14842
|
-
pm_constant_id_t old_param_name = parser->current_param_name;
|
14843
15434
|
|
14844
15435
|
switch (parser->current.type) {
|
14845
15436
|
case PM_CASE_OPERATOR:
|
15437
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14846
15438
|
pm_parser_scope_push(parser, true);
|
14847
|
-
parser->current_param_name = 0;
|
14848
15439
|
lex_state_set(parser, PM_LEX_STATE_ENDFN);
|
14849
15440
|
parser_lex(parser);
|
15441
|
+
|
14850
15442
|
name = parser->previous;
|
14851
15443
|
break;
|
14852
15444
|
case PM_TOKEN_IDENTIFIER: {
|
@@ -14854,18 +15446,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14854
15446
|
|
14855
15447
|
if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
|
14856
15448
|
receiver = parse_variable_call(parser);
|
15449
|
+
receiver = pm_node_check_it(parser, receiver);
|
14857
15450
|
|
15451
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14858
15452
|
pm_parser_scope_push(parser, true);
|
14859
|
-
parser->current_param_name = 0;
|
14860
15453
|
lex_state_set(parser, PM_LEX_STATE_FNAME);
|
14861
15454
|
parser_lex(parser);
|
14862
15455
|
|
14863
15456
|
operator = parser->previous;
|
14864
15457
|
name = parse_method_definition_name(parser);
|
14865
15458
|
} else {
|
15459
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14866
15460
|
pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
|
14867
15461
|
pm_parser_scope_push(parser, true);
|
14868
|
-
|
15462
|
+
|
14869
15463
|
name = parser->previous;
|
14870
15464
|
}
|
14871
15465
|
|
@@ -14882,9 +15476,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14882
15476
|
case PM_TOKEN_KEYWORD___FILE__:
|
14883
15477
|
case PM_TOKEN_KEYWORD___LINE__:
|
14884
15478
|
case PM_TOKEN_KEYWORD___ENCODING__: {
|
15479
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14885
15480
|
pm_parser_scope_push(parser, true);
|
14886
|
-
parser->current_param_name = 0;
|
14887
15481
|
parser_lex(parser);
|
15482
|
+
|
14888
15483
|
pm_token_t identifier = parser->previous;
|
14889
15484
|
|
14890
15485
|
if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
|
@@ -14946,6 +15541,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14946
15541
|
pm_token_t lparen = parser->previous;
|
14947
15542
|
pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_DEF_RECEIVER);
|
14948
15543
|
|
15544
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
14949
15545
|
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
|
14950
15546
|
pm_token_t rparen = parser->previous;
|
14951
15547
|
|
@@ -14955,8 +15551,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14955
15551
|
operator = parser->previous;
|
14956
15552
|
receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
|
14957
15553
|
|
15554
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14958
15555
|
pm_parser_scope_push(parser, true);
|
14959
|
-
parser->current_param_name = 0;
|
14960
15556
|
|
14961
15557
|
// To push `PM_CONTEXT_DEF_PARAMS` again is for the same reason as described the above.
|
14962
15558
|
context_push(parser, PM_CONTEXT_DEF_PARAMS);
|
@@ -14964,8 +15560,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14964
15560
|
break;
|
14965
15561
|
}
|
14966
15562
|
default:
|
15563
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14967
15564
|
pm_parser_scope_push(parser, true);
|
14968
|
-
|
15565
|
+
|
14969
15566
|
name = parse_method_definition_name(parser);
|
14970
15567
|
break;
|
14971
15568
|
}
|
@@ -15018,8 +15615,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15018
15615
|
}
|
15019
15616
|
}
|
15020
15617
|
|
15021
|
-
uint32_t locals_body_index = (uint32_t) parser->current_scope->locals.size;
|
15022
|
-
|
15023
15618
|
context_pop(parser);
|
15024
15619
|
pm_node_t *statements = NULL;
|
15025
15620
|
pm_token_t equal;
|
@@ -15070,7 +15665,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15070
15665
|
|
15071
15666
|
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
|
15072
15667
|
assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
|
15073
|
-
statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, true);
|
15668
|
+
statements = (pm_node_t *) parse_rescues_as_begin(parser, def_keyword.start, (pm_statements_node_t *) statements, true);
|
15074
15669
|
}
|
15075
15670
|
|
15076
15671
|
pm_accepts_block_stack_pop(parser);
|
@@ -15080,17 +15675,25 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15080
15675
|
}
|
15081
15676
|
|
15082
15677
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
15083
|
-
|
15678
|
+
|
15084
15679
|
pm_parser_scope_pop(parser);
|
15680
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
15681
|
+
|
15682
|
+
/**
|
15683
|
+
* If the final character is @. As is the case when defining
|
15684
|
+
* methods to override the unary operators, we should ignore
|
15685
|
+
* the @ in the same way we do for symbols.
|
15686
|
+
*/
|
15687
|
+
pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
|
15085
15688
|
|
15086
15689
|
return (pm_node_t *) pm_def_node_create(
|
15087
15690
|
parser,
|
15691
|
+
name_id,
|
15088
15692
|
&name,
|
15089
15693
|
receiver,
|
15090
15694
|
params,
|
15091
15695
|
statements,
|
15092
15696
|
&locals,
|
15093
|
-
locals_body_index,
|
15094
15697
|
&def_keyword,
|
15095
15698
|
&operator,
|
15096
15699
|
&lparen,
|
@@ -15309,9 +15912,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15309
15912
|
pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
|
15310
15913
|
}
|
15311
15914
|
|
15312
|
-
pm_constant_id_t
|
15313
|
-
parser->current_param_name = 0;
|
15915
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
15314
15916
|
pm_parser_scope_push(parser, true);
|
15917
|
+
|
15315
15918
|
accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
|
15316
15919
|
pm_node_t *statements = NULL;
|
15317
15920
|
|
@@ -15323,12 +15926,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15323
15926
|
|
15324
15927
|
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
|
15325
15928
|
assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
|
15326
|
-
statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
|
15929
|
+
statements = (pm_node_t *) parse_rescues_as_begin(parser, module_keyword.start, (pm_statements_node_t *) statements, false);
|
15327
15930
|
}
|
15328
15931
|
|
15329
15932
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
15330
15933
|
pm_parser_scope_pop(parser);
|
15331
|
-
parser
|
15934
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
15332
15935
|
|
15333
15936
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
|
15334
15937
|
|
@@ -15914,6 +16517,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15914
16517
|
// context of a multiple assignment. We enforce that here. We'll
|
15915
16518
|
// still lex past it though and create a missing node place.
|
15916
16519
|
if (binding_power != PM_BINDING_POWER_STATEMENT) {
|
16520
|
+
pm_parser_err_prefix(parser, diag_id);
|
15917
16521
|
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
15918
16522
|
}
|
15919
16523
|
|
@@ -15936,7 +16540,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15936
16540
|
parser_lex(parser);
|
15937
16541
|
|
15938
16542
|
pm_token_t operator = parser->previous;
|
15939
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH,
|
16543
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, PM_ERR_UNARY_RECEIVER);
|
15940
16544
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
|
15941
16545
|
|
15942
16546
|
pm_conditional_predicate(receiver);
|
@@ -15946,7 +16550,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15946
16550
|
parser_lex(parser);
|
15947
16551
|
|
15948
16552
|
pm_token_t operator = parser->previous;
|
15949
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16553
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
15950
16554
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
|
15951
16555
|
|
15952
16556
|
return (pm_node_t *) node;
|
@@ -15955,7 +16559,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15955
16559
|
parser_lex(parser);
|
15956
16560
|
|
15957
16561
|
pm_token_t operator = parser->previous;
|
15958
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16562
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
15959
16563
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
|
15960
16564
|
|
15961
16565
|
return (pm_node_t *) node;
|
@@ -15964,7 +16568,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15964
16568
|
parser_lex(parser);
|
15965
16569
|
|
15966
16570
|
pm_token_t operator = parser->previous;
|
15967
|
-
pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16571
|
+
pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
15968
16572
|
|
15969
16573
|
if (accept1(parser, PM_TOKEN_STAR_STAR)) {
|
15970
16574
|
pm_token_t exponent_operator = parser->previous;
|
@@ -15995,7 +16599,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15995
16599
|
parser_lex(parser);
|
15996
16600
|
|
15997
16601
|
pm_token_t operator = parser->previous;
|
16602
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
15998
16603
|
pm_parser_scope_push(parser, false);
|
16604
|
+
|
15999
16605
|
pm_block_parameters_node_t *block_parameters;
|
16000
16606
|
|
16001
16607
|
switch (parser->current.type) {
|
@@ -16030,12 +16636,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16030
16636
|
}
|
16031
16637
|
}
|
16032
16638
|
|
16033
|
-
uint32_t locals_body_index = 0;
|
16034
|
-
|
16035
|
-
if (block_parameters) {
|
16036
|
-
locals_body_index = (uint32_t) parser->current_scope->locals.size;
|
16037
|
-
}
|
16038
|
-
|
16039
16639
|
pm_token_t opening;
|
16040
16640
|
pm_node_t *body = NULL;
|
16041
16641
|
parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
|
@@ -16059,7 +16659,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16059
16659
|
|
16060
16660
|
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
|
16061
16661
|
assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
|
16062
|
-
body = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) body, false);
|
16662
|
+
body = (pm_node_t *) parse_rescues_as_begin(parser, opening.start, (pm_statements_node_t *) body, false);
|
16063
16663
|
}
|
16064
16664
|
|
16065
16665
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
|
@@ -16070,19 +16670,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16070
16670
|
|
16071
16671
|
if (parameters == NULL && (maximum > 0)) {
|
16072
16672
|
parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = operator.start, .end = parser->previous.end }, maximum);
|
16073
|
-
locals_body_index = maximum;
|
16074
16673
|
}
|
16075
16674
|
|
16076
16675
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
16676
|
+
|
16077
16677
|
pm_parser_scope_pop(parser);
|
16078
16678
|
pm_accepts_block_stack_pop(parser);
|
16079
|
-
|
16679
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
16680
|
+
|
16681
|
+
return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
|
16080
16682
|
}
|
16081
16683
|
case PM_TOKEN_UPLUS: {
|
16082
16684
|
parser_lex(parser);
|
16083
16685
|
|
16084
16686
|
pm_token_t operator = parser->previous;
|
16085
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16687
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16086
16688
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
|
16087
16689
|
|
16088
16690
|
return (pm_node_t *) node;
|
@@ -16095,12 +16697,34 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16095
16697
|
|
16096
16698
|
return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END);
|
16097
16699
|
}
|
16098
|
-
default:
|
16099
|
-
|
16700
|
+
default: {
|
16701
|
+
pm_context_t recoverable = context_recoverable(parser, &parser->current);
|
16702
|
+
|
16703
|
+
if (recoverable != PM_CONTEXT_NONE) {
|
16100
16704
|
parser->recovering = true;
|
16705
|
+
|
16706
|
+
// If the given error is not the generic one, then we'll add it
|
16707
|
+
// here because it will provide more context in addition to the
|
16708
|
+
// recoverable error that we will also add.
|
16709
|
+
if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
|
16710
|
+
pm_parser_err_prefix(parser, diag_id);
|
16711
|
+
}
|
16712
|
+
|
16713
|
+
// If we get here, then we are assuming this token is closing a
|
16714
|
+
// parent context, so we'll indicate that to the user so that
|
16715
|
+
// they know how we behaved.
|
16716
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
|
16717
|
+
} else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
|
16718
|
+
// We're going to make a special case here, because "cannot
|
16719
|
+
// parse expression" is pretty generic, and we know here that we
|
16720
|
+
// have an unexpected token.
|
16721
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
|
16722
|
+
} else {
|
16723
|
+
pm_parser_err_prefix(parser, diag_id);
|
16101
16724
|
}
|
16102
16725
|
|
16103
16726
|
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
16727
|
+
}
|
16104
16728
|
}
|
16105
16729
|
}
|
16106
16730
|
|
@@ -16145,7 +16769,18 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding
|
|
16145
16769
|
if (is_single_value && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
|
16146
16770
|
pm_token_t rescue = parser->current;
|
16147
16771
|
parser_lex(parser);
|
16148
|
-
|
16772
|
+
|
16773
|
+
bool accepts_command_call_inner = false;
|
16774
|
+
|
16775
|
+
// RHS can accept command call iff the value is a call with arguments but without paranthesis.
|
16776
|
+
if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
|
16777
|
+
pm_call_node_t *call_node = (pm_call_node_t *)value;
|
16778
|
+
if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
|
16779
|
+
accepts_command_call_inner = true;
|
16780
|
+
}
|
16781
|
+
}
|
16782
|
+
|
16783
|
+
pm_node_t *right = parse_expression(parser, binding_power, accepts_command_call_inner, PM_ERR_RESCUE_MODIFIER_VALUE);
|
16149
16784
|
|
16150
16785
|
return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
|
16151
16786
|
}
|
@@ -16330,7 +16965,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16330
16965
|
switch (PM_NODE_TYPE(node)) {
|
16331
16966
|
case PM_BACK_REFERENCE_READ_NODE:
|
16332
16967
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
16333
|
-
|
16968
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
16334
16969
|
/* fallthrough */
|
16335
16970
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
16336
16971
|
parser_lex(parser);
|
@@ -16412,7 +17047,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16412
17047
|
}
|
16413
17048
|
|
16414
17049
|
// If this node cannot be writable, then we have an error.
|
16415
|
-
if (pm_call_node_writable_p(cast)) {
|
17050
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
16416
17051
|
parse_write_name(parser, &cast->name);
|
16417
17052
|
} else {
|
16418
17053
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -16441,7 +17076,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16441
17076
|
switch (PM_NODE_TYPE(node)) {
|
16442
17077
|
case PM_BACK_REFERENCE_READ_NODE:
|
16443
17078
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
16444
|
-
|
17079
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
16445
17080
|
/* fallthrough */
|
16446
17081
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
16447
17082
|
parser_lex(parser);
|
@@ -16523,7 +17158,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16523
17158
|
}
|
16524
17159
|
|
16525
17160
|
// If this node cannot be writable, then we have an error.
|
16526
|
-
if (pm_call_node_writable_p(cast)) {
|
17161
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
16527
17162
|
parse_write_name(parser, &cast->name);
|
16528
17163
|
} else {
|
16529
17164
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -16562,7 +17197,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16562
17197
|
switch (PM_NODE_TYPE(node)) {
|
16563
17198
|
case PM_BACK_REFERENCE_READ_NODE:
|
16564
17199
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
16565
|
-
|
17200
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
16566
17201
|
/* fallthrough */
|
16567
17202
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
16568
17203
|
parser_lex(parser);
|
@@ -16644,7 +17279,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16644
17279
|
}
|
16645
17280
|
|
16646
17281
|
// If this node cannot be writable, then we have an error.
|
16647
|
-
if (pm_call_node_writable_p(cast)) {
|
17282
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
16648
17283
|
parse_write_name(parser, &cast->name);
|
16649
17284
|
} else {
|
16650
17285
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -17063,15 +17698,12 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
17063
17698
|
*/
|
17064
17699
|
static pm_node_t *
|
17065
17700
|
parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
|
17066
|
-
|
17067
|
-
pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call);
|
17701
|
+
pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, diag_id);
|
17068
17702
|
|
17069
17703
|
switch (PM_NODE_TYPE(node)) {
|
17070
17704
|
case PM_MISSING_NODE:
|
17071
17705
|
// If we found a syntax error, then the type of node returned by
|
17072
|
-
// parse_expression_prefix is going to be a missing node.
|
17073
|
-
// case we need to add the error message to the parser's error list.
|
17074
|
-
pm_parser_err(parser, recovery.end, recovery.end, diag_id);
|
17706
|
+
// parse_expression_prefix is going to be a missing node.
|
17075
17707
|
return node;
|
17076
17708
|
case PM_PRE_EXECUTION_NODE:
|
17077
17709
|
case PM_POST_EXECUTION_NODE:
|
@@ -17080,7 +17712,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
17080
17712
|
case PM_UNDEF_NODE:
|
17081
17713
|
// These expressions are statements, and cannot be followed by
|
17082
17714
|
// operators (except modifiers).
|
17083
|
-
if (pm_binding_powers[parser->current.type].left >
|
17715
|
+
if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
|
17084
17716
|
return node;
|
17085
17717
|
}
|
17086
17718
|
break;
|
@@ -17175,9 +17807,14 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
17175
17807
|
|
17176
17808
|
static pm_node_t *
|
17177
17809
|
parse_program(pm_parser_t *parser) {
|
17178
|
-
|
17179
|
-
|
17810
|
+
// If the current scope is NULL, then we want to push a new top level scope.
|
17811
|
+
// The current scope could exist in the event that we are parsing an eval
|
17812
|
+
// and the user has passed into scopes that already exist.
|
17813
|
+
if (parser->current_scope == NULL) {
|
17814
|
+
pm_parser_scope_push(parser, true);
|
17815
|
+
}
|
17180
17816
|
|
17817
|
+
parser_lex(parser);
|
17181
17818
|
pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN);
|
17182
17819
|
if (!statements) {
|
17183
17820
|
statements = pm_statements_node_create(parser);
|
@@ -17224,6 +17861,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17224
17861
|
.current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
|
17225
17862
|
.next_start = NULL,
|
17226
17863
|
.heredoc_end = NULL,
|
17864
|
+
.data_loc = { .start = NULL, .end = NULL },
|
17227
17865
|
.comment_list = { 0 },
|
17228
17866
|
.magic_comment_list = { 0 },
|
17229
17867
|
.warning_list = { 0 },
|
@@ -17234,7 +17872,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17234
17872
|
.encoding_changed_callback = NULL,
|
17235
17873
|
.encoding_comment_start = source,
|
17236
17874
|
.lex_callback = NULL,
|
17237
|
-
.
|
17875
|
+
.filepath = { 0 },
|
17238
17876
|
.constant_pool = { 0 },
|
17239
17877
|
.newline_list = { 0 },
|
17240
17878
|
.integer_base = 0,
|
@@ -17248,8 +17886,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17248
17886
|
.in_keyword_arg = false,
|
17249
17887
|
.current_param_name = 0,
|
17250
17888
|
.semantic_token_seen = false,
|
17251
|
-
.frozen_string_literal = false
|
17252
|
-
.suppress_warnings = false
|
17889
|
+
.frozen_string_literal = false
|
17253
17890
|
};
|
17254
17891
|
|
17255
17892
|
// Initialize the constant pool. We're going to completely guess as to the
|
@@ -17278,7 +17915,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17278
17915
|
// If options were provided to this parse, establish them here.
|
17279
17916
|
if (options != NULL) {
|
17280
17917
|
// filepath option
|
17281
|
-
parser->
|
17918
|
+
parser->filepath = options->filepath;
|
17282
17919
|
|
17283
17920
|
// line option
|
17284
17921
|
parser->start_line = options->line;
|
@@ -17295,10 +17932,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17295
17932
|
parser->frozen_string_literal = true;
|
17296
17933
|
}
|
17297
17934
|
|
17298
|
-
//
|
17299
|
-
|
17300
|
-
parser->suppress_warnings = true;
|
17301
|
-
}
|
17935
|
+
// version option
|
17936
|
+
parser->version = options->version;
|
17302
17937
|
|
17303
17938
|
// scopes option
|
17304
17939
|
for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
|
@@ -17382,7 +18017,7 @@ pm_magic_comment_list_free(pm_list_t *list) {
|
|
17382
18017
|
*/
|
17383
18018
|
PRISM_EXPORTED_FUNCTION void
|
17384
18019
|
pm_parser_free(pm_parser_t *parser) {
|
17385
|
-
pm_string_free(&parser->
|
18020
|
+
pm_string_free(&parser->filepath);
|
17386
18021
|
pm_diagnostic_list_free(&parser->error_list);
|
17387
18022
|
pm_diagnostic_list_free(&parser->warning_list);
|
17388
18023
|
pm_comment_list_free(&parser->comment_list);
|
@@ -17484,3 +18119,303 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
|
|
17484
18119
|
#undef PM_LOCATION_NODE_VALUE
|
17485
18120
|
#undef PM_LOCATION_NULL_VALUE
|
17486
18121
|
#undef PM_LOCATION_TOKEN_VALUE
|
18122
|
+
|
18123
|
+
/** An error that is going to be formatted into the output. */
|
18124
|
+
typedef struct {
|
18125
|
+
/** A pointer to the diagnostic that was generated during parsing. */
|
18126
|
+
pm_diagnostic_t *error;
|
18127
|
+
|
18128
|
+
/** The start line of the diagnostic message. */
|
18129
|
+
int32_t line;
|
18130
|
+
|
18131
|
+
/** The column start of the diagnostic message. */
|
18132
|
+
uint32_t column_start;
|
18133
|
+
|
18134
|
+
/** The column end of the diagnostic message. */
|
18135
|
+
uint32_t column_end;
|
18136
|
+
} pm_error_t;
|
18137
|
+
|
18138
|
+
/** The format that will be used to format the errors into the output. */
|
18139
|
+
typedef struct {
|
18140
|
+
/** The prefix that will be used for line numbers. */
|
18141
|
+
const char *number_prefix;
|
18142
|
+
|
18143
|
+
/** The prefix that will be used for blank lines. */
|
18144
|
+
const char *blank_prefix;
|
18145
|
+
|
18146
|
+
/** The divider that will be used between sections of source code. */
|
18147
|
+
const char *divider;
|
18148
|
+
|
18149
|
+
/** The length of the blank prefix. */
|
18150
|
+
size_t blank_prefix_length;
|
18151
|
+
|
18152
|
+
/** The length of the divider. */
|
18153
|
+
size_t divider_length;
|
18154
|
+
} pm_error_format_t;
|
18155
|
+
|
18156
|
+
#define PM_COLOR_GRAY "\033[38;5;102m"
|
18157
|
+
#define PM_COLOR_RED "\033[1;31m"
|
18158
|
+
#define PM_COLOR_RESET "\033[0m"
|
18159
|
+
|
18160
|
+
static inline pm_error_t *
|
18161
|
+
pm_parser_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
|
18162
|
+
pm_error_t *errors = calloc(error_list->size, sizeof(pm_error_t));
|
18163
|
+
int32_t start_line = parser->start_line;
|
18164
|
+
|
18165
|
+
for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
|
18166
|
+
pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line);
|
18167
|
+
pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line);
|
18168
|
+
|
18169
|
+
// We're going to insert this error into the array in sorted order. We
|
18170
|
+
// do this by finding the first error that has a line number greater
|
18171
|
+
// than the current error and then inserting the current error before
|
18172
|
+
// that one.
|
18173
|
+
size_t index = 0;
|
18174
|
+
while (
|
18175
|
+
(index < error_list->size) &&
|
18176
|
+
(errors[index].error != NULL) &&
|
18177
|
+
(
|
18178
|
+
(errors[index].line < start.line) ||
|
18179
|
+
((errors[index].line == start.line) && (errors[index].column_start < start.column))
|
18180
|
+
)
|
18181
|
+
) index++;
|
18182
|
+
|
18183
|
+
// Now we're going to shift all of the errors after this one down one
|
18184
|
+
// index to make room for the new error.
|
18185
|
+
if (index + 1 < error_list->size) {
|
18186
|
+
memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
|
18187
|
+
}
|
18188
|
+
|
18189
|
+
// Finally, we'll insert the error into the array.
|
18190
|
+
uint32_t column_end;
|
18191
|
+
if (start.line == end.line) {
|
18192
|
+
column_end = end.column;
|
18193
|
+
} else {
|
18194
|
+
column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1);
|
18195
|
+
}
|
18196
|
+
|
18197
|
+
// Ensure we have at least one column of error.
|
18198
|
+
if (start.column == column_end) column_end++;
|
18199
|
+
|
18200
|
+
errors[index] = (pm_error_t) {
|
18201
|
+
.error = error,
|
18202
|
+
.line = start.line,
|
18203
|
+
.column_start = start.column,
|
18204
|
+
.column_end = column_end
|
18205
|
+
};
|
18206
|
+
}
|
18207
|
+
|
18208
|
+
return errors;
|
18209
|
+
}
|
18210
|
+
|
18211
|
+
static inline void
|
18212
|
+
pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, pm_buffer_t *buffer) {
|
18213
|
+
size_t index = (size_t) (line - parser->start_line);
|
18214
|
+
|
18215
|
+
const uint8_t *start = &parser->start[newline_list->offsets[index]];
|
18216
|
+
const uint8_t *end;
|
18217
|
+
|
18218
|
+
if (index >= newline_list->size - 1) {
|
18219
|
+
end = parser->end;
|
18220
|
+
} else {
|
18221
|
+
end = &parser->start[newline_list->offsets[index + 1]];
|
18222
|
+
}
|
18223
|
+
|
18224
|
+
pm_buffer_append_format(buffer, number_prefix, line);
|
18225
|
+
pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
|
18226
|
+
|
18227
|
+
if (end == parser->end && end[-1] != '\n') {
|
18228
|
+
pm_buffer_append_string(buffer, "\n", 1);
|
18229
|
+
}
|
18230
|
+
}
|
18231
|
+
|
18232
|
+
/**
|
18233
|
+
* Format the errors on the parser into the given buffer.
|
18234
|
+
*/
|
18235
|
+
PRISM_EXPORTED_FUNCTION void
|
18236
|
+
pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool colorize) {
|
18237
|
+
const pm_list_t *error_list = &parser->error_list;
|
18238
|
+
assert(error_list->size != 0);
|
18239
|
+
|
18240
|
+
// First, we're going to sort all of the errors by line number using an
|
18241
|
+
// insertion sort into a newly allocated array.
|
18242
|
+
const int32_t start_line = parser->start_line;
|
18243
|
+
const pm_newline_list_t *newline_list = &parser->newline_list;
|
18244
|
+
pm_error_t *errors = pm_parser_errors_format_sort(parser, error_list, newline_list);
|
18245
|
+
|
18246
|
+
// Now we're going to determine how we're going to format line numbers and
|
18247
|
+
// blank lines based on the maximum number of digits in the line numbers
|
18248
|
+
// that are going to be displayed.
|
18249
|
+
pm_error_format_t error_format;
|
18250
|
+
int32_t max_line_number = errors[error_list->size - 1].line - start_line;
|
18251
|
+
|
18252
|
+
if (max_line_number < 10) {
|
18253
|
+
if (colorize) {
|
18254
|
+
error_format = (pm_error_format_t) {
|
18255
|
+
.number_prefix = PM_COLOR_GRAY "%1" PRIi32 " | " PM_COLOR_RESET,
|
18256
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18257
|
+
.divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
|
18258
|
+
};
|
18259
|
+
} else {
|
18260
|
+
error_format = (pm_error_format_t) {
|
18261
|
+
.number_prefix = "%1" PRIi32 " | ",
|
18262
|
+
.blank_prefix = " | ",
|
18263
|
+
.divider = " ~~~~~\n"
|
18264
|
+
};
|
18265
|
+
}
|
18266
|
+
} else if (max_line_number < 100) {
|
18267
|
+
if (colorize) {
|
18268
|
+
error_format = (pm_error_format_t) {
|
18269
|
+
.number_prefix = PM_COLOR_GRAY "%2" PRIi32 " | " PM_COLOR_RESET,
|
18270
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18271
|
+
.divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
|
18272
|
+
};
|
18273
|
+
} else {
|
18274
|
+
error_format = (pm_error_format_t) {
|
18275
|
+
.number_prefix = "%2" PRIi32 " | ",
|
18276
|
+
.blank_prefix = " | ",
|
18277
|
+
.divider = " ~~~~~~\n"
|
18278
|
+
};
|
18279
|
+
}
|
18280
|
+
} else if (max_line_number < 1000) {
|
18281
|
+
if (colorize) {
|
18282
|
+
error_format = (pm_error_format_t) {
|
18283
|
+
.number_prefix = PM_COLOR_GRAY "%3" PRIi32 " | " PM_COLOR_RESET,
|
18284
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18285
|
+
.divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
|
18286
|
+
};
|
18287
|
+
} else {
|
18288
|
+
error_format = (pm_error_format_t) {
|
18289
|
+
.number_prefix = "%3" PRIi32 " | ",
|
18290
|
+
.blank_prefix = " | ",
|
18291
|
+
.divider = " ~~~~~~~\n"
|
18292
|
+
};
|
18293
|
+
}
|
18294
|
+
} else if (max_line_number < 10000) {
|
18295
|
+
if (colorize) {
|
18296
|
+
error_format = (pm_error_format_t) {
|
18297
|
+
.number_prefix = PM_COLOR_GRAY "%4" PRIi32 " | " PM_COLOR_RESET,
|
18298
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18299
|
+
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
18300
|
+
};
|
18301
|
+
} else {
|
18302
|
+
error_format = (pm_error_format_t) {
|
18303
|
+
.number_prefix = "%4" PRIi32 " | ",
|
18304
|
+
.blank_prefix = " | ",
|
18305
|
+
.divider = " ~~~~~~~~\n"
|
18306
|
+
};
|
18307
|
+
}
|
18308
|
+
} else {
|
18309
|
+
if (colorize) {
|
18310
|
+
error_format = (pm_error_format_t) {
|
18311
|
+
.number_prefix = PM_COLOR_GRAY "%5" PRIi32 " | " PM_COLOR_RESET,
|
18312
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18313
|
+
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
18314
|
+
};
|
18315
|
+
} else {
|
18316
|
+
error_format = (pm_error_format_t) {
|
18317
|
+
.number_prefix = "%5" PRIi32 " | ",
|
18318
|
+
.blank_prefix = " | ",
|
18319
|
+
.divider = " ~~~~~~~~\n"
|
18320
|
+
};
|
18321
|
+
}
|
18322
|
+
}
|
18323
|
+
|
18324
|
+
error_format.blank_prefix_length = strlen(error_format.blank_prefix);
|
18325
|
+
error_format.divider_length = strlen(error_format.divider);
|
18326
|
+
|
18327
|
+
// Now we're going to iterate through every error in our error list and
|
18328
|
+
// display it. While we're iterating, we will display some padding lines of
|
18329
|
+
// the source before the error to give some context. We'll be careful not to
|
18330
|
+
// display the same line twice in case the errors are close enough in the
|
18331
|
+
// source.
|
18332
|
+
int32_t last_line = 0;
|
18333
|
+
const pm_encoding_t *encoding = parser->encoding;
|
18334
|
+
|
18335
|
+
for (size_t index = 0; index < error_list->size; index++) {
|
18336
|
+
pm_error_t *error = &errors[index];
|
18337
|
+
|
18338
|
+
// Here we determine how many lines of padding of the source to display,
|
18339
|
+
// based on the difference from the last line that was displayed.
|
18340
|
+
if (error->line - last_line > 1) {
|
18341
|
+
if (error->line - last_line > 2) {
|
18342
|
+
if ((index != 0) && (error->line - last_line > 3)) {
|
18343
|
+
pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
|
18344
|
+
}
|
18345
|
+
|
18346
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18347
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
|
18348
|
+
}
|
18349
|
+
|
18350
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18351
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
|
18352
|
+
}
|
18353
|
+
|
18354
|
+
// If this is the first error or we're on a new line, then we'll display
|
18355
|
+
// the line that has the error in it.
|
18356
|
+
if ((index == 0) || (error->line != last_line)) {
|
18357
|
+
if (colorize) {
|
18358
|
+
pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 13);
|
18359
|
+
} else {
|
18360
|
+
pm_buffer_append_string(buffer, "> ", 2);
|
18361
|
+
}
|
18362
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
|
18363
|
+
}
|
18364
|
+
|
18365
|
+
// Now we'll display the actual error message. We'll do this by first
|
18366
|
+
// putting the prefix to the line, then a bunch of blank spaces
|
18367
|
+
// depending on the column, then as many carets as we need to display
|
18368
|
+
// the width of the error, then the error message itself.
|
18369
|
+
//
|
18370
|
+
// Note that this doesn't take into account the width of the actual
|
18371
|
+
// character when displayed in the terminal. For some east-asian
|
18372
|
+
// languages or emoji, this means it can be thrown off pretty badly. We
|
18373
|
+
// will need to solve this eventually.
|
18374
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18375
|
+
pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
|
18376
|
+
|
18377
|
+
size_t column = 0;
|
18378
|
+
const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]];
|
18379
|
+
|
18380
|
+
while (column < error->column_end) {
|
18381
|
+
if (column < error->column_start) {
|
18382
|
+
pm_buffer_append_byte(buffer, ' ');
|
18383
|
+
} else if (colorize) {
|
18384
|
+
pm_buffer_append_string(buffer, PM_COLOR_RED "^" PM_COLOR_RESET, 12);
|
18385
|
+
} else {
|
18386
|
+
pm_buffer_append_byte(buffer, '^');
|
18387
|
+
}
|
18388
|
+
|
18389
|
+
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
18390
|
+
column += (char_width == 0 ? 1 : char_width);
|
18391
|
+
}
|
18392
|
+
|
18393
|
+
pm_buffer_append_byte(buffer, ' ');
|
18394
|
+
|
18395
|
+
const char *message = error->error->message;
|
18396
|
+
pm_buffer_append_string(buffer, message, strlen(message));
|
18397
|
+
pm_buffer_append_byte(buffer, '\n');
|
18398
|
+
|
18399
|
+
// Here we determine how many lines of padding to display after the
|
18400
|
+
// error, depending on where the next error is in source.
|
18401
|
+
last_line = error->line;
|
18402
|
+
int32_t next_line = (index == error_list->size - 1) ? ((int32_t) newline_list->size) : errors[index + 1].line;
|
18403
|
+
|
18404
|
+
if (next_line - last_line > 1) {
|
18405
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18406
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
|
18407
|
+
}
|
18408
|
+
|
18409
|
+
if (next_line - last_line > 1) {
|
18410
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18411
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
|
18412
|
+
}
|
18413
|
+
}
|
18414
|
+
|
18415
|
+
// Finally, we'll free the array of errors that we allocated.
|
18416
|
+
free(errors);
|
18417
|
+
}
|
18418
|
+
|
18419
|
+
#undef PM_COLOR_GRAY
|
18420
|
+
#undef PM_COLOR_RED
|
18421
|
+
#undef PM_COLOR_RESET
|