prism 0.19.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +102 -1
- data/Makefile +5 -0
- data/README.md +9 -6
- data/config.yml +236 -38
- data/docs/build_system.md +19 -2
- data/docs/cruby_compilation.md +27 -0
- data/docs/parser_translation.md +34 -0
- data/docs/parsing_rules.md +19 -0
- data/docs/releasing.md +84 -16
- data/docs/ruby_api.md +1 -1
- data/docs/ruby_parser_translation.md +19 -0
- data/docs/serialization.md +19 -5
- data/ext/prism/api_node.c +1989 -1525
- data/ext/prism/extension.c +130 -30
- data/ext/prism/extension.h +2 -2
- data/include/prism/ast.h +1700 -505
- data/include/prism/defines.h +8 -0
- data/include/prism/diagnostic.h +49 -7
- data/include/prism/encoding.h +17 -0
- data/include/prism/options.h +40 -14
- data/include/prism/parser.h +34 -18
- data/include/prism/util/pm_buffer.h +9 -0
- data/include/prism/util/pm_constant_pool.h +18 -0
- data/include/prism/util/pm_newline_list.h +4 -14
- data/include/prism/util/pm_strpbrk.h +4 -1
- data/include/prism/version.h +2 -2
- data/include/prism.h +19 -2
- data/lib/prism/debug.rb +11 -5
- data/lib/prism/desugar_compiler.rb +225 -80
- data/lib/prism/dot_visitor.rb +36 -14
- data/lib/prism/dsl.rb +302 -299
- data/lib/prism/ffi.rb +107 -76
- data/lib/prism/lex_compat.rb +17 -1
- data/lib/prism/node.rb +4580 -2607
- data/lib/prism/node_ext.rb +27 -4
- data/lib/prism/parse_result.rb +75 -29
- data/lib/prism/serialize.rb +633 -305
- data/lib/prism/translation/parser/compiler.rb +1838 -0
- data/lib/prism/translation/parser/lexer.rb +335 -0
- data/lib/prism/translation/parser/rubocop.rb +45 -0
- data/lib/prism/translation/parser.rb +190 -0
- data/lib/prism/translation/parser33.rb +12 -0
- data/lib/prism/translation/parser34.rb +12 -0
- data/lib/prism/translation/ripper.rb +696 -0
- data/lib/prism/translation/ruby_parser.rb +1521 -0
- data/lib/prism/translation.rb +11 -0
- data/lib/prism.rb +1 -1
- data/prism.gemspec +18 -7
- data/rbi/prism.rbi +150 -88
- data/rbi/prism_static.rbi +15 -3
- data/sig/prism.rbs +996 -961
- data/sig/prism_static.rbs +123 -46
- data/src/diagnostic.c +264 -219
- data/src/encoding.c +21 -26
- data/src/node.c +2 -6
- data/src/options.c +29 -5
- data/src/prettyprint.c +176 -44
- data/src/prism.c +1499 -564
- data/src/serialize.c +35 -21
- data/src/token_type.c +353 -4
- data/src/util/pm_buffer.c +11 -0
- data/src/util/pm_constant_pool.c +37 -11
- data/src/util/pm_newline_list.c +6 -15
- data/src/util/pm_string.c +0 -7
- data/src/util/pm_strpbrk.c +122 -14
- metadata +16 -5
- data/docs/building.md +0 -29
- data/lib/prism/ripper_compat.rb +0 -207
data/src/prism.c
CHANGED
@@ -51,6 +51,7 @@ debug_context(pm_context_t context) {
|
|
51
51
|
case PM_CONTEXT_IF: return "IF";
|
52
52
|
case PM_CONTEXT_MAIN: return "MAIN";
|
53
53
|
case PM_CONTEXT_MODULE: return "MODULE";
|
54
|
+
case PM_CONTEXT_NONE: return "NONE";
|
54
55
|
case PM_CONTEXT_PARENS: return "PARENS";
|
55
56
|
case PM_CONTEXT_POSTEXE: return "POSTEXE";
|
56
57
|
case PM_CONTEXT_PREDICATE: return "PREDICATE";
|
@@ -164,7 +165,7 @@ debug_state(pm_parser_t *parser) {
|
|
164
165
|
|
165
166
|
PRISM_ATTRIBUTE_UNUSED static void
|
166
167
|
debug_token(pm_token_t * token) {
|
167
|
-
fprintf(stderr, "%s: \"%.*s\"\n",
|
168
|
+
fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_human(token->type), (int) (token->end - token->start), token->start);
|
168
169
|
}
|
169
170
|
|
170
171
|
#endif
|
@@ -423,6 +424,11 @@ lex_state_beg_p(pm_parser_t *parser) {
|
|
423
424
|
return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
|
424
425
|
}
|
425
426
|
|
427
|
+
static inline bool
|
428
|
+
lex_state_arg_labeled_p(pm_parser_t *parser) {
|
429
|
+
return (parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
|
430
|
+
}
|
431
|
+
|
426
432
|
static inline bool
|
427
433
|
lex_state_arg_p(pm_parser_t *parser) {
|
428
434
|
return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
|
@@ -487,7 +493,8 @@ pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_
|
|
487
493
|
/**
|
488
494
|
* Append an error to the list of errors on the parser using a format string.
|
489
495
|
*/
|
490
|
-
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...)
|
496
|
+
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
|
497
|
+
pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
|
491
498
|
|
492
499
|
/**
|
493
500
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -502,7 +509,8 @@ pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
502
509
|
* Append an error to the list of errors on the parser using the given location
|
503
510
|
* using a format string.
|
504
511
|
*/
|
505
|
-
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...)
|
512
|
+
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
|
513
|
+
PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
|
506
514
|
|
507
515
|
/**
|
508
516
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -517,7 +525,15 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_
|
|
517
525
|
* Append an error to the list of errors on the parser using the location of the
|
518
526
|
* given node and a format string.
|
519
527
|
*/
|
520
|
-
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...)
|
528
|
+
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
|
529
|
+
PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
|
530
|
+
|
531
|
+
/**
|
532
|
+
* Append an error to the list of errors on the parser using the location of the
|
533
|
+
* given node and a format string, and add on the content of the node.
|
534
|
+
*/
|
535
|
+
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
|
536
|
+
PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
|
521
537
|
|
522
538
|
/**
|
523
539
|
* Append an error to the list of errors on the parser using the location of the
|
@@ -541,16 +557,22 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
|
|
541
557
|
* Append an error to the list of errors on the parser using the location of the
|
542
558
|
* given token and a format string.
|
543
559
|
*/
|
544
|
-
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...)
|
560
|
+
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
|
561
|
+
PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
|
562
|
+
|
563
|
+
/**
|
564
|
+
* Append an error to the list of errors on the parser using the location of the
|
565
|
+
* given token and a format string, and add on the content of the token.
|
566
|
+
*/
|
567
|
+
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
|
568
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
|
545
569
|
|
546
570
|
/**
|
547
571
|
* Append a warning to the list of warnings on the parser.
|
548
572
|
*/
|
549
573
|
static inline void
|
550
574
|
pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
|
551
|
-
|
552
|
-
pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
|
553
|
-
}
|
575
|
+
pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
|
554
576
|
}
|
555
577
|
|
556
578
|
/**
|
@@ -813,6 +835,9 @@ typedef struct {
|
|
813
835
|
|
814
836
|
/** The optional block attached to the call. */
|
815
837
|
pm_node_t *block;
|
838
|
+
|
839
|
+
/** The flag indicating whether this arguments list has a forwarding argument. */
|
840
|
+
bool has_forwarding;
|
816
841
|
} pm_arguments_t;
|
817
842
|
|
818
843
|
/**
|
@@ -864,6 +889,105 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
|
|
864
889
|
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
|
865
890
|
}
|
866
891
|
|
892
|
+
/******************************************************************************/
|
893
|
+
/* Basic character checks */
|
894
|
+
/******************************************************************************/
|
895
|
+
|
896
|
+
/**
|
897
|
+
* This function is used extremely frequently to lex all of the identifiers in a
|
898
|
+
* source file, so it's important that it be as fast as possible. For this
|
899
|
+
* reason we have the encoding_changed boolean to check if we need to go through
|
900
|
+
* the function pointer or can just directly use the UTF-8 functions.
|
901
|
+
*/
|
902
|
+
static inline size_t
|
903
|
+
char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
|
904
|
+
if (parser->encoding_changed) {
|
905
|
+
size_t width;
|
906
|
+
if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
|
907
|
+
return width;
|
908
|
+
} else if (*b == '_') {
|
909
|
+
return 1;
|
910
|
+
} else if (*b >= 0x80) {
|
911
|
+
return parser->encoding->char_width(b, parser->end - b);
|
912
|
+
} else {
|
913
|
+
return 0;
|
914
|
+
}
|
915
|
+
} else if (*b < 0x80) {
|
916
|
+
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
|
917
|
+
} else {
|
918
|
+
return pm_encoding_utf_8_char_width(b, parser->end - b);
|
919
|
+
}
|
920
|
+
}
|
921
|
+
|
922
|
+
/**
|
923
|
+
* Similar to char_is_identifier but this function assumes that the encoding
|
924
|
+
* has not been changed.
|
925
|
+
*/
|
926
|
+
static inline size_t
|
927
|
+
char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
928
|
+
if (*b < 0x80) {
|
929
|
+
return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
|
930
|
+
} else {
|
931
|
+
return pm_encoding_utf_8_char_width(b, end - b);
|
932
|
+
}
|
933
|
+
}
|
934
|
+
|
935
|
+
/**
|
936
|
+
* Like the above, this function is also used extremely frequently to lex all of
|
937
|
+
* the identifiers in a source file once the first character has been found. So
|
938
|
+
* it's important that it be as fast as possible.
|
939
|
+
*/
|
940
|
+
static inline size_t
|
941
|
+
char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
942
|
+
if (parser->encoding_changed) {
|
943
|
+
size_t width;
|
944
|
+
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
|
945
|
+
return width;
|
946
|
+
} else if (*b == '_') {
|
947
|
+
return 1;
|
948
|
+
} else if (*b >= 0x80) {
|
949
|
+
return parser->encoding->char_width(b, parser->end - b);
|
950
|
+
} else {
|
951
|
+
return 0;
|
952
|
+
}
|
953
|
+
}
|
954
|
+
return char_is_identifier_utf8(b, parser->end);
|
955
|
+
}
|
956
|
+
|
957
|
+
// Here we're defining a perfect hash for the characters that are allowed in
|
958
|
+
// global names. This is used to quickly check the next character after a $ to
|
959
|
+
// see if it's a valid character for a global name.
|
960
|
+
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
|
961
|
+
#define PUNCT(idx) ( \
|
962
|
+
BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
|
963
|
+
BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
|
964
|
+
BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
|
965
|
+
BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
|
966
|
+
BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
|
967
|
+
BIT('0', idx))
|
968
|
+
|
969
|
+
const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
|
970
|
+
|
971
|
+
#undef BIT
|
972
|
+
#undef PUNCT
|
973
|
+
|
974
|
+
static inline bool
|
975
|
+
char_is_global_name_punctuation(const uint8_t b) {
|
976
|
+
const unsigned int i = (const unsigned int) b;
|
977
|
+
if (i <= 0x20 || 0x7e < i) return false;
|
978
|
+
|
979
|
+
return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
|
980
|
+
}
|
981
|
+
|
982
|
+
static inline bool
|
983
|
+
token_is_setter_name(pm_token_t *token) {
|
984
|
+
return (
|
985
|
+
(token->type == PM_TOKEN_IDENTIFIER) &&
|
986
|
+
(token->end - token->start >= 2) &&
|
987
|
+
(token->end[-1] == '=')
|
988
|
+
);
|
989
|
+
}
|
990
|
+
|
867
991
|
/******************************************************************************/
|
868
992
|
/* Node flag handling functions */
|
869
993
|
/******************************************************************************/
|
@@ -884,6 +1008,22 @@ pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
|
|
884
1008
|
node->flags &= (pm_node_flags_t) ~flag;
|
885
1009
|
}
|
886
1010
|
|
1011
|
+
/**
|
1012
|
+
* Set the repeated parameter flag on the given node.
|
1013
|
+
*/
|
1014
|
+
static inline void
|
1015
|
+
pm_node_flag_set_repeated_parameter(pm_node_t *node) {
|
1016
|
+
assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
|
1017
|
+
PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
|
1018
|
+
PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
|
1019
|
+
PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
|
1020
|
+
PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
|
1021
|
+
PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
|
1022
|
+
PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
|
1023
|
+
PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
|
1024
|
+
|
1025
|
+
pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
|
1026
|
+
}
|
887
1027
|
|
888
1028
|
/******************************************************************************/
|
889
1029
|
/* Node creation functions */
|
@@ -977,7 +1117,7 @@ static inline void *
|
|
977
1117
|
pm_alloc_node(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
|
978
1118
|
void *memory = calloc(1, size);
|
979
1119
|
if (memory == NULL) {
|
980
|
-
fprintf(stderr, "Failed to allocate %
|
1120
|
+
fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
|
981
1121
|
abort();
|
982
1122
|
}
|
983
1123
|
return memory;
|
@@ -1325,7 +1465,7 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
|
|
1325
1465
|
pm_assoc_node_t *node = PM_ALLOC_NODE(parser, pm_assoc_node_t);
|
1326
1466
|
const uint8_t *end;
|
1327
1467
|
|
1328
|
-
if (value != NULL) {
|
1468
|
+
if (value != NULL && value->location.end > key->location.end) {
|
1329
1469
|
end = value->location.end;
|
1330
1470
|
} else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
|
1331
1471
|
end = operator->end;
|
@@ -1333,6 +1473,13 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
|
|
1333
1473
|
end = key->location.end;
|
1334
1474
|
}
|
1335
1475
|
|
1476
|
+
// Hash string keys will be frozen, so we can mark them as frozen here so
|
1477
|
+
// that the compiler picks them up and also when we check for static literal
|
1478
|
+
// on the keys it gets factored in.
|
1479
|
+
if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
|
1480
|
+
key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
|
1481
|
+
}
|
1482
|
+
|
1336
1483
|
// If the key and value of this assoc node are both static literals, then
|
1337
1484
|
// we can mark this node as a static literal.
|
1338
1485
|
pm_node_flags_t flags = 0;
|
@@ -1490,7 +1637,7 @@ pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, p
|
|
1490
1637
|
* Allocate and initialize a new BlockNode node.
|
1491
1638
|
*/
|
1492
1639
|
static pm_block_node_t *
|
1493
|
-
pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals,
|
1640
|
+
pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
|
1494
1641
|
pm_block_node_t *node = PM_ALLOC_NODE(parser, pm_block_node_t);
|
1495
1642
|
|
1496
1643
|
*node = (pm_block_node_t) {
|
@@ -1499,7 +1646,6 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_
|
|
1499
1646
|
.location = { .start = opening->start, .end = closing->end },
|
1500
1647
|
},
|
1501
1648
|
.locals = *locals,
|
1502
|
-
.locals_body_index = locals_body_index,
|
1503
1649
|
.parameters = parameters,
|
1504
1650
|
.body = body,
|
1505
1651
|
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
|
@@ -1645,12 +1791,13 @@ pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
|
|
1645
1791
|
* in the various specializations of this function.
|
1646
1792
|
*/
|
1647
1793
|
static pm_call_node_t *
|
1648
|
-
pm_call_node_create(pm_parser_t *parser) {
|
1794
|
+
pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
|
1649
1795
|
pm_call_node_t *node = PM_ALLOC_NODE(parser, pm_call_node_t);
|
1650
1796
|
|
1651
1797
|
*node = (pm_call_node_t) {
|
1652
1798
|
{
|
1653
1799
|
.type = PM_CALL_NODE,
|
1800
|
+
.flags = flags,
|
1654
1801
|
.location = PM_LOCATION_NULL_VALUE(parser),
|
1655
1802
|
},
|
1656
1803
|
.receiver = NULL,
|
@@ -1666,6 +1813,15 @@ pm_call_node_create(pm_parser_t *parser) {
|
|
1666
1813
|
return node;
|
1667
1814
|
}
|
1668
1815
|
|
1816
|
+
/**
|
1817
|
+
* Returns the value that the ignore visibility flag should be set to for the
|
1818
|
+
* given receiver.
|
1819
|
+
*/
|
1820
|
+
static inline pm_node_flags_t
|
1821
|
+
pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
|
1822
|
+
return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
|
1823
|
+
}
|
1824
|
+
|
1669
1825
|
/**
|
1670
1826
|
* Allocate and initialize a new CallNode node from an aref or an aset
|
1671
1827
|
* expression.
|
@@ -1674,7 +1830,7 @@ static pm_call_node_t *
|
|
1674
1830
|
pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
|
1675
1831
|
pm_assert_value_expression(parser, receiver);
|
1676
1832
|
|
1677
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1833
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1678
1834
|
|
1679
1835
|
node->base.location.start = receiver->location.start;
|
1680
1836
|
node->base.location.end = pm_arguments_end(arguments);
|
@@ -1700,7 +1856,7 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t
|
|
1700
1856
|
pm_assert_value_expression(parser, receiver);
|
1701
1857
|
pm_assert_value_expression(parser, argument);
|
1702
1858
|
|
1703
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1859
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1704
1860
|
|
1705
1861
|
node->base.location.start = MIN(receiver->location.start, argument->location.start);
|
1706
1862
|
node->base.location.end = MAX(receiver->location.end, argument->location.end);
|
@@ -1723,7 +1879,7 @@ static pm_call_node_t *
|
|
1723
1879
|
pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
|
1724
1880
|
pm_assert_value_expression(parser, receiver);
|
1725
1881
|
|
1726
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1882
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1727
1883
|
|
1728
1884
|
node->base.location.start = receiver->location.start;
|
1729
1885
|
const uint8_t *end = pm_arguments_end(arguments);
|
@@ -1754,7 +1910,7 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
|
|
1754
1910
|
*/
|
1755
1911
|
static pm_call_node_t *
|
1756
1912
|
pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
|
1757
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1913
|
+
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
|
1758
1914
|
|
1759
1915
|
node->base.location.start = message->start;
|
1760
1916
|
node->base.location.end = pm_arguments_end(arguments);
|
@@ -1776,7 +1932,7 @@ static pm_call_node_t *
|
|
1776
1932
|
pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
|
1777
1933
|
pm_assert_value_expression(parser, receiver);
|
1778
1934
|
|
1779
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1935
|
+
pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
|
1780
1936
|
|
1781
1937
|
node->base.location.start = message->start;
|
1782
1938
|
if (arguments->closing_loc.start != NULL) {
|
@@ -1802,7 +1958,7 @@ static pm_call_node_t *
|
|
1802
1958
|
pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
|
1803
1959
|
pm_assert_value_expression(parser, receiver);
|
1804
1960
|
|
1805
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1961
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1806
1962
|
|
1807
1963
|
node->base.location.start = receiver->location.start;
|
1808
1964
|
node->base.location.end = pm_arguments_end(arguments);
|
@@ -1829,7 +1985,7 @@ static pm_call_node_t *
|
|
1829
1985
|
pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
|
1830
1986
|
pm_assert_value_expression(parser, receiver);
|
1831
1987
|
|
1832
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1988
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1833
1989
|
|
1834
1990
|
node->base.location.start = operator->start;
|
1835
1991
|
node->base.location.end = receiver->location.end;
|
@@ -1847,7 +2003,7 @@ pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *
|
|
1847
2003
|
*/
|
1848
2004
|
static pm_call_node_t *
|
1849
2005
|
pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
|
1850
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
2006
|
+
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
|
1851
2007
|
|
1852
2008
|
node->base.location = PM_LOCATION_TOKEN_VALUE(message);
|
1853
2009
|
node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
|
@@ -1885,11 +2041,12 @@ pm_call_node_index_p(pm_call_node_t *node) {
|
|
1885
2041
|
* operator assignment.
|
1886
2042
|
*/
|
1887
2043
|
static inline bool
|
1888
|
-
pm_call_node_writable_p(pm_call_node_t *node) {
|
2044
|
+
pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
|
1889
2045
|
return (
|
1890
2046
|
(node->message_loc.start != NULL) &&
|
1891
2047
|
(node->message_loc.end[-1] != '!') &&
|
1892
2048
|
(node->message_loc.end[-1] != '?') &&
|
2049
|
+
char_is_identifier_start(parser, node->message_loc.start) &&
|
1893
2050
|
(node->opening_loc.start == NULL) &&
|
1894
2051
|
(node->arguments == NULL) &&
|
1895
2052
|
(node->block == NULL)
|
@@ -2167,11 +2324,12 @@ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
|
|
2167
2324
|
static pm_index_target_node_t *
|
2168
2325
|
pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
|
2169
2326
|
pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
|
2327
|
+
pm_node_flags_t flags = target->base.flags;
|
2170
2328
|
|
2171
2329
|
*node = (pm_index_target_node_t) {
|
2172
2330
|
{
|
2173
2331
|
.type = PM_INDEX_TARGET_NODE,
|
2174
|
-
.flags =
|
2332
|
+
.flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
|
2175
2333
|
.location = target->base.location
|
2176
2334
|
},
|
2177
2335
|
.receiver = target->receiver,
|
@@ -2701,18 +2859,62 @@ pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *targ
|
|
2701
2859
|
return node;
|
2702
2860
|
}
|
2703
2861
|
|
2862
|
+
/**
|
2863
|
+
* Check if the receiver of a `def` node is allowed.
|
2864
|
+
*/
|
2865
|
+
static void
|
2866
|
+
pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
|
2867
|
+
switch (PM_NODE_TYPE(node)) {
|
2868
|
+
case PM_BEGIN_NODE: {
|
2869
|
+
const pm_begin_node_t *cast = (pm_begin_node_t *) node;
|
2870
|
+
if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
|
2871
|
+
break;
|
2872
|
+
}
|
2873
|
+
case PM_PARENTHESES_NODE: {
|
2874
|
+
const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
|
2875
|
+
if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
|
2876
|
+
break;
|
2877
|
+
}
|
2878
|
+
case PM_STATEMENTS_NODE: {
|
2879
|
+
const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
|
2880
|
+
pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
|
2881
|
+
break;
|
2882
|
+
}
|
2883
|
+
case PM_ARRAY_NODE:
|
2884
|
+
case PM_FLOAT_NODE:
|
2885
|
+
case PM_IMAGINARY_NODE:
|
2886
|
+
case PM_INTEGER_NODE:
|
2887
|
+
case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
|
2888
|
+
case PM_INTERPOLATED_STRING_NODE:
|
2889
|
+
case PM_INTERPOLATED_SYMBOL_NODE:
|
2890
|
+
case PM_INTERPOLATED_X_STRING_NODE:
|
2891
|
+
case PM_RATIONAL_NODE:
|
2892
|
+
case PM_REGULAR_EXPRESSION_NODE:
|
2893
|
+
case PM_SOURCE_ENCODING_NODE:
|
2894
|
+
case PM_SOURCE_FILE_NODE:
|
2895
|
+
case PM_SOURCE_LINE_NODE:
|
2896
|
+
case PM_STRING_NODE:
|
2897
|
+
case PM_SYMBOL_NODE:
|
2898
|
+
case PM_X_STRING_NODE:
|
2899
|
+
pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
|
2900
|
+
break;
|
2901
|
+
default:
|
2902
|
+
break;
|
2903
|
+
}
|
2904
|
+
}
|
2905
|
+
|
2704
2906
|
/**
|
2705
2907
|
* Allocate and initialize a new DefNode node.
|
2706
2908
|
*/
|
2707
2909
|
static pm_def_node_t *
|
2708
2910
|
pm_def_node_create(
|
2709
2911
|
pm_parser_t *parser,
|
2710
|
-
|
2912
|
+
pm_constant_id_t name,
|
2913
|
+
const pm_token_t *name_loc,
|
2711
2914
|
pm_node_t *receiver,
|
2712
2915
|
pm_parameters_node_t *parameters,
|
2713
2916
|
pm_node_t *body,
|
2714
2917
|
pm_constant_id_list_t *locals,
|
2715
|
-
uint32_t locals_body_index,
|
2716
2918
|
const pm_token_t *def_keyword,
|
2717
2919
|
const pm_token_t *operator,
|
2718
2920
|
const pm_token_t *lparen,
|
@@ -2729,18 +2931,21 @@ pm_def_node_create(
|
|
2729
2931
|
end = end_keyword->end;
|
2730
2932
|
}
|
2731
2933
|
|
2934
|
+
if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
|
2935
|
+
pm_def_node_receiver_check(parser, receiver);
|
2936
|
+
}
|
2937
|
+
|
2732
2938
|
*node = (pm_def_node_t) {
|
2733
2939
|
{
|
2734
2940
|
.type = PM_DEF_NODE,
|
2735
2941
|
.location = { .start = def_keyword->start, .end = end },
|
2736
2942
|
},
|
2737
|
-
.name =
|
2738
|
-
.name_loc = PM_LOCATION_TOKEN_VALUE(
|
2943
|
+
.name = name,
|
2944
|
+
.name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
|
2739
2945
|
.receiver = receiver,
|
2740
2946
|
.parameters = parameters,
|
2741
2947
|
.body = body,
|
2742
2948
|
.locals = *locals,
|
2743
|
-
.locals_body_index = locals_body_index,
|
2744
2949
|
.def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
|
2745
2950
|
.operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
|
2746
2951
|
.lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
|
@@ -3962,9 +4167,8 @@ pm_keyword_hash_node_create(pm_parser_t *parser) {
|
|
3962
4167
|
*/
|
3963
4168
|
static void
|
3964
4169
|
pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
|
3965
|
-
// If the element being added is not an AssocNode or does not have a symbol
|
3966
|
-
// we want to turn the
|
3967
|
-
// TODO: Rename the flag to SYMBOL_KEYS instead.
|
4170
|
+
// If the element being added is not an AssocNode or does not have a symbol
|
4171
|
+
// key, then we want to turn the SYMBOL_KEYS flag off.
|
3968
4172
|
if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
|
3969
4173
|
pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
|
3970
4174
|
}
|
@@ -4051,7 +4255,6 @@ static pm_lambda_node_t *
|
|
4051
4255
|
pm_lambda_node_create(
|
4052
4256
|
pm_parser_t *parser,
|
4053
4257
|
pm_constant_id_list_t *locals,
|
4054
|
-
uint32_t locals_body_index,
|
4055
4258
|
const pm_token_t *operator,
|
4056
4259
|
const pm_token_t *opening,
|
4057
4260
|
const pm_token_t *closing,
|
@@ -4069,7 +4272,6 @@ pm_lambda_node_create(
|
|
4069
4272
|
},
|
4070
4273
|
},
|
4071
4274
|
.locals = *locals,
|
4072
|
-
.locals_body_index = locals_body_index,
|
4073
4275
|
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
4074
4276
|
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
|
4075
4277
|
.closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
|
@@ -4161,12 +4363,10 @@ pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, c
|
|
4161
4363
|
}
|
4162
4364
|
|
4163
4365
|
/**
|
4164
|
-
* Allocate a new LocalVariableReadNode node.
|
4366
|
+
* Allocate a new LocalVariableReadNode node using the given constant id.
|
4165
4367
|
*/
|
4166
4368
|
static pm_local_variable_read_node_t *
|
4167
|
-
|
4168
|
-
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
|
4169
|
-
|
4369
|
+
pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth) {
|
4170
4370
|
if (parser->current_param_name == name_id) {
|
4171
4371
|
pm_parser_err_token(parser, name, PM_ERR_PARAMETER_CIRCULAR);
|
4172
4372
|
}
|
@@ -4185,6 +4385,15 @@ pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name,
|
|
4185
4385
|
return node;
|
4186
4386
|
}
|
4187
4387
|
|
4388
|
+
/**
|
4389
|
+
* Allocate a new LocalVariableReadNode node.
|
4390
|
+
*/
|
4391
|
+
static pm_local_variable_read_node_t *
|
4392
|
+
pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
|
4393
|
+
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
|
4394
|
+
return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth);
|
4395
|
+
}
|
4396
|
+
|
4188
4397
|
/**
|
4189
4398
|
* Allocate and initialize a new LocalVariableWriteNode node.
|
4190
4399
|
*/
|
@@ -4210,6 +4419,57 @@ pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name,
|
|
4210
4419
|
return node;
|
4211
4420
|
}
|
4212
4421
|
|
4422
|
+
/**
|
4423
|
+
* Returns true if the given bounds comprise `it`.
|
4424
|
+
*/
|
4425
|
+
static inline bool
|
4426
|
+
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
|
4427
|
+
return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
|
4428
|
+
}
|
4429
|
+
|
4430
|
+
/**
|
4431
|
+
* Returns true if the given node is a variable call named `it`, i.e. a candidate for the `it` default parameter.
|
4432
|
+
*/
|
4433
|
+
static inline bool
|
4434
|
+
pm_node_is_it(pm_parser_t *parser, pm_node_t *node) {
|
4435
|
+
// Check if it's a call node
|
4436
|
+
if (node->type != PM_CALL_NODE) {
|
4437
|
+
return false;
|
4438
|
+
}
|
4439
|
+
|
4440
|
+
// Check if it's a variable call
|
4441
|
+
pm_call_node_t *call_node = (pm_call_node_t *) node;
|
4442
|
+
if (!pm_call_node_variable_call_p(call_node)) {
|
4443
|
+
return false;
|
4444
|
+
}
|
4445
|
+
|
4446
|
+
// Check if it's called `it`
|
4447
|
+
pm_constant_id_t id = ((pm_call_node_t *)node)->name;
|
4448
|
+
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, id);
|
4449
|
+
return pm_token_is_it(constant->start, constant->start + constant->length);
|
4450
|
+
}
|
4451
|
+
|
4452
|
+
/**
|
4453
|
+
* Convert a `it` variable call node to a node for `it` default parameter.
|
4454
|
+
*/
|
4455
|
+
static pm_node_t *
|
4456
|
+
pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
|
4457
|
+
if (
|
4458
|
+
(parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
|
4459
|
+
!parser->current_scope->closed &&
|
4460
|
+
pm_node_is_it(parser, node)
|
4461
|
+
) {
|
4462
|
+
if (parser->current_scope->explicit_params) {
|
4463
|
+
pm_parser_err_previous(parser, PM_ERR_IT_NOT_ALLOWED);
|
4464
|
+
} else {
|
4465
|
+
pm_node_destroy(parser, node);
|
4466
|
+
pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
|
4467
|
+
node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
4468
|
+
}
|
4469
|
+
}
|
4470
|
+
return node;
|
4471
|
+
}
|
4472
|
+
|
4213
4473
|
/**
|
4214
4474
|
* Returns true if the given bounds comprise a numbered parameter (i.e., they
|
4215
4475
|
* are of the form /^_\d$/).
|
@@ -4402,13 +4662,20 @@ pm_multi_target_node_create(pm_parser_t *parser) {
|
|
4402
4662
|
*/
|
4403
4663
|
static void
|
4404
4664
|
pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
|
4405
|
-
if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)
|
4665
|
+
if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
|
4406
4666
|
if (node->rest == NULL) {
|
4407
4667
|
node->rest = target;
|
4408
4668
|
} else {
|
4409
4669
|
pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
|
4410
4670
|
pm_node_list_append(&node->rights, target);
|
4411
4671
|
}
|
4672
|
+
} else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
|
4673
|
+
if (node->rest == NULL) {
|
4674
|
+
node->rest = target;
|
4675
|
+
} else {
|
4676
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
|
4677
|
+
pm_node_list_append(&node->rights, target);
|
4678
|
+
}
|
4412
4679
|
} else if (node->rest == NULL) {
|
4413
4680
|
pm_node_list_append(&node->lefts, target);
|
4414
4681
|
} else {
|
@@ -5195,7 +5462,7 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
|
|
5195
5462
|
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
5196
5463
|
.location = PM_LOCATION_TOKEN_VALUE(file_keyword),
|
5197
5464
|
},
|
5198
|
-
.filepath = parser->
|
5465
|
+
.filepath = parser->filepath
|
5199
5466
|
};
|
5200
5467
|
|
5201
5468
|
return node;
|
@@ -5372,18 +5639,59 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
|
|
5372
5639
|
return node;
|
5373
5640
|
}
|
5374
5641
|
|
5642
|
+
/**
|
5643
|
+
* Read through the contents of a string and check if it consists solely of US ASCII code points.
|
5644
|
+
*/
|
5645
|
+
static bool
|
5646
|
+
pm_ascii_only_p(const pm_string_t *contents) {
|
5647
|
+
const size_t length = pm_string_length(contents);
|
5648
|
+
const uint8_t *source = pm_string_source(contents);
|
5649
|
+
|
5650
|
+
for (size_t index = 0; index < length; index++) {
|
5651
|
+
if (source[index] & 0x80) return false;
|
5652
|
+
}
|
5653
|
+
|
5654
|
+
return true;
|
5655
|
+
}
|
5656
|
+
|
5657
|
+
/**
|
5658
|
+
* Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
|
5659
|
+
* encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
|
5660
|
+
* points. Otherwise, the encoding may be explicitly set with an escape
|
5661
|
+
* sequence.
|
5662
|
+
*/
|
5663
|
+
static inline pm_node_flags_t
|
5664
|
+
parse_symbol_encoding(const pm_parser_t *parser, const pm_string_t *contents) {
|
5665
|
+
if (parser->explicit_encoding != NULL) {
|
5666
|
+
// A Symbol may optionally have its encoding explicitly set. This will
|
5667
|
+
// happen if an escape sequence results in a non-ASCII code point.
|
5668
|
+
if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
|
5669
|
+
return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
|
5670
|
+
} else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
|
5671
|
+
return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
|
5672
|
+
}
|
5673
|
+
} else if (pm_ascii_only_p(contents)) {
|
5674
|
+
// Ruby stipulates that all source files must use an ASCII-compatible
|
5675
|
+
// encoding. Thus, all symbols appearing in source are eligible for
|
5676
|
+
// "downgrading" to US-ASCII.
|
5677
|
+
return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
|
5678
|
+
}
|
5679
|
+
|
5680
|
+
return 0;
|
5681
|
+
}
|
5682
|
+
|
5375
5683
|
/**
|
5376
5684
|
* Allocate and initialize a new SymbolNode node with the given unescaped
|
5377
5685
|
* string.
|
5378
5686
|
*/
|
5379
5687
|
static pm_symbol_node_t *
|
5380
|
-
pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped) {
|
5688
|
+
pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
|
5381
5689
|
pm_symbol_node_t *node = PM_ALLOC_NODE(parser, pm_symbol_node_t);
|
5382
5690
|
|
5383
5691
|
*node = (pm_symbol_node_t) {
|
5384
5692
|
{
|
5385
5693
|
.type = PM_SYMBOL_NODE,
|
5386
|
-
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
5694
|
+
.flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
|
5387
5695
|
.location = {
|
5388
5696
|
.start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
|
5389
5697
|
.end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
|
@@ -5403,7 +5711,7 @@ pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening,
|
|
5403
5711
|
*/
|
5404
5712
|
static inline pm_symbol_node_t *
|
5405
5713
|
pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
|
5406
|
-
return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY);
|
5714
|
+
return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
|
5407
5715
|
}
|
5408
5716
|
|
5409
5717
|
/**
|
@@ -5411,7 +5719,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
|
|
5411
5719
|
*/
|
5412
5720
|
static pm_symbol_node_t *
|
5413
5721
|
pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
|
5414
|
-
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string);
|
5722
|
+
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, &parser->current_string));
|
5415
5723
|
parser->current_string = PM_STRING_EMPTY;
|
5416
5724
|
return node;
|
5417
5725
|
}
|
@@ -5433,6 +5741,8 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
5433
5741
|
|
5434
5742
|
assert((label.end - label.start) >= 0);
|
5435
5743
|
pm_string_shared_init(&node->unescaped, label.start, label.end);
|
5744
|
+
pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &node->unescaped));
|
5745
|
+
|
5436
5746
|
break;
|
5437
5747
|
}
|
5438
5748
|
case PM_TOKEN_MISSING: {
|
@@ -5495,6 +5805,8 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
|
|
5495
5805
|
.unescaped = node->unescaped
|
5496
5806
|
};
|
5497
5807
|
|
5808
|
+
pm_node_flag_set((pm_node_t *)new_node, parse_symbol_encoding(parser, &node->unescaped));
|
5809
|
+
|
5498
5810
|
// We are explicitly _not_ using pm_node_destroy here because we don't want
|
5499
5811
|
// to trash the unescaped string. We could instead copy the string if we
|
5500
5812
|
// know that it is owned, but we're taking the fast path for now.
|
@@ -5885,6 +6197,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
|
|
5885
6197
|
.closed = closed,
|
5886
6198
|
.explicit_params = false,
|
5887
6199
|
.numbered_parameters = 0,
|
6200
|
+
.forwarding_params = 0,
|
5888
6201
|
};
|
5889
6202
|
|
5890
6203
|
pm_constant_id_list_init(&scope->locals);
|
@@ -5893,6 +6206,76 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
|
|
5893
6206
|
return true;
|
5894
6207
|
}
|
5895
6208
|
|
6209
|
+
static void
|
6210
|
+
pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const pm_token_t * token, const uint8_t mask, pm_diagnostic_id_t diag)
|
6211
|
+
{
|
6212
|
+
pm_scope_t *scope = parser->current_scope;
|
6213
|
+
while (scope) {
|
6214
|
+
if (scope->forwarding_params & mask) {
|
6215
|
+
if (!scope->closed) {
|
6216
|
+
pm_parser_err_token(parser, token, diag);
|
6217
|
+
return;
|
6218
|
+
}
|
6219
|
+
return;
|
6220
|
+
}
|
6221
|
+
if (scope->closed) break;
|
6222
|
+
scope = scope->previous;
|
6223
|
+
}
|
6224
|
+
|
6225
|
+
pm_parser_err_token(parser, token, diag);
|
6226
|
+
}
|
6227
|
+
|
6228
|
+
static inline void
|
6229
|
+
pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token)
|
6230
|
+
{
|
6231
|
+
pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_BLOCK, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
|
6232
|
+
}
|
6233
|
+
|
6234
|
+
static void
|
6235
|
+
pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token)
|
6236
|
+
{
|
6237
|
+
pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_POSITIONALS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
6238
|
+
}
|
6239
|
+
|
6240
|
+
static inline void
|
6241
|
+
pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t * token)
|
6242
|
+
{
|
6243
|
+
pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_ALL, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
6244
|
+
}
|
6245
|
+
|
6246
|
+
static inline void
|
6247
|
+
pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token)
|
6248
|
+
{
|
6249
|
+
pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_KEYWORDS, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
6250
|
+
}
|
6251
|
+
|
6252
|
+
/**
|
6253
|
+
* Save the current param name as the return value and set it to the given
|
6254
|
+
* constant id.
|
6255
|
+
*/
|
6256
|
+
static inline pm_constant_id_t
|
6257
|
+
pm_parser_current_param_name_set(pm_parser_t *parser, pm_constant_id_t current_param_name) {
|
6258
|
+
pm_constant_id_t saved_param_name = parser->current_param_name;
|
6259
|
+
parser->current_param_name = current_param_name;
|
6260
|
+
return saved_param_name;
|
6261
|
+
}
|
6262
|
+
|
6263
|
+
/**
|
6264
|
+
* Save the current param name as the return value and clear it.
|
6265
|
+
*/
|
6266
|
+
static inline pm_constant_id_t
|
6267
|
+
pm_parser_current_param_name_unset(pm_parser_t *parser) {
|
6268
|
+
return pm_parser_current_param_name_set(parser, PM_CONSTANT_ID_UNSET);
|
6269
|
+
}
|
6270
|
+
|
6271
|
+
/**
|
6272
|
+
* Restore the current param name from the given value.
|
6273
|
+
*/
|
6274
|
+
static inline void
|
6275
|
+
pm_parser_current_param_name_restore(pm_parser_t *parser, pm_constant_id_t saved_param_name) {
|
6276
|
+
parser->current_param_name = saved_param_name;
|
6277
|
+
}
|
6278
|
+
|
5896
6279
|
/**
|
5897
6280
|
* Check if any of the currently visible scopes contain a local variable
|
5898
6281
|
* described by the given constant id.
|
@@ -5969,26 +6352,41 @@ pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t leng
|
|
5969
6352
|
return constant_id;
|
5970
6353
|
}
|
5971
6354
|
|
6355
|
+
/**
|
6356
|
+
* Add a local variable from a constant string to the current scope.
|
6357
|
+
*/
|
6358
|
+
static pm_constant_id_t
|
6359
|
+
pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
|
6360
|
+
pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
|
6361
|
+
if (constant_id != 0) pm_parser_local_add(parser, constant_id);
|
6362
|
+
return constant_id;
|
6363
|
+
}
|
6364
|
+
|
5972
6365
|
/**
|
5973
6366
|
* Add a parameter name to the current scope and check whether the name of the
|
5974
6367
|
* parameter is unique or not.
|
6368
|
+
*
|
6369
|
+
* Returns `true` if this is a duplicate parameter name, otherwise returns
|
6370
|
+
* false.
|
5975
6371
|
*/
|
5976
|
-
static
|
6372
|
+
static bool
|
5977
6373
|
pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
|
5978
6374
|
// We want to check whether the parameter name is a numbered parameter or
|
5979
6375
|
// not.
|
5980
6376
|
pm_refute_numbered_parameter(parser, name->start, name->end);
|
5981
6377
|
|
5982
|
-
// We want to ignore any parameter name that starts with an underscore.
|
5983
|
-
if ((name->start < name->end) && (*name->start == '_')) return;
|
5984
|
-
|
5985
6378
|
// Otherwise we'll fetch the constant id for the parameter name and check
|
5986
6379
|
// whether it's already in the current scope.
|
5987
6380
|
pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
|
5988
6381
|
|
5989
6382
|
if (pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
|
5990
|
-
|
6383
|
+
// Add an error if the parameter doesn't start with _ and has been seen before
|
6384
|
+
if ((name->start < name->end) && (*name->start != '_')) {
|
6385
|
+
pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_REPEAT);
|
6386
|
+
}
|
6387
|
+
return true;
|
5991
6388
|
}
|
6389
|
+
return false;
|
5992
6390
|
}
|
5993
6391
|
|
5994
6392
|
/**
|
@@ -6003,105 +6401,6 @@ pm_parser_scope_pop(pm_parser_t *parser) {
|
|
6003
6401
|
free(scope);
|
6004
6402
|
}
|
6005
6403
|
|
6006
|
-
/******************************************************************************/
|
6007
|
-
/* Basic character checks */
|
6008
|
-
/******************************************************************************/
|
6009
|
-
|
6010
|
-
/**
|
6011
|
-
* This function is used extremely frequently to lex all of the identifiers in a
|
6012
|
-
* source file, so it's important that it be as fast as possible. For this
|
6013
|
-
* reason we have the encoding_changed boolean to check if we need to go through
|
6014
|
-
* the function pointer or can just directly use the UTF-8 functions.
|
6015
|
-
*/
|
6016
|
-
static inline size_t
|
6017
|
-
char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
|
6018
|
-
if (parser->encoding_changed) {
|
6019
|
-
size_t width;
|
6020
|
-
if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
|
6021
|
-
return width;
|
6022
|
-
} else if (*b == '_') {
|
6023
|
-
return 1;
|
6024
|
-
} else if (*b >= 0x80) {
|
6025
|
-
return parser->encoding->char_width(b, parser->end - b);
|
6026
|
-
} else {
|
6027
|
-
return 0;
|
6028
|
-
}
|
6029
|
-
} else if (*b < 0x80) {
|
6030
|
-
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
|
6031
|
-
} else {
|
6032
|
-
return (size_t) (pm_encoding_utf_8_alpha_char(b, parser->end - b) || 1u);
|
6033
|
-
}
|
6034
|
-
}
|
6035
|
-
|
6036
|
-
/**
|
6037
|
-
* Similar to char_is_identifier but this function assumes that the encoding
|
6038
|
-
* has not been changed.
|
6039
|
-
*/
|
6040
|
-
static inline size_t
|
6041
|
-
char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
6042
|
-
if (*b < 0x80) {
|
6043
|
-
return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
|
6044
|
-
} else {
|
6045
|
-
return (size_t) (pm_encoding_utf_8_alnum_char(b, end - b) || 1u);
|
6046
|
-
}
|
6047
|
-
}
|
6048
|
-
|
6049
|
-
/**
|
6050
|
-
* Like the above, this function is also used extremely frequently to lex all of
|
6051
|
-
* the identifiers in a source file once the first character has been found. So
|
6052
|
-
* it's important that it be as fast as possible.
|
6053
|
-
*/
|
6054
|
-
static inline size_t
|
6055
|
-
char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
6056
|
-
if (parser->encoding_changed) {
|
6057
|
-
size_t width;
|
6058
|
-
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
|
6059
|
-
return width;
|
6060
|
-
} else if (*b == '_') {
|
6061
|
-
return 1;
|
6062
|
-
} else if (*b >= 0x80) {
|
6063
|
-
return parser->encoding->char_width(b, parser->end - b);
|
6064
|
-
} else {
|
6065
|
-
return 0;
|
6066
|
-
}
|
6067
|
-
}
|
6068
|
-
return char_is_identifier_utf8(b, parser->end);
|
6069
|
-
}
|
6070
|
-
|
6071
|
-
// Here we're defining a perfect hash for the characters that are allowed in
|
6072
|
-
// global names. This is used to quickly check the next character after a $ to
|
6073
|
-
// see if it's a valid character for a global name.
|
6074
|
-
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
|
6075
|
-
#define PUNCT(idx) ( \
|
6076
|
-
BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
|
6077
|
-
BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
|
6078
|
-
BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
|
6079
|
-
BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
|
6080
|
-
BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
|
6081
|
-
BIT('0', idx))
|
6082
|
-
|
6083
|
-
const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
|
6084
|
-
|
6085
|
-
#undef BIT
|
6086
|
-
#undef PUNCT
|
6087
|
-
|
6088
|
-
static inline bool
|
6089
|
-
char_is_global_name_punctuation(const uint8_t b) {
|
6090
|
-
const unsigned int i = (const unsigned int) b;
|
6091
|
-
if (i <= 0x20 || 0x7e < i) return false;
|
6092
|
-
|
6093
|
-
return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
|
6094
|
-
}
|
6095
|
-
|
6096
|
-
static inline bool
|
6097
|
-
token_is_setter_name(pm_token_t *token) {
|
6098
|
-
return (
|
6099
|
-
(token->type == PM_TOKEN_IDENTIFIER) &&
|
6100
|
-
(token->end - token->start >= 2) &&
|
6101
|
-
(token->end[-1] == '=')
|
6102
|
-
);
|
6103
|
-
}
|
6104
|
-
|
6105
6404
|
/******************************************************************************/
|
6106
6405
|
/* Stack helpers */
|
6107
6406
|
/******************************************************************************/
|
@@ -6317,8 +6616,10 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) {
|
|
6317
6616
|
*/
|
6318
6617
|
static void
|
6319
6618
|
parser_lex_magic_comment_frozen_string_literal_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
6320
|
-
if (start + 4 <= end && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
|
6619
|
+
if ((start + 4 <= end) && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
|
6321
6620
|
parser->frozen_string_literal = true;
|
6621
|
+
} else if ((start + 5 <= end) && pm_strncasecmp(start, (const uint8_t *) "false", 5) == 0) {
|
6622
|
+
parser->frozen_string_literal = false;
|
6322
6623
|
}
|
6323
6624
|
}
|
6324
6625
|
|
@@ -6541,21 +6842,27 @@ context_terminator(pm_context_t context, pm_token_t *token) {
|
|
6541
6842
|
return token->type == PM_TOKEN_BRACE_RIGHT;
|
6542
6843
|
case PM_CONTEXT_PREDICATE:
|
6543
6844
|
return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
|
6845
|
+
case PM_CONTEXT_NONE:
|
6846
|
+
return false;
|
6544
6847
|
}
|
6545
6848
|
|
6546
6849
|
return false;
|
6547
6850
|
}
|
6548
6851
|
|
6549
|
-
|
6550
|
-
|
6852
|
+
/**
|
6853
|
+
* Returns the context that the given token is found to be terminating, or
|
6854
|
+
* returns PM_CONTEXT_NONE.
|
6855
|
+
*/
|
6856
|
+
static pm_context_t
|
6857
|
+
context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
|
6551
6858
|
pm_context_node_t *context_node = parser->current_context;
|
6552
6859
|
|
6553
6860
|
while (context_node != NULL) {
|
6554
|
-
if (context_terminator(context_node->context, token)) return
|
6861
|
+
if (context_terminator(context_node->context, token)) return context_node->context;
|
6555
6862
|
context_node = context_node->prev;
|
6556
6863
|
}
|
6557
6864
|
|
6558
|
-
return
|
6865
|
+
return PM_CONTEXT_NONE;
|
6559
6866
|
}
|
6560
6867
|
|
6561
6868
|
static bool
|
@@ -6583,7 +6890,7 @@ context_pop(pm_parser_t *parser) {
|
|
6583
6890
|
}
|
6584
6891
|
|
6585
6892
|
static bool
|
6586
|
-
context_p(pm_parser_t *parser, pm_context_t context) {
|
6893
|
+
context_p(const pm_parser_t *parser, pm_context_t context) {
|
6587
6894
|
pm_context_node_t *context_node = parser->current_context;
|
6588
6895
|
|
6589
6896
|
while (context_node != NULL) {
|
@@ -6595,7 +6902,7 @@ context_p(pm_parser_t *parser, pm_context_t context) {
|
|
6595
6902
|
}
|
6596
6903
|
|
6597
6904
|
static bool
|
6598
|
-
context_def_p(pm_parser_t *parser) {
|
6905
|
+
context_def_p(const pm_parser_t *parser) {
|
6599
6906
|
pm_context_node_t *context_node = parser->current_context;
|
6600
6907
|
|
6601
6908
|
while (context_node != NULL) {
|
@@ -6618,6 +6925,55 @@ context_def_p(pm_parser_t *parser) {
|
|
6618
6925
|
return false;
|
6619
6926
|
}
|
6620
6927
|
|
6928
|
+
/**
|
6929
|
+
* Returns a human readable string for the given context, used in error
|
6930
|
+
* messages.
|
6931
|
+
*/
|
6932
|
+
static const char *
|
6933
|
+
context_human(pm_context_t context) {
|
6934
|
+
switch (context) {
|
6935
|
+
case PM_CONTEXT_NONE:
|
6936
|
+
assert(false && "unreachable");
|
6937
|
+
return "";
|
6938
|
+
case PM_CONTEXT_BEGIN: return "begin statement";
|
6939
|
+
case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
|
6940
|
+
case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
|
6941
|
+
case PM_CONTEXT_CASE_WHEN: return "'when' clause";
|
6942
|
+
case PM_CONTEXT_CASE_IN: return "'in' clause";
|
6943
|
+
case PM_CONTEXT_CLASS: return "class definition";
|
6944
|
+
case PM_CONTEXT_DEF: return "method definition";
|
6945
|
+
case PM_CONTEXT_DEF_PARAMS: return "method parameters";
|
6946
|
+
case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
|
6947
|
+
case PM_CONTEXT_ELSE: return "'else' clause";
|
6948
|
+
case PM_CONTEXT_ELSIF: return "'elsif' clause";
|
6949
|
+
case PM_CONTEXT_EMBEXPR: return "embedded expression";
|
6950
|
+
case PM_CONTEXT_ENSURE: return "'ensure' clause";
|
6951
|
+
case PM_CONTEXT_ENSURE_DEF: return "'ensure' clause";
|
6952
|
+
case PM_CONTEXT_FOR: return "for loop";
|
6953
|
+
case PM_CONTEXT_FOR_INDEX: return "for loop index";
|
6954
|
+
case PM_CONTEXT_IF: return "if statement";
|
6955
|
+
case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
|
6956
|
+
case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
|
6957
|
+
case PM_CONTEXT_MAIN: return "top level context";
|
6958
|
+
case PM_CONTEXT_MODULE: return "module definition";
|
6959
|
+
case PM_CONTEXT_PARENS: return "parentheses";
|
6960
|
+
case PM_CONTEXT_POSTEXE: return "'END' block";
|
6961
|
+
case PM_CONTEXT_PREDICATE: return "predicate";
|
6962
|
+
case PM_CONTEXT_PREEXE: return "'BEGIN' block";
|
6963
|
+
case PM_CONTEXT_RESCUE_ELSE: return "'else' clause";
|
6964
|
+
case PM_CONTEXT_RESCUE_ELSE_DEF: return "'else' clause";
|
6965
|
+
case PM_CONTEXT_RESCUE: return "'rescue' clause";
|
6966
|
+
case PM_CONTEXT_RESCUE_DEF: return "'rescue' clause";
|
6967
|
+
case PM_CONTEXT_SCLASS: return "singleton class definition";
|
6968
|
+
case PM_CONTEXT_UNLESS: return "unless statement";
|
6969
|
+
case PM_CONTEXT_UNTIL: return "until statement";
|
6970
|
+
case PM_CONTEXT_WHILE: return "while statement";
|
6971
|
+
}
|
6972
|
+
|
6973
|
+
assert(false && "unreachable");
|
6974
|
+
return "";
|
6975
|
+
}
|
6976
|
+
|
6621
6977
|
/******************************************************************************/
|
6622
6978
|
/* Specific token lexers */
|
6623
6979
|
/******************************************************************************/
|
@@ -6843,7 +7199,7 @@ lex_numeric(pm_parser_t *parser) {
|
|
6843
7199
|
static pm_token_type_t
|
6844
7200
|
lex_global_variable(pm_parser_t *parser) {
|
6845
7201
|
if (parser->current.end >= parser->end) {
|
6846
|
-
|
7202
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
6847
7203
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
6848
7204
|
}
|
6849
7205
|
|
@@ -6884,7 +7240,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
6884
7240
|
} while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
|
6885
7241
|
|
6886
7242
|
// $0 isn't allowed to be followed by anything.
|
6887
|
-
|
7243
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
6888
7244
|
}
|
6889
7245
|
|
6890
7246
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -6915,7 +7271,7 @@ lex_global_variable(pm_parser_t *parser) {
|
|
6915
7271
|
} else {
|
6916
7272
|
// If we get here, then we have a $ followed by something that isn't
|
6917
7273
|
// recognized as a global variable.
|
6918
|
-
|
7274
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
|
6919
7275
|
}
|
6920
7276
|
|
6921
7277
|
return PM_TOKEN_GLOBAL_VARIABLE;
|
@@ -7360,6 +7716,28 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
|
|
7360
7716
|
pm_buffer_append_byte(buffer, byte);
|
7361
7717
|
}
|
7362
7718
|
|
7719
|
+
/**
|
7720
|
+
* Write each byte of the given escaped character into the buffer.
|
7721
|
+
*/
|
7722
|
+
static inline void
|
7723
|
+
escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
|
7724
|
+
size_t width;
|
7725
|
+
if (parser->encoding_changed) {
|
7726
|
+
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
7727
|
+
} else {
|
7728
|
+
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
|
7729
|
+
}
|
7730
|
+
|
7731
|
+
// TODO: If the character is invalid in the given encoding, then we'll just
|
7732
|
+
// push one byte into the buffer. This should actually be an error.
|
7733
|
+
width = (width == 0) ? 1 : width;
|
7734
|
+
|
7735
|
+
for (size_t index = 0; index < width; index++) {
|
7736
|
+
escape_write_byte_encoded(parser, buffer, *parser->current.end);
|
7737
|
+
parser->current.end++;
|
7738
|
+
}
|
7739
|
+
}
|
7740
|
+
|
7363
7741
|
/**
|
7364
7742
|
* The regular expression engine doesn't support the same escape sequences as
|
7365
7743
|
* Ruby does. So first we have to read the escape sequence, and then we have to
|
@@ -7698,7 +8076,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
|
|
7698
8076
|
/* fallthrough */
|
7699
8077
|
default: {
|
7700
8078
|
if (parser->current.end < parser->end) {
|
7701
|
-
|
8079
|
+
escape_write_escape_encoded(parser, buffer);
|
7702
8080
|
}
|
7703
8081
|
return;
|
7704
8082
|
}
|
@@ -7797,10 +8175,10 @@ lex_at_variable(pm_parser_t *parser) {
|
|
7797
8175
|
while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
|
7798
8176
|
parser->current.end += width;
|
7799
8177
|
}
|
7800
|
-
} else if (type == PM_TOKEN_CLASS_VARIABLE) {
|
7801
|
-
pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_CLASS);
|
7802
8178
|
} else {
|
7803
|
-
|
8179
|
+
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
|
8180
|
+
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
8181
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
|
7804
8182
|
}
|
7805
8183
|
|
7806
8184
|
// If we're lexing an embedded variable, then we need to pop back into the
|
@@ -7975,14 +8353,43 @@ typedef struct {
|
|
7975
8353
|
* Push the given byte into the token buffer.
|
7976
8354
|
*/
|
7977
8355
|
static inline void
|
7978
|
-
|
8356
|
+
pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
|
7979
8357
|
pm_buffer_append_byte(&token_buffer->buffer, byte);
|
7980
8358
|
}
|
7981
8359
|
|
8360
|
+
/**
|
8361
|
+
* Append the given bytes into the token buffer.
|
8362
|
+
*/
|
8363
|
+
static inline void
|
8364
|
+
pm_token_buffer_push_bytes(pm_token_buffer_t *token_buffer, const uint8_t *bytes, size_t length) {
|
8365
|
+
pm_buffer_append_bytes(&token_buffer->buffer, bytes, length);
|
8366
|
+
}
|
8367
|
+
|
8368
|
+
/**
|
8369
|
+
* Push an escaped character into the token buffer.
|
8370
|
+
*/
|
8371
|
+
static inline void
|
8372
|
+
pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
|
8373
|
+
// First, determine the width of the character to be escaped.
|
8374
|
+
size_t width;
|
8375
|
+
if (parser->encoding_changed) {
|
8376
|
+
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
8377
|
+
} else {
|
8378
|
+
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
|
8379
|
+
}
|
8380
|
+
|
8381
|
+
// TODO: If the character is invalid in the given encoding, then we'll just
|
8382
|
+
// push one byte into the buffer. This should actually be an error.
|
8383
|
+
width = (width == 0 ? 1 : width);
|
8384
|
+
|
8385
|
+
// Now, push the bytes into the buffer.
|
8386
|
+
pm_token_buffer_push_bytes(token_buffer, parser->current.end, width);
|
8387
|
+
parser->current.end += width;
|
8388
|
+
}
|
8389
|
+
|
7982
8390
|
/**
|
7983
8391
|
* When we're about to return from lexing the current token and we know for sure
|
7984
8392
|
* that we have found an escape sequence, this function is called to copy the
|
7985
|
-
*
|
7986
8393
|
* contents of the token buffer into the current string on the parser so that it
|
7987
8394
|
* can be attached to the correct node.
|
7988
8395
|
*/
|
@@ -7997,7 +8404,6 @@ pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
|
7997
8404
|
* string. If we haven't pushed anything into the buffer, this means that we
|
7998
8405
|
* never found an escape sequence, so we can directly reference the bounds of
|
7999
8406
|
* the current string. Either way, at the return of this function it is expected
|
8000
|
-
*
|
8001
8407
|
* that parser->current_string is established in such a way that it can be
|
8002
8408
|
* attached to a node.
|
8003
8409
|
*/
|
@@ -8016,7 +8422,6 @@ pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
|
8016
8422
|
* point into the buffer because we're about to provide a string that has
|
8017
8423
|
* different content than a direct slice of the source.
|
8018
8424
|
*
|
8019
|
-
*
|
8020
8425
|
* It is expected that the parser's current token end will be pointing at one
|
8021
8426
|
* byte past the backslash that starts the escape sequence.
|
8022
8427
|
*/
|
@@ -8070,6 +8475,34 @@ pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor,
|
|
8070
8475
|
return whitespace;
|
8071
8476
|
}
|
8072
8477
|
|
8478
|
+
/**
|
8479
|
+
* Lex past the delimiter of a percent literal. Handle newlines and heredocs
|
8480
|
+
* appropriately.
|
8481
|
+
*/
|
8482
|
+
static uint8_t
|
8483
|
+
pm_lex_percent_delimiter(pm_parser_t *parser) {
|
8484
|
+
size_t eol_length = match_eol(parser);
|
8485
|
+
|
8486
|
+
if (eol_length) {
|
8487
|
+
if (parser->heredoc_end) {
|
8488
|
+
// If we have already lexed a heredoc, then the newline has already
|
8489
|
+
// been added to the list. In this case we want to just flush the
|
8490
|
+
// heredoc end.
|
8491
|
+
parser_flush_heredoc_end(parser);
|
8492
|
+
} else {
|
8493
|
+
// Otherwise, we'll add the newline to the list of newlines.
|
8494
|
+
pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
|
8495
|
+
}
|
8496
|
+
|
8497
|
+
const uint8_t delimiter = *parser->current.end;
|
8498
|
+
parser->current.end += eol_length;
|
8499
|
+
|
8500
|
+
return delimiter;
|
8501
|
+
}
|
8502
|
+
|
8503
|
+
return *parser->current.end++;
|
8504
|
+
}
|
8505
|
+
|
8073
8506
|
/**
|
8074
8507
|
* This is a convenience macro that will set the current token type, call the
|
8075
8508
|
* lex callback, and then return from the parser_lex function.
|
@@ -8635,7 +9068,7 @@ parser_lex(pm_parser_t *parser) {
|
|
8635
9068
|
// this is not a valid heredoc declaration. In this case we
|
8636
9069
|
// will add an error, but we will still return a heredoc
|
8637
9070
|
// start.
|
8638
|
-
pm_parser_err_current(parser,
|
9071
|
+
pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
|
8639
9072
|
body_start = parser->end;
|
8640
9073
|
} else {
|
8641
9074
|
// Otherwise, we want to indicate that the body of the
|
@@ -8826,12 +9259,10 @@ parser_lex(pm_parser_t *parser) {
|
|
8826
9259
|
LEX(PM_TOKEN_PLUS_EQUAL);
|
8827
9260
|
}
|
8828
9261
|
|
8829
|
-
|
8830
|
-
|
8831
|
-
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS)
|
8832
|
-
|
8833
|
-
|
8834
|
-
if (lex_state_beg_p(parser) || spcarg) {
|
9262
|
+
if (
|
9263
|
+
lex_state_beg_p(parser) ||
|
9264
|
+
(lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
|
9265
|
+
) {
|
8835
9266
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
8836
9267
|
|
8837
9268
|
if (pm_char_is_decimal_digit(peek(parser))) {
|
@@ -8871,11 +9302,12 @@ parser_lex(pm_parser_t *parser) {
|
|
8871
9302
|
}
|
8872
9303
|
|
8873
9304
|
bool spcarg = lex_state_spcarg_p(parser, space_seen);
|
8874
|
-
|
9305
|
+
bool is_beg = lex_state_beg_p(parser);
|
9306
|
+
if (!is_beg && spcarg) {
|
8875
9307
|
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
|
8876
9308
|
}
|
8877
9309
|
|
8878
|
-
if (
|
9310
|
+
if (is_beg || spcarg) {
|
8879
9311
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
8880
9312
|
LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
|
8881
9313
|
}
|
@@ -9026,15 +9458,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9026
9458
|
pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
|
9027
9459
|
}
|
9028
9460
|
|
9029
|
-
|
9030
|
-
|
9031
|
-
size_t eol_length = match_eol(parser);
|
9032
|
-
if (eol_length) {
|
9033
|
-
parser->current.end += eol_length;
|
9034
|
-
pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
|
9035
|
-
} else {
|
9036
|
-
parser->current.end++;
|
9037
|
-
}
|
9461
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9462
|
+
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9038
9463
|
|
9039
9464
|
if (parser->current.end < parser->end) {
|
9040
9465
|
LEX(PM_TOKEN_STRING_BEGIN);
|
@@ -9054,7 +9479,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9054
9479
|
parser->current.end++;
|
9055
9480
|
|
9056
9481
|
if (parser->current.end < parser->end) {
|
9057
|
-
lex_mode_push_list(parser, false,
|
9482
|
+
lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
|
9058
9483
|
} else {
|
9059
9484
|
lex_mode_push_list_eof(parser);
|
9060
9485
|
}
|
@@ -9065,7 +9490,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9065
9490
|
parser->current.end++;
|
9066
9491
|
|
9067
9492
|
if (parser->current.end < parser->end) {
|
9068
|
-
lex_mode_push_list(parser, true,
|
9493
|
+
lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
|
9069
9494
|
} else {
|
9070
9495
|
lex_mode_push_list_eof(parser);
|
9071
9496
|
}
|
@@ -9076,9 +9501,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9076
9501
|
parser->current.end++;
|
9077
9502
|
|
9078
9503
|
if (parser->current.end < parser->end) {
|
9079
|
-
|
9080
|
-
|
9081
|
-
parser->current.end++;
|
9504
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9505
|
+
lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9082
9506
|
} else {
|
9083
9507
|
lex_mode_push_regexp(parser, '\0', '\0');
|
9084
9508
|
}
|
@@ -9089,9 +9513,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9089
9513
|
parser->current.end++;
|
9090
9514
|
|
9091
9515
|
if (parser->current.end < parser->end) {
|
9092
|
-
|
9093
|
-
|
9094
|
-
parser->current.end++;
|
9516
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9517
|
+
lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9095
9518
|
} else {
|
9096
9519
|
lex_mode_push_string_eof(parser);
|
9097
9520
|
}
|
@@ -9102,9 +9525,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9102
9525
|
parser->current.end++;
|
9103
9526
|
|
9104
9527
|
if (parser->current.end < parser->end) {
|
9105
|
-
|
9106
|
-
|
9107
|
-
parser->current.end++;
|
9528
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9529
|
+
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9108
9530
|
} else {
|
9109
9531
|
lex_mode_push_string_eof(parser);
|
9110
9532
|
}
|
@@ -9115,9 +9537,9 @@ parser_lex(pm_parser_t *parser) {
|
|
9115
9537
|
parser->current.end++;
|
9116
9538
|
|
9117
9539
|
if (parser->current.end < parser->end) {
|
9118
|
-
|
9540
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9541
|
+
lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9119
9542
|
lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
|
9120
|
-
parser->current.end++;
|
9121
9543
|
} else {
|
9122
9544
|
lex_mode_push_string_eof(parser);
|
9123
9545
|
}
|
@@ -9128,7 +9550,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9128
9550
|
parser->current.end++;
|
9129
9551
|
|
9130
9552
|
if (parser->current.end < parser->end) {
|
9131
|
-
lex_mode_push_list(parser, false,
|
9553
|
+
lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
|
9132
9554
|
} else {
|
9133
9555
|
lex_mode_push_list_eof(parser);
|
9134
9556
|
}
|
@@ -9139,7 +9561,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9139
9561
|
parser->current.end++;
|
9140
9562
|
|
9141
9563
|
if (parser->current.end < parser->end) {
|
9142
|
-
lex_mode_push_list(parser, true,
|
9564
|
+
lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
|
9143
9565
|
} else {
|
9144
9566
|
lex_mode_push_list_eof(parser);
|
9145
9567
|
}
|
@@ -9150,8 +9572,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9150
9572
|
parser->current.end++;
|
9151
9573
|
|
9152
9574
|
if (parser->current.end < parser->end) {
|
9153
|
-
|
9154
|
-
parser
|
9575
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9576
|
+
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9155
9577
|
} else {
|
9156
9578
|
lex_mode_push_string_eof(parser);
|
9157
9579
|
}
|
@@ -9195,11 +9617,21 @@ parser_lex(pm_parser_t *parser) {
|
|
9195
9617
|
if (*parser->current.start != '_') {
|
9196
9618
|
size_t width = char_is_identifier_start(parser, parser->current.start);
|
9197
9619
|
|
9198
|
-
// If this isn't the beginning of an identifier, then
|
9199
|
-
// token as we've exhausted all of the
|
9200
|
-
// it and return the next
|
9620
|
+
// If this isn't the beginning of an identifier, then
|
9621
|
+
// it's an invalid token as we've exhausted all of the
|
9622
|
+
// other options. We'll skip past it and return the next
|
9623
|
+
// token after adding an appropriate error message.
|
9201
9624
|
if (!width) {
|
9202
|
-
|
9625
|
+
pm_diagnostic_id_t diag_id;
|
9626
|
+
if (*parser->current.start >= 0x80) {
|
9627
|
+
diag_id = PM_ERR_INVALID_MULTIBYTE_CHARACTER;
|
9628
|
+
} else if (char_is_ascii_printable(*parser->current.start) || (*parser->current.start == '\\')) {
|
9629
|
+
diag_id = PM_ERR_INVALID_PRINTABLE_CHARACTER;
|
9630
|
+
} else {
|
9631
|
+
diag_id = PM_ERR_INVALID_CHARACTER;
|
9632
|
+
}
|
9633
|
+
|
9634
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, *parser->current.start);
|
9203
9635
|
goto lex_next_token;
|
9204
9636
|
}
|
9205
9637
|
|
@@ -9306,7 +9738,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9306
9738
|
// and then find the first one.
|
9307
9739
|
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
9308
9740
|
const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
|
9309
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9741
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9310
9742
|
|
9311
9743
|
// If we haven't found an escape yet, then this buffer will be
|
9312
9744
|
// unallocated since we can refer directly to the source string.
|
@@ -9315,7 +9747,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9315
9747
|
while (breakpoint != NULL) {
|
9316
9748
|
// If we hit a null byte, skip directly past it.
|
9317
9749
|
if (*breakpoint == '\0') {
|
9318
|
-
breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
9750
|
+
breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
|
9319
9751
|
continue;
|
9320
9752
|
}
|
9321
9753
|
|
@@ -9334,7 +9766,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9334
9766
|
// we need to continue on past it.
|
9335
9767
|
if (lex_mode->as.list.nesting > 0) {
|
9336
9768
|
parser->current.end = breakpoint + 1;
|
9337
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9769
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9338
9770
|
lex_mode->as.list.nesting--;
|
9339
9771
|
continue;
|
9340
9772
|
}
|
@@ -9377,18 +9809,18 @@ parser_lex(pm_parser_t *parser) {
|
|
9377
9809
|
case '\t':
|
9378
9810
|
case '\v':
|
9379
9811
|
case '\\':
|
9380
|
-
|
9812
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9381
9813
|
parser->current.end++;
|
9382
9814
|
break;
|
9383
9815
|
case '\r':
|
9384
9816
|
parser->current.end++;
|
9385
9817
|
if (peek(parser) != '\n') {
|
9386
|
-
|
9818
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
9387
9819
|
break;
|
9388
9820
|
}
|
9389
9821
|
/* fallthrough */
|
9390
9822
|
case '\n':
|
9391
|
-
|
9823
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
9392
9824
|
|
9393
9825
|
if (parser->heredoc_end) {
|
9394
9826
|
// ... if we are on the same line as a heredoc,
|
@@ -9406,21 +9838,20 @@ parser_lex(pm_parser_t *parser) {
|
|
9406
9838
|
break;
|
9407
9839
|
default:
|
9408
9840
|
if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
|
9409
|
-
|
9841
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9410
9842
|
parser->current.end++;
|
9411
9843
|
} else if (lex_mode->as.list.interpolation) {
|
9412
9844
|
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
9413
9845
|
} else {
|
9414
|
-
|
9415
|
-
|
9416
|
-
parser->current.end++;
|
9846
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9847
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
9417
9848
|
}
|
9418
9849
|
|
9419
9850
|
break;
|
9420
9851
|
}
|
9421
9852
|
|
9422
9853
|
token_buffer.cursor = parser->current.end;
|
9423
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9854
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9424
9855
|
continue;
|
9425
9856
|
}
|
9426
9857
|
|
@@ -9433,7 +9864,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9433
9864
|
// that looked like an interpolated class or instance variable
|
9434
9865
|
// like "#@" but wasn't actually. In this case we'll just skip
|
9435
9866
|
// to the next breakpoint.
|
9436
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9867
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9437
9868
|
continue;
|
9438
9869
|
}
|
9439
9870
|
|
@@ -9448,7 +9879,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9448
9879
|
// and find the next breakpoint.
|
9449
9880
|
assert(*breakpoint == lex_mode->as.list.incrementor);
|
9450
9881
|
parser->current.end = breakpoint + 1;
|
9451
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9882
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9452
9883
|
lex_mode->as.list.nesting++;
|
9453
9884
|
continue;
|
9454
9885
|
}
|
@@ -9487,14 +9918,14 @@ parser_lex(pm_parser_t *parser) {
|
|
9487
9918
|
// regular expression. We'll use strpbrk to find the first of these
|
9488
9919
|
// characters.
|
9489
9920
|
const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
|
9490
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9921
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9491
9922
|
pm_token_buffer_t token_buffer = { { 0 }, 0 };
|
9492
9923
|
|
9493
9924
|
while (breakpoint != NULL) {
|
9494
9925
|
// If we hit a null byte, skip directly past it.
|
9495
9926
|
if (*breakpoint == '\0') {
|
9496
9927
|
parser->current.end = breakpoint + 1;
|
9497
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9928
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9498
9929
|
continue;
|
9499
9930
|
}
|
9500
9931
|
|
@@ -9516,7 +9947,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9516
9947
|
// If the terminator is not a newline, then we can set
|
9517
9948
|
// the next breakpoint and continue.
|
9518
9949
|
parser->current.end = breakpoint + 1;
|
9519
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9950
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9520
9951
|
continue;
|
9521
9952
|
}
|
9522
9953
|
}
|
@@ -9526,7 +9957,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9526
9957
|
if (*breakpoint == lex_mode->as.regexp.terminator) {
|
9527
9958
|
if (lex_mode->as.regexp.nesting > 0) {
|
9528
9959
|
parser->current.end = breakpoint + 1;
|
9529
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
9960
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9530
9961
|
lex_mode->as.regexp.nesting--;
|
9531
9962
|
continue;
|
9532
9963
|
}
|
@@ -9571,9 +10002,9 @@ parser_lex(pm_parser_t *parser) {
|
|
9571
10002
|
parser->current.end++;
|
9572
10003
|
if (peek(parser) != '\n') {
|
9573
10004
|
if (lex_mode->as.regexp.terminator != '\r') {
|
9574
|
-
|
10005
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9575
10006
|
}
|
9576
|
-
|
10007
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
9577
10008
|
break;
|
9578
10009
|
}
|
9579
10010
|
/* fallthrough */
|
@@ -9608,25 +10039,24 @@ parser_lex(pm_parser_t *parser) {
|
|
9608
10039
|
case '$': case ')': case '*': case '+':
|
9609
10040
|
case '.': case '>': case '?': case ']':
|
9610
10041
|
case '^': case '|': case '}':
|
9611
|
-
|
10042
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9612
10043
|
break;
|
9613
10044
|
default:
|
9614
10045
|
break;
|
9615
10046
|
}
|
9616
10047
|
|
9617
|
-
|
10048
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9618
10049
|
parser->current.end++;
|
9619
10050
|
break;
|
9620
10051
|
}
|
9621
10052
|
|
9622
|
-
if (peeked < 0x80)
|
9623
|
-
|
9624
|
-
parser->current.end++;
|
10053
|
+
if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer, '\\');
|
10054
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
9625
10055
|
break;
|
9626
10056
|
}
|
9627
10057
|
|
9628
10058
|
token_buffer.cursor = parser->current.end;
|
9629
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10059
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9630
10060
|
continue;
|
9631
10061
|
}
|
9632
10062
|
|
@@ -9639,7 +10069,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9639
10069
|
// something that looked like an interpolated class or
|
9640
10070
|
// instance variable like "#@" but wasn't actually. In
|
9641
10071
|
// this case we'll just skip to the next breakpoint.
|
9642
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10072
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9643
10073
|
continue;
|
9644
10074
|
}
|
9645
10075
|
|
@@ -9654,7 +10084,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9654
10084
|
// and find the next breakpoint.
|
9655
10085
|
assert(*breakpoint == lex_mode->as.regexp.incrementor);
|
9656
10086
|
parser->current.end = breakpoint + 1;
|
9657
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10087
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
|
9658
10088
|
lex_mode->as.regexp.nesting++;
|
9659
10089
|
continue;
|
9660
10090
|
}
|
@@ -9690,7 +10120,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9690
10120
|
// string. We'll use strpbrk to find the first of these characters.
|
9691
10121
|
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
9692
10122
|
const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
|
9693
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10123
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9694
10124
|
|
9695
10125
|
// If we haven't found an escape yet, then this buffer will be
|
9696
10126
|
// unallocated since we can refer directly to the source string.
|
@@ -9702,7 +10132,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9702
10132
|
if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
|
9703
10133
|
lex_mode->as.string.nesting++;
|
9704
10134
|
parser->current.end = breakpoint + 1;
|
9705
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10135
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9706
10136
|
continue;
|
9707
10137
|
}
|
9708
10138
|
|
@@ -9714,7 +10144,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9714
10144
|
// to continue on past it.
|
9715
10145
|
if (lex_mode->as.string.nesting > 0) {
|
9716
10146
|
parser->current.end = breakpoint + 1;
|
9717
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10147
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9718
10148
|
lex_mode->as.string.nesting--;
|
9719
10149
|
continue;
|
9720
10150
|
}
|
@@ -9756,7 +10186,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9756
10186
|
if (parser->heredoc_end == NULL) {
|
9757
10187
|
pm_newline_list_append(&parser->newline_list, breakpoint);
|
9758
10188
|
parser->current.end = breakpoint + 1;
|
9759
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10189
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9760
10190
|
continue;
|
9761
10191
|
} else {
|
9762
10192
|
parser->current.end = breakpoint + 1;
|
@@ -9770,7 +10200,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9770
10200
|
case '\0':
|
9771
10201
|
// Skip directly past the null character.
|
9772
10202
|
parser->current.end = breakpoint + 1;
|
9773
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10203
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9774
10204
|
break;
|
9775
10205
|
case '\\': {
|
9776
10206
|
// Here we hit escapes.
|
@@ -9788,23 +10218,23 @@ parser_lex(pm_parser_t *parser) {
|
|
9788
10218
|
|
9789
10219
|
switch (peeked) {
|
9790
10220
|
case '\\':
|
9791
|
-
|
10221
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9792
10222
|
parser->current.end++;
|
9793
10223
|
break;
|
9794
10224
|
case '\r':
|
9795
10225
|
parser->current.end++;
|
9796
10226
|
if (peek(parser) != '\n') {
|
9797
10227
|
if (!lex_mode->as.string.interpolation) {
|
9798
|
-
|
10228
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9799
10229
|
}
|
9800
|
-
|
10230
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
9801
10231
|
break;
|
9802
10232
|
}
|
9803
10233
|
/* fallthrough */
|
9804
10234
|
case '\n':
|
9805
10235
|
if (!lex_mode->as.string.interpolation) {
|
9806
|
-
|
9807
|
-
|
10236
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10237
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
9808
10238
|
}
|
9809
10239
|
|
9810
10240
|
if (parser->heredoc_end) {
|
@@ -9823,24 +10253,23 @@ parser_lex(pm_parser_t *parser) {
|
|
9823
10253
|
break;
|
9824
10254
|
default:
|
9825
10255
|
if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
|
9826
|
-
|
10256
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9827
10257
|
parser->current.end++;
|
9828
10258
|
} else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
|
9829
|
-
|
10259
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9830
10260
|
parser->current.end++;
|
9831
10261
|
} else if (lex_mode->as.string.interpolation) {
|
9832
10262
|
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
9833
10263
|
} else {
|
9834
|
-
|
9835
|
-
|
9836
|
-
parser->current.end++;
|
10264
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10265
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
9837
10266
|
}
|
9838
10267
|
|
9839
10268
|
break;
|
9840
10269
|
}
|
9841
10270
|
|
9842
10271
|
token_buffer.cursor = parser->current.end;
|
9843
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10272
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9844
10273
|
break;
|
9845
10274
|
}
|
9846
10275
|
case '#': {
|
@@ -9851,7 +10280,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9851
10280
|
// looked like an interpolated class or instance variable like "#@"
|
9852
10281
|
// but wasn't actually. In this case we'll just skip to the next
|
9853
10282
|
// breakpoint.
|
9854
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10283
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9855
10284
|
break;
|
9856
10285
|
}
|
9857
10286
|
|
@@ -9888,15 +10317,22 @@ parser_lex(pm_parser_t *parser) {
|
|
9888
10317
|
parser->next_start = NULL;
|
9889
10318
|
}
|
9890
10319
|
|
9891
|
-
//
|
9892
|
-
//
|
10320
|
+
// Now let's grab the information about the identifier off of the
|
10321
|
+
// current lex mode.
|
10322
|
+
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
10323
|
+
|
10324
|
+
// We'll check if we're at the end of the file. If we are, then we
|
10325
|
+
// will add an error (because we weren't able to find the
|
10326
|
+
// terminator) but still continue parsing so that content after the
|
10327
|
+
// declaration of the heredoc can be parsed.
|
9893
10328
|
if (parser->current.end >= parser->end) {
|
9894
|
-
|
10329
|
+
pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
|
10330
|
+
parser->next_start = lex_mode->as.heredoc.next_start;
|
10331
|
+
parser->heredoc_end = parser->current.end;
|
10332
|
+
lex_state_set(parser, PM_LEX_STATE_END);
|
10333
|
+
LEX(PM_TOKEN_HEREDOC_END);
|
9895
10334
|
}
|
9896
10335
|
|
9897
|
-
// Now let's grab the information about the identifier off of the current
|
9898
|
-
// lex mode.
|
9899
|
-
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
9900
10336
|
const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
|
9901
10337
|
size_t ident_length = lex_mode->as.heredoc.ident_length;
|
9902
10338
|
|
@@ -9972,7 +10408,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9972
10408
|
breakpoints[2] = '\0';
|
9973
10409
|
}
|
9974
10410
|
|
9975
|
-
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10411
|
+
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9976
10412
|
pm_token_buffer_t token_buffer = { { 0 }, 0 };
|
9977
10413
|
bool was_escaped_newline = false;
|
9978
10414
|
|
@@ -9981,7 +10417,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9981
10417
|
case '\0':
|
9982
10418
|
// Skip directly past the null character.
|
9983
10419
|
parser->current.end = breakpoint + 1;
|
9984
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10420
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
9985
10421
|
break;
|
9986
10422
|
case '\n': {
|
9987
10423
|
if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
|
@@ -10056,7 +10492,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10056
10492
|
// Otherwise we hit a newline and it wasn't followed by
|
10057
10493
|
// a terminator, so we can continue parsing.
|
10058
10494
|
parser->current.end = breakpoint + 1;
|
10059
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10495
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10060
10496
|
break;
|
10061
10497
|
}
|
10062
10498
|
case '\\': {
|
@@ -10083,21 +10519,20 @@ parser_lex(pm_parser_t *parser) {
|
|
10083
10519
|
case '\r':
|
10084
10520
|
parser->current.end++;
|
10085
10521
|
if (peek(parser) != '\n') {
|
10086
|
-
|
10087
|
-
|
10522
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10523
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
10088
10524
|
break;
|
10089
10525
|
}
|
10090
10526
|
/* fallthrough */
|
10091
10527
|
case '\n':
|
10092
|
-
|
10093
|
-
|
10528
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10529
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
10094
10530
|
token_buffer.cursor = parser->current.end + 1;
|
10095
10531
|
breakpoint = parser->current.end;
|
10096
10532
|
continue;
|
10097
10533
|
default:
|
10098
|
-
|
10099
|
-
|
10100
|
-
pm_token_buffer_push(&token_buffer, peeked);
|
10534
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10535
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
10101
10536
|
break;
|
10102
10537
|
}
|
10103
10538
|
} else {
|
@@ -10105,7 +10540,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10105
10540
|
case '\r':
|
10106
10541
|
parser->current.end++;
|
10107
10542
|
if (peek(parser) != '\n') {
|
10108
|
-
|
10543
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
10109
10544
|
break;
|
10110
10545
|
}
|
10111
10546
|
/* fallthrough */
|
@@ -10121,7 +10556,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10121
10556
|
}
|
10122
10557
|
|
10123
10558
|
token_buffer.cursor = parser->current.end;
|
10124
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10559
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10125
10560
|
break;
|
10126
10561
|
}
|
10127
10562
|
case '#': {
|
@@ -10133,7 +10568,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10133
10568
|
// or instance variable like "#@" but wasn't
|
10134
10569
|
// actually. In this case we'll just skip to the
|
10135
10570
|
// next breakpoint.
|
10136
|
-
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
10571
|
+
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
|
10137
10572
|
break;
|
10138
10573
|
}
|
10139
10574
|
|
@@ -10184,8 +10619,8 @@ parser_lex(pm_parser_t *parser) {
|
|
10184
10619
|
typedef enum {
|
10185
10620
|
PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
|
10186
10621
|
PM_BINDING_POWER_STATEMENT = 2,
|
10187
|
-
|
10188
|
-
|
10622
|
+
PM_BINDING_POWER_MODIFIER_RESCUE = 4, // rescue
|
10623
|
+
PM_BINDING_POWER_MODIFIER = 6, // if unless until while
|
10189
10624
|
PM_BINDING_POWER_COMPOSITION = 8, // and or
|
10190
10625
|
PM_BINDING_POWER_NOT = 10, // not
|
10191
10626
|
PM_BINDING_POWER_MATCH = 12, // => in
|
@@ -10239,15 +10674,15 @@ typedef struct {
|
|
10239
10674
|
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
|
10240
10675
|
|
10241
10676
|
pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
|
10677
|
+
// rescue
|
10678
|
+
[PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
|
10679
|
+
|
10242
10680
|
// if unless until while
|
10243
10681
|
[PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
10244
10682
|
[PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
10245
10683
|
[PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
10246
10684
|
[PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
10247
10685
|
|
10248
|
-
// rescue
|
10249
|
-
[PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
|
10250
|
-
|
10251
10686
|
// and or
|
10252
10687
|
[PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
|
10253
10688
|
[PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
|
@@ -10381,14 +10816,6 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
|
|
10381
10816
|
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
|
10382
10817
|
}
|
10383
10818
|
|
10384
|
-
/**
|
10385
|
-
* Returns true if the current token is any of the five given types.
|
10386
|
-
*/
|
10387
|
-
static inline bool
|
10388
|
-
match5(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5) {
|
10389
|
-
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5);
|
10390
|
-
}
|
10391
|
-
|
10392
10819
|
/**
|
10393
10820
|
* Returns true if the current token is any of the six given types.
|
10394
10821
|
*/
|
@@ -10654,7 +11081,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
|
|
10654
11081
|
return target;
|
10655
11082
|
case PM_BACK_REFERENCE_READ_NODE:
|
10656
11083
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
10657
|
-
|
11084
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
10658
11085
|
return target;
|
10659
11086
|
case PM_GLOBAL_VARIABLE_READ_NODE:
|
10660
11087
|
assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
|
@@ -10792,7 +11219,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
10792
11219
|
}
|
10793
11220
|
case PM_BACK_REFERENCE_READ_NODE:
|
10794
11221
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
10795
|
-
|
11222
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
|
10796
11223
|
/* fallthrough */
|
10797
11224
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
10798
11225
|
pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
|
@@ -10866,7 +11293,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
10866
11293
|
return target;
|
10867
11294
|
}
|
10868
11295
|
|
10869
|
-
if (
|
11296
|
+
if (char_is_identifier_start(parser, call->message_loc.start)) {
|
10870
11297
|
// When we get here, we have a method call, because it was
|
10871
11298
|
// previously marked as a method call but now we have an =. This
|
10872
11299
|
// looks like:
|
@@ -10967,7 +11394,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
10967
11394
|
pm_multi_target_node_targets_append(parser, result, target);
|
10968
11395
|
} else if (!match1(parser, PM_TOKEN_EOF)) {
|
10969
11396
|
// If we get here, then we have a trailing , in a multi target node.
|
10970
|
-
// We'll
|
11397
|
+
// We'll add an implicit rest node to represent this.
|
10971
11398
|
pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
|
10972
11399
|
pm_multi_target_node_targets_append(parser, result, rest);
|
10973
11400
|
break;
|
@@ -10984,6 +11411,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
10984
11411
|
static pm_node_t *
|
10985
11412
|
parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power) {
|
10986
11413
|
pm_node_t *result = parse_targets(parser, first_target, binding_power);
|
11414
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
10987
11415
|
|
10988
11416
|
// Ensure that we have either an = or a ) after the targets.
|
10989
11417
|
if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
@@ -11024,7 +11452,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
11024
11452
|
break;
|
11025
11453
|
}
|
11026
11454
|
|
11027
|
-
// If we have a terminator, then we will parse all
|
11455
|
+
// If we have a terminator, then we will parse all consecutive terminators
|
11028
11456
|
// and then continue parsing the statements list.
|
11029
11457
|
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
11030
11458
|
// If we have a terminator, then we will continue parsing the statements
|
@@ -11056,8 +11484,13 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
11056
11484
|
|
11057
11485
|
while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
|
11058
11486
|
if (context_terminator(context, &parser->current)) break;
|
11059
|
-
} else {
|
11060
|
-
|
11487
|
+
} else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
|
11488
|
+
// This is an inlined version of accept1 because the error that we
|
11489
|
+
// want to add has varargs. If this happens again, we should
|
11490
|
+
// probably extract a helper function.
|
11491
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
11492
|
+
parser->previous.start = parser->previous.end;
|
11493
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
11061
11494
|
}
|
11062
11495
|
}
|
11063
11496
|
|
@@ -11084,8 +11517,9 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
|
|
11084
11517
|
|
11085
11518
|
if (token_begins_expression_p(parser->current.type)) {
|
11086
11519
|
value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
11087
|
-
}
|
11088
|
-
|
11520
|
+
}
|
11521
|
+
else {
|
11522
|
+
pm_parser_scope_forwarding_keywords_check(parser, &operator);
|
11089
11523
|
}
|
11090
11524
|
|
11091
11525
|
element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
|
@@ -11234,13 +11668,8 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
11234
11668
|
if (token_begins_expression_p(parser->current.type)) {
|
11235
11669
|
expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_ARGUMENT);
|
11236
11670
|
} else {
|
11237
|
-
|
11238
|
-
|
11239
|
-
pm_constant_id_t ellipsis_id = pm_parser_constant_id_constant(parser, "...", 3);
|
11240
|
-
if (pm_parser_local_depth_constant_id(parser, ellipsis_id) == -1) {
|
11241
|
-
pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
|
11242
|
-
}
|
11243
|
-
}
|
11671
|
+
// Anonymous block forwarding is allowed in a method that has a `...` parameter (e.g. `def foo(...); bar(&); end`).
|
11672
|
+
pm_parser_scope_forwarding_block_check(parser, &operator);
|
11244
11673
|
}
|
11245
11674
|
|
11246
11675
|
argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
|
@@ -11258,10 +11687,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
11258
11687
|
pm_token_t operator = parser->previous;
|
11259
11688
|
|
11260
11689
|
if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
|
11261
|
-
|
11262
|
-
pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
11263
|
-
}
|
11264
|
-
|
11690
|
+
pm_parser_scope_forwarding_positionals_check(parser, &operator);
|
11265
11691
|
argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
|
11266
11692
|
} else {
|
11267
11693
|
pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
|
@@ -11287,15 +11713,14 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
11287
11713
|
pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
|
11288
11714
|
argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
|
11289
11715
|
} else {
|
11290
|
-
|
11291
|
-
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
11292
|
-
}
|
11716
|
+
pm_parser_scope_forwarding_all_check(parser, &parser->previous);
|
11293
11717
|
if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
|
11294
11718
|
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
|
11295
11719
|
}
|
11296
11720
|
|
11297
11721
|
argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
|
11298
11722
|
parse_arguments_append(parser, arguments, argument);
|
11723
|
+
arguments->has_forwarding = true;
|
11299
11724
|
parsed_forwarding_arguments = true;
|
11300
11725
|
break;
|
11301
11726
|
}
|
@@ -11338,6 +11763,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
11338
11763
|
}
|
11339
11764
|
|
11340
11765
|
parsed_bare_hash = true;
|
11766
|
+
} else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
|
11767
|
+
// TODO: Could we solve this with binding powers instead?
|
11768
|
+
pm_parser_err_current(parser, PM_ERR_ARGUMENT_IN);
|
11341
11769
|
}
|
11342
11770
|
|
11343
11771
|
parse_arguments_append(parser, arguments, argument);
|
@@ -11414,7 +11842,9 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
|
|
11414
11842
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
11415
11843
|
pm_token_t name = parser->previous;
|
11416
11844
|
value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
|
11417
|
-
pm_parser_parameter_name_check(parser, &name)
|
11845
|
+
if (pm_parser_parameter_name_check(parser, &name)) {
|
11846
|
+
pm_node_flag_set_repeated_parameter(value);
|
11847
|
+
}
|
11418
11848
|
pm_parser_local_add_token(parser, &name);
|
11419
11849
|
}
|
11420
11850
|
|
@@ -11424,7 +11854,9 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
|
|
11424
11854
|
pm_token_t name = parser->previous;
|
11425
11855
|
|
11426
11856
|
param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
|
11427
|
-
pm_parser_parameter_name_check(parser, &name)
|
11857
|
+
if (pm_parser_parameter_name_check(parser, &name)) {
|
11858
|
+
pm_node_flag_set_repeated_parameter(param);
|
11859
|
+
}
|
11428
11860
|
pm_parser_local_add_token(parser, &name);
|
11429
11861
|
}
|
11430
11862
|
|
@@ -11541,19 +11973,20 @@ parse_parameters(
|
|
11541
11973
|
pm_token_t operator = parser->previous;
|
11542
11974
|
pm_token_t name;
|
11543
11975
|
|
11976
|
+
bool repeated = false;
|
11544
11977
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
11545
11978
|
name = parser->previous;
|
11546
|
-
pm_parser_parameter_name_check(parser, &name);
|
11979
|
+
repeated = pm_parser_parameter_name_check(parser, &name);
|
11547
11980
|
pm_parser_local_add_token(parser, &name);
|
11548
11981
|
} else {
|
11549
11982
|
name = not_provided(parser);
|
11550
|
-
|
11551
|
-
if (allows_forwarding_parameters) {
|
11552
|
-
pm_parser_local_add_token(parser, &operator);
|
11553
|
-
}
|
11983
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_BLOCK;
|
11554
11984
|
}
|
11555
11985
|
|
11556
11986
|
pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
|
11987
|
+
if (repeated) {
|
11988
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)param);
|
11989
|
+
}
|
11557
11990
|
if (params->block == NULL) {
|
11558
11991
|
pm_parameters_node_block_set(params, param);
|
11559
11992
|
} else {
|
@@ -11572,9 +12005,8 @@ parse_parameters(
|
|
11572
12005
|
update_parameter_state(parser, &parser->current, &order);
|
11573
12006
|
parser_lex(parser);
|
11574
12007
|
|
11575
|
-
|
11576
|
-
|
11577
|
-
}
|
12008
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_BLOCK;
|
12009
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_ALL;
|
11578
12010
|
|
11579
12011
|
pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
|
11580
12012
|
if (params->keyword_rest != NULL) {
|
@@ -11626,20 +12058,23 @@ parse_parameters(
|
|
11626
12058
|
}
|
11627
12059
|
|
11628
12060
|
pm_token_t name = parser->previous;
|
11629
|
-
pm_parser_parameter_name_check(parser, &name);
|
12061
|
+
bool repeated = pm_parser_parameter_name_check(parser, &name);
|
11630
12062
|
pm_parser_local_add_token(parser, &name);
|
11631
12063
|
|
11632
12064
|
if (accept1(parser, PM_TOKEN_EQUAL)) {
|
11633
12065
|
pm_token_t operator = parser->previous;
|
11634
12066
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
11635
|
-
|
11636
|
-
|
12067
|
+
|
12068
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &name));
|
11637
12069
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
|
11638
12070
|
|
11639
12071
|
pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
|
12072
|
+
if (repeated) {
|
12073
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)param);
|
12074
|
+
}
|
11640
12075
|
pm_parameters_node_optionals_append(params, param);
|
11641
12076
|
|
11642
|
-
parser
|
12077
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
11643
12078
|
context_pop(parser);
|
11644
12079
|
|
11645
12080
|
// If parsing the value of the parameter resulted in error recovery,
|
@@ -11651,9 +12086,15 @@ parse_parameters(
|
|
11651
12086
|
}
|
11652
12087
|
} else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
|
11653
12088
|
pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
|
12089
|
+
if (repeated) {
|
12090
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)param);
|
12091
|
+
}
|
11654
12092
|
pm_parameters_node_requireds_append(params, (pm_node_t *) param);
|
11655
12093
|
} else {
|
11656
12094
|
pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
|
12095
|
+
if (repeated) {
|
12096
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)param);
|
12097
|
+
}
|
11657
12098
|
pm_parameters_node_posts_append(params, (pm_node_t *) param);
|
11658
12099
|
}
|
11659
12100
|
|
@@ -11668,7 +12109,7 @@ parse_parameters(
|
|
11668
12109
|
pm_token_t local = name;
|
11669
12110
|
local.end -= 1;
|
11670
12111
|
|
11671
|
-
pm_parser_parameter_name_check(parser, &local);
|
12112
|
+
bool repeated = pm_parser_parameter_name_check(parser, &local);
|
11672
12113
|
pm_parser_local_add_token(parser, &local);
|
11673
12114
|
|
11674
12115
|
switch (parser->current.type) {
|
@@ -11676,6 +12117,9 @@ parse_parameters(
|
|
11676
12117
|
case PM_TOKEN_PARENTHESIS_RIGHT:
|
11677
12118
|
case PM_TOKEN_PIPE: {
|
11678
12119
|
pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
|
12120
|
+
if (repeated) {
|
12121
|
+
pm_node_flag_set_repeated_parameter(param);
|
12122
|
+
}
|
11679
12123
|
pm_parameters_node_keywords_append(params, param);
|
11680
12124
|
break;
|
11681
12125
|
}
|
@@ -11687,6 +12131,9 @@ parse_parameters(
|
|
11687
12131
|
}
|
11688
12132
|
|
11689
12133
|
pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
|
12134
|
+
if (repeated) {
|
12135
|
+
pm_node_flag_set_repeated_parameter(param);
|
12136
|
+
}
|
11690
12137
|
pm_parameters_node_keywords_append(params, param);
|
11691
12138
|
break;
|
11692
12139
|
}
|
@@ -11695,17 +12142,22 @@ parse_parameters(
|
|
11695
12142
|
|
11696
12143
|
if (token_begins_expression_p(parser->current.type)) {
|
11697
12144
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
11698
|
-
|
11699
|
-
|
12145
|
+
|
12146
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &local));
|
11700
12147
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
|
11701
|
-
|
12148
|
+
|
12149
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
11702
12150
|
context_pop(parser);
|
12151
|
+
|
11703
12152
|
param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
|
11704
12153
|
}
|
11705
12154
|
else {
|
11706
12155
|
param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
|
11707
12156
|
}
|
11708
12157
|
|
12158
|
+
if (repeated) {
|
12159
|
+
pm_node_flag_set_repeated_parameter(param);
|
12160
|
+
}
|
11709
12161
|
pm_parameters_node_keywords_append(params, param);
|
11710
12162
|
|
11711
12163
|
// If parsing the value of the parameter resulted in error recovery,
|
@@ -11728,20 +12180,21 @@ parse_parameters(
|
|
11728
12180
|
|
11729
12181
|
pm_token_t operator = parser->previous;
|
11730
12182
|
pm_token_t name;
|
11731
|
-
|
12183
|
+
bool repeated = false;
|
11732
12184
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
11733
12185
|
name = parser->previous;
|
11734
|
-
pm_parser_parameter_name_check(parser, &name);
|
12186
|
+
repeated = pm_parser_parameter_name_check(parser, &name);
|
11735
12187
|
pm_parser_local_add_token(parser, &name);
|
11736
12188
|
} else {
|
11737
12189
|
name = not_provided(parser);
|
11738
12190
|
|
11739
|
-
|
11740
|
-
pm_parser_local_add_token(parser, &operator);
|
11741
|
-
}
|
12191
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_POSITIONALS;
|
11742
12192
|
}
|
11743
12193
|
|
11744
12194
|
pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
|
12195
|
+
if (repeated) {
|
12196
|
+
pm_node_flag_set_repeated_parameter(param);
|
12197
|
+
}
|
11745
12198
|
if (params->rest == NULL) {
|
11746
12199
|
pm_parameters_node_rest_set(params, param);
|
11747
12200
|
} else {
|
@@ -11764,19 +12217,21 @@ parse_parameters(
|
|
11764
12217
|
} else {
|
11765
12218
|
pm_token_t name;
|
11766
12219
|
|
12220
|
+
bool repeated = false;
|
11767
12221
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
11768
12222
|
name = parser->previous;
|
11769
|
-
pm_parser_parameter_name_check(parser, &name);
|
12223
|
+
repeated = pm_parser_parameter_name_check(parser, &name);
|
11770
12224
|
pm_parser_local_add_token(parser, &name);
|
11771
12225
|
} else {
|
11772
12226
|
name = not_provided(parser);
|
11773
12227
|
|
11774
|
-
|
11775
|
-
pm_parser_local_add_token(parser, &operator);
|
11776
|
-
}
|
12228
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_KEYWORDS;
|
11777
12229
|
}
|
11778
12230
|
|
11779
12231
|
param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
|
12232
|
+
if (repeated) {
|
12233
|
+
pm_node_flag_set_repeated_parameter(param);
|
12234
|
+
}
|
11780
12235
|
}
|
11781
12236
|
|
11782
12237
|
if (params->keyword_rest == NULL) {
|
@@ -11964,25 +12419,10 @@ parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, bool def_p) {
|
|
11964
12419
|
}
|
11965
12420
|
|
11966
12421
|
static inline pm_begin_node_t *
|
11967
|
-
parse_rescues_as_begin(pm_parser_t *parser, pm_statements_node_t *statements, bool def_p) {
|
12422
|
+
parse_rescues_as_begin(pm_parser_t *parser, const uint8_t *start, pm_statements_node_t *statements, bool def_p) {
|
11968
12423
|
pm_token_t no_begin_token = not_provided(parser);
|
11969
12424
|
pm_begin_node_t *begin_node = pm_begin_node_create(parser, &no_begin_token, statements);
|
11970
12425
|
parse_rescues(parser, begin_node, def_p);
|
11971
|
-
|
11972
|
-
// All nodes within a begin node are optional, so we look
|
11973
|
-
// for the earliest possible node that we can use to set
|
11974
|
-
// the BeginNode's start location
|
11975
|
-
const uint8_t *start = begin_node->base.location.start;
|
11976
|
-
if (begin_node->statements) {
|
11977
|
-
start = begin_node->statements->base.location.start;
|
11978
|
-
} else if (begin_node->rescue_clause) {
|
11979
|
-
start = begin_node->rescue_clause->base.location.start;
|
11980
|
-
} else if (begin_node->else_clause) {
|
11981
|
-
start = begin_node->else_clause->base.location.start;
|
11982
|
-
} else if (begin_node->ensure_clause) {
|
11983
|
-
start = begin_node->ensure_clause->base.location.start;
|
11984
|
-
}
|
11985
|
-
|
11986
12426
|
begin_node->base.location.start = start;
|
11987
12427
|
return begin_node;
|
11988
12428
|
}
|
@@ -12012,10 +12452,13 @@ parse_block_parameters(
|
|
12012
12452
|
if ((opening->type != PM_TOKEN_NOT_PROVIDED) && accept1(parser, PM_TOKEN_SEMICOLON)) {
|
12013
12453
|
do {
|
12014
12454
|
expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
|
12015
|
-
pm_parser_parameter_name_check(parser, &parser->previous);
|
12455
|
+
bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
|
12016
12456
|
pm_parser_local_add_token(parser, &parser->previous);
|
12017
12457
|
|
12018
12458
|
pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
|
12459
|
+
if (repeated) {
|
12460
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)local);
|
12461
|
+
}
|
12019
12462
|
pm_block_parameters_node_append_local(block_parameters, local);
|
12020
12463
|
} while (accept1(parser, PM_TOKEN_COMMA));
|
12021
12464
|
}
|
@@ -12031,8 +12474,10 @@ parse_block(pm_parser_t *parser) {
|
|
12031
12474
|
pm_token_t opening = parser->previous;
|
12032
12475
|
accept1(parser, PM_TOKEN_NEWLINE);
|
12033
12476
|
|
12477
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
12034
12478
|
pm_accepts_block_stack_push(parser, true);
|
12035
12479
|
pm_parser_scope_push(parser, false);
|
12480
|
+
|
12036
12481
|
pm_block_parameters_node_t *block_parameters = NULL;
|
12037
12482
|
|
12038
12483
|
if (accept1(parser, PM_TOKEN_PIPE)) {
|
@@ -12053,12 +12498,6 @@ parse_block(pm_parser_t *parser) {
|
|
12053
12498
|
pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
|
12054
12499
|
}
|
12055
12500
|
|
12056
|
-
uint32_t locals_body_index = 0;
|
12057
|
-
|
12058
|
-
if (block_parameters) {
|
12059
|
-
locals_body_index = (uint32_t) parser->current_scope->locals.size;
|
12060
|
-
}
|
12061
|
-
|
12062
12501
|
accept1(parser, PM_TOKEN_NEWLINE);
|
12063
12502
|
pm_node_t *statements = NULL;
|
12064
12503
|
|
@@ -12078,7 +12517,7 @@ parse_block(pm_parser_t *parser) {
|
|
12078
12517
|
|
12079
12518
|
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
|
12080
12519
|
assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
|
12081
|
-
statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
|
12520
|
+
statements = (pm_node_t *) parse_rescues_as_begin(parser, opening.start, (pm_statements_node_t *) statements, false);
|
12082
12521
|
}
|
12083
12522
|
}
|
12084
12523
|
|
@@ -12090,13 +12529,14 @@ parse_block(pm_parser_t *parser) {
|
|
12090
12529
|
|
12091
12530
|
if (parameters == NULL && (maximum > 0)) {
|
12092
12531
|
parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = opening.start, .end = parser->previous.end }, maximum);
|
12093
|
-
locals_body_index = maximum;
|
12094
12532
|
}
|
12095
12533
|
|
12096
12534
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
12097
12535
|
pm_parser_scope_pop(parser);
|
12098
12536
|
pm_accepts_block_stack_pop(parser);
|
12099
|
-
|
12537
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
12538
|
+
|
12539
|
+
return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
|
12100
12540
|
}
|
12101
12541
|
|
12102
12542
|
/**
|
@@ -12157,14 +12597,20 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
12157
12597
|
}
|
12158
12598
|
|
12159
12599
|
if (block != NULL) {
|
12160
|
-
if (arguments->block == NULL) {
|
12600
|
+
if (arguments->block == NULL && !arguments->has_forwarding) {
|
12161
12601
|
arguments->block = (pm_node_t *) block;
|
12162
12602
|
} else {
|
12163
|
-
|
12164
|
-
|
12165
|
-
|
12603
|
+
if (arguments->has_forwarding) {
|
12604
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_FORWARDING);
|
12605
|
+
} else {
|
12606
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
|
12607
|
+
}
|
12608
|
+
if (arguments->block != NULL) {
|
12609
|
+
if (arguments->arguments == NULL) {
|
12610
|
+
arguments->arguments = pm_arguments_node_create(parser);
|
12611
|
+
}
|
12612
|
+
pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
|
12166
12613
|
}
|
12167
|
-
pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
|
12168
12614
|
arguments->block = (pm_node_t *) block;
|
12169
12615
|
}
|
12170
12616
|
}
|
@@ -12384,8 +12830,14 @@ static inline pm_node_flags_t
|
|
12384
12830
|
parse_unescaped_encoding(const pm_parser_t *parser) {
|
12385
12831
|
if (parser->explicit_encoding != NULL) {
|
12386
12832
|
if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
|
12833
|
+
// If the there's an explicit encoding and it's using a UTF-8 escape
|
12834
|
+
// sequence, then mark the string as UTF-8.
|
12387
12835
|
return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
|
12388
12836
|
} else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
|
12837
|
+
// If there's a non-UTF-8 escape sequence being used, then the
|
12838
|
+
// string uses the source encoding, unless the source is marked as
|
12839
|
+
// US-ASCII. In that case the string is forced as ASCII-8BIT in
|
12840
|
+
// order to keep the string valid.
|
12389
12841
|
return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
|
12390
12842
|
}
|
12391
12843
|
}
|
@@ -12509,14 +12961,54 @@ parse_string_part(pm_parser_t *parser) {
|
|
12509
12961
|
}
|
12510
12962
|
}
|
12511
12963
|
|
12964
|
+
/**
|
12965
|
+
* When creating a symbol, unary operators that cannot be binary operators
|
12966
|
+
* automatically drop trailing `@` characters. This happens at the parser level,
|
12967
|
+
* such that `~@` is parsed as `~` and `!@` is parsed as `!`. We do that here.
|
12968
|
+
*/
|
12969
|
+
static const uint8_t *
|
12970
|
+
parse_operator_symbol_name(const pm_token_t *name) {
|
12971
|
+
switch (name->type) {
|
12972
|
+
case PM_TOKEN_TILDE:
|
12973
|
+
case PM_TOKEN_BANG:
|
12974
|
+
if (name->end[-1] == '@') return name->end - 1;
|
12975
|
+
/* fallthrough */
|
12976
|
+
default:
|
12977
|
+
return name->end;
|
12978
|
+
}
|
12979
|
+
}
|
12980
|
+
|
12981
|
+
static pm_node_t *
|
12982
|
+
parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
|
12983
|
+
pm_token_t closing = not_provided(parser);
|
12984
|
+
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
|
12985
|
+
|
12986
|
+
const uint8_t *end = parse_operator_symbol_name(&parser->current);
|
12987
|
+
|
12988
|
+
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
|
12989
|
+
parser_lex(parser);
|
12990
|
+
|
12991
|
+
pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
|
12992
|
+
pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
|
12993
|
+
|
12994
|
+
return (pm_node_t *) symbol;
|
12995
|
+
}
|
12996
|
+
|
12997
|
+
/**
|
12998
|
+
* Parse a symbol node. This function will get called immediately after finding
|
12999
|
+
* a symbol opening token. This handles parsing bare symbols and interpolated
|
13000
|
+
* symbols.
|
13001
|
+
*/
|
12512
13002
|
static pm_node_t *
|
12513
13003
|
parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state) {
|
12514
|
-
pm_token_t opening = parser->previous;
|
13004
|
+
const pm_token_t opening = parser->previous;
|
12515
13005
|
|
12516
13006
|
if (lex_mode->mode != PM_LEX_STRING) {
|
12517
13007
|
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
|
12518
13008
|
|
12519
13009
|
switch (parser->current.type) {
|
13010
|
+
case PM_CASE_OPERATOR:
|
13011
|
+
return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
|
12520
13012
|
case PM_TOKEN_IDENTIFIER:
|
12521
13013
|
case PM_TOKEN_CONSTANT:
|
12522
13014
|
case PM_TOKEN_INSTANCE_VARIABLE:
|
@@ -12528,10 +13020,6 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
12528
13020
|
case PM_CASE_KEYWORD:
|
12529
13021
|
parser_lex(parser);
|
12530
13022
|
break;
|
12531
|
-
case PM_CASE_OPERATOR:
|
12532
|
-
lex_state_set(parser, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
|
12533
|
-
parser_lex(parser);
|
12534
|
-
break;
|
12535
13023
|
default:
|
12536
13024
|
expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
|
12537
13025
|
break;
|
@@ -12541,6 +13029,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
12541
13029
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
12542
13030
|
|
12543
13031
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
13032
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
13033
|
+
|
12544
13034
|
return (pm_node_t *) symbol;
|
12545
13035
|
}
|
12546
13036
|
|
@@ -12637,7 +13127,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
12637
13127
|
} else {
|
12638
13128
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
|
12639
13129
|
}
|
12640
|
-
|
13130
|
+
|
13131
|
+
return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
12641
13132
|
}
|
12642
13133
|
|
12643
13134
|
/**
|
@@ -12647,8 +13138,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
12647
13138
|
static inline pm_node_t *
|
12648
13139
|
parse_undef_argument(pm_parser_t *parser) {
|
12649
13140
|
switch (parser->current.type) {
|
13141
|
+
case PM_CASE_OPERATOR: {
|
13142
|
+
const pm_token_t opening = not_provided(parser);
|
13143
|
+
return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
|
13144
|
+
}
|
12650
13145
|
case PM_CASE_KEYWORD:
|
12651
|
-
case PM_CASE_OPERATOR:
|
12652
13146
|
case PM_TOKEN_CONSTANT:
|
12653
13147
|
case PM_TOKEN_IDENTIFIER:
|
12654
13148
|
case PM_TOKEN_METHOD_NAME: {
|
@@ -12659,6 +13153,8 @@ parse_undef_argument(pm_parser_t *parser) {
|
|
12659
13153
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
12660
13154
|
|
12661
13155
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
13156
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
13157
|
+
|
12662
13158
|
return (pm_node_t *) symbol;
|
12663
13159
|
}
|
12664
13160
|
case PM_TOKEN_SYMBOL_BEGIN: {
|
@@ -12682,21 +13178,24 @@ parse_undef_argument(pm_parser_t *parser) {
|
|
12682
13178
|
static inline pm_node_t *
|
12683
13179
|
parse_alias_argument(pm_parser_t *parser, bool first) {
|
12684
13180
|
switch (parser->current.type) {
|
12685
|
-
case PM_CASE_OPERATOR:
|
13181
|
+
case PM_CASE_OPERATOR: {
|
13182
|
+
const pm_token_t opening = not_provided(parser);
|
13183
|
+
return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
|
13184
|
+
}
|
12686
13185
|
case PM_CASE_KEYWORD:
|
12687
13186
|
case PM_TOKEN_CONSTANT:
|
12688
13187
|
case PM_TOKEN_IDENTIFIER:
|
12689
13188
|
case PM_TOKEN_METHOD_NAME: {
|
12690
|
-
if (first)
|
12691
|
-
lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
|
12692
|
-
}
|
12693
|
-
|
13189
|
+
if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
|
12694
13190
|
parser_lex(parser);
|
13191
|
+
|
12695
13192
|
pm_token_t opening = not_provided(parser);
|
12696
13193
|
pm_token_t closing = not_provided(parser);
|
12697
13194
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
12698
13195
|
|
12699
13196
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
13197
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
13198
|
+
|
12700
13199
|
return (pm_node_t *) symbol;
|
12701
13200
|
}
|
12702
13201
|
case PM_TOKEN_SYMBOL_BEGIN: {
|
@@ -12733,6 +13232,64 @@ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
|
|
12733
13232
|
return false;
|
12734
13233
|
}
|
12735
13234
|
|
13235
|
+
/**
|
13236
|
+
* These are the names of the various numbered parameters. We have them here so
|
13237
|
+
* that when we insert them into the constant pool we can use a constant string
|
13238
|
+
* and not have to allocate.
|
13239
|
+
*/
|
13240
|
+
static const char * const pm_numbered_parameter_names[] = {
|
13241
|
+
"_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
|
13242
|
+
};
|
13243
|
+
|
13244
|
+
/**
|
13245
|
+
* Parse an identifier into a local variable read. If the local variable
|
13246
|
+
* is not found, it returns NULL instead.
|
13247
|
+
*/
|
13248
|
+
static pm_local_variable_read_node_t *
|
13249
|
+
parse_variable(pm_parser_t *parser) {
|
13250
|
+
int depth;
|
13251
|
+
if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
|
13252
|
+
return pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
|
13253
|
+
}
|
13254
|
+
|
13255
|
+
if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
|
13256
|
+
// Now that we know we have a numbered parameter, we need to check
|
13257
|
+
// if it's allowed in this context. If it is, then we will create a
|
13258
|
+
// local variable read. If it's not, then we'll create a normal call
|
13259
|
+
// node but add an error.
|
13260
|
+
if (parser->current_scope->explicit_params) {
|
13261
|
+
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
|
13262
|
+
} else if (outer_scope_using_numbered_parameters_p(parser)) {
|
13263
|
+
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
|
13264
|
+
} else {
|
13265
|
+
// Indicate that this scope is using numbered params so that child
|
13266
|
+
// scopes cannot. We subtract the value for the character '0' to get
|
13267
|
+
// the actual integer value of the number (only _1 through _9 are
|
13268
|
+
// valid).
|
13269
|
+
uint8_t numbered_parameters = (uint8_t) (parser->previous.start[1] - '0');
|
13270
|
+
if (numbered_parameters > parser->current_scope->numbered_parameters) {
|
13271
|
+
parser->current_scope->numbered_parameters = numbered_parameters;
|
13272
|
+
pm_parser_numbered_parameters_set(parser, numbered_parameters);
|
13273
|
+
}
|
13274
|
+
|
13275
|
+
// When you use a numbered parameter, it implies the existence
|
13276
|
+
// of all of the locals that exist before it. For example,
|
13277
|
+
// referencing _2 means that _1 must exist. Therefore here we
|
13278
|
+
// loop through all of the possibilities and add them into the
|
13279
|
+
// constant pool.
|
13280
|
+
for (uint8_t numbered_parameter = 1; numbered_parameter <= numbered_parameters - 1; numbered_parameter++) {
|
13281
|
+
pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameter - 1], 2);
|
13282
|
+
}
|
13283
|
+
|
13284
|
+
// Finally we can create the local variable read node.
|
13285
|
+
pm_constant_id_t name_id = pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameters - 1], 2);
|
13286
|
+
return pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
13287
|
+
}
|
13288
|
+
}
|
13289
|
+
|
13290
|
+
return NULL;
|
13291
|
+
}
|
13292
|
+
|
12736
13293
|
/**
|
12737
13294
|
* Parse an identifier into either a local variable read or a call.
|
12738
13295
|
*/
|
@@ -12741,56 +13298,8 @@ parse_variable_call(pm_parser_t *parser) {
|
|
12741
13298
|
pm_node_flags_t flags = 0;
|
12742
13299
|
|
12743
13300
|
if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
|
12744
|
-
|
12745
|
-
if (
|
12746
|
-
return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
|
12747
|
-
}
|
12748
|
-
|
12749
|
-
if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
|
12750
|
-
// Now that we know we have a numbered parameter, we need to check
|
12751
|
-
// if it's allowed in this context. If it is, then we will create a
|
12752
|
-
// local variable read. If it's not, then we'll create a normal call
|
12753
|
-
// node but add an error.
|
12754
|
-
if (parser->current_scope->explicit_params) {
|
12755
|
-
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
|
12756
|
-
} else if (outer_scope_using_numbered_parameters_p(parser)) {
|
12757
|
-
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
|
12758
|
-
} else {
|
12759
|
-
// Indicate that this scope is using numbered params so that child
|
12760
|
-
// scopes cannot.
|
12761
|
-
uint8_t number = parser->previous.start[1];
|
12762
|
-
|
12763
|
-
// We subtract the value for the character '0' to get the actual
|
12764
|
-
// integer value of the number (only _1 through _9 are valid)
|
12765
|
-
uint8_t numbered_parameters = (uint8_t) (number - '0');
|
12766
|
-
if (numbered_parameters > parser->current_scope->numbered_parameters) {
|
12767
|
-
parser->current_scope->numbered_parameters = numbered_parameters;
|
12768
|
-
pm_parser_numbered_parameters_set(parser, numbered_parameters);
|
12769
|
-
}
|
12770
|
-
|
12771
|
-
// When you use a numbered parameter, it implies the existence
|
12772
|
-
// of all of the locals that exist before it. For example,
|
12773
|
-
// referencing _2 means that _1 must exist. Therefore here we
|
12774
|
-
// loop through all of the possibilities and add them into the
|
12775
|
-
// constant pool.
|
12776
|
-
uint8_t current = '1';
|
12777
|
-
uint8_t *value;
|
12778
|
-
|
12779
|
-
while (current < number) {
|
12780
|
-
value = malloc(2);
|
12781
|
-
value[0] = '_';
|
12782
|
-
value[1] = current++;
|
12783
|
-
pm_parser_local_add_owned(parser, value, 2);
|
12784
|
-
}
|
12785
|
-
|
12786
|
-
// Now we can add the actual token that is being used. For
|
12787
|
-
// this one we can add a shared version since it is directly
|
12788
|
-
// referenced in the source.
|
12789
|
-
pm_parser_local_add_token(parser, &parser->previous);
|
12790
|
-
return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
12791
|
-
}
|
12792
|
-
}
|
12793
|
-
|
13301
|
+
pm_local_variable_read_node_t *node = parse_variable(parser);
|
13302
|
+
if (node != NULL) return (pm_node_t *) node;
|
12794
13303
|
flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
|
12795
13304
|
}
|
12796
13305
|
|
@@ -13076,43 +13585,77 @@ parse_pattern_keyword_rest(pm_parser_t *parser) {
|
|
13076
13585
|
return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
|
13077
13586
|
}
|
13078
13587
|
|
13588
|
+
/**
|
13589
|
+
* Create an implicit node for the value of a hash pattern that has omitted the
|
13590
|
+
* value. This will use an implicit local variable target.
|
13591
|
+
*/
|
13592
|
+
static pm_node_t *
|
13593
|
+
parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_symbol_node_t *key) {
|
13594
|
+
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
|
13595
|
+
pm_constant_id_t name = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
|
13596
|
+
|
13597
|
+
int current_depth = pm_parser_local_depth_constant_id(parser, name);
|
13598
|
+
uint32_t depth;
|
13599
|
+
|
13600
|
+
if (current_depth == -1) {
|
13601
|
+
pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
|
13602
|
+
depth = 0;
|
13603
|
+
} else {
|
13604
|
+
depth = (uint32_t) current_depth;
|
13605
|
+
}
|
13606
|
+
|
13607
|
+
pm_local_variable_target_node_t *target = pm_local_variable_target_node_create_values(parser, value_loc, name, depth);
|
13608
|
+
return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
|
13609
|
+
}
|
13610
|
+
|
13079
13611
|
/**
|
13080
13612
|
* Parse a hash pattern.
|
13081
13613
|
*/
|
13082
13614
|
static pm_hash_pattern_node_t *
|
13083
|
-
parse_pattern_hash(pm_parser_t *parser, pm_node_t *
|
13615
|
+
parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) {
|
13084
13616
|
pm_node_list_t assocs = { 0 };
|
13085
13617
|
pm_node_t *rest = NULL;
|
13086
13618
|
|
13087
|
-
switch (PM_NODE_TYPE(
|
13088
|
-
case
|
13089
|
-
|
13090
|
-
|
13091
|
-
|
13092
|
-
|
13619
|
+
switch (PM_NODE_TYPE(first_node)) {
|
13620
|
+
case PM_ASSOC_SPLAT_NODE:
|
13621
|
+
case PM_NO_KEYWORDS_PARAMETER_NODE:
|
13622
|
+
rest = first_node;
|
13623
|
+
break;
|
13624
|
+
case PM_SYMBOL_NODE: {
|
13625
|
+
if (pm_symbol_node_label_p(first_node)) {
|
13626
|
+
pm_node_t *value;
|
13627
|
+
|
13628
|
+
if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
13629
|
+
// Here we have a value for the first assoc in the list, so
|
13630
|
+
// we will parse it now.
|
13631
|
+
value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
|
13632
|
+
} else {
|
13633
|
+
// Otherwise, we will create an implicit local variable
|
13634
|
+
// target for the value.
|
13635
|
+
value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) first_node);
|
13636
|
+
}
|
13093
13637
|
|
13094
|
-
|
13095
|
-
assoc
|
13096
|
-
assoc->value = value;
|
13097
|
-
} else {
|
13098
|
-
pm_node_t *key = ((pm_assoc_node_t *) first_assoc)->key;
|
13638
|
+
pm_token_t operator = not_provided(parser);
|
13639
|
+
pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
|
13099
13640
|
|
13100
|
-
|
13101
|
-
|
13102
|
-
pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
|
13103
|
-
}
|
13641
|
+
pm_node_list_append(&assocs, assoc);
|
13642
|
+
break;
|
13104
13643
|
}
|
13644
|
+
}
|
13645
|
+
/* fallthrough */
|
13646
|
+
default: {
|
13647
|
+
// If we get anything else, then this is an error. For this we'll
|
13648
|
+
// create a missing node for the value and create an assoc node for
|
13649
|
+
// the first node in the list.
|
13650
|
+
pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_LABEL);
|
13651
|
+
|
13652
|
+
pm_token_t operator = not_provided(parser);
|
13653
|
+
pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
|
13654
|
+
pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
|
13105
13655
|
|
13106
|
-
pm_node_list_append(&assocs,
|
13656
|
+
pm_node_list_append(&assocs, assoc);
|
13107
13657
|
break;
|
13108
13658
|
}
|
13109
|
-
case PM_ASSOC_SPLAT_NODE:
|
13110
|
-
case PM_NO_KEYWORDS_PARAMETER_NODE:
|
13111
|
-
rest = first_assoc;
|
13112
|
-
break;
|
13113
|
-
default:
|
13114
|
-
assert(false);
|
13115
|
-
break;
|
13116
13659
|
}
|
13117
13660
|
|
13118
13661
|
// If there are any other assocs, then we'll parse them now.
|
@@ -13141,6 +13684,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
|
|
13141
13684
|
} else {
|
13142
13685
|
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
|
13143
13686
|
pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
|
13687
|
+
value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) key);
|
13144
13688
|
}
|
13145
13689
|
|
13146
13690
|
pm_token_t operator = not_provided(parser);
|
@@ -13246,45 +13790,29 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
13246
13790
|
// pattern node.
|
13247
13791
|
node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
|
13248
13792
|
} else {
|
13249
|
-
pm_node_t *
|
13793
|
+
pm_node_t *first_node;
|
13250
13794
|
|
13251
13795
|
switch (parser->current.type) {
|
13252
|
-
case PM_TOKEN_LABEL:
|
13796
|
+
case PM_TOKEN_LABEL:
|
13253
13797
|
parser_lex(parser);
|
13254
|
-
|
13255
|
-
pm_symbol_node_t *key = pm_symbol_node_label_create(parser, &parser->previous);
|
13256
|
-
pm_token_t operator = not_provided(parser);
|
13257
|
-
|
13258
|
-
first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
|
13798
|
+
first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
|
13259
13799
|
break;
|
13260
|
-
}
|
13261
13800
|
case PM_TOKEN_USTAR_STAR:
|
13262
|
-
|
13801
|
+
first_node = parse_pattern_keyword_rest(parser);
|
13263
13802
|
break;
|
13264
|
-
case PM_TOKEN_STRING_BEGIN:
|
13265
|
-
|
13266
|
-
pm_token_t operator = not_provided(parser);
|
13267
|
-
|
13268
|
-
if (!pm_symbol_node_label_p(key)) {
|
13269
|
-
pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_LABEL);
|
13270
|
-
}
|
13271
|
-
|
13272
|
-
first_assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL);
|
13803
|
+
case PM_TOKEN_STRING_BEGIN:
|
13804
|
+
first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
|
13273
13805
|
break;
|
13274
|
-
}
|
13275
13806
|
default: {
|
13276
13807
|
parser_lex(parser);
|
13277
13808
|
pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
|
13278
13809
|
|
13279
|
-
|
13280
|
-
pm_token_t operator = not_provided(parser);
|
13281
|
-
|
13282
|
-
first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
|
13810
|
+
first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
13283
13811
|
break;
|
13284
13812
|
}
|
13285
13813
|
}
|
13286
13814
|
|
13287
|
-
node = parse_pattern_hash(parser,
|
13815
|
+
node = parse_pattern_hash(parser, first_node);
|
13288
13816
|
|
13289
13817
|
accept1(parser, PM_TOKEN_NEWLINE);
|
13290
13818
|
expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
|
@@ -13350,7 +13878,16 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
13350
13878
|
switch (parser->current.type) {
|
13351
13879
|
case PM_TOKEN_IDENTIFIER: {
|
13352
13880
|
parser_lex(parser);
|
13353
|
-
pm_node_t *variable = (pm_node_t *)
|
13881
|
+
pm_node_t *variable = (pm_node_t *) parse_variable(parser);
|
13882
|
+
if (variable == NULL) {
|
13883
|
+
if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0 && pm_token_is_it(parser->previous.start, parser->previous.end)) {
|
13884
|
+
pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
|
13885
|
+
variable = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
13886
|
+
} else {
|
13887
|
+
PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
|
13888
|
+
variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
13889
|
+
}
|
13890
|
+
}
|
13354
13891
|
|
13355
13892
|
return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
|
13356
13893
|
}
|
@@ -13519,9 +14056,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
|
|
13519
14056
|
case PM_TOKEN_LABEL: {
|
13520
14057
|
parser_lex(parser);
|
13521
14058
|
pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
|
13522
|
-
|
13523
|
-
|
13524
|
-
return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL));
|
14059
|
+
return (pm_node_t *) parse_pattern_hash(parser, key);
|
13525
14060
|
}
|
13526
14061
|
case PM_TOKEN_USTAR_STAR: {
|
13527
14062
|
node = parse_pattern_keyword_rest(parser);
|
@@ -13544,8 +14079,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
|
|
13544
14079
|
// If we got a dynamic label symbol, then we need to treat it like the
|
13545
14080
|
// beginning of a hash pattern.
|
13546
14081
|
if (pm_symbol_node_label_p(node)) {
|
13547
|
-
|
13548
|
-
return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, node, &operator, NULL));
|
14082
|
+
return (pm_node_t *) parse_pattern_hash(parser, node);
|
13549
14083
|
}
|
13550
14084
|
|
13551
14085
|
if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
|
@@ -13558,7 +14092,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
|
|
13558
14092
|
// Gather up all of the patterns into the list.
|
13559
14093
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
13560
14094
|
// Break early here in case we have a trailing comma.
|
13561
|
-
if (
|
14095
|
+
if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
|
13562
14096
|
node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
|
13563
14097
|
pm_node_list_append(&nodes, node);
|
13564
14098
|
break;
|
@@ -13644,7 +14178,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13644
14178
|
assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
|
13645
14179
|
|
13646
14180
|
bool concating = false;
|
13647
|
-
bool state_is_arg_labeled =
|
14181
|
+
bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
|
13648
14182
|
|
13649
14183
|
while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
13650
14184
|
pm_node_t *node = NULL;
|
@@ -13659,7 +14193,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13659
14193
|
parser_lex(parser);
|
13660
14194
|
|
13661
14195
|
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
13662
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14196
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
13663
14197
|
// If we get here, then we have an end immediately after a
|
13664
14198
|
// start. In that case we'll create an empty content token and
|
13665
14199
|
// return an uninterpolated string.
|
@@ -13672,7 +14206,6 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13672
14206
|
// If we get here, then we have an end of a label immediately
|
13673
14207
|
// after a start. In that case we'll create an empty symbol
|
13674
14208
|
// node.
|
13675
|
-
pm_token_t opening = not_provided(parser);
|
13676
14209
|
pm_token_t content = parse_strings_empty_content(parser->previous.start);
|
13677
14210
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
|
13678
14211
|
|
@@ -13716,15 +14249,19 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13716
14249
|
parser_lex(parser);
|
13717
14250
|
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
|
13718
14251
|
|
13719
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14252
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
13720
14253
|
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
13721
14254
|
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
13722
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
14255
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
13723
14256
|
} else if (match1(parser, PM_TOKEN_EOF)) {
|
13724
|
-
pm_parser_err_token(parser, &opening,
|
14257
|
+
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
|
13725
14258
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
14259
|
+
} else if (accept1(parser, PM_TOKEN_STRING_END)) {
|
14260
|
+
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
13726
14261
|
} else {
|
13727
|
-
|
14262
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
|
14263
|
+
parser->previous.start = parser->previous.end;
|
14264
|
+
parser->previous.type = PM_TOKEN_MISSING;
|
13728
14265
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
13729
14266
|
}
|
13730
14267
|
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
|
@@ -13739,9 +14276,9 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13739
14276
|
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
|
13740
14277
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
13741
14278
|
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
13742
|
-
expect1(parser, PM_TOKEN_STRING_END,
|
14279
|
+
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
|
13743
14280
|
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
13744
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
14281
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
13745
14282
|
} else {
|
13746
14283
|
// If we get here, then we have interpolation so we'll need
|
13747
14284
|
// to create a string or symbol node with interpolation.
|
@@ -13830,11 +14367,34 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13830
14367
|
return current;
|
13831
14368
|
}
|
13832
14369
|
|
14370
|
+
/**
|
14371
|
+
* Append an error to the error list on the parser using the given diagnostic
|
14372
|
+
* ID. This function is a specialization that handles formatting the specific
|
14373
|
+
* kind of error that is being appended.
|
14374
|
+
*/
|
14375
|
+
static void
|
14376
|
+
pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
14377
|
+
switch (diag_id) {
|
14378
|
+
case PM_ERR_HASH_KEY: {
|
14379
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
|
14380
|
+
break;
|
14381
|
+
}
|
14382
|
+
case PM_ERR_UNARY_RECEIVER: {
|
14383
|
+
const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
|
14384
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
|
14385
|
+
break;
|
14386
|
+
}
|
14387
|
+
default:
|
14388
|
+
pm_parser_err_previous(parser, diag_id);
|
14389
|
+
break;
|
14390
|
+
}
|
14391
|
+
}
|
14392
|
+
|
13833
14393
|
/**
|
13834
14394
|
* Parse an expression that begins with the previous node that we just lexed.
|
13835
14395
|
*/
|
13836
14396
|
static inline pm_node_t *
|
13837
|
-
parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call) {
|
14397
|
+
parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
|
13838
14398
|
switch (parser->current.type) {
|
13839
14399
|
case PM_TOKEN_BRACKET_LEFT_ARRAY: {
|
13840
14400
|
parser_lex(parser);
|
@@ -13866,9 +14426,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
13866
14426
|
pm_node_t *expression = NULL;
|
13867
14427
|
|
13868
14428
|
if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
|
13869
|
-
|
13870
|
-
pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
13871
|
-
}
|
14429
|
+
pm_parser_scope_forwarding_positionals_check(parser, &operator);
|
13872
14430
|
} else {
|
13873
14431
|
expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR);
|
13874
14432
|
}
|
@@ -14016,7 +14574,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14016
14574
|
// If we didn't find a terminator and we didn't find a right
|
14017
14575
|
// parenthesis, then this is a syntax error.
|
14018
14576
|
if (!terminator_found) {
|
14019
|
-
|
14577
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
14020
14578
|
}
|
14021
14579
|
|
14022
14580
|
// Parse each statement within the parentheses.
|
@@ -14045,7 +14603,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14045
14603
|
} else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
14046
14604
|
break;
|
14047
14605
|
} else {
|
14048
|
-
|
14606
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
|
14049
14607
|
}
|
14050
14608
|
}
|
14051
14609
|
|
@@ -14113,7 +14671,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14113
14671
|
if (
|
14114
14672
|
match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
|
14115
14673
|
(accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
|
14116
|
-
(pm_accepts_block_stack_p(parser) &&
|
14674
|
+
(pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
|
14675
|
+
match1(parser, PM_TOKEN_BRACE_LEFT)
|
14117
14676
|
) {
|
14118
14677
|
pm_arguments_t arguments = { 0 };
|
14119
14678
|
parse_arguments_list(parser, &arguments, true, accepts_command_call);
|
@@ -14237,7 +14796,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14237
14796
|
// a block, so we need to check for that here.
|
14238
14797
|
if (
|
14239
14798
|
(accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
|
14240
|
-
(pm_accepts_block_stack_p(parser) &&
|
14799
|
+
(pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
|
14800
|
+
match1(parser, PM_TOKEN_BRACE_LEFT)
|
14241
14801
|
) {
|
14242
14802
|
pm_arguments_t arguments = { 0 };
|
14243
14803
|
parse_arguments_list(parser, &arguments, true, accepts_command_call);
|
@@ -14250,6 +14810,31 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14250
14810
|
|
14251
14811
|
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
|
14252
14812
|
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
|
14813
|
+
} else {
|
14814
|
+
// Check if `it` is not going to be assigned.
|
14815
|
+
switch (parser->current.type) {
|
14816
|
+
case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
|
14817
|
+
case PM_TOKEN_AMPERSAND_EQUAL:
|
14818
|
+
case PM_TOKEN_CARET_EQUAL:
|
14819
|
+
case PM_TOKEN_EQUAL:
|
14820
|
+
case PM_TOKEN_GREATER_GREATER_EQUAL:
|
14821
|
+
case PM_TOKEN_LESS_LESS_EQUAL:
|
14822
|
+
case PM_TOKEN_MINUS_EQUAL:
|
14823
|
+
case PM_TOKEN_PARENTHESIS_RIGHT:
|
14824
|
+
case PM_TOKEN_PERCENT_EQUAL:
|
14825
|
+
case PM_TOKEN_PIPE_EQUAL:
|
14826
|
+
case PM_TOKEN_PIPE_PIPE_EQUAL:
|
14827
|
+
case PM_TOKEN_PLUS_EQUAL:
|
14828
|
+
case PM_TOKEN_SLASH_EQUAL:
|
14829
|
+
case PM_TOKEN_STAR_EQUAL:
|
14830
|
+
case PM_TOKEN_STAR_STAR_EQUAL:
|
14831
|
+
break;
|
14832
|
+
default:
|
14833
|
+
// Once we know it's neither a method call nor an
|
14834
|
+
// assignment, we can finally create `it` default
|
14835
|
+
// parameter.
|
14836
|
+
node = pm_node_check_it(parser, node);
|
14837
|
+
}
|
14253
14838
|
}
|
14254
14839
|
|
14255
14840
|
return node;
|
@@ -14286,6 +14871,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14286
14871
|
// If we get here, then we tried to find something in the
|
14287
14872
|
// heredoc but couldn't actually parse anything, so we'll just
|
14288
14873
|
// return a missing node.
|
14874
|
+
//
|
14875
|
+
// parse_string_part handles its own errors, so there is no need
|
14876
|
+
// for us to add one here.
|
14289
14877
|
node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
14290
14878
|
} else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
|
14291
14879
|
// If we get here, then the part that we parsed was plain string
|
@@ -14549,11 +15137,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14549
15137
|
// for guard clauses in the form of `if` or `unless` statements.
|
14550
15138
|
if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
|
14551
15139
|
pm_token_t keyword = parser->previous;
|
14552
|
-
pm_node_t *predicate = parse_value_expression(parser,
|
15140
|
+
pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
|
14553
15141
|
pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
|
14554
15142
|
} else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
|
14555
15143
|
pm_token_t keyword = parser->previous;
|
14556
|
-
pm_node_t *predicate = parse_value_expression(parser,
|
15144
|
+
pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
|
14557
15145
|
pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
|
14558
15146
|
}
|
14559
15147
|
|
@@ -14742,8 +15330,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14742
15330
|
pm_token_t operator = parser->previous;
|
14743
15331
|
pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_NOT, true, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS);
|
14744
15332
|
|
14745
|
-
pm_constant_id_t
|
14746
|
-
parser->current_param_name = 0;
|
15333
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
14747
15334
|
pm_parser_scope_push(parser, true);
|
14748
15335
|
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
|
14749
15336
|
|
@@ -14756,15 +15343,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14756
15343
|
|
14757
15344
|
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
|
14758
15345
|
assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
|
14759
|
-
statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
|
15346
|
+
statements = (pm_node_t *) parse_rescues_as_begin(parser, class_keyword.start, (pm_statements_node_t *) statements, false);
|
14760
15347
|
}
|
14761
15348
|
|
14762
15349
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
|
14763
|
-
|
14764
15350
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
15351
|
+
|
14765
15352
|
pm_parser_scope_pop(parser);
|
14766
|
-
parser->current_param_name = old_param_name;
|
14767
15353
|
pm_do_loop_stack_pop(parser);
|
15354
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
15355
|
+
|
14768
15356
|
return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
|
14769
15357
|
}
|
14770
15358
|
|
@@ -14790,9 +15378,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14790
15378
|
superclass = NULL;
|
14791
15379
|
}
|
14792
15380
|
|
14793
|
-
pm_constant_id_t
|
14794
|
-
parser->current_param_name = 0;
|
15381
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
14795
15382
|
pm_parser_scope_push(parser, true);
|
15383
|
+
|
14796
15384
|
if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
|
14797
15385
|
expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
|
14798
15386
|
} else {
|
@@ -14808,7 +15396,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14808
15396
|
|
14809
15397
|
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
|
14810
15398
|
assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
|
14811
|
-
statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
|
15399
|
+
statements = (pm_node_t *) parse_rescues_as_begin(parser, class_keyword.start, (pm_statements_node_t *) statements, false);
|
14812
15400
|
}
|
14813
15401
|
|
14814
15402
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
|
@@ -14818,9 +15406,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14818
15406
|
}
|
14819
15407
|
|
14820
15408
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
15409
|
+
|
14821
15410
|
pm_parser_scope_pop(parser);
|
14822
|
-
parser->current_param_name = old_param_name;
|
14823
15411
|
pm_do_loop_stack_pop(parser);
|
15412
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
14824
15413
|
|
14825
15414
|
if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
|
14826
15415
|
pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
|
@@ -14835,18 +15424,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14835
15424
|
pm_token_t operator = not_provided(parser);
|
14836
15425
|
pm_token_t name = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = def_keyword.end, .end = def_keyword.end };
|
14837
15426
|
|
14838
|
-
// This context is necessary for lexing `...` in a bare params
|
14839
|
-
// It must be pushed before lexing the first param, so it
|
15427
|
+
// This context is necessary for lexing `...` in a bare params
|
15428
|
+
// correctly. It must be pushed before lexing the first param, so it
|
15429
|
+
// is here.
|
14840
15430
|
context_push(parser, PM_CONTEXT_DEF_PARAMS);
|
15431
|
+
pm_constant_id_t saved_param_name;
|
15432
|
+
|
14841
15433
|
parser_lex(parser);
|
14842
|
-
pm_constant_id_t old_param_name = parser->current_param_name;
|
14843
15434
|
|
14844
15435
|
switch (parser->current.type) {
|
14845
15436
|
case PM_CASE_OPERATOR:
|
15437
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14846
15438
|
pm_parser_scope_push(parser, true);
|
14847
|
-
parser->current_param_name = 0;
|
14848
15439
|
lex_state_set(parser, PM_LEX_STATE_ENDFN);
|
14849
15440
|
parser_lex(parser);
|
15441
|
+
|
14850
15442
|
name = parser->previous;
|
14851
15443
|
break;
|
14852
15444
|
case PM_TOKEN_IDENTIFIER: {
|
@@ -14854,18 +15446,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14854
15446
|
|
14855
15447
|
if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
|
14856
15448
|
receiver = parse_variable_call(parser);
|
15449
|
+
receiver = pm_node_check_it(parser, receiver);
|
14857
15450
|
|
15451
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14858
15452
|
pm_parser_scope_push(parser, true);
|
14859
|
-
parser->current_param_name = 0;
|
14860
15453
|
lex_state_set(parser, PM_LEX_STATE_FNAME);
|
14861
15454
|
parser_lex(parser);
|
14862
15455
|
|
14863
15456
|
operator = parser->previous;
|
14864
15457
|
name = parse_method_definition_name(parser);
|
14865
15458
|
} else {
|
15459
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14866
15460
|
pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
|
14867
15461
|
pm_parser_scope_push(parser, true);
|
14868
|
-
|
15462
|
+
|
14869
15463
|
name = parser->previous;
|
14870
15464
|
}
|
14871
15465
|
|
@@ -14882,9 +15476,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14882
15476
|
case PM_TOKEN_KEYWORD___FILE__:
|
14883
15477
|
case PM_TOKEN_KEYWORD___LINE__:
|
14884
15478
|
case PM_TOKEN_KEYWORD___ENCODING__: {
|
15479
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14885
15480
|
pm_parser_scope_push(parser, true);
|
14886
|
-
parser->current_param_name = 0;
|
14887
15481
|
parser_lex(parser);
|
15482
|
+
|
14888
15483
|
pm_token_t identifier = parser->previous;
|
14889
15484
|
|
14890
15485
|
if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
|
@@ -14946,6 +15541,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14946
15541
|
pm_token_t lparen = parser->previous;
|
14947
15542
|
pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_DEF_RECEIVER);
|
14948
15543
|
|
15544
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
14949
15545
|
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
|
14950
15546
|
pm_token_t rparen = parser->previous;
|
14951
15547
|
|
@@ -14955,8 +15551,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14955
15551
|
operator = parser->previous;
|
14956
15552
|
receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
|
14957
15553
|
|
15554
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14958
15555
|
pm_parser_scope_push(parser, true);
|
14959
|
-
parser->current_param_name = 0;
|
14960
15556
|
|
14961
15557
|
// To push `PM_CONTEXT_DEF_PARAMS` again is for the same reason as described the above.
|
14962
15558
|
context_push(parser, PM_CONTEXT_DEF_PARAMS);
|
@@ -14964,8 +15560,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14964
15560
|
break;
|
14965
15561
|
}
|
14966
15562
|
default:
|
15563
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14967
15564
|
pm_parser_scope_push(parser, true);
|
14968
|
-
|
15565
|
+
|
14969
15566
|
name = parse_method_definition_name(parser);
|
14970
15567
|
break;
|
14971
15568
|
}
|
@@ -15018,8 +15615,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15018
15615
|
}
|
15019
15616
|
}
|
15020
15617
|
|
15021
|
-
uint32_t locals_body_index = (uint32_t) parser->current_scope->locals.size;
|
15022
|
-
|
15023
15618
|
context_pop(parser);
|
15024
15619
|
pm_node_t *statements = NULL;
|
15025
15620
|
pm_token_t equal;
|
@@ -15070,7 +15665,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15070
15665
|
|
15071
15666
|
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
|
15072
15667
|
assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
|
15073
|
-
statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, true);
|
15668
|
+
statements = (pm_node_t *) parse_rescues_as_begin(parser, def_keyword.start, (pm_statements_node_t *) statements, true);
|
15074
15669
|
}
|
15075
15670
|
|
15076
15671
|
pm_accepts_block_stack_pop(parser);
|
@@ -15080,17 +15675,25 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15080
15675
|
}
|
15081
15676
|
|
15082
15677
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
15083
|
-
|
15678
|
+
|
15084
15679
|
pm_parser_scope_pop(parser);
|
15680
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
15681
|
+
|
15682
|
+
/**
|
15683
|
+
* If the final character is @. As is the case when defining
|
15684
|
+
* methods to override the unary operators, we should ignore
|
15685
|
+
* the @ in the same way we do for symbols.
|
15686
|
+
*/
|
15687
|
+
pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
|
15085
15688
|
|
15086
15689
|
return (pm_node_t *) pm_def_node_create(
|
15087
15690
|
parser,
|
15691
|
+
name_id,
|
15088
15692
|
&name,
|
15089
15693
|
receiver,
|
15090
15694
|
params,
|
15091
15695
|
statements,
|
15092
15696
|
&locals,
|
15093
|
-
locals_body_index,
|
15094
15697
|
&def_keyword,
|
15095
15698
|
&operator,
|
15096
15699
|
&lparen,
|
@@ -15309,9 +15912,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15309
15912
|
pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
|
15310
15913
|
}
|
15311
15914
|
|
15312
|
-
pm_constant_id_t
|
15313
|
-
parser->current_param_name = 0;
|
15915
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
15314
15916
|
pm_parser_scope_push(parser, true);
|
15917
|
+
|
15315
15918
|
accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
|
15316
15919
|
pm_node_t *statements = NULL;
|
15317
15920
|
|
@@ -15323,12 +15926,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15323
15926
|
|
15324
15927
|
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
|
15325
15928
|
assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
|
15326
|
-
statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
|
15929
|
+
statements = (pm_node_t *) parse_rescues_as_begin(parser, module_keyword.start, (pm_statements_node_t *) statements, false);
|
15327
15930
|
}
|
15328
15931
|
|
15329
15932
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
15330
15933
|
pm_parser_scope_pop(parser);
|
15331
|
-
parser
|
15934
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
15332
15935
|
|
15333
15936
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
|
15334
15937
|
|
@@ -15914,6 +16517,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15914
16517
|
// context of a multiple assignment. We enforce that here. We'll
|
15915
16518
|
// still lex past it though and create a missing node place.
|
15916
16519
|
if (binding_power != PM_BINDING_POWER_STATEMENT) {
|
16520
|
+
pm_parser_err_prefix(parser, diag_id);
|
15917
16521
|
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
15918
16522
|
}
|
15919
16523
|
|
@@ -15936,7 +16540,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15936
16540
|
parser_lex(parser);
|
15937
16541
|
|
15938
16542
|
pm_token_t operator = parser->previous;
|
15939
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH,
|
16543
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, PM_ERR_UNARY_RECEIVER);
|
15940
16544
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
|
15941
16545
|
|
15942
16546
|
pm_conditional_predicate(receiver);
|
@@ -15946,7 +16550,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15946
16550
|
parser_lex(parser);
|
15947
16551
|
|
15948
16552
|
pm_token_t operator = parser->previous;
|
15949
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16553
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
15950
16554
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
|
15951
16555
|
|
15952
16556
|
return (pm_node_t *) node;
|
@@ -15955,7 +16559,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15955
16559
|
parser_lex(parser);
|
15956
16560
|
|
15957
16561
|
pm_token_t operator = parser->previous;
|
15958
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16562
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
15959
16563
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
|
15960
16564
|
|
15961
16565
|
return (pm_node_t *) node;
|
@@ -15964,7 +16568,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15964
16568
|
parser_lex(parser);
|
15965
16569
|
|
15966
16570
|
pm_token_t operator = parser->previous;
|
15967
|
-
pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16571
|
+
pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
15968
16572
|
|
15969
16573
|
if (accept1(parser, PM_TOKEN_STAR_STAR)) {
|
15970
16574
|
pm_token_t exponent_operator = parser->previous;
|
@@ -15995,7 +16599,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15995
16599
|
parser_lex(parser);
|
15996
16600
|
|
15997
16601
|
pm_token_t operator = parser->previous;
|
16602
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
15998
16603
|
pm_parser_scope_push(parser, false);
|
16604
|
+
|
15999
16605
|
pm_block_parameters_node_t *block_parameters;
|
16000
16606
|
|
16001
16607
|
switch (parser->current.type) {
|
@@ -16030,12 +16636,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16030
16636
|
}
|
16031
16637
|
}
|
16032
16638
|
|
16033
|
-
uint32_t locals_body_index = 0;
|
16034
|
-
|
16035
|
-
if (block_parameters) {
|
16036
|
-
locals_body_index = (uint32_t) parser->current_scope->locals.size;
|
16037
|
-
}
|
16038
|
-
|
16039
16639
|
pm_token_t opening;
|
16040
16640
|
pm_node_t *body = NULL;
|
16041
16641
|
parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
|
@@ -16059,7 +16659,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16059
16659
|
|
16060
16660
|
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
|
16061
16661
|
assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
|
16062
|
-
body = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) body, false);
|
16662
|
+
body = (pm_node_t *) parse_rescues_as_begin(parser, opening.start, (pm_statements_node_t *) body, false);
|
16063
16663
|
}
|
16064
16664
|
|
16065
16665
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
|
@@ -16070,19 +16670,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16070
16670
|
|
16071
16671
|
if (parameters == NULL && (maximum > 0)) {
|
16072
16672
|
parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = operator.start, .end = parser->previous.end }, maximum);
|
16073
|
-
locals_body_index = maximum;
|
16074
16673
|
}
|
16075
16674
|
|
16076
16675
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
16676
|
+
|
16077
16677
|
pm_parser_scope_pop(parser);
|
16078
16678
|
pm_accepts_block_stack_pop(parser);
|
16079
|
-
|
16679
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
16680
|
+
|
16681
|
+
return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
|
16080
16682
|
}
|
16081
16683
|
case PM_TOKEN_UPLUS: {
|
16082
16684
|
parser_lex(parser);
|
16083
16685
|
|
16084
16686
|
pm_token_t operator = parser->previous;
|
16085
|
-
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false,
|
16687
|
+
pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
|
16086
16688
|
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
|
16087
16689
|
|
16088
16690
|
return (pm_node_t *) node;
|
@@ -16095,12 +16697,34 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16095
16697
|
|
16096
16698
|
return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END);
|
16097
16699
|
}
|
16098
|
-
default:
|
16099
|
-
|
16700
|
+
default: {
|
16701
|
+
pm_context_t recoverable = context_recoverable(parser, &parser->current);
|
16702
|
+
|
16703
|
+
if (recoverable != PM_CONTEXT_NONE) {
|
16100
16704
|
parser->recovering = true;
|
16705
|
+
|
16706
|
+
// If the given error is not the generic one, then we'll add it
|
16707
|
+
// here because it will provide more context in addition to the
|
16708
|
+
// recoverable error that we will also add.
|
16709
|
+
if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
|
16710
|
+
pm_parser_err_prefix(parser, diag_id);
|
16711
|
+
}
|
16712
|
+
|
16713
|
+
// If we get here, then we are assuming this token is closing a
|
16714
|
+
// parent context, so we'll indicate that to the user so that
|
16715
|
+
// they know how we behaved.
|
16716
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
|
16717
|
+
} else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
|
16718
|
+
// We're going to make a special case here, because "cannot
|
16719
|
+
// parse expression" is pretty generic, and we know here that we
|
16720
|
+
// have an unexpected token.
|
16721
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
|
16722
|
+
} else {
|
16723
|
+
pm_parser_err_prefix(parser, diag_id);
|
16101
16724
|
}
|
16102
16725
|
|
16103
16726
|
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
16727
|
+
}
|
16104
16728
|
}
|
16105
16729
|
}
|
16106
16730
|
|
@@ -16145,7 +16769,18 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding
|
|
16145
16769
|
if (is_single_value && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
|
16146
16770
|
pm_token_t rescue = parser->current;
|
16147
16771
|
parser_lex(parser);
|
16148
|
-
|
16772
|
+
|
16773
|
+
bool accepts_command_call_inner = false;
|
16774
|
+
|
16775
|
+
// RHS can accept command call iff the value is a call with arguments but without paranthesis.
|
16776
|
+
if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
|
16777
|
+
pm_call_node_t *call_node = (pm_call_node_t *)value;
|
16778
|
+
if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
|
16779
|
+
accepts_command_call_inner = true;
|
16780
|
+
}
|
16781
|
+
}
|
16782
|
+
|
16783
|
+
pm_node_t *right = parse_expression(parser, binding_power, accepts_command_call_inner, PM_ERR_RESCUE_MODIFIER_VALUE);
|
16149
16784
|
|
16150
16785
|
return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
|
16151
16786
|
}
|
@@ -16330,7 +16965,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16330
16965
|
switch (PM_NODE_TYPE(node)) {
|
16331
16966
|
case PM_BACK_REFERENCE_READ_NODE:
|
16332
16967
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
16333
|
-
|
16968
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
16334
16969
|
/* fallthrough */
|
16335
16970
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
16336
16971
|
parser_lex(parser);
|
@@ -16412,7 +17047,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16412
17047
|
}
|
16413
17048
|
|
16414
17049
|
// If this node cannot be writable, then we have an error.
|
16415
|
-
if (pm_call_node_writable_p(cast)) {
|
17050
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
16416
17051
|
parse_write_name(parser, &cast->name);
|
16417
17052
|
} else {
|
16418
17053
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -16441,7 +17076,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16441
17076
|
switch (PM_NODE_TYPE(node)) {
|
16442
17077
|
case PM_BACK_REFERENCE_READ_NODE:
|
16443
17078
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
16444
|
-
|
17079
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
16445
17080
|
/* fallthrough */
|
16446
17081
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
16447
17082
|
parser_lex(parser);
|
@@ -16523,7 +17158,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16523
17158
|
}
|
16524
17159
|
|
16525
17160
|
// If this node cannot be writable, then we have an error.
|
16526
|
-
if (pm_call_node_writable_p(cast)) {
|
17161
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
16527
17162
|
parse_write_name(parser, &cast->name);
|
16528
17163
|
} else {
|
16529
17164
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -16562,7 +17197,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16562
17197
|
switch (PM_NODE_TYPE(node)) {
|
16563
17198
|
case PM_BACK_REFERENCE_READ_NODE:
|
16564
17199
|
case PM_NUMBERED_REFERENCE_READ_NODE:
|
16565
|
-
|
17200
|
+
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
|
16566
17201
|
/* fallthrough */
|
16567
17202
|
case PM_GLOBAL_VARIABLE_READ_NODE: {
|
16568
17203
|
parser_lex(parser);
|
@@ -16644,7 +17279,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16644
17279
|
}
|
16645
17280
|
|
16646
17281
|
// If this node cannot be writable, then we have an error.
|
16647
|
-
if (pm_call_node_writable_p(cast)) {
|
17282
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
16648
17283
|
parse_write_name(parser, &cast->name);
|
16649
17284
|
} else {
|
16650
17285
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -17063,15 +17698,12 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
17063
17698
|
*/
|
17064
17699
|
static pm_node_t *
|
17065
17700
|
parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
|
17066
|
-
|
17067
|
-
pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call);
|
17701
|
+
pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, diag_id);
|
17068
17702
|
|
17069
17703
|
switch (PM_NODE_TYPE(node)) {
|
17070
17704
|
case PM_MISSING_NODE:
|
17071
17705
|
// If we found a syntax error, then the type of node returned by
|
17072
|
-
// parse_expression_prefix is going to be a missing node.
|
17073
|
-
// case we need to add the error message to the parser's error list.
|
17074
|
-
pm_parser_err(parser, recovery.end, recovery.end, diag_id);
|
17706
|
+
// parse_expression_prefix is going to be a missing node.
|
17075
17707
|
return node;
|
17076
17708
|
case PM_PRE_EXECUTION_NODE:
|
17077
17709
|
case PM_POST_EXECUTION_NODE:
|
@@ -17080,7 +17712,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
17080
17712
|
case PM_UNDEF_NODE:
|
17081
17713
|
// These expressions are statements, and cannot be followed by
|
17082
17714
|
// operators (except modifiers).
|
17083
|
-
if (pm_binding_powers[parser->current.type].left >
|
17715
|
+
if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
|
17084
17716
|
return node;
|
17085
17717
|
}
|
17086
17718
|
break;
|
@@ -17175,9 +17807,14 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
17175
17807
|
|
17176
17808
|
static pm_node_t *
|
17177
17809
|
parse_program(pm_parser_t *parser) {
|
17178
|
-
|
17179
|
-
|
17810
|
+
// If the current scope is NULL, then we want to push a new top level scope.
|
17811
|
+
// The current scope could exist in the event that we are parsing an eval
|
17812
|
+
// and the user has passed into scopes that already exist.
|
17813
|
+
if (parser->current_scope == NULL) {
|
17814
|
+
pm_parser_scope_push(parser, true);
|
17815
|
+
}
|
17180
17816
|
|
17817
|
+
parser_lex(parser);
|
17181
17818
|
pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN);
|
17182
17819
|
if (!statements) {
|
17183
17820
|
statements = pm_statements_node_create(parser);
|
@@ -17224,6 +17861,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17224
17861
|
.current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
|
17225
17862
|
.next_start = NULL,
|
17226
17863
|
.heredoc_end = NULL,
|
17864
|
+
.data_loc = { .start = NULL, .end = NULL },
|
17227
17865
|
.comment_list = { 0 },
|
17228
17866
|
.magic_comment_list = { 0 },
|
17229
17867
|
.warning_list = { 0 },
|
@@ -17234,7 +17872,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17234
17872
|
.encoding_changed_callback = NULL,
|
17235
17873
|
.encoding_comment_start = source,
|
17236
17874
|
.lex_callback = NULL,
|
17237
|
-
.
|
17875
|
+
.filepath = { 0 },
|
17238
17876
|
.constant_pool = { 0 },
|
17239
17877
|
.newline_list = { 0 },
|
17240
17878
|
.integer_base = 0,
|
@@ -17248,8 +17886,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17248
17886
|
.in_keyword_arg = false,
|
17249
17887
|
.current_param_name = 0,
|
17250
17888
|
.semantic_token_seen = false,
|
17251
|
-
.frozen_string_literal = false
|
17252
|
-
.suppress_warnings = false
|
17889
|
+
.frozen_string_literal = false
|
17253
17890
|
};
|
17254
17891
|
|
17255
17892
|
// Initialize the constant pool. We're going to completely guess as to the
|
@@ -17278,7 +17915,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17278
17915
|
// If options were provided to this parse, establish them here.
|
17279
17916
|
if (options != NULL) {
|
17280
17917
|
// filepath option
|
17281
|
-
parser->
|
17918
|
+
parser->filepath = options->filepath;
|
17282
17919
|
|
17283
17920
|
// line option
|
17284
17921
|
parser->start_line = options->line;
|
@@ -17295,10 +17932,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17295
17932
|
parser->frozen_string_literal = true;
|
17296
17933
|
}
|
17297
17934
|
|
17298
|
-
//
|
17299
|
-
|
17300
|
-
parser->suppress_warnings = true;
|
17301
|
-
}
|
17935
|
+
// version option
|
17936
|
+
parser->version = options->version;
|
17302
17937
|
|
17303
17938
|
// scopes option
|
17304
17939
|
for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
|
@@ -17382,7 +18017,7 @@ pm_magic_comment_list_free(pm_list_t *list) {
|
|
17382
18017
|
*/
|
17383
18018
|
PRISM_EXPORTED_FUNCTION void
|
17384
18019
|
pm_parser_free(pm_parser_t *parser) {
|
17385
|
-
pm_string_free(&parser->
|
18020
|
+
pm_string_free(&parser->filepath);
|
17386
18021
|
pm_diagnostic_list_free(&parser->error_list);
|
17387
18022
|
pm_diagnostic_list_free(&parser->warning_list);
|
17388
18023
|
pm_comment_list_free(&parser->comment_list);
|
@@ -17484,3 +18119,303 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
|
|
17484
18119
|
#undef PM_LOCATION_NODE_VALUE
|
17485
18120
|
#undef PM_LOCATION_NULL_VALUE
|
17486
18121
|
#undef PM_LOCATION_TOKEN_VALUE
|
18122
|
+
|
18123
|
+
/** An error that is going to be formatted into the output. */
|
18124
|
+
typedef struct {
|
18125
|
+
/** A pointer to the diagnostic that was generated during parsing. */
|
18126
|
+
pm_diagnostic_t *error;
|
18127
|
+
|
18128
|
+
/** The start line of the diagnostic message. */
|
18129
|
+
int32_t line;
|
18130
|
+
|
18131
|
+
/** The column start of the diagnostic message. */
|
18132
|
+
uint32_t column_start;
|
18133
|
+
|
18134
|
+
/** The column end of the diagnostic message. */
|
18135
|
+
uint32_t column_end;
|
18136
|
+
} pm_error_t;
|
18137
|
+
|
18138
|
+
/** The format that will be used to format the errors into the output. */
|
18139
|
+
typedef struct {
|
18140
|
+
/** The prefix that will be used for line numbers. */
|
18141
|
+
const char *number_prefix;
|
18142
|
+
|
18143
|
+
/** The prefix that will be used for blank lines. */
|
18144
|
+
const char *blank_prefix;
|
18145
|
+
|
18146
|
+
/** The divider that will be used between sections of source code. */
|
18147
|
+
const char *divider;
|
18148
|
+
|
18149
|
+
/** The length of the blank prefix. */
|
18150
|
+
size_t blank_prefix_length;
|
18151
|
+
|
18152
|
+
/** The length of the divider. */
|
18153
|
+
size_t divider_length;
|
18154
|
+
} pm_error_format_t;
|
18155
|
+
|
18156
|
+
#define PM_COLOR_GRAY "\033[38;5;102m"
|
18157
|
+
#define PM_COLOR_RED "\033[1;31m"
|
18158
|
+
#define PM_COLOR_RESET "\033[0m"
|
18159
|
+
|
18160
|
+
static inline pm_error_t *
|
18161
|
+
pm_parser_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
|
18162
|
+
pm_error_t *errors = calloc(error_list->size, sizeof(pm_error_t));
|
18163
|
+
int32_t start_line = parser->start_line;
|
18164
|
+
|
18165
|
+
for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
|
18166
|
+
pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line);
|
18167
|
+
pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line);
|
18168
|
+
|
18169
|
+
// We're going to insert this error into the array in sorted order. We
|
18170
|
+
// do this by finding the first error that has a line number greater
|
18171
|
+
// than the current error and then inserting the current error before
|
18172
|
+
// that one.
|
18173
|
+
size_t index = 0;
|
18174
|
+
while (
|
18175
|
+
(index < error_list->size) &&
|
18176
|
+
(errors[index].error != NULL) &&
|
18177
|
+
(
|
18178
|
+
(errors[index].line < start.line) ||
|
18179
|
+
((errors[index].line == start.line) && (errors[index].column_start < start.column))
|
18180
|
+
)
|
18181
|
+
) index++;
|
18182
|
+
|
18183
|
+
// Now we're going to shift all of the errors after this one down one
|
18184
|
+
// index to make room for the new error.
|
18185
|
+
if (index + 1 < error_list->size) {
|
18186
|
+
memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
|
18187
|
+
}
|
18188
|
+
|
18189
|
+
// Finally, we'll insert the error into the array.
|
18190
|
+
uint32_t column_end;
|
18191
|
+
if (start.line == end.line) {
|
18192
|
+
column_end = end.column;
|
18193
|
+
} else {
|
18194
|
+
column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1);
|
18195
|
+
}
|
18196
|
+
|
18197
|
+
// Ensure we have at least one column of error.
|
18198
|
+
if (start.column == column_end) column_end++;
|
18199
|
+
|
18200
|
+
errors[index] = (pm_error_t) {
|
18201
|
+
.error = error,
|
18202
|
+
.line = start.line,
|
18203
|
+
.column_start = start.column,
|
18204
|
+
.column_end = column_end
|
18205
|
+
};
|
18206
|
+
}
|
18207
|
+
|
18208
|
+
return errors;
|
18209
|
+
}
|
18210
|
+
|
18211
|
+
static inline void
|
18212
|
+
pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, pm_buffer_t *buffer) {
|
18213
|
+
size_t index = (size_t) (line - parser->start_line);
|
18214
|
+
|
18215
|
+
const uint8_t *start = &parser->start[newline_list->offsets[index]];
|
18216
|
+
const uint8_t *end;
|
18217
|
+
|
18218
|
+
if (index >= newline_list->size - 1) {
|
18219
|
+
end = parser->end;
|
18220
|
+
} else {
|
18221
|
+
end = &parser->start[newline_list->offsets[index + 1]];
|
18222
|
+
}
|
18223
|
+
|
18224
|
+
pm_buffer_append_format(buffer, number_prefix, line);
|
18225
|
+
pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
|
18226
|
+
|
18227
|
+
if (end == parser->end && end[-1] != '\n') {
|
18228
|
+
pm_buffer_append_string(buffer, "\n", 1);
|
18229
|
+
}
|
18230
|
+
}
|
18231
|
+
|
18232
|
+
/**
|
18233
|
+
* Format the errors on the parser into the given buffer.
|
18234
|
+
*/
|
18235
|
+
PRISM_EXPORTED_FUNCTION void
|
18236
|
+
pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool colorize) {
|
18237
|
+
const pm_list_t *error_list = &parser->error_list;
|
18238
|
+
assert(error_list->size != 0);
|
18239
|
+
|
18240
|
+
// First, we're going to sort all of the errors by line number using an
|
18241
|
+
// insertion sort into a newly allocated array.
|
18242
|
+
const int32_t start_line = parser->start_line;
|
18243
|
+
const pm_newline_list_t *newline_list = &parser->newline_list;
|
18244
|
+
pm_error_t *errors = pm_parser_errors_format_sort(parser, error_list, newline_list);
|
18245
|
+
|
18246
|
+
// Now we're going to determine how we're going to format line numbers and
|
18247
|
+
// blank lines based on the maximum number of digits in the line numbers
|
18248
|
+
// that are going to be displayed.
|
18249
|
+
pm_error_format_t error_format;
|
18250
|
+
int32_t max_line_number = errors[error_list->size - 1].line - start_line;
|
18251
|
+
|
18252
|
+
if (max_line_number < 10) {
|
18253
|
+
if (colorize) {
|
18254
|
+
error_format = (pm_error_format_t) {
|
18255
|
+
.number_prefix = PM_COLOR_GRAY "%1" PRIi32 " | " PM_COLOR_RESET,
|
18256
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18257
|
+
.divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
|
18258
|
+
};
|
18259
|
+
} else {
|
18260
|
+
error_format = (pm_error_format_t) {
|
18261
|
+
.number_prefix = "%1" PRIi32 " | ",
|
18262
|
+
.blank_prefix = " | ",
|
18263
|
+
.divider = " ~~~~~\n"
|
18264
|
+
};
|
18265
|
+
}
|
18266
|
+
} else if (max_line_number < 100) {
|
18267
|
+
if (colorize) {
|
18268
|
+
error_format = (pm_error_format_t) {
|
18269
|
+
.number_prefix = PM_COLOR_GRAY "%2" PRIi32 " | " PM_COLOR_RESET,
|
18270
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18271
|
+
.divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
|
18272
|
+
};
|
18273
|
+
} else {
|
18274
|
+
error_format = (pm_error_format_t) {
|
18275
|
+
.number_prefix = "%2" PRIi32 " | ",
|
18276
|
+
.blank_prefix = " | ",
|
18277
|
+
.divider = " ~~~~~~\n"
|
18278
|
+
};
|
18279
|
+
}
|
18280
|
+
} else if (max_line_number < 1000) {
|
18281
|
+
if (colorize) {
|
18282
|
+
error_format = (pm_error_format_t) {
|
18283
|
+
.number_prefix = PM_COLOR_GRAY "%3" PRIi32 " | " PM_COLOR_RESET,
|
18284
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18285
|
+
.divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
|
18286
|
+
};
|
18287
|
+
} else {
|
18288
|
+
error_format = (pm_error_format_t) {
|
18289
|
+
.number_prefix = "%3" PRIi32 " | ",
|
18290
|
+
.blank_prefix = " | ",
|
18291
|
+
.divider = " ~~~~~~~\n"
|
18292
|
+
};
|
18293
|
+
}
|
18294
|
+
} else if (max_line_number < 10000) {
|
18295
|
+
if (colorize) {
|
18296
|
+
error_format = (pm_error_format_t) {
|
18297
|
+
.number_prefix = PM_COLOR_GRAY "%4" PRIi32 " | " PM_COLOR_RESET,
|
18298
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18299
|
+
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
18300
|
+
};
|
18301
|
+
} else {
|
18302
|
+
error_format = (pm_error_format_t) {
|
18303
|
+
.number_prefix = "%4" PRIi32 " | ",
|
18304
|
+
.blank_prefix = " | ",
|
18305
|
+
.divider = " ~~~~~~~~\n"
|
18306
|
+
};
|
18307
|
+
}
|
18308
|
+
} else {
|
18309
|
+
if (colorize) {
|
18310
|
+
error_format = (pm_error_format_t) {
|
18311
|
+
.number_prefix = PM_COLOR_GRAY "%5" PRIi32 " | " PM_COLOR_RESET,
|
18312
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18313
|
+
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
18314
|
+
};
|
18315
|
+
} else {
|
18316
|
+
error_format = (pm_error_format_t) {
|
18317
|
+
.number_prefix = "%5" PRIi32 " | ",
|
18318
|
+
.blank_prefix = " | ",
|
18319
|
+
.divider = " ~~~~~~~~\n"
|
18320
|
+
};
|
18321
|
+
}
|
18322
|
+
}
|
18323
|
+
|
18324
|
+
error_format.blank_prefix_length = strlen(error_format.blank_prefix);
|
18325
|
+
error_format.divider_length = strlen(error_format.divider);
|
18326
|
+
|
18327
|
+
// Now we're going to iterate through every error in our error list and
|
18328
|
+
// display it. While we're iterating, we will display some padding lines of
|
18329
|
+
// the source before the error to give some context. We'll be careful not to
|
18330
|
+
// display the same line twice in case the errors are close enough in the
|
18331
|
+
// source.
|
18332
|
+
int32_t last_line = 0;
|
18333
|
+
const pm_encoding_t *encoding = parser->encoding;
|
18334
|
+
|
18335
|
+
for (size_t index = 0; index < error_list->size; index++) {
|
18336
|
+
pm_error_t *error = &errors[index];
|
18337
|
+
|
18338
|
+
// Here we determine how many lines of padding of the source to display,
|
18339
|
+
// based on the difference from the last line that was displayed.
|
18340
|
+
if (error->line - last_line > 1) {
|
18341
|
+
if (error->line - last_line > 2) {
|
18342
|
+
if ((index != 0) && (error->line - last_line > 3)) {
|
18343
|
+
pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
|
18344
|
+
}
|
18345
|
+
|
18346
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18347
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
|
18348
|
+
}
|
18349
|
+
|
18350
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18351
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
|
18352
|
+
}
|
18353
|
+
|
18354
|
+
// If this is the first error or we're on a new line, then we'll display
|
18355
|
+
// the line that has the error in it.
|
18356
|
+
if ((index == 0) || (error->line != last_line)) {
|
18357
|
+
if (colorize) {
|
18358
|
+
pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 13);
|
18359
|
+
} else {
|
18360
|
+
pm_buffer_append_string(buffer, "> ", 2);
|
18361
|
+
}
|
18362
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
|
18363
|
+
}
|
18364
|
+
|
18365
|
+
// Now we'll display the actual error message. We'll do this by first
|
18366
|
+
// putting the prefix to the line, then a bunch of blank spaces
|
18367
|
+
// depending on the column, then as many carets as we need to display
|
18368
|
+
// the width of the error, then the error message itself.
|
18369
|
+
//
|
18370
|
+
// Note that this doesn't take into account the width of the actual
|
18371
|
+
// character when displayed in the terminal. For some east-asian
|
18372
|
+
// languages or emoji, this means it can be thrown off pretty badly. We
|
18373
|
+
// will need to solve this eventually.
|
18374
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18375
|
+
pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
|
18376
|
+
|
18377
|
+
size_t column = 0;
|
18378
|
+
const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]];
|
18379
|
+
|
18380
|
+
while (column < error->column_end) {
|
18381
|
+
if (column < error->column_start) {
|
18382
|
+
pm_buffer_append_byte(buffer, ' ');
|
18383
|
+
} else if (colorize) {
|
18384
|
+
pm_buffer_append_string(buffer, PM_COLOR_RED "^" PM_COLOR_RESET, 12);
|
18385
|
+
} else {
|
18386
|
+
pm_buffer_append_byte(buffer, '^');
|
18387
|
+
}
|
18388
|
+
|
18389
|
+
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
18390
|
+
column += (char_width == 0 ? 1 : char_width);
|
18391
|
+
}
|
18392
|
+
|
18393
|
+
pm_buffer_append_byte(buffer, ' ');
|
18394
|
+
|
18395
|
+
const char *message = error->error->message;
|
18396
|
+
pm_buffer_append_string(buffer, message, strlen(message));
|
18397
|
+
pm_buffer_append_byte(buffer, '\n');
|
18398
|
+
|
18399
|
+
// Here we determine how many lines of padding to display after the
|
18400
|
+
// error, depending on where the next error is in source.
|
18401
|
+
last_line = error->line;
|
18402
|
+
int32_t next_line = (index == error_list->size - 1) ? ((int32_t) newline_list->size) : errors[index + 1].line;
|
18403
|
+
|
18404
|
+
if (next_line - last_line > 1) {
|
18405
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18406
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
|
18407
|
+
}
|
18408
|
+
|
18409
|
+
if (next_line - last_line > 1) {
|
18410
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18411
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
|
18412
|
+
}
|
18413
|
+
}
|
18414
|
+
|
18415
|
+
// Finally, we'll free the array of errors that we allocated.
|
18416
|
+
free(errors);
|
18417
|
+
}
|
18418
|
+
|
18419
|
+
// The color macros used by the error formatting code above are undefined here so that they are not visible to any code that follows.
#undef PM_COLOR_GRAY
|
18420
|
+
#undef PM_COLOR_RED
|
18421
|
+
#undef PM_COLOR_RESET
|