prism 0.19.0 → 0.21.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +48 -1
- data/Makefile +5 -0
- data/README.md +8 -6
- data/config.yml +236 -38
- data/docs/build_system.md +19 -2
- data/docs/cruby_compilation.md +27 -0
- data/docs/parser_translation.md +34 -0
- data/docs/parsing_rules.md +19 -0
- data/docs/releasing.md +3 -3
- data/docs/ruby_api.md +1 -1
- data/docs/serialization.md +17 -5
- data/ext/prism/api_node.c +101 -81
- data/ext/prism/extension.c +74 -11
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +1700 -505
- data/include/prism/defines.h +8 -0
- data/include/prism/diagnostic.h +39 -2
- data/include/prism/encoding.h +10 -0
- data/include/prism/options.h +40 -14
- data/include/prism/parser.h +34 -18
- data/include/prism/util/pm_buffer.h +9 -0
- data/include/prism/util/pm_constant_pool.h +18 -0
- data/include/prism/util/pm_newline_list.h +0 -11
- data/include/prism/version.h +2 -2
- data/include/prism.h +19 -2
- data/lib/prism/debug.rb +11 -5
- data/lib/prism/dot_visitor.rb +36 -14
- data/lib/prism/dsl.rb +22 -22
- data/lib/prism/ffi.rb +2 -2
- data/lib/prism/node.rb +1020 -737
- data/lib/prism/node_ext.rb +2 -2
- data/lib/prism/parse_result.rb +17 -9
- data/lib/prism/serialize.rb +53 -29
- data/lib/prism/translation/parser/compiler.rb +1828 -0
- data/lib/prism/translation/parser/lexer.rb +335 -0
- data/lib/prism/translation/parser/rubocop.rb +37 -0
- data/lib/prism/translation/parser.rb +171 -0
- data/lib/prism/translation.rb +11 -0
- data/lib/prism.rb +1 -0
- data/prism.gemspec +12 -5
- data/rbi/prism.rbi +150 -88
- data/rbi/prism_static.rbi +15 -3
- data/sig/prism.rbs +996 -961
- data/sig/prism_static.rbs +123 -46
- data/src/diagnostic.c +259 -219
- data/src/encoding.c +5 -9
- data/src/node.c +2 -6
- data/src/options.c +24 -5
- data/src/prettyprint.c +174 -42
- data/src/prism.c +1344 -479
- data/src/serialize.c +12 -9
- data/src/token_type.c +353 -4
- data/src/util/pm_buffer.c +11 -0
- data/src/util/pm_constant_pool.c +37 -11
- data/src/util/pm_newline_list.c +2 -14
- metadata +10 -3
- data/docs/building.md +0 -29
data/src/prism.c
CHANGED
@@ -164,7 +164,7 @@ debug_state(pm_parser_t *parser) {
|
|
164
164
|
|
165
165
|
PRISM_ATTRIBUTE_UNUSED static void
|
166
166
|
debug_token(pm_token_t * token) {
|
167
|
-
fprintf(stderr, "%s: \"%.*s\"\n",
|
167
|
+
fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_human(token->type), (int) (token->end - token->start), token->start);
|
168
168
|
}
|
169
169
|
|
170
170
|
#endif
|
@@ -423,6 +423,11 @@ lex_state_beg_p(pm_parser_t *parser) {
|
|
423
423
|
return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
|
424
424
|
}
|
425
425
|
|
426
|
+
static inline bool
|
427
|
+
lex_state_arg_labeled_p(pm_parser_t *parser) {
|
428
|
+
return (parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
|
429
|
+
}
|
430
|
+
|
426
431
|
static inline bool
|
427
432
|
lex_state_arg_p(pm_parser_t *parser) {
|
428
433
|
return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
|
@@ -548,9 +553,7 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
|
|
548
553
|
*/
|
549
554
|
static inline void
|
550
555
|
pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
|
551
|
-
|
552
|
-
pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
|
553
|
-
}
|
556
|
+
pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
|
554
557
|
}
|
555
558
|
|
556
559
|
/**
|
@@ -813,6 +816,9 @@ typedef struct {
|
|
813
816
|
|
814
817
|
/** The optional block attached to the call. */
|
815
818
|
pm_node_t *block;
|
819
|
+
|
820
|
+
/** The flag indicating whether this arguments list has a forwarding argument. */
|
821
|
+
bool has_forwarding;
|
816
822
|
} pm_arguments_t;
|
817
823
|
|
818
824
|
/**
|
@@ -864,6 +870,105 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
|
|
864
870
|
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
|
865
871
|
}
|
866
872
|
|
873
|
+
/******************************************************************************/
|
874
|
+
/* Basic character checks */
|
875
|
+
/******************************************************************************/
|
876
|
+
|
877
|
+
/**
|
878
|
+
* This function is used extremely frequently to lex all of the identifiers in a
|
879
|
+
* source file, so it's important that it be as fast as possible. For this
|
880
|
+
* reason we have the encoding_changed boolean to check if we need to go through
|
881
|
+
* the function pointer or can just directly use the UTF-8 functions.
|
882
|
+
*/
|
883
|
+
static inline size_t
|
884
|
+
char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
|
885
|
+
if (parser->encoding_changed) {
|
886
|
+
size_t width;
|
887
|
+
if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
|
888
|
+
return width;
|
889
|
+
} else if (*b == '_') {
|
890
|
+
return 1;
|
891
|
+
} else if (*b >= 0x80) {
|
892
|
+
return parser->encoding->char_width(b, parser->end - b);
|
893
|
+
} else {
|
894
|
+
return 0;
|
895
|
+
}
|
896
|
+
} else if (*b < 0x80) {
|
897
|
+
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
|
898
|
+
} else {
|
899
|
+
return pm_encoding_utf_8_char_width(b, parser->end - b);
|
900
|
+
}
|
901
|
+
}
|
902
|
+
|
903
|
+
/**
|
904
|
+
* Similar to char_is_identifier but this function assumes that the encoding
|
905
|
+
* has not been changed.
|
906
|
+
*/
|
907
|
+
static inline size_t
|
908
|
+
char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
909
|
+
if (*b < 0x80) {
|
910
|
+
return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
|
911
|
+
} else {
|
912
|
+
return pm_encoding_utf_8_char_width(b, end - b);
|
913
|
+
}
|
914
|
+
}
|
915
|
+
|
916
|
+
/**
|
917
|
+
* Like the above, this function is also used extremely frequently to lex all of
|
918
|
+
* the identifiers in a source file once the first character has been found. So
|
919
|
+
* it's important that it be as fast as possible.
|
920
|
+
*/
|
921
|
+
static inline size_t
|
922
|
+
char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
923
|
+
if (parser->encoding_changed) {
|
924
|
+
size_t width;
|
925
|
+
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
|
926
|
+
return width;
|
927
|
+
} else if (*b == '_') {
|
928
|
+
return 1;
|
929
|
+
} else if (*b >= 0x80) {
|
930
|
+
return parser->encoding->char_width(b, parser->end - b);
|
931
|
+
} else {
|
932
|
+
return 0;
|
933
|
+
}
|
934
|
+
}
|
935
|
+
return char_is_identifier_utf8(b, parser->end);
|
936
|
+
}
|
937
|
+
|
938
|
+
// A perfect hash of the punctuation characters that are allowed in global
// names. It is used to quickly check whether the character following a $ is a
// valid character for a global name.
//
// The table holds one 32-bit word per block of 32 code points, starting with
// the block that begins at 0x20. BIT contributes a character's bit only to the
// word whose index matches that character's block.
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
#define PUNCT(idx) ( \
    BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
    BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
    BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
    BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
    BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
    BIT('0', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };

#undef BIT
#undef PUNCT

/**
 * Returns true if the given byte is one of the punctuation characters that may
 * follow a $ in a global name.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    const unsigned int code = b;

    // Only code points after the space and up to '~' are covered by the table.
    const bool in_table = (0x20 < code) && (code <= 0x7e);
    if (!in_table) return false;

    // Pick the word for this character's block, then test its bit.
    const unsigned int word = pm_global_name_punctuation_hash[(code - 0x20) / 32];
    return ((word >> (code % 32)) & 1) != 0;
}
|
962
|
+
|
963
|
+
static inline bool
|
964
|
+
token_is_setter_name(pm_token_t *token) {
|
965
|
+
return (
|
966
|
+
(token->type == PM_TOKEN_IDENTIFIER) &&
|
967
|
+
(token->end - token->start >= 2) &&
|
968
|
+
(token->end[-1] == '=')
|
969
|
+
);
|
970
|
+
}
|
971
|
+
|
867
972
|
/******************************************************************************/
|
868
973
|
/* Node flag handling functions */
|
869
974
|
/******************************************************************************/
|
@@ -884,6 +989,22 @@ pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
|
|
884
989
|
node->flags &= (pm_node_flags_t) ~flag;
|
885
990
|
}
|
886
991
|
|
992
|
+
/**
|
993
|
+
* Set the repeated parameter flag on the given node.
|
994
|
+
*/
|
995
|
+
static inline void
|
996
|
+
pm_node_flag_set_repeated_parameter(pm_node_t *node) {
|
997
|
+
assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
|
998
|
+
PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
|
999
|
+
PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
|
1000
|
+
PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
|
1001
|
+
PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
|
1002
|
+
PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
|
1003
|
+
PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
|
1004
|
+
PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
|
1005
|
+
|
1006
|
+
pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
|
1007
|
+
}
|
887
1008
|
|
888
1009
|
/******************************************************************************/
|
889
1010
|
/* Node creation functions */
|
@@ -977,7 +1098,7 @@ static inline void *
|
|
977
1098
|
pm_alloc_node(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
|
978
1099
|
void *memory = calloc(1, size);
|
979
1100
|
if (memory == NULL) {
|
980
|
-
fprintf(stderr, "Failed to allocate %
|
1101
|
+
fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
|
981
1102
|
abort();
|
982
1103
|
}
|
983
1104
|
return memory;
|
@@ -1325,7 +1446,7 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
|
|
1325
1446
|
pm_assoc_node_t *node = PM_ALLOC_NODE(parser, pm_assoc_node_t);
|
1326
1447
|
const uint8_t *end;
|
1327
1448
|
|
1328
|
-
if (value != NULL) {
|
1449
|
+
if (value != NULL && value->location.end > key->location.end) {
|
1329
1450
|
end = value->location.end;
|
1330
1451
|
} else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
|
1331
1452
|
end = operator->end;
|
@@ -1333,6 +1454,13 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
|
|
1333
1454
|
end = key->location.end;
|
1334
1455
|
}
|
1335
1456
|
|
1457
|
+
// Hash string keys will be frozen, so we can mark them as frozen here so
|
1458
|
+
// that the compiler picks them up and also when we check for static literal
|
1459
|
+
// on the keys it gets factored in.
|
1460
|
+
if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
|
1461
|
+
key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
|
1462
|
+
}
|
1463
|
+
|
1336
1464
|
// If the key and value of this assoc node are both static literals, then
|
1337
1465
|
// we can mark this node as a static literal.
|
1338
1466
|
pm_node_flags_t flags = 0;
|
@@ -1490,7 +1618,7 @@ pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, p
|
|
1490
1618
|
* Allocate and initialize a new BlockNode node.
|
1491
1619
|
*/
|
1492
1620
|
static pm_block_node_t *
|
1493
|
-
pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals,
|
1621
|
+
pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
|
1494
1622
|
pm_block_node_t *node = PM_ALLOC_NODE(parser, pm_block_node_t);
|
1495
1623
|
|
1496
1624
|
*node = (pm_block_node_t) {
|
@@ -1499,7 +1627,6 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_
|
|
1499
1627
|
.location = { .start = opening->start, .end = closing->end },
|
1500
1628
|
},
|
1501
1629
|
.locals = *locals,
|
1502
|
-
.locals_body_index = locals_body_index,
|
1503
1630
|
.parameters = parameters,
|
1504
1631
|
.body = body,
|
1505
1632
|
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
|
@@ -1645,12 +1772,13 @@ pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
|
|
1645
1772
|
* in the various specializations of this function.
|
1646
1773
|
*/
|
1647
1774
|
static pm_call_node_t *
|
1648
|
-
pm_call_node_create(pm_parser_t *parser) {
|
1775
|
+
pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
|
1649
1776
|
pm_call_node_t *node = PM_ALLOC_NODE(parser, pm_call_node_t);
|
1650
1777
|
|
1651
1778
|
*node = (pm_call_node_t) {
|
1652
1779
|
{
|
1653
1780
|
.type = PM_CALL_NODE,
|
1781
|
+
.flags = flags,
|
1654
1782
|
.location = PM_LOCATION_NULL_VALUE(parser),
|
1655
1783
|
},
|
1656
1784
|
.receiver = NULL,
|
@@ -1666,6 +1794,15 @@ pm_call_node_create(pm_parser_t *parser) {
|
|
1666
1794
|
return node;
|
1667
1795
|
}
|
1668
1796
|
|
1797
|
+
/**
|
1798
|
+
* Returns the value that the ignore visibility flag should be set to for the
|
1799
|
+
* given receiver.
|
1800
|
+
*/
|
1801
|
+
static inline pm_node_flags_t
|
1802
|
+
pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
|
1803
|
+
return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
|
1804
|
+
}
|
1805
|
+
|
1669
1806
|
/**
|
1670
1807
|
* Allocate and initialize a new CallNode node from an aref or an aset
|
1671
1808
|
* expression.
|
@@ -1674,7 +1811,7 @@ static pm_call_node_t *
|
|
1674
1811
|
pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
|
1675
1812
|
pm_assert_value_expression(parser, receiver);
|
1676
1813
|
|
1677
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1814
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1678
1815
|
|
1679
1816
|
node->base.location.start = receiver->location.start;
|
1680
1817
|
node->base.location.end = pm_arguments_end(arguments);
|
@@ -1700,7 +1837,7 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t
|
|
1700
1837
|
pm_assert_value_expression(parser, receiver);
|
1701
1838
|
pm_assert_value_expression(parser, argument);
|
1702
1839
|
|
1703
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1840
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1704
1841
|
|
1705
1842
|
node->base.location.start = MIN(receiver->location.start, argument->location.start);
|
1706
1843
|
node->base.location.end = MAX(receiver->location.end, argument->location.end);
|
@@ -1723,7 +1860,7 @@ static pm_call_node_t *
|
|
1723
1860
|
pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
|
1724
1861
|
pm_assert_value_expression(parser, receiver);
|
1725
1862
|
|
1726
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1863
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1727
1864
|
|
1728
1865
|
node->base.location.start = receiver->location.start;
|
1729
1866
|
const uint8_t *end = pm_arguments_end(arguments);
|
@@ -1754,7 +1891,7 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
|
|
1754
1891
|
*/
|
1755
1892
|
static pm_call_node_t *
|
1756
1893
|
pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
|
1757
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1894
|
+
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
|
1758
1895
|
|
1759
1896
|
node->base.location.start = message->start;
|
1760
1897
|
node->base.location.end = pm_arguments_end(arguments);
|
@@ -1776,7 +1913,7 @@ static pm_call_node_t *
|
|
1776
1913
|
pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
|
1777
1914
|
pm_assert_value_expression(parser, receiver);
|
1778
1915
|
|
1779
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1916
|
+
pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
|
1780
1917
|
|
1781
1918
|
node->base.location.start = message->start;
|
1782
1919
|
if (arguments->closing_loc.start != NULL) {
|
@@ -1802,7 +1939,7 @@ static pm_call_node_t *
|
|
1802
1939
|
pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
|
1803
1940
|
pm_assert_value_expression(parser, receiver);
|
1804
1941
|
|
1805
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1942
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1806
1943
|
|
1807
1944
|
node->base.location.start = receiver->location.start;
|
1808
1945
|
node->base.location.end = pm_arguments_end(arguments);
|
@@ -1829,7 +1966,7 @@ static pm_call_node_t *
|
|
1829
1966
|
pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
|
1830
1967
|
pm_assert_value_expression(parser, receiver);
|
1831
1968
|
|
1832
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1969
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1833
1970
|
|
1834
1971
|
node->base.location.start = operator->start;
|
1835
1972
|
node->base.location.end = receiver->location.end;
|
@@ -1847,7 +1984,7 @@ pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *
|
|
1847
1984
|
*/
|
1848
1985
|
static pm_call_node_t *
|
1849
1986
|
pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
|
1850
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1987
|
+
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
|
1851
1988
|
|
1852
1989
|
node->base.location = PM_LOCATION_TOKEN_VALUE(message);
|
1853
1990
|
node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
|
@@ -1885,11 +2022,12 @@ pm_call_node_index_p(pm_call_node_t *node) {
|
|
1885
2022
|
* operator assignment.
|
1886
2023
|
*/
|
1887
2024
|
static inline bool
|
1888
|
-
pm_call_node_writable_p(pm_call_node_t *node) {
|
2025
|
+
pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
|
1889
2026
|
return (
|
1890
2027
|
(node->message_loc.start != NULL) &&
|
1891
2028
|
(node->message_loc.end[-1] != '!') &&
|
1892
2029
|
(node->message_loc.end[-1] != '?') &&
|
2030
|
+
char_is_identifier_start(parser, node->message_loc.start) &&
|
1893
2031
|
(node->opening_loc.start == NULL) &&
|
1894
2032
|
(node->arguments == NULL) &&
|
1895
2033
|
(node->block == NULL)
|
@@ -2167,11 +2305,12 @@ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
|
|
2167
2305
|
static pm_index_target_node_t *
|
2168
2306
|
pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
|
2169
2307
|
pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
|
2308
|
+
pm_node_flags_t flags = target->base.flags;
|
2170
2309
|
|
2171
2310
|
*node = (pm_index_target_node_t) {
|
2172
2311
|
{
|
2173
2312
|
.type = PM_INDEX_TARGET_NODE,
|
2174
|
-
.flags =
|
2313
|
+
.flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
|
2175
2314
|
.location = target->base.location
|
2176
2315
|
},
|
2177
2316
|
.receiver = target->receiver,
|
@@ -2701,6 +2840,50 @@ pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *targ
|
|
2701
2840
|
return node;
|
2702
2841
|
}
|
2703
2842
|
|
2843
|
+
/**
|
2844
|
+
* Check if the receiver of a `def` node is allowed.
|
2845
|
+
*/
|
2846
|
+
static void
|
2847
|
+
pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
|
2848
|
+
switch (PM_NODE_TYPE(node)) {
|
2849
|
+
case PM_BEGIN_NODE: {
|
2850
|
+
const pm_begin_node_t *cast = (pm_begin_node_t *) node;
|
2851
|
+
if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
|
2852
|
+
break;
|
2853
|
+
}
|
2854
|
+
case PM_PARENTHESES_NODE: {
|
2855
|
+
const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
|
2856
|
+
if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
|
2857
|
+
break;
|
2858
|
+
}
|
2859
|
+
case PM_STATEMENTS_NODE: {
|
2860
|
+
const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
|
2861
|
+
pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
|
2862
|
+
break;
|
2863
|
+
}
|
2864
|
+
case PM_ARRAY_NODE:
|
2865
|
+
case PM_FLOAT_NODE:
|
2866
|
+
case PM_IMAGINARY_NODE:
|
2867
|
+
case PM_INTEGER_NODE:
|
2868
|
+
case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
|
2869
|
+
case PM_INTERPOLATED_STRING_NODE:
|
2870
|
+
case PM_INTERPOLATED_SYMBOL_NODE:
|
2871
|
+
case PM_INTERPOLATED_X_STRING_NODE:
|
2872
|
+
case PM_RATIONAL_NODE:
|
2873
|
+
case PM_REGULAR_EXPRESSION_NODE:
|
2874
|
+
case PM_SOURCE_ENCODING_NODE:
|
2875
|
+
case PM_SOURCE_FILE_NODE:
|
2876
|
+
case PM_SOURCE_LINE_NODE:
|
2877
|
+
case PM_STRING_NODE:
|
2878
|
+
case PM_SYMBOL_NODE:
|
2879
|
+
case PM_X_STRING_NODE:
|
2880
|
+
pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
|
2881
|
+
break;
|
2882
|
+
default:
|
2883
|
+
break;
|
2884
|
+
}
|
2885
|
+
}
|
2886
|
+
|
2704
2887
|
/**
|
2705
2888
|
* Allocate and initialize a new DefNode node.
|
2706
2889
|
*/
|
@@ -2712,7 +2895,6 @@ pm_def_node_create(
|
|
2712
2895
|
pm_parameters_node_t *parameters,
|
2713
2896
|
pm_node_t *body,
|
2714
2897
|
pm_constant_id_list_t *locals,
|
2715
|
-
uint32_t locals_body_index,
|
2716
2898
|
const pm_token_t *def_keyword,
|
2717
2899
|
const pm_token_t *operator,
|
2718
2900
|
const pm_token_t *lparen,
|
@@ -2729,6 +2911,10 @@ pm_def_node_create(
|
|
2729
2911
|
end = end_keyword->end;
|
2730
2912
|
}
|
2731
2913
|
|
2914
|
+
if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
|
2915
|
+
pm_def_node_receiver_check(parser, receiver);
|
2916
|
+
}
|
2917
|
+
|
2732
2918
|
*node = (pm_def_node_t) {
|
2733
2919
|
{
|
2734
2920
|
.type = PM_DEF_NODE,
|
@@ -2740,7 +2926,6 @@ pm_def_node_create(
|
|
2740
2926
|
.parameters = parameters,
|
2741
2927
|
.body = body,
|
2742
2928
|
.locals = *locals,
|
2743
|
-
.locals_body_index = locals_body_index,
|
2744
2929
|
.def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
|
2745
2930
|
.operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
|
2746
2931
|
.lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
|
@@ -3962,9 +4147,8 @@ pm_keyword_hash_node_create(pm_parser_t *parser) {
|
|
3962
4147
|
*/
|
3963
4148
|
static void
|
3964
4149
|
pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
|
3965
|
-
// If the element being added is not an AssocNode or does not have a symbol
|
3966
|
-
// we want to turn the
|
3967
|
-
// TODO: Rename the flag to SYMBOL_KEYS instead.
|
4150
|
+
// If the element being added is not an AssocNode or does not have a symbol
|
4151
|
+
// key, then we want to turn the SYMBOL_KEYS flag off.
|
3968
4152
|
if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
|
3969
4153
|
pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
|
3970
4154
|
}
|
@@ -4051,7 +4235,6 @@ static pm_lambda_node_t *
|
|
4051
4235
|
pm_lambda_node_create(
|
4052
4236
|
pm_parser_t *parser,
|
4053
4237
|
pm_constant_id_list_t *locals,
|
4054
|
-
uint32_t locals_body_index,
|
4055
4238
|
const pm_token_t *operator,
|
4056
4239
|
const pm_token_t *opening,
|
4057
4240
|
const pm_token_t *closing,
|
@@ -4069,7 +4252,6 @@ pm_lambda_node_create(
|
|
4069
4252
|
},
|
4070
4253
|
},
|
4071
4254
|
.locals = *locals,
|
4072
|
-
.locals_body_index = locals_body_index,
|
4073
4255
|
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
4074
4256
|
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
|
4075
4257
|
.closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
|
@@ -4161,12 +4343,10 @@ pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, c
|
|
4161
4343
|
}
|
4162
4344
|
|
4163
4345
|
/**
|
4164
|
-
* Allocate a new LocalVariableReadNode node.
|
4346
|
+
* Allocate a new LocalVariableReadNode node with constant_id.
|
4165
4347
|
*/
|
4166
4348
|
static pm_local_variable_read_node_t *
|
4167
|
-
|
4168
|
-
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
|
4169
|
-
|
4349
|
+
pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth) {
|
4170
4350
|
if (parser->current_param_name == name_id) {
|
4171
4351
|
pm_parser_err_token(parser, name, PM_ERR_PARAMETER_CIRCULAR);
|
4172
4352
|
}
|
@@ -4185,6 +4365,15 @@ pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name,
|
|
4185
4365
|
return node;
|
4186
4366
|
}
|
4187
4367
|
|
4368
|
+
/**
|
4369
|
+
* Allocate a new LocalVariableReadNode node.
|
4370
|
+
*/
|
4371
|
+
static pm_local_variable_read_node_t *
|
4372
|
+
pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
|
4373
|
+
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
|
4374
|
+
return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth);
|
4375
|
+
}
|
4376
|
+
|
4188
4377
|
/**
|
4189
4378
|
* Allocate and initialize a new LocalVariableWriteNode node.
|
4190
4379
|
*/
|
@@ -4210,6 +4399,57 @@ pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name,
|
|
4210
4399
|
return node;
|
4211
4400
|
}
|
4212
4401
|
|
4402
|
+
/**
 * Returns true if the bytes in the range [start, end) are exactly `it`.
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    // Anything that is not exactly two bytes long cannot be `it`.
    if (end - start != 2) return false;
    if (start[0] != 'i') return false;

    return start[1] == 't';
}
|
4409
|
+
|
4410
|
+
/**
|
4411
|
+
* Returns true if the given node is `it` default parameter.
|
4412
|
+
*/
|
4413
|
+
static inline bool
|
4414
|
+
pm_node_is_it(pm_parser_t *parser, pm_node_t *node) {
|
4415
|
+
// Check if it's a local variable reference
|
4416
|
+
if (node->type != PM_CALL_NODE) {
|
4417
|
+
return false;
|
4418
|
+
}
|
4419
|
+
|
4420
|
+
// Check if it's a variable call
|
4421
|
+
pm_call_node_t *call_node = (pm_call_node_t *) node;
|
4422
|
+
if (!pm_call_node_variable_call_p(call_node)) {
|
4423
|
+
return false;
|
4424
|
+
}
|
4425
|
+
|
4426
|
+
// Check if it's called `it`
|
4427
|
+
pm_constant_id_t id = ((pm_call_node_t *)node)->name;
|
4428
|
+
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, id);
|
4429
|
+
return pm_token_is_it(constant->start, constant->start + constant->length);
|
4430
|
+
}
|
4431
|
+
|
4432
|
+
/**
|
4433
|
+
* Convert a `it` variable call node to a node for `it` default parameter.
|
4434
|
+
*/
|
4435
|
+
static pm_node_t *
|
4436
|
+
pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
|
4437
|
+
if (
|
4438
|
+
(parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
|
4439
|
+
!parser->current_scope->closed &&
|
4440
|
+
pm_node_is_it(parser, node)
|
4441
|
+
) {
|
4442
|
+
if (parser->current_scope->explicit_params) {
|
4443
|
+
pm_parser_err_previous(parser, PM_ERR_IT_NOT_ALLOWED);
|
4444
|
+
} else {
|
4445
|
+
pm_node_destroy(parser, node);
|
4446
|
+
pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
|
4447
|
+
node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
4448
|
+
}
|
4449
|
+
}
|
4450
|
+
return node;
|
4451
|
+
}
|
4452
|
+
|
4213
4453
|
/**
|
4214
4454
|
* Returns true if the given bounds comprise a numbered parameter (i.e., they
|
4215
4455
|
* are of the form /^_\d$/).
|
@@ -5195,7 +5435,7 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
|
|
5195
5435
|
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
5196
5436
|
.location = PM_LOCATION_TOKEN_VALUE(file_keyword),
|
5197
5437
|
},
|
5198
|
-
.filepath = parser->
|
5438
|
+
.filepath = parser->filepath
|
5199
5439
|
};
|
5200
5440
|
|
5201
5441
|
return node;
|
@@ -5372,18 +5612,59 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
|
|
5372
5612
|
return node;
|
5373
5613
|
}
|
5374
5614
|
|
5615
|
+
/**
|
5616
|
+
* Read through the contents of a string and check if it consists solely of US ASCII code points.
|
5617
|
+
*/
|
5618
|
+
static bool
|
5619
|
+
pm_ascii_only_p(const pm_string_t *contents) {
|
5620
|
+
const size_t length = pm_string_length(contents);
|
5621
|
+
const uint8_t *source = pm_string_source(contents);
|
5622
|
+
|
5623
|
+
for (size_t index = 0; index < length; index++) {
|
5624
|
+
if (source[index] & 0x80) return false;
|
5625
|
+
}
|
5626
|
+
|
5627
|
+
return true;
|
5628
|
+
}
|
5629
|
+
|
5630
|
+
/**
|
5631
|
+
* Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
|
5632
|
+
* encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
|
5633
|
+
* points. Otherwise, the encoding may be explicitly set with an escape
|
5634
|
+
* sequence.
|
5635
|
+
*/
|
5636
|
+
static inline pm_node_flags_t
|
5637
|
+
parse_symbol_encoding(const pm_parser_t *parser, const pm_string_t *contents) {
|
5638
|
+
if (parser->explicit_encoding != NULL) {
|
5639
|
+
// A Symbol may optionally have its encoding explicitly set. This will
|
5640
|
+
// happen if an escape sequence results in a non-ASCII code point.
|
5641
|
+
if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
|
5642
|
+
return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
|
5643
|
+
} else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
|
5644
|
+
return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
|
5645
|
+
}
|
5646
|
+
} else if (pm_ascii_only_p(contents)) {
|
5647
|
+
// Ruby stipulates that all source files must use an ASCII-compatible
|
5648
|
+
// encoding. Thus, all symbols appearing in source are eligible for
|
5649
|
+
// "downgrading" to US-ASCII.
|
5650
|
+
return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
|
5651
|
+
}
|
5652
|
+
|
5653
|
+
return 0;
|
5654
|
+
}
|
5655
|
+
|
5375
5656
|
/**
|
5376
5657
|
* Allocate and initialize a new SymbolNode node with the given unescaped
|
5377
5658
|
* string.
|
5378
5659
|
*/
|
5379
5660
|
static pm_symbol_node_t *
|
5380
|
-
pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped) {
|
5661
|
+
pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
|
5381
5662
|
pm_symbol_node_t *node = PM_ALLOC_NODE(parser, pm_symbol_node_t);
|
5382
5663
|
|
5383
5664
|
*node = (pm_symbol_node_t) {
|
5384
5665
|
{
|
5385
5666
|
.type = PM_SYMBOL_NODE,
|
5386
|
-
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
5667
|
+
.flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
|
5387
5668
|
.location = {
|
5388
5669
|
.start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
|
5389
5670
|
.end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
|
@@ -5403,7 +5684,7 @@ pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening,
|
|
5403
5684
|
*/
|
5404
5685
|
static inline pm_symbol_node_t *
|
5405
5686
|
pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
|
5406
|
-
return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY);
|
5687
|
+
return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
|
5407
5688
|
}
|
5408
5689
|
|
5409
5690
|
/**
|
@@ -5411,7 +5692,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
|
|
5411
5692
|
*/
|
5412
5693
|
static pm_symbol_node_t *
|
5413
5694
|
pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
|
5414
|
-
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string);
|
5695
|
+
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, &parser->current_string));
|
5415
5696
|
parser->current_string = PM_STRING_EMPTY;
|
5416
5697
|
return node;
|
5417
5698
|
}
|
@@ -5433,6 +5714,8 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
5433
5714
|
|
5434
5715
|
assert((label.end - label.start) >= 0);
|
5435
5716
|
pm_string_shared_init(&node->unescaped, label.start, label.end);
|
5717
|
+
pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &node->unescaped));
|
5718
|
+
|
5436
5719
|
break;
|
5437
5720
|
}
|
5438
5721
|
case PM_TOKEN_MISSING: {
|
@@ -5495,6 +5778,8 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
|
|
5495
5778
|
.unescaped = node->unescaped
|
5496
5779
|
};
|
5497
5780
|
|
5781
|
+
pm_node_flag_set((pm_node_t *)new_node, parse_symbol_encoding(parser, &node->unescaped));
|
5782
|
+
|
5498
5783
|
// We are explicitly _not_ using pm_node_destroy here because we don't want
|
5499
5784
|
// to trash the unescaped string. We could instead copy the string if we
|
5500
5785
|
// know that it is owned, but we're taking the fast path for now.
|
@@ -5885,6 +6170,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
|
|
5885
6170
|
.closed = closed,
|
5886
6171
|
.explicit_params = false,
|
5887
6172
|
.numbered_parameters = 0,
|
6173
|
+
.forwarding_params = 0,
|
5888
6174
|
};
|
5889
6175
|
|
5890
6176
|
pm_constant_id_list_init(&scope->locals);
|
@@ -5893,6 +6179,76 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
|
|
5893
6179
|
return true;
|
5894
6180
|
}
|
5895
6181
|
|
6182
|
+
static void
|
6183
|
+
pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const pm_token_t * token, const uint8_t mask, pm_diagnostic_id_t diag)
|
6184
|
+
{
|
6185
|
+
pm_scope_t *scope = parser->current_scope;
|
6186
|
+
while (scope) {
|
6187
|
+
if (scope->forwarding_params & mask) {
|
6188
|
+
if (!scope->closed) {
|
6189
|
+
pm_parser_err_token(parser, token, diag);
|
6190
|
+
return;
|
6191
|
+
}
|
6192
|
+
return;
|
6193
|
+
}
|
6194
|
+
if (scope->closed) break;
|
6195
|
+
scope = scope->previous;
|
6196
|
+
}
|
6197
|
+
|
6198
|
+
pm_parser_err_token(parser, token, diag);
|
6199
|
+
}
|
6200
|
+
|
6201
|
+
static inline void
|
6202
|
+
pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token)
|
6203
|
+
{
|
6204
|
+
pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_BLOCK, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
|
6205
|
+
}
|
6206
|
+
|
6207
|
+
static void
|
6208
|
+
pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token)
|
6209
|
+
{
|
6210
|
+
pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_POSITIONALS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
6211
|
+
}
|
6212
|
+
|
6213
|
+
static inline void
|
6214
|
+
pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t * token)
|
6215
|
+
{
|
6216
|
+
pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_ALL, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
6217
|
+
}
|
6218
|
+
|
6219
|
+
static inline void
|
6220
|
+
pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token)
|
6221
|
+
{
|
6222
|
+
pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_KEYWORDS, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
6223
|
+
}
|
6224
|
+
|
6225
|
+
/**
|
6226
|
+
* Save the current param name as the return value and set it to the given
|
6227
|
+
* constant id.
|
6228
|
+
*/
|
6229
|
+
static inline pm_constant_id_t
|
6230
|
+
pm_parser_current_param_name_set(pm_parser_t *parser, pm_constant_id_t current_param_name) {
|
6231
|
+
pm_constant_id_t saved_param_name = parser->current_param_name;
|
6232
|
+
parser->current_param_name = current_param_name;
|
6233
|
+
return saved_param_name;
|
6234
|
+
}
|
6235
|
+
|
6236
|
+
/**
|
6237
|
+
* Save the current param name as the return value and clear it.
|
6238
|
+
*/
|
6239
|
+
static inline pm_constant_id_t
|
6240
|
+
pm_parser_current_param_name_unset(pm_parser_t *parser) {
|
6241
|
+
return pm_parser_current_param_name_set(parser, PM_CONSTANT_ID_UNSET);
|
6242
|
+
}
|
6243
|
+
|
6244
|
+
/**
|
6245
|
+
* Restore the current param name from the given value.
|
6246
|
+
*/
|
6247
|
+
static inline void
|
6248
|
+
pm_parser_current_param_name_restore(pm_parser_t *parser, pm_constant_id_t saved_param_name) {
|
6249
|
+
parser->current_param_name = saved_param_name;
|
6250
|
+
}
|
6251
|
+
|
5896
6252
|
/**
|
5897
6253
|
* Check if any of the currently visible scopes contain a local variable
|
5898
6254
|
* described by the given constant id.
|
@@ -5969,26 +6325,41 @@ pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t leng
|
|
5969
6325
|
return constant_id;
|
5970
6326
|
}
|
5971
6327
|
|
6328
|
+
/**
|
6329
|
+
* Add a local variable from a constant string to the current scope.
|
6330
|
+
*/
|
6331
|
+
static pm_constant_id_t
|
6332
|
+
pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
|
6333
|
+
pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
|
6334
|
+
if (constant_id != 0) pm_parser_local_add(parser, constant_id);
|
6335
|
+
return constant_id;
|
6336
|
+
}
|
6337
|
+
|
5972
6338
|
/**
|
5973
6339
|
* Add a parameter name to the current scope and check whether the name of the
|
5974
6340
|
* parameter is unique or not.
|
6341
|
+
*
|
6342
|
+
* Returns `true` if this is a duplicate parameter name, otherwise returns
|
6343
|
+
* false.
|
5975
6344
|
*/
|
5976
|
-
static
|
6345
|
+
static bool
|
5977
6346
|
pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
|
5978
6347
|
// We want to check whether the parameter name is a numbered parameter or
|
5979
6348
|
// not.
|
5980
6349
|
pm_refute_numbered_parameter(parser, name->start, name->end);
|
5981
6350
|
|
5982
|
-
// We want to ignore any parameter name that starts with an underscore.
|
5983
|
-
if ((name->start < name->end) && (*name->start == '_')) return;
|
5984
|
-
|
5985
6351
|
// Otherwise we'll fetch the constant id for the parameter name and check
|
5986
6352
|
// whether it's already in the current scope.
|
5987
6353
|
pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
|
5988
6354
|
|
5989
6355
|
if (pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
|
5990
|
-
|
6356
|
+
// Add an error if the parameter doesn't start with _ and has been seen before
|
6357
|
+
if ((name->start < name->end) && (*name->start != '_')) {
|
6358
|
+
pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_REPEAT);
|
6359
|
+
}
|
6360
|
+
return true;
|
5991
6361
|
}
|
6362
|
+
return false;
|
5992
6363
|
}
|
5993
6364
|
|
5994
6365
|
/**
|
@@ -6003,105 +6374,6 @@ pm_parser_scope_pop(pm_parser_t *parser) {
|
|
6003
6374
|
free(scope);
|
6004
6375
|
}
|
6005
6376
|
|
6006
|
-
/******************************************************************************/
|
6007
|
-
/* Basic character checks */
|
6008
|
-
/******************************************************************************/
|
6009
|
-
|
6010
|
-
/**
|
6011
|
-
* This function is used extremely frequently to lex all of the identifiers in a
|
6012
|
-
* source file, so it's important that it be as fast as possible. For this
|
6013
|
-
* reason we have the encoding_changed boolean to check if we need to go through
|
6014
|
-
* the function pointer or can just directly use the UTF-8 functions.
|
6015
|
-
*/
|
6016
|
-
static inline size_t
|
6017
|
-
char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
|
6018
|
-
if (parser->encoding_changed) {
|
6019
|
-
size_t width;
|
6020
|
-
if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
|
6021
|
-
return width;
|
6022
|
-
} else if (*b == '_') {
|
6023
|
-
return 1;
|
6024
|
-
} else if (*b >= 0x80) {
|
6025
|
-
return parser->encoding->char_width(b, parser->end - b);
|
6026
|
-
} else {
|
6027
|
-
return 0;
|
6028
|
-
}
|
6029
|
-
} else if (*b < 0x80) {
|
6030
|
-
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
|
6031
|
-
} else {
|
6032
|
-
return (size_t) (pm_encoding_utf_8_alpha_char(b, parser->end - b) || 1u);
|
6033
|
-
}
|
6034
|
-
}
|
6035
|
-
|
6036
|
-
/**
|
6037
|
-
* Similar to char_is_identifier but this function assumes that the encoding
|
6038
|
-
* has not been changed.
|
6039
|
-
*/
|
6040
|
-
static inline size_t
|
6041
|
-
char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
6042
|
-
if (*b < 0x80) {
|
6043
|
-
return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
|
6044
|
-
} else {
|
6045
|
-
return (size_t) (pm_encoding_utf_8_alnum_char(b, end - b) || 1u);
|
6046
|
-
}
|
6047
|
-
}
|
6048
|
-
|
6049
|
-
/**
|
6050
|
-
* Like the above, this function is also used extremely frequently to lex all of
|
6051
|
-
* the identifiers in a source file once the first character has been found. So
|
6052
|
-
* it's important that it be as fast as possible.
|
6053
|
-
*/
|
6054
|
-
static inline size_t
|
6055
|
-
char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
6056
|
-
if (parser->encoding_changed) {
|
6057
|
-
size_t width;
|
6058
|
-
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
|
6059
|
-
return width;
|
6060
|
-
} else if (*b == '_') {
|
6061
|
-
return 1;
|
6062
|
-
} else if (*b >= 0x80) {
|
6063
|
-
return parser->encoding->char_width(b, parser->end - b);
|
6064
|
-
} else {
|
6065
|
-
return 0;
|
6066
|
-
}
|
6067
|
-
}
|
6068
|
-
return char_is_identifier_utf8(b, parser->end);
|
6069
|
-
}
|
6070
|
-
|
6071
|
-
// Here we're defining a perfect hash for the characters that are allowed in
|
6072
|
-
// global names. This is used to quickly check the next character after a $ to
|
6073
|
-
// see if it's a valid character for a global name.
|
6074
|
-
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
|
6075
|
-
#define PUNCT(idx) ( \
|
6076
|
-
BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
|
6077
|
-
BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
|
6078
|
-
BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
|
6079
|
-
BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
|
6080
|
-
BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
|
6081
|
-
BIT('0', idx))
|
6082
|
-
|
6083
|
-
const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
|
6084
|
-
|
6085
|
-
#undef BIT
|
6086
|
-
#undef PUNCT
|
6087
|
-
|
6088
|
-
static inline bool
|
6089
|
-
char_is_global_name_punctuation(const uint8_t b) {
|
6090
|
-
const unsigned int i = (const unsigned int) b;
|
6091
|
-
if (i <= 0x20 || 0x7e < i) return false;
|
6092
|
-
|
6093
|
-
return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
|
6094
|
-
}
|
6095
|
-
|
6096
|
-
static inline bool
|
6097
|
-
token_is_setter_name(pm_token_t *token) {
|
6098
|
-
return (
|
6099
|
-
(token->type == PM_TOKEN_IDENTIFIER) &&
|
6100
|
-
(token->end - token->start >= 2) &&
|
6101
|
-
(token->end[-1] == '=')
|
6102
|
-
);
|
6103
|
-
}
|
6104
|
-
|
6105
6377
|
/******************************************************************************/
|
6106
6378
|
/* Stack helpers */
|
6107
6379
|
/******************************************************************************/
|
@@ -6317,8 +6589,10 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) {
|
|
6317
6589
|
*/
|
6318
6590
|
static void
|
6319
6591
|
parser_lex_magic_comment_frozen_string_literal_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
6320
|
-
if (start + 4 <= end && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
|
6592
|
+
if ((start + 4 <= end) && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
|
6321
6593
|
parser->frozen_string_literal = true;
|
6594
|
+
} else if ((start + 5 <= end) && pm_strncasecmp(start, (const uint8_t *) "false", 5) == 0) {
|
6595
|
+
parser->frozen_string_literal = false;
|
6322
6596
|
}
|
6323
6597
|
}
|
6324
6598
|
|
@@ -6541,21 +6815,27 @@ context_terminator(pm_context_t context, pm_token_t *token) {
|
|
6541
6815
|
return token->type == PM_TOKEN_BRACE_RIGHT;
|
6542
6816
|
case PM_CONTEXT_PREDICATE:
|
6543
6817
|
return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
|
6818
|
+
case PM_CONTEXT_NONE:
|
6819
|
+
return false;
|
6544
6820
|
}
|
6545
6821
|
|
6546
6822
|
return false;
|
6547
6823
|
}
|
6548
6824
|
|
6549
|
-
|
6550
|
-
|
6825
|
+
/**
|
6826
|
+
* Returns the context that the given token is found to be terminating, or
|
6827
|
+
* returns PM_CONTEXT_NONE.
|
6828
|
+
*/
|
6829
|
+
static pm_context_t
|
6830
|
+
context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
|
6551
6831
|
pm_context_node_t *context_node = parser->current_context;
|
6552
6832
|
|
6553
6833
|
while (context_node != NULL) {
|
6554
|
-
if (context_terminator(context_node->context, token)) return
|
6834
|
+
if (context_terminator(context_node->context, token)) return context_node->context;
|
6555
6835
|
context_node = context_node->prev;
|
6556
6836
|
}
|
6557
6837
|
|
6558
|
-
return
|
6838
|
+
return PM_CONTEXT_NONE;
|
6559
6839
|
}
|
6560
6840
|
|
6561
6841
|
static bool
|
@@ -6583,7 +6863,7 @@ context_pop(pm_parser_t *parser) {
|
|
6583
6863
|
}
|
6584
6864
|
|
6585
6865
|
static bool
|
6586
|
-
context_p(pm_parser_t *parser, pm_context_t context) {
|
6866
|
+
context_p(const pm_parser_t *parser, pm_context_t context) {
|
6587
6867
|
pm_context_node_t *context_node = parser->current_context;
|
6588
6868
|
|
6589
6869
|
while (context_node != NULL) {
|
@@ -6595,7 +6875,7 @@ context_p(pm_parser_t *parser, pm_context_t context) {
|
|
6595
6875
|
}
|
6596
6876
|
|
6597
6877
|
static bool
|
6598
|
-
context_def_p(pm_parser_t *parser) {
|
6878
|
+
context_def_p(const pm_parser_t *parser) {
|
6599
6879
|
pm_context_node_t *context_node = parser->current_context;
|
6600
6880
|
|
6601
6881
|
while (context_node != NULL) {
|
@@ -6618,6 +6898,55 @@ context_def_p(pm_parser_t *parser) {
|
|
6618
6898
|
return false;
|
6619
6899
|
}
|
6620
6900
|
|
6901
|
+
/**
|
6902
|
+
* Returns a human readable string for the given context, used in error
|
6903
|
+
* messages.
|
6904
|
+
*/
|
6905
|
+
static const char *
|
6906
|
+
context_human(pm_context_t context) {
|
6907
|
+
switch (context) {
|
6908
|
+
case PM_CONTEXT_NONE:
|
6909
|
+
assert(false && "unreachable");
|
6910
|
+
return "";
|
6911
|
+
case PM_CONTEXT_BEGIN: return "begin statement";
|
6912
|
+
case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
|
6913
|
+
case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
|
6914
|
+
case PM_CONTEXT_CASE_WHEN: return "'when' clause";
|
6915
|
+
case PM_CONTEXT_CASE_IN: return "'in' clause";
|
6916
|
+
case PM_CONTEXT_CLASS: return "class definition";
|
6917
|
+
case PM_CONTEXT_DEF: return "method definition";
|
6918
|
+
case PM_CONTEXT_DEF_PARAMS: return "method parameters";
|
6919
|
+
case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
|
6920
|
+
case PM_CONTEXT_ELSE: return "'else' clause";
|
6921
|
+
case PM_CONTEXT_ELSIF: return "'elsif' clause";
|
6922
|
+
case PM_CONTEXT_EMBEXPR: return "embedded expression";
|
6923
|
+
case PM_CONTEXT_ENSURE: return "'ensure' clause";
|
6924
|
+
case PM_CONTEXT_ENSURE_DEF: return "'ensure' clause";
|
6925
|
+
case PM_CONTEXT_FOR: return "for loop";
|
6926
|
+
case PM_CONTEXT_FOR_INDEX: return "for loop index";
|
6927
|
+
case PM_CONTEXT_IF: return "if statement";
|
6928
|
+
case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
|
6929
|
+
case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
|
6930
|
+
case PM_CONTEXT_MAIN: return "top level context";
|
6931
|
+
case PM_CONTEXT_MODULE: return "module definition";
|
6932
|
+
case PM_CONTEXT_PARENS: return "parentheses";
|
6933
|
+
case PM_CONTEXT_POSTEXE: return "'END' block";
|
6934
|
+
case PM_CONTEXT_PREDICATE: return "predicate";
|
6935
|
+
case PM_CONTEXT_PREEXE: return "'BEGIN' block";
|
6936
|
+
case PM_CONTEXT_RESCUE_ELSE: return "'else' clause";
|
6937
|
+
case PM_CONTEXT_RESCUE_ELSE_DEF: return "'else' clause";
|
6938
|
+
case PM_CONTEXT_RESCUE: return "'rescue' clause";
|
6939
|
+
case PM_CONTEXT_RESCUE_DEF: return "'rescue' clause";
|
6940
|
+
case PM_CONTEXT_SCLASS: return "singleton class definition";
|
6941
|
+
case PM_CONTEXT_UNLESS: return "unless statement";
|
6942
|
+
case PM_CONTEXT_UNTIL: return "until statement";
|
6943
|
+
case PM_CONTEXT_WHILE: return "while statement";
|
6944
|
+
}
|
6945
|
+
|
6946
|
+
assert(false && "unreachable");
|
6947
|
+
return "";
|
6948
|
+
}
|
6949
|
+
|
6621
6950
|
/******************************************************************************/
|
6622
6951
|
/* Specific token lexers */
|
6623
6952
|
/******************************************************************************/
|
@@ -7360,6 +7689,28 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
|
|
7360
7689
|
pm_buffer_append_byte(buffer, byte);
|
7361
7690
|
}
|
7362
7691
|
|
7692
|
+
/**
|
7693
|
+
* Write each byte of the given escaped character into the buffer.
|
7694
|
+
*/
|
7695
|
+
static inline void
|
7696
|
+
escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
|
7697
|
+
size_t width;
|
7698
|
+
if (parser->encoding_changed) {
|
7699
|
+
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
7700
|
+
} else {
|
7701
|
+
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
|
7702
|
+
}
|
7703
|
+
|
7704
|
+
// TODO: If the character is invalid in the given encoding, then we'll just
|
7705
|
+
// push one byte into the buffer. This should actually be an error.
|
7706
|
+
width = (width == 0) ? 1 : width;
|
7707
|
+
|
7708
|
+
for (size_t index = 0; index < width; index++) {
|
7709
|
+
escape_write_byte_encoded(parser, buffer, *parser->current.end);
|
7710
|
+
parser->current.end++;
|
7711
|
+
}
|
7712
|
+
}
|
7713
|
+
|
7363
7714
|
/**
|
7364
7715
|
* The regular expression engine doesn't support the same escape sequences as
|
7365
7716
|
* Ruby does. So first we have to read the escape sequence, and then we have to
|
@@ -7698,7 +8049,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
|
|
7698
8049
|
/* fallthrough */
|
7699
8050
|
default: {
|
7700
8051
|
if (parser->current.end < parser->end) {
|
7701
|
-
|
8052
|
+
escape_write_escape_encoded(parser, buffer);
|
7702
8053
|
}
|
7703
8054
|
return;
|
7704
8055
|
}
|
@@ -7975,14 +8326,43 @@ typedef struct {
|
|
7975
8326
|
* Push the given byte into the token buffer.
|
7976
8327
|
*/
|
7977
8328
|
static inline void
|
7978
|
-
|
8329
|
+
pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
|
7979
8330
|
pm_buffer_append_byte(&token_buffer->buffer, byte);
|
7980
8331
|
}
|
7981
8332
|
|
8333
|
+
/**
|
8334
|
+
* Append the given bytes into the token buffer.
|
8335
|
+
*/
|
8336
|
+
static inline void
|
8337
|
+
pm_token_buffer_push_bytes(pm_token_buffer_t *token_buffer, const uint8_t *bytes, size_t length) {
|
8338
|
+
pm_buffer_append_bytes(&token_buffer->buffer, bytes, length);
|
8339
|
+
}
|
8340
|
+
|
8341
|
+
/**
|
8342
|
+
* Push an escaped character into the token buffer.
|
8343
|
+
*/
|
8344
|
+
static inline void
|
8345
|
+
pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
|
8346
|
+
// First, determine the width of the character to be escaped.
|
8347
|
+
size_t width;
|
8348
|
+
if (parser->encoding_changed) {
|
8349
|
+
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
8350
|
+
} else {
|
8351
|
+
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
|
8352
|
+
}
|
8353
|
+
|
8354
|
+
// TODO: If the character is invalid in the given encoding, then we'll just
|
8355
|
+
// push one byte into the buffer. This should actually be an error.
|
8356
|
+
width = (width == 0 ? 1 : width);
|
8357
|
+
|
8358
|
+
// Now, push the bytes into the buffer.
|
8359
|
+
pm_token_buffer_push_bytes(token_buffer, parser->current.end, width);
|
8360
|
+
parser->current.end += width;
|
8361
|
+
}
|
8362
|
+
|
7982
8363
|
/**
|
7983
8364
|
* When we're about to return from lexing the current token and we know for sure
|
7984
8365
|
* that we have found an escape sequence, this function is called to copy the
|
7985
|
-
*
|
7986
8366
|
* contents of the token buffer into the current string on the parser so that it
|
7987
8367
|
* can be attached to the correct node.
|
7988
8368
|
*/
|
@@ -7997,7 +8377,6 @@ pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
|
7997
8377
|
* string. If we haven't pushed anything into the buffer, this means that we
|
7998
8378
|
* never found an escape sequence, so we can directly reference the bounds of
|
7999
8379
|
* the current string. Either way, at the return of this function it is expected
|
8000
|
-
*
|
8001
8380
|
* that parser->current_string is established in such a way that it can be
|
8002
8381
|
* attached to a node.
|
8003
8382
|
*/
|
@@ -8016,7 +8395,6 @@ pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
|
8016
8395
|
* point into the buffer because we're about to provide a string that has
|
8017
8396
|
* different content than a direct slice of the source.
|
8018
8397
|
*
|
8019
|
-
*
|
8020
8398
|
* It is expected that the parser's current token end will be pointing at one
|
8021
8399
|
* byte past the backslash that starts the escape sequence.
|
8022
8400
|
*/
|
@@ -8070,6 +8448,34 @@ pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor,
|
|
8070
8448
|
return whitespace;
|
8071
8449
|
}
|
8072
8450
|
|
8451
|
+
/**
|
8452
|
+
* Lex past the delimiter of a percent literal. Handle newlines and heredocs
|
8453
|
+
* appropriately.
|
8454
|
+
*/
|
8455
|
+
static uint8_t
|
8456
|
+
pm_lex_percent_delimiter(pm_parser_t *parser) {
|
8457
|
+
size_t eol_length = match_eol(parser);
|
8458
|
+
|
8459
|
+
if (eol_length) {
|
8460
|
+
if (parser->heredoc_end) {
|
8461
|
+
// If we have already lexed a heredoc, then the newline has already
|
8462
|
+
// been added to the list. In this case we want to just flush the
|
8463
|
+
// heredoc end.
|
8464
|
+
parser_flush_heredoc_end(parser);
|
8465
|
+
} else {
|
8466
|
+
// Otherwise, we'll add the newline to the list of newlines.
|
8467
|
+
pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
|
8468
|
+
}
|
8469
|
+
|
8470
|
+
const uint8_t delimiter = *parser->current.end;
|
8471
|
+
parser->current.end += eol_length;
|
8472
|
+
|
8473
|
+
return delimiter;
|
8474
|
+
}
|
8475
|
+
|
8476
|
+
return *parser->current.end++;
|
8477
|
+
}
|
8478
|
+
|
8073
8479
|
/**
|
8074
8480
|
* This is a convenience macro that will set the current token type, call the
|
8075
8481
|
* lex callback, and then return from the parser_lex function.
|
@@ -8635,7 +9041,7 @@ parser_lex(pm_parser_t *parser) {
|
|
8635
9041
|
// this is not a valid heredoc declaration. In this case we
|
8636
9042
|
// will add an error, but we will still return a heredoc
|
8637
9043
|
// start.
|
8638
|
-
pm_parser_err_current(parser,
|
9044
|
+
pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
|
8639
9045
|
body_start = parser->end;
|
8640
9046
|
} else {
|
8641
9047
|
// Otherwise, we want to indicate that the body of the
|
@@ -8826,12 +9232,10 @@ parser_lex(pm_parser_t *parser) {
|
|
8826
9232
|
LEX(PM_TOKEN_PLUS_EQUAL);
|
8827
9233
|
}
|
8828
9234
|
|
8829
|
-
|
8830
|
-
|
8831
|
-
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS)
|
8832
|
-
|
8833
|
-
|
8834
|
-
if (lex_state_beg_p(parser) || spcarg) {
|
9235
|
+
if (
|
9236
|
+
lex_state_beg_p(parser) ||
|
9237
|
+
(lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
|
9238
|
+
) {
|
8835
9239
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
8836
9240
|
|
8837
9241
|
if (pm_char_is_decimal_digit(peek(parser))) {
|
@@ -8871,11 +9275,12 @@ parser_lex(pm_parser_t *parser) {
|
|
8871
9275
|
}
|
8872
9276
|
|
8873
9277
|
bool spcarg = lex_state_spcarg_p(parser, space_seen);
|
8874
|
-
|
9278
|
+
bool is_beg = lex_state_beg_p(parser);
|
9279
|
+
if (!is_beg && spcarg) {
|
8875
9280
|
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
|
8876
9281
|
}
|
8877
9282
|
|
8878
|
-
if (
|
9283
|
+
if (is_beg || spcarg) {
|
8879
9284
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
8880
9285
|
LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
|
8881
9286
|
}
|
@@ -9026,15 +9431,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9026
9431
|
pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
|
9027
9432
|
}
|
9028
9433
|
|
9029
|
-
|
9030
|
-
|
9031
|
-
size_t eol_length = match_eol(parser);
|
9032
|
-
if (eol_length) {
|
9033
|
-
parser->current.end += eol_length;
|
9034
|
-
pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
|
9035
|
-
} else {
|
9036
|
-
parser->current.end++;
|
9037
|
-
}
|
9434
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9435
|
+
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9038
9436
|
|
9039
9437
|
if (parser->current.end < parser->end) {
|
9040
9438
|
LEX(PM_TOKEN_STRING_BEGIN);
|
@@ -9054,7 +9452,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9054
9452
|
parser->current.end++;
|
9055
9453
|
|
9056
9454
|
if (parser->current.end < parser->end) {
|
9057
|
-
lex_mode_push_list(parser, false,
|
9455
|
+
lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
|
9058
9456
|
} else {
|
9059
9457
|
lex_mode_push_list_eof(parser);
|
9060
9458
|
}
|
@@ -9065,7 +9463,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9065
9463
|
parser->current.end++;
|
9066
9464
|
|
9067
9465
|
if (parser->current.end < parser->end) {
|
9068
|
-
lex_mode_push_list(parser, true,
|
9466
|
+
lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
|
9069
9467
|
} else {
|
9070
9468
|
lex_mode_push_list_eof(parser);
|
9071
9469
|
}
|
@@ -9076,9 +9474,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9076
9474
|
parser->current.end++;
|
9077
9475
|
|
9078
9476
|
if (parser->current.end < parser->end) {
|
9079
|
-
|
9080
|
-
|
9081
|
-
parser->current.end++;
|
9477
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9478
|
+
lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9082
9479
|
} else {
|
9083
9480
|
lex_mode_push_regexp(parser, '\0', '\0');
|
9084
9481
|
}
|
@@ -9089,9 +9486,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9089
9486
|
parser->current.end++;
|
9090
9487
|
|
9091
9488
|
if (parser->current.end < parser->end) {
|
9092
|
-
|
9093
|
-
|
9094
|
-
parser->current.end++;
|
9489
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9490
|
+
lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9095
9491
|
} else {
|
9096
9492
|
lex_mode_push_string_eof(parser);
|
9097
9493
|
}
|
@@ -9102,9 +9498,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9102
9498
|
parser->current.end++;
|
9103
9499
|
|
9104
9500
|
if (parser->current.end < parser->end) {
|
9105
|
-
|
9106
|
-
|
9107
|
-
parser->current.end++;
|
9501
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9502
|
+
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9108
9503
|
} else {
|
9109
9504
|
lex_mode_push_string_eof(parser);
|
9110
9505
|
}
|
@@ -9115,9 +9510,9 @@ parser_lex(pm_parser_t *parser) {
|
|
9115
9510
|
parser->current.end++;
|
9116
9511
|
|
9117
9512
|
if (parser->current.end < parser->end) {
|
9118
|
-
|
9513
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9514
|
+
lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9119
9515
|
lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
|
9120
|
-
parser->current.end++;
|
9121
9516
|
} else {
|
9122
9517
|
lex_mode_push_string_eof(parser);
|
9123
9518
|
}
|
@@ -9128,7 +9523,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9128
9523
|
parser->current.end++;
|
9129
9524
|
|
9130
9525
|
if (parser->current.end < parser->end) {
|
9131
|
-
lex_mode_push_list(parser, false,
|
9526
|
+
lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
|
9132
9527
|
} else {
|
9133
9528
|
lex_mode_push_list_eof(parser);
|
9134
9529
|
}
|
@@ -9139,7 +9534,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9139
9534
|
parser->current.end++;
|
9140
9535
|
|
9141
9536
|
if (parser->current.end < parser->end) {
|
9142
|
-
lex_mode_push_list(parser, true,
|
9537
|
+
lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
|
9143
9538
|
} else {
|
9144
9539
|
lex_mode_push_list_eof(parser);
|
9145
9540
|
}
|
@@ -9150,8 +9545,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9150
9545
|
parser->current.end++;
|
9151
9546
|
|
9152
9547
|
if (parser->current.end < parser->end) {
|
9153
|
-
|
9154
|
-
parser
|
9548
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9549
|
+
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9155
9550
|
} else {
|
9156
9551
|
lex_mode_push_string_eof(parser);
|
9157
9552
|
}
|
@@ -9377,18 +9772,18 @@ parser_lex(pm_parser_t *parser) {
|
|
9377
9772
|
case '\t':
|
9378
9773
|
case '\v':
|
9379
9774
|
case '\\':
|
9380
|
-
|
9775
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9381
9776
|
parser->current.end++;
|
9382
9777
|
break;
|
9383
9778
|
case '\r':
|
9384
9779
|
parser->current.end++;
|
9385
9780
|
if (peek(parser) != '\n') {
|
9386
|
-
|
9781
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
9387
9782
|
break;
|
9388
9783
|
}
|
9389
9784
|
/* fallthrough */
|
9390
9785
|
case '\n':
|
9391
|
-
|
9786
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
9392
9787
|
|
9393
9788
|
if (parser->heredoc_end) {
|
9394
9789
|
// ... if we are on the same line as a heredoc,
|
@@ -9406,14 +9801,13 @@ parser_lex(pm_parser_t *parser) {
|
|
9406
9801
|
break;
|
9407
9802
|
default:
|
9408
9803
|
if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
|
9409
|
-
|
9804
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9410
9805
|
parser->current.end++;
|
9411
9806
|
} else if (lex_mode->as.list.interpolation) {
|
9412
9807
|
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
9413
9808
|
} else {
|
9414
|
-
|
9415
|
-
|
9416
|
-
parser->current.end++;
|
9809
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9810
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
9417
9811
|
}
|
9418
9812
|
|
9419
9813
|
break;
|
@@ -9571,9 +9965,9 @@ parser_lex(pm_parser_t *parser) {
|
|
9571
9965
|
parser->current.end++;
|
9572
9966
|
if (peek(parser) != '\n') {
|
9573
9967
|
if (lex_mode->as.regexp.terminator != '\r') {
|
9574
|
-
|
9968
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9575
9969
|
}
|
9576
|
-
|
9970
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
9577
9971
|
break;
|
9578
9972
|
}
|
9579
9973
|
/* fallthrough */
|
@@ -9608,20 +10002,19 @@ parser_lex(pm_parser_t *parser) {
|
|
9608
10002
|
case '$': case ')': case '*': case '+':
|
9609
10003
|
case '.': case '>': case '?': case ']':
|
9610
10004
|
case '^': case '|': case '}':
|
9611
|
-
|
10005
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9612
10006
|
break;
|
9613
10007
|
default:
|
9614
10008
|
break;
|
9615
10009
|
}
|
9616
10010
|
|
9617
|
-
|
10011
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9618
10012
|
parser->current.end++;
|
9619
10013
|
break;
|
9620
10014
|
}
|
9621
10015
|
|
9622
|
-
if (peeked < 0x80)
|
9623
|
-
|
9624
|
-
parser->current.end++;
|
10016
|
+
if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer, '\\');
|
10017
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
9625
10018
|
break;
|
9626
10019
|
}
|
9627
10020
|
|
@@ -9788,23 +10181,23 @@ parser_lex(pm_parser_t *parser) {
|
|
9788
10181
|
|
9789
10182
|
switch (peeked) {
|
9790
10183
|
case '\\':
|
9791
|
-
|
10184
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9792
10185
|
parser->current.end++;
|
9793
10186
|
break;
|
9794
10187
|
case '\r':
|
9795
10188
|
parser->current.end++;
|
9796
10189
|
if (peek(parser) != '\n') {
|
9797
10190
|
if (!lex_mode->as.string.interpolation) {
|
9798
|
-
|
10191
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9799
10192
|
}
|
9800
|
-
|
10193
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
9801
10194
|
break;
|
9802
10195
|
}
|
9803
10196
|
/* fallthrough */
|
9804
10197
|
case '\n':
|
9805
10198
|
if (!lex_mode->as.string.interpolation) {
|
9806
|
-
|
9807
|
-
|
10199
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10200
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
9808
10201
|
}
|
9809
10202
|
|
9810
10203
|
if (parser->heredoc_end) {
|
@@ -9823,17 +10216,16 @@ parser_lex(pm_parser_t *parser) {
|
|
9823
10216
|
break;
|
9824
10217
|
default:
|
9825
10218
|
if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
|
9826
|
-
|
10219
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9827
10220
|
parser->current.end++;
|
9828
10221
|
} else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
|
9829
|
-
|
10222
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9830
10223
|
parser->current.end++;
|
9831
10224
|
} else if (lex_mode->as.string.interpolation) {
|
9832
10225
|
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
9833
10226
|
} else {
|
9834
|
-
|
9835
|
-
|
9836
|
-
parser->current.end++;
|
10227
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10228
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
9837
10229
|
}
|
9838
10230
|
|
9839
10231
|
break;
|
@@ -9888,15 +10280,22 @@ parser_lex(pm_parser_t *parser) {
|
|
9888
10280
|
parser->next_start = NULL;
|
9889
10281
|
}
|
9890
10282
|
|
9891
|
-
//
|
9892
|
-
//
|
10283
|
+
// Now let's grab the information about the identifier off of the
|
10284
|
+
// current lex mode.
|
10285
|
+
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
10286
|
+
|
10287
|
+
// We'll check if we're at the end of the file. If we are, then we
|
10288
|
+
// will add an error (because we weren't able to find the
|
10289
|
+
// terminator) but still continue parsing so that content after the
|
10290
|
+
// declaration of the heredoc can be parsed.
|
9893
10291
|
if (parser->current.end >= parser->end) {
|
9894
|
-
|
10292
|
+
pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
|
10293
|
+
parser->next_start = lex_mode->as.heredoc.next_start;
|
10294
|
+
parser->heredoc_end = parser->current.end;
|
10295
|
+
lex_state_set(parser, PM_LEX_STATE_END);
|
10296
|
+
LEX(PM_TOKEN_HEREDOC_END);
|
9895
10297
|
}
|
9896
10298
|
|
9897
|
-
// Now let's grab the information about the identifier off of the current
|
9898
|
-
// lex mode.
|
9899
|
-
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
9900
10299
|
const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
|
9901
10300
|
size_t ident_length = lex_mode->as.heredoc.ident_length;
|
9902
10301
|
|
@@ -10083,21 +10482,20 @@ parser_lex(pm_parser_t *parser) {
|
|
10083
10482
|
case '\r':
|
10084
10483
|
parser->current.end++;
|
10085
10484
|
if (peek(parser) != '\n') {
|
10086
|
-
|
10087
|
-
|
10485
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10486
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
10088
10487
|
break;
|
10089
10488
|
}
|
10090
10489
|
/* fallthrough */
|
10091
10490
|
case '\n':
|
10092
|
-
|
10093
|
-
|
10491
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10492
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
10094
10493
|
token_buffer.cursor = parser->current.end + 1;
|
10095
10494
|
breakpoint = parser->current.end;
|
10096
10495
|
continue;
|
10097
10496
|
default:
|
10098
|
-
|
10099
|
-
|
10100
|
-
pm_token_buffer_push(&token_buffer, peeked);
|
10497
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10498
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
10101
10499
|
break;
|
10102
10500
|
}
|
10103
10501
|
} else {
|
@@ -10105,7 +10503,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10105
10503
|
case '\r':
|
10106
10504
|
parser->current.end++;
|
10107
10505
|
if (peek(parser) != '\n') {
|
10108
|
-
|
10506
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
10109
10507
|
break;
|
10110
10508
|
}
|
10111
10509
|
/* fallthrough */
|
@@ -10184,8 +10582,8 @@ parser_lex(pm_parser_t *parser) {
|
|
10184
10582
|
typedef enum {
|
10185
10583
|
PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
|
10186
10584
|
PM_BINDING_POWER_STATEMENT = 2,
|
10187
|
-
|
10188
|
-
|
10585
|
+
PM_BINDING_POWER_MODIFIER_RESCUE = 4, // rescue
|
10586
|
+
PM_BINDING_POWER_MODIFIER = 6, // if unless until while
|
10189
10587
|
PM_BINDING_POWER_COMPOSITION = 8, // and or
|
10190
10588
|
PM_BINDING_POWER_NOT = 10, // not
|
10191
10589
|
PM_BINDING_POWER_MATCH = 12, // => in
|
@@ -10239,15 +10637,15 @@ typedef struct {
|
|
10239
10637
|
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
|
10240
10638
|
|
10241
10639
|
pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
|
10640
|
+
// rescue
|
10641
|
+
[PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
|
10642
|
+
|
10242
10643
|
// if unless until while
|
10243
10644
|
[PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
10244
10645
|
[PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
10245
10646
|
[PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
10246
10647
|
[PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
10247
10648
|
|
10248
|
-
// rescue
|
10249
|
-
[PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
|
10250
|
-
|
10251
10649
|
// and or
|
10252
10650
|
[PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
|
10253
10651
|
[PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
|
@@ -10377,16 +10775,8 @@ match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
|
|
10377
10775
|
* Returns true if the current token is any of the four given types.
|
10378
10776
|
*/
|
10379
10777
|
static inline bool
|
10380
|
-
match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
|
10381
|
-
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
|
10382
|
-
}
|
10383
|
-
|
10384
|
-
/**
|
10385
|
-
* Returns true if the current token is any of the five given types.
|
10386
|
-
*/
|
10387
|
-
static inline bool
|
10388
|
-
match5(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5) {
|
10389
|
-
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5);
|
10778
|
+
match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
|
10779
|
+
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
|
10390
10780
|
}
|
10391
10781
|
|
10392
10782
|
/**
|
@@ -10866,7 +11256,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
10866
11256
|
return target;
|
10867
11257
|
}
|
10868
11258
|
|
10869
|
-
if (
|
11259
|
+
if (char_is_identifier_start(parser, call->message_loc.start)) {
|
10870
11260
|
// When we get here, we have a method call, because it was
|
10871
11261
|
// previously marked as a method call but now we have an =. This
|
10872
11262
|
// looks like:
|
@@ -10984,6 +11374,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
10984
11374
|
static pm_node_t *
|
10985
11375
|
parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power) {
|
10986
11376
|
pm_node_t *result = parse_targets(parser, first_target, binding_power);
|
11377
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
10987
11378
|
|
10988
11379
|
// Ensure that we have either an = or a ) after the targets.
|
10989
11380
|
if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
@@ -11024,7 +11415,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
11024
11415
|
break;
|
11025
11416
|
}
|
11026
11417
|
|
11027
|
-
// If we have a terminator, then we will parse all
|
11418
|
+
// If we have a terminator, then we will parse all consecutive terminators
|
11028
11419
|
// and then continue parsing the statements list.
|
11029
11420
|
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
11030
11421
|
// If we have a terminator, then we will continue parsing the statements
|
@@ -11084,8 +11475,9 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
|
|
11084
11475
|
|
11085
11476
|
if (token_begins_expression_p(parser->current.type)) {
|
11086
11477
|
value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
11087
|
-
}
|
11088
|
-
|
11478
|
+
}
|
11479
|
+
else {
|
11480
|
+
pm_parser_scope_forwarding_keywords_check(parser, &operator);
|
11089
11481
|
}
|
11090
11482
|
|
11091
11483
|
element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
|
@@ -11234,13 +11626,8 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
11234
11626
|
if (token_begins_expression_p(parser->current.type)) {
|
11235
11627
|
expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_ARGUMENT);
|
11236
11628
|
} else {
|
11237
|
-
|
11238
|
-
|
11239
|
-
pm_constant_id_t ellipsis_id = pm_parser_constant_id_constant(parser, "...", 3);
|
11240
|
-
if (pm_parser_local_depth_constant_id(parser, ellipsis_id) == -1) {
|
11241
|
-
pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
|
11242
|
-
}
|
11243
|
-
}
|
11629
|
+
// A block forwarding in a method having `...` parameter (e.g. `def foo(...); bar(&); end`) is available.
|
11630
|
+
pm_parser_scope_forwarding_block_check(parser, &operator);
|
11244
11631
|
}
|
11245
11632
|
|
11246
11633
|
argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
|
@@ -11258,10 +11645,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
11258
11645
|
pm_token_t operator = parser->previous;
|
11259
11646
|
|
11260
11647
|
if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
|
11261
|
-
|
11262
|
-
pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
11263
|
-
}
|
11264
|
-
|
11648
|
+
pm_parser_scope_forwarding_positionals_check(parser, &operator);
|
11265
11649
|
argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
|
11266
11650
|
} else {
|
11267
11651
|
pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
|
@@ -11287,15 +11671,14 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
11287
11671
|
pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
|
11288
11672
|
argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
|
11289
11673
|
} else {
|
11290
|
-
|
11291
|
-
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
11292
|
-
}
|
11674
|
+
pm_parser_scope_forwarding_all_check(parser, &parser->previous);
|
11293
11675
|
if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
|
11294
11676
|
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
|
11295
11677
|
}
|
11296
11678
|
|
11297
11679
|
argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
|
11298
11680
|
parse_arguments_append(parser, arguments, argument);
|
11681
|
+
arguments->has_forwarding = true;
|
11299
11682
|
parsed_forwarding_arguments = true;
|
11300
11683
|
break;
|
11301
11684
|
}
|
@@ -11338,6 +11721,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
11338
11721
|
}
|
11339
11722
|
|
11340
11723
|
parsed_bare_hash = true;
|
11724
|
+
} else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
|
11725
|
+
// TODO: Could we solve this with binding powers instead?
|
11726
|
+
pm_parser_err_current(parser, PM_ERR_ARGUMENT_IN);
|
11341
11727
|
}
|
11342
11728
|
|
11343
11729
|
parse_arguments_append(parser, arguments, argument);
|
@@ -11414,7 +11800,9 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
|
|
11414
11800
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
11415
11801
|
pm_token_t name = parser->previous;
|
11416
11802
|
value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
|
11417
|
-
pm_parser_parameter_name_check(parser, &name)
|
11803
|
+
if (pm_parser_parameter_name_check(parser, &name)) {
|
11804
|
+
pm_node_flag_set_repeated_parameter(value);
|
11805
|
+
}
|
11418
11806
|
pm_parser_local_add_token(parser, &name);
|
11419
11807
|
}
|
11420
11808
|
|
@@ -11424,7 +11812,9 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
|
|
11424
11812
|
pm_token_t name = parser->previous;
|
11425
11813
|
|
11426
11814
|
param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
|
11427
|
-
pm_parser_parameter_name_check(parser, &name)
|
11815
|
+
if (pm_parser_parameter_name_check(parser, &name)) {
|
11816
|
+
pm_node_flag_set_repeated_parameter(param);
|
11817
|
+
}
|
11428
11818
|
pm_parser_local_add_token(parser, &name);
|
11429
11819
|
}
|
11430
11820
|
|
@@ -11541,19 +11931,20 @@ parse_parameters(
|
|
11541
11931
|
pm_token_t operator = parser->previous;
|
11542
11932
|
pm_token_t name;
|
11543
11933
|
|
11934
|
+
bool repeated = false;
|
11544
11935
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
11545
11936
|
name = parser->previous;
|
11546
|
-
pm_parser_parameter_name_check(parser, &name);
|
11937
|
+
repeated = pm_parser_parameter_name_check(parser, &name);
|
11547
11938
|
pm_parser_local_add_token(parser, &name);
|
11548
11939
|
} else {
|
11549
11940
|
name = not_provided(parser);
|
11550
|
-
|
11551
|
-
if (allows_forwarding_parameters) {
|
11552
|
-
pm_parser_local_add_token(parser, &operator);
|
11553
|
-
}
|
11941
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_BLOCK;
|
11554
11942
|
}
|
11555
11943
|
|
11556
11944
|
pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
|
11945
|
+
if (repeated) {
|
11946
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)param);
|
11947
|
+
}
|
11557
11948
|
if (params->block == NULL) {
|
11558
11949
|
pm_parameters_node_block_set(params, param);
|
11559
11950
|
} else {
|
@@ -11572,9 +11963,8 @@ parse_parameters(
|
|
11572
11963
|
update_parameter_state(parser, &parser->current, &order);
|
11573
11964
|
parser_lex(parser);
|
11574
11965
|
|
11575
|
-
|
11576
|
-
|
11577
|
-
}
|
11966
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_BLOCK;
|
11967
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_ALL;
|
11578
11968
|
|
11579
11969
|
pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
|
11580
11970
|
if (params->keyword_rest != NULL) {
|
@@ -11626,20 +12016,23 @@ parse_parameters(
|
|
11626
12016
|
}
|
11627
12017
|
|
11628
12018
|
pm_token_t name = parser->previous;
|
11629
|
-
pm_parser_parameter_name_check(parser, &name);
|
12019
|
+
bool repeated = pm_parser_parameter_name_check(parser, &name);
|
11630
12020
|
pm_parser_local_add_token(parser, &name);
|
11631
12021
|
|
11632
12022
|
if (accept1(parser, PM_TOKEN_EQUAL)) {
|
11633
12023
|
pm_token_t operator = parser->previous;
|
11634
12024
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
11635
|
-
|
11636
|
-
|
12025
|
+
|
12026
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &name));
|
11637
12027
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
|
11638
12028
|
|
11639
12029
|
pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
|
12030
|
+
if (repeated) {
|
12031
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)param);
|
12032
|
+
}
|
11640
12033
|
pm_parameters_node_optionals_append(params, param);
|
11641
12034
|
|
11642
|
-
parser
|
12035
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
11643
12036
|
context_pop(parser);
|
11644
12037
|
|
11645
12038
|
// If parsing the value of the parameter resulted in error recovery,
|
@@ -11651,9 +12044,15 @@ parse_parameters(
|
|
11651
12044
|
}
|
11652
12045
|
} else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
|
11653
12046
|
pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
|
12047
|
+
if (repeated) {
|
12048
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)param);
|
12049
|
+
}
|
11654
12050
|
pm_parameters_node_requireds_append(params, (pm_node_t *) param);
|
11655
12051
|
} else {
|
11656
12052
|
pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
|
12053
|
+
if (repeated) {
|
12054
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)param);
|
12055
|
+
}
|
11657
12056
|
pm_parameters_node_posts_append(params, (pm_node_t *) param);
|
11658
12057
|
}
|
11659
12058
|
|
@@ -11668,7 +12067,7 @@ parse_parameters(
|
|
11668
12067
|
pm_token_t local = name;
|
11669
12068
|
local.end -= 1;
|
11670
12069
|
|
11671
|
-
pm_parser_parameter_name_check(parser, &local);
|
12070
|
+
bool repeated = pm_parser_parameter_name_check(parser, &local);
|
11672
12071
|
pm_parser_local_add_token(parser, &local);
|
11673
12072
|
|
11674
12073
|
switch (parser->current.type) {
|
@@ -11676,6 +12075,9 @@ parse_parameters(
|
|
11676
12075
|
case PM_TOKEN_PARENTHESIS_RIGHT:
|
11677
12076
|
case PM_TOKEN_PIPE: {
|
11678
12077
|
pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
|
12078
|
+
if (repeated) {
|
12079
|
+
pm_node_flag_set_repeated_parameter(param);
|
12080
|
+
}
|
11679
12081
|
pm_parameters_node_keywords_append(params, param);
|
11680
12082
|
break;
|
11681
12083
|
}
|
@@ -11687,6 +12089,9 @@ parse_parameters(
|
|
11687
12089
|
}
|
11688
12090
|
|
11689
12091
|
pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
|
12092
|
+
if (repeated) {
|
12093
|
+
pm_node_flag_set_repeated_parameter(param);
|
12094
|
+
}
|
11690
12095
|
pm_parameters_node_keywords_append(params, param);
|
11691
12096
|
break;
|
11692
12097
|
}
|
@@ -11695,17 +12100,22 @@ parse_parameters(
|
|
11695
12100
|
|
11696
12101
|
if (token_begins_expression_p(parser->current.type)) {
|
11697
12102
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
11698
|
-
|
11699
|
-
|
12103
|
+
|
12104
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &local));
|
11700
12105
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
|
11701
|
-
|
12106
|
+
|
12107
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
11702
12108
|
context_pop(parser);
|
12109
|
+
|
11703
12110
|
param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
|
11704
12111
|
}
|
11705
12112
|
else {
|
11706
12113
|
param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
|
11707
12114
|
}
|
11708
12115
|
|
12116
|
+
if (repeated) {
|
12117
|
+
pm_node_flag_set_repeated_parameter(param);
|
12118
|
+
}
|
11709
12119
|
pm_parameters_node_keywords_append(params, param);
|
11710
12120
|
|
11711
12121
|
// If parsing the value of the parameter resulted in error recovery,
|
@@ -11728,20 +12138,21 @@ parse_parameters(
|
|
11728
12138
|
|
11729
12139
|
pm_token_t operator = parser->previous;
|
11730
12140
|
pm_token_t name;
|
11731
|
-
|
12141
|
+
bool repeated = false;
|
11732
12142
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
11733
12143
|
name = parser->previous;
|
11734
|
-
pm_parser_parameter_name_check(parser, &name);
|
12144
|
+
repeated = pm_parser_parameter_name_check(parser, &name);
|
11735
12145
|
pm_parser_local_add_token(parser, &name);
|
11736
12146
|
} else {
|
11737
12147
|
name = not_provided(parser);
|
11738
12148
|
|
11739
|
-
|
11740
|
-
pm_parser_local_add_token(parser, &operator);
|
11741
|
-
}
|
12149
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_POSITIONALS;
|
11742
12150
|
}
|
11743
12151
|
|
11744
12152
|
pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
|
12153
|
+
if (repeated) {
|
12154
|
+
pm_node_flag_set_repeated_parameter(param);
|
12155
|
+
}
|
11745
12156
|
if (params->rest == NULL) {
|
11746
12157
|
pm_parameters_node_rest_set(params, param);
|
11747
12158
|
} else {
|
@@ -11764,19 +12175,21 @@ parse_parameters(
|
|
11764
12175
|
} else {
|
11765
12176
|
pm_token_t name;
|
11766
12177
|
|
12178
|
+
bool repeated = false;
|
11767
12179
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
11768
12180
|
name = parser->previous;
|
11769
|
-
pm_parser_parameter_name_check(parser, &name);
|
12181
|
+
repeated = pm_parser_parameter_name_check(parser, &name);
|
11770
12182
|
pm_parser_local_add_token(parser, &name);
|
11771
12183
|
} else {
|
11772
12184
|
name = not_provided(parser);
|
11773
12185
|
|
11774
|
-
|
11775
|
-
pm_parser_local_add_token(parser, &operator);
|
11776
|
-
}
|
12186
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_KEYWORDS;
|
11777
12187
|
}
|
11778
12188
|
|
11779
12189
|
param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
|
12190
|
+
if (repeated) {
|
12191
|
+
pm_node_flag_set_repeated_parameter(param);
|
12192
|
+
}
|
11780
12193
|
}
|
11781
12194
|
|
11782
12195
|
if (params->keyword_rest == NULL) {
|
@@ -12012,10 +12425,13 @@ parse_block_parameters(
|
|
12012
12425
|
if ((opening->type != PM_TOKEN_NOT_PROVIDED) && accept1(parser, PM_TOKEN_SEMICOLON)) {
|
12013
12426
|
do {
|
12014
12427
|
expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
|
12015
|
-
pm_parser_parameter_name_check(parser, &parser->previous);
|
12428
|
+
bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
|
12016
12429
|
pm_parser_local_add_token(parser, &parser->previous);
|
12017
12430
|
|
12018
12431
|
pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
|
12432
|
+
if (repeated) {
|
12433
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)local);
|
12434
|
+
}
|
12019
12435
|
pm_block_parameters_node_append_local(block_parameters, local);
|
12020
12436
|
} while (accept1(parser, PM_TOKEN_COMMA));
|
12021
12437
|
}
|
@@ -12031,8 +12447,10 @@ parse_block(pm_parser_t *parser) {
|
|
12031
12447
|
pm_token_t opening = parser->previous;
|
12032
12448
|
accept1(parser, PM_TOKEN_NEWLINE);
|
12033
12449
|
|
12450
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
12034
12451
|
pm_accepts_block_stack_push(parser, true);
|
12035
12452
|
pm_parser_scope_push(parser, false);
|
12453
|
+
|
12036
12454
|
pm_block_parameters_node_t *block_parameters = NULL;
|
12037
12455
|
|
12038
12456
|
if (accept1(parser, PM_TOKEN_PIPE)) {
|
@@ -12053,12 +12471,6 @@ parse_block(pm_parser_t *parser) {
|
|
12053
12471
|
pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
|
12054
12472
|
}
|
12055
12473
|
|
12056
|
-
uint32_t locals_body_index = 0;
|
12057
|
-
|
12058
|
-
if (block_parameters) {
|
12059
|
-
locals_body_index = (uint32_t) parser->current_scope->locals.size;
|
12060
|
-
}
|
12061
|
-
|
12062
12474
|
accept1(parser, PM_TOKEN_NEWLINE);
|
12063
12475
|
pm_node_t *statements = NULL;
|
12064
12476
|
|
@@ -12090,13 +12502,14 @@ parse_block(pm_parser_t *parser) {
|
|
12090
12502
|
|
12091
12503
|
if (parameters == NULL && (maximum > 0)) {
|
12092
12504
|
parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = opening.start, .end = parser->previous.end }, maximum);
|
12093
|
-
locals_body_index = maximum;
|
12094
12505
|
}
|
12095
12506
|
|
12096
12507
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
12097
12508
|
pm_parser_scope_pop(parser);
|
12098
12509
|
pm_accepts_block_stack_pop(parser);
|
12099
|
-
|
12510
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
12511
|
+
|
12512
|
+
return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
|
12100
12513
|
}
|
12101
12514
|
|
12102
12515
|
/**
|
@@ -12157,14 +12570,20 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
12157
12570
|
}
|
12158
12571
|
|
12159
12572
|
if (block != NULL) {
|
12160
|
-
if (arguments->block == NULL) {
|
12573
|
+
if (arguments->block == NULL && !arguments->has_forwarding) {
|
12161
12574
|
arguments->block = (pm_node_t *) block;
|
12162
12575
|
} else {
|
12163
|
-
|
12164
|
-
|
12165
|
-
|
12576
|
+
if (arguments->has_forwarding) {
|
12577
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_FORWARDING);
|
12578
|
+
} else {
|
12579
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
|
12580
|
+
}
|
12581
|
+
if (arguments->block != NULL) {
|
12582
|
+
if (arguments->arguments == NULL) {
|
12583
|
+
arguments->arguments = pm_arguments_node_create(parser);
|
12584
|
+
}
|
12585
|
+
pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
|
12166
12586
|
}
|
12167
|
-
pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
|
12168
12587
|
arguments->block = (pm_node_t *) block;
|
12169
12588
|
}
|
12170
12589
|
}
|
@@ -12384,8 +12803,14 @@ static inline pm_node_flags_t
|
|
12384
12803
|
parse_unescaped_encoding(const pm_parser_t *parser) {
|
12385
12804
|
if (parser->explicit_encoding != NULL) {
|
12386
12805
|
if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
|
12806
|
+
// If the there's an explicit encoding and it's using a UTF-8 escape
|
12807
|
+
// sequence, then mark the string as UTF-8.
|
12387
12808
|
return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
|
12388
12809
|
} else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
|
12810
|
+
// If there's a non-UTF-8 escape sequence being used, then the
|
12811
|
+
// string uses the source encoding, unless the source is marked as
|
12812
|
+
// US-ASCII. In that case the string is forced as ASCII-8BIT in
|
12813
|
+
// order to keep the string valid.
|
12389
12814
|
return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
|
12390
12815
|
}
|
12391
12816
|
}
|
@@ -12509,14 +12934,54 @@ parse_string_part(pm_parser_t *parser) {
|
|
12509
12934
|
}
|
12510
12935
|
}
|
12511
12936
|
|
12937
|
+
/**
|
12938
|
+
* When creating a symbol, unary operators that cannot be binary operators
|
12939
|
+
* automatically drop trailing `@` characters. This happens at the parser level,
|
12940
|
+
* such that `~@` is parsed as `~` and `!@` is parsed as `!`. We do that here.
|
12941
|
+
*/
|
12942
|
+
static const uint8_t *
|
12943
|
+
parse_operator_symbol_name(const pm_token_t *name) {
|
12944
|
+
switch (name->type) {
|
12945
|
+
case PM_TOKEN_TILDE:
|
12946
|
+
case PM_TOKEN_BANG:
|
12947
|
+
if (name->end[-1] == '@') return name->end - 1;
|
12948
|
+
/* fallthrough */
|
12949
|
+
default:
|
12950
|
+
return name->end;
|
12951
|
+
}
|
12952
|
+
}
|
12953
|
+
|
12954
|
+
static pm_node_t *
|
12955
|
+
parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
|
12956
|
+
pm_token_t closing = not_provided(parser);
|
12957
|
+
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
|
12958
|
+
|
12959
|
+
const uint8_t *end = parse_operator_symbol_name(&parser->current);
|
12960
|
+
|
12961
|
+
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
|
12962
|
+
parser_lex(parser);
|
12963
|
+
|
12964
|
+
pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
|
12965
|
+
pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
|
12966
|
+
|
12967
|
+
return (pm_node_t *) symbol;
|
12968
|
+
}
|
12969
|
+
|
12970
|
+
/**
|
12971
|
+
* Parse a symbol node. This function will get called immediately after finding
|
12972
|
+
* a symbol opening token. This handles parsing bare symbols and interpolated
|
12973
|
+
* symbols.
|
12974
|
+
*/
|
12512
12975
|
static pm_node_t *
|
12513
12976
|
parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state) {
|
12514
|
-
pm_token_t opening = parser->previous;
|
12977
|
+
const pm_token_t opening = parser->previous;
|
12515
12978
|
|
12516
12979
|
if (lex_mode->mode != PM_LEX_STRING) {
|
12517
12980
|
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
|
12518
12981
|
|
12519
12982
|
switch (parser->current.type) {
|
12983
|
+
case PM_CASE_OPERATOR:
|
12984
|
+
return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
|
12520
12985
|
case PM_TOKEN_IDENTIFIER:
|
12521
12986
|
case PM_TOKEN_CONSTANT:
|
12522
12987
|
case PM_TOKEN_INSTANCE_VARIABLE:
|
@@ -12528,10 +12993,6 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
12528
12993
|
case PM_CASE_KEYWORD:
|
12529
12994
|
parser_lex(parser);
|
12530
12995
|
break;
|
12531
|
-
case PM_CASE_OPERATOR:
|
12532
|
-
lex_state_set(parser, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
|
12533
|
-
parser_lex(parser);
|
12534
|
-
break;
|
12535
12996
|
default:
|
12536
12997
|
expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
|
12537
12998
|
break;
|
@@ -12541,6 +13002,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
12541
13002
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
12542
13003
|
|
12543
13004
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
13005
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
13006
|
+
|
12544
13007
|
return (pm_node_t *) symbol;
|
12545
13008
|
}
|
12546
13009
|
|
@@ -12637,7 +13100,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
12637
13100
|
} else {
|
12638
13101
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
|
12639
13102
|
}
|
12640
|
-
|
13103
|
+
|
13104
|
+
return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
12641
13105
|
}
|
12642
13106
|
|
12643
13107
|
/**
|
@@ -12647,8 +13111,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
12647
13111
|
static inline pm_node_t *
|
12648
13112
|
parse_undef_argument(pm_parser_t *parser) {
|
12649
13113
|
switch (parser->current.type) {
|
13114
|
+
case PM_CASE_OPERATOR: {
|
13115
|
+
const pm_token_t opening = not_provided(parser);
|
13116
|
+
return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
|
13117
|
+
}
|
12650
13118
|
case PM_CASE_KEYWORD:
|
12651
|
-
case PM_CASE_OPERATOR:
|
12652
13119
|
case PM_TOKEN_CONSTANT:
|
12653
13120
|
case PM_TOKEN_IDENTIFIER:
|
12654
13121
|
case PM_TOKEN_METHOD_NAME: {
|
@@ -12659,6 +13126,8 @@ parse_undef_argument(pm_parser_t *parser) {
|
|
12659
13126
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
12660
13127
|
|
12661
13128
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
13129
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
13130
|
+
|
12662
13131
|
return (pm_node_t *) symbol;
|
12663
13132
|
}
|
12664
13133
|
case PM_TOKEN_SYMBOL_BEGIN: {
|
@@ -12682,21 +13151,24 @@ parse_undef_argument(pm_parser_t *parser) {
|
|
12682
13151
|
static inline pm_node_t *
|
12683
13152
|
parse_alias_argument(pm_parser_t *parser, bool first) {
|
12684
13153
|
switch (parser->current.type) {
|
12685
|
-
case PM_CASE_OPERATOR:
|
13154
|
+
case PM_CASE_OPERATOR: {
|
13155
|
+
const pm_token_t opening = not_provided(parser);
|
13156
|
+
return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
|
13157
|
+
}
|
12686
13158
|
case PM_CASE_KEYWORD:
|
12687
13159
|
case PM_TOKEN_CONSTANT:
|
12688
13160
|
case PM_TOKEN_IDENTIFIER:
|
12689
13161
|
case PM_TOKEN_METHOD_NAME: {
|
12690
|
-
if (first)
|
12691
|
-
lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
|
12692
|
-
}
|
12693
|
-
|
13162
|
+
if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
|
12694
13163
|
parser_lex(parser);
|
13164
|
+
|
12695
13165
|
pm_token_t opening = not_provided(parser);
|
12696
13166
|
pm_token_t closing = not_provided(parser);
|
12697
13167
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
12698
13168
|
|
12699
13169
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
13170
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
13171
|
+
|
12700
13172
|
return (pm_node_t *) symbol;
|
12701
13173
|
}
|
12702
13174
|
case PM_TOKEN_SYMBOL_BEGIN: {
|
@@ -12733,6 +13205,64 @@ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
|
|
12733
13205
|
return false;
|
12734
13206
|
}
|
12735
13207
|
|
13208
|
+
/**
|
13209
|
+
* These are the names of the various numbered parameters. We have them here so
|
13210
|
+
* that when we insert them into the constant pool we can use a constant string
|
13211
|
+
* and not have to allocate.
|
13212
|
+
*/
|
13213
|
+
static const char * const pm_numbered_parameter_names[] = {
|
13214
|
+
"_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
|
13215
|
+
};
|
13216
|
+
|
13217
|
+
/**
|
13218
|
+
* Parse an identifier into either a local variable read. If the local variable
|
13219
|
+
* is not found, it returns NULL instead.
|
13220
|
+
*/
|
13221
|
+
static pm_local_variable_read_node_t *
|
13222
|
+
parse_variable(pm_parser_t *parser) {
|
13223
|
+
int depth;
|
13224
|
+
if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
|
13225
|
+
return pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
|
13226
|
+
}
|
13227
|
+
|
13228
|
+
if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
|
13229
|
+
// Now that we know we have a numbered parameter, we need to check
|
13230
|
+
// if it's allowed in this context. If it is, then we will create a
|
13231
|
+
// local variable read. If it's not, then we'll create a normal call
|
13232
|
+
// node but add an error.
|
13233
|
+
if (parser->current_scope->explicit_params) {
|
13234
|
+
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
|
13235
|
+
} else if (outer_scope_using_numbered_parameters_p(parser)) {
|
13236
|
+
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
|
13237
|
+
} else {
|
13238
|
+
// Indicate that this scope is using numbered params so that child
|
13239
|
+
// scopes cannot. We subtract the value for the character '0' to get
|
13240
|
+
// the actual integer value of the number (only _1 through _9 are
|
13241
|
+
// valid).
|
13242
|
+
uint8_t numbered_parameters = (uint8_t) (parser->previous.start[1] - '0');
|
13243
|
+
if (numbered_parameters > parser->current_scope->numbered_parameters) {
|
13244
|
+
parser->current_scope->numbered_parameters = numbered_parameters;
|
13245
|
+
pm_parser_numbered_parameters_set(parser, numbered_parameters);
|
13246
|
+
}
|
13247
|
+
|
13248
|
+
// When you use a numbered parameter, it implies the existence
|
13249
|
+
// of all of the locals that exist before it. For example,
|
13250
|
+
// referencing _2 means that _1 must exist. Therefore here we
|
13251
|
+
// loop through all of the possibilities and add them into the
|
13252
|
+
// constant pool.
|
13253
|
+
for (uint8_t numbered_parameter = 1; numbered_parameter <= numbered_parameters - 1; numbered_parameter++) {
|
13254
|
+
pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameter - 1], 2);
|
13255
|
+
}
|
13256
|
+
|
13257
|
+
// Finally we can create the local variable read node.
|
13258
|
+
pm_constant_id_t name_id = pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameters - 1], 2);
|
13259
|
+
return pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
13260
|
+
}
|
13261
|
+
}
|
13262
|
+
|
13263
|
+
return NULL;
|
13264
|
+
}
|
13265
|
+
|
12736
13266
|
/**
|
12737
13267
|
* Parse an identifier into either a local variable read or a call.
|
12738
13268
|
*/
|
@@ -12741,56 +13271,8 @@ parse_variable_call(pm_parser_t *parser) {
|
|
12741
13271
|
pm_node_flags_t flags = 0;
|
12742
13272
|
|
12743
13273
|
if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
|
12744
|
-
|
12745
|
-
if (
|
12746
|
-
return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
|
12747
|
-
}
|
12748
|
-
|
12749
|
-
if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
|
12750
|
-
// Now that we know we have a numbered parameter, we need to check
|
12751
|
-
// if it's allowed in this context. If it is, then we will create a
|
12752
|
-
// local variable read. If it's not, then we'll create a normal call
|
12753
|
-
// node but add an error.
|
12754
|
-
if (parser->current_scope->explicit_params) {
|
12755
|
-
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
|
12756
|
-
} else if (outer_scope_using_numbered_parameters_p(parser)) {
|
12757
|
-
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
|
12758
|
-
} else {
|
12759
|
-
// Indicate that this scope is using numbered params so that child
|
12760
|
-
// scopes cannot.
|
12761
|
-
uint8_t number = parser->previous.start[1];
|
12762
|
-
|
12763
|
-
// We subtract the value for the character '0' to get the actual
|
12764
|
-
// integer value of the number (only _1 through _9 are valid)
|
12765
|
-
uint8_t numbered_parameters = (uint8_t) (number - '0');
|
12766
|
-
if (numbered_parameters > parser->current_scope->numbered_parameters) {
|
12767
|
-
parser->current_scope->numbered_parameters = numbered_parameters;
|
12768
|
-
pm_parser_numbered_parameters_set(parser, numbered_parameters);
|
12769
|
-
}
|
12770
|
-
|
12771
|
-
// When you use a numbered parameter, it implies the existence
|
12772
|
-
// of all of the locals that exist before it. For example,
|
12773
|
-
// referencing _2 means that _1 must exist. Therefore here we
|
12774
|
-
// loop through all of the possibilities and add them into the
|
12775
|
-
// constant pool.
|
12776
|
-
uint8_t current = '1';
|
12777
|
-
uint8_t *value;
|
12778
|
-
|
12779
|
-
while (current < number) {
|
12780
|
-
value = malloc(2);
|
12781
|
-
value[0] = '_';
|
12782
|
-
value[1] = current++;
|
12783
|
-
pm_parser_local_add_owned(parser, value, 2);
|
12784
|
-
}
|
12785
|
-
|
12786
|
-
// Now we can add the actual token that is being used. For
|
12787
|
-
// this one we can add a shared version since it is directly
|
12788
|
-
// referenced in the source.
|
12789
|
-
pm_parser_local_add_token(parser, &parser->previous);
|
12790
|
-
return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
12791
|
-
}
|
12792
|
-
}
|
12793
|
-
|
13274
|
+
pm_local_variable_read_node_t *node = parse_variable(parser);
|
13275
|
+
if (node != NULL) return (pm_node_t *) node;
|
12794
13276
|
flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
|
12795
13277
|
}
|
12796
13278
|
|
@@ -13076,43 +13558,77 @@ parse_pattern_keyword_rest(pm_parser_t *parser) {
|
|
13076
13558
|
return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
|
13077
13559
|
}
|
13078
13560
|
|
13561
|
+
/**
|
13562
|
+
* Create an implicit node for the value of a hash pattern that has omitted the
|
13563
|
+
* value. This will use an implicit local variable target.
|
13564
|
+
*/
|
13565
|
+
static pm_node_t *
|
13566
|
+
parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_symbol_node_t *key) {
|
13567
|
+
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
|
13568
|
+
pm_constant_id_t name = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
|
13569
|
+
|
13570
|
+
int current_depth = pm_parser_local_depth_constant_id(parser, name);
|
13571
|
+
uint32_t depth;
|
13572
|
+
|
13573
|
+
if (current_depth == -1) {
|
13574
|
+
pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
|
13575
|
+
depth = 0;
|
13576
|
+
} else {
|
13577
|
+
depth = (uint32_t) current_depth;
|
13578
|
+
}
|
13579
|
+
|
13580
|
+
pm_local_variable_target_node_t *target = pm_local_variable_target_node_create_values(parser, value_loc, name, depth);
|
13581
|
+
return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
|
13582
|
+
}
|
13583
|
+
|
13079
13584
|
/**
|
13080
13585
|
* Parse a hash pattern.
|
13081
13586
|
*/
|
13082
13587
|
static pm_hash_pattern_node_t *
|
13083
|
-
parse_pattern_hash(pm_parser_t *parser, pm_node_t *
|
13588
|
+
parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) {
|
13084
13589
|
pm_node_list_t assocs = { 0 };
|
13085
13590
|
pm_node_t *rest = NULL;
|
13086
13591
|
|
13087
|
-
switch (PM_NODE_TYPE(
|
13088
|
-
case
|
13089
|
-
|
13090
|
-
|
13091
|
-
|
13092
|
-
|
13592
|
+
switch (PM_NODE_TYPE(first_node)) {
|
13593
|
+
case PM_ASSOC_SPLAT_NODE:
|
13594
|
+
case PM_NO_KEYWORDS_PARAMETER_NODE:
|
13595
|
+
rest = first_node;
|
13596
|
+
break;
|
13597
|
+
case PM_SYMBOL_NODE: {
|
13598
|
+
if (pm_symbol_node_label_p(first_node)) {
|
13599
|
+
pm_node_t *value;
|
13600
|
+
|
13601
|
+
if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
13602
|
+
// Here we have a value for the first assoc in the list, so
|
13603
|
+
// we will parse it now.
|
13604
|
+
value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
|
13605
|
+
} else {
|
13606
|
+
// Otherwise, we will create an implicit local variable
|
13607
|
+
// target for the value.
|
13608
|
+
value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) first_node);
|
13609
|
+
}
|
13093
13610
|
|
13094
|
-
|
13095
|
-
assoc
|
13096
|
-
assoc->value = value;
|
13097
|
-
} else {
|
13098
|
-
pm_node_t *key = ((pm_assoc_node_t *) first_assoc)->key;
|
13611
|
+
pm_token_t operator = not_provided(parser);
|
13612
|
+
pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
|
13099
13613
|
|
13100
|
-
|
13101
|
-
|
13102
|
-
pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
|
13103
|
-
}
|
13614
|
+
pm_node_list_append(&assocs, assoc);
|
13615
|
+
break;
|
13104
13616
|
}
|
13617
|
+
}
|
13618
|
+
/* fallthrough */
|
13619
|
+
default: {
|
13620
|
+
// If we get anything else, then this is an error. For this we'll
|
13621
|
+
// create a missing node for the value and create an assoc node for
|
13622
|
+
// the first node in the list.
|
13623
|
+
pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_LABEL);
|
13624
|
+
|
13625
|
+
pm_token_t operator = not_provided(parser);
|
13626
|
+
pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
|
13627
|
+
pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
|
13105
13628
|
|
13106
|
-
pm_node_list_append(&assocs,
|
13629
|
+
pm_node_list_append(&assocs, assoc);
|
13107
13630
|
break;
|
13108
13631
|
}
|
13109
|
-
case PM_ASSOC_SPLAT_NODE:
|
13110
|
-
case PM_NO_KEYWORDS_PARAMETER_NODE:
|
13111
|
-
rest = first_assoc;
|
13112
|
-
break;
|
13113
|
-
default:
|
13114
|
-
assert(false);
|
13115
|
-
break;
|
13116
13632
|
}
|
13117
13633
|
|
13118
13634
|
// If there are any other assocs, then we'll parse them now.
|
@@ -13141,6 +13657,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
|
|
13141
13657
|
} else {
|
13142
13658
|
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
|
13143
13659
|
pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
|
13660
|
+
value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) key);
|
13144
13661
|
}
|
13145
13662
|
|
13146
13663
|
pm_token_t operator = not_provided(parser);
|
@@ -13246,45 +13763,29 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
13246
13763
|
// pattern node.
|
13247
13764
|
node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
|
13248
13765
|
} else {
|
13249
|
-
pm_node_t *
|
13766
|
+
pm_node_t *first_node;
|
13250
13767
|
|
13251
13768
|
switch (parser->current.type) {
|
13252
|
-
case PM_TOKEN_LABEL:
|
13769
|
+
case PM_TOKEN_LABEL:
|
13253
13770
|
parser_lex(parser);
|
13254
|
-
|
13255
|
-
pm_symbol_node_t *key = pm_symbol_node_label_create(parser, &parser->previous);
|
13256
|
-
pm_token_t operator = not_provided(parser);
|
13257
|
-
|
13258
|
-
first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
|
13771
|
+
first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
|
13259
13772
|
break;
|
13260
|
-
}
|
13261
13773
|
case PM_TOKEN_USTAR_STAR:
|
13262
|
-
|
13774
|
+
first_node = parse_pattern_keyword_rest(parser);
|
13263
13775
|
break;
|
13264
|
-
case PM_TOKEN_STRING_BEGIN:
|
13265
|
-
|
13266
|
-
pm_token_t operator = not_provided(parser);
|
13267
|
-
|
13268
|
-
if (!pm_symbol_node_label_p(key)) {
|
13269
|
-
pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_LABEL);
|
13270
|
-
}
|
13271
|
-
|
13272
|
-
first_assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL);
|
13776
|
+
case PM_TOKEN_STRING_BEGIN:
|
13777
|
+
first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
|
13273
13778
|
break;
|
13274
|
-
}
|
13275
13779
|
default: {
|
13276
13780
|
parser_lex(parser);
|
13277
13781
|
pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
|
13278
13782
|
|
13279
|
-
|
13280
|
-
pm_token_t operator = not_provided(parser);
|
13281
|
-
|
13282
|
-
first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
|
13783
|
+
first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
13283
13784
|
break;
|
13284
13785
|
}
|
13285
13786
|
}
|
13286
13787
|
|
13287
|
-
node = parse_pattern_hash(parser,
|
13788
|
+
node = parse_pattern_hash(parser, first_node);
|
13288
13789
|
|
13289
13790
|
accept1(parser, PM_TOKEN_NEWLINE);
|
13290
13791
|
expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
|
@@ -13350,7 +13851,16 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
13350
13851
|
switch (parser->current.type) {
|
13351
13852
|
case PM_TOKEN_IDENTIFIER: {
|
13352
13853
|
parser_lex(parser);
|
13353
|
-
pm_node_t *variable = (pm_node_t *)
|
13854
|
+
pm_node_t *variable = (pm_node_t *) parse_variable(parser);
|
13855
|
+
if (variable == NULL) {
|
13856
|
+
if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0 && pm_token_is_it(parser->previous.start, parser->previous.end)) {
|
13857
|
+
pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
|
13858
|
+
variable = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
13859
|
+
} else {
|
13860
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE, (int) (parser->previous.end - parser->previous.start), parser->previous.start);
|
13861
|
+
variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
13862
|
+
}
|
13863
|
+
}
|
13354
13864
|
|
13355
13865
|
return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
|
13356
13866
|
}
|
@@ -13519,9 +14029,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
|
|
13519
14029
|
case PM_TOKEN_LABEL: {
|
13520
14030
|
parser_lex(parser);
|
13521
14031
|
pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
|
13522
|
-
|
13523
|
-
|
13524
|
-
return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL));
|
14032
|
+
return (pm_node_t *) parse_pattern_hash(parser, key);
|
13525
14033
|
}
|
13526
14034
|
case PM_TOKEN_USTAR_STAR: {
|
13527
14035
|
node = parse_pattern_keyword_rest(parser);
|
@@ -13544,8 +14052,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
|
|
13544
14052
|
// If we got a dynamic label symbol, then we need to treat it like the
|
13545
14053
|
// beginning of a hash pattern.
|
13546
14054
|
if (pm_symbol_node_label_p(node)) {
|
13547
|
-
|
13548
|
-
return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, node, &operator, NULL));
|
14055
|
+
return (pm_node_t *) parse_pattern_hash(parser, node);
|
13549
14056
|
}
|
13550
14057
|
|
13551
14058
|
if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
|
@@ -13558,7 +14065,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
|
|
13558
14065
|
// Gather up all of the patterns into the list.
|
13559
14066
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
13560
14067
|
// Break early here in case we have a trailing comma.
|
13561
|
-
if (
|
14068
|
+
if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
|
13562
14069
|
node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
|
13563
14070
|
pm_node_list_append(&nodes, node);
|
13564
14071
|
break;
|
@@ -13644,7 +14151,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13644
14151
|
assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
|
13645
14152
|
|
13646
14153
|
bool concating = false;
|
13647
|
-
bool state_is_arg_labeled =
|
14154
|
+
bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
|
13648
14155
|
|
13649
14156
|
while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
13650
14157
|
pm_node_t *node = NULL;
|
@@ -13719,7 +14226,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13719
14226
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
|
13720
14227
|
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
13721
14228
|
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
13722
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
14229
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
13723
14230
|
} else if (match1(parser, PM_TOKEN_EOF)) {
|
13724
14231
|
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_TERM);
|
13725
14232
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
@@ -13741,7 +14248,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13741
14248
|
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
13742
14249
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
|
13743
14250
|
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
13744
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
14251
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
13745
14252
|
} else {
|
13746
14253
|
// If we get here, then we have interpolation so we'll need
|
13747
14254
|
// to create a string or symbol node with interpolation.
|
@@ -13834,7 +14341,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13834
14341
|
* Parse an expression that begins with the previous node that we just lexed.
|
13835
14342
|
*/
|
13836
14343
|
static inline pm_node_t *
|
13837
|
-
parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call) {
|
14344
|
+
parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
|
13838
14345
|
switch (parser->current.type) {
|
13839
14346
|
case PM_TOKEN_BRACKET_LEFT_ARRAY: {
|
13840
14347
|
parser_lex(parser);
|
@@ -13866,9 +14373,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
13866
14373
|
pm_node_t *expression = NULL;
|
13867
14374
|
|
13868
14375
|
if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
|
13869
|
-
|
13870
|
-
pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
13871
|
-
}
|
14376
|
+
pm_parser_scope_forwarding_positionals_check(parser, &operator);
|
13872
14377
|
} else {
|
13873
14378
|
expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR);
|
13874
14379
|
}
|
@@ -14113,7 +14618,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14113
14618
|
if (
|
14114
14619
|
match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
|
14115
14620
|
(accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
|
14116
|
-
(pm_accepts_block_stack_p(parser) &&
|
14621
|
+
(pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
|
14622
|
+
match1(parser, PM_TOKEN_BRACE_LEFT)
|
14117
14623
|
) {
|
14118
14624
|
pm_arguments_t arguments = { 0 };
|
14119
14625
|
parse_arguments_list(parser, &arguments, true, accepts_command_call);
|
@@ -14237,7 +14743,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14237
14743
|
// a block, so we need to check for that here.
|
14238
14744
|
if (
|
14239
14745
|
(accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
|
14240
|
-
(pm_accepts_block_stack_p(parser) &&
|
14746
|
+
(pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
|
14747
|
+
match1(parser, PM_TOKEN_BRACE_LEFT)
|
14241
14748
|
) {
|
14242
14749
|
pm_arguments_t arguments = { 0 };
|
14243
14750
|
parse_arguments_list(parser, &arguments, true, accepts_command_call);
|
@@ -14250,6 +14757,31 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14250
14757
|
|
14251
14758
|
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
|
14252
14759
|
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
|
14760
|
+
} else {
|
14761
|
+
// Check if `it` is not going to be assigned.
|
14762
|
+
switch (parser->current.type) {
|
14763
|
+
case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
|
14764
|
+
case PM_TOKEN_AMPERSAND_EQUAL:
|
14765
|
+
case PM_TOKEN_CARET_EQUAL:
|
14766
|
+
case PM_TOKEN_EQUAL:
|
14767
|
+
case PM_TOKEN_GREATER_GREATER_EQUAL:
|
14768
|
+
case PM_TOKEN_LESS_LESS_EQUAL:
|
14769
|
+
case PM_TOKEN_MINUS_EQUAL:
|
14770
|
+
case PM_TOKEN_PARENTHESIS_RIGHT:
|
14771
|
+
case PM_TOKEN_PERCENT_EQUAL:
|
14772
|
+
case PM_TOKEN_PIPE_EQUAL:
|
14773
|
+
case PM_TOKEN_PIPE_PIPE_EQUAL:
|
14774
|
+
case PM_TOKEN_PLUS_EQUAL:
|
14775
|
+
case PM_TOKEN_SLASH_EQUAL:
|
14776
|
+
case PM_TOKEN_STAR_EQUAL:
|
14777
|
+
case PM_TOKEN_STAR_STAR_EQUAL:
|
14778
|
+
break;
|
14779
|
+
default:
|
14780
|
+
// Once we know it's neither a method call nor an
|
14781
|
+
// assignment, we can finally create `it` default
|
14782
|
+
// parameter.
|
14783
|
+
node = pm_node_check_it(parser, node);
|
14784
|
+
}
|
14253
14785
|
}
|
14254
14786
|
|
14255
14787
|
return node;
|
@@ -14286,6 +14818,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14286
14818
|
// If we get here, then we tried to find something in the
|
14287
14819
|
// heredoc but couldn't actually parse anything, so we'll just
|
14288
14820
|
// return a missing node.
|
14821
|
+
//
|
14822
|
+
// parse_string_part handles its own errors, so there is no need
|
14823
|
+
// for us to add one here.
|
14289
14824
|
node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
14290
14825
|
} else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
|
14291
14826
|
// If we get here, then the part that we parsed was plain string
|
@@ -14549,11 +15084,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14549
15084
|
// for guard clauses in the form of `if` or `unless` statements.
|
14550
15085
|
if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
|
14551
15086
|
pm_token_t keyword = parser->previous;
|
14552
|
-
pm_node_t *predicate = parse_value_expression(parser,
|
15087
|
+
pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
|
14553
15088
|
pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
|
14554
15089
|
} else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
|
14555
15090
|
pm_token_t keyword = parser->previous;
|
14556
|
-
pm_node_t *predicate = parse_value_expression(parser,
|
15091
|
+
pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
|
14557
15092
|
pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
|
14558
15093
|
}
|
14559
15094
|
|
@@ -14742,8 +15277,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14742
15277
|
pm_token_t operator = parser->previous;
|
14743
15278
|
pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_NOT, true, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS);
|
14744
15279
|
|
14745
|
-
pm_constant_id_t
|
14746
|
-
parser->current_param_name = 0;
|
15280
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
14747
15281
|
pm_parser_scope_push(parser, true);
|
14748
15282
|
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
|
14749
15283
|
|
@@ -14760,11 +15294,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14760
15294
|
}
|
14761
15295
|
|
14762
15296
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
|
14763
|
-
|
14764
15297
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
15298
|
+
|
14765
15299
|
pm_parser_scope_pop(parser);
|
14766
|
-
parser->current_param_name = old_param_name;
|
14767
15300
|
pm_do_loop_stack_pop(parser);
|
15301
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
15302
|
+
|
14768
15303
|
return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
|
14769
15304
|
}
|
14770
15305
|
|
@@ -14790,9 +15325,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14790
15325
|
superclass = NULL;
|
14791
15326
|
}
|
14792
15327
|
|
14793
|
-
pm_constant_id_t
|
14794
|
-
parser->current_param_name = 0;
|
15328
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
14795
15329
|
pm_parser_scope_push(parser, true);
|
15330
|
+
|
14796
15331
|
if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
|
14797
15332
|
expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
|
14798
15333
|
} else {
|
@@ -14818,9 +15353,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14818
15353
|
}
|
14819
15354
|
|
14820
15355
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
15356
|
+
|
14821
15357
|
pm_parser_scope_pop(parser);
|
14822
|
-
parser->current_param_name = old_param_name;
|
14823
15358
|
pm_do_loop_stack_pop(parser);
|
15359
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
14824
15360
|
|
14825
15361
|
if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
|
14826
15362
|
pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
|
@@ -14835,18 +15371,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14835
15371
|
pm_token_t operator = not_provided(parser);
|
14836
15372
|
pm_token_t name = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = def_keyword.end, .end = def_keyword.end };
|
14837
15373
|
|
14838
|
-
// This context is necessary for lexing `...` in a bare params
|
14839
|
-
// It must be pushed before lexing the first param, so it
|
15374
|
+
// This context is necessary for lexing `...` in a bare params
|
15375
|
+
// correctly. It must be pushed before lexing the first param, so it
|
15376
|
+
// is here.
|
14840
15377
|
context_push(parser, PM_CONTEXT_DEF_PARAMS);
|
15378
|
+
pm_constant_id_t saved_param_name;
|
15379
|
+
|
14841
15380
|
parser_lex(parser);
|
14842
|
-
pm_constant_id_t old_param_name = parser->current_param_name;
|
14843
15381
|
|
14844
15382
|
switch (parser->current.type) {
|
14845
15383
|
case PM_CASE_OPERATOR:
|
15384
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14846
15385
|
pm_parser_scope_push(parser, true);
|
14847
|
-
parser->current_param_name = 0;
|
14848
15386
|
lex_state_set(parser, PM_LEX_STATE_ENDFN);
|
14849
15387
|
parser_lex(parser);
|
15388
|
+
|
14850
15389
|
name = parser->previous;
|
14851
15390
|
break;
|
14852
15391
|
case PM_TOKEN_IDENTIFIER: {
|
@@ -14854,18 +15393,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14854
15393
|
|
14855
15394
|
if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
|
14856
15395
|
receiver = parse_variable_call(parser);
|
15396
|
+
receiver = pm_node_check_it(parser, receiver);
|
14857
15397
|
|
15398
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14858
15399
|
pm_parser_scope_push(parser, true);
|
14859
|
-
parser->current_param_name = 0;
|
14860
15400
|
lex_state_set(parser, PM_LEX_STATE_FNAME);
|
14861
15401
|
parser_lex(parser);
|
14862
15402
|
|
14863
15403
|
operator = parser->previous;
|
14864
15404
|
name = parse_method_definition_name(parser);
|
14865
15405
|
} else {
|
15406
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14866
15407
|
pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
|
14867
15408
|
pm_parser_scope_push(parser, true);
|
14868
|
-
|
15409
|
+
|
14869
15410
|
name = parser->previous;
|
14870
15411
|
}
|
14871
15412
|
|
@@ -14882,9 +15423,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14882
15423
|
case PM_TOKEN_KEYWORD___FILE__:
|
14883
15424
|
case PM_TOKEN_KEYWORD___LINE__:
|
14884
15425
|
case PM_TOKEN_KEYWORD___ENCODING__: {
|
15426
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14885
15427
|
pm_parser_scope_push(parser, true);
|
14886
|
-
parser->current_param_name = 0;
|
14887
15428
|
parser_lex(parser);
|
15429
|
+
|
14888
15430
|
pm_token_t identifier = parser->previous;
|
14889
15431
|
|
14890
15432
|
if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
|
@@ -14946,6 +15488,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14946
15488
|
pm_token_t lparen = parser->previous;
|
14947
15489
|
pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_DEF_RECEIVER);
|
14948
15490
|
|
15491
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
14949
15492
|
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
|
14950
15493
|
pm_token_t rparen = parser->previous;
|
14951
15494
|
|
@@ -14955,8 +15498,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14955
15498
|
operator = parser->previous;
|
14956
15499
|
receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
|
14957
15500
|
|
15501
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14958
15502
|
pm_parser_scope_push(parser, true);
|
14959
|
-
parser->current_param_name = 0;
|
14960
15503
|
|
14961
15504
|
// To push `PM_CONTEXT_DEF_PARAMS` again is for the same reason as described the above.
|
14962
15505
|
context_push(parser, PM_CONTEXT_DEF_PARAMS);
|
@@ -14964,8 +15507,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14964
15507
|
break;
|
14965
15508
|
}
|
14966
15509
|
default:
|
15510
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14967
15511
|
pm_parser_scope_push(parser, true);
|
14968
|
-
|
15512
|
+
|
14969
15513
|
name = parse_method_definition_name(parser);
|
14970
15514
|
break;
|
14971
15515
|
}
|
@@ -15018,8 +15562,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15018
15562
|
}
|
15019
15563
|
}
|
15020
15564
|
|
15021
|
-
uint32_t locals_body_index = (uint32_t) parser->current_scope->locals.size;
|
15022
|
-
|
15023
15565
|
context_pop(parser);
|
15024
15566
|
pm_node_t *statements = NULL;
|
15025
15567
|
pm_token_t equal;
|
@@ -15080,8 +15622,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15080
15622
|
}
|
15081
15623
|
|
15082
15624
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
15083
|
-
|
15625
|
+
|
15084
15626
|
pm_parser_scope_pop(parser);
|
15627
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
15628
|
+
|
15629
|
+
/**
|
15630
|
+
* If the final character is @. As is the case when defining
|
15631
|
+
* methods to override the unary operators, we should ignore
|
15632
|
+
* the @ in the same way we do for symbols.
|
15633
|
+
*/
|
15634
|
+
name.end = parse_operator_symbol_name(&name);
|
15085
15635
|
|
15086
15636
|
return (pm_node_t *) pm_def_node_create(
|
15087
15637
|
parser,
|
@@ -15090,7 +15640,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15090
15640
|
params,
|
15091
15641
|
statements,
|
15092
15642
|
&locals,
|
15093
|
-
locals_body_index,
|
15094
15643
|
&def_keyword,
|
15095
15644
|
&operator,
|
15096
15645
|
&lparen,
|
@@ -15309,9 +15858,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15309
15858
|
pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
|
15310
15859
|
}
|
15311
15860
|
|
15312
|
-
pm_constant_id_t
|
15313
|
-
parser->current_param_name = 0;
|
15861
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
15314
15862
|
pm_parser_scope_push(parser, true);
|
15863
|
+
|
15315
15864
|
accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
|
15316
15865
|
pm_node_t *statements = NULL;
|
15317
15866
|
|
@@ -15328,7 +15877,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15328
15877
|
|
15329
15878
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
15330
15879
|
pm_parser_scope_pop(parser);
|
15331
|
-
parser
|
15880
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
15332
15881
|
|
15333
15882
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
|
15334
15883
|
|
@@ -15914,6 +16463,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15914
16463
|
// context of a multiple assignment. We enforce that here. We'll
|
15915
16464
|
// still lex past it though and create a missing node place.
|
15916
16465
|
if (binding_power != PM_BINDING_POWER_STATEMENT) {
|
16466
|
+
pm_parser_err_previous(parser, diag_id);
|
15917
16467
|
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
15918
16468
|
}
|
15919
16469
|
|
@@ -15995,7 +16545,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15995
16545
|
parser_lex(parser);
|
15996
16546
|
|
15997
16547
|
pm_token_t operator = parser->previous;
|
16548
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
15998
16549
|
pm_parser_scope_push(parser, false);
|
16550
|
+
|
15999
16551
|
pm_block_parameters_node_t *block_parameters;
|
16000
16552
|
|
16001
16553
|
switch (parser->current.type) {
|
@@ -16030,12 +16582,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16030
16582
|
}
|
16031
16583
|
}
|
16032
16584
|
|
16033
|
-
uint32_t locals_body_index = 0;
|
16034
|
-
|
16035
|
-
if (block_parameters) {
|
16036
|
-
locals_body_index = (uint32_t) parser->current_scope->locals.size;
|
16037
|
-
}
|
16038
|
-
|
16039
16585
|
pm_token_t opening;
|
16040
16586
|
pm_node_t *body = NULL;
|
16041
16587
|
parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
|
@@ -16070,13 +16616,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16070
16616
|
|
16071
16617
|
if (parameters == NULL && (maximum > 0)) {
|
16072
16618
|
parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = operator.start, .end = parser->previous.end }, maximum);
|
16073
|
-
locals_body_index = maximum;
|
16074
16619
|
}
|
16075
16620
|
|
16076
16621
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
16622
|
+
|
16077
16623
|
pm_parser_scope_pop(parser);
|
16078
16624
|
pm_accepts_block_stack_pop(parser);
|
16079
|
-
|
16625
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
16626
|
+
|
16627
|
+
return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
|
16080
16628
|
}
|
16081
16629
|
case PM_TOKEN_UPLUS: {
|
16082
16630
|
parser_lex(parser);
|
@@ -16095,12 +16643,34 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16095
16643
|
|
16096
16644
|
return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END);
|
16097
16645
|
}
|
16098
|
-
default:
|
16099
|
-
|
16646
|
+
default: {
|
16647
|
+
pm_context_t recoverable = context_recoverable(parser, &parser->current);
|
16648
|
+
|
16649
|
+
if (recoverable != PM_CONTEXT_NONE) {
|
16100
16650
|
parser->recovering = true;
|
16651
|
+
|
16652
|
+
// If the given error is not the generic one, then we'll add it
|
16653
|
+
// here because it will provide more context in addition to the
|
16654
|
+
// recoverable error that we will also add.
|
16655
|
+
if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
|
16656
|
+
pm_parser_err_previous(parser, diag_id);
|
16657
|
+
}
|
16658
|
+
|
16659
|
+
// If we get here, then we are assuming this token is closing a
|
16660
|
+
// parent context, so we'll indicate that to the user so that
|
16661
|
+
// they know how we behaved.
|
16662
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
|
16663
|
+
} else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
|
16664
|
+
// We're going to make a special case here, because "cannot
|
16665
|
+
// parse expression" is pretty generic, and we know here that we
|
16666
|
+
// have an unexpected token.
|
16667
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
|
16668
|
+
} else {
|
16669
|
+
pm_parser_err_previous(parser, diag_id);
|
16101
16670
|
}
|
16102
16671
|
|
16103
16672
|
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
16673
|
+
}
|
16104
16674
|
}
|
16105
16675
|
}
|
16106
16676
|
|
@@ -16412,7 +16982,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16412
16982
|
}
|
16413
16983
|
|
16414
16984
|
// If this node cannot be writable, then we have an error.
|
16415
|
-
if (pm_call_node_writable_p(cast)) {
|
16985
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
16416
16986
|
parse_write_name(parser, &cast->name);
|
16417
16987
|
} else {
|
16418
16988
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -16523,7 +17093,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16523
17093
|
}
|
16524
17094
|
|
16525
17095
|
// If this node cannot be writable, then we have an error.
|
16526
|
-
if (pm_call_node_writable_p(cast)) {
|
17096
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
16527
17097
|
parse_write_name(parser, &cast->name);
|
16528
17098
|
} else {
|
16529
17099
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -16644,7 +17214,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16644
17214
|
}
|
16645
17215
|
|
16646
17216
|
// If this node cannot be writable, then we have an error.
|
16647
|
-
if (pm_call_node_writable_p(cast)) {
|
17217
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
16648
17218
|
parse_write_name(parser, &cast->name);
|
16649
17219
|
} else {
|
16650
17220
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -17063,15 +17633,12 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
17063
17633
|
*/
|
17064
17634
|
static pm_node_t *
|
17065
17635
|
parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
|
17066
|
-
|
17067
|
-
pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call);
|
17636
|
+
pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, diag_id);
|
17068
17637
|
|
17069
17638
|
switch (PM_NODE_TYPE(node)) {
|
17070
17639
|
case PM_MISSING_NODE:
|
17071
17640
|
// If we found a syntax error, then the type of node returned by
|
17072
|
-
// parse_expression_prefix is going to be a missing node.
|
17073
|
-
// case we need to add the error message to the parser's error list.
|
17074
|
-
pm_parser_err(parser, recovery.end, recovery.end, diag_id);
|
17641
|
+
// parse_expression_prefix is going to be a missing node.
|
17075
17642
|
return node;
|
17076
17643
|
case PM_PRE_EXECUTION_NODE:
|
17077
17644
|
case PM_POST_EXECUTION_NODE:
|
@@ -17080,7 +17647,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
17080
17647
|
case PM_UNDEF_NODE:
|
17081
17648
|
// These expressions are statements, and cannot be followed by
|
17082
17649
|
// operators (except modifiers).
|
17083
|
-
if (pm_binding_powers[parser->current.type].left >
|
17650
|
+
if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
|
17084
17651
|
return node;
|
17085
17652
|
}
|
17086
17653
|
break;
|
@@ -17175,9 +17742,14 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
17175
17742
|
|
17176
17743
|
static pm_node_t *
|
17177
17744
|
parse_program(pm_parser_t *parser) {
|
17178
|
-
|
17179
|
-
|
17745
|
+
// If the current scope is NULL, then we want to push a new top level scope.
|
17746
|
+
// The current scope could exist in the event that we are parsing an eval
|
17747
|
+
// and the user has passed into scopes that already exist.
|
17748
|
+
if (parser->current_scope == NULL) {
|
17749
|
+
pm_parser_scope_push(parser, true);
|
17750
|
+
}
|
17180
17751
|
|
17752
|
+
parser_lex(parser);
|
17181
17753
|
pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN);
|
17182
17754
|
if (!statements) {
|
17183
17755
|
statements = pm_statements_node_create(parser);
|
@@ -17234,7 +17806,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17234
17806
|
.encoding_changed_callback = NULL,
|
17235
17807
|
.encoding_comment_start = source,
|
17236
17808
|
.lex_callback = NULL,
|
17237
|
-
.
|
17809
|
+
.filepath = { 0 },
|
17238
17810
|
.constant_pool = { 0 },
|
17239
17811
|
.newline_list = { 0 },
|
17240
17812
|
.integer_base = 0,
|
@@ -17248,8 +17820,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17248
17820
|
.in_keyword_arg = false,
|
17249
17821
|
.current_param_name = 0,
|
17250
17822
|
.semantic_token_seen = false,
|
17251
|
-
.frozen_string_literal = false
|
17252
|
-
.suppress_warnings = false
|
17823
|
+
.frozen_string_literal = false
|
17253
17824
|
};
|
17254
17825
|
|
17255
17826
|
// Initialize the constant pool. We're going to completely guess as to the
|
@@ -17278,7 +17849,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17278
17849
|
// If options were provided to this parse, establish them here.
|
17279
17850
|
if (options != NULL) {
|
17280
17851
|
// filepath option
|
17281
|
-
parser->
|
17852
|
+
parser->filepath = options->filepath;
|
17282
17853
|
|
17283
17854
|
// line option
|
17284
17855
|
parser->start_line = options->line;
|
@@ -17295,10 +17866,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17295
17866
|
parser->frozen_string_literal = true;
|
17296
17867
|
}
|
17297
17868
|
|
17298
|
-
//
|
17299
|
-
|
17300
|
-
parser->suppress_warnings = true;
|
17301
|
-
}
|
17869
|
+
// version option
|
17870
|
+
parser->version = options->version;
|
17302
17871
|
|
17303
17872
|
// scopes option
|
17304
17873
|
for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
|
@@ -17382,7 +17951,7 @@ pm_magic_comment_list_free(pm_list_t *list) {
|
|
17382
17951
|
*/
|
17383
17952
|
PRISM_EXPORTED_FUNCTION void
|
17384
17953
|
pm_parser_free(pm_parser_t *parser) {
|
17385
|
-
pm_string_free(&parser->
|
17954
|
+
pm_string_free(&parser->filepath);
|
17386
17955
|
pm_diagnostic_list_free(&parser->error_list);
|
17387
17956
|
pm_diagnostic_list_free(&parser->warning_list);
|
17388
17957
|
pm_comment_list_free(&parser->comment_list);
|
@@ -17484,3 +18053,299 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
|
|
17484
18053
|
#undef PM_LOCATION_NODE_VALUE
|
17485
18054
|
#undef PM_LOCATION_NULL_VALUE
|
17486
18055
|
#undef PM_LOCATION_TOKEN_VALUE
|
18056
|
+
|
18057
|
+
/** An error that is going to be formatted into the output. */
|
18058
|
+
typedef struct {
|
18059
|
+
/** A pointer to the diagnostic that was generated during parsing. */
|
18060
|
+
pm_diagnostic_t *error;
|
18061
|
+
|
18062
|
+
/** The start line of the diagnostic message. */
|
18063
|
+
uint32_t line;
|
18064
|
+
|
18065
|
+
/** The column start of the diagnostic message. */
|
18066
|
+
uint32_t column_start;
|
18067
|
+
|
18068
|
+
/** The column end of the diagnostic message. */
|
18069
|
+
uint32_t column_end;
|
18070
|
+
} pm_error_t;
|
18071
|
+
|
18072
|
+
/** The format that will be used to format the errors into the output. */
|
18073
|
+
typedef struct {
|
18074
|
+
/** The prefix that will be used for line numbers. */
|
18075
|
+
const char *number_prefix;
|
18076
|
+
|
18077
|
+
/** The prefix that will be used for blank lines. */
|
18078
|
+
const char *blank_prefix;
|
18079
|
+
|
18080
|
+
/** The divider that will be used between sections of source code. */
|
18081
|
+
const char *divider;
|
18082
|
+
|
18083
|
+
/** The length of the blank prefix. */
|
18084
|
+
size_t blank_prefix_length;
|
18085
|
+
|
18086
|
+
/** The length of the divider. */
|
18087
|
+
size_t divider_length;
|
18088
|
+
} pm_error_format_t;
|
18089
|
+
|
18090
|
+
#define PM_COLOR_GRAY "\033[38;5;102m"
|
18091
|
+
#define PM_COLOR_RED "\033[1;31m"
|
18092
|
+
#define PM_COLOR_RESET "\033[0m"
|
18093
|
+
|
18094
|
+
static inline pm_error_t *
|
18095
|
+
pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
|
18096
|
+
pm_error_t *errors = calloc(error_list->size, sizeof(pm_error_t));
|
18097
|
+
|
18098
|
+
for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
|
18099
|
+
pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start);
|
18100
|
+
pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end);
|
18101
|
+
|
18102
|
+
// We're going to insert this error into the array in sorted order. We
|
18103
|
+
// do this by finding the first error that has a line number greater
|
18104
|
+
// than the current error and then inserting the current error before
|
18105
|
+
// that one.
|
18106
|
+
size_t index = 0;
|
18107
|
+
while (
|
18108
|
+
(index < error_list->size) &&
|
18109
|
+
(errors[index].error != NULL) &&
|
18110
|
+
(
|
18111
|
+
(errors[index].line < ((uint32_t) start.line)) ||
|
18112
|
+
(errors[index].line == ((uint32_t) start.line) && errors[index].column_start < ((uint32_t) start.column))
|
18113
|
+
)
|
18114
|
+
) index++;
|
18115
|
+
|
18116
|
+
// Now we're going to shift all of the errors after this one down one
|
18117
|
+
// index to make room for the new error.
|
18118
|
+
if (index + 1 < error_list->size) {
|
18119
|
+
memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
|
18120
|
+
}
|
18121
|
+
|
18122
|
+
// Finally, we'll insert the error into the array.
|
18123
|
+
uint32_t column_end;
|
18124
|
+
if (start.line == end.line) {
|
18125
|
+
column_end = (uint32_t) end.column;
|
18126
|
+
} else {
|
18127
|
+
column_end = (uint32_t) (newline_list->offsets[start.line] - newline_list->offsets[start.line - 1] - 1);
|
18128
|
+
}
|
18129
|
+
|
18130
|
+
// Ensure we have at least one column of error.
|
18131
|
+
if (((uint32_t) start.column) == column_end) column_end++;
|
18132
|
+
|
18133
|
+
errors[index] = (pm_error_t) {
|
18134
|
+
.error = error,
|
18135
|
+
.line = (uint32_t) start.line,
|
18136
|
+
.column_start = (uint32_t) start.column,
|
18137
|
+
.column_end = column_end
|
18138
|
+
};
|
18139
|
+
}
|
18140
|
+
|
18141
|
+
return errors;
|
18142
|
+
}
|
18143
|
+
|
18144
|
+
static inline void
|
18145
|
+
pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, size_t line, pm_buffer_t *buffer) {
|
18146
|
+
const uint8_t *start = &parser->start[newline_list->offsets[line - 1]];
|
18147
|
+
const uint8_t *end;
|
18148
|
+
|
18149
|
+
if (line >= newline_list->size) {
|
18150
|
+
end = parser->end;
|
18151
|
+
} else {
|
18152
|
+
end = &parser->start[newline_list->offsets[line]];
|
18153
|
+
}
|
18154
|
+
|
18155
|
+
pm_buffer_append_format(buffer, number_prefix, (uint32_t) line);
|
18156
|
+
pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
|
18157
|
+
|
18158
|
+
if (end == parser->end && end[-1] != '\n') {
|
18159
|
+
pm_buffer_append_string(buffer, "\n", 1);
|
18160
|
+
}
|
18161
|
+
}
|
18162
|
+
|
18163
|
+
/**
|
18164
|
+
* Format the errors on the parser into the given buffer.
|
18165
|
+
*/
|
18166
|
+
PRISM_EXPORTED_FUNCTION void
|
18167
|
+
pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool colorize) {
|
18168
|
+
const pm_list_t *error_list = &parser->error_list;
|
18169
|
+
assert(error_list->size != 0);
|
18170
|
+
|
18171
|
+
// First, we're going to sort all of the errors by line number using an
|
18172
|
+
// insertion sort into a newly allocated array.
|
18173
|
+
const pm_newline_list_t *newline_list = &parser->newline_list;
|
18174
|
+
pm_error_t *errors = pm_parser_errors_format_sort(error_list, newline_list);
|
18175
|
+
|
18176
|
+
// Now we're going to determine how we're going to format line numbers and
|
18177
|
+
// blank lines based on the maximum number of digits in the line numbers
|
18178
|
+
// that are going to be displayed.
|
18179
|
+
pm_error_format_t error_format;
|
18180
|
+
size_t max_line_number = errors[error_list->size - 1].line;
|
18181
|
+
|
18182
|
+
if (max_line_number < 10) {
|
18183
|
+
if (colorize) {
|
18184
|
+
error_format = (pm_error_format_t) {
|
18185
|
+
.number_prefix = PM_COLOR_GRAY "%1" PRIu32 " | " PM_COLOR_RESET,
|
18186
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18187
|
+
.divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
|
18188
|
+
};
|
18189
|
+
} else {
|
18190
|
+
error_format = (pm_error_format_t) {
|
18191
|
+
.number_prefix = "%1" PRIu32 " | ",
|
18192
|
+
.blank_prefix = " | ",
|
18193
|
+
.divider = " ~~~~~\n"
|
18194
|
+
};
|
18195
|
+
}
|
18196
|
+
} else if (max_line_number < 100) {
|
18197
|
+
if (colorize) {
|
18198
|
+
error_format = (pm_error_format_t) {
|
18199
|
+
.number_prefix = PM_COLOR_GRAY "%2" PRIu32 " | " PM_COLOR_RESET,
|
18200
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18201
|
+
.divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
|
18202
|
+
};
|
18203
|
+
} else {
|
18204
|
+
error_format = (pm_error_format_t) {
|
18205
|
+
.number_prefix = "%2" PRIu32 " | ",
|
18206
|
+
.blank_prefix = " | ",
|
18207
|
+
.divider = " ~~~~~~\n"
|
18208
|
+
};
|
18209
|
+
}
|
18210
|
+
} else if (max_line_number < 1000) {
|
18211
|
+
if (colorize) {
|
18212
|
+
error_format = (pm_error_format_t) {
|
18213
|
+
.number_prefix = PM_COLOR_GRAY "%3" PRIu32 " | " PM_COLOR_RESET,
|
18214
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18215
|
+
.divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
|
18216
|
+
};
|
18217
|
+
} else {
|
18218
|
+
error_format = (pm_error_format_t) {
|
18219
|
+
.number_prefix = "%3" PRIu32 " | ",
|
18220
|
+
.blank_prefix = " | ",
|
18221
|
+
.divider = " ~~~~~~~\n"
|
18222
|
+
};
|
18223
|
+
}
|
18224
|
+
} else if (max_line_number < 10000) {
|
18225
|
+
if (colorize) {
|
18226
|
+
error_format = (pm_error_format_t) {
|
18227
|
+
.number_prefix = PM_COLOR_GRAY "%4" PRIu32 " | " PM_COLOR_RESET,
|
18228
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18229
|
+
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
18230
|
+
};
|
18231
|
+
} else {
|
18232
|
+
error_format = (pm_error_format_t) {
|
18233
|
+
.number_prefix = "%4" PRIu32 " | ",
|
18234
|
+
.blank_prefix = " | ",
|
18235
|
+
.divider = " ~~~~~~~~\n"
|
18236
|
+
};
|
18237
|
+
}
|
18238
|
+
} else {
|
18239
|
+
if (colorize) {
|
18240
|
+
error_format = (pm_error_format_t) {
|
18241
|
+
.number_prefix = PM_COLOR_GRAY "%5" PRIu32 " | " PM_COLOR_RESET,
|
18242
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18243
|
+
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
18244
|
+
};
|
18245
|
+
} else {
|
18246
|
+
error_format = (pm_error_format_t) {
|
18247
|
+
.number_prefix = "%5" PRIu32 " | ",
|
18248
|
+
.blank_prefix = " | ",
|
18249
|
+
.divider = " ~~~~~~~~\n"
|
18250
|
+
};
|
18251
|
+
}
|
18252
|
+
}
|
18253
|
+
|
18254
|
+
error_format.blank_prefix_length = strlen(error_format.blank_prefix);
|
18255
|
+
error_format.divider_length = strlen(error_format.divider);
|
18256
|
+
|
18257
|
+
// Now we're going to iterate through every error in our error list and
|
18258
|
+
// display it. While we're iterating, we will display some padding lines of
|
18259
|
+
// the source before the error to give some context. We'll be careful not to
|
18260
|
+
// display the same line twice in case the errors are close enough in the
|
18261
|
+
// source.
|
18262
|
+
uint32_t last_line = 0;
|
18263
|
+
const pm_encoding_t *encoding = parser->encoding;
|
18264
|
+
|
18265
|
+
for (size_t index = 0; index < error_list->size; index++) {
|
18266
|
+
pm_error_t *error = &errors[index];
|
18267
|
+
|
18268
|
+
// Here we determine how many lines of padding of the source to display,
|
18269
|
+
// based on the difference from the last line that was displayed.
|
18270
|
+
if (error->line - last_line > 1) {
|
18271
|
+
if (error->line - last_line > 2) {
|
18272
|
+
if ((index != 0) && (error->line - last_line > 3)) {
|
18273
|
+
pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
|
18274
|
+
}
|
18275
|
+
|
18276
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18277
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
|
18278
|
+
}
|
18279
|
+
|
18280
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18281
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
|
18282
|
+
}
|
18283
|
+
|
18284
|
+
// If this is the first error or we're on a new line, then we'll display
|
18285
|
+
// the line that has the error in it.
|
18286
|
+
if ((index == 0) || (error->line != last_line)) {
|
18287
|
+
if (colorize) {
|
18288
|
+
pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 13);
|
18289
|
+
} else {
|
18290
|
+
pm_buffer_append_string(buffer, "> ", 2);
|
18291
|
+
}
|
18292
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
|
18293
|
+
}
|
18294
|
+
|
18295
|
+
// Now we'll display the actual error message. We'll do this by first
|
18296
|
+
// putting the prefix to the line, then a bunch of blank spaces
|
18297
|
+
// depending on the column, then as many carets as we need to display
|
18298
|
+
// the width of the error, then the error message itself.
|
18299
|
+
//
|
18300
|
+
// Note that this doesn't take into account the width of the actual
|
18301
|
+
// character when displayed in the terminal. For some east-asian
|
18302
|
+
// languages or emoji, this means it can be thrown off pretty badly. We
|
18303
|
+
// will need to solve this eventually.
|
18304
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18305
|
+
pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
|
18306
|
+
|
18307
|
+
size_t column = 0;
|
18308
|
+
const uint8_t *start = &parser->start[newline_list->offsets[error->line - 1]];
|
18309
|
+
|
18310
|
+
while (column < error->column_end) {
|
18311
|
+
if (column < error->column_start) {
|
18312
|
+
pm_buffer_append_byte(buffer, ' ');
|
18313
|
+
} else if (colorize) {
|
18314
|
+
pm_buffer_append_string(buffer, PM_COLOR_RED "^" PM_COLOR_RESET, 12);
|
18315
|
+
} else {
|
18316
|
+
pm_buffer_append_byte(buffer, '^');
|
18317
|
+
}
|
18318
|
+
|
18319
|
+
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
18320
|
+
column += (char_width == 0 ? 1 : char_width);
|
18321
|
+
}
|
18322
|
+
|
18323
|
+
pm_buffer_append_byte(buffer, ' ');
|
18324
|
+
|
18325
|
+
const char *message = error->error->message;
|
18326
|
+
pm_buffer_append_string(buffer, message, strlen(message));
|
18327
|
+
pm_buffer_append_byte(buffer, '\n');
|
18328
|
+
|
18329
|
+
// Here we determine how many lines of padding to display after the
|
18330
|
+
// error, depending on where the next error is in source.
|
18331
|
+
last_line = error->line;
|
18332
|
+
size_t next_line = (index == error_list->size - 1) ? newline_list->size : errors[index + 1].line;
|
18333
|
+
|
18334
|
+
if (next_line - last_line > 1) {
|
18335
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18336
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
|
18337
|
+
}
|
18338
|
+
|
18339
|
+
if (next_line - last_line > 1) {
|
18340
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18341
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
|
18342
|
+
}
|
18343
|
+
}
|
18344
|
+
|
18345
|
+
// Finally, we'll free the array of errors that we allocated.
|
18346
|
+
free(errors);
|
18347
|
+
}
|
18348
|
+
|
18349
|
+
#undef PM_COLOR_GRAY
|
18350
|
+
#undef PM_COLOR_RED
|
18351
|
+
#undef PM_COLOR_RESET
|