prism 0.19.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +48 -1
- data/Makefile +5 -0
- data/README.md +8 -6
- data/config.yml +236 -38
- data/docs/build_system.md +19 -2
- data/docs/cruby_compilation.md +27 -0
- data/docs/parser_translation.md +34 -0
- data/docs/parsing_rules.md +19 -0
- data/docs/releasing.md +3 -3
- data/docs/ruby_api.md +1 -1
- data/docs/serialization.md +17 -5
- data/ext/prism/api_node.c +101 -81
- data/ext/prism/extension.c +74 -11
- data/ext/prism/extension.h +1 -1
- data/include/prism/ast.h +1700 -505
- data/include/prism/defines.h +8 -0
- data/include/prism/diagnostic.h +39 -2
- data/include/prism/encoding.h +10 -0
- data/include/prism/options.h +40 -14
- data/include/prism/parser.h +34 -18
- data/include/prism/util/pm_buffer.h +9 -0
- data/include/prism/util/pm_constant_pool.h +18 -0
- data/include/prism/util/pm_newline_list.h +0 -11
- data/include/prism/version.h +2 -2
- data/include/prism.h +19 -2
- data/lib/prism/debug.rb +11 -5
- data/lib/prism/dot_visitor.rb +36 -14
- data/lib/prism/dsl.rb +22 -22
- data/lib/prism/ffi.rb +2 -2
- data/lib/prism/node.rb +1020 -737
- data/lib/prism/node_ext.rb +2 -2
- data/lib/prism/parse_result.rb +17 -9
- data/lib/prism/serialize.rb +53 -29
- data/lib/prism/translation/parser/compiler.rb +1828 -0
- data/lib/prism/translation/parser/lexer.rb +335 -0
- data/lib/prism/translation/parser/rubocop.rb +37 -0
- data/lib/prism/translation/parser.rb +171 -0
- data/lib/prism/translation.rb +11 -0
- data/lib/prism.rb +1 -0
- data/prism.gemspec +12 -5
- data/rbi/prism.rbi +150 -88
- data/rbi/prism_static.rbi +15 -3
- data/sig/prism.rbs +996 -961
- data/sig/prism_static.rbs +123 -46
- data/src/diagnostic.c +259 -219
- data/src/encoding.c +5 -9
- data/src/node.c +2 -6
- data/src/options.c +24 -5
- data/src/prettyprint.c +174 -42
- data/src/prism.c +1344 -479
- data/src/serialize.c +12 -9
- data/src/token_type.c +353 -4
- data/src/util/pm_buffer.c +11 -0
- data/src/util/pm_constant_pool.c +37 -11
- data/src/util/pm_newline_list.c +2 -14
- metadata +10 -3
- data/docs/building.md +0 -29
data/src/prism.c
CHANGED
@@ -164,7 +164,7 @@ debug_state(pm_parser_t *parser) {
|
|
164
164
|
|
165
165
|
PRISM_ATTRIBUTE_UNUSED static void
|
166
166
|
debug_token(pm_token_t * token) {
|
167
|
-
fprintf(stderr, "%s: \"%.*s\"\n",
|
167
|
+
fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_human(token->type), (int) (token->end - token->start), token->start);
|
168
168
|
}
|
169
169
|
|
170
170
|
#endif
|
@@ -423,6 +423,11 @@ lex_state_beg_p(pm_parser_t *parser) {
|
|
423
423
|
return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
|
424
424
|
}
|
425
425
|
|
426
|
+
static inline bool
|
427
|
+
lex_state_arg_labeled_p(pm_parser_t *parser) {
|
428
|
+
return (parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
|
429
|
+
}
|
430
|
+
|
426
431
|
static inline bool
|
427
432
|
lex_state_arg_p(pm_parser_t *parser) {
|
428
433
|
return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
|
@@ -548,9 +553,7 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
|
|
548
553
|
*/
|
549
554
|
static inline void
|
550
555
|
pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
|
551
|
-
|
552
|
-
pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
|
553
|
-
}
|
556
|
+
pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
|
554
557
|
}
|
555
558
|
|
556
559
|
/**
|
@@ -813,6 +816,9 @@ typedef struct {
|
|
813
816
|
|
814
817
|
/** The optional block attached to the call. */
|
815
818
|
pm_node_t *block;
|
819
|
+
|
820
|
+
/** The flag indicating whether this arguments list has forwarding argument. */
|
821
|
+
bool has_forwarding;
|
816
822
|
} pm_arguments_t;
|
817
823
|
|
818
824
|
/**
|
@@ -864,6 +870,105 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
|
|
864
870
|
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
|
865
871
|
}
|
866
872
|
|
873
|
+
/******************************************************************************/
|
874
|
+
/* Basic character checks */
|
875
|
+
/******************************************************************************/
|
876
|
+
|
877
|
+
/**
|
878
|
+
* This function is used extremely frequently to lex all of the identifiers in a
|
879
|
+
* source file, so it's important that it be as fast as possible. For this
|
880
|
+
* reason we have the encoding_changed boolean to check if we need to go through
|
881
|
+
* the function pointer or can just directly use the UTF-8 functions.
|
882
|
+
*/
|
883
|
+
static inline size_t
|
884
|
+
char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
|
885
|
+
if (parser->encoding_changed) {
|
886
|
+
size_t width;
|
887
|
+
if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
|
888
|
+
return width;
|
889
|
+
} else if (*b == '_') {
|
890
|
+
return 1;
|
891
|
+
} else if (*b >= 0x80) {
|
892
|
+
return parser->encoding->char_width(b, parser->end - b);
|
893
|
+
} else {
|
894
|
+
return 0;
|
895
|
+
}
|
896
|
+
} else if (*b < 0x80) {
|
897
|
+
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
|
898
|
+
} else {
|
899
|
+
return pm_encoding_utf_8_char_width(b, parser->end - b);
|
900
|
+
}
|
901
|
+
}
|
902
|
+
|
903
|
+
/**
|
904
|
+
* Similar to char_is_identifier but this function assumes that the encoding
|
905
|
+
* has not been changed.
|
906
|
+
*/
|
907
|
+
static inline size_t
|
908
|
+
char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
909
|
+
if (*b < 0x80) {
|
910
|
+
return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
|
911
|
+
} else {
|
912
|
+
return pm_encoding_utf_8_char_width(b, end - b);
|
913
|
+
}
|
914
|
+
}
|
915
|
+
|
916
|
+
/**
|
917
|
+
* Like the above, this function is also used extremely frequently to lex all of
|
918
|
+
* the identifiers in a source file once the first character has been found. So
|
919
|
+
* it's important that it be as fast as possible.
|
920
|
+
*/
|
921
|
+
static inline size_t
|
922
|
+
char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
923
|
+
if (parser->encoding_changed) {
|
924
|
+
size_t width;
|
925
|
+
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
|
926
|
+
return width;
|
927
|
+
} else if (*b == '_') {
|
928
|
+
return 1;
|
929
|
+
} else if (*b >= 0x80) {
|
930
|
+
return parser->encoding->char_width(b, parser->end - b);
|
931
|
+
} else {
|
932
|
+
return 0;
|
933
|
+
}
|
934
|
+
}
|
935
|
+
return char_is_identifier_utf8(b, parser->end);
|
936
|
+
}
|
937
|
+
|
938
|
+
// Here we're defining a perfect hash for the characters that are allowed in
|
939
|
+
// global names. This is used to quickly check the next character after a $ to
|
940
|
+
// see if it's a valid character for a global name.
|
941
|
+
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
|
942
|
+
#define PUNCT(idx) ( \
|
943
|
+
BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
|
944
|
+
BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
|
945
|
+
BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
|
946
|
+
BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
|
947
|
+
BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
|
948
|
+
BIT('0', idx))
|
949
|
+
|
950
|
+
const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
|
951
|
+
|
952
|
+
#undef BIT
|
953
|
+
#undef PUNCT
|
954
|
+
|
955
|
+
static inline bool
|
956
|
+
char_is_global_name_punctuation(const uint8_t b) {
|
957
|
+
const unsigned int i = (const unsigned int) b;
|
958
|
+
if (i <= 0x20 || 0x7e < i) return false;
|
959
|
+
|
960
|
+
return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
|
961
|
+
}
|
962
|
+
|
963
|
+
static inline bool
|
964
|
+
token_is_setter_name(pm_token_t *token) {
|
965
|
+
return (
|
966
|
+
(token->type == PM_TOKEN_IDENTIFIER) &&
|
967
|
+
(token->end - token->start >= 2) &&
|
968
|
+
(token->end[-1] == '=')
|
969
|
+
);
|
970
|
+
}
|
971
|
+
|
867
972
|
/******************************************************************************/
|
868
973
|
/* Node flag handling functions */
|
869
974
|
/******************************************************************************/
|
@@ -884,6 +989,22 @@ pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
|
|
884
989
|
node->flags &= (pm_node_flags_t) ~flag;
|
885
990
|
}
|
886
991
|
|
992
|
+
/**
|
993
|
+
* Set the repeated parameter flag on the given node.
|
994
|
+
*/
|
995
|
+
static inline void
|
996
|
+
pm_node_flag_set_repeated_parameter(pm_node_t *node) {
|
997
|
+
assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
|
998
|
+
PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
|
999
|
+
PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
|
1000
|
+
PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
|
1001
|
+
PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
|
1002
|
+
PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
|
1003
|
+
PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
|
1004
|
+
PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
|
1005
|
+
|
1006
|
+
pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
|
1007
|
+
}
|
887
1008
|
|
888
1009
|
/******************************************************************************/
|
889
1010
|
/* Node creation functions */
|
@@ -977,7 +1098,7 @@ static inline void *
|
|
977
1098
|
pm_alloc_node(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
|
978
1099
|
void *memory = calloc(1, size);
|
979
1100
|
if (memory == NULL) {
|
980
|
-
fprintf(stderr, "Failed to allocate %
|
1101
|
+
fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
|
981
1102
|
abort();
|
982
1103
|
}
|
983
1104
|
return memory;
|
@@ -1325,7 +1446,7 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
|
|
1325
1446
|
pm_assoc_node_t *node = PM_ALLOC_NODE(parser, pm_assoc_node_t);
|
1326
1447
|
const uint8_t *end;
|
1327
1448
|
|
1328
|
-
if (value != NULL) {
|
1449
|
+
if (value != NULL && value->location.end > key->location.end) {
|
1329
1450
|
end = value->location.end;
|
1330
1451
|
} else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
|
1331
1452
|
end = operator->end;
|
@@ -1333,6 +1454,13 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
|
|
1333
1454
|
end = key->location.end;
|
1334
1455
|
}
|
1335
1456
|
|
1457
|
+
// Hash string keys will be frozen, so we can mark them as frozen here so
|
1458
|
+
// that the compiler picks them up and also when we check for static literal
|
1459
|
+
// on the keys it gets factored in.
|
1460
|
+
if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
|
1461
|
+
key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
|
1462
|
+
}
|
1463
|
+
|
1336
1464
|
// If the key and value of this assoc node are both static literals, then
|
1337
1465
|
// we can mark this node as a static literal.
|
1338
1466
|
pm_node_flags_t flags = 0;
|
@@ -1490,7 +1618,7 @@ pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, p
|
|
1490
1618
|
* Allocate and initialize a new BlockNode node.
|
1491
1619
|
*/
|
1492
1620
|
static pm_block_node_t *
|
1493
|
-
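pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_t locals_body_index, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {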
|
1621
|
+
pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
|
1494
1622
|
pm_block_node_t *node = PM_ALLOC_NODE(parser, pm_block_node_t);
|
1495
1623
|
|
1496
1624
|
*node = (pm_block_node_t) {
|
@@ -1499,7 +1627,6 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_
|
|
1499
1627
|
.location = { .start = opening->start, .end = closing->end },
|
1500
1628
|
},
|
1501
1629
|
.locals = *locals,
|
1502
|
-
.locals_body_index = locals_body_index,
|
1503
1630
|
.parameters = parameters,
|
1504
1631
|
.body = body,
|
1505
1632
|
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
|
@@ -1645,12 +1772,13 @@ pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
|
|
1645
1772
|
* in the various specializations of this function.
|
1646
1773
|
*/
|
1647
1774
|
static pm_call_node_t *
|
1648
|
-
pm_call_node_create(pm_parser_t *parser) {
|
1775
|
+
pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
|
1649
1776
|
pm_call_node_t *node = PM_ALLOC_NODE(parser, pm_call_node_t);
|
1650
1777
|
|
1651
1778
|
*node = (pm_call_node_t) {
|
1652
1779
|
{
|
1653
1780
|
.type = PM_CALL_NODE,
|
1781
|
+
.flags = flags,
|
1654
1782
|
.location = PM_LOCATION_NULL_VALUE(parser),
|
1655
1783
|
},
|
1656
1784
|
.receiver = NULL,
|
@@ -1666,6 +1794,15 @@ pm_call_node_create(pm_parser_t *parser) {
|
|
1666
1794
|
return node;
|
1667
1795
|
}
|
1668
1796
|
|
1797
|
+
/**
|
1798
|
+
* Returns the value that the ignore visibility flag should be set to for the
|
1799
|
+
* given receiver.
|
1800
|
+
*/
|
1801
|
+
static inline pm_node_flags_t
|
1802
|
+
pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
|
1803
|
+
return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
|
1804
|
+
}
|
1805
|
+
|
1669
1806
|
/**
|
1670
1807
|
* Allocate and initialize a new CallNode node from an aref or an aset
|
1671
1808
|
* expression.
|
@@ -1674,7 +1811,7 @@ static pm_call_node_t *
|
|
1674
1811
|
pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
|
1675
1812
|
pm_assert_value_expression(parser, receiver);
|
1676
1813
|
|
1677
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1814
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1678
1815
|
|
1679
1816
|
node->base.location.start = receiver->location.start;
|
1680
1817
|
node->base.location.end = pm_arguments_end(arguments);
|
@@ -1700,7 +1837,7 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t
|
|
1700
1837
|
pm_assert_value_expression(parser, receiver);
|
1701
1838
|
pm_assert_value_expression(parser, argument);
|
1702
1839
|
|
1703
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1840
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1704
1841
|
|
1705
1842
|
node->base.location.start = MIN(receiver->location.start, argument->location.start);
|
1706
1843
|
node->base.location.end = MAX(receiver->location.end, argument->location.end);
|
@@ -1723,7 +1860,7 @@ static pm_call_node_t *
|
|
1723
1860
|
pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
|
1724
1861
|
pm_assert_value_expression(parser, receiver);
|
1725
1862
|
|
1726
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1863
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1727
1864
|
|
1728
1865
|
node->base.location.start = receiver->location.start;
|
1729
1866
|
const uint8_t *end = pm_arguments_end(arguments);
|
@@ -1754,7 +1891,7 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
|
|
1754
1891
|
*/
|
1755
1892
|
static pm_call_node_t *
|
1756
1893
|
pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
|
1757
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1894
|
+
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
|
1758
1895
|
|
1759
1896
|
node->base.location.start = message->start;
|
1760
1897
|
node->base.location.end = pm_arguments_end(arguments);
|
@@ -1776,7 +1913,7 @@ static pm_call_node_t *
|
|
1776
1913
|
pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
|
1777
1914
|
pm_assert_value_expression(parser, receiver);
|
1778
1915
|
|
1779
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1916
|
+
pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
|
1780
1917
|
|
1781
1918
|
node->base.location.start = message->start;
|
1782
1919
|
if (arguments->closing_loc.start != NULL) {
|
@@ -1802,7 +1939,7 @@ static pm_call_node_t *
|
|
1802
1939
|
pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
|
1803
1940
|
pm_assert_value_expression(parser, receiver);
|
1804
1941
|
|
1805
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1942
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1806
1943
|
|
1807
1944
|
node->base.location.start = receiver->location.start;
|
1808
1945
|
node->base.location.end = pm_arguments_end(arguments);
|
@@ -1829,7 +1966,7 @@ static pm_call_node_t *
|
|
1829
1966
|
pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
|
1830
1967
|
pm_assert_value_expression(parser, receiver);
|
1831
1968
|
|
1832
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1969
|
+
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
|
1833
1970
|
|
1834
1971
|
node->base.location.start = operator->start;
|
1835
1972
|
node->base.location.end = receiver->location.end;
|
@@ -1847,7 +1984,7 @@ pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *
|
|
1847
1984
|
*/
|
1848
1985
|
static pm_call_node_t *
|
1849
1986
|
pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
|
1850
|
-
pm_call_node_t *node = pm_call_node_create(parser);
|
1987
|
+
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
|
1851
1988
|
|
1852
1989
|
node->base.location = PM_LOCATION_TOKEN_VALUE(message);
|
1853
1990
|
node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
|
@@ -1885,11 +2022,12 @@ pm_call_node_index_p(pm_call_node_t *node) {
|
|
1885
2022
|
* operator assignment.
|
1886
2023
|
*/
|
1887
2024
|
static inline bool
|
1888
|
-
pm_call_node_writable_p(pm_call_node_t *node) {
|
2025
|
+
pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
|
1889
2026
|
return (
|
1890
2027
|
(node->message_loc.start != NULL) &&
|
1891
2028
|
(node->message_loc.end[-1] != '!') &&
|
1892
2029
|
(node->message_loc.end[-1] != '?') &&
|
2030
|
+
char_is_identifier_start(parser, node->message_loc.start) &&
|
1893
2031
|
(node->opening_loc.start == NULL) &&
|
1894
2032
|
(node->arguments == NULL) &&
|
1895
2033
|
(node->block == NULL)
|
@@ -2167,11 +2305,12 @@ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
|
|
2167
2305
|
static pm_index_target_node_t *
|
2168
2306
|
pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
|
2169
2307
|
pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
|
2308
|
+
pm_node_flags_t flags = target->base.flags;
|
2170
2309
|
|
2171
2310
|
*node = (pm_index_target_node_t) {
|
2172
2311
|
{
|
2173
2312
|
.type = PM_INDEX_TARGET_NODE,
|
2174
|
-
.flags =
|
2313
|
+
.flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
|
2175
2314
|
.location = target->base.location
|
2176
2315
|
},
|
2177
2316
|
.receiver = target->receiver,
|
@@ -2701,6 +2840,50 @@ pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *targ
|
|
2701
2840
|
return node;
|
2702
2841
|
}
|
2703
2842
|
|
2843
|
+
/**
|
2844
|
+
* Check if the receiver of a `def` node is allowed.
|
2845
|
+
*/
|
2846
|
+
static void
|
2847
|
+
pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
|
2848
|
+
switch (PM_NODE_TYPE(node)) {
|
2849
|
+
case PM_BEGIN_NODE: {
|
2850
|
+
const pm_begin_node_t *cast = (pm_begin_node_t *) node;
|
2851
|
+
if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
|
2852
|
+
break;
|
2853
|
+
}
|
2854
|
+
case PM_PARENTHESES_NODE: {
|
2855
|
+
const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
|
2856
|
+
if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
|
2857
|
+
break;
|
2858
|
+
}
|
2859
|
+
case PM_STATEMENTS_NODE: {
|
2860
|
+
const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
|
2861
|
+
pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
|
2862
|
+
break;
|
2863
|
+
}
|
2864
|
+
case PM_ARRAY_NODE:
|
2865
|
+
case PM_FLOAT_NODE:
|
2866
|
+
case PM_IMAGINARY_NODE:
|
2867
|
+
case PM_INTEGER_NODE:
|
2868
|
+
case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
|
2869
|
+
case PM_INTERPOLATED_STRING_NODE:
|
2870
|
+
case PM_INTERPOLATED_SYMBOL_NODE:
|
2871
|
+
case PM_INTERPOLATED_X_STRING_NODE:
|
2872
|
+
case PM_RATIONAL_NODE:
|
2873
|
+
case PM_REGULAR_EXPRESSION_NODE:
|
2874
|
+
case PM_SOURCE_ENCODING_NODE:
|
2875
|
+
case PM_SOURCE_FILE_NODE:
|
2876
|
+
case PM_SOURCE_LINE_NODE:
|
2877
|
+
case PM_STRING_NODE:
|
2878
|
+
case PM_SYMBOL_NODE:
|
2879
|
+
case PM_X_STRING_NODE:
|
2880
|
+
pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
|
2881
|
+
break;
|
2882
|
+
default:
|
2883
|
+
break;
|
2884
|
+
}
|
2885
|
+
}
|
2886
|
+
|
2704
2887
|
/**
|
2705
2888
|
* Allocate and initialize a new DefNode node.
|
2706
2889
|
*/
|
@@ -2712,7 +2895,6 @@ pm_def_node_create(
|
|
2712
2895
|
pm_parameters_node_t *parameters,
|
2713
2896
|
pm_node_t *body,
|
2714
2897
|
pm_constant_id_list_t *locals,
|
2715
|
-
uint32_t locals_body_index,
|
2716
2898
|
const pm_token_t *def_keyword,
|
2717
2899
|
const pm_token_t *operator,
|
2718
2900
|
const pm_token_t *lparen,
|
@@ -2729,6 +2911,10 @@ pm_def_node_create(
|
|
2729
2911
|
end = end_keyword->end;
|
2730
2912
|
}
|
2731
2913
|
|
2914
|
+
if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
|
2915
|
+
pm_def_node_receiver_check(parser, receiver);
|
2916
|
+
}
|
2917
|
+
|
2732
2918
|
*node = (pm_def_node_t) {
|
2733
2919
|
{
|
2734
2920
|
.type = PM_DEF_NODE,
|
@@ -2740,7 +2926,6 @@ pm_def_node_create(
|
|
2740
2926
|
.parameters = parameters,
|
2741
2927
|
.body = body,
|
2742
2928
|
.locals = *locals,
|
2743
|
-
.locals_body_index = locals_body_index,
|
2744
2929
|
.def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
|
2745
2930
|
.operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
|
2746
2931
|
.lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
|
@@ -3962,9 +4147,8 @@ pm_keyword_hash_node_create(pm_parser_t *parser) {
|
|
3962
4147
|
*/
|
3963
4148
|
static void
|
3964
4149
|
pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
|
3965
|
-
// If the element being added is not an AssocNode or does not have a symbol
|
3966
|
-
// we want to turn the
|
3967
|
-
// TODO: Rename the flag to SYMBOL_KEYS instead.
|
4150
|
+
// If the element being added is not an AssocNode or does not have a symbol
|
4151
|
+
// key, then we want to turn the SYMBOL_KEYS flag off.
|
3968
4152
|
if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
|
3969
4153
|
pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
|
3970
4154
|
}
|
@@ -4051,7 +4235,6 @@ static pm_lambda_node_t *
|
|
4051
4235
|
pm_lambda_node_create(
|
4052
4236
|
pm_parser_t *parser,
|
4053
4237
|
pm_constant_id_list_t *locals,
|
4054
|
-
uint32_t locals_body_index,
|
4055
4238
|
const pm_token_t *operator,
|
4056
4239
|
const pm_token_t *opening,
|
4057
4240
|
const pm_token_t *closing,
|
@@ -4069,7 +4252,6 @@ pm_lambda_node_create(
|
|
4069
4252
|
},
|
4070
4253
|
},
|
4071
4254
|
.locals = *locals,
|
4072
|
-
.locals_body_index = locals_body_index,
|
4073
4255
|
.operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
|
4074
4256
|
.opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
|
4075
4257
|
.closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
|
@@ -4161,12 +4343,10 @@ pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, c
|
|
4161
4343
|
}
|
4162
4344
|
|
4163
4345
|
/**
|
4164
|
-
* Allocate a new LocalVariableReadNode node.
|
4346
|
+
* Allocate a new LocalVariableReadNode node with constant_id.
|
4165
4347
|
*/
|
4166
4348
|
static pm_local_variable_read_node_t *
|
4167
|
-
pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
|
4168
|
-
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
|
4169
|
-
|
4349
|
+
pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth) {
|
4170
4350
|
if (parser->current_param_name == name_id) {
|
4171
4351
|
pm_parser_err_token(parser, name, PM_ERR_PARAMETER_CIRCULAR);
|
4172
4352
|
}
|
@@ -4185,6 +4365,15 @@ pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name,
|
|
4185
4365
|
return node;
|
4186
4366
|
}
|
4187
4367
|
|
4368
|
+
/**
|
4369
|
+
* Allocate a new LocalVariableReadNode node.
|
4370
|
+
*/
|
4371
|
+
static pm_local_variable_read_node_t *
|
4372
|
+
pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
|
4373
|
+
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
|
4374
|
+
return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth);
|
4375
|
+
}
|
4376
|
+
|
4188
4377
|
/**
|
4189
4378
|
* Allocate and initialize a new LocalVariableWriteNode node.
|
4190
4379
|
*/
|
@@ -4210,6 +4399,57 @@ pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name,
|
|
4210
4399
|
return node;
|
4211
4400
|
}
|
4212
4401
|
|
4402
|
+
/**
|
4403
|
+
* Returns true if the given bounds comprise `it`.
|
4404
|
+
*/
|
4405
|
+
static inline bool
|
4406
|
+
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
|
4407
|
+
return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
|
4408
|
+
}
|
4409
|
+
|
4410
|
+
/**
|
4411
|
+
* Returns true if the given node is `it` default parameter.
|
4412
|
+
*/
|
4413
|
+
static inline bool
|
4414
|
+
pm_node_is_it(pm_parser_t *parser, pm_node_t *node) {
|
4415
|
+
// Check if it's a local variable reference
|
4416
|
+
if (node->type != PM_CALL_NODE) {
|
4417
|
+
return false;
|
4418
|
+
}
|
4419
|
+
|
4420
|
+
// Check if it's a variable call
|
4421
|
+
pm_call_node_t *call_node = (pm_call_node_t *) node;
|
4422
|
+
if (!pm_call_node_variable_call_p(call_node)) {
|
4423
|
+
return false;
|
4424
|
+
}
|
4425
|
+
|
4426
|
+
// Check if it's called `it`
|
4427
|
+
pm_constant_id_t id = ((pm_call_node_t *)node)->name;
|
4428
|
+
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, id);
|
4429
|
+
return pm_token_is_it(constant->start, constant->start + constant->length);
|
4430
|
+
}
|
4431
|
+
|
4432
|
+
/**
|
4433
|
+
* Convert a `it` variable call node to a node for `it` default parameter.
|
4434
|
+
*/
|
4435
|
+
static pm_node_t *
|
4436
|
+
pm_node_check_it(pm_parser_t *parser, pm_node_t *node) {
|
4437
|
+
if (
|
4438
|
+
(parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0) &&
|
4439
|
+
!parser->current_scope->closed &&
|
4440
|
+
pm_node_is_it(parser, node)
|
4441
|
+
) {
|
4442
|
+
if (parser->current_scope->explicit_params) {
|
4443
|
+
pm_parser_err_previous(parser, PM_ERR_IT_NOT_ALLOWED);
|
4444
|
+
} else {
|
4445
|
+
pm_node_destroy(parser, node);
|
4446
|
+
pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
|
4447
|
+
node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
4448
|
+
}
|
4449
|
+
}
|
4450
|
+
return node;
|
4451
|
+
}
|
4452
|
+
|
4213
4453
|
/**
|
4214
4454
|
* Returns true if the given bounds comprise a numbered parameter (i.e., they
|
4215
4455
|
* are of the form /^_\d$/).
|
@@ -5195,7 +5435,7 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
|
|
5195
5435
|
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
5196
5436
|
.location = PM_LOCATION_TOKEN_VALUE(file_keyword),
|
5197
5437
|
},
|
5198
|
-
.filepath = parser->
|
5438
|
+
.filepath = parser->filepath
|
5199
5439
|
};
|
5200
5440
|
|
5201
5441
|
return node;
|
@@ -5372,18 +5612,59 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
|
|
5372
5612
|
return node;
|
5373
5613
|
}
|
5374
5614
|
|
5615
|
+
/**
|
5616
|
+
* Read through the contents of a string and check if it consists solely of US ASCII code points.
|
5617
|
+
*/
|
5618
|
+
static bool
|
5619
|
+
pm_ascii_only_p(const pm_string_t *contents) {
|
5620
|
+
const size_t length = pm_string_length(contents);
|
5621
|
+
const uint8_t *source = pm_string_source(contents);
|
5622
|
+
|
5623
|
+
for (size_t index = 0; index < length; index++) {
|
5624
|
+
if (source[index] & 0x80) return false;
|
5625
|
+
}
|
5626
|
+
|
5627
|
+
return true;
|
5628
|
+
}
|
5629
|
+
|
5630
|
+
/**
|
5631
|
+
* Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
|
5632
|
+
* encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
|
5633
|
+
* points. Otherwise, the encoding may be explicitly set with an escape
|
5634
|
+
* sequence.
|
5635
|
+
*/
|
5636
|
+
static inline pm_node_flags_t
|
5637
|
+
parse_symbol_encoding(const pm_parser_t *parser, const pm_string_t *contents) {
|
5638
|
+
if (parser->explicit_encoding != NULL) {
|
5639
|
+
// A Symbol may optionally have its encoding explicitly set. This will
|
5640
|
+
// happen if an escape sequence results in a non-ASCII code point.
|
5641
|
+
if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
|
5642
|
+
return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
|
5643
|
+
} else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
|
5644
|
+
return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
|
5645
|
+
}
|
5646
|
+
} else if (pm_ascii_only_p(contents)) {
|
5647
|
+
// Ruby stipulates that all source files must use an ASCII-compatible
|
5648
|
+
// encoding. Thus, all symbols appearing in source are eligible for
|
5649
|
+
// "downgrading" to US-ASCII.
|
5650
|
+
return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
|
5651
|
+
}
|
5652
|
+
|
5653
|
+
return 0;
|
5654
|
+
}
|
5655
|
+
|
5375
5656
|
/**
|
5376
5657
|
* Allocate and initialize a new SymbolNode node with the given unescaped
|
5377
5658
|
* string.
|
5378
5659
|
*/
|
5379
5660
|
static pm_symbol_node_t *
|
5380
|
-
pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped) {
|
5661
|
+
pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
|
5381
5662
|
pm_symbol_node_t *node = PM_ALLOC_NODE(parser, pm_symbol_node_t);
|
5382
5663
|
|
5383
5664
|
*node = (pm_symbol_node_t) {
|
5384
5665
|
{
|
5385
5666
|
.type = PM_SYMBOL_NODE,
|
5386
|
-
.flags = PM_NODE_FLAG_STATIC_LITERAL,
|
5667
|
+
.flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
|
5387
5668
|
.location = {
|
5388
5669
|
.start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
|
5389
5670
|
.end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
|
@@ -5403,7 +5684,7 @@ pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening,
|
|
5403
5684
|
*/
|
5404
5685
|
static inline pm_symbol_node_t *
|
5405
5686
|
pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
|
5406
|
-
return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY);
|
5687
|
+
return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
|
5407
5688
|
}
|
5408
5689
|
|
5409
5690
|
/**
|
@@ -5411,7 +5692,7 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
|
|
5411
5692
|
*/
|
5412
5693
|
static pm_symbol_node_t *
|
5413
5694
|
pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
|
5414
|
-
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string);
|
5695
|
+
pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, &parser->current_string));
|
5415
5696
|
parser->current_string = PM_STRING_EMPTY;
|
5416
5697
|
return node;
|
5417
5698
|
}
|
@@ -5433,6 +5714,8 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
|
|
5433
5714
|
|
5434
5715
|
assert((label.end - label.start) >= 0);
|
5435
5716
|
pm_string_shared_init(&node->unescaped, label.start, label.end);
|
5717
|
+
pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &node->unescaped));
|
5718
|
+
|
5436
5719
|
break;
|
5437
5720
|
}
|
5438
5721
|
case PM_TOKEN_MISSING: {
|
@@ -5495,6 +5778,8 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
|
|
5495
5778
|
.unescaped = node->unescaped
|
5496
5779
|
};
|
5497
5780
|
|
5781
|
+
pm_node_flag_set((pm_node_t *)new_node, parse_symbol_encoding(parser, &node->unescaped));
|
5782
|
+
|
5498
5783
|
// We are explicitly _not_ using pm_node_destroy here because we don't want
|
5499
5784
|
// to trash the unescaped string. We could instead copy the string if we
|
5500
5785
|
// know that it is owned, but we're taking the fast path for now.
|
@@ -5885,6 +6170,7 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
|
|
5885
6170
|
.closed = closed,
|
5886
6171
|
.explicit_params = false,
|
5887
6172
|
.numbered_parameters = 0,
|
6173
|
+
.forwarding_params = 0,
|
5888
6174
|
};
|
5889
6175
|
|
5890
6176
|
pm_constant_id_list_init(&scope->locals);
|
@@ -5893,6 +6179,76 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
|
|
5893
6179
|
return true;
|
5894
6180
|
}
|
5895
6181
|
|
6182
|
+
static void
|
6183
|
+
pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const pm_token_t * token, const uint8_t mask, pm_diagnostic_id_t diag)
|
6184
|
+
{
|
6185
|
+
pm_scope_t *scope = parser->current_scope;
|
6186
|
+
while (scope) {
|
6187
|
+
if (scope->forwarding_params & mask) {
|
6188
|
+
if (!scope->closed) {
|
6189
|
+
pm_parser_err_token(parser, token, diag);
|
6190
|
+
return;
|
6191
|
+
}
|
6192
|
+
return;
|
6193
|
+
}
|
6194
|
+
if (scope->closed) break;
|
6195
|
+
scope = scope->previous;
|
6196
|
+
}
|
6197
|
+
|
6198
|
+
pm_parser_err_token(parser, token, diag);
|
6199
|
+
}
|
6200
|
+
|
6201
|
+
static inline void
|
6202
|
+
pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token)
|
6203
|
+
{
|
6204
|
+
pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_BLOCK, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
|
6205
|
+
}
|
6206
|
+
|
6207
|
+
static void
|
6208
|
+
pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token)
|
6209
|
+
{
|
6210
|
+
pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_POSITIONALS, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
6211
|
+
}
|
6212
|
+
|
6213
|
+
static inline void
|
6214
|
+
pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t * token)
|
6215
|
+
{
|
6216
|
+
pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_ALL, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
6217
|
+
}
|
6218
|
+
|
6219
|
+
static inline void
|
6220
|
+
pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token)
|
6221
|
+
{
|
6222
|
+
pm_parser_scope_forwarding_param_check(parser, token, PM_FORWARDING_KEYWORDS, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
6223
|
+
}
|
6224
|
+
|
6225
|
+
/**
|
6226
|
+
* Save the current param name as the return value and set it to the given
|
6227
|
+
* constant id.
|
6228
|
+
*/
|
6229
|
+
static inline pm_constant_id_t
|
6230
|
+
pm_parser_current_param_name_set(pm_parser_t *parser, pm_constant_id_t current_param_name) {
|
6231
|
+
pm_constant_id_t saved_param_name = parser->current_param_name;
|
6232
|
+
parser->current_param_name = current_param_name;
|
6233
|
+
return saved_param_name;
|
6234
|
+
}
|
6235
|
+
|
6236
|
+
/**
|
6237
|
+
* Save the current param name as the return value and clear it.
|
6238
|
+
*/
|
6239
|
+
static inline pm_constant_id_t
|
6240
|
+
pm_parser_current_param_name_unset(pm_parser_t *parser) {
|
6241
|
+
return pm_parser_current_param_name_set(parser, PM_CONSTANT_ID_UNSET);
|
6242
|
+
}
|
6243
|
+
|
6244
|
+
/**
|
6245
|
+
* Restore the current param name from the given value.
|
6246
|
+
*/
|
6247
|
+
static inline void
|
6248
|
+
pm_parser_current_param_name_restore(pm_parser_t *parser, pm_constant_id_t saved_param_name) {
|
6249
|
+
parser->current_param_name = saved_param_name;
|
6250
|
+
}
|
6251
|
+
|
5896
6252
|
/**
|
5897
6253
|
* Check if any of the currently visible scopes contain a local variable
|
5898
6254
|
* described by the given constant id.
|
@@ -5969,26 +6325,41 @@ pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t leng
|
|
5969
6325
|
return constant_id;
|
5970
6326
|
}
|
5971
6327
|
|
6328
|
+
/**
|
6329
|
+
* Add a local variable from a constant string to the current scope.
|
6330
|
+
*/
|
6331
|
+
static pm_constant_id_t
|
6332
|
+
pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
|
6333
|
+
pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
|
6334
|
+
if (constant_id != 0) pm_parser_local_add(parser, constant_id);
|
6335
|
+
return constant_id;
|
6336
|
+
}
|
6337
|
+
|
5972
6338
|
/**
|
5973
6339
|
* Add a parameter name to the current scope and check whether the name of the
|
5974
6340
|
* parameter is unique or not.
|
6341
|
+
*
|
6342
|
+
* Returns `true` if this is a duplicate parameter name, otherwise returns
|
6343
|
+
* false.
|
5975
6344
|
*/
|
5976
|
-
static
|
6345
|
+
static bool
|
5977
6346
|
pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
|
5978
6347
|
// We want to check whether the parameter name is a numbered parameter or
|
5979
6348
|
// not.
|
5980
6349
|
pm_refute_numbered_parameter(parser, name->start, name->end);
|
5981
6350
|
|
5982
|
-
// We want to ignore any parameter name that starts with an underscore.
|
5983
|
-
if ((name->start < name->end) && (*name->start == '_')) return;
|
5984
|
-
|
5985
6351
|
// Otherwise we'll fetch the constant id for the parameter name and check
|
5986
6352
|
// whether it's already in the current scope.
|
5987
6353
|
pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
|
5988
6354
|
|
5989
6355
|
if (pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
|
5990
|
-
|
6356
|
+
// Add an error if the parameter doesn't start with _ and has been seen before
|
6357
|
+
if ((name->start < name->end) && (*name->start != '_')) {
|
6358
|
+
pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_REPEAT);
|
6359
|
+
}
|
6360
|
+
return true;
|
5991
6361
|
}
|
6362
|
+
return false;
|
5992
6363
|
}
|
5993
6364
|
|
5994
6365
|
/**
|
@@ -6003,105 +6374,6 @@ pm_parser_scope_pop(pm_parser_t *parser) {
|
|
6003
6374
|
free(scope);
|
6004
6375
|
}
|
6005
6376
|
|
6006
|
-
/******************************************************************************/
|
6007
|
-
/* Basic character checks */
|
6008
|
-
/******************************************************************************/
|
6009
|
-
|
6010
|
-
/**
|
6011
|
-
* This function is used extremely frequently to lex all of the identifiers in a
|
6012
|
-
* source file, so it's important that it be as fast as possible. For this
|
6013
|
-
* reason we have the encoding_changed boolean to check if we need to go through
|
6014
|
-
* the function pointer or can just directly use the UTF-8 functions.
|
6015
|
-
*/
|
6016
|
-
static inline size_t
|
6017
|
-
char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
|
6018
|
-
if (parser->encoding_changed) {
|
6019
|
-
size_t width;
|
6020
|
-
if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
|
6021
|
-
return width;
|
6022
|
-
} else if (*b == '_') {
|
6023
|
-
return 1;
|
6024
|
-
} else if (*b >= 0x80) {
|
6025
|
-
return parser->encoding->char_width(b, parser->end - b);
|
6026
|
-
} else {
|
6027
|
-
return 0;
|
6028
|
-
}
|
6029
|
-
} else if (*b < 0x80) {
|
6030
|
-
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
|
6031
|
-
} else {
|
6032
|
-
return (size_t) (pm_encoding_utf_8_alpha_char(b, parser->end - b) || 1u);
|
6033
|
-
}
|
6034
|
-
}
|
6035
|
-
|
6036
|
-
/**
|
6037
|
-
* Similar to char_is_identifier but this function assumes that the encoding
|
6038
|
-
* has not been changed.
|
6039
|
-
*/
|
6040
|
-
static inline size_t
|
6041
|
-
char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
6042
|
-
if (*b < 0x80) {
|
6043
|
-
return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
|
6044
|
-
} else {
|
6045
|
-
return (size_t) (pm_encoding_utf_8_alnum_char(b, end - b) || 1u);
|
6046
|
-
}
|
6047
|
-
}
|
6048
|
-
|
6049
|
-
/**
|
6050
|
-
* Like the above, this function is also used extremely frequently to lex all of
|
6051
|
-
* the identifiers in a source file once the first character has been found. So
|
6052
|
-
* it's important that it be as fast as possible.
|
6053
|
-
*/
|
6054
|
-
static inline size_t
|
6055
|
-
char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
6056
|
-
if (parser->encoding_changed) {
|
6057
|
-
size_t width;
|
6058
|
-
if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
|
6059
|
-
return width;
|
6060
|
-
} else if (*b == '_') {
|
6061
|
-
return 1;
|
6062
|
-
} else if (*b >= 0x80) {
|
6063
|
-
return parser->encoding->char_width(b, parser->end - b);
|
6064
|
-
} else {
|
6065
|
-
return 0;
|
6066
|
-
}
|
6067
|
-
}
|
6068
|
-
return char_is_identifier_utf8(b, parser->end);
|
6069
|
-
}
|
6070
|
-
|
6071
|
-
// Here we're defining a perfect hash for the characters that are allowed in
|
6072
|
-
// global names. This is used to quickly check the next character after a $ to
|
6073
|
-
// see if it's a valid character for a global name.
|
6074
|
-
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
|
6075
|
-
#define PUNCT(idx) ( \
|
6076
|
-
BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
|
6077
|
-
BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
|
6078
|
-
BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
|
6079
|
-
BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
|
6080
|
-
BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
|
6081
|
-
BIT('0', idx))
|
6082
|
-
|
6083
|
-
const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
|
6084
|
-
|
6085
|
-
#undef BIT
|
6086
|
-
#undef PUNCT
|
6087
|
-
|
6088
|
-
static inline bool
|
6089
|
-
char_is_global_name_punctuation(const uint8_t b) {
|
6090
|
-
const unsigned int i = (const unsigned int) b;
|
6091
|
-
if (i <= 0x20 || 0x7e < i) return false;
|
6092
|
-
|
6093
|
-
return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
|
6094
|
-
}
|
6095
|
-
|
6096
|
-
static inline bool
|
6097
|
-
token_is_setter_name(pm_token_t *token) {
|
6098
|
-
return (
|
6099
|
-
(token->type == PM_TOKEN_IDENTIFIER) &&
|
6100
|
-
(token->end - token->start >= 2) &&
|
6101
|
-
(token->end[-1] == '=')
|
6102
|
-
);
|
6103
|
-
}
|
6104
|
-
|
6105
6377
|
/******************************************************************************/
|
6106
6378
|
/* Stack helpers */
|
6107
6379
|
/******************************************************************************/
|
@@ -6317,8 +6589,10 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) {
|
|
6317
6589
|
*/
|
6318
6590
|
static void
|
6319
6591
|
parser_lex_magic_comment_frozen_string_literal_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
6320
|
-
if (start + 4 <= end && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
|
6592
|
+
if ((start + 4 <= end) && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
|
6321
6593
|
parser->frozen_string_literal = true;
|
6594
|
+
} else if ((start + 5 <= end) && pm_strncasecmp(start, (const uint8_t *) "false", 5) == 0) {
|
6595
|
+
parser->frozen_string_literal = false;
|
6322
6596
|
}
|
6323
6597
|
}
|
6324
6598
|
|
@@ -6541,21 +6815,27 @@ context_terminator(pm_context_t context, pm_token_t *token) {
|
|
6541
6815
|
return token->type == PM_TOKEN_BRACE_RIGHT;
|
6542
6816
|
case PM_CONTEXT_PREDICATE:
|
6543
6817
|
return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
|
6818
|
+
case PM_CONTEXT_NONE:
|
6819
|
+
return false;
|
6544
6820
|
}
|
6545
6821
|
|
6546
6822
|
return false;
|
6547
6823
|
}
|
6548
6824
|
|
6549
|
-
|
6550
|
-
|
6825
|
+
/**
|
6826
|
+
* Returns the context that the given token is found to be terminating, or
|
6827
|
+
* returns PM_CONTEXT_NONE.
|
6828
|
+
*/
|
6829
|
+
static pm_context_t
|
6830
|
+
context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
|
6551
6831
|
pm_context_node_t *context_node = parser->current_context;
|
6552
6832
|
|
6553
6833
|
while (context_node != NULL) {
|
6554
|
-
if (context_terminator(context_node->context, token)) return
|
6834
|
+
if (context_terminator(context_node->context, token)) return context_node->context;
|
6555
6835
|
context_node = context_node->prev;
|
6556
6836
|
}
|
6557
6837
|
|
6558
|
-
return
|
6838
|
+
return PM_CONTEXT_NONE;
|
6559
6839
|
}
|
6560
6840
|
|
6561
6841
|
static bool
|
@@ -6583,7 +6863,7 @@ context_pop(pm_parser_t *parser) {
|
|
6583
6863
|
}
|
6584
6864
|
|
6585
6865
|
static bool
|
6586
|
-
context_p(pm_parser_t *parser, pm_context_t context) {
|
6866
|
+
context_p(const pm_parser_t *parser, pm_context_t context) {
|
6587
6867
|
pm_context_node_t *context_node = parser->current_context;
|
6588
6868
|
|
6589
6869
|
while (context_node != NULL) {
|
@@ -6595,7 +6875,7 @@ context_p(pm_parser_t *parser, pm_context_t context) {
|
|
6595
6875
|
}
|
6596
6876
|
|
6597
6877
|
static bool
|
6598
|
-
context_def_p(pm_parser_t *parser) {
|
6878
|
+
context_def_p(const pm_parser_t *parser) {
|
6599
6879
|
pm_context_node_t *context_node = parser->current_context;
|
6600
6880
|
|
6601
6881
|
while (context_node != NULL) {
|
@@ -6618,6 +6898,55 @@ context_def_p(pm_parser_t *parser) {
|
|
6618
6898
|
return false;
|
6619
6899
|
}
|
6620
6900
|
|
6901
|
+
/**
|
6902
|
+
* Returns a human readable string for the given context, used in error
|
6903
|
+
* messages.
|
6904
|
+
*/
|
6905
|
+
static const char *
|
6906
|
+
context_human(pm_context_t context) {
|
6907
|
+
switch (context) {
|
6908
|
+
case PM_CONTEXT_NONE:
|
6909
|
+
assert(false && "unreachable");
|
6910
|
+
return "";
|
6911
|
+
case PM_CONTEXT_BEGIN: return "begin statement";
|
6912
|
+
case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
|
6913
|
+
case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
|
6914
|
+
case PM_CONTEXT_CASE_WHEN: return "'when' clause";
|
6915
|
+
case PM_CONTEXT_CASE_IN: return "'in' clause";
|
6916
|
+
case PM_CONTEXT_CLASS: return "class definition";
|
6917
|
+
case PM_CONTEXT_DEF: return "method definition";
|
6918
|
+
case PM_CONTEXT_DEF_PARAMS: return "method parameters";
|
6919
|
+
case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
|
6920
|
+
case PM_CONTEXT_ELSE: return "'else' clause";
|
6921
|
+
case PM_CONTEXT_ELSIF: return "'elsif' clause";
|
6922
|
+
case PM_CONTEXT_EMBEXPR: return "embedded expression";
|
6923
|
+
case PM_CONTEXT_ENSURE: return "'ensure' clause";
|
6924
|
+
case PM_CONTEXT_ENSURE_DEF: return "'ensure' clause";
|
6925
|
+
case PM_CONTEXT_FOR: return "for loop";
|
6926
|
+
case PM_CONTEXT_FOR_INDEX: return "for loop index";
|
6927
|
+
case PM_CONTEXT_IF: return "if statement";
|
6928
|
+
case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
|
6929
|
+
case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
|
6930
|
+
case PM_CONTEXT_MAIN: return "top level context";
|
6931
|
+
case PM_CONTEXT_MODULE: return "module definition";
|
6932
|
+
case PM_CONTEXT_PARENS: return "parentheses";
|
6933
|
+
case PM_CONTEXT_POSTEXE: return "'END' block";
|
6934
|
+
case PM_CONTEXT_PREDICATE: return "predicate";
|
6935
|
+
case PM_CONTEXT_PREEXE: return "'BEGIN' block";
|
6936
|
+
case PM_CONTEXT_RESCUE_ELSE: return "'else' clause";
|
6937
|
+
case PM_CONTEXT_RESCUE_ELSE_DEF: return "'else' clause";
|
6938
|
+
case PM_CONTEXT_RESCUE: return "'rescue' clause";
|
6939
|
+
case PM_CONTEXT_RESCUE_DEF: return "'rescue' clause";
|
6940
|
+
case PM_CONTEXT_SCLASS: return "singleton class definition";
|
6941
|
+
case PM_CONTEXT_UNLESS: return "unless statement";
|
6942
|
+
case PM_CONTEXT_UNTIL: return "until statement";
|
6943
|
+
case PM_CONTEXT_WHILE: return "while statement";
|
6944
|
+
}
|
6945
|
+
|
6946
|
+
assert(false && "unreachable");
|
6947
|
+
return "";
|
6948
|
+
}
|
6949
|
+
|
6621
6950
|
/******************************************************************************/
|
6622
6951
|
/* Specific token lexers */
|
6623
6952
|
/******************************************************************************/
|
@@ -7360,6 +7689,28 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
|
|
7360
7689
|
pm_buffer_append_byte(buffer, byte);
|
7361
7690
|
}
|
7362
7691
|
|
7692
|
+
/**
|
7693
|
+
* Write each byte of the given escaped character into the buffer.
|
7694
|
+
*/
|
7695
|
+
static inline void
|
7696
|
+
escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
|
7697
|
+
size_t width;
|
7698
|
+
if (parser->encoding_changed) {
|
7699
|
+
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
7700
|
+
} else {
|
7701
|
+
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
|
7702
|
+
}
|
7703
|
+
|
7704
|
+
// TODO: If the character is invalid in the given encoding, then we'll just
|
7705
|
+
// push one byte into the buffer. This should actually be an error.
|
7706
|
+
width = (width == 0) ? 1 : width;
|
7707
|
+
|
7708
|
+
for (size_t index = 0; index < width; index++) {
|
7709
|
+
escape_write_byte_encoded(parser, buffer, *parser->current.end);
|
7710
|
+
parser->current.end++;
|
7711
|
+
}
|
7712
|
+
}
|
7713
|
+
|
7363
7714
|
/**
|
7364
7715
|
* The regular expression engine doesn't support the same escape sequences as
|
7365
7716
|
* Ruby does. So first we have to read the escape sequence, and then we have to
|
@@ -7698,7 +8049,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
|
|
7698
8049
|
/* fallthrough */
|
7699
8050
|
default: {
|
7700
8051
|
if (parser->current.end < parser->end) {
|
7701
|
-
|
8052
|
+
escape_write_escape_encoded(parser, buffer);
|
7702
8053
|
}
|
7703
8054
|
return;
|
7704
8055
|
}
|
@@ -7975,14 +8326,43 @@ typedef struct {
|
|
7975
8326
|
* Push the given byte into the token buffer.
|
7976
8327
|
*/
|
7977
8328
|
static inline void
|
7978
|
-
|
8329
|
+
pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
|
7979
8330
|
pm_buffer_append_byte(&token_buffer->buffer, byte);
|
7980
8331
|
}
|
7981
8332
|
|
8333
|
+
/**
|
8334
|
+
* Append the given bytes into the token buffer.
|
8335
|
+
*/
|
8336
|
+
static inline void
|
8337
|
+
pm_token_buffer_push_bytes(pm_token_buffer_t *token_buffer, const uint8_t *bytes, size_t length) {
|
8338
|
+
pm_buffer_append_bytes(&token_buffer->buffer, bytes, length);
|
8339
|
+
}
|
8340
|
+
|
8341
|
+
/**
|
8342
|
+
* Push an escaped character into the token buffer.
|
8343
|
+
*/
|
8344
|
+
static inline void
|
8345
|
+
pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
|
8346
|
+
// First, determine the width of the character to be escaped.
|
8347
|
+
size_t width;
|
8348
|
+
if (parser->encoding_changed) {
|
8349
|
+
width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
|
8350
|
+
} else {
|
8351
|
+
width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
|
8352
|
+
}
|
8353
|
+
|
8354
|
+
// TODO: If the character is invalid in the given encoding, then we'll just
|
8355
|
+
// push one byte into the buffer. This should actually be an error.
|
8356
|
+
width = (width == 0 ? 1 : width);
|
8357
|
+
|
8358
|
+
// Now, push the bytes into the buffer.
|
8359
|
+
pm_token_buffer_push_bytes(token_buffer, parser->current.end, width);
|
8360
|
+
parser->current.end += width;
|
8361
|
+
}
|
8362
|
+
|
7982
8363
|
/**
|
7983
8364
|
* When we're about to return from lexing the current token and we know for sure
|
7984
8365
|
* that we have found an escape sequence, this function is called to copy the
|
7985
|
-
*
|
7986
8366
|
* contents of the token buffer into the current string on the parser so that it
|
7987
8367
|
* can be attached to the correct node.
|
7988
8368
|
*/
|
@@ -7997,7 +8377,6 @@ pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
|
7997
8377
|
* string. If we haven't pushed anything into the buffer, this means that we
|
7998
8378
|
* never found an escape sequence, so we can directly reference the bounds of
|
7999
8379
|
* the current string. Either way, at the return of this function it is expected
|
8000
|
-
*
|
8001
8380
|
* that parser->current_string is established in such a way that it can be
|
8002
8381
|
* attached to a node.
|
8003
8382
|
*/
|
@@ -8016,7 +8395,6 @@ pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
|
8016
8395
|
* point into the buffer because we're about to provide a string that has
|
8017
8396
|
* different content than a direct slice of the source.
|
8018
8397
|
*
|
8019
|
-
*
|
8020
8398
|
* It is expected that the parser's current token end will be pointing at one
|
8021
8399
|
* byte past the backslash that starts the escape sequence.
|
8022
8400
|
*/
|
@@ -8070,6 +8448,34 @@ pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor,
|
|
8070
8448
|
return whitespace;
|
8071
8449
|
}
|
8072
8450
|
|
8451
|
+
/**
|
8452
|
+
* Lex past the delimiter of a percent literal. Handle newlines and heredocs
|
8453
|
+
* appropriately.
|
8454
|
+
*/
|
8455
|
+
static uint8_t
|
8456
|
+
pm_lex_percent_delimiter(pm_parser_t *parser) {
|
8457
|
+
size_t eol_length = match_eol(parser);
|
8458
|
+
|
8459
|
+
if (eol_length) {
|
8460
|
+
if (parser->heredoc_end) {
|
8461
|
+
// If we have already lexed a heredoc, then the newline has already
|
8462
|
+
// been added to the list. In this case we want to just flush the
|
8463
|
+
// heredoc end.
|
8464
|
+
parser_flush_heredoc_end(parser);
|
8465
|
+
} else {
|
8466
|
+
// Otherwise, we'll add the newline to the list of newlines.
|
8467
|
+
pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
|
8468
|
+
}
|
8469
|
+
|
8470
|
+
const uint8_t delimiter = *parser->current.end;
|
8471
|
+
parser->current.end += eol_length;
|
8472
|
+
|
8473
|
+
return delimiter;
|
8474
|
+
}
|
8475
|
+
|
8476
|
+
return *parser->current.end++;
|
8477
|
+
}
|
8478
|
+
|
8073
8479
|
/**
|
8074
8480
|
* This is a convenience macro that will set the current token type, call the
|
8075
8481
|
* lex callback, and then return from the parser_lex function.
|
@@ -8635,7 +9041,7 @@ parser_lex(pm_parser_t *parser) {
|
|
8635
9041
|
// this is not a valid heredoc declaration. In this case we
|
8636
9042
|
// will add an error, but we will still return a heredoc
|
8637
9043
|
// start.
|
8638
|
-
pm_parser_err_current(parser,
|
9044
|
+
pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
|
8639
9045
|
body_start = parser->end;
|
8640
9046
|
} else {
|
8641
9047
|
// Otherwise, we want to indicate that the body of the
|
@@ -8826,12 +9232,10 @@ parser_lex(pm_parser_t *parser) {
|
|
8826
9232
|
LEX(PM_TOKEN_PLUS_EQUAL);
|
8827
9233
|
}
|
8828
9234
|
|
8829
|
-
|
8830
|
-
|
8831
|
-
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS)
|
8832
|
-
|
8833
|
-
|
8834
|
-
if (lex_state_beg_p(parser) || spcarg) {
|
9235
|
+
if (
|
9236
|
+
lex_state_beg_p(parser) ||
|
9237
|
+
(lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
|
9238
|
+
) {
|
8835
9239
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
8836
9240
|
|
8837
9241
|
if (pm_char_is_decimal_digit(peek(parser))) {
|
@@ -8871,11 +9275,12 @@ parser_lex(pm_parser_t *parser) {
|
|
8871
9275
|
}
|
8872
9276
|
|
8873
9277
|
bool spcarg = lex_state_spcarg_p(parser, space_seen);
|
8874
|
-
|
9278
|
+
bool is_beg = lex_state_beg_p(parser);
|
9279
|
+
if (!is_beg && spcarg) {
|
8875
9280
|
pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
|
8876
9281
|
}
|
8877
9282
|
|
8878
|
-
if (
|
9283
|
+
if (is_beg || spcarg) {
|
8879
9284
|
lex_state_set(parser, PM_LEX_STATE_BEG);
|
8880
9285
|
LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
|
8881
9286
|
}
|
@@ -9026,15 +9431,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9026
9431
|
pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
|
9027
9432
|
}
|
9028
9433
|
|
9029
|
-
|
9030
|
-
|
9031
|
-
size_t eol_length = match_eol(parser);
|
9032
|
-
if (eol_length) {
|
9033
|
-
parser->current.end += eol_length;
|
9034
|
-
pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
|
9035
|
-
} else {
|
9036
|
-
parser->current.end++;
|
9037
|
-
}
|
9434
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9435
|
+
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9038
9436
|
|
9039
9437
|
if (parser->current.end < parser->end) {
|
9040
9438
|
LEX(PM_TOKEN_STRING_BEGIN);
|
@@ -9054,7 +9452,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9054
9452
|
parser->current.end++;
|
9055
9453
|
|
9056
9454
|
if (parser->current.end < parser->end) {
|
9057
|
-
lex_mode_push_list(parser, false,
|
9455
|
+
lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
|
9058
9456
|
} else {
|
9059
9457
|
lex_mode_push_list_eof(parser);
|
9060
9458
|
}
|
@@ -9065,7 +9463,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9065
9463
|
parser->current.end++;
|
9066
9464
|
|
9067
9465
|
if (parser->current.end < parser->end) {
|
9068
|
-
lex_mode_push_list(parser, true,
|
9466
|
+
lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
|
9069
9467
|
} else {
|
9070
9468
|
lex_mode_push_list_eof(parser);
|
9071
9469
|
}
|
@@ -9076,9 +9474,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9076
9474
|
parser->current.end++;
|
9077
9475
|
|
9078
9476
|
if (parser->current.end < parser->end) {
|
9079
|
-
|
9080
|
-
|
9081
|
-
parser->current.end++;
|
9477
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9478
|
+
lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9082
9479
|
} else {
|
9083
9480
|
lex_mode_push_regexp(parser, '\0', '\0');
|
9084
9481
|
}
|
@@ -9089,9 +9486,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9089
9486
|
parser->current.end++;
|
9090
9487
|
|
9091
9488
|
if (parser->current.end < parser->end) {
|
9092
|
-
|
9093
|
-
|
9094
|
-
parser->current.end++;
|
9489
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9490
|
+
lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9095
9491
|
} else {
|
9096
9492
|
lex_mode_push_string_eof(parser);
|
9097
9493
|
}
|
@@ -9102,9 +9498,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9102
9498
|
parser->current.end++;
|
9103
9499
|
|
9104
9500
|
if (parser->current.end < parser->end) {
|
9105
|
-
|
9106
|
-
|
9107
|
-
parser->current.end++;
|
9501
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9502
|
+
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9108
9503
|
} else {
|
9109
9504
|
lex_mode_push_string_eof(parser);
|
9110
9505
|
}
|
@@ -9115,9 +9510,9 @@ parser_lex(pm_parser_t *parser) {
|
|
9115
9510
|
parser->current.end++;
|
9116
9511
|
|
9117
9512
|
if (parser->current.end < parser->end) {
|
9118
|
-
|
9513
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9514
|
+
lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9119
9515
|
lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
|
9120
|
-
parser->current.end++;
|
9121
9516
|
} else {
|
9122
9517
|
lex_mode_push_string_eof(parser);
|
9123
9518
|
}
|
@@ -9128,7 +9523,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9128
9523
|
parser->current.end++;
|
9129
9524
|
|
9130
9525
|
if (parser->current.end < parser->end) {
|
9131
|
-
lex_mode_push_list(parser, false,
|
9526
|
+
lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
|
9132
9527
|
} else {
|
9133
9528
|
lex_mode_push_list_eof(parser);
|
9134
9529
|
}
|
@@ -9139,7 +9534,7 @@ parser_lex(pm_parser_t *parser) {
|
|
9139
9534
|
parser->current.end++;
|
9140
9535
|
|
9141
9536
|
if (parser->current.end < parser->end) {
|
9142
|
-
lex_mode_push_list(parser, true,
|
9537
|
+
lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
|
9143
9538
|
} else {
|
9144
9539
|
lex_mode_push_list_eof(parser);
|
9145
9540
|
}
|
@@ -9150,8 +9545,8 @@ parser_lex(pm_parser_t *parser) {
|
|
9150
9545
|
parser->current.end++;
|
9151
9546
|
|
9152
9547
|
if (parser->current.end < parser->end) {
|
9153
|
-
|
9154
|
-
parser
|
9548
|
+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
9549
|
+
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
9155
9550
|
} else {
|
9156
9551
|
lex_mode_push_string_eof(parser);
|
9157
9552
|
}
|
@@ -9377,18 +9772,18 @@ parser_lex(pm_parser_t *parser) {
|
|
9377
9772
|
case '\t':
|
9378
9773
|
case '\v':
|
9379
9774
|
case '\\':
|
9380
|
-
|
9775
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9381
9776
|
parser->current.end++;
|
9382
9777
|
break;
|
9383
9778
|
case '\r':
|
9384
9779
|
parser->current.end++;
|
9385
9780
|
if (peek(parser) != '\n') {
|
9386
|
-
|
9781
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
9387
9782
|
break;
|
9388
9783
|
}
|
9389
9784
|
/* fallthrough */
|
9390
9785
|
case '\n':
|
9391
|
-
|
9786
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
9392
9787
|
|
9393
9788
|
if (parser->heredoc_end) {
|
9394
9789
|
// ... if we are on the same line as a heredoc,
|
@@ -9406,14 +9801,13 @@ parser_lex(pm_parser_t *parser) {
|
|
9406
9801
|
break;
|
9407
9802
|
default:
|
9408
9803
|
if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
|
9409
|
-
|
9804
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9410
9805
|
parser->current.end++;
|
9411
9806
|
} else if (lex_mode->as.list.interpolation) {
|
9412
9807
|
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
9413
9808
|
} else {
|
9414
|
-
|
9415
|
-
|
9416
|
-
parser->current.end++;
|
9809
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9810
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
9417
9811
|
}
|
9418
9812
|
|
9419
9813
|
break;
|
@@ -9571,9 +9965,9 @@ parser_lex(pm_parser_t *parser) {
|
|
9571
9965
|
parser->current.end++;
|
9572
9966
|
if (peek(parser) != '\n') {
|
9573
9967
|
if (lex_mode->as.regexp.terminator != '\r') {
|
9574
|
-
|
9968
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9575
9969
|
}
|
9576
|
-
|
9970
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
9577
9971
|
break;
|
9578
9972
|
}
|
9579
9973
|
/* fallthrough */
|
@@ -9608,20 +10002,19 @@ parser_lex(pm_parser_t *parser) {
|
|
9608
10002
|
case '$': case ')': case '*': case '+':
|
9609
10003
|
case '.': case '>': case '?': case ']':
|
9610
10004
|
case '^': case '|': case '}':
|
9611
|
-
|
10005
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9612
10006
|
break;
|
9613
10007
|
default:
|
9614
10008
|
break;
|
9615
10009
|
}
|
9616
10010
|
|
9617
|
-
|
10011
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9618
10012
|
parser->current.end++;
|
9619
10013
|
break;
|
9620
10014
|
}
|
9621
10015
|
|
9622
|
-
if (peeked < 0x80)
|
9623
|
-
|
9624
|
-
parser->current.end++;
|
10016
|
+
if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer, '\\');
|
10017
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
9625
10018
|
break;
|
9626
10019
|
}
|
9627
10020
|
|
@@ -9788,23 +10181,23 @@ parser_lex(pm_parser_t *parser) {
|
|
9788
10181
|
|
9789
10182
|
switch (peeked) {
|
9790
10183
|
case '\\':
|
9791
|
-
|
10184
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9792
10185
|
parser->current.end++;
|
9793
10186
|
break;
|
9794
10187
|
case '\r':
|
9795
10188
|
parser->current.end++;
|
9796
10189
|
if (peek(parser) != '\n') {
|
9797
10190
|
if (!lex_mode->as.string.interpolation) {
|
9798
|
-
|
10191
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
9799
10192
|
}
|
9800
|
-
|
10193
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
9801
10194
|
break;
|
9802
10195
|
}
|
9803
10196
|
/* fallthrough */
|
9804
10197
|
case '\n':
|
9805
10198
|
if (!lex_mode->as.string.interpolation) {
|
9806
|
-
|
9807
|
-
|
10199
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10200
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
9808
10201
|
}
|
9809
10202
|
|
9810
10203
|
if (parser->heredoc_end) {
|
@@ -9823,17 +10216,16 @@ parser_lex(pm_parser_t *parser) {
|
|
9823
10216
|
break;
|
9824
10217
|
default:
|
9825
10218
|
if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
|
9826
|
-
|
10219
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9827
10220
|
parser->current.end++;
|
9828
10221
|
} else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
|
9829
|
-
|
10222
|
+
pm_token_buffer_push_byte(&token_buffer, peeked);
|
9830
10223
|
parser->current.end++;
|
9831
10224
|
} else if (lex_mode->as.string.interpolation) {
|
9832
10225
|
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
|
9833
10226
|
} else {
|
9834
|
-
|
9835
|
-
|
9836
|
-
parser->current.end++;
|
10227
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10228
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
9837
10229
|
}
|
9838
10230
|
|
9839
10231
|
break;
|
@@ -9888,15 +10280,22 @@ parser_lex(pm_parser_t *parser) {
|
|
9888
10280
|
parser->next_start = NULL;
|
9889
10281
|
}
|
9890
10282
|
|
9891
|
-
//
|
9892
|
-
//
|
10283
|
+
// Now let's grab the information about the identifier off of the
|
10284
|
+
// current lex mode.
|
10285
|
+
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
10286
|
+
|
10287
|
+
// We'll check if we're at the end of the file. If we are, then we
|
10288
|
+
// will add an error (because we weren't able to find the
|
10289
|
+
// terminator) but still continue parsing so that content after the
|
10290
|
+
// declaration of the heredoc can be parsed.
|
9893
10291
|
if (parser->current.end >= parser->end) {
|
9894
|
-
|
10292
|
+
pm_parser_err_current(parser, PM_ERR_HEREDOC_TERM);
|
10293
|
+
parser->next_start = lex_mode->as.heredoc.next_start;
|
10294
|
+
parser->heredoc_end = parser->current.end;
|
10295
|
+
lex_state_set(parser, PM_LEX_STATE_END);
|
10296
|
+
LEX(PM_TOKEN_HEREDOC_END);
|
9895
10297
|
}
|
9896
10298
|
|
9897
|
-
// Now let's grab the information about the identifier off of the current
|
9898
|
-
// lex mode.
|
9899
|
-
pm_lex_mode_t *lex_mode = parser->lex_modes.current;
|
9900
10299
|
const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
|
9901
10300
|
size_t ident_length = lex_mode->as.heredoc.ident_length;
|
9902
10301
|
|
@@ -10083,21 +10482,20 @@ parser_lex(pm_parser_t *parser) {
|
|
10083
10482
|
case '\r':
|
10084
10483
|
parser->current.end++;
|
10085
10484
|
if (peek(parser) != '\n') {
|
10086
|
-
|
10087
|
-
|
10485
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10486
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
10088
10487
|
break;
|
10089
10488
|
}
|
10090
10489
|
/* fallthrough */
|
10091
10490
|
case '\n':
|
10092
|
-
|
10093
|
-
|
10491
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10492
|
+
pm_token_buffer_push_byte(&token_buffer, '\n');
|
10094
10493
|
token_buffer.cursor = parser->current.end + 1;
|
10095
10494
|
breakpoint = parser->current.end;
|
10096
10495
|
continue;
|
10097
10496
|
default:
|
10098
|
-
|
10099
|
-
|
10100
|
-
pm_token_buffer_push(&token_buffer, peeked);
|
10497
|
+
pm_token_buffer_push_byte(&token_buffer, '\\');
|
10498
|
+
pm_token_buffer_push_escaped(&token_buffer, parser);
|
10101
10499
|
break;
|
10102
10500
|
}
|
10103
10501
|
} else {
|
@@ -10105,7 +10503,7 @@ parser_lex(pm_parser_t *parser) {
|
|
10105
10503
|
case '\r':
|
10106
10504
|
parser->current.end++;
|
10107
10505
|
if (peek(parser) != '\n') {
|
10108
|
-
|
10506
|
+
pm_token_buffer_push_byte(&token_buffer, '\r');
|
10109
10507
|
break;
|
10110
10508
|
}
|
10111
10509
|
/* fallthrough */
|
@@ -10184,8 +10582,8 @@ parser_lex(pm_parser_t *parser) {
|
|
10184
10582
|
typedef enum {
|
10185
10583
|
PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
|
10186
10584
|
PM_BINDING_POWER_STATEMENT = 2,
|
10187
|
-
|
10188
|
-
|
10585
|
+
PM_BINDING_POWER_MODIFIER_RESCUE = 4, // rescue
|
10586
|
+
PM_BINDING_POWER_MODIFIER = 6, // if unless until while
|
10189
10587
|
PM_BINDING_POWER_COMPOSITION = 8, // and or
|
10190
10588
|
PM_BINDING_POWER_NOT = 10, // not
|
10191
10589
|
PM_BINDING_POWER_MATCH = 12, // => in
|
@@ -10239,15 +10637,15 @@ typedef struct {
|
|
10239
10637
|
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
|
10240
10638
|
|
10241
10639
|
pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
|
10640
|
+
// rescue
|
10641
|
+
[PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
|
10642
|
+
|
10242
10643
|
// if unless until while
|
10243
10644
|
[PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
10244
10645
|
[PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
10245
10646
|
[PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
10246
10647
|
[PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
|
10247
10648
|
|
10248
|
-
// rescue
|
10249
|
-
[PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
|
10250
|
-
|
10251
10649
|
// and or
|
10252
10650
|
[PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
|
10253
10651
|
[PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
|
@@ -10377,16 +10775,8 @@ match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
|
|
10377
10775
|
* Returns true if the current token is any of the four given types.
|
10378
10776
|
*/
|
10379
10777
|
static inline bool
|
10380
|
-
match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
|
10381
|
-
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
|
10382
|
-
}
|
10383
|
-
|
10384
|
-
/**
|
10385
|
-
* Returns true if the current token is any of the five given types.
|
10386
|
-
*/
|
10387
|
-
static inline bool
|
10388
|
-
match5(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5) {
|
10389
|
-
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5);
|
10778
|
+
match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
|
10779
|
+
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
|
10390
10780
|
}
|
10391
10781
|
|
10392
10782
|
/**
|
@@ -10866,7 +11256,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
|
|
10866
11256
|
return target;
|
10867
11257
|
}
|
10868
11258
|
|
10869
|
-
if (
|
11259
|
+
if (char_is_identifier_start(parser, call->message_loc.start)) {
|
10870
11260
|
// When we get here, we have a method call, because it was
|
10871
11261
|
// previously marked as a method call but now we have an =. This
|
10872
11262
|
// looks like:
|
@@ -10984,6 +11374,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
|
|
10984
11374
|
static pm_node_t *
|
10985
11375
|
parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power) {
|
10986
11376
|
pm_node_t *result = parse_targets(parser, first_target, binding_power);
|
11377
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
10987
11378
|
|
10988
11379
|
// Ensure that we have either an = or a ) after the targets.
|
10989
11380
|
if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
|
@@ -11024,7 +11415,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
|
|
11024
11415
|
break;
|
11025
11416
|
}
|
11026
11417
|
|
11027
|
-
// If we have a terminator, then we will parse all
|
11418
|
+
// If we have a terminator, then we will parse all consecutive terminators
|
11028
11419
|
// and then continue parsing the statements list.
|
11029
11420
|
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
11030
11421
|
// If we have a terminator, then we will continue parsing the statements
|
@@ -11084,8 +11475,9 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
|
|
11084
11475
|
|
11085
11476
|
if (token_begins_expression_p(parser->current.type)) {
|
11086
11477
|
value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
|
11087
|
-
}
|
11088
|
-
|
11478
|
+
}
|
11479
|
+
else {
|
11480
|
+
pm_parser_scope_forwarding_keywords_check(parser, &operator);
|
11089
11481
|
}
|
11090
11482
|
|
11091
11483
|
element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
|
@@ -11234,13 +11626,8 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
11234
11626
|
if (token_begins_expression_p(parser->current.type)) {
|
11235
11627
|
expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_ARGUMENT);
|
11236
11628
|
} else {
|
11237
|
-
|
11238
|
-
|
11239
|
-
pm_constant_id_t ellipsis_id = pm_parser_constant_id_constant(parser, "...", 3);
|
11240
|
-
if (pm_parser_local_depth_constant_id(parser, ellipsis_id) == -1) {
|
11241
|
-
pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
|
11242
|
-
}
|
11243
|
-
}
|
11629
|
+
// A block forwarding in a method having `...` parameter (e.g. `def foo(...); bar(&); end`) is available.
|
11630
|
+
pm_parser_scope_forwarding_block_check(parser, &operator);
|
11244
11631
|
}
|
11245
11632
|
|
11246
11633
|
argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
|
@@ -11258,10 +11645,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
11258
11645
|
pm_token_t operator = parser->previous;
|
11259
11646
|
|
11260
11647
|
if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
|
11261
|
-
|
11262
|
-
pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
11263
|
-
}
|
11264
|
-
|
11648
|
+
pm_parser_scope_forwarding_positionals_check(parser, &operator);
|
11265
11649
|
argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
|
11266
11650
|
} else {
|
11267
11651
|
pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
|
@@ -11287,15 +11671,14 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
11287
11671
|
pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
|
11288
11672
|
argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
|
11289
11673
|
} else {
|
11290
|
-
|
11291
|
-
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
|
11292
|
-
}
|
11674
|
+
pm_parser_scope_forwarding_all_check(parser, &parser->previous);
|
11293
11675
|
if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
|
11294
11676
|
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
|
11295
11677
|
}
|
11296
11678
|
|
11297
11679
|
argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
|
11298
11680
|
parse_arguments_append(parser, arguments, argument);
|
11681
|
+
arguments->has_forwarding = true;
|
11299
11682
|
parsed_forwarding_arguments = true;
|
11300
11683
|
break;
|
11301
11684
|
}
|
@@ -11338,6 +11721,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
|
|
11338
11721
|
}
|
11339
11722
|
|
11340
11723
|
parsed_bare_hash = true;
|
11724
|
+
} else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
|
11725
|
+
// TODO: Could we solve this with binding powers instead?
|
11726
|
+
pm_parser_err_current(parser, PM_ERR_ARGUMENT_IN);
|
11341
11727
|
}
|
11342
11728
|
|
11343
11729
|
parse_arguments_append(parser, arguments, argument);
|
@@ -11414,7 +11800,9 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
|
|
11414
11800
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
11415
11801
|
pm_token_t name = parser->previous;
|
11416
11802
|
value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
|
11417
|
-
pm_parser_parameter_name_check(parser, &name)
|
11803
|
+
if (pm_parser_parameter_name_check(parser, &name)) {
|
11804
|
+
pm_node_flag_set_repeated_parameter(value);
|
11805
|
+
}
|
11418
11806
|
pm_parser_local_add_token(parser, &name);
|
11419
11807
|
}
|
11420
11808
|
|
@@ -11424,7 +11812,9 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
|
|
11424
11812
|
pm_token_t name = parser->previous;
|
11425
11813
|
|
11426
11814
|
param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
|
11427
|
-
pm_parser_parameter_name_check(parser, &name)
|
11815
|
+
if (pm_parser_parameter_name_check(parser, &name)) {
|
11816
|
+
pm_node_flag_set_repeated_parameter(param);
|
11817
|
+
}
|
11428
11818
|
pm_parser_local_add_token(parser, &name);
|
11429
11819
|
}
|
11430
11820
|
|
@@ -11541,19 +11931,20 @@ parse_parameters(
|
|
11541
11931
|
pm_token_t operator = parser->previous;
|
11542
11932
|
pm_token_t name;
|
11543
11933
|
|
11934
|
+
bool repeated = false;
|
11544
11935
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
11545
11936
|
name = parser->previous;
|
11546
|
-
pm_parser_parameter_name_check(parser, &name);
|
11937
|
+
repeated = pm_parser_parameter_name_check(parser, &name);
|
11547
11938
|
pm_parser_local_add_token(parser, &name);
|
11548
11939
|
} else {
|
11549
11940
|
name = not_provided(parser);
|
11550
|
-
|
11551
|
-
if (allows_forwarding_parameters) {
|
11552
|
-
pm_parser_local_add_token(parser, &operator);
|
11553
|
-
}
|
11941
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_BLOCK;
|
11554
11942
|
}
|
11555
11943
|
|
11556
11944
|
pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
|
11945
|
+
if (repeated) {
|
11946
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)param);
|
11947
|
+
}
|
11557
11948
|
if (params->block == NULL) {
|
11558
11949
|
pm_parameters_node_block_set(params, param);
|
11559
11950
|
} else {
|
@@ -11572,9 +11963,8 @@ parse_parameters(
|
|
11572
11963
|
update_parameter_state(parser, &parser->current, &order);
|
11573
11964
|
parser_lex(parser);
|
11574
11965
|
|
11575
|
-
|
11576
|
-
|
11577
|
-
}
|
11966
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_BLOCK;
|
11967
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_ALL;
|
11578
11968
|
|
11579
11969
|
pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
|
11580
11970
|
if (params->keyword_rest != NULL) {
|
@@ -11626,20 +12016,23 @@ parse_parameters(
|
|
11626
12016
|
}
|
11627
12017
|
|
11628
12018
|
pm_token_t name = parser->previous;
|
11629
|
-
pm_parser_parameter_name_check(parser, &name);
|
12019
|
+
bool repeated = pm_parser_parameter_name_check(parser, &name);
|
11630
12020
|
pm_parser_local_add_token(parser, &name);
|
11631
12021
|
|
11632
12022
|
if (accept1(parser, PM_TOKEN_EQUAL)) {
|
11633
12023
|
pm_token_t operator = parser->previous;
|
11634
12024
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
11635
|
-
|
11636
|
-
|
12025
|
+
|
12026
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &name));
|
11637
12027
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
|
11638
12028
|
|
11639
12029
|
pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
|
12030
|
+
if (repeated) {
|
12031
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)param);
|
12032
|
+
}
|
11640
12033
|
pm_parameters_node_optionals_append(params, param);
|
11641
12034
|
|
11642
|
-
parser
|
12035
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
11643
12036
|
context_pop(parser);
|
11644
12037
|
|
11645
12038
|
// If parsing the value of the parameter resulted in error recovery,
|
@@ -11651,9 +12044,15 @@ parse_parameters(
|
|
11651
12044
|
}
|
11652
12045
|
} else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
|
11653
12046
|
pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
|
12047
|
+
if (repeated) {
|
12048
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)param);
|
12049
|
+
}
|
11654
12050
|
pm_parameters_node_requireds_append(params, (pm_node_t *) param);
|
11655
12051
|
} else {
|
11656
12052
|
pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
|
12053
|
+
if (repeated) {
|
12054
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)param);
|
12055
|
+
}
|
11657
12056
|
pm_parameters_node_posts_append(params, (pm_node_t *) param);
|
11658
12057
|
}
|
11659
12058
|
|
@@ -11668,7 +12067,7 @@ parse_parameters(
|
|
11668
12067
|
pm_token_t local = name;
|
11669
12068
|
local.end -= 1;
|
11670
12069
|
|
11671
|
-
pm_parser_parameter_name_check(parser, &local);
|
12070
|
+
bool repeated = pm_parser_parameter_name_check(parser, &local);
|
11672
12071
|
pm_parser_local_add_token(parser, &local);
|
11673
12072
|
|
11674
12073
|
switch (parser->current.type) {
|
@@ -11676,6 +12075,9 @@ parse_parameters(
|
|
11676
12075
|
case PM_TOKEN_PARENTHESIS_RIGHT:
|
11677
12076
|
case PM_TOKEN_PIPE: {
|
11678
12077
|
pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
|
12078
|
+
if (repeated) {
|
12079
|
+
pm_node_flag_set_repeated_parameter(param);
|
12080
|
+
}
|
11679
12081
|
pm_parameters_node_keywords_append(params, param);
|
11680
12082
|
break;
|
11681
12083
|
}
|
@@ -11687,6 +12089,9 @@ parse_parameters(
|
|
11687
12089
|
}
|
11688
12090
|
|
11689
12091
|
pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
|
12092
|
+
if (repeated) {
|
12093
|
+
pm_node_flag_set_repeated_parameter(param);
|
12094
|
+
}
|
11690
12095
|
pm_parameters_node_keywords_append(params, param);
|
11691
12096
|
break;
|
11692
12097
|
}
|
@@ -11695,17 +12100,22 @@ parse_parameters(
|
|
11695
12100
|
|
11696
12101
|
if (token_begins_expression_p(parser->current.type)) {
|
11697
12102
|
context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
|
11698
|
-
|
11699
|
-
|
12103
|
+
|
12104
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_set(parser, pm_parser_constant_id_token(parser, &local));
|
11700
12105
|
pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
|
11701
|
-
|
12106
|
+
|
12107
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
11702
12108
|
context_pop(parser);
|
12109
|
+
|
11703
12110
|
param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
|
11704
12111
|
}
|
11705
12112
|
else {
|
11706
12113
|
param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
|
11707
12114
|
}
|
11708
12115
|
|
12116
|
+
if (repeated) {
|
12117
|
+
pm_node_flag_set_repeated_parameter(param);
|
12118
|
+
}
|
11709
12119
|
pm_parameters_node_keywords_append(params, param);
|
11710
12120
|
|
11711
12121
|
// If parsing the value of the parameter resulted in error recovery,
|
@@ -11728,20 +12138,21 @@ parse_parameters(
|
|
11728
12138
|
|
11729
12139
|
pm_token_t operator = parser->previous;
|
11730
12140
|
pm_token_t name;
|
11731
|
-
|
12141
|
+
bool repeated = false;
|
11732
12142
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
11733
12143
|
name = parser->previous;
|
11734
|
-
pm_parser_parameter_name_check(parser, &name);
|
12144
|
+
repeated = pm_parser_parameter_name_check(parser, &name);
|
11735
12145
|
pm_parser_local_add_token(parser, &name);
|
11736
12146
|
} else {
|
11737
12147
|
name = not_provided(parser);
|
11738
12148
|
|
11739
|
-
|
11740
|
-
pm_parser_local_add_token(parser, &operator);
|
11741
|
-
}
|
12149
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_POSITIONALS;
|
11742
12150
|
}
|
11743
12151
|
|
11744
12152
|
pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
|
12153
|
+
if (repeated) {
|
12154
|
+
pm_node_flag_set_repeated_parameter(param);
|
12155
|
+
}
|
11745
12156
|
if (params->rest == NULL) {
|
11746
12157
|
pm_parameters_node_rest_set(params, param);
|
11747
12158
|
} else {
|
@@ -11764,19 +12175,21 @@ parse_parameters(
|
|
11764
12175
|
} else {
|
11765
12176
|
pm_token_t name;
|
11766
12177
|
|
12178
|
+
bool repeated = false;
|
11767
12179
|
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
|
11768
12180
|
name = parser->previous;
|
11769
|
-
pm_parser_parameter_name_check(parser, &name);
|
12181
|
+
repeated = pm_parser_parameter_name_check(parser, &name);
|
11770
12182
|
pm_parser_local_add_token(parser, &name);
|
11771
12183
|
} else {
|
11772
12184
|
name = not_provided(parser);
|
11773
12185
|
|
11774
|
-
|
11775
|
-
pm_parser_local_add_token(parser, &operator);
|
11776
|
-
}
|
12186
|
+
parser->current_scope->forwarding_params |= PM_FORWARDING_KEYWORDS;
|
11777
12187
|
}
|
11778
12188
|
|
11779
12189
|
param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
|
12190
|
+
if (repeated) {
|
12191
|
+
pm_node_flag_set_repeated_parameter(param);
|
12192
|
+
}
|
11780
12193
|
}
|
11781
12194
|
|
11782
12195
|
if (params->keyword_rest == NULL) {
|
@@ -12012,10 +12425,13 @@ parse_block_parameters(
|
|
12012
12425
|
if ((opening->type != PM_TOKEN_NOT_PROVIDED) && accept1(parser, PM_TOKEN_SEMICOLON)) {
|
12013
12426
|
do {
|
12014
12427
|
expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
|
12015
|
-
pm_parser_parameter_name_check(parser, &parser->previous);
|
12428
|
+
bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
|
12016
12429
|
pm_parser_local_add_token(parser, &parser->previous);
|
12017
12430
|
|
12018
12431
|
pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
|
12432
|
+
if (repeated) {
|
12433
|
+
pm_node_flag_set_repeated_parameter((pm_node_t *)local);
|
12434
|
+
}
|
12019
12435
|
pm_block_parameters_node_append_local(block_parameters, local);
|
12020
12436
|
} while (accept1(parser, PM_TOKEN_COMMA));
|
12021
12437
|
}
|
@@ -12031,8 +12447,10 @@ parse_block(pm_parser_t *parser) {
|
|
12031
12447
|
pm_token_t opening = parser->previous;
|
12032
12448
|
accept1(parser, PM_TOKEN_NEWLINE);
|
12033
12449
|
|
12450
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
12034
12451
|
pm_accepts_block_stack_push(parser, true);
|
12035
12452
|
pm_parser_scope_push(parser, false);
|
12453
|
+
|
12036
12454
|
pm_block_parameters_node_t *block_parameters = NULL;
|
12037
12455
|
|
12038
12456
|
if (accept1(parser, PM_TOKEN_PIPE)) {
|
@@ -12053,12 +12471,6 @@ parse_block(pm_parser_t *parser) {
|
|
12053
12471
|
pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
|
12054
12472
|
}
|
12055
12473
|
|
12056
|
-
uint32_t locals_body_index = 0;
|
12057
|
-
|
12058
|
-
if (block_parameters) {
|
12059
|
-
locals_body_index = (uint32_t) parser->current_scope->locals.size;
|
12060
|
-
}
|
12061
|
-
|
12062
12474
|
accept1(parser, PM_TOKEN_NEWLINE);
|
12063
12475
|
pm_node_t *statements = NULL;
|
12064
12476
|
|
@@ -12090,13 +12502,14 @@ parse_block(pm_parser_t *parser) {
|
|
12090
12502
|
|
12091
12503
|
if (parameters == NULL && (maximum > 0)) {
|
12092
12504
|
parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = opening.start, .end = parser->previous.end }, maximum);
|
12093
|
-
locals_body_index = maximum;
|
12094
12505
|
}
|
12095
12506
|
|
12096
12507
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
12097
12508
|
pm_parser_scope_pop(parser);
|
12098
12509
|
pm_accepts_block_stack_pop(parser);
|
12099
|
-
|
12510
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
12511
|
+
|
12512
|
+
return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
|
12100
12513
|
}
|
12101
12514
|
|
12102
12515
|
/**
|
@@ -12157,14 +12570,20 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
|
|
12157
12570
|
}
|
12158
12571
|
|
12159
12572
|
if (block != NULL) {
|
12160
|
-
if (arguments->block == NULL) {
|
12573
|
+
if (arguments->block == NULL && !arguments->has_forwarding) {
|
12161
12574
|
arguments->block = (pm_node_t *) block;
|
12162
12575
|
} else {
|
12163
|
-
|
12164
|
-
|
12165
|
-
|
12576
|
+
if (arguments->has_forwarding) {
|
12577
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_FORWARDING);
|
12578
|
+
} else {
|
12579
|
+
pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
|
12580
|
+
}
|
12581
|
+
if (arguments->block != NULL) {
|
12582
|
+
if (arguments->arguments == NULL) {
|
12583
|
+
arguments->arguments = pm_arguments_node_create(parser);
|
12584
|
+
}
|
12585
|
+
pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
|
12166
12586
|
}
|
12167
|
-
pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
|
12168
12587
|
arguments->block = (pm_node_t *) block;
|
12169
12588
|
}
|
12170
12589
|
}
|
@@ -12384,8 +12803,14 @@ static inline pm_node_flags_t
|
|
12384
12803
|
parse_unescaped_encoding(const pm_parser_t *parser) {
|
12385
12804
|
if (parser->explicit_encoding != NULL) {
|
12386
12805
|
if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
|
12806
|
+
// If the there's an explicit encoding and it's using a UTF-8 escape
|
12807
|
+
// sequence, then mark the string as UTF-8.
|
12387
12808
|
return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
|
12388
12809
|
} else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
|
12810
|
+
// If there's a non-UTF-8 escape sequence being used, then the
|
12811
|
+
// string uses the source encoding, unless the source is marked as
|
12812
|
+
// US-ASCII. In that case the string is forced as ASCII-8BIT in
|
12813
|
+
// order to keep the string valid.
|
12389
12814
|
return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
|
12390
12815
|
}
|
12391
12816
|
}
|
@@ -12509,14 +12934,54 @@ parse_string_part(pm_parser_t *parser) {
|
|
12509
12934
|
}
|
12510
12935
|
}
|
12511
12936
|
|
12937
|
+
/**
|
12938
|
+
* When creating a symbol, unary operators that cannot be binary operators
|
12939
|
+
* automatically drop trailing `@` characters. This happens at the parser level,
|
12940
|
+
* such that `~@` is parsed as `~` and `!@` is parsed as `!`. We do that here.
|
12941
|
+
*/
|
12942
|
+
static const uint8_t *
|
12943
|
+
parse_operator_symbol_name(const pm_token_t *name) {
|
12944
|
+
switch (name->type) {
|
12945
|
+
case PM_TOKEN_TILDE:
|
12946
|
+
case PM_TOKEN_BANG:
|
12947
|
+
if (name->end[-1] == '@') return name->end - 1;
|
12948
|
+
/* fallthrough */
|
12949
|
+
default:
|
12950
|
+
return name->end;
|
12951
|
+
}
|
12952
|
+
}
|
12953
|
+
|
12954
|
+
static pm_node_t *
|
12955
|
+
parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
|
12956
|
+
pm_token_t closing = not_provided(parser);
|
12957
|
+
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
|
12958
|
+
|
12959
|
+
const uint8_t *end = parse_operator_symbol_name(&parser->current);
|
12960
|
+
|
12961
|
+
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
|
12962
|
+
parser_lex(parser);
|
12963
|
+
|
12964
|
+
pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
|
12965
|
+
pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
|
12966
|
+
|
12967
|
+
return (pm_node_t *) symbol;
|
12968
|
+
}
|
12969
|
+
|
12970
|
+
/**
|
12971
|
+
* Parse a symbol node. This function will get called immediately after finding
|
12972
|
+
* a symbol opening token. This handles parsing bare symbols and interpolated
|
12973
|
+
* symbols.
|
12974
|
+
*/
|
12512
12975
|
static pm_node_t *
|
12513
12976
|
parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state) {
|
12514
|
-
pm_token_t opening = parser->previous;
|
12977
|
+
const pm_token_t opening = parser->previous;
|
12515
12978
|
|
12516
12979
|
if (lex_mode->mode != PM_LEX_STRING) {
|
12517
12980
|
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
|
12518
12981
|
|
12519
12982
|
switch (parser->current.type) {
|
12983
|
+
case PM_CASE_OPERATOR:
|
12984
|
+
return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
|
12520
12985
|
case PM_TOKEN_IDENTIFIER:
|
12521
12986
|
case PM_TOKEN_CONSTANT:
|
12522
12987
|
case PM_TOKEN_INSTANCE_VARIABLE:
|
@@ -12528,10 +12993,6 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
12528
12993
|
case PM_CASE_KEYWORD:
|
12529
12994
|
parser_lex(parser);
|
12530
12995
|
break;
|
12531
|
-
case PM_CASE_OPERATOR:
|
12532
|
-
lex_state_set(parser, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
|
12533
|
-
parser_lex(parser);
|
12534
|
-
break;
|
12535
12996
|
default:
|
12536
12997
|
expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
|
12537
12998
|
break;
|
@@ -12541,6 +13002,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
12541
13002
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
12542
13003
|
|
12543
13004
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
13005
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
13006
|
+
|
12544
13007
|
return (pm_node_t *) symbol;
|
12545
13008
|
}
|
12546
13009
|
|
@@ -12637,7 +13100,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
12637
13100
|
} else {
|
12638
13101
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
|
12639
13102
|
}
|
12640
|
-
|
13103
|
+
|
13104
|
+
return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
12641
13105
|
}
|
12642
13106
|
|
12643
13107
|
/**
|
@@ -12647,8 +13111,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
|
|
12647
13111
|
static inline pm_node_t *
|
12648
13112
|
parse_undef_argument(pm_parser_t *parser) {
|
12649
13113
|
switch (parser->current.type) {
|
13114
|
+
case PM_CASE_OPERATOR: {
|
13115
|
+
const pm_token_t opening = not_provided(parser);
|
13116
|
+
return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
|
13117
|
+
}
|
12650
13118
|
case PM_CASE_KEYWORD:
|
12651
|
-
case PM_CASE_OPERATOR:
|
12652
13119
|
case PM_TOKEN_CONSTANT:
|
12653
13120
|
case PM_TOKEN_IDENTIFIER:
|
12654
13121
|
case PM_TOKEN_METHOD_NAME: {
|
@@ -12659,6 +13126,8 @@ parse_undef_argument(pm_parser_t *parser) {
|
|
12659
13126
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
12660
13127
|
|
12661
13128
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
13129
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
13130
|
+
|
12662
13131
|
return (pm_node_t *) symbol;
|
12663
13132
|
}
|
12664
13133
|
case PM_TOKEN_SYMBOL_BEGIN: {
|
@@ -12682,21 +13151,24 @@ parse_undef_argument(pm_parser_t *parser) {
|
|
12682
13151
|
static inline pm_node_t *
|
12683
13152
|
parse_alias_argument(pm_parser_t *parser, bool first) {
|
12684
13153
|
switch (parser->current.type) {
|
12685
|
-
case PM_CASE_OPERATOR:
|
13154
|
+
case PM_CASE_OPERATOR: {
|
13155
|
+
const pm_token_t opening = not_provided(parser);
|
13156
|
+
return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
|
13157
|
+
}
|
12686
13158
|
case PM_CASE_KEYWORD:
|
12687
13159
|
case PM_TOKEN_CONSTANT:
|
12688
13160
|
case PM_TOKEN_IDENTIFIER:
|
12689
13161
|
case PM_TOKEN_METHOD_NAME: {
|
12690
|
-
if (first)
|
12691
|
-
lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
|
12692
|
-
}
|
12693
|
-
|
13162
|
+
if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
|
12694
13163
|
parser_lex(parser);
|
13164
|
+
|
12695
13165
|
pm_token_t opening = not_provided(parser);
|
12696
13166
|
pm_token_t closing = not_provided(parser);
|
12697
13167
|
pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
|
12698
13168
|
|
12699
13169
|
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
|
13170
|
+
pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &symbol->unescaped));
|
13171
|
+
|
12700
13172
|
return (pm_node_t *) symbol;
|
12701
13173
|
}
|
12702
13174
|
case PM_TOKEN_SYMBOL_BEGIN: {
|
@@ -12733,6 +13205,64 @@ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
|
|
12733
13205
|
return false;
|
12734
13206
|
}
|
12735
13207
|
|
13208
|
+
/**
|
13209
|
+
* These are the names of the various numbered parameters. We have them here so
|
13210
|
+
* that when we insert them into the constant pool we can use a constant string
|
13211
|
+
* and not have to allocate.
|
13212
|
+
*/
|
13213
|
+
static const char * const pm_numbered_parameter_names[] = {
|
13214
|
+
"_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
|
13215
|
+
};
|
13216
|
+
|
13217
|
+
/**
|
13218
|
+
* Parse an identifier into either a local variable read. If the local variable
|
13219
|
+
* is not found, it returns NULL instead.
|
13220
|
+
*/
|
13221
|
+
static pm_local_variable_read_node_t *
|
13222
|
+
parse_variable(pm_parser_t *parser) {
|
13223
|
+
int depth;
|
13224
|
+
if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
|
13225
|
+
return pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
|
13226
|
+
}
|
13227
|
+
|
13228
|
+
if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
|
13229
|
+
// Now that we know we have a numbered parameter, we need to check
|
13230
|
+
// if it's allowed in this context. If it is, then we will create a
|
13231
|
+
// local variable read. If it's not, then we'll create a normal call
|
13232
|
+
// node but add an error.
|
13233
|
+
if (parser->current_scope->explicit_params) {
|
13234
|
+
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
|
13235
|
+
} else if (outer_scope_using_numbered_parameters_p(parser)) {
|
13236
|
+
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
|
13237
|
+
} else {
|
13238
|
+
// Indicate that this scope is using numbered params so that child
|
13239
|
+
// scopes cannot. We subtract the value for the character '0' to get
|
13240
|
+
// the actual integer value of the number (only _1 through _9 are
|
13241
|
+
// valid).
|
13242
|
+
uint8_t numbered_parameters = (uint8_t) (parser->previous.start[1] - '0');
|
13243
|
+
if (numbered_parameters > parser->current_scope->numbered_parameters) {
|
13244
|
+
parser->current_scope->numbered_parameters = numbered_parameters;
|
13245
|
+
pm_parser_numbered_parameters_set(parser, numbered_parameters);
|
13246
|
+
}
|
13247
|
+
|
13248
|
+
// When you use a numbered parameter, it implies the existence
|
13249
|
+
// of all of the locals that exist before it. For example,
|
13250
|
+
// referencing _2 means that _1 must exist. Therefore here we
|
13251
|
+
// loop through all of the possibilities and add them into the
|
13252
|
+
// constant pool.
|
13253
|
+
for (uint8_t numbered_parameter = 1; numbered_parameter <= numbered_parameters - 1; numbered_parameter++) {
|
13254
|
+
pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameter - 1], 2);
|
13255
|
+
}
|
13256
|
+
|
13257
|
+
// Finally we can create the local variable read node.
|
13258
|
+
pm_constant_id_t name_id = pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameters - 1], 2);
|
13259
|
+
return pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
13260
|
+
}
|
13261
|
+
}
|
13262
|
+
|
13263
|
+
return NULL;
|
13264
|
+
}
|
13265
|
+
|
12736
13266
|
/**
|
12737
13267
|
* Parse an identifier into either a local variable read or a call.
|
12738
13268
|
*/
|
@@ -12741,56 +13271,8 @@ parse_variable_call(pm_parser_t *parser) {
|
|
12741
13271
|
pm_node_flags_t flags = 0;
|
12742
13272
|
|
12743
13273
|
if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
|
12744
|
-
|
12745
|
-
if (
|
12746
|
-
return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
|
12747
|
-
}
|
12748
|
-
|
12749
|
-
if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
|
12750
|
-
// Now that we know we have a numbered parameter, we need to check
|
12751
|
-
// if it's allowed in this context. If it is, then we will create a
|
12752
|
-
// local variable read. If it's not, then we'll create a normal call
|
12753
|
-
// node but add an error.
|
12754
|
-
if (parser->current_scope->explicit_params) {
|
12755
|
-
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
|
12756
|
-
} else if (outer_scope_using_numbered_parameters_p(parser)) {
|
12757
|
-
pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
|
12758
|
-
} else {
|
12759
|
-
// Indicate that this scope is using numbered params so that child
|
12760
|
-
// scopes cannot.
|
12761
|
-
uint8_t number = parser->previous.start[1];
|
12762
|
-
|
12763
|
-
// We subtract the value for the character '0' to get the actual
|
12764
|
-
// integer value of the number (only _1 through _9 are valid)
|
12765
|
-
uint8_t numbered_parameters = (uint8_t) (number - '0');
|
12766
|
-
if (numbered_parameters > parser->current_scope->numbered_parameters) {
|
12767
|
-
parser->current_scope->numbered_parameters = numbered_parameters;
|
12768
|
-
pm_parser_numbered_parameters_set(parser, numbered_parameters);
|
12769
|
-
}
|
12770
|
-
|
12771
|
-
// When you use a numbered parameter, it implies the existence
|
12772
|
-
// of all of the locals that exist before it. For example,
|
12773
|
-
// referencing _2 means that _1 must exist. Therefore here we
|
12774
|
-
// loop through all of the possibilities and add them into the
|
12775
|
-
// constant pool.
|
12776
|
-
uint8_t current = '1';
|
12777
|
-
uint8_t *value;
|
12778
|
-
|
12779
|
-
while (current < number) {
|
12780
|
-
value = malloc(2);
|
12781
|
-
value[0] = '_';
|
12782
|
-
value[1] = current++;
|
12783
|
-
pm_parser_local_add_owned(parser, value, 2);
|
12784
|
-
}
|
12785
|
-
|
12786
|
-
// Now we can add the actual token that is being used. For
|
12787
|
-
// this one we can add a shared version since it is directly
|
12788
|
-
// referenced in the source.
|
12789
|
-
pm_parser_local_add_token(parser, &parser->previous);
|
12790
|
-
return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
12791
|
-
}
|
12792
|
-
}
|
12793
|
-
|
13274
|
+
pm_local_variable_read_node_t *node = parse_variable(parser);
|
13275
|
+
if (node != NULL) return (pm_node_t *) node;
|
12794
13276
|
flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
|
12795
13277
|
}
|
12796
13278
|
|
@@ -13076,43 +13558,77 @@ parse_pattern_keyword_rest(pm_parser_t *parser) {
|
|
13076
13558
|
return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
|
13077
13559
|
}
|
13078
13560
|
|
13561
|
+
/**
|
13562
|
+
* Create an implicit node for the value of a hash pattern that has omitted the
|
13563
|
+
* value. This will use an implicit local variable target.
|
13564
|
+
*/
|
13565
|
+
static pm_node_t *
|
13566
|
+
parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_symbol_node_t *key) {
|
13567
|
+
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
|
13568
|
+
pm_constant_id_t name = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
|
13569
|
+
|
13570
|
+
int current_depth = pm_parser_local_depth_constant_id(parser, name);
|
13571
|
+
uint32_t depth;
|
13572
|
+
|
13573
|
+
if (current_depth == -1) {
|
13574
|
+
pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
|
13575
|
+
depth = 0;
|
13576
|
+
} else {
|
13577
|
+
depth = (uint32_t) current_depth;
|
13578
|
+
}
|
13579
|
+
|
13580
|
+
pm_local_variable_target_node_t *target = pm_local_variable_target_node_create_values(parser, value_loc, name, depth);
|
13581
|
+
return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
|
13582
|
+
}
|
13583
|
+
|
13079
13584
|
/**
|
13080
13585
|
* Parse a hash pattern.
|
13081
13586
|
*/
|
13082
13587
|
static pm_hash_pattern_node_t *
|
13083
|
-
parse_pattern_hash(pm_parser_t *parser, pm_node_t *
|
13588
|
+
parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_node) {
|
13084
13589
|
pm_node_list_t assocs = { 0 };
|
13085
13590
|
pm_node_t *rest = NULL;
|
13086
13591
|
|
13087
|
-
switch (PM_NODE_TYPE(
|
13088
|
-
case
|
13089
|
-
|
13090
|
-
|
13091
|
-
|
13092
|
-
|
13592
|
+
switch (PM_NODE_TYPE(first_node)) {
|
13593
|
+
case PM_ASSOC_SPLAT_NODE:
|
13594
|
+
case PM_NO_KEYWORDS_PARAMETER_NODE:
|
13595
|
+
rest = first_node;
|
13596
|
+
break;
|
13597
|
+
case PM_SYMBOL_NODE: {
|
13598
|
+
if (pm_symbol_node_label_p(first_node)) {
|
13599
|
+
pm_node_t *value;
|
13600
|
+
|
13601
|
+
if (!match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
|
13602
|
+
// Here we have a value for the first assoc in the list, so
|
13603
|
+
// we will parse it now.
|
13604
|
+
value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
|
13605
|
+
} else {
|
13606
|
+
// Otherwise, we will create an implicit local variable
|
13607
|
+
// target for the value.
|
13608
|
+
value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) first_node);
|
13609
|
+
}
|
13093
13610
|
|
13094
|
-
|
13095
|
-
assoc
|
13096
|
-
assoc->value = value;
|
13097
|
-
} else {
|
13098
|
-
pm_node_t *key = ((pm_assoc_node_t *) first_assoc)->key;
|
13611
|
+
pm_token_t operator = not_provided(parser);
|
13612
|
+
pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
|
13099
13613
|
|
13100
|
-
|
13101
|
-
|
13102
|
-
pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
|
13103
|
-
}
|
13614
|
+
pm_node_list_append(&assocs, assoc);
|
13615
|
+
break;
|
13104
13616
|
}
|
13617
|
+
}
|
13618
|
+
/* fallthrough */
|
13619
|
+
default: {
|
13620
|
+
// If we get anything else, then this is an error. For this we'll
|
13621
|
+
// create a missing node for the value and create an assoc node for
|
13622
|
+
// the first node in the list.
|
13623
|
+
pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_LABEL);
|
13624
|
+
|
13625
|
+
pm_token_t operator = not_provided(parser);
|
13626
|
+
pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
|
13627
|
+
pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
|
13105
13628
|
|
13106
|
-
pm_node_list_append(&assocs,
|
13629
|
+
pm_node_list_append(&assocs, assoc);
|
13107
13630
|
break;
|
13108
13631
|
}
|
13109
|
-
case PM_ASSOC_SPLAT_NODE:
|
13110
|
-
case PM_NO_KEYWORDS_PARAMETER_NODE:
|
13111
|
-
rest = first_assoc;
|
13112
|
-
break;
|
13113
|
-
default:
|
13114
|
-
assert(false);
|
13115
|
-
break;
|
13116
13632
|
}
|
13117
13633
|
|
13118
13634
|
// If there are any other assocs, then we'll parse them now.
|
@@ -13141,6 +13657,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
|
|
13141
13657
|
} else {
|
13142
13658
|
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
|
13143
13659
|
pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
|
13660
|
+
value = parse_pattern_hash_implicit_value(parser, (pm_symbol_node_t *) key);
|
13144
13661
|
}
|
13145
13662
|
|
13146
13663
|
pm_token_t operator = not_provided(parser);
|
@@ -13246,45 +13763,29 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
13246
13763
|
// pattern node.
|
13247
13764
|
node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
|
13248
13765
|
} else {
|
13249
|
-
pm_node_t *
|
13766
|
+
pm_node_t *first_node;
|
13250
13767
|
|
13251
13768
|
switch (parser->current.type) {
|
13252
|
-
case PM_TOKEN_LABEL:
|
13769
|
+
case PM_TOKEN_LABEL:
|
13253
13770
|
parser_lex(parser);
|
13254
|
-
|
13255
|
-
pm_symbol_node_t *key = pm_symbol_node_label_create(parser, &parser->previous);
|
13256
|
-
pm_token_t operator = not_provided(parser);
|
13257
|
-
|
13258
|
-
first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
|
13771
|
+
first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
|
13259
13772
|
break;
|
13260
|
-
}
|
13261
13773
|
case PM_TOKEN_USTAR_STAR:
|
13262
|
-
|
13774
|
+
first_node = parse_pattern_keyword_rest(parser);
|
13263
13775
|
break;
|
13264
|
-
case PM_TOKEN_STRING_BEGIN:
|
13265
|
-
|
13266
|
-
pm_token_t operator = not_provided(parser);
|
13267
|
-
|
13268
|
-
if (!pm_symbol_node_label_p(key)) {
|
13269
|
-
pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_LABEL);
|
13270
|
-
}
|
13271
|
-
|
13272
|
-
first_assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL);
|
13776
|
+
case PM_TOKEN_STRING_BEGIN:
|
13777
|
+
first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
|
13273
13778
|
break;
|
13274
|
-
}
|
13275
13779
|
default: {
|
13276
13780
|
parser_lex(parser);
|
13277
13781
|
pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
|
13278
13782
|
|
13279
|
-
|
13280
|
-
pm_token_t operator = not_provided(parser);
|
13281
|
-
|
13282
|
-
first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
|
13783
|
+
first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
13283
13784
|
break;
|
13284
13785
|
}
|
13285
13786
|
}
|
13286
13787
|
|
13287
|
-
node = parse_pattern_hash(parser,
|
13788
|
+
node = parse_pattern_hash(parser, first_node);
|
13288
13789
|
|
13289
13790
|
accept1(parser, PM_TOKEN_NEWLINE);
|
13290
13791
|
expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
|
@@ -13350,7 +13851,16 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
|
|
13350
13851
|
switch (parser->current.type) {
|
13351
13852
|
case PM_TOKEN_IDENTIFIER: {
|
13352
13853
|
parser_lex(parser);
|
13353
|
-
pm_node_t *variable = (pm_node_t *)
|
13854
|
+
pm_node_t *variable = (pm_node_t *) parse_variable(parser);
|
13855
|
+
if (variable == NULL) {
|
13856
|
+
if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3_0 && pm_token_is_it(parser->previous.start, parser->previous.end)) {
|
13857
|
+
pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
|
13858
|
+
variable = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
|
13859
|
+
} else {
|
13860
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE, (int) (parser->previous.end - parser->previous.start), parser->previous.start);
|
13861
|
+
variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
|
13862
|
+
}
|
13863
|
+
}
|
13354
13864
|
|
13355
13865
|
return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
|
13356
13866
|
}
|
@@ -13519,9 +14029,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
|
|
13519
14029
|
case PM_TOKEN_LABEL: {
|
13520
14030
|
parser_lex(parser);
|
13521
14031
|
pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
|
13522
|
-
|
13523
|
-
|
13524
|
-
return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL));
|
14032
|
+
return (pm_node_t *) parse_pattern_hash(parser, key);
|
13525
14033
|
}
|
13526
14034
|
case PM_TOKEN_USTAR_STAR: {
|
13527
14035
|
node = parse_pattern_keyword_rest(parser);
|
@@ -13544,8 +14052,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
|
|
13544
14052
|
// If we got a dynamic label symbol, then we need to treat it like the
|
13545
14053
|
// beginning of a hash pattern.
|
13546
14054
|
if (pm_symbol_node_label_p(node)) {
|
13547
|
-
|
13548
|
-
return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, node, &operator, NULL));
|
14055
|
+
return (pm_node_t *) parse_pattern_hash(parser, node);
|
13549
14056
|
}
|
13550
14057
|
|
13551
14058
|
if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
|
@@ -13558,7 +14065,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
|
|
13558
14065
|
// Gather up all of the patterns into the list.
|
13559
14066
|
while (accept1(parser, PM_TOKEN_COMMA)) {
|
13560
14067
|
// Break early here in case we have a trailing comma.
|
13561
|
-
if (
|
14068
|
+
if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
|
13562
14069
|
node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
|
13563
14070
|
pm_node_list_append(&nodes, node);
|
13564
14071
|
break;
|
@@ -13644,7 +14151,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13644
14151
|
assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
|
13645
14152
|
|
13646
14153
|
bool concating = false;
|
13647
|
-
bool state_is_arg_labeled =
|
14154
|
+
bool state_is_arg_labeled = lex_state_arg_labeled_p(parser);
|
13648
14155
|
|
13649
14156
|
while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
|
13650
14157
|
pm_node_t *node = NULL;
|
@@ -13719,7 +14226,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13719
14226
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
|
13720
14227
|
node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
13721
14228
|
} else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
|
13722
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
14229
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
13723
14230
|
} else if (match1(parser, PM_TOKEN_EOF)) {
|
13724
14231
|
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_TERM);
|
13725
14232
|
node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
|
@@ -13741,7 +14248,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13741
14248
|
pm_node_flag_set(node, parse_unescaped_encoding(parser));
|
13742
14249
|
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
|
13743
14250
|
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
|
13744
|
-
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
|
14251
|
+
node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
|
13745
14252
|
} else {
|
13746
14253
|
// If we get here, then we have interpolation so we'll need
|
13747
14254
|
// to create a string or symbol node with interpolation.
|
@@ -13834,7 +14341,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
|
|
13834
14341
|
* Parse an expression that begins with the previous node that we just lexed.
|
13835
14342
|
*/
|
13836
14343
|
static inline pm_node_t *
|
13837
|
-
parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call) {
|
14344
|
+
parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
|
13838
14345
|
switch (parser->current.type) {
|
13839
14346
|
case PM_TOKEN_BRACKET_LEFT_ARRAY: {
|
13840
14347
|
parser_lex(parser);
|
@@ -13866,9 +14373,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
13866
14373
|
pm_node_t *expression = NULL;
|
13867
14374
|
|
13868
14375
|
if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
|
13869
|
-
|
13870
|
-
pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
|
13871
|
-
}
|
14376
|
+
pm_parser_scope_forwarding_positionals_check(parser, &operator);
|
13872
14377
|
} else {
|
13873
14378
|
expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR);
|
13874
14379
|
}
|
@@ -14113,7 +14618,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14113
14618
|
if (
|
14114
14619
|
match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
|
14115
14620
|
(accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
|
14116
|
-
(pm_accepts_block_stack_p(parser) &&
|
14621
|
+
(pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
|
14622
|
+
match1(parser, PM_TOKEN_BRACE_LEFT)
|
14117
14623
|
) {
|
14118
14624
|
pm_arguments_t arguments = { 0 };
|
14119
14625
|
parse_arguments_list(parser, &arguments, true, accepts_command_call);
|
@@ -14237,7 +14743,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14237
14743
|
// a block, so we need to check for that here.
|
14238
14744
|
if (
|
14239
14745
|
(accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
|
14240
|
-
(pm_accepts_block_stack_p(parser) &&
|
14746
|
+
(pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
|
14747
|
+
match1(parser, PM_TOKEN_BRACE_LEFT)
|
14241
14748
|
) {
|
14242
14749
|
pm_arguments_t arguments = { 0 };
|
14243
14750
|
parse_arguments_list(parser, &arguments, true, accepts_command_call);
|
@@ -14250,6 +14757,31 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14250
14757
|
|
14251
14758
|
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
|
14252
14759
|
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
|
14760
|
+
} else {
|
14761
|
+
// Check if `it` is not going to be assigned.
|
14762
|
+
switch (parser->current.type) {
|
14763
|
+
case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
|
14764
|
+
case PM_TOKEN_AMPERSAND_EQUAL:
|
14765
|
+
case PM_TOKEN_CARET_EQUAL:
|
14766
|
+
case PM_TOKEN_EQUAL:
|
14767
|
+
case PM_TOKEN_GREATER_GREATER_EQUAL:
|
14768
|
+
case PM_TOKEN_LESS_LESS_EQUAL:
|
14769
|
+
case PM_TOKEN_MINUS_EQUAL:
|
14770
|
+
case PM_TOKEN_PARENTHESIS_RIGHT:
|
14771
|
+
case PM_TOKEN_PERCENT_EQUAL:
|
14772
|
+
case PM_TOKEN_PIPE_EQUAL:
|
14773
|
+
case PM_TOKEN_PIPE_PIPE_EQUAL:
|
14774
|
+
case PM_TOKEN_PLUS_EQUAL:
|
14775
|
+
case PM_TOKEN_SLASH_EQUAL:
|
14776
|
+
case PM_TOKEN_STAR_EQUAL:
|
14777
|
+
case PM_TOKEN_STAR_STAR_EQUAL:
|
14778
|
+
break;
|
14779
|
+
default:
|
14780
|
+
// Once we know it's neither a method call nor an
|
14781
|
+
// assignment, we can finally create `it` default
|
14782
|
+
// parameter.
|
14783
|
+
node = pm_node_check_it(parser, node);
|
14784
|
+
}
|
14253
14785
|
}
|
14254
14786
|
|
14255
14787
|
return node;
|
@@ -14286,6 +14818,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14286
14818
|
// If we get here, then we tried to find something in the
|
14287
14819
|
// heredoc but couldn't actually parse anything, so we'll just
|
14288
14820
|
// return a missing node.
|
14821
|
+
//
|
14822
|
+
// parse_string_part handles its own errors, so there is no need
|
14823
|
+
// for us to add one here.
|
14289
14824
|
node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
14290
14825
|
} else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
|
14291
14826
|
// If we get here, then the part that we parsed was plain string
|
@@ -14549,11 +15084,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14549
15084
|
// for guard clauses in the form of `if` or `unless` statements.
|
14550
15085
|
if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
|
14551
15086
|
pm_token_t keyword = parser->previous;
|
14552
|
-
pm_node_t *predicate = parse_value_expression(parser,
|
15087
|
+
pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
|
14553
15088
|
pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
|
14554
15089
|
} else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
|
14555
15090
|
pm_token_t keyword = parser->previous;
|
14556
|
-
pm_node_t *predicate = parse_value_expression(parser,
|
15091
|
+
pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
|
14557
15092
|
pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
|
14558
15093
|
}
|
14559
15094
|
|
@@ -14742,8 +15277,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14742
15277
|
pm_token_t operator = parser->previous;
|
14743
15278
|
pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_NOT, true, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS);
|
14744
15279
|
|
14745
|
-
pm_constant_id_t
|
14746
|
-
parser->current_param_name = 0;
|
15280
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
14747
15281
|
pm_parser_scope_push(parser, true);
|
14748
15282
|
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
|
14749
15283
|
|
@@ -14760,11 +15294,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14760
15294
|
}
|
14761
15295
|
|
14762
15296
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
|
14763
|
-
|
14764
15297
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
15298
|
+
|
14765
15299
|
pm_parser_scope_pop(parser);
|
14766
|
-
parser->current_param_name = old_param_name;
|
14767
15300
|
pm_do_loop_stack_pop(parser);
|
15301
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
15302
|
+
|
14768
15303
|
return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
|
14769
15304
|
}
|
14770
15305
|
|
@@ -14790,9 +15325,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14790
15325
|
superclass = NULL;
|
14791
15326
|
}
|
14792
15327
|
|
14793
|
-
pm_constant_id_t
|
14794
|
-
parser->current_param_name = 0;
|
15328
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
14795
15329
|
pm_parser_scope_push(parser, true);
|
15330
|
+
|
14796
15331
|
if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
|
14797
15332
|
expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
|
14798
15333
|
} else {
|
@@ -14818,9 +15353,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14818
15353
|
}
|
14819
15354
|
|
14820
15355
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
15356
|
+
|
14821
15357
|
pm_parser_scope_pop(parser);
|
14822
|
-
parser->current_param_name = old_param_name;
|
14823
15358
|
pm_do_loop_stack_pop(parser);
|
15359
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
14824
15360
|
|
14825
15361
|
if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
|
14826
15362
|
pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
|
@@ -14835,18 +15371,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14835
15371
|
pm_token_t operator = not_provided(parser);
|
14836
15372
|
pm_token_t name = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = def_keyword.end, .end = def_keyword.end };
|
14837
15373
|
|
14838
|
-
// This context is necessary for lexing `...` in a bare params
|
14839
|
-
// It must be pushed before lexing the first param, so it
|
15374
|
+
// This context is necessary for lexing `...` in a bare params
|
15375
|
+
// correctly. It must be pushed before lexing the first param, so it
|
15376
|
+
// is here.
|
14840
15377
|
context_push(parser, PM_CONTEXT_DEF_PARAMS);
|
15378
|
+
pm_constant_id_t saved_param_name;
|
15379
|
+
|
14841
15380
|
parser_lex(parser);
|
14842
|
-
pm_constant_id_t old_param_name = parser->current_param_name;
|
14843
15381
|
|
14844
15382
|
switch (parser->current.type) {
|
14845
15383
|
case PM_CASE_OPERATOR:
|
15384
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14846
15385
|
pm_parser_scope_push(parser, true);
|
14847
|
-
parser->current_param_name = 0;
|
14848
15386
|
lex_state_set(parser, PM_LEX_STATE_ENDFN);
|
14849
15387
|
parser_lex(parser);
|
15388
|
+
|
14850
15389
|
name = parser->previous;
|
14851
15390
|
break;
|
14852
15391
|
case PM_TOKEN_IDENTIFIER: {
|
@@ -14854,18 +15393,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14854
15393
|
|
14855
15394
|
if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
|
14856
15395
|
receiver = parse_variable_call(parser);
|
15396
|
+
receiver = pm_node_check_it(parser, receiver);
|
14857
15397
|
|
15398
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14858
15399
|
pm_parser_scope_push(parser, true);
|
14859
|
-
parser->current_param_name = 0;
|
14860
15400
|
lex_state_set(parser, PM_LEX_STATE_FNAME);
|
14861
15401
|
parser_lex(parser);
|
14862
15402
|
|
14863
15403
|
operator = parser->previous;
|
14864
15404
|
name = parse_method_definition_name(parser);
|
14865
15405
|
} else {
|
15406
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14866
15407
|
pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
|
14867
15408
|
pm_parser_scope_push(parser, true);
|
14868
|
-
|
15409
|
+
|
14869
15410
|
name = parser->previous;
|
14870
15411
|
}
|
14871
15412
|
|
@@ -14882,9 +15423,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14882
15423
|
case PM_TOKEN_KEYWORD___FILE__:
|
14883
15424
|
case PM_TOKEN_KEYWORD___LINE__:
|
14884
15425
|
case PM_TOKEN_KEYWORD___ENCODING__: {
|
15426
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14885
15427
|
pm_parser_scope_push(parser, true);
|
14886
|
-
parser->current_param_name = 0;
|
14887
15428
|
parser_lex(parser);
|
15429
|
+
|
14888
15430
|
pm_token_t identifier = parser->previous;
|
14889
15431
|
|
14890
15432
|
if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
|
@@ -14946,6 +15488,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14946
15488
|
pm_token_t lparen = parser->previous;
|
14947
15489
|
pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_DEF_RECEIVER);
|
14948
15490
|
|
15491
|
+
accept1(parser, PM_TOKEN_NEWLINE);
|
14949
15492
|
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
|
14950
15493
|
pm_token_t rparen = parser->previous;
|
14951
15494
|
|
@@ -14955,8 +15498,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14955
15498
|
operator = parser->previous;
|
14956
15499
|
receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
|
14957
15500
|
|
15501
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14958
15502
|
pm_parser_scope_push(parser, true);
|
14959
|
-
parser->current_param_name = 0;
|
14960
15503
|
|
14961
15504
|
// `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same reason as described above.
|
14962
15505
|
context_push(parser, PM_CONTEXT_DEF_PARAMS);
|
@@ -14964,8 +15507,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
14964
15507
|
break;
|
14965
15508
|
}
|
14966
15509
|
default:
|
15510
|
+
saved_param_name = pm_parser_current_param_name_unset(parser);
|
14967
15511
|
pm_parser_scope_push(parser, true);
|
14968
|
-
|
15512
|
+
|
14969
15513
|
name = parse_method_definition_name(parser);
|
14970
15514
|
break;
|
14971
15515
|
}
|
@@ -15018,8 +15562,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15018
15562
|
}
|
15019
15563
|
}
|
15020
15564
|
|
15021
|
-
uint32_t locals_body_index = (uint32_t) parser->current_scope->locals.size;
|
15022
|
-
|
15023
15565
|
context_pop(parser);
|
15024
15566
|
pm_node_t *statements = NULL;
|
15025
15567
|
pm_token_t equal;
|
@@ -15080,8 +15622,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15080
15622
|
}
|
15081
15623
|
|
15082
15624
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
15083
|
-
|
15625
|
+
|
15084
15626
|
pm_parser_scope_pop(parser);
|
15627
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
15628
|
+
|
15629
|
+
/**
|
15630
|
+
* If the final character is @, as is the case when defining
|
15631
|
+
* methods to override the unary operators, we should ignore
|
15632
|
+
* the @ in the same way we do for symbols.
|
15633
|
+
*/
|
15634
|
+
name.end = parse_operator_symbol_name(&name);
|
15085
15635
|
|
15086
15636
|
return (pm_node_t *) pm_def_node_create(
|
15087
15637
|
parser,
|
@@ -15090,7 +15640,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15090
15640
|
params,
|
15091
15641
|
statements,
|
15092
15642
|
&locals,
|
15093
|
-
locals_body_index,
|
15094
15643
|
&def_keyword,
|
15095
15644
|
&operator,
|
15096
15645
|
&lparen,
|
@@ -15309,9 +15858,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15309
15858
|
pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
|
15310
15859
|
}
|
15311
15860
|
|
15312
|
-
pm_constant_id_t old_param_name = parser->current_param_name;
|
15313
|
-
parser->current_param_name = 0;
|
15861
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
15314
15862
|
pm_parser_scope_push(parser, true);
|
15863
|
+
|
15315
15864
|
accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
|
15316
15865
|
pm_node_t *statements = NULL;
|
15317
15866
|
|
@@ -15328,7 +15877,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15328
15877
|
|
15329
15878
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
15330
15879
|
pm_parser_scope_pop(parser);
|
15331
|
-
parser->current_param_name = old_param_name;
|
15880
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
15332
15881
|
|
15333
15882
|
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
|
15334
15883
|
|
@@ -15914,6 +16463,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15914
16463
|
// context of a multiple assignment. We enforce that here. We'll
|
15915
16464
|
// still lex past it though and create a missing node in its place.
|
15916
16465
|
if (binding_power != PM_BINDING_POWER_STATEMENT) {
|
16466
|
+
pm_parser_err_previous(parser, diag_id);
|
15917
16467
|
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
15918
16468
|
}
|
15919
16469
|
|
@@ -15995,7 +16545,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
15995
16545
|
parser_lex(parser);
|
15996
16546
|
|
15997
16547
|
pm_token_t operator = parser->previous;
|
16548
|
+
pm_constant_id_t saved_param_name = pm_parser_current_param_name_unset(parser);
|
15998
16549
|
pm_parser_scope_push(parser, false);
|
16550
|
+
|
15999
16551
|
pm_block_parameters_node_t *block_parameters;
|
16000
16552
|
|
16001
16553
|
switch (parser->current.type) {
|
@@ -16030,12 +16582,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16030
16582
|
}
|
16031
16583
|
}
|
16032
16584
|
|
16033
|
-
uint32_t locals_body_index = 0;
|
16034
|
-
|
16035
|
-
if (block_parameters) {
|
16036
|
-
locals_body_index = (uint32_t) parser->current_scope->locals.size;
|
16037
|
-
}
|
16038
|
-
|
16039
16585
|
pm_token_t opening;
|
16040
16586
|
pm_node_t *body = NULL;
|
16041
16587
|
parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
|
@@ -16070,13 +16616,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16070
16616
|
|
16071
16617
|
if (parameters == NULL && (maximum > 0)) {
|
16072
16618
|
parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = operator.start, .end = parser->previous.end }, maximum);
|
16073
|
-
locals_body_index = maximum;
|
16074
16619
|
}
|
16075
16620
|
|
16076
16621
|
pm_constant_id_list_t locals = parser->current_scope->locals;
|
16622
|
+
|
16077
16623
|
pm_parser_scope_pop(parser);
|
16078
16624
|
pm_accepts_block_stack_pop(parser);
|
16079
|
-
|
16625
|
+
pm_parser_current_param_name_restore(parser, saved_param_name);
|
16626
|
+
|
16627
|
+
return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
|
16080
16628
|
}
|
16081
16629
|
case PM_TOKEN_UPLUS: {
|
16082
16630
|
parser_lex(parser);
|
@@ -16095,12 +16643,34 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
|
|
16095
16643
|
|
16096
16644
|
return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END);
|
16097
16645
|
}
|
16098
|
-
default:
|
16099
|
-
|
16646
|
+
default: {
|
16647
|
+
pm_context_t recoverable = context_recoverable(parser, &parser->current);
|
16648
|
+
|
16649
|
+
if (recoverable != PM_CONTEXT_NONE) {
|
16100
16650
|
parser->recovering = true;
|
16651
|
+
|
16652
|
+
// If the given error is not the generic one, then we'll add it
|
16653
|
+
// here because it will provide more context in addition to the
|
16654
|
+
// recoverable error that we will also add.
|
16655
|
+
if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
|
16656
|
+
pm_parser_err_previous(parser, diag_id);
|
16657
|
+
}
|
16658
|
+
|
16659
|
+
// If we get here, then we are assuming this token is closing a
|
16660
|
+
// parent context, so we'll indicate that to the user so that
|
16661
|
+
// they know how we behaved.
|
16662
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
|
16663
|
+
} else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
|
16664
|
+
// We're going to make a special case here, because "cannot
|
16665
|
+
// parse expression" is pretty generic, and we know here that we
|
16666
|
+
// have an unexpected token.
|
16667
|
+
PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
|
16668
|
+
} else {
|
16669
|
+
pm_parser_err_previous(parser, diag_id);
|
16101
16670
|
}
|
16102
16671
|
|
16103
16672
|
return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
|
16673
|
+
}
|
16104
16674
|
}
|
16105
16675
|
}
|
16106
16676
|
|
@@ -16412,7 +16982,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16412
16982
|
}
|
16413
16983
|
|
16414
16984
|
// If this node cannot be writable, then we have an error.
|
16415
|
-
if (pm_call_node_writable_p(cast)) {
|
16985
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
16416
16986
|
parse_write_name(parser, &cast->name);
|
16417
16987
|
} else {
|
16418
16988
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -16523,7 +17093,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16523
17093
|
}
|
16524
17094
|
|
16525
17095
|
// If this node cannot be writable, then we have an error.
|
16526
|
-
if (pm_call_node_writable_p(cast)) {
|
17096
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
16527
17097
|
parse_write_name(parser, &cast->name);
|
16528
17098
|
} else {
|
16529
17099
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -16644,7 +17214,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
16644
17214
|
}
|
16645
17215
|
|
16646
17216
|
// If this node cannot be writable, then we have an error.
|
16647
|
-
if (pm_call_node_writable_p(cast)) {
|
17217
|
+
if (pm_call_node_writable_p(parser, cast)) {
|
16648
17218
|
parse_write_name(parser, &cast->name);
|
16649
17219
|
} else {
|
16650
17220
|
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
|
@@ -17063,15 +17633,12 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|
17063
17633
|
*/
|
17064
17634
|
static pm_node_t *
|
17065
17635
|
parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
|
17066
|
-
|
17067
|
-
pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call);
|
17636
|
+
pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, diag_id);
|
17068
17637
|
|
17069
17638
|
switch (PM_NODE_TYPE(node)) {
|
17070
17639
|
case PM_MISSING_NODE:
|
17071
17640
|
// If we found a syntax error, then the type of node returned by
|
17072
|
-
// parse_expression_prefix is going to be a missing node. In that
|
17073
|
-
// case we need to add the error message to the parser's error list.
|
17074
|
-
pm_parser_err(parser, recovery.end, recovery.end, diag_id);
|
17641
|
+
// parse_expression_prefix is going to be a missing node.
|
17075
17642
|
return node;
|
17076
17643
|
case PM_PRE_EXECUTION_NODE:
|
17077
17644
|
case PM_POST_EXECUTION_NODE:
|
@@ -17080,7 +17647,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
17080
17647
|
case PM_UNDEF_NODE:
|
17081
17648
|
// These expressions are statements, and cannot be followed by
|
17082
17649
|
// operators (except modifiers).
|
17083
|
-
if (pm_binding_powers[parser->current.type].left >
|
17650
|
+
if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
|
17084
17651
|
return node;
|
17085
17652
|
}
|
17086
17653
|
break;
|
@@ -17175,9 +17742,14 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
|
|
17175
17742
|
|
17176
17743
|
static pm_node_t *
|
17177
17744
|
parse_program(pm_parser_t *parser) {
|
17178
|
-
|
17179
|
-
|
17745
|
+
// If the current scope is NULL, then we want to push a new top level scope.
|
17746
|
+
// The current scope could exist in the event that we are parsing an eval
|
17747
|
+
// and the user has passed in scopes that already exist.
|
17748
|
+
if (parser->current_scope == NULL) {
|
17749
|
+
pm_parser_scope_push(parser, true);
|
17750
|
+
}
|
17180
17751
|
|
17752
|
+
parser_lex(parser);
|
17181
17753
|
pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN);
|
17182
17754
|
if (!statements) {
|
17183
17755
|
statements = pm_statements_node_create(parser);
|
@@ -17234,7 +17806,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17234
17806
|
.encoding_changed_callback = NULL,
|
17235
17807
|
.encoding_comment_start = source,
|
17236
17808
|
.lex_callback = NULL,
|
17237
|
-
.
|
17809
|
+
.filepath = { 0 },
|
17238
17810
|
.constant_pool = { 0 },
|
17239
17811
|
.newline_list = { 0 },
|
17240
17812
|
.integer_base = 0,
|
@@ -17248,8 +17820,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17248
17820
|
.in_keyword_arg = false,
|
17249
17821
|
.current_param_name = 0,
|
17250
17822
|
.semantic_token_seen = false,
|
17251
|
-
.frozen_string_literal = false
|
17252
|
-
.suppress_warnings = false
|
17823
|
+
.frozen_string_literal = false
|
17253
17824
|
};
|
17254
17825
|
|
17255
17826
|
// Initialize the constant pool. We're going to completely guess as to the
|
@@ -17278,7 +17849,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17278
17849
|
// If options were provided to this parse, establish them here.
|
17279
17850
|
if (options != NULL) {
|
17280
17851
|
// filepath option
|
17281
|
-
parser->
|
17852
|
+
parser->filepath = options->filepath;
|
17282
17853
|
|
17283
17854
|
// line option
|
17284
17855
|
parser->start_line = options->line;
|
@@ -17295,10 +17866,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|
17295
17866
|
parser->frozen_string_literal = true;
|
17296
17867
|
}
|
17297
17868
|
|
17298
|
-
//
|
17299
|
-
|
17300
|
-
parser->suppress_warnings = true;
|
17301
|
-
}
|
17869
|
+
// version option
|
17870
|
+
parser->version = options->version;
|
17302
17871
|
|
17303
17872
|
// scopes option
|
17304
17873
|
for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
|
@@ -17382,7 +17951,7 @@ pm_magic_comment_list_free(pm_list_t *list) {
|
|
17382
17951
|
*/
|
17383
17952
|
PRISM_EXPORTED_FUNCTION void
|
17384
17953
|
pm_parser_free(pm_parser_t *parser) {
|
17385
|
-
pm_string_free(&parser->
|
17954
|
+
pm_string_free(&parser->filepath);
|
17386
17955
|
pm_diagnostic_list_free(&parser->error_list);
|
17387
17956
|
pm_diagnostic_list_free(&parser->warning_list);
|
17388
17957
|
pm_comment_list_free(&parser->comment_list);
|
@@ -17484,3 +18053,299 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
|
|
17484
18053
|
#undef PM_LOCATION_NODE_VALUE
|
17485
18054
|
#undef PM_LOCATION_NULL_VALUE
|
17486
18055
|
#undef PM_LOCATION_TOKEN_VALUE
|
18056
|
+
|
18057
|
+
/** An error that is going to be formatted into the output. */
|
18058
|
+
typedef struct {
|
18059
|
+
/** A pointer to the diagnostic that was generated during parsing. */
|
18060
|
+
pm_diagnostic_t *error;
|
18061
|
+
|
18062
|
+
/** The start line of the diagnostic message. */
|
18063
|
+
uint32_t line;
|
18064
|
+
|
18065
|
+
/** The column start of the diagnostic message. */
|
18066
|
+
uint32_t column_start;
|
18067
|
+
|
18068
|
+
/** The column end of the diagnostic message. */
|
18069
|
+
uint32_t column_end;
|
18070
|
+
} pm_error_t;
|
18071
|
+
|
18072
|
+
/** The format that will be used to format the errors into the output. */
|
18073
|
+
typedef struct {
|
18074
|
+
/** The prefix that will be used for line numbers. */
|
18075
|
+
const char *number_prefix;
|
18076
|
+
|
18077
|
+
/** The prefix that will be used for blank lines. */
|
18078
|
+
const char *blank_prefix;
|
18079
|
+
|
18080
|
+
/** The divider that will be used between sections of source code. */
|
18081
|
+
const char *divider;
|
18082
|
+
|
18083
|
+
/** The length of the blank prefix. */
|
18084
|
+
size_t blank_prefix_length;
|
18085
|
+
|
18086
|
+
/** The length of the divider. */
|
18087
|
+
size_t divider_length;
|
18088
|
+
} pm_error_format_t;
|
18089
|
+
|
18090
|
+
#define PM_COLOR_GRAY "\033[38;5;102m"
|
18091
|
+
#define PM_COLOR_RED "\033[1;31m"
|
18092
|
+
#define PM_COLOR_RESET "\033[0m"
|
18093
|
+
|
18094
|
+
static inline pm_error_t *
|
18095
|
+
pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
|
18096
|
+
pm_error_t *errors = calloc(error_list->size, sizeof(pm_error_t));
|
18097
|
+
|
18098
|
+
for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
|
18099
|
+
pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start);
|
18100
|
+
pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end);
|
18101
|
+
|
18102
|
+
// We're going to insert this error into the array in sorted order. We
|
18103
|
+
// do this by finding the first error that has a line number greater
|
18104
|
+
// than the current error and then inserting the current error before
|
18105
|
+
// that one.
|
18106
|
+
size_t index = 0;
|
18107
|
+
while (
|
18108
|
+
(index < error_list->size) &&
|
18109
|
+
(errors[index].error != NULL) &&
|
18110
|
+
(
|
18111
|
+
(errors[index].line < ((uint32_t) start.line)) ||
|
18112
|
+
(errors[index].line == ((uint32_t) start.line) && errors[index].column_start < ((uint32_t) start.column))
|
18113
|
+
)
|
18114
|
+
) index++;
|
18115
|
+
|
18116
|
+
// Now we're going to shift all of the errors after this one down one
|
18117
|
+
// index to make room for the new error.
|
18118
|
+
if (index + 1 < error_list->size) {
|
18119
|
+
memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
|
18120
|
+
}
|
18121
|
+
|
18122
|
+
// Finally, we'll insert the error into the array.
|
18123
|
+
uint32_t column_end;
|
18124
|
+
if (start.line == end.line) {
|
18125
|
+
column_end = (uint32_t) end.column;
|
18126
|
+
} else {
|
18127
|
+
column_end = (uint32_t) (newline_list->offsets[start.line] - newline_list->offsets[start.line - 1] - 1);
|
18128
|
+
}
|
18129
|
+
|
18130
|
+
// Ensure we have at least one column of error.
|
18131
|
+
if (((uint32_t) start.column) == column_end) column_end++;
|
18132
|
+
|
18133
|
+
errors[index] = (pm_error_t) {
|
18134
|
+
.error = error,
|
18135
|
+
.line = (uint32_t) start.line,
|
18136
|
+
.column_start = (uint32_t) start.column,
|
18137
|
+
.column_end = column_end
|
18138
|
+
};
|
18139
|
+
}
|
18140
|
+
|
18141
|
+
return errors;
|
18142
|
+
}
|
18143
|
+
|
18144
|
+
static inline void
|
18145
|
+
pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, size_t line, pm_buffer_t *buffer) {
|
18146
|
+
const uint8_t *start = &parser->start[newline_list->offsets[line - 1]];
|
18147
|
+
const uint8_t *end;
|
18148
|
+
|
18149
|
+
if (line >= newline_list->size) {
|
18150
|
+
end = parser->end;
|
18151
|
+
} else {
|
18152
|
+
end = &parser->start[newline_list->offsets[line]];
|
18153
|
+
}
|
18154
|
+
|
18155
|
+
pm_buffer_append_format(buffer, number_prefix, (uint32_t) line);
|
18156
|
+
pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
|
18157
|
+
|
18158
|
+
if (end == parser->end && end[-1] != '\n') {
|
18159
|
+
pm_buffer_append_string(buffer, "\n", 1);
|
18160
|
+
}
|
18161
|
+
}
|
18162
|
+
|
18163
|
+
/**
|
18164
|
+
* Format the errors on the parser into the given buffer.
|
18165
|
+
*/
|
18166
|
+
PRISM_EXPORTED_FUNCTION void
|
18167
|
+
pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool colorize) {
|
18168
|
+
const pm_list_t *error_list = &parser->error_list;
|
18169
|
+
assert(error_list->size != 0);
|
18170
|
+
|
18171
|
+
// First, we're going to sort all of the errors by line number using an
|
18172
|
+
// insertion sort into a newly allocated array.
|
18173
|
+
const pm_newline_list_t *newline_list = &parser->newline_list;
|
18174
|
+
pm_error_t *errors = pm_parser_errors_format_sort(error_list, newline_list);
|
18175
|
+
|
18176
|
+
// Now we're going to determine how we're going to format line numbers and
|
18177
|
+
// blank lines based on the maximum number of digits in the line numbers
|
18178
|
+
// that are going to be displayed.
|
18179
|
+
pm_error_format_t error_format;
|
18180
|
+
size_t max_line_number = errors[error_list->size - 1].line;
|
18181
|
+
|
18182
|
+
if (max_line_number < 10) {
|
18183
|
+
if (colorize) {
|
18184
|
+
error_format = (pm_error_format_t) {
|
18185
|
+
.number_prefix = PM_COLOR_GRAY "%1" PRIu32 " | " PM_COLOR_RESET,
|
18186
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18187
|
+
.divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
|
18188
|
+
};
|
18189
|
+
} else {
|
18190
|
+
error_format = (pm_error_format_t) {
|
18191
|
+
.number_prefix = "%1" PRIu32 " | ",
|
18192
|
+
.blank_prefix = " | ",
|
18193
|
+
.divider = " ~~~~~\n"
|
18194
|
+
};
|
18195
|
+
}
|
18196
|
+
} else if (max_line_number < 100) {
|
18197
|
+
if (colorize) {
|
18198
|
+
error_format = (pm_error_format_t) {
|
18199
|
+
.number_prefix = PM_COLOR_GRAY "%2" PRIu32 " | " PM_COLOR_RESET,
|
18200
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18201
|
+
.divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
|
18202
|
+
};
|
18203
|
+
} else {
|
18204
|
+
error_format = (pm_error_format_t) {
|
18205
|
+
.number_prefix = "%2" PRIu32 " | ",
|
18206
|
+
.blank_prefix = " | ",
|
18207
|
+
.divider = " ~~~~~~\n"
|
18208
|
+
};
|
18209
|
+
}
|
18210
|
+
} else if (max_line_number < 1000) {
|
18211
|
+
if (colorize) {
|
18212
|
+
error_format = (pm_error_format_t) {
|
18213
|
+
.number_prefix = PM_COLOR_GRAY "%3" PRIu32 " | " PM_COLOR_RESET,
|
18214
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18215
|
+
.divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
|
18216
|
+
};
|
18217
|
+
} else {
|
18218
|
+
error_format = (pm_error_format_t) {
|
18219
|
+
.number_prefix = "%3" PRIu32 " | ",
|
18220
|
+
.blank_prefix = " | ",
|
18221
|
+
.divider = " ~~~~~~~\n"
|
18222
|
+
};
|
18223
|
+
}
|
18224
|
+
} else if (max_line_number < 10000) {
|
18225
|
+
if (colorize) {
|
18226
|
+
error_format = (pm_error_format_t) {
|
18227
|
+
.number_prefix = PM_COLOR_GRAY "%4" PRIu32 " | " PM_COLOR_RESET,
|
18228
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18229
|
+
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
18230
|
+
};
|
18231
|
+
} else {
|
18232
|
+
error_format = (pm_error_format_t) {
|
18233
|
+
.number_prefix = "%4" PRIu32 " | ",
|
18234
|
+
.blank_prefix = " | ",
|
18235
|
+
.divider = " ~~~~~~~~\n"
|
18236
|
+
};
|
18237
|
+
}
|
18238
|
+
} else {
|
18239
|
+
if (colorize) {
|
18240
|
+
error_format = (pm_error_format_t) {
|
18241
|
+
.number_prefix = PM_COLOR_GRAY "%5" PRIu32 " | " PM_COLOR_RESET,
|
18242
|
+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
|
18243
|
+
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
|
18244
|
+
};
|
18245
|
+
} else {
|
18246
|
+
error_format = (pm_error_format_t) {
|
18247
|
+
.number_prefix = "%5" PRIu32 " | ",
|
18248
|
+
.blank_prefix = " | ",
|
18249
|
+
.divider = " ~~~~~~~~\n"
|
18250
|
+
};
|
18251
|
+
}
|
18252
|
+
}
|
18253
|
+
|
18254
|
+
error_format.blank_prefix_length = strlen(error_format.blank_prefix);
|
18255
|
+
error_format.divider_length = strlen(error_format.divider);
|
18256
|
+
|
18257
|
+
// Now we're going to iterate through every error in our error list and
|
18258
|
+
// display it. While we're iterating, we will display some padding lines of
|
18259
|
+
// the source before the error to give some context. We'll be careful not to
|
18260
|
+
// display the same line twice in case the errors are close enough in the
|
18261
|
+
// source.
|
18262
|
+
uint32_t last_line = 0;
|
18263
|
+
const pm_encoding_t *encoding = parser->encoding;
|
18264
|
+
|
18265
|
+
for (size_t index = 0; index < error_list->size; index++) {
|
18266
|
+
pm_error_t *error = &errors[index];
|
18267
|
+
|
18268
|
+
// Here we determine how many lines of padding of the source to display,
|
18269
|
+
// based on the difference from the last line that was displayed.
|
18270
|
+
if (error->line - last_line > 1) {
|
18271
|
+
if (error->line - last_line > 2) {
|
18272
|
+
if ((index != 0) && (error->line - last_line > 3)) {
|
18273
|
+
pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
|
18274
|
+
}
|
18275
|
+
|
18276
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18277
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
|
18278
|
+
}
|
18279
|
+
|
18280
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18281
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
|
18282
|
+
}
|
18283
|
+
|
18284
|
+
// If this is the first error or we're on a new line, then we'll display
|
18285
|
+
// the line that has the error in it.
|
18286
|
+
if ((index == 0) || (error->line != last_line)) {
|
18287
|
+
if (colorize) {
|
18288
|
+
pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 13);
|
18289
|
+
} else {
|
18290
|
+
pm_buffer_append_string(buffer, "> ", 2);
|
18291
|
+
}
|
18292
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
|
18293
|
+
}
|
18294
|
+
|
18295
|
+
// Now we'll display the actual error message. We'll do this by first
|
18296
|
+
// putting the prefix to the line, then a bunch of blank spaces
|
18297
|
+
// depending on the column, then as many carets as we need to display
|
18298
|
+
// the width of the error, then the error message itself.
|
18299
|
+
//
|
18300
|
+
// Note that this doesn't take into account the width of the actual
|
18301
|
+
// character when displayed in the terminal. For some east-asian
|
18302
|
+
// languages or emoji, this means it can be thrown off pretty badly. We
|
18303
|
+
// will need to solve this eventually.
|
18304
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18305
|
+
pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
|
18306
|
+
|
18307
|
+
size_t column = 0;
|
18308
|
+
const uint8_t *start = &parser->start[newline_list->offsets[error->line - 1]];
|
18309
|
+
|
18310
|
+
while (column < error->column_end) {
|
18311
|
+
if (column < error->column_start) {
|
18312
|
+
pm_buffer_append_byte(buffer, ' ');
|
18313
|
+
} else if (colorize) {
|
18314
|
+
pm_buffer_append_string(buffer, PM_COLOR_RED "^" PM_COLOR_RESET, 12);
|
18315
|
+
} else {
|
18316
|
+
pm_buffer_append_byte(buffer, '^');
|
18317
|
+
}
|
18318
|
+
|
18319
|
+
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
|
18320
|
+
column += (char_width == 0 ? 1 : char_width);
|
18321
|
+
}
|
18322
|
+
|
18323
|
+
pm_buffer_append_byte(buffer, ' ');
|
18324
|
+
|
18325
|
+
const char *message = error->error->message;
|
18326
|
+
pm_buffer_append_string(buffer, message, strlen(message));
|
18327
|
+
pm_buffer_append_byte(buffer, '\n');
|
18328
|
+
|
18329
|
+
// Here we determine how many lines of padding to display after the
|
18330
|
+
// error, depending on where the next error is in source.
|
18331
|
+
last_line = error->line;
|
18332
|
+
size_t next_line = (index == error_list->size - 1) ? newline_list->size : errors[index + 1].line;
|
18333
|
+
|
18334
|
+
if (next_line - last_line > 1) {
|
18335
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18336
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
|
18337
|
+
}
|
18338
|
+
|
18339
|
+
if (next_line - last_line > 1) {
|
18340
|
+
pm_buffer_append_string(buffer, " ", 2);
|
18341
|
+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
|
18342
|
+
}
|
18343
|
+
}
|
18344
|
+
|
18345
|
+
// Finally, we'll free the array of errors that we allocated.
|
18346
|
+
free(errors);
|
18347
|
+
}
|
18348
|
+
|
18349
|
+
#undef PM_COLOR_GRAY
|
18350
|
+
#undef PM_COLOR_RED
|
18351
|
+
#undef PM_COLOR_RESET
|