yarp 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -1
- data/Makefile +5 -1
- data/config.yml +156 -125
- data/docs/encoding.md +5 -5
- data/docs/serialization.md +2 -2
- data/ext/yarp/api_node.c +142 -98
- data/ext/yarp/extension.c +21 -7
- data/ext/yarp/extension.h +1 -1
- data/include/yarp/ast.h +327 -18
- data/include/yarp/defines.h +2 -1
- data/include/yarp/diagnostic.h +3 -3
- data/include/yarp/enc/yp_encoding.h +10 -10
- data/include/yarp/parser.h +19 -19
- data/include/yarp/regexp.h +1 -1
- data/include/yarp/unescape.h +4 -4
- data/include/yarp/util/yp_buffer.h +3 -0
- data/include/yarp/util/yp_char.h +16 -16
- data/include/yarp/util/yp_constant_pool.h +2 -2
- data/include/yarp/util/yp_newline_list.h +5 -5
- data/include/yarp/util/yp_string.h +4 -4
- data/include/yarp/util/yp_string_list.h +0 -3
- data/include/yarp/util/yp_strpbrk.h +1 -1
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +5 -4
- data/lib/yarp/desugar_visitor.rb +59 -122
- data/lib/yarp/node.rb +230 -240
- data/lib/yarp/serialize.rb +16 -16
- data/lib/yarp.rb +5 -5
- data/src/diagnostic.c +1 -1
- data/src/enc/yp_big5.c +15 -42
- data/src/enc/yp_euc_jp.c +16 -43
- data/src/enc/yp_gbk.c +19 -46
- data/src/enc/yp_shift_jis.c +16 -43
- data/src/enc/yp_tables.c +36 -38
- data/src/enc/yp_unicode.c +20 -25
- data/src/enc/yp_windows_31j.c +16 -43
- data/src/node.c +1271 -899
- data/src/prettyprint.c +87 -48
- data/src/regexp.c +21 -21
- data/src/serialize.c +28 -15
- data/src/unescape.c +151 -121
- data/src/util/yp_buffer.c +7 -2
- data/src/util/yp_char.c +34 -34
- data/src/util/yp_constant_pool.c +4 -4
- data/src/util/yp_memchr.c +1 -1
- data/src/util/yp_newline_list.c +5 -4
- data/src/util/yp_string.c +22 -20
- data/src/util/yp_string_list.c +0 -6
- data/src/util/yp_strncasecmp.c +3 -6
- data/src/util/yp_strpbrk.c +8 -8
- data/src/yarp.c +355 -216
- data/yarp.gemspec +1 -1
- metadata +2 -2
data/src/yarp.c
CHANGED
@@ -161,14 +161,18 @@ debug_token(yp_token_t * token) {
|
|
161
161
|
|
162
162
|
#endif
|
163
163
|
|
164
|
+
/* Macros for min/max. Each argument appears twice in the expansion, so it may be evaluated more than once; do not pass expressions with side effects. */
|
165
|
+
#define MIN(a,b) (((a)<(b))?(a):(b))
|
166
|
+
#define MAX(a,b) (((a)>(b))?(a):(b))
|
167
|
+
|
164
168
|
/******************************************************************************/
|
165
169
|
/* Lex mode manipulations */
|
166
170
|
/******************************************************************************/
|
167
171
|
|
168
172
|
// Returns the incrementor character that should be used to increment the
|
169
173
|
// nesting count if one is possible.
|
170
|
-
static inline
|
171
|
-
lex_mode_incrementor(const
|
174
|
+
static inline uint8_t
|
175
|
+
lex_mode_incrementor(const uint8_t start) {
|
172
176
|
switch (start) {
|
173
177
|
case '(':
|
174
178
|
case '[':
|
@@ -182,8 +186,8 @@ lex_mode_incrementor(const char start) {
|
|
182
186
|
|
183
187
|
// Returns the matching character that should be used to terminate a list
|
184
188
|
// beginning with the given character.
|
185
|
-
static inline
|
186
|
-
lex_mode_terminator(const
|
189
|
+
static inline uint8_t
|
190
|
+
lex_mode_terminator(const uint8_t start) {
|
187
191
|
switch (start) {
|
188
192
|
case '(':
|
189
193
|
return ')';
|
@@ -221,9 +225,9 @@ lex_mode_push(yp_parser_t *parser, yp_lex_mode_t lex_mode) {
|
|
221
225
|
|
222
226
|
// Push on a new list lex mode.
|
223
227
|
static inline bool
|
224
|
-
lex_mode_push_list(yp_parser_t *parser, bool interpolation,
|
225
|
-
|
226
|
-
|
228
|
+
lex_mode_push_list(yp_parser_t *parser, bool interpolation, uint8_t delimiter) {
|
229
|
+
uint8_t incrementor = lex_mode_incrementor(delimiter);
|
230
|
+
uint8_t terminator = lex_mode_terminator(delimiter);
|
227
231
|
|
228
232
|
yp_lex_mode_t lex_mode = {
|
229
233
|
.mode = YP_LEX_LIST,
|
@@ -237,7 +241,7 @@ lex_mode_push_list(yp_parser_t *parser, bool interpolation, char delimiter) {
|
|
237
241
|
|
238
242
|
// These are the places where we need to split up the content of the list.
|
239
243
|
// We'll use strpbrk to find the first of these characters.
|
240
|
-
|
244
|
+
uint8_t *breakpoints = lex_mode.as.list.breakpoints;
|
241
245
|
memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
|
242
246
|
|
243
247
|
// Now we'll add the terminator to the list of breakpoints.
|
@@ -260,7 +264,7 @@ lex_mode_push_list(yp_parser_t *parser, bool interpolation, char delimiter) {
|
|
260
264
|
|
261
265
|
// Push on a new regexp lex mode.
|
262
266
|
static inline bool
|
263
|
-
lex_mode_push_regexp(yp_parser_t *parser,
|
267
|
+
lex_mode_push_regexp(yp_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
|
264
268
|
yp_lex_mode_t lex_mode = {
|
265
269
|
.mode = YP_LEX_REGEXP,
|
266
270
|
.as.regexp = {
|
@@ -273,7 +277,7 @@ lex_mode_push_regexp(yp_parser_t *parser, char incrementor, char terminator) {
|
|
273
277
|
// These are the places where we need to split up the content of the
|
274
278
|
// regular expression. We'll use strpbrk to find the first of these
|
275
279
|
// characters.
|
276
|
-
|
280
|
+
uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
|
277
281
|
memcpy(breakpoints, "\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
|
278
282
|
|
279
283
|
// First we'll add the terminator.
|
@@ -289,7 +293,7 @@ lex_mode_push_regexp(yp_parser_t *parser, char incrementor, char terminator) {
|
|
289
293
|
|
290
294
|
// Push on a new string lex mode.
|
291
295
|
static inline bool
|
292
|
-
lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed,
|
296
|
+
lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
|
293
297
|
yp_lex_mode_t lex_mode = {
|
294
298
|
.mode = YP_LEX_STRING,
|
295
299
|
.as.string = {
|
@@ -303,7 +307,7 @@ lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed
|
|
303
307
|
|
304
308
|
// These are the places where we need to split up the content of the
|
305
309
|
// string. We'll use strpbrk to find the first of these characters.
|
306
|
-
|
310
|
+
uint8_t *breakpoints = lex_mode.as.string.breakpoints;
|
307
311
|
memcpy(breakpoints, "\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
|
308
312
|
|
309
313
|
// Now add in the terminator.
|
@@ -380,6 +384,9 @@ lex_state_arg_p(yp_parser_t *parser) {
|
|
380
384
|
|
381
385
|
static inline bool
|
382
386
|
lex_state_spcarg_p(yp_parser_t *parser, bool space_seen) {
|
387
|
+
if (parser->current.end >= parser->end) {
|
388
|
+
return false;
|
389
|
+
}
|
383
390
|
return lex_state_arg_p(parser) && space_seen && !yp_char_is_whitespace(*parser->current.end);
|
384
391
|
}
|
385
392
|
|
@@ -420,7 +427,7 @@ debug_lex_state_set(yp_parser_t *parser, yp_lex_state_t state, char const * call
|
|
420
427
|
|
421
428
|
// Retrieve the constant pool id for the given location.
|
422
429
|
static inline yp_constant_id_t
|
423
|
-
yp_parser_constant_id_location(yp_parser_t *parser, const
|
430
|
+
yp_parser_constant_id_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
424
431
|
return yp_constant_pool_insert(&parser->constant_pool, start, (size_t) (end - start));
|
425
432
|
}
|
426
433
|
|
@@ -606,13 +613,45 @@ yp_scope_node_init(yp_node_t *node, yp_scope_node_t *scope) {
|
|
606
613
|
/* Node creation functions */
|
607
614
|
/******************************************************************************/
|
608
615
|
|
616
|
+
// Parse the decimal number represented by the range of bytes. returns
|
617
|
+
// UINT32_MAX if the number fails to parse. This function assumes that the range
|
618
|
+
// of bytes has already been validated to contain only decimal digits.
|
619
|
+
static uint32_t
|
620
|
+
parse_decimal_number(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
621
|
+
ptrdiff_t diff = end - start;
|
622
|
+
assert(diff > 0 && ((unsigned long) diff < SIZE_MAX));
|
623
|
+
size_t length = (size_t) diff;
|
624
|
+
|
625
|
+
char *digits = calloc(length + 1, sizeof(char));
|
626
|
+
memcpy(digits, start, length);
|
627
|
+
digits[length] = '\0';
|
628
|
+
|
629
|
+
char *endptr;
|
630
|
+
errno = 0;
|
631
|
+
unsigned long value = strtoul(digits, &endptr, 10);
|
632
|
+
|
633
|
+
if ((digits == endptr) || (*endptr != '\0') || (errno == ERANGE)) {
|
634
|
+
yp_diagnostic_list_append(&parser->error_list, start, end, "invalid decimal number");
|
635
|
+
value = UINT32_MAX;
|
636
|
+
}
|
637
|
+
|
638
|
+
free(digits);
|
639
|
+
|
640
|
+
if (value > UINT32_MAX) {
|
641
|
+
yp_diagnostic_list_append(&parser->error_list, start, end, "invalid decimal number");
|
642
|
+
value = UINT32_MAX;
|
643
|
+
}
|
644
|
+
|
645
|
+
return (uint32_t) value;
|
646
|
+
}
|
647
|
+
|
609
648
|
// Parse out the options for a regular expression.
|
610
649
|
static inline yp_node_flags_t
|
611
650
|
yp_regular_expression_flags_create(const yp_token_t *closing) {
|
612
651
|
yp_node_flags_t flags = 0;
|
613
652
|
|
614
653
|
if (closing->type == YP_TOKEN_REGEXP_END) {
|
615
|
-
for (const
|
654
|
+
for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
|
616
655
|
switch (*flag) {
|
617
656
|
case 'i': flags |= YP_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
|
618
657
|
case 'm': flags |= YP_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
|
@@ -654,7 +693,7 @@ yp_alloc_node(YP_ATTRIBUTE_UNUSED yp_parser_t *parser, size_t size) {
|
|
654
693
|
|
655
694
|
// Allocate a new MissingNode node.
|
656
695
|
static yp_missing_node_t *
|
657
|
-
yp_missing_node_create(yp_parser_t *parser, const
|
696
|
+
yp_missing_node_create(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
658
697
|
yp_missing_node_t *node = YP_ALLOC_NODE(parser, yp_missing_node_t);
|
659
698
|
*node = (yp_missing_node_t) {{ .type = YP_NODE_MISSING_NODE, .location = { .start = start, .end = end } }};
|
660
699
|
return node;
|
@@ -923,7 +962,7 @@ yp_array_pattern_node_requireds_append(yp_array_pattern_node_t *node, yp_node_t
|
|
923
962
|
static yp_assoc_node_t *
|
924
963
|
yp_assoc_node_create(yp_parser_t *parser, yp_node_t *key, const yp_token_t *operator, yp_node_t *value) {
|
925
964
|
yp_assoc_node_t *node = YP_ALLOC_NODE(parser, yp_assoc_node_t);
|
926
|
-
const
|
965
|
+
const uint8_t *end;
|
927
966
|
|
928
967
|
if (value != NULL) {
|
929
968
|
end = value->location.end;
|
@@ -1107,7 +1146,7 @@ static yp_block_parameters_node_t *
|
|
1107
1146
|
yp_block_parameters_node_create(yp_parser_t *parser, yp_parameters_node_t *parameters, const yp_token_t *opening) {
|
1108
1147
|
yp_block_parameters_node_t *node = YP_ALLOC_NODE(parser, yp_block_parameters_node_t);
|
1109
1148
|
|
1110
|
-
const
|
1149
|
+
const uint8_t *start;
|
1111
1150
|
if (opening->type != YP_TOKEN_NOT_PROVIDED) {
|
1112
1151
|
start = opening->start;
|
1113
1152
|
} else if (parameters != NULL) {
|
@@ -1116,7 +1155,7 @@ yp_block_parameters_node_create(yp_parser_t *parser, yp_parameters_node_t *param
|
|
1116
1155
|
start = NULL;
|
1117
1156
|
}
|
1118
1157
|
|
1119
|
-
const
|
1158
|
+
const uint8_t *end;
|
1120
1159
|
if (parameters != NULL) {
|
1121
1160
|
end = parameters->base.location.end;
|
1122
1161
|
} else if (opening->type != YP_TOKEN_NOT_PROVIDED) {
|
@@ -1237,8 +1276,8 @@ static yp_call_node_t *
|
|
1237
1276
|
yp_call_node_binary_create(yp_parser_t *parser, yp_node_t *receiver, yp_token_t *operator, yp_node_t *argument) {
|
1238
1277
|
yp_call_node_t *node = yp_call_node_create(parser);
|
1239
1278
|
|
1240
|
-
node->base.location.start = receiver->location.start;
|
1241
|
-
node->base.location.end = argument->location.end;
|
1279
|
+
node->base.location.start = MIN(receiver->location.start, argument->location.start);
|
1280
|
+
node->base.location.end = MAX(receiver->location.end, argument->location.end);
|
1242
1281
|
|
1243
1282
|
node->receiver = receiver;
|
1244
1283
|
node->message_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
|
@@ -1434,7 +1473,7 @@ yp_call_operator_write_node_create(yp_parser_t *parser, yp_call_node_t *target,
|
|
1434
1473
|
.target = target,
|
1435
1474
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
1436
1475
|
.value = value,
|
1437
|
-
.
|
1476
|
+
.operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
1438
1477
|
};
|
1439
1478
|
|
1440
1479
|
return node;
|
@@ -1555,8 +1594,7 @@ yp_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const y
|
|
1555
1594
|
|
1556
1595
|
// Allocate and initialize a new ClassVariableAndWriteNode node.
|
1557
1596
|
static yp_class_variable_and_write_node_t *
|
1558
|
-
yp_class_variable_and_write_node_create(yp_parser_t *parser,
|
1559
|
-
assert(YP_NODE_TYPE_P(target, YP_NODE_CLASS_VARIABLE_READ_NODE));
|
1597
|
+
yp_class_variable_and_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
|
1560
1598
|
assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
|
1561
1599
|
yp_class_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_and_write_node_t);
|
1562
1600
|
|
@@ -1564,11 +1602,12 @@ yp_class_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target,
|
|
1564
1602
|
{
|
1565
1603
|
.type = YP_NODE_CLASS_VARIABLE_AND_WRITE_NODE,
|
1566
1604
|
.location = {
|
1567
|
-
.start = target->location.start,
|
1605
|
+
.start = target->base.location.start,
|
1568
1606
|
.end = value->location.end
|
1569
1607
|
}
|
1570
1608
|
},
|
1571
|
-
.
|
1609
|
+
.name = target->name,
|
1610
|
+
.name_loc = target->base.location,
|
1572
1611
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
1573
1612
|
.value = value
|
1574
1613
|
};
|
@@ -1578,18 +1617,19 @@ yp_class_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target,
|
|
1578
1617
|
|
1579
1618
|
// Allocate and initialize a new ClassVariableOperatorWriteNode node.
|
1580
1619
|
static yp_class_variable_operator_write_node_t *
|
1581
|
-
yp_class_variable_operator_write_node_create(yp_parser_t *parser,
|
1620
|
+
yp_class_variable_operator_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
|
1582
1621
|
yp_class_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_operator_write_node_t);
|
1583
1622
|
|
1584
1623
|
*node = (yp_class_variable_operator_write_node_t) {
|
1585
1624
|
{
|
1586
1625
|
.type = YP_NODE_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
|
1587
1626
|
.location = {
|
1588
|
-
.start = target->location.start,
|
1627
|
+
.start = target->base.location.start,
|
1589
1628
|
.end = value->location.end
|
1590
1629
|
}
|
1591
1630
|
},
|
1592
|
-
.
|
1631
|
+
.name = target->name,
|
1632
|
+
.name_loc = target->base.location,
|
1593
1633
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
1594
1634
|
.value = value,
|
1595
1635
|
.operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
@@ -1600,8 +1640,7 @@ yp_class_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *tar
|
|
1600
1640
|
|
1601
1641
|
// Allocate and initialize a new ClassVariableOrWriteNode node.
|
1602
1642
|
static yp_class_variable_or_write_node_t *
|
1603
|
-
yp_class_variable_or_write_node_create(yp_parser_t *parser,
|
1604
|
-
assert(YP_NODE_TYPE_P(target, YP_NODE_CLASS_VARIABLE_READ_NODE));
|
1643
|
+
yp_class_variable_or_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
|
1605
1644
|
assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
|
1606
1645
|
yp_class_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_or_write_node_t);
|
1607
1646
|
|
@@ -1609,11 +1648,12 @@ yp_class_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, c
|
|
1609
1648
|
{
|
1610
1649
|
.type = YP_NODE_CLASS_VARIABLE_OR_WRITE_NODE,
|
1611
1650
|
.location = {
|
1612
|
-
.start = target->location.start,
|
1651
|
+
.start = target->base.location.start,
|
1613
1652
|
.end = value->location.end
|
1614
1653
|
}
|
1615
1654
|
},
|
1616
|
-
.
|
1655
|
+
.name = target->name,
|
1656
|
+
.name_loc = target->base.location,
|
1617
1657
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
1618
1658
|
.value = value
|
1619
1659
|
};
|
@@ -1626,13 +1666,21 @@ static yp_class_variable_read_node_t *
|
|
1626
1666
|
yp_class_variable_read_node_create(yp_parser_t *parser, const yp_token_t *token) {
|
1627
1667
|
assert(token->type == YP_TOKEN_CLASS_VARIABLE);
|
1628
1668
|
yp_class_variable_read_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_read_node_t);
|
1629
|
-
|
1669
|
+
|
1670
|
+
*node = (yp_class_variable_read_node_t) {
|
1671
|
+
{
|
1672
|
+
.type = YP_NODE_CLASS_VARIABLE_READ_NODE,
|
1673
|
+
.location = YP_LOCATION_TOKEN_VALUE(token)
|
1674
|
+
},
|
1675
|
+
.name = yp_parser_constant_id_location(parser, token->start, token->end)
|
1676
|
+
};
|
1677
|
+
|
1630
1678
|
return node;
|
1631
1679
|
}
|
1632
1680
|
|
1633
1681
|
// Initialize a new ClassVariableWriteNode node from a ClassVariableRead node.
|
1634
1682
|
static yp_class_variable_write_node_t *
|
1635
|
-
|
1683
|
+
yp_class_variable_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *read_node, yp_token_t *operator, yp_node_t *value) {
|
1636
1684
|
yp_class_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_write_node_t);
|
1637
1685
|
|
1638
1686
|
*node = (yp_class_variable_write_node_t) {
|
@@ -1643,6 +1691,7 @@ yp_class_variable_read_node_to_class_variable_write_node(yp_parser_t *parser, yp
|
|
1643
1691
|
.end = value->location.end
|
1644
1692
|
},
|
1645
1693
|
},
|
1694
|
+
.name = read_node->name,
|
1646
1695
|
.name_loc = YP_LOCATION_NODE_VALUE((yp_node_t *) read_node),
|
1647
1696
|
.operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
|
1648
1697
|
.value = value
|
@@ -1875,7 +1924,7 @@ yp_def_node_create(
|
|
1875
1924
|
const yp_token_t *end_keyword
|
1876
1925
|
) {
|
1877
1926
|
yp_def_node_t *node = YP_ALLOC_NODE(parser, yp_def_node_t);
|
1878
|
-
const
|
1927
|
+
const uint8_t *end;
|
1879
1928
|
|
1880
1929
|
if (end_keyword->type == YP_TOKEN_NOT_PROVIDED) {
|
1881
1930
|
end = body->location.end;
|
@@ -1930,7 +1979,7 @@ yp_defined_node_create(yp_parser_t *parser, const yp_token_t *lparen, yp_node_t
|
|
1930
1979
|
static yp_else_node_t *
|
1931
1980
|
yp_else_node_create(yp_parser_t *parser, const yp_token_t *else_keyword, yp_statements_node_t *statements, const yp_token_t *end_keyword) {
|
1932
1981
|
yp_else_node_t *node = YP_ALLOC_NODE(parser, yp_else_node_t);
|
1933
|
-
const
|
1982
|
+
const uint8_t *end = NULL;
|
1934
1983
|
if ((end_keyword->type == YP_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
|
1935
1984
|
end = statements->base.location.end;
|
1936
1985
|
} else {
|
@@ -2410,7 +2459,7 @@ yp_if_node_create(yp_parser_t *parser,
|
|
2410
2459
|
yp_flip_flop(predicate);
|
2411
2460
|
yp_if_node_t *node = YP_ALLOC_NODE(parser, yp_if_node_t);
|
2412
2461
|
|
2413
|
-
const
|
2462
|
+
const uint8_t *end;
|
2414
2463
|
if (end_keyword->type != YP_TOKEN_NOT_PROVIDED) {
|
2415
2464
|
end = end_keyword->end;
|
2416
2465
|
} else if (consequent != NULL) {
|
@@ -2593,7 +2642,7 @@ static yp_in_node_t *
|
|
2593
2642
|
yp_in_node_create(yp_parser_t *parser, yp_node_t *pattern, yp_statements_node_t *statements, const yp_token_t *in_keyword, const yp_token_t *then_keyword) {
|
2594
2643
|
yp_in_node_t *node = YP_ALLOC_NODE(parser, yp_in_node_t);
|
2595
2644
|
|
2596
|
-
const
|
2645
|
+
const uint8_t *end;
|
2597
2646
|
if (statements != NULL) {
|
2598
2647
|
end = statements->base.location.end;
|
2599
2648
|
} else if (then_keyword->type != YP_TOKEN_NOT_PROVIDED) {
|
@@ -2621,8 +2670,7 @@ yp_in_node_create(yp_parser_t *parser, yp_node_t *pattern, yp_statements_node_t
|
|
2621
2670
|
|
2622
2671
|
// Allocate and initialize a new InstanceVariableAndWriteNode node.
|
2623
2672
|
static yp_instance_variable_and_write_node_t *
|
2624
|
-
yp_instance_variable_and_write_node_create(yp_parser_t *parser,
|
2625
|
-
assert(YP_NODE_TYPE_P(target, YP_NODE_INSTANCE_VARIABLE_READ_NODE));
|
2673
|
+
yp_instance_variable_and_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
|
2626
2674
|
assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
|
2627
2675
|
yp_instance_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_and_write_node_t);
|
2628
2676
|
|
@@ -2630,11 +2678,12 @@ yp_instance_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *targe
|
|
2630
2678
|
{
|
2631
2679
|
.type = YP_NODE_INSTANCE_VARIABLE_AND_WRITE_NODE,
|
2632
2680
|
.location = {
|
2633
|
-
.start = target->location.start,
|
2681
|
+
.start = target->base.location.start,
|
2634
2682
|
.end = value->location.end
|
2635
2683
|
}
|
2636
2684
|
},
|
2637
|
-
.
|
2685
|
+
.name = target->name,
|
2686
|
+
.name_loc = target->base.location,
|
2638
2687
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
2639
2688
|
.value = value
|
2640
2689
|
};
|
@@ -2644,18 +2693,19 @@ yp_instance_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *targe
|
|
2644
2693
|
|
2645
2694
|
// Allocate and initialize a new InstanceVariableOperatorWriteNode node.
|
2646
2695
|
static yp_instance_variable_operator_write_node_t *
|
2647
|
-
yp_instance_variable_operator_write_node_create(yp_parser_t *parser,
|
2696
|
+
yp_instance_variable_operator_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
|
2648
2697
|
yp_instance_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_operator_write_node_t);
|
2649
2698
|
|
2650
2699
|
*node = (yp_instance_variable_operator_write_node_t) {
|
2651
2700
|
{
|
2652
2701
|
.type = YP_NODE_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
|
2653
2702
|
.location = {
|
2654
|
-
.start = target->location.start,
|
2703
|
+
.start = target->base.location.start,
|
2655
2704
|
.end = value->location.end
|
2656
2705
|
}
|
2657
2706
|
},
|
2658
|
-
.
|
2707
|
+
.name = target->name,
|
2708
|
+
.name_loc = target->base.location,
|
2659
2709
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
2660
2710
|
.value = value,
|
2661
2711
|
.operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
@@ -2666,8 +2716,7 @@ yp_instance_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *
|
|
2666
2716
|
|
2667
2717
|
// Allocate and initialize a new InstanceVariableOrWriteNode node.
|
2668
2718
|
static yp_instance_variable_or_write_node_t *
|
2669
|
-
yp_instance_variable_or_write_node_create(yp_parser_t *parser,
|
2670
|
-
assert(YP_NODE_TYPE_P(target, YP_NODE_INSTANCE_VARIABLE_READ_NODE));
|
2719
|
+
yp_instance_variable_or_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
|
2671
2720
|
assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
|
2672
2721
|
yp_instance_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_or_write_node_t);
|
2673
2722
|
|
@@ -2675,11 +2724,12 @@ yp_instance_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target
|
|
2675
2724
|
{
|
2676
2725
|
.type = YP_NODE_INSTANCE_VARIABLE_OR_WRITE_NODE,
|
2677
2726
|
.location = {
|
2678
|
-
.start = target->location.start,
|
2727
|
+
.start = target->base.location.start,
|
2679
2728
|
.end = value->location.end
|
2680
2729
|
}
|
2681
2730
|
},
|
2682
|
-
.
|
2731
|
+
.name = target->name,
|
2732
|
+
.name_loc = target->base.location,
|
2683
2733
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
2684
2734
|
.value = value
|
2685
2735
|
};
|
@@ -2693,9 +2743,13 @@ yp_instance_variable_read_node_create(yp_parser_t *parser, const yp_token_t *tok
|
|
2693
2743
|
assert(token->type == YP_TOKEN_INSTANCE_VARIABLE);
|
2694
2744
|
yp_instance_variable_read_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_read_node_t);
|
2695
2745
|
|
2696
|
-
*node = (yp_instance_variable_read_node_t) {
|
2697
|
-
|
2698
|
-
|
2746
|
+
*node = (yp_instance_variable_read_node_t) {
|
2747
|
+
{
|
2748
|
+
.type = YP_NODE_INSTANCE_VARIABLE_READ_NODE,
|
2749
|
+
.location = YP_LOCATION_TOKEN_VALUE(token)
|
2750
|
+
},
|
2751
|
+
.name = yp_parser_constant_id_location(parser, token->start, token->end)
|
2752
|
+
};
|
2699
2753
|
|
2700
2754
|
return node;
|
2701
2755
|
}
|
@@ -2712,6 +2766,7 @@ yp_instance_variable_write_node_create(yp_parser_t *parser, yp_instance_variable
|
|
2712
2766
|
.end = value->location.end
|
2713
2767
|
}
|
2714
2768
|
},
|
2769
|
+
.name = read_node->name,
|
2715
2770
|
.name_loc = YP_LOCATION_NODE_BASE_VALUE(read_node),
|
2716
2771
|
.operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
|
2717
2772
|
.value = value
|
@@ -2743,8 +2798,13 @@ yp_interpolated_regular_expression_node_create(yp_parser_t *parser, const yp_tok
|
|
2743
2798
|
|
2744
2799
|
static inline void
|
2745
2800
|
yp_interpolated_regular_expression_node_append(yp_interpolated_regular_expression_node_t *node, yp_node_t *part) {
|
2801
|
+
if (node->base.location.start > part->location.start) {
|
2802
|
+
node->base.location.start = part->location.start;
|
2803
|
+
}
|
2804
|
+
if (node->base.location.end < part->location.end) {
|
2805
|
+
node->base.location.end = part->location.end;
|
2806
|
+
}
|
2746
2807
|
yp_node_list_append(&node->parts, part);
|
2747
|
-
node->base.location.end = part->location.end;
|
2748
2808
|
}
|
2749
2809
|
|
2750
2810
|
static inline void
|
@@ -2816,10 +2876,11 @@ yp_interpolated_symbol_node_create(yp_parser_t *parser, const yp_token_t *openin
|
|
2816
2876
|
|
2817
2877
|
static inline void
|
2818
2878
|
yp_interpolated_symbol_node_append(yp_interpolated_symbol_node_t *node, yp_node_t *part) {
|
2819
|
-
|
2820
|
-
if (!node->base.location.start) {
|
2879
|
+
if (node->parts.size == 0 && node->opening_loc.start == NULL) {
|
2821
2880
|
node->base.location.start = part->location.start;
|
2822
2881
|
}
|
2882
|
+
|
2883
|
+
yp_node_list_append(&node->parts, part);
|
2823
2884
|
node->base.location.end = part->location.end;
|
2824
2885
|
}
|
2825
2886
|
|
@@ -2959,7 +3020,7 @@ yp_lambda_node_create(
|
|
2959
3020
|
|
2960
3021
|
// Allocate and initialize a new LocalVariableAndWriteNode node.
|
2961
3022
|
static yp_local_variable_and_write_node_t *
|
2962
|
-
yp_local_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t
|
3023
|
+
yp_local_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) {
|
2963
3024
|
assert(YP_NODE_TYPE_P(target, YP_NODE_LOCAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_CALL_NODE));
|
2964
3025
|
assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
|
2965
3026
|
yp_local_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_and_write_node_t);
|
@@ -2975,7 +3036,7 @@ yp_local_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target,
|
|
2975
3036
|
.name_loc = target->location,
|
2976
3037
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
2977
3038
|
.value = value,
|
2978
|
-
.
|
3039
|
+
.name = name,
|
2979
3040
|
.depth = depth
|
2980
3041
|
};
|
2981
3042
|
|
@@ -2984,7 +3045,7 @@ yp_local_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target,
|
|
2984
3045
|
|
2985
3046
|
// Allocate and initialize a new LocalVariableOperatorWriteNode node.
|
2986
3047
|
static yp_local_variable_operator_write_node_t *
|
2987
|
-
yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t
|
3048
|
+
yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) {
|
2988
3049
|
yp_local_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_operator_write_node_t);
|
2989
3050
|
|
2990
3051
|
*node = (yp_local_variable_operator_write_node_t) {
|
@@ -2998,8 +3059,8 @@ yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *tar
|
|
2998
3059
|
.name_loc = target->location,
|
2999
3060
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
3000
3061
|
.value = value,
|
3001
|
-
.
|
3002
|
-
.
|
3062
|
+
.name = name,
|
3063
|
+
.operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1),
|
3003
3064
|
.depth = depth
|
3004
3065
|
};
|
3005
3066
|
|
@@ -3008,7 +3069,7 @@ yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *tar
|
|
3008
3069
|
|
3009
3070
|
// Allocate and initialize a new LocalVariableOrWriteNode node.
|
3010
3071
|
static yp_local_variable_or_write_node_t *
|
3011
|
-
yp_local_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t
|
3072
|
+
yp_local_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) {
|
3012
3073
|
assert(YP_NODE_TYPE_P(target, YP_NODE_LOCAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_CALL_NODE));
|
3013
3074
|
assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
|
3014
3075
|
yp_local_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_or_write_node_t);
|
@@ -3024,7 +3085,7 @@ yp_local_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, c
|
|
3024
3085
|
.name_loc = target->location,
|
3025
3086
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
3026
3087
|
.value = value,
|
3027
|
-
.
|
3088
|
+
.name = name,
|
3028
3089
|
.depth = depth
|
3029
3090
|
};
|
3030
3091
|
|
@@ -3041,7 +3102,7 @@ yp_local_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name,
|
|
3041
3102
|
.type = YP_NODE_LOCAL_VARIABLE_READ_NODE,
|
3042
3103
|
.location = YP_LOCATION_TOKEN_VALUE(name)
|
3043
3104
|
},
|
3044
|
-
.
|
3105
|
+
.name = yp_parser_constant_id_token(parser, name),
|
3045
3106
|
.depth = depth
|
3046
3107
|
};
|
3047
3108
|
|
@@ -3050,7 +3111,7 @@ yp_local_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name,
|
|
3050
3111
|
|
3051
3112
|
// Allocate and initialize a new LocalVariableWriteNode node.
|
3052
3113
|
static yp_local_variable_write_node_t *
|
3053
|
-
yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t
|
3114
|
+
yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t name, uint32_t depth, yp_node_t *value, const yp_location_t *name_loc, const yp_token_t *operator) {
|
3054
3115
|
yp_local_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_write_node_t);
|
3055
3116
|
|
3056
3117
|
*node = (yp_local_variable_write_node_t) {
|
@@ -3061,7 +3122,7 @@ yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t consta
|
|
3061
3122
|
.end = value->location.end
|
3062
3123
|
}
|
3063
3124
|
},
|
3064
|
-
.
|
3125
|
+
.name = name,
|
3065
3126
|
.depth = depth,
|
3066
3127
|
.value = value,
|
3067
3128
|
.name_loc = *name_loc,
|
@@ -3081,7 +3142,7 @@ yp_local_variable_target_node_create(yp_parser_t *parser, const yp_token_t *name
|
|
3081
3142
|
.type = YP_NODE_LOCAL_VARIABLE_TARGET_NODE,
|
3082
3143
|
.location = YP_LOCATION_TOKEN_VALUE(name)
|
3083
3144
|
},
|
3084
|
-
.
|
3145
|
+
.name = yp_parser_constant_id_token(parser, name),
|
3085
3146
|
.depth = 0
|
3086
3147
|
};
|
3087
3148
|
|
@@ -3260,7 +3321,8 @@ yp_numbered_reference_read_node_create(yp_parser_t *parser, const yp_token_t *na
|
|
3260
3321
|
{
|
3261
3322
|
.type = YP_NODE_NUMBERED_REFERENCE_READ_NODE,
|
3262
3323
|
.location = YP_LOCATION_TOKEN_VALUE(name),
|
3263
|
-
}
|
3324
|
+
},
|
3325
|
+
.number = parse_decimal_number(parser, name->start + 1, name->end)
|
3264
3326
|
};
|
3265
3327
|
|
3266
3328
|
return node;
|
@@ -3279,7 +3341,7 @@ yp_optional_parameter_node_create(yp_parser_t *parser, const yp_token_t *name, c
|
|
3279
3341
|
.end = value->location.end
|
3280
3342
|
}
|
3281
3343
|
},
|
3282
|
-
.
|
3344
|
+
.name = yp_parser_constant_id_token(parser, name),
|
3283
3345
|
.name_loc = YP_LOCATION_TOKEN_VALUE(name),
|
3284
3346
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
3285
3347
|
.value = value
|
@@ -3576,8 +3638,8 @@ yp_regular_expression_node_create(yp_parser_t *parser, const yp_token_t *opening
|
|
3576
3638
|
.type = YP_NODE_REGULAR_EXPRESSION_NODE,
|
3577
3639
|
.flags = yp_regular_expression_flags_create(closing),
|
3578
3640
|
.location = {
|
3579
|
-
.start = opening->start,
|
3580
|
-
.end = closing->end
|
3641
|
+
.start = MIN(opening->start, closing->start),
|
3642
|
+
.end = MAX(opening->end, closing->end)
|
3581
3643
|
}
|
3582
3644
|
},
|
3583
3645
|
.opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
|
@@ -3630,7 +3692,7 @@ yp_required_parameter_node_create(yp_parser_t *parser, const yp_token_t *token)
|
|
3630
3692
|
.type = YP_NODE_REQUIRED_PARAMETER_NODE,
|
3631
3693
|
.location = YP_LOCATION_TOKEN_VALUE(token)
|
3632
3694
|
},
|
3633
|
-
.
|
3695
|
+
.name = yp_parser_constant_id_token(parser, token)
|
3634
3696
|
};
|
3635
3697
|
|
3636
3698
|
return node;
|
@@ -3881,19 +3943,21 @@ yp_statements_node_body_length(yp_statements_node_t *node) {
|
|
3881
3943
|
|
3882
3944
|
// Set the location of the given StatementsNode.
|
3883
3945
|
static void
|
3884
|
-
yp_statements_node_location_set(yp_statements_node_t *node, const
|
3946
|
+
yp_statements_node_location_set(yp_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
|
3885
3947
|
node->base.location = (yp_location_t) { .start = start, .end = end };
|
3886
3948
|
}
|
3887
3949
|
|
3888
3950
|
// Append a new node to the given StatementsNode node's body.
|
3889
3951
|
static void
|
3890
3952
|
yp_statements_node_body_append(yp_statements_node_t *node, yp_node_t *statement) {
|
3891
|
-
if (yp_statements_node_body_length(node) == 0) {
|
3953
|
+
if (yp_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
|
3892
3954
|
node->base.location.start = statement->location.start;
|
3893
3955
|
}
|
3956
|
+
if (statement->location.end > node->base.location.end) {
|
3957
|
+
node->base.location.end = statement->location.end;
|
3958
|
+
}
|
3894
3959
|
|
3895
3960
|
yp_node_list_append(&node->body, statement);
|
3896
|
-
node->base.location.end = statement->location.end;
|
3897
3961
|
|
3898
3962
|
// Every statement gets marked as a place where a newline can occur.
|
3899
3963
|
statement->flags |= YP_NODE_FLAG_NEWLINE;
|
@@ -3947,7 +4011,7 @@ yp_super_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_argument
|
|
3947
4011
|
assert(keyword->type == YP_TOKEN_KEYWORD_SUPER);
|
3948
4012
|
yp_super_node_t *node = YP_ALLOC_NODE(parser, yp_super_node_t);
|
3949
4013
|
|
3950
|
-
const
|
4014
|
+
const uint8_t *end;
|
3951
4015
|
if (arguments->block != NULL) {
|
3952
4016
|
end = arguments->block->base.location.end;
|
3953
4017
|
} else if (arguments->closing_loc.start != NULL) {
|
@@ -4038,7 +4102,7 @@ yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) {
|
|
4038
4102
|
// Check if the given node is a label in a hash.
|
4039
4103
|
static bool
|
4040
4104
|
yp_symbol_node_label_p(yp_node_t *node) {
|
4041
|
-
const
|
4105
|
+
const uint8_t *end = NULL;
|
4042
4106
|
|
4043
4107
|
switch (YP_NODE_TYPE(node)) {
|
4044
4108
|
case YP_NODE_SYMBOL_NODE:
|
@@ -4146,7 +4210,7 @@ yp_unless_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t
|
|
4146
4210
|
yp_flip_flop(predicate);
|
4147
4211
|
yp_unless_node_t *node = YP_ALLOC_NODE(parser, yp_unless_node_t);
|
4148
4212
|
|
4149
|
-
const
|
4213
|
+
const uint8_t *end;
|
4150
4214
|
if (statements != NULL) {
|
4151
4215
|
end = statements->base.location.end;
|
4152
4216
|
} else {
|
@@ -4363,7 +4427,7 @@ static yp_yield_node_t *
|
|
4363
4427
|
yp_yield_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_location_t *lparen_loc, yp_arguments_node_t *arguments, const yp_location_t *rparen_loc) {
|
4364
4428
|
yp_yield_node_t *node = YP_ALLOC_NODE(parser, yp_yield_node_t);
|
4365
4429
|
|
4366
|
-
const
|
4430
|
+
const uint8_t *end;
|
4367
4431
|
if (rparen_loc->start != NULL) {
|
4368
4432
|
end = rparen_loc->end;
|
4369
4433
|
} else if (arguments != NULL) {
|
@@ -4437,7 +4501,7 @@ yp_parser_local_depth(yp_parser_t *parser, yp_token_t *token) {
|
|
4437
4501
|
|
4438
4502
|
// Add a local variable from a location to the current scope.
|
4439
4503
|
static yp_constant_id_t
|
4440
|
-
yp_parser_local_add_location(yp_parser_t *parser, const
|
4504
|
+
yp_parser_local_add_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
4441
4505
|
yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, start, end);
|
4442
4506
|
|
4443
4507
|
if (!yp_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
|
@@ -4486,15 +4550,13 @@ yp_parser_scope_pop(yp_parser_t *parser) {
|
|
4486
4550
|
// reason we have the encoding_changed boolean to check if we need to go through
|
4487
4551
|
// the function pointer or can just directly use the UTF-8 functions.
|
4488
4552
|
static inline size_t
|
4489
|
-
char_is_identifier_start(yp_parser_t *parser, const
|
4490
|
-
const unsigned char uc = (unsigned char) *c;
|
4491
|
-
|
4553
|
+
char_is_identifier_start(yp_parser_t *parser, const uint8_t *b) {
|
4492
4554
|
if (parser->encoding_changed) {
|
4493
|
-
return parser->encoding.alpha_char(
|
4494
|
-
} else if (
|
4495
|
-
return (yp_encoding_unicode_table[
|
4555
|
+
return parser->encoding.alpha_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
|
4556
|
+
} else if (*b < 0x80) {
|
4557
|
+
return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
|
4496
4558
|
} else {
|
4497
|
-
return (size_t) (yp_encoding_utf_8_alpha_char(
|
4559
|
+
return (size_t) (yp_encoding_utf_8_alpha_char(b, parser->end - b) || 1u);
|
4498
4560
|
}
|
4499
4561
|
}
|
4500
4562
|
|
@@ -4502,15 +4564,13 @@ char_is_identifier_start(yp_parser_t *parser, const char *c) {
|
|
4502
4564
|
// the identifiers in a source file once the first character has been found. So
|
4503
4565
|
// it's important that it be as fast as possible.
|
4504
4566
|
static inline size_t
|
4505
|
-
char_is_identifier(yp_parser_t *parser, const
|
4506
|
-
const unsigned char uc = (unsigned char) *c;
|
4507
|
-
|
4567
|
+
char_is_identifier(yp_parser_t *parser, const uint8_t *b) {
|
4508
4568
|
if (parser->encoding_changed) {
|
4509
|
-
return parser->encoding.alnum_char(
|
4510
|
-
} else if (
|
4511
|
-
return (yp_encoding_unicode_table[
|
4569
|
+
return parser->encoding.alnum_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
|
4570
|
+
} else if (*b < 0x80) {
|
4571
|
+
return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (*b == '_');
|
4512
4572
|
} else {
|
4513
|
-
return (size_t) (yp_encoding_utf_8_alnum_char(
|
4573
|
+
return (size_t) (yp_encoding_utf_8_alnum_char(b, parser->end - b) || 1u);
|
4514
4574
|
}
|
4515
4575
|
}
|
4516
4576
|
|
@@ -4532,15 +4592,15 @@ const unsigned int yp_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
|
|
4532
4592
|
#undef PUNCT
|
4533
4593
|
|
4534
4594
|
static inline bool
|
4535
|
-
char_is_global_name_punctuation(const
|
4536
|
-
const unsigned int i = (const unsigned int)
|
4595
|
+
char_is_global_name_punctuation(const uint8_t b) {
|
4596
|
+
const unsigned int i = (const unsigned int) b;
|
4537
4597
|
if (i <= 0x20 || 0x7e < i) return false;
|
4538
4598
|
|
4539
|
-
return (yp_global_name_punctuation_hash[(i - 0x20) / 32] >> (
|
4599
|
+
return (yp_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
|
4540
4600
|
}
|
4541
4601
|
|
4542
4602
|
static inline bool
|
4543
|
-
token_is_numbered_parameter(const
|
4603
|
+
token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
|
4544
4604
|
return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (yp_char_is_decimal_digit(start[1]));
|
4545
4605
|
}
|
4546
4606
|
|
@@ -4594,8 +4654,8 @@ yp_do_loop_stack_p(yp_parser_t *parser) {
|
|
4594
4654
|
|
4595
4655
|
// Get the next character in the source starting from +cursor+. If that position
|
4596
4656
|
// is beyond the end of the source then return '\0'.
|
4597
|
-
static inline
|
4598
|
-
peek_at(yp_parser_t *parser, const
|
4657
|
+
static inline uint8_t
|
4658
|
+
peek_at(yp_parser_t *parser, const uint8_t *cursor) {
|
4599
4659
|
if (cursor < parser->end) {
|
4600
4660
|
return *cursor;
|
4601
4661
|
} else {
|
@@ -4606,33 +4666,33 @@ peek_at(yp_parser_t *parser, const char *cursor) {
|
|
4606
4666
|
// Get the next character in the source starting from parser->current.end and
|
4607
4667
|
// adding the given offset. If that position is beyond the end of the source
|
4608
4668
|
// then return '\0'.
|
4609
|
-
static inline
|
4669
|
+
static inline uint8_t
|
4610
4670
|
peek_offset(yp_parser_t *parser, ptrdiff_t offset) {
|
4611
4671
|
return peek_at(parser, parser->current.end + offset);
|
4612
4672
|
}
|
4613
4673
|
|
4614
4674
|
// Get the next character in the source starting from parser->current.end. If
|
4615
4675
|
// that position is beyond the end of the source then return '\0'.
|
4616
|
-
static inline
|
4676
|
+
static inline uint8_t
|
4617
4677
|
peek(yp_parser_t *parser) {
|
4618
4678
|
return peek_at(parser, parser->current.end);
|
4619
4679
|
}
|
4620
4680
|
|
4621
4681
|
// Get the next string of length len in the source starting from parser->current.end.
|
4622
4682
|
// If the string extends beyond the end of the source, return the empty string ""
|
4623
|
-
static inline const
|
4683
|
+
static inline const uint8_t *
|
4624
4684
|
peek_string(yp_parser_t *parser, size_t len) {
|
4625
4685
|
if (parser->current.end + len <= parser->end) {
|
4626
4686
|
return parser->current.end;
|
4627
4687
|
} else {
|
4628
|
-
return "";
|
4688
|
+
return (const uint8_t *) "";
|
4629
4689
|
}
|
4630
4690
|
}
|
4631
4691
|
|
4632
4692
|
// If the character to be read matches the given value, then returns true and
|
4633
4693
|
// advanced the current pointer.
|
4634
4694
|
static inline bool
|
4635
|
-
match(yp_parser_t *parser,
|
4695
|
+
match(yp_parser_t *parser, uint8_t value) {
|
4636
4696
|
if (peek(parser) == value) {
|
4637
4697
|
parser->current.end++;
|
4638
4698
|
return true;
|
@@ -4643,7 +4703,7 @@ match(yp_parser_t *parser, char value) {
|
|
4643
4703
|
// Return the length of the line ending string starting at +cursor+, or 0 if it
|
4644
4704
|
// is not a line ending. This function is intended to be CRLF/LF agnostic.
|
4645
4705
|
static inline size_t
|
4646
|
-
match_eol_at(yp_parser_t *parser, const
|
4706
|
+
match_eol_at(yp_parser_t *parser, const uint8_t *cursor) {
|
4647
4707
|
if (peek_at(parser, cursor) == '\n') {
|
4648
4708
|
return 1;
|
4649
4709
|
}
|
@@ -4670,8 +4730,8 @@ match_eol(yp_parser_t *parser) {
|
|
4670
4730
|
}
|
4671
4731
|
|
4672
4732
|
// Skip to the next newline character or NUL byte.
|
4673
|
-
static inline const
|
4674
|
-
next_newline(const
|
4733
|
+
static inline const uint8_t *
|
4734
|
+
next_newline(const uint8_t *cursor, ptrdiff_t length) {
|
4675
4735
|
assert(length >= 0);
|
4676
4736
|
|
4677
4737
|
// Note that it's okay for us to use memchr here to look for \n because none
|
@@ -4682,17 +4742,17 @@ next_newline(const char *cursor, ptrdiff_t length) {
|
|
4682
4742
|
|
4683
4743
|
// Find the start of the encoding comment. This is effectively an inlined
|
4684
4744
|
// version of strnstr with some modifications.
|
4685
|
-
static inline const
|
4686
|
-
parser_lex_encoding_comment_start(yp_parser_t *parser, const
|
4745
|
+
static inline const uint8_t *
|
4746
|
+
parser_lex_encoding_comment_start(yp_parser_t *parser, const uint8_t *cursor, ptrdiff_t remaining) {
|
4687
4747
|
assert(remaining >= 0);
|
4688
4748
|
size_t length = (size_t) remaining;
|
4689
4749
|
|
4690
4750
|
size_t key_length = strlen("coding:");
|
4691
4751
|
if (key_length > length) return NULL;
|
4692
4752
|
|
4693
|
-
const
|
4753
|
+
const uint8_t *cursor_limit = cursor + length - key_length + 1;
|
4694
4754
|
while ((cursor = yp_memchr(cursor, 'c', (size_t) (cursor_limit - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
|
4695
|
-
if (
|
4755
|
+
if (memcmp(cursor, "coding", key_length - 1) == 0) {
|
4696
4756
|
size_t whitespace_after_coding = yp_strspn_inline_whitespace(cursor + key_length - 1, parser->end - (cursor + key_length - 1));
|
4697
4757
|
size_t cur_pos = key_length + whitespace_after_coding;
|
4698
4758
|
|
@@ -4711,13 +4771,13 @@ parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdi
|
|
4711
4771
|
// actions are necessary for it here.
|
4712
4772
|
static void
|
4713
4773
|
parser_lex_encoding_comment(yp_parser_t *parser) {
|
4714
|
-
const
|
4715
|
-
const
|
4774
|
+
const uint8_t *start = parser->current.start + 1;
|
4775
|
+
const uint8_t *end = next_newline(start, parser->end - start);
|
4716
4776
|
if (end == NULL) end = parser->end;
|
4717
4777
|
|
4718
4778
|
// These are the patterns we're going to match to find the encoding comment.
|
4719
4779
|
// This is definitely not complete or even really correct.
|
4720
|
-
const
|
4780
|
+
const uint8_t *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);
|
4721
4781
|
|
4722
4782
|
// If we didn't find anything that matched our patterns, then return. Note
|
4723
4783
|
// that this does a _very_ poor job of actually finding the encoding, and
|
@@ -4730,7 +4790,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
|
|
4730
4790
|
|
4731
4791
|
// Now determine the end of the encoding string. This is either the end of
|
4732
4792
|
// the line, the first whitespace character, or a punctuation mark.
|
4733
|
-
const
|
4793
|
+
const uint8_t *encoding_end = yp_strpbrk(parser, encoding_start, (const uint8_t *) " \t\f\r\v\n;,", end - encoding_start);
|
4734
4794
|
encoding_end = encoding_end == NULL ? end : encoding_end;
|
4735
4795
|
|
4736
4796
|
// Finally, we can determine the width of the encoding string.
|
@@ -4752,7 +4812,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
|
|
4752
4812
|
// Extensions like utf-8 can contain extra encoding details like,
|
4753
4813
|
// utf-8-dos, utf-8-linux, utf-8-mac. We treat these all as utf-8 should
|
4754
4814
|
// treat any encoding starting utf-8 as utf-8.
|
4755
|
-
if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, "utf-8", 5) == 0)) {
|
4815
|
+
if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, (const uint8_t *) "utf-8", 5) == 0)) {
|
4756
4816
|
// We don't need to do anything here because the default encoding is
|
4757
4817
|
// already UTF-8. We'll just return.
|
4758
4818
|
return;
|
@@ -4761,7 +4821,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
|
|
4761
4821
|
// Next, we're going to loop through each of the encodings that we handle
|
4762
4822
|
// explicitly. If we found one that we understand, we'll use that value.
|
4763
4823
|
#define ENCODING(value, prebuilt) \
|
4764
|
-
if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, value, width) == 0) { \
|
4824
|
+
if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, (const uint8_t *) value, width) == 0) { \
|
4765
4825
|
parser->encoding = prebuilt; \
|
4766
4826
|
parser->encoding_changed |= true; \
|
4767
4827
|
if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
|
@@ -4901,14 +4961,9 @@ context_push(yp_parser_t *parser, yp_context_t context) {
|
|
4901
4961
|
|
4902
4962
|
static void
|
4903
4963
|
context_pop(yp_parser_t *parser) {
|
4904
|
-
|
4905
|
-
|
4906
|
-
|
4907
|
-
} else {
|
4908
|
-
yp_context_node_t *prev = parser->current_context->prev;
|
4909
|
-
free(parser->current_context);
|
4910
|
-
parser->current_context = prev;
|
4911
|
-
}
|
4964
|
+
yp_context_node_t *prev = parser->current_context->prev;
|
4965
|
+
free(parser->current_context);
|
4966
|
+
parser->current_context = prev;
|
4912
4967
|
}
|
4913
4968
|
|
4914
4969
|
static bool
|
@@ -4992,7 +5047,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
|
|
4992
5047
|
// 0d1111 is a decimal number
|
4993
5048
|
case 'd':
|
4994
5049
|
case 'D':
|
4995
|
-
|
5050
|
+
parser->current.end++;
|
5051
|
+
if (yp_char_is_decimal_digit(peek(parser))) {
|
4996
5052
|
parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
|
4997
5053
|
} else {
|
4998
5054
|
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid decimal number.");
|
@@ -5003,7 +5059,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
|
|
5003
5059
|
// 0b1111 is a binary number
|
5004
5060
|
case 'b':
|
5005
5061
|
case 'B':
|
5006
|
-
|
5062
|
+
parser->current.end++;
|
5063
|
+
if (yp_char_is_binary_digit(peek(parser))) {
|
5007
5064
|
parser->current.end += yp_strspn_binary_number(parser->current.end, parser->end - parser->current.end);
|
5008
5065
|
} else {
|
5009
5066
|
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid binary number.");
|
@@ -5014,7 +5071,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
|
|
5014
5071
|
// 0o1111 is an octal number
|
5015
5072
|
case 'o':
|
5016
5073
|
case 'O':
|
5017
|
-
|
5074
|
+
parser->current.end++;
|
5075
|
+
if (yp_char_is_octal_digit(peek(parser))) {
|
5018
5076
|
parser->current.end += yp_strspn_octal_number(parser->current.end, parser->end - parser->current.end);
|
5019
5077
|
} else {
|
5020
5078
|
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid octal number.");
|
@@ -5038,7 +5096,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
|
|
5038
5096
|
// 0x1111 is a hexadecimal number
|
5039
5097
|
case 'x':
|
5040
5098
|
case 'X':
|
5041
|
-
|
5099
|
+
parser->current.end++;
|
5100
|
+
if (yp_char_is_hexadecimal_digit(peek(parser))) {
|
5042
5101
|
parser->current.end += yp_strspn_hexadecimal_number(parser->current.end, parser->end - parser->current.end);
|
5043
5102
|
} else {
|
5044
5103
|
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid hexadecimal number.");
|
@@ -5084,7 +5143,7 @@ lex_numeric(yp_parser_t *parser) {
|
|
5084
5143
|
if (parser->current.end < parser->end) {
|
5085
5144
|
type = lex_numeric_prefix(parser);
|
5086
5145
|
|
5087
|
-
const
|
5146
|
+
const uint8_t *end = parser->current.end;
|
5088
5147
|
yp_token_type_t suffix_type = type;
|
5089
5148
|
|
5090
5149
|
if (type == YP_TOKEN_INTEGER) {
|
@@ -5109,8 +5168,8 @@ lex_numeric(yp_parser_t *parser) {
|
|
5109
5168
|
}
|
5110
5169
|
}
|
5111
5170
|
|
5112
|
-
const
|
5113
|
-
if (
|
5171
|
+
const uint8_t b = peek(parser);
|
5172
|
+
if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
|
5114
5173
|
parser->current.end = end;
|
5115
5174
|
} else {
|
5116
5175
|
type = suffix_type;
|
@@ -5122,6 +5181,11 @@ lex_numeric(yp_parser_t *parser) {
|
|
5122
5181
|
|
5123
5182
|
static yp_token_type_t
|
5124
5183
|
lex_global_variable(yp_parser_t *parser) {
|
5184
|
+
if (parser->current.end >= parser->end) {
|
5185
|
+
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid global variable.");
|
5186
|
+
return YP_TOKEN_GLOBAL_VARIABLE;
|
5187
|
+
}
|
5188
|
+
|
5125
5189
|
switch (*parser->current.end) {
|
5126
5190
|
case '~': // $~: match-data
|
5127
5191
|
case '*': // $*: argv
|
@@ -5210,7 +5274,7 @@ lex_keyword(yp_parser_t *parser, const char *value, yp_lex_state_t state, yp_tok
|
|
5210
5274
|
yp_lex_state_t last_state = parser->lex_state;
|
5211
5275
|
|
5212
5276
|
const size_t vlen = strlen(value);
|
5213
|
-
if (parser->current.start + vlen <= parser->end &&
|
5277
|
+
if (parser->current.start + vlen <= parser->end && memcmp(parser->current.start, value, vlen) == 0) {
|
5214
5278
|
if (parser->lex_state & YP_LEX_STATE_FNAME) {
|
5215
5279
|
lex_state_set(parser, YP_LEX_STATE_ENDFN);
|
5216
5280
|
} else {
|
@@ -5376,7 +5440,7 @@ current_token_starts_line(yp_parser_t *parser) {
|
|
5376
5440
|
// this token type.
|
5377
5441
|
//
|
5378
5442
|
static yp_token_type_t
|
5379
|
-
lex_interpolation(yp_parser_t *parser, const
|
5443
|
+
lex_interpolation(yp_parser_t *parser, const uint8_t *pound) {
|
5380
5444
|
// If there is no content following this #, then we're at the end of
|
5381
5445
|
// the string and we can safely return string content.
|
5382
5446
|
if (pound + 1 >= parser->end) {
|
@@ -5397,7 +5461,7 @@ lex_interpolation(yp_parser_t *parser, const char *pound) {
|
|
5397
5461
|
|
5398
5462
|
// If we're looking at a @ and there's another @, then we'll skip past the
|
5399
5463
|
// second @.
|
5400
|
-
const
|
5464
|
+
const uint8_t *variable = pound + 2;
|
5401
5465
|
if (*variable == '@' && pound + 3 < parser->end) variable++;
|
5402
5466
|
|
5403
5467
|
if (char_is_identifier_start(parser, variable)) {
|
@@ -5433,7 +5497,7 @@ lex_interpolation(yp_parser_t *parser, const char *pound) {
|
|
5433
5497
|
// This is the character that we're going to check to see if it is the
|
5434
5498
|
// start of an identifier that would indicate that this is a global
|
5435
5499
|
// variable.
|
5436
|
-
const
|
5500
|
+
const uint8_t *check = pound + 2;
|
5437
5501
|
|
5438
5502
|
if (pound[2] == '-') {
|
5439
5503
|
if (pound + 3 >= parser->end) {
|
@@ -5624,7 +5688,7 @@ parser_comment(yp_parser_t *parser, yp_comment_type_t type) {
|
|
5624
5688
|
static yp_token_type_t
|
5625
5689
|
lex_embdoc(yp_parser_t *parser) {
|
5626
5690
|
// First, lex out the EMBDOC_BEGIN token.
|
5627
|
-
const
|
5691
|
+
const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
|
5628
5692
|
|
5629
5693
|
if (newline == NULL) {
|
5630
5694
|
parser->current.end = parser->end;
|
@@ -5647,9 +5711,9 @@ lex_embdoc(yp_parser_t *parser) {
|
|
5647
5711
|
|
5648
5712
|
// If we've hit the end of the embedded documentation then we'll return that
|
5649
5713
|
// token here.
|
5650
|
-
if (
|
5714
|
+
if (memcmp(parser->current.end, "=end", 4) == 0 &&
|
5651
5715
|
(parser->current.end + 4 == parser->end || yp_char_is_whitespace(parser->current.end[4]))) {
|
5652
|
-
const
|
5716
|
+
const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
|
5653
5717
|
|
5654
5718
|
if (newline == NULL) {
|
5655
5719
|
parser->current.end = parser->end;
|
@@ -5669,7 +5733,7 @@ lex_embdoc(yp_parser_t *parser) {
|
|
5669
5733
|
|
5670
5734
|
// Otherwise, we'll parse until the end of the line and return a line of
|
5671
5735
|
// embedded documentation.
|
5672
|
-
const
|
5736
|
+
const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
|
5673
5737
|
|
5674
5738
|
if (newline == NULL) {
|
5675
5739
|
parser->current.end = parser->end;
|
@@ -5819,7 +5883,7 @@ parser_lex(yp_parser_t *parser) {
|
|
5819
5883
|
LEX(YP_TOKEN_EOF);
|
5820
5884
|
|
5821
5885
|
case '#': { // comments
|
5822
|
-
const
|
5886
|
+
const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
|
5823
5887
|
|
5824
5888
|
parser->current.end = ending == NULL ? parser->end : ending + 1;
|
5825
5889
|
parser->current.type = YP_TOKEN_COMMENT;
|
@@ -5888,7 +5952,7 @@ parser_lex(yp_parser_t *parser) {
|
|
5888
5952
|
// (either . or &.) that starts the next line. If there is, then this
|
5889
5953
|
// is going to become an ignored newline and we're going to instead
|
5890
5954
|
// return the call operator.
|
5891
|
-
const
|
5955
|
+
const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
|
5892
5956
|
next_content += yp_strspn_inline_whitespace(next_content, parser->end - next_content);
|
5893
5957
|
|
5894
5958
|
if (next_content < parser->end) {
|
@@ -5899,15 +5963,15 @@ parser_lex(yp_parser_t *parser) {
|
|
5899
5963
|
// Otherwise we'll return a regular newline.
|
5900
5964
|
if (next_content[0] == '#') {
|
5901
5965
|
// Here we look for a "." or "&." following a "\n".
|
5902
|
-
const
|
5966
|
+
const uint8_t *following = next_newline(next_content, parser->end - next_content);
|
5903
5967
|
|
5904
|
-
while (following && (following < parser->end)) {
|
5968
|
+
while (following && (following + 1 < parser->end)) {
|
5905
5969
|
following++;
|
5906
5970
|
following += yp_strspn_inline_whitespace(following, parser->end - following);
|
5907
5971
|
|
5908
5972
|
// If this is not followed by a comment, then we can break out
|
5909
5973
|
// of this loop.
|
5910
|
-
if (
|
5974
|
+
if (peek_at(parser, following) != '#') break;
|
5911
5975
|
|
5912
5976
|
// If there is a comment, then we need to find the end of the
|
5913
5977
|
// comment and continue searching from there.
|
@@ -6150,7 +6214,7 @@ parser_lex(yp_parser_t *parser) {
|
|
6150
6214
|
|
6151
6215
|
// = => =~ == === =begin
|
6152
6216
|
case '=':
|
6153
|
-
if (current_token_starts_line(parser) &&
|
6217
|
+
if (current_token_starts_line(parser) && memcmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_offset(parser, 5))) {
|
6154
6218
|
yp_token_type_t type = lex_embdoc(parser);
|
6155
6219
|
|
6156
6220
|
if (type == YP_TOKEN_EOF) {
|
@@ -6188,7 +6252,7 @@ parser_lex(yp_parser_t *parser) {
|
|
6188
6252
|
!lex_state_end_p(parser) &&
|
6189
6253
|
(!lex_state_p(parser, YP_LEX_STATE_ARG_ANY) || lex_state_p(parser, YP_LEX_STATE_LABELED) || space_seen)
|
6190
6254
|
) {
|
6191
|
-
const
|
6255
|
+
const uint8_t *end = parser->current.end;
|
6192
6256
|
|
6193
6257
|
yp_heredoc_quote_t quote = YP_HEREDOC_QUOTE_NONE;
|
6194
6258
|
yp_heredoc_indent_t indent = YP_HEREDOC_INDENT_NONE;
|
@@ -6210,7 +6274,7 @@ parser_lex(yp_parser_t *parser) {
|
|
6210
6274
|
quote = YP_HEREDOC_QUOTE_SINGLE;
|
6211
6275
|
}
|
6212
6276
|
|
6213
|
-
const
|
6277
|
+
const uint8_t *ident_start = parser->current.end;
|
6214
6278
|
size_t width = 0;
|
6215
6279
|
|
6216
6280
|
if (parser->current.end >= parser->end) {
|
@@ -6233,7 +6297,7 @@ parser_lex(yp_parser_t *parser) {
|
|
6233
6297
|
}
|
6234
6298
|
|
6235
6299
|
size_t ident_length = (size_t) (parser->current.end - ident_start);
|
6236
|
-
if (quote != YP_HEREDOC_QUOTE_NONE && !match(parser, (
|
6300
|
+
if (quote != YP_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
|
6237
6301
|
// TODO: handle unterminated heredoc
|
6238
6302
|
}
|
6239
6303
|
|
@@ -6249,7 +6313,7 @@ parser_lex(yp_parser_t *parser) {
|
|
6249
6313
|
});
|
6250
6314
|
|
6251
6315
|
if (parser->heredoc_end == NULL) {
|
6252
|
-
const
|
6316
|
+
const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
|
6253
6317
|
|
6254
6318
|
if (body_start == NULL) {
|
6255
6319
|
// If there is no newline after the heredoc identifier, then
|
@@ -6574,7 +6638,7 @@ parser_lex(yp_parser_t *parser) {
|
|
6574
6638
|
LEX(YP_TOKEN_COLON_COLON);
|
6575
6639
|
}
|
6576
6640
|
|
6577
|
-
if (lex_state_end_p(parser) || yp_char_is_whitespace(
|
6641
|
+
if (lex_state_end_p(parser) || yp_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
|
6578
6642
|
lex_state_set(parser, YP_LEX_STATE_BEG);
|
6579
6643
|
LEX(YP_TOKEN_COLON);
|
6580
6644
|
}
|
@@ -6815,7 +6879,7 @@ parser_lex(yp_parser_t *parser) {
|
|
6815
6879
|
if (
|
6816
6880
|
((parser->current.end - parser->current.start) == 7) &&
|
6817
6881
|
current_token_starts_line(parser) &&
|
6818
|
-
(
|
6882
|
+
(memcmp(parser->current.start, "__END__", 7) == 0) &&
|
6819
6883
|
(parser->current.end == parser->end || match_eol(parser))
|
6820
6884
|
)
|
6821
6885
|
{
|
@@ -6891,8 +6955,8 @@ parser_lex(yp_parser_t *parser) {
|
|
6891
6955
|
// Here we'll get a list of the places where strpbrk should break,
|
6892
6956
|
// and then find the first one.
|
6893
6957
|
yp_lex_mode_t *lex_mode = parser->lex_modes.current;
|
6894
|
-
const
|
6895
|
-
const
|
6958
|
+
const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
|
6959
|
+
const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
6896
6960
|
|
6897
6961
|
while (breakpoint != NULL) {
|
6898
6962
|
// If we hit a null byte, skip directly past it.
|
@@ -6940,10 +7004,25 @@ parser_lex(yp_parser_t *parser) {
|
|
6940
7004
|
if (*breakpoint == '\\') {
|
6941
7005
|
yp_unescape_type_t unescape_type = lex_mode->as.list.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
|
6942
7006
|
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
|
7007
|
+
if (difference == 0) {
|
7008
|
+
// we're at the end of the file
|
7009
|
+
breakpoint = NULL;
|
7010
|
+
continue;
|
7011
|
+
}
|
6943
7012
|
|
6944
|
-
// If the result is an escaped newline
|
6945
|
-
|
6946
|
-
|
7013
|
+
// If the result is an escaped newline ...
|
7014
|
+
if (breakpoint[difference - 1] == '\n') {
|
7015
|
+
if (parser->heredoc_end) {
|
7016
|
+
// ... if we are on the same line as a heredoc, flush the heredoc and
|
7017
|
+
// continue parsing after heredoc_end.
|
7018
|
+
parser->current.end = breakpoint + difference;
|
7019
|
+
parser_flush_heredoc_end(parser);
|
7020
|
+
LEX(YP_TOKEN_STRING_CONTENT);
|
7021
|
+
} else {
|
7022
|
+
// ... else track the newline.
|
7023
|
+
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
|
7024
|
+
}
|
7025
|
+
}
|
6947
7026
|
|
6948
7027
|
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
|
6949
7028
|
continue;
|
@@ -6998,8 +7077,8 @@ parser_lex(yp_parser_t *parser) {
|
|
6998
7077
|
// These are the places where we need to split up the content of the
|
6999
7078
|
// regular expression. We'll use strpbrk to find the first of these
|
7000
7079
|
// characters.
|
7001
|
-
const
|
7002
|
-
const
|
7080
|
+
const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
|
7081
|
+
const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7003
7082
|
|
7004
7083
|
while (breakpoint != NULL) {
|
7005
7084
|
// If we hit a null byte, skip directly past it.
|
@@ -7062,9 +7141,14 @@ parser_lex(yp_parser_t *parser) {
|
|
7062
7141
|
// and find the next breakpoint.
|
7063
7142
|
if (*breakpoint == '\\') {
|
7064
7143
|
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, YP_UNESCAPE_ALL, false);
|
7144
|
+
if (difference == 0) {
|
7145
|
+
// we're at the end of the file
|
7146
|
+
breakpoint = NULL;
|
7147
|
+
continue;
|
7148
|
+
}
|
7065
7149
|
|
7066
7150
|
// If the result is an escaped newline ...
|
7067
|
-
if (
|
7151
|
+
if (breakpoint[difference - 1] == '\n') {
|
7068
7152
|
if (parser->heredoc_end) {
|
7069
7153
|
// ... if we are on the same line as a heredoc, flush the heredoc and
|
7070
7154
|
// continue parsing after heredoc_end.
|
@@ -7126,8 +7210,8 @@ parser_lex(yp_parser_t *parser) {
|
|
7126
7210
|
|
7127
7211
|
// These are the places where we need to split up the content of the
|
7128
7212
|
// string. We'll use strpbrk to find the first of these characters.
|
7129
|
-
const
|
7130
|
-
const
|
7213
|
+
const uint8_t *breakpoints = parser->lex_modes.current->as.string.breakpoints;
|
7214
|
+
const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7131
7215
|
|
7132
7216
|
while (breakpoint != NULL) {
|
7133
7217
|
// If we hit the incrementor, then we'll increment then nesting and
|
@@ -7212,9 +7296,14 @@ parser_lex(yp_parser_t *parser) {
|
|
7212
7296
|
// find the next breakpoint.
|
7213
7297
|
yp_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
|
7214
7298
|
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
|
7299
|
+
if (difference == 0) {
|
7300
|
+
// we're at the end of the file
|
7301
|
+
breakpoint = NULL;
|
7302
|
+
break;
|
7303
|
+
}
|
7215
7304
|
|
7216
7305
|
// If the result is an escaped newline ...
|
7217
|
-
if (
|
7306
|
+
if (breakpoint[difference - 1] == '\n') {
|
7218
7307
|
if (parser->heredoc_end) {
|
7219
7308
|
// ... if we are on the same line as a heredoc, flush the heredoc and
|
7220
7309
|
// continue parsing after heredoc_end.
|
@@ -7272,18 +7361,18 @@ parser_lex(yp_parser_t *parser) {
|
|
7272
7361
|
|
7273
7362
|
// Now let's grab the information about the identifier off of the current
|
7274
7363
|
// lex mode.
|
7275
|
-
const
|
7364
|
+
const uint8_t *ident_start = parser->lex_modes.current->as.heredoc.ident_start;
|
7276
7365
|
size_t ident_length = parser->lex_modes.current->as.heredoc.ident_length;
|
7277
7366
|
|
7278
7367
|
// If we are immediately following a newline and we have hit the
|
7279
7368
|
// terminator, then we need to return the ending of the heredoc.
|
7280
7369
|
if (current_token_starts_line(parser)) {
|
7281
|
-
const
|
7370
|
+
const uint8_t *start = parser->current.start;
|
7282
7371
|
if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) {
|
7283
7372
|
start += yp_strspn_inline_whitespace(start, parser->end - start);
|
7284
7373
|
}
|
7285
7374
|
|
7286
|
-
if ((start + ident_length <= parser->end) && (
|
7375
|
+
if ((start + ident_length <= parser->end) && (memcmp(start, ident_start, ident_length) == 0)) {
|
7287
7376
|
bool matched = true;
|
7288
7377
|
bool at_end = false;
|
7289
7378
|
|
@@ -7318,14 +7407,14 @@ parser_lex(yp_parser_t *parser) {
|
|
7318
7407
|
// Otherwise we'll be parsing string content. These are the places where
|
7319
7408
|
// we need to split up the content of the heredoc. We'll use strpbrk to
|
7320
7409
|
// find the first of these characters.
|
7321
|
-
|
7410
|
+
uint8_t breakpoints[] = "\n\\#";
|
7322
7411
|
|
7323
7412
|
yp_heredoc_quote_t quote = parser->lex_modes.current->as.heredoc.quote;
|
7324
7413
|
if (quote == YP_HEREDOC_QUOTE_SINGLE) {
|
7325
7414
|
breakpoints[2] = '\0';
|
7326
7415
|
}
|
7327
7416
|
|
7328
|
-
const
|
7417
|
+
const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7329
7418
|
|
7330
7419
|
while (breakpoint != NULL) {
|
7331
7420
|
switch (*breakpoint) {
|
@@ -7342,7 +7431,7 @@ parser_lex(yp_parser_t *parser) {
|
|
7342
7431
|
|
7343
7432
|
yp_newline_list_append(&parser->newline_list, breakpoint);
|
7344
7433
|
|
7345
|
-
const
|
7434
|
+
const uint8_t *start = breakpoint + 1;
|
7346
7435
|
if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) {
|
7347
7436
|
start += yp_strspn_inline_whitespace(start, parser->end - start);
|
7348
7437
|
}
|
@@ -7353,7 +7442,7 @@ parser_lex(yp_parser_t *parser) {
|
|
7353
7442
|
// again and return the end of the heredoc.
|
7354
7443
|
if (
|
7355
7444
|
(start + ident_length <= parser->end) &&
|
7356
|
-
(
|
7445
|
+
(memcmp(start, ident_start, ident_length) == 0)
|
7357
7446
|
) {
|
7358
7447
|
// Heredoc terminators must be followed by a newline, CRLF, or EOF to be valid.
|
7359
7448
|
if (
|
@@ -7383,6 +7472,11 @@ parser_lex(yp_parser_t *parser) {
|
|
7383
7472
|
} else {
|
7384
7473
|
yp_unescape_type_t unescape_type = (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL;
|
7385
7474
|
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
|
7475
|
+
if (difference == 0) {
|
7476
|
+
// we're at the end of the file
|
7477
|
+
breakpoint = NULL;
|
7478
|
+
break;
|
7479
|
+
}
|
7386
7480
|
|
7387
7481
|
yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
|
7388
7482
|
|
@@ -7453,6 +7547,17 @@ yp_symbol_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
|
|
7453
7547
|
return node;
|
7454
7548
|
}
|
7455
7549
|
|
7550
|
+
static yp_string_node_t *
|
7551
|
+
yp_char_literal_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) {
|
7552
|
+
yp_string_node_t *node = yp_string_node_create(parser, opening, content, closing);
|
7553
|
+
|
7554
|
+
assert((content->end - content->start) >= 0);
|
7555
|
+
yp_string_shared_init(&node->unescaped, content->start, content->end);
|
7556
|
+
|
7557
|
+
yp_unescape_manipulate_char_literal(parser, &node->unescaped, unescape_type);
|
7558
|
+
return node;
|
7559
|
+
}
|
7560
|
+
|
7456
7561
|
static yp_string_node_t *
|
7457
7562
|
yp_string_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) {
|
7458
7563
|
yp_string_node_t *node = yp_string_node_create(parser, opening, content, closing);
|
@@ -7918,10 +8023,11 @@ parse_target(yp_parser_t *parser, yp_node_t *target) {
|
|
7918
8023
|
// the previous method name in, and append an =.
|
7919
8024
|
size_t length = yp_string_length(&call->name);
|
7920
8025
|
|
7921
|
-
|
8026
|
+
uint8_t *name = calloc(length + 1, sizeof(uint8_t));
|
7922
8027
|
if (name == NULL) return NULL;
|
7923
8028
|
|
7924
|
-
|
8029
|
+
memcpy(name, yp_string_source(&call->name), length);
|
8030
|
+
name[length] = '=';
|
7925
8031
|
|
7926
8032
|
// Now switch the name to the new string.
|
7927
8033
|
yp_string_free(&call->name);
|
@@ -7962,7 +8068,7 @@ parse_write(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_nod
|
|
7962
8068
|
case YP_NODE_MISSING_NODE:
|
7963
8069
|
return target;
|
7964
8070
|
case YP_NODE_CLASS_VARIABLE_READ_NODE: {
|
7965
|
-
yp_class_variable_write_node_t *write_node =
|
8071
|
+
yp_class_variable_write_node_t *write_node = yp_class_variable_write_node_create(parser, (yp_class_variable_read_node_t *) target, operator, value);
|
7966
8072
|
yp_node_destroy(parser, target);
|
7967
8073
|
return (yp_node_t *) write_node;
|
7968
8074
|
}
|
@@ -7987,7 +8093,7 @@ parse_write(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_nod
|
|
7987
8093
|
case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
|
7988
8094
|
yp_local_variable_read_node_t *local_read = (yp_local_variable_read_node_t *) target;
|
7989
8095
|
|
7990
|
-
yp_constant_id_t constant_id = local_read->
|
8096
|
+
yp_constant_id_t constant_id = local_read->name;
|
7991
8097
|
uint32_t depth = local_read->depth;
|
7992
8098
|
|
7993
8099
|
yp_location_t name_loc = target->location;
|
@@ -8075,10 +8181,11 @@ parse_write(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_nod
|
|
8075
8181
|
// the previous method name in, and append an =.
|
8076
8182
|
size_t length = yp_string_length(&call->name);
|
8077
8183
|
|
8078
|
-
|
8184
|
+
uint8_t *name = calloc(length + 1, sizeof(uint8_t));
|
8079
8185
|
if (name == NULL) return NULL;
|
8080
8186
|
|
8081
|
-
|
8187
|
+
memcpy(name, yp_string_source(&call->name), length);
|
8188
|
+
name[length] = '=';
|
8082
8189
|
|
8083
8190
|
// Now switch the name to the new string.
|
8084
8191
|
yp_string_free(&call->name);
|
@@ -9043,10 +9150,12 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
|
|
9043
9150
|
}
|
9044
9151
|
|
9045
9152
|
if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
|
9153
|
+
yp_accepts_block_stack_push(parser, true);
|
9046
9154
|
yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_RESCUE);
|
9047
9155
|
if (statements) {
|
9048
9156
|
yp_rescue_node_statements_set(rescue, statements);
|
9049
9157
|
}
|
9158
|
+
yp_accepts_block_stack_pop(parser);
|
9050
9159
|
accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
|
9051
9160
|
}
|
9052
9161
|
|
@@ -9063,7 +9172,7 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
|
|
9063
9172
|
// since we won't know the end until we've found all consequent
|
9064
9173
|
// clauses. This sets the end location on all rescues once we know it
|
9065
9174
|
if (current) {
|
9066
|
-
const
|
9175
|
+
const uint8_t *end_to_set = current->base.location.end;
|
9067
9176
|
current = parent_node->rescue_clause;
|
9068
9177
|
while (current) {
|
9069
9178
|
current->base.location.end = end_to_set;
|
@@ -9077,7 +9186,9 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
|
|
9077
9186
|
|
9078
9187
|
yp_statements_node_t *else_statements = NULL;
|
9079
9188
|
if (!match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_END, YP_TOKEN_KEYWORD_ENSURE)) {
|
9189
|
+
yp_accepts_block_stack_push(parser, true);
|
9080
9190
|
else_statements = parse_statements(parser, YP_CONTEXT_RESCUE_ELSE);
|
9191
|
+
yp_accepts_block_stack_pop(parser);
|
9081
9192
|
accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
|
9082
9193
|
}
|
9083
9194
|
|
@@ -9091,7 +9202,9 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
|
|
9091
9202
|
|
9092
9203
|
yp_statements_node_t *ensure_statements = NULL;
|
9093
9204
|
if (!match_type_p(parser, YP_TOKEN_KEYWORD_END)) {
|
9205
|
+
yp_accepts_block_stack_push(parser, true);
|
9094
9206
|
ensure_statements = parse_statements(parser, YP_CONTEXT_ENSURE);
|
9207
|
+
yp_accepts_block_stack_pop(parser);
|
9095
9208
|
accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
|
9096
9209
|
}
|
9097
9210
|
|
@@ -9116,7 +9229,7 @@ parse_rescues_as_begin(yp_parser_t *parser, yp_statements_node_t *statements) {
|
|
9116
9229
|
// All nodes within a begin node are optional, so we look
|
9117
9230
|
// for the earliest possible node that we can use to set
|
9118
9231
|
// the BeginNode's start location
|
9119
|
-
const
|
9232
|
+
const uint8_t *start = begin_node->base.location.start;
|
9120
9233
|
if (begin_node->statements) {
|
9121
9234
|
start = begin_node->statements->base.location.start;
|
9122
9235
|
} else if (begin_node->rescue_clause) {
|
@@ -9201,7 +9314,9 @@ parse_block(yp_parser_t *parser) {
|
|
9201
9314
|
} else {
|
9202
9315
|
if (!match_type_p(parser, YP_TOKEN_KEYWORD_END)) {
|
9203
9316
|
if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_ENSURE)) {
|
9317
|
+
yp_accepts_block_stack_push(parser, true);
|
9204
9318
|
statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_BLOCK_KEYWORDS);
|
9319
|
+
yp_accepts_block_stack_pop(parser);
|
9205
9320
|
}
|
9206
9321
|
|
9207
9322
|
if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
|
@@ -9782,14 +9897,14 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
|
|
9782
9897
|
yp_node_t *node = nodes->nodes[index];
|
9783
9898
|
|
9784
9899
|
if (!YP_NODE_TYPE_P(node, YP_NODE_STRING_NODE)) continue;
|
9785
|
-
yp_location_t *content_loc = &((yp_string_node_t *) node)->content_loc;
|
9900
|
+
const yp_location_t *content_loc = &((yp_string_node_t *) node)->content_loc;
|
9786
9901
|
|
9787
9902
|
// If the previous node wasn't a string node, we don't want to trim
|
9788
9903
|
// whitespace. This could happen after an interpolated expression or
|
9789
9904
|
// variable.
|
9790
9905
|
if (index == 0 || YP_NODE_TYPE_P(nodes->nodes[index - 1], YP_NODE_STRING_NODE)) {
|
9791
9906
|
int cur_whitespace;
|
9792
|
-
const
|
9907
|
+
const uint8_t *cur_char = content_loc->start;
|
9793
9908
|
|
9794
9909
|
while (cur_char && cur_char < content_loc->end) {
|
9795
9910
|
// Any empty newlines aren't included in the minimum whitespace
|
@@ -9880,15 +9995,15 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
|
|
9880
9995
|
// destination to move bytes into. We'll also use it for bounds checking
|
9881
9996
|
// since we don't require that these strings be null terminated.
|
9882
9997
|
size_t dest_length = yp_string_length(string);
|
9883
|
-
|
9998
|
+
uint8_t *source_start = (uint8_t *) string->source;
|
9884
9999
|
|
9885
|
-
const
|
9886
|
-
const
|
10000
|
+
const uint8_t *source_cursor = source_start;
|
10001
|
+
const uint8_t *source_end = source_cursor + dest_length;
|
9887
10002
|
|
9888
10003
|
// We're going to move bytes backward in the string when we get leading
|
9889
10004
|
// whitespace, so we'll maintain a pointer to the current position in the
|
9890
10005
|
// string that we're writing to.
|
9891
|
-
|
10006
|
+
uint8_t *dest_cursor = source_start;
|
9892
10007
|
|
9893
10008
|
while (source_cursor < source_end) {
|
9894
10009
|
// If we need to dedent the next element within the heredoc or the next
|
@@ -9915,7 +10030,7 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
|
|
9915
10030
|
|
9916
10031
|
// At this point we have dedented all that we need to, so we need to find
|
9917
10032
|
// the next newline.
|
9918
|
-
const
|
10033
|
+
const uint8_t *breakpoint = next_newline(source_cursor, source_end - source_cursor);
|
9919
10034
|
|
9920
10035
|
if (breakpoint == NULL) {
|
9921
10036
|
// If there isn't another newline, then we can just move the rest of the
|
@@ -10127,7 +10242,7 @@ parse_pattern_hash(yp_parser_t *parser, yp_node_t *first_assoc) {
|
|
10127
10242
|
yp_node_t *key = ((yp_assoc_node_t *) first_assoc)->key;
|
10128
10243
|
|
10129
10244
|
if (YP_NODE_TYPE_P(key, YP_NODE_SYMBOL_NODE)) {
|
10130
|
-
yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
|
10245
|
+
const yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
|
10131
10246
|
yp_parser_local_add_location(parser, value_loc->start, value_loc->end);
|
10132
10247
|
}
|
10133
10248
|
}
|
@@ -10155,7 +10270,7 @@ parse_pattern_hash(yp_parser_t *parser, yp_node_t *first_assoc) {
|
|
10155
10270
|
if (!match_any_type_p(parser, 7, YP_TOKEN_COMMA, YP_TOKEN_KEYWORD_THEN, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_BRACKET_RIGHT, YP_TOKEN_PARENTHESIS_RIGHT, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) {
|
10156
10271
|
value = parse_pattern(parser, false, "Expected a pattern expression after the key.");
|
10157
10272
|
} else {
|
10158
|
-
yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
|
10273
|
+
const yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
|
10159
10274
|
yp_parser_local_add_location(parser, value_loc->start, value_loc->end);
|
10160
10275
|
}
|
10161
10276
|
|
@@ -10817,7 +10932,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|
10817
10932
|
|
10818
10933
|
yp_token_t closing = not_provided(parser);
|
10819
10934
|
|
10820
|
-
return (yp_node_t *)
|
10935
|
+
return (yp_node_t *) yp_char_literal_node_create_and_unescape(parser, &opening, &content, &closing, YP_UNESCAPE_ALL);
|
10821
10936
|
}
|
10822
10937
|
case YP_TOKEN_CLASS_VARIABLE: {
|
10823
10938
|
parser_lex(parser);
|
@@ -11362,7 +11477,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|
11362
11477
|
|
11363
11478
|
yp_node_t *statements = NULL;
|
11364
11479
|
if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
|
11480
|
+
yp_accepts_block_stack_push(parser, true);
|
11365
11481
|
statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_SCLASS);
|
11482
|
+
yp_accepts_block_stack_pop(parser);
|
11366
11483
|
}
|
11367
11484
|
|
11368
11485
|
if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
|
@@ -11643,7 +11760,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|
11643
11760
|
yp_do_loop_stack_push(parser, false);
|
11644
11761
|
|
11645
11762
|
if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
|
11763
|
+
yp_accepts_block_stack_push(parser, true);
|
11646
11764
|
statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_DEF);
|
11765
|
+
yp_accepts_block_stack_pop(parser);
|
11647
11766
|
}
|
11648
11767
|
|
11649
11768
|
if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
|
@@ -11933,14 +12052,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|
11933
12052
|
yp_array_node_t *array = yp_array_node_create(parser, &parser->previous);
|
11934
12053
|
|
11935
12054
|
while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
|
11936
|
-
|
11937
|
-
accept(parser, YP_TOKEN_WORDS_SEP);
|
11938
|
-
} else {
|
11939
|
-
expect(parser, YP_TOKEN_WORDS_SEP, "Expected a separator for the symbols in a `%i` list.");
|
11940
|
-
if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
|
11941
|
-
}
|
11942
|
-
|
12055
|
+
accept(parser, YP_TOKEN_WORDS_SEP);
|
11943
12056
|
if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
|
12057
|
+
|
11944
12058
|
expect(parser, YP_TOKEN_STRING_CONTENT, "Expected a symbol in a `%i` list.");
|
11945
12059
|
|
11946
12060
|
yp_token_t opening = not_provided(parser);
|
@@ -11995,6 +12109,19 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|
11995
12109
|
// to the list of child nodes.
|
11996
12110
|
yp_node_t *part = parse_string_part(parser);
|
11997
12111
|
yp_interpolated_symbol_node_append((yp_interpolated_symbol_node_t *) current, part);
|
12112
|
+
} else if (YP_NODE_TYPE_P(current, YP_NODE_SYMBOL_NODE)) {
|
12113
|
+
// If we hit string content and the current node is a string node,
|
12114
|
+
// then we need to convert the current node into an interpolated
|
12115
|
+
// string and add the string content to the list of child nodes.
|
12116
|
+
yp_token_t opening = not_provided(parser);
|
12117
|
+
yp_token_t closing = not_provided(parser);
|
12118
|
+
yp_interpolated_symbol_node_t *interpolated =
|
12119
|
+
yp_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
|
12120
|
+
yp_interpolated_symbol_node_append(interpolated, current);
|
12121
|
+
|
12122
|
+
yp_node_t *part = parse_string_part(parser);
|
12123
|
+
yp_interpolated_symbol_node_append(interpolated, part);
|
12124
|
+
current = (yp_node_t *) interpolated;
|
11998
12125
|
} else {
|
11999
12126
|
assert(false && "unreachable");
|
12000
12127
|
}
|
@@ -12097,12 +12224,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|
12097
12224
|
accept(parser, YP_TOKEN_WORDS_SEP);
|
12098
12225
|
|
12099
12226
|
while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
|
12100
|
-
|
12101
|
-
|
12102
|
-
|
12103
|
-
expect(parser, YP_TOKEN_WORDS_SEP, "Expected a separator for the strings in a `%w` list.");
|
12104
|
-
if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
|
12105
|
-
}
|
12227
|
+
accept(parser, YP_TOKEN_WORDS_SEP);
|
12228
|
+
if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
|
12229
|
+
|
12106
12230
|
expect(parser, YP_TOKEN_STRING_CONTENT, "Expected a string in a `%w` list.");
|
12107
12231
|
|
12108
12232
|
yp_token_t opening = not_provided(parser);
|
@@ -12152,6 +12276,19 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|
12152
12276
|
// to the list of child nodes.
|
12153
12277
|
yp_node_t *part = parse_string_part(parser);
|
12154
12278
|
yp_interpolated_string_node_append((yp_interpolated_string_node_t *) current, part);
|
12279
|
+
} else if (YP_NODE_TYPE_P(current, YP_NODE_STRING_NODE)) {
|
12280
|
+
// If we hit string content and the current node is a string node,
|
12281
|
+
// then we need to convert the current node into an interpolated
|
12282
|
+
// string and add the string content to the list of child nodes.
|
12283
|
+
yp_token_t opening = not_provided(parser);
|
12284
|
+
yp_token_t closing = not_provided(parser);
|
12285
|
+
yp_interpolated_string_node_t *interpolated =
|
12286
|
+
yp_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
12287
|
+
yp_interpolated_string_node_append(interpolated, current);
|
12288
|
+
|
12289
|
+
yp_node_t *part = parse_string_part(parser);
|
12290
|
+
yp_interpolated_string_node_append(interpolated, part);
|
12291
|
+
current = (yp_node_t *) interpolated;
|
12155
12292
|
} else {
|
12156
12293
|
assert(false && "unreachable");
|
12157
12294
|
}
|
@@ -12482,7 +12619,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|
12482
12619
|
opening = parser->previous;
|
12483
12620
|
|
12484
12621
|
if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_END, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
|
12622
|
+
yp_accepts_block_stack_push(parser, true);
|
12485
12623
|
body = (yp_node_t *) parse_statements(parser, YP_CONTEXT_LAMBDA_DO_END);
|
12624
|
+
yp_accepts_block_stack_pop(parser);
|
12486
12625
|
}
|
12487
12626
|
|
12488
12627
|
if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
|
@@ -12759,7 +12898,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
12759
12898
|
parser_lex(parser);
|
12760
12899
|
|
12761
12900
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
|
12762
|
-
yp_node_t *result = (yp_node_t *) yp_class_variable_and_write_node_create(parser, node, &token, value);
|
12901
|
+
yp_node_t *result = (yp_node_t *) yp_class_variable_and_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value);
|
12763
12902
|
|
12764
12903
|
yp_node_destroy(parser, node);
|
12765
12904
|
return result;
|
@@ -12783,7 +12922,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
12783
12922
|
parser_lex(parser);
|
12784
12923
|
|
12785
12924
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
|
12786
|
-
yp_node_t *result = (yp_node_t *) yp_instance_variable_and_write_node_create(parser, node, &token, value);
|
12925
|
+
yp_node_t *result = (yp_node_t *) yp_instance_variable_and_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value);
|
12787
12926
|
|
12788
12927
|
yp_node_destroy(parser, node);
|
12789
12928
|
return result;
|
@@ -12793,7 +12932,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
12793
12932
|
parser_lex(parser);
|
12794
12933
|
|
12795
12934
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
|
12796
|
-
yp_node_t *result = (yp_node_t *) yp_local_variable_and_write_node_create(parser, node, &token, value, cast->
|
12935
|
+
yp_node_t *result = (yp_node_t *) yp_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
|
12797
12936
|
|
12798
12937
|
yp_node_destroy(parser, node);
|
12799
12938
|
return result;
|
@@ -12860,7 +12999,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
12860
12999
|
parser_lex(parser);
|
12861
13000
|
|
12862
13001
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
|
12863
|
-
yp_node_t *result = (yp_node_t *) yp_class_variable_or_write_node_create(parser, node, &token, value);
|
13002
|
+
yp_node_t *result = (yp_node_t *) yp_class_variable_or_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value);
|
12864
13003
|
|
12865
13004
|
yp_node_destroy(parser, node);
|
12866
13005
|
return result;
|
@@ -12884,7 +13023,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
12884
13023
|
parser_lex(parser);
|
12885
13024
|
|
12886
13025
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
|
12887
|
-
yp_node_t *result = (yp_node_t *) yp_instance_variable_or_write_node_create(parser, node, &token, value);
|
13026
|
+
yp_node_t *result = (yp_node_t *) yp_instance_variable_or_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value);
|
12888
13027
|
|
12889
13028
|
yp_node_destroy(parser, node);
|
12890
13029
|
return result;
|
@@ -12894,7 +13033,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
12894
13033
|
parser_lex(parser);
|
12895
13034
|
|
12896
13035
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
|
12897
|
-
yp_node_t *result = (yp_node_t *) yp_local_variable_or_write_node_create(parser, node, &token, value, cast->
|
13036
|
+
yp_node_t *result = (yp_node_t *) yp_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
|
12898
13037
|
|
12899
13038
|
yp_node_destroy(parser, node);
|
12900
13039
|
return result;
|
@@ -12971,7 +13110,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
12971
13110
|
parser_lex(parser);
|
12972
13111
|
|
12973
13112
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
|
12974
|
-
yp_node_t *result = (yp_node_t *) yp_class_variable_operator_write_node_create(parser, node, &token, value);
|
13113
|
+
yp_node_t *result = (yp_node_t *) yp_class_variable_operator_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value);
|
12975
13114
|
|
12976
13115
|
yp_node_destroy(parser, node);
|
12977
13116
|
return result;
|
@@ -12995,7 +13134,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
12995
13134
|
parser_lex(parser);
|
12996
13135
|
|
12997
13136
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
|
12998
|
-
yp_node_t *result = (yp_node_t *) yp_instance_variable_operator_write_node_create(parser, node, &token, value);
|
13137
|
+
yp_node_t *result = (yp_node_t *) yp_instance_variable_operator_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value);
|
12999
13138
|
|
13000
13139
|
yp_node_destroy(parser, node);
|
13001
13140
|
return result;
|
@@ -13005,7 +13144,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
13005
13144
|
parser_lex(parser);
|
13006
13145
|
|
13007
13146
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
|
13008
|
-
yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, cast->
|
13147
|
+
yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
|
13009
13148
|
|
13010
13149
|
yp_node_destroy(parser, node);
|
13011
13150
|
return result;
|
@@ -13083,7 +13222,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
13083
13222
|
yp_string_list_t named_captures;
|
13084
13223
|
yp_string_list_init(&named_captures);
|
13085
13224
|
|
13086
|
-
yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc;
|
13225
|
+
const yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc;
|
13087
13226
|
|
13088
13227
|
if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures, parser->encoding_changed, &parser->encoding)) {
|
13089
13228
|
for (size_t index = 0; index < named_captures.length; index++) {
|
@@ -13507,7 +13646,7 @@ yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
|
|
13507
13646
|
uint32_t local_size = yp_metadata_read_u32(metadata);
|
13508
13647
|
metadata += 4;
|
13509
13648
|
|
13510
|
-
yp_parser_local_add_location(parser, metadata, metadata + local_size);
|
13649
|
+
yp_parser_local_add_location(parser, (const uint8_t *) metadata, (const uint8_t *) (metadata + local_size));
|
13511
13650
|
metadata += local_size;
|
13512
13651
|
}
|
13513
13652
|
}
|
@@ -13519,7 +13658,7 @@ yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
|
|
13519
13658
|
|
13520
13659
|
// Initialize a parser with the given start and end pointers.
|
13521
13660
|
YP_EXPORTED_FUNCTION void
|
13522
|
-
yp_parser_init(yp_parser_t *parser, const
|
13661
|
+
yp_parser_init(yp_parser_t *parser, const uint8_t *source, size_t size, const char *filepath) {
|
13523
13662
|
assert(source != NULL);
|
13524
13663
|
|
13525
13664
|
// Set filepath to the file that was passed
|
@@ -13591,7 +13730,7 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
|
|
13591
13730
|
yp_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
|
13592
13731
|
|
13593
13732
|
// Skip past the UTF-8 BOM if it exists.
|
13594
|
-
if (size >= 3 &&
|
13733
|
+
if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
|
13595
13734
|
parser->current.end += 3;
|
13596
13735
|
parser->encoding_comment_start += 3;
|
13597
13736
|
}
|
@@ -13599,7 +13738,7 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
|
|
13599
13738
|
// If the first two bytes of the source are a shebang, then we'll indicate
|
13600
13739
|
// that the encoding comment is at the end of the shebang.
|
13601
13740
|
if (peek(parser) == '#' && peek_offset(parser, 1) == '!') {
|
13602
|
-
const
|
13741
|
+
const uint8_t *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
|
13603
13742
|
if (encoding_comment_start) {
|
13604
13743
|
parser->encoding_comment_start = encoding_comment_start + 1;
|
13605
13744
|
}
|
@@ -13671,7 +13810,7 @@ yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
|
|
13671
13810
|
// Parse and serialize the AST represented by the given source to the given
|
13672
13811
|
// buffer.
|
13673
13812
|
YP_EXPORTED_FUNCTION void
|
13674
|
-
yp_parse_serialize(const
|
13813
|
+
yp_parse_serialize(const uint8_t *source, size_t size, yp_buffer_t *buffer, const char *metadata) {
|
13675
13814
|
yp_parser_t parser;
|
13676
13815
|
yp_parser_init(&parser, source, size, NULL);
|
13677
13816
|
if (metadata) yp_parser_metadata(&parser, metadata);
|