yarp 0.9.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -1
- data/Makefile +5 -1
- data/config.yml +156 -125
- data/docs/encoding.md +5 -5
- data/docs/serialization.md +2 -2
- data/ext/yarp/api_node.c +142 -98
- data/ext/yarp/extension.c +21 -7
- data/ext/yarp/extension.h +1 -1
- data/include/yarp/ast.h +327 -18
- data/include/yarp/defines.h +2 -1
- data/include/yarp/diagnostic.h +3 -3
- data/include/yarp/enc/yp_encoding.h +10 -10
- data/include/yarp/parser.h +19 -19
- data/include/yarp/regexp.h +1 -1
- data/include/yarp/unescape.h +4 -4
- data/include/yarp/util/yp_buffer.h +3 -0
- data/include/yarp/util/yp_char.h +16 -16
- data/include/yarp/util/yp_constant_pool.h +2 -2
- data/include/yarp/util/yp_newline_list.h +5 -5
- data/include/yarp/util/yp_string.h +4 -4
- data/include/yarp/util/yp_string_list.h +0 -3
- data/include/yarp/util/yp_strpbrk.h +1 -1
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +5 -4
- data/lib/yarp/desugar_visitor.rb +59 -122
- data/lib/yarp/node.rb +230 -240
- data/lib/yarp/serialize.rb +16 -16
- data/lib/yarp.rb +5 -5
- data/src/diagnostic.c +1 -1
- data/src/enc/yp_big5.c +15 -42
- data/src/enc/yp_euc_jp.c +16 -43
- data/src/enc/yp_gbk.c +19 -46
- data/src/enc/yp_shift_jis.c +16 -43
- data/src/enc/yp_tables.c +36 -38
- data/src/enc/yp_unicode.c +20 -25
- data/src/enc/yp_windows_31j.c +16 -43
- data/src/node.c +1271 -899
- data/src/prettyprint.c +87 -48
- data/src/regexp.c +21 -21
- data/src/serialize.c +28 -15
- data/src/unescape.c +151 -121
- data/src/util/yp_buffer.c +7 -2
- data/src/util/yp_char.c +34 -34
- data/src/util/yp_constant_pool.c +4 -4
- data/src/util/yp_memchr.c +1 -1
- data/src/util/yp_newline_list.c +5 -4
- data/src/util/yp_string.c +22 -20
- data/src/util/yp_string_list.c +0 -6
- data/src/util/yp_strncasecmp.c +3 -6
- data/src/util/yp_strpbrk.c +8 -8
- data/src/yarp.c +355 -216
- data/yarp.gemspec +1 -1
- metadata +2 -2
data/src/yarp.c
CHANGED
@@ -161,14 +161,18 @@ debug_token(yp_token_t * token) {
|
|
161
161
|
|
162
162
|
#endif
|
163
163
|
|
164
|
+
/*
 * Minimum / maximum of two values.
 * Each argument appears twice in the expansion, so only pass expressions
 * that are free of side effects.
 */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
167
|
+
|
164
168
|
/******************************************************************************/
|
165
169
|
/* Lex mode manipulations */
|
166
170
|
/******************************************************************************/
|
167
171
|
|
168
172
|
// Returns the incrementor character that should be used to increment the
|
169
173
|
// nesting count if one is possible.
|
170
|
-
static inline
|
171
|
-
lex_mode_incrementor(const
|
174
|
+
static inline uint8_t
|
175
|
+
lex_mode_incrementor(const uint8_t start) {
|
172
176
|
switch (start) {
|
173
177
|
case '(':
|
174
178
|
case '[':
|
@@ -182,8 +186,8 @@ lex_mode_incrementor(const char start) {
|
|
182
186
|
|
183
187
|
// Returns the matching character that should be used to terminate a list
|
184
188
|
// beginning with the given character.
|
185
|
-
static inline
|
186
|
-
lex_mode_terminator(const
|
189
|
+
static inline uint8_t
|
190
|
+
lex_mode_terminator(const uint8_t start) {
|
187
191
|
switch (start) {
|
188
192
|
case '(':
|
189
193
|
return ')';
|
@@ -221,9 +225,9 @@ lex_mode_push(yp_parser_t *parser, yp_lex_mode_t lex_mode) {
|
|
221
225
|
|
222
226
|
// Push on a new list lex mode.
|
223
227
|
static inline bool
|
224
|
-
lex_mode_push_list(yp_parser_t *parser, bool interpolation,
|
225
|
-
|
226
|
-
|
228
|
+
lex_mode_push_list(yp_parser_t *parser, bool interpolation, uint8_t delimiter) {
|
229
|
+
uint8_t incrementor = lex_mode_incrementor(delimiter);
|
230
|
+
uint8_t terminator = lex_mode_terminator(delimiter);
|
227
231
|
|
228
232
|
yp_lex_mode_t lex_mode = {
|
229
233
|
.mode = YP_LEX_LIST,
|
@@ -237,7 +241,7 @@ lex_mode_push_list(yp_parser_t *parser, bool interpolation, char delimiter) {
|
|
237
241
|
|
238
242
|
// These are the places where we need to split up the content of the list.
|
239
243
|
// We'll use strpbrk to find the first of these characters.
|
240
|
-
|
244
|
+
uint8_t *breakpoints = lex_mode.as.list.breakpoints;
|
241
245
|
memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
|
242
246
|
|
243
247
|
// Now we'll add the terminator to the list of breakpoints.
|
@@ -260,7 +264,7 @@ lex_mode_push_list(yp_parser_t *parser, bool interpolation, char delimiter) {
|
|
260
264
|
|
261
265
|
// Push on a new regexp lex mode.
|
262
266
|
static inline bool
|
263
|
-
lex_mode_push_regexp(yp_parser_t *parser,
|
267
|
+
lex_mode_push_regexp(yp_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
|
264
268
|
yp_lex_mode_t lex_mode = {
|
265
269
|
.mode = YP_LEX_REGEXP,
|
266
270
|
.as.regexp = {
|
@@ -273,7 +277,7 @@ lex_mode_push_regexp(yp_parser_t *parser, char incrementor, char terminator) {
|
|
273
277
|
// These are the places where we need to split up the content of the
|
274
278
|
// regular expression. We'll use strpbrk to find the first of these
|
275
279
|
// characters.
|
276
|
-
|
280
|
+
uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
|
277
281
|
memcpy(breakpoints, "\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
|
278
282
|
|
279
283
|
// First we'll add the terminator.
|
@@ -289,7 +293,7 @@ lex_mode_push_regexp(yp_parser_t *parser, char incrementor, char terminator) {
|
|
289
293
|
|
290
294
|
// Push on a new string lex mode.
|
291
295
|
static inline bool
|
292
|
-
lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed,
|
296
|
+
lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
|
293
297
|
yp_lex_mode_t lex_mode = {
|
294
298
|
.mode = YP_LEX_STRING,
|
295
299
|
.as.string = {
|
@@ -303,7 +307,7 @@ lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed
|
|
303
307
|
|
304
308
|
// These are the places where we need to split up the content of the
|
305
309
|
// string. We'll use strpbrk to find the first of these characters.
|
306
|
-
|
310
|
+
uint8_t *breakpoints = lex_mode.as.string.breakpoints;
|
307
311
|
memcpy(breakpoints, "\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
|
308
312
|
|
309
313
|
// Now add in the terminator.
|
@@ -380,6 +384,9 @@ lex_state_arg_p(yp_parser_t *parser) {
|
|
380
384
|
|
381
385
|
static inline bool
|
382
386
|
lex_state_spcarg_p(yp_parser_t *parser, bool space_seen) {
|
387
|
+
if (parser->current.end >= parser->end) {
|
388
|
+
return false;
|
389
|
+
}
|
383
390
|
return lex_state_arg_p(parser) && space_seen && !yp_char_is_whitespace(*parser->current.end);
|
384
391
|
}
|
385
392
|
|
@@ -420,7 +427,7 @@ debug_lex_state_set(yp_parser_t *parser, yp_lex_state_t state, char const * call
|
|
420
427
|
|
421
428
|
// Retrieve the constant pool id for the given location.
|
422
429
|
static inline yp_constant_id_t
|
423
|
-
yp_parser_constant_id_location(yp_parser_t *parser, const
|
430
|
+
yp_parser_constant_id_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
424
431
|
return yp_constant_pool_insert(&parser->constant_pool, start, (size_t) (end - start));
|
425
432
|
}
|
426
433
|
|
@@ -606,13 +613,45 @@ yp_scope_node_init(yp_node_t *node, yp_scope_node_t *scope) {
|
|
606
613
|
/* Node creation functions */
|
607
614
|
/******************************************************************************/
|
608
615
|
|
616
|
+
// Parse the decimal number represented by the range of bytes. returns
|
617
|
+
// UINT32_MAX if the number fails to parse. This function assumes that the range
|
618
|
+
// of bytes has already been validated to contain only decimal digits.
|
619
|
+
static uint32_t
|
620
|
+
parse_decimal_number(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
621
|
+
ptrdiff_t diff = end - start;
|
622
|
+
assert(diff > 0 && ((unsigned long) diff < SIZE_MAX));
|
623
|
+
size_t length = (size_t) diff;
|
624
|
+
|
625
|
+
char *digits = calloc(length + 1, sizeof(char));
|
626
|
+
memcpy(digits, start, length);
|
627
|
+
digits[length] = '\0';
|
628
|
+
|
629
|
+
char *endptr;
|
630
|
+
errno = 0;
|
631
|
+
unsigned long value = strtoul(digits, &endptr, 10);
|
632
|
+
|
633
|
+
if ((digits == endptr) || (*endptr != '\0') || (errno == ERANGE)) {
|
634
|
+
yp_diagnostic_list_append(&parser->error_list, start, end, "invalid decimal number");
|
635
|
+
value = UINT32_MAX;
|
636
|
+
}
|
637
|
+
|
638
|
+
free(digits);
|
639
|
+
|
640
|
+
if (value > UINT32_MAX) {
|
641
|
+
yp_diagnostic_list_append(&parser->error_list, start, end, "invalid decimal number");
|
642
|
+
value = UINT32_MAX;
|
643
|
+
}
|
644
|
+
|
645
|
+
return (uint32_t) value;
|
646
|
+
}
|
647
|
+
|
609
648
|
// Parse out the options for a regular expression.
|
610
649
|
static inline yp_node_flags_t
|
611
650
|
yp_regular_expression_flags_create(const yp_token_t *closing) {
|
612
651
|
yp_node_flags_t flags = 0;
|
613
652
|
|
614
653
|
if (closing->type == YP_TOKEN_REGEXP_END) {
|
615
|
-
for (const
|
654
|
+
for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
|
616
655
|
switch (*flag) {
|
617
656
|
case 'i': flags |= YP_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
|
618
657
|
case 'm': flags |= YP_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
|
@@ -654,7 +693,7 @@ yp_alloc_node(YP_ATTRIBUTE_UNUSED yp_parser_t *parser, size_t size) {
|
|
654
693
|
|
655
694
|
// Allocate a new MissingNode node.
|
656
695
|
static yp_missing_node_t *
|
657
|
-
yp_missing_node_create(yp_parser_t *parser, const
|
696
|
+
yp_missing_node_create(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
658
697
|
yp_missing_node_t *node = YP_ALLOC_NODE(parser, yp_missing_node_t);
|
659
698
|
*node = (yp_missing_node_t) {{ .type = YP_NODE_MISSING_NODE, .location = { .start = start, .end = end } }};
|
660
699
|
return node;
|
@@ -923,7 +962,7 @@ yp_array_pattern_node_requireds_append(yp_array_pattern_node_t *node, yp_node_t
|
|
923
962
|
static yp_assoc_node_t *
|
924
963
|
yp_assoc_node_create(yp_parser_t *parser, yp_node_t *key, const yp_token_t *operator, yp_node_t *value) {
|
925
964
|
yp_assoc_node_t *node = YP_ALLOC_NODE(parser, yp_assoc_node_t);
|
926
|
-
const
|
965
|
+
const uint8_t *end;
|
927
966
|
|
928
967
|
if (value != NULL) {
|
929
968
|
end = value->location.end;
|
@@ -1107,7 +1146,7 @@ static yp_block_parameters_node_t *
|
|
1107
1146
|
yp_block_parameters_node_create(yp_parser_t *parser, yp_parameters_node_t *parameters, const yp_token_t *opening) {
|
1108
1147
|
yp_block_parameters_node_t *node = YP_ALLOC_NODE(parser, yp_block_parameters_node_t);
|
1109
1148
|
|
1110
|
-
const
|
1149
|
+
const uint8_t *start;
|
1111
1150
|
if (opening->type != YP_TOKEN_NOT_PROVIDED) {
|
1112
1151
|
start = opening->start;
|
1113
1152
|
} else if (parameters != NULL) {
|
@@ -1116,7 +1155,7 @@ yp_block_parameters_node_create(yp_parser_t *parser, yp_parameters_node_t *param
|
|
1116
1155
|
start = NULL;
|
1117
1156
|
}
|
1118
1157
|
|
1119
|
-
const
|
1158
|
+
const uint8_t *end;
|
1120
1159
|
if (parameters != NULL) {
|
1121
1160
|
end = parameters->base.location.end;
|
1122
1161
|
} else if (opening->type != YP_TOKEN_NOT_PROVIDED) {
|
@@ -1237,8 +1276,8 @@ static yp_call_node_t *
|
|
1237
1276
|
yp_call_node_binary_create(yp_parser_t *parser, yp_node_t *receiver, yp_token_t *operator, yp_node_t *argument) {
|
1238
1277
|
yp_call_node_t *node = yp_call_node_create(parser);
|
1239
1278
|
|
1240
|
-
node->base.location.start = receiver->location.start;
|
1241
|
-
node->base.location.end = argument->location.end;
|
1279
|
+
node->base.location.start = MIN(receiver->location.start, argument->location.start);
|
1280
|
+
node->base.location.end = MAX(receiver->location.end, argument->location.end);
|
1242
1281
|
|
1243
1282
|
node->receiver = receiver;
|
1244
1283
|
node->message_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
|
@@ -1434,7 +1473,7 @@ yp_call_operator_write_node_create(yp_parser_t *parser, yp_call_node_t *target,
|
|
1434
1473
|
.target = target,
|
1435
1474
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
1436
1475
|
.value = value,
|
1437
|
-
.
|
1476
|
+
.operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
1438
1477
|
};
|
1439
1478
|
|
1440
1479
|
return node;
|
@@ -1555,8 +1594,7 @@ yp_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const y
|
|
1555
1594
|
|
1556
1595
|
// Allocate and initialize a new ClassVariableAndWriteNode node.
|
1557
1596
|
static yp_class_variable_and_write_node_t *
|
1558
|
-
yp_class_variable_and_write_node_create(yp_parser_t *parser,
|
1559
|
-
assert(YP_NODE_TYPE_P(target, YP_NODE_CLASS_VARIABLE_READ_NODE));
|
1597
|
+
yp_class_variable_and_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
|
1560
1598
|
assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
|
1561
1599
|
yp_class_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_and_write_node_t);
|
1562
1600
|
|
@@ -1564,11 +1602,12 @@ yp_class_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target,
|
|
1564
1602
|
{
|
1565
1603
|
.type = YP_NODE_CLASS_VARIABLE_AND_WRITE_NODE,
|
1566
1604
|
.location = {
|
1567
|
-
.start = target->location.start,
|
1605
|
+
.start = target->base.location.start,
|
1568
1606
|
.end = value->location.end
|
1569
1607
|
}
|
1570
1608
|
},
|
1571
|
-
.
|
1609
|
+
.name = target->name,
|
1610
|
+
.name_loc = target->base.location,
|
1572
1611
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
1573
1612
|
.value = value
|
1574
1613
|
};
|
@@ -1578,18 +1617,19 @@ yp_class_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target,
|
|
1578
1617
|
|
1579
1618
|
// Allocate and initialize a new ClassVariableOperatorWriteNode node.
|
1580
1619
|
static yp_class_variable_operator_write_node_t *
|
1581
|
-
yp_class_variable_operator_write_node_create(yp_parser_t *parser,
|
1620
|
+
yp_class_variable_operator_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
|
1582
1621
|
yp_class_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_operator_write_node_t);
|
1583
1622
|
|
1584
1623
|
*node = (yp_class_variable_operator_write_node_t) {
|
1585
1624
|
{
|
1586
1625
|
.type = YP_NODE_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
|
1587
1626
|
.location = {
|
1588
|
-
.start = target->location.start,
|
1627
|
+
.start = target->base.location.start,
|
1589
1628
|
.end = value->location.end
|
1590
1629
|
}
|
1591
1630
|
},
|
1592
|
-
.
|
1631
|
+
.name = target->name,
|
1632
|
+
.name_loc = target->base.location,
|
1593
1633
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
1594
1634
|
.value = value,
|
1595
1635
|
.operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
@@ -1600,8 +1640,7 @@ yp_class_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *tar
|
|
1600
1640
|
|
1601
1641
|
// Allocate and initialize a new ClassVariableOrWriteNode node.
|
1602
1642
|
static yp_class_variable_or_write_node_t *
|
1603
|
-
yp_class_variable_or_write_node_create(yp_parser_t *parser,
|
1604
|
-
assert(YP_NODE_TYPE_P(target, YP_NODE_CLASS_VARIABLE_READ_NODE));
|
1643
|
+
yp_class_variable_or_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
|
1605
1644
|
assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
|
1606
1645
|
yp_class_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_or_write_node_t);
|
1607
1646
|
|
@@ -1609,11 +1648,12 @@ yp_class_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, c
|
|
1609
1648
|
{
|
1610
1649
|
.type = YP_NODE_CLASS_VARIABLE_OR_WRITE_NODE,
|
1611
1650
|
.location = {
|
1612
|
-
.start = target->location.start,
|
1651
|
+
.start = target->base.location.start,
|
1613
1652
|
.end = value->location.end
|
1614
1653
|
}
|
1615
1654
|
},
|
1616
|
-
.
|
1655
|
+
.name = target->name,
|
1656
|
+
.name_loc = target->base.location,
|
1617
1657
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
1618
1658
|
.value = value
|
1619
1659
|
};
|
@@ -1626,13 +1666,21 @@ static yp_class_variable_read_node_t *
|
|
1626
1666
|
yp_class_variable_read_node_create(yp_parser_t *parser, const yp_token_t *token) {
|
1627
1667
|
assert(token->type == YP_TOKEN_CLASS_VARIABLE);
|
1628
1668
|
yp_class_variable_read_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_read_node_t);
|
1629
|
-
|
1669
|
+
|
1670
|
+
*node = (yp_class_variable_read_node_t) {
|
1671
|
+
{
|
1672
|
+
.type = YP_NODE_CLASS_VARIABLE_READ_NODE,
|
1673
|
+
.location = YP_LOCATION_TOKEN_VALUE(token)
|
1674
|
+
},
|
1675
|
+
.name = yp_parser_constant_id_location(parser, token->start, token->end)
|
1676
|
+
};
|
1677
|
+
|
1630
1678
|
return node;
|
1631
1679
|
}
|
1632
1680
|
|
1633
1681
|
// Initialize a new ClassVariableWriteNode node from a ClassVariableRead node.
|
1634
1682
|
static yp_class_variable_write_node_t *
|
1635
|
-
|
1683
|
+
yp_class_variable_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *read_node, yp_token_t *operator, yp_node_t *value) {
|
1636
1684
|
yp_class_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_write_node_t);
|
1637
1685
|
|
1638
1686
|
*node = (yp_class_variable_write_node_t) {
|
@@ -1643,6 +1691,7 @@ yp_class_variable_read_node_to_class_variable_write_node(yp_parser_t *parser, yp
|
|
1643
1691
|
.end = value->location.end
|
1644
1692
|
},
|
1645
1693
|
},
|
1694
|
+
.name = read_node->name,
|
1646
1695
|
.name_loc = YP_LOCATION_NODE_VALUE((yp_node_t *) read_node),
|
1647
1696
|
.operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
|
1648
1697
|
.value = value
|
@@ -1875,7 +1924,7 @@ yp_def_node_create(
|
|
1875
1924
|
const yp_token_t *end_keyword
|
1876
1925
|
) {
|
1877
1926
|
yp_def_node_t *node = YP_ALLOC_NODE(parser, yp_def_node_t);
|
1878
|
-
const
|
1927
|
+
const uint8_t *end;
|
1879
1928
|
|
1880
1929
|
if (end_keyword->type == YP_TOKEN_NOT_PROVIDED) {
|
1881
1930
|
end = body->location.end;
|
@@ -1930,7 +1979,7 @@ yp_defined_node_create(yp_parser_t *parser, const yp_token_t *lparen, yp_node_t
|
|
1930
1979
|
static yp_else_node_t *
|
1931
1980
|
yp_else_node_create(yp_parser_t *parser, const yp_token_t *else_keyword, yp_statements_node_t *statements, const yp_token_t *end_keyword) {
|
1932
1981
|
yp_else_node_t *node = YP_ALLOC_NODE(parser, yp_else_node_t);
|
1933
|
-
const
|
1982
|
+
const uint8_t *end = NULL;
|
1934
1983
|
if ((end_keyword->type == YP_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
|
1935
1984
|
end = statements->base.location.end;
|
1936
1985
|
} else {
|
@@ -2410,7 +2459,7 @@ yp_if_node_create(yp_parser_t *parser,
|
|
2410
2459
|
yp_flip_flop(predicate);
|
2411
2460
|
yp_if_node_t *node = YP_ALLOC_NODE(parser, yp_if_node_t);
|
2412
2461
|
|
2413
|
-
const
|
2462
|
+
const uint8_t *end;
|
2414
2463
|
if (end_keyword->type != YP_TOKEN_NOT_PROVIDED) {
|
2415
2464
|
end = end_keyword->end;
|
2416
2465
|
} else if (consequent != NULL) {
|
@@ -2593,7 +2642,7 @@ static yp_in_node_t *
|
|
2593
2642
|
yp_in_node_create(yp_parser_t *parser, yp_node_t *pattern, yp_statements_node_t *statements, const yp_token_t *in_keyword, const yp_token_t *then_keyword) {
|
2594
2643
|
yp_in_node_t *node = YP_ALLOC_NODE(parser, yp_in_node_t);
|
2595
2644
|
|
2596
|
-
const
|
2645
|
+
const uint8_t *end;
|
2597
2646
|
if (statements != NULL) {
|
2598
2647
|
end = statements->base.location.end;
|
2599
2648
|
} else if (then_keyword->type != YP_TOKEN_NOT_PROVIDED) {
|
@@ -2621,8 +2670,7 @@ yp_in_node_create(yp_parser_t *parser, yp_node_t *pattern, yp_statements_node_t
|
|
2621
2670
|
|
2622
2671
|
// Allocate and initialize a new InstanceVariableAndWriteNode node.
|
2623
2672
|
static yp_instance_variable_and_write_node_t *
|
2624
|
-
yp_instance_variable_and_write_node_create(yp_parser_t *parser,
|
2625
|
-
assert(YP_NODE_TYPE_P(target, YP_NODE_INSTANCE_VARIABLE_READ_NODE));
|
2673
|
+
yp_instance_variable_and_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
|
2626
2674
|
assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
|
2627
2675
|
yp_instance_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_and_write_node_t);
|
2628
2676
|
|
@@ -2630,11 +2678,12 @@ yp_instance_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *targe
|
|
2630
2678
|
{
|
2631
2679
|
.type = YP_NODE_INSTANCE_VARIABLE_AND_WRITE_NODE,
|
2632
2680
|
.location = {
|
2633
|
-
.start = target->location.start,
|
2681
|
+
.start = target->base.location.start,
|
2634
2682
|
.end = value->location.end
|
2635
2683
|
}
|
2636
2684
|
},
|
2637
|
-
.
|
2685
|
+
.name = target->name,
|
2686
|
+
.name_loc = target->base.location,
|
2638
2687
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
2639
2688
|
.value = value
|
2640
2689
|
};
|
@@ -2644,18 +2693,19 @@ yp_instance_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *targe
|
|
2644
2693
|
|
2645
2694
|
// Allocate and initialize a new InstanceVariableOperatorWriteNode node.
|
2646
2695
|
static yp_instance_variable_operator_write_node_t *
|
2647
|
-
yp_instance_variable_operator_write_node_create(yp_parser_t *parser,
|
2696
|
+
yp_instance_variable_operator_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
|
2648
2697
|
yp_instance_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_operator_write_node_t);
|
2649
2698
|
|
2650
2699
|
*node = (yp_instance_variable_operator_write_node_t) {
|
2651
2700
|
{
|
2652
2701
|
.type = YP_NODE_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
|
2653
2702
|
.location = {
|
2654
|
-
.start = target->location.start,
|
2703
|
+
.start = target->base.location.start,
|
2655
2704
|
.end = value->location.end
|
2656
2705
|
}
|
2657
2706
|
},
|
2658
|
-
.
|
2707
|
+
.name = target->name,
|
2708
|
+
.name_loc = target->base.location,
|
2659
2709
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
2660
2710
|
.value = value,
|
2661
2711
|
.operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1)
|
@@ -2666,8 +2716,7 @@ yp_instance_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *
|
|
2666
2716
|
|
2667
2717
|
// Allocate and initialize a new InstanceVariableOrWriteNode node.
|
2668
2718
|
static yp_instance_variable_or_write_node_t *
|
2669
|
-
yp_instance_variable_or_write_node_create(yp_parser_t *parser,
|
2670
|
-
assert(YP_NODE_TYPE_P(target, YP_NODE_INSTANCE_VARIABLE_READ_NODE));
|
2719
|
+
yp_instance_variable_or_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) {
|
2671
2720
|
assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
|
2672
2721
|
yp_instance_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_or_write_node_t);
|
2673
2722
|
|
@@ -2675,11 +2724,12 @@ yp_instance_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target
|
|
2675
2724
|
{
|
2676
2725
|
.type = YP_NODE_INSTANCE_VARIABLE_OR_WRITE_NODE,
|
2677
2726
|
.location = {
|
2678
|
-
.start = target->location.start,
|
2727
|
+
.start = target->base.location.start,
|
2679
2728
|
.end = value->location.end
|
2680
2729
|
}
|
2681
2730
|
},
|
2682
|
-
.
|
2731
|
+
.name = target->name,
|
2732
|
+
.name_loc = target->base.location,
|
2683
2733
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
2684
2734
|
.value = value
|
2685
2735
|
};
|
@@ -2693,9 +2743,13 @@ yp_instance_variable_read_node_create(yp_parser_t *parser, const yp_token_t *tok
|
|
2693
2743
|
assert(token->type == YP_TOKEN_INSTANCE_VARIABLE);
|
2694
2744
|
yp_instance_variable_read_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_read_node_t);
|
2695
2745
|
|
2696
|
-
*node = (yp_instance_variable_read_node_t) {
|
2697
|
-
|
2698
|
-
|
2746
|
+
*node = (yp_instance_variable_read_node_t) {
|
2747
|
+
{
|
2748
|
+
.type = YP_NODE_INSTANCE_VARIABLE_READ_NODE,
|
2749
|
+
.location = YP_LOCATION_TOKEN_VALUE(token)
|
2750
|
+
},
|
2751
|
+
.name = yp_parser_constant_id_location(parser, token->start, token->end)
|
2752
|
+
};
|
2699
2753
|
|
2700
2754
|
return node;
|
2701
2755
|
}
|
@@ -2712,6 +2766,7 @@ yp_instance_variable_write_node_create(yp_parser_t *parser, yp_instance_variable
|
|
2712
2766
|
.end = value->location.end
|
2713
2767
|
}
|
2714
2768
|
},
|
2769
|
+
.name = read_node->name,
|
2715
2770
|
.name_loc = YP_LOCATION_NODE_BASE_VALUE(read_node),
|
2716
2771
|
.operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
|
2717
2772
|
.value = value
|
@@ -2743,8 +2798,13 @@ yp_interpolated_regular_expression_node_create(yp_parser_t *parser, const yp_tok
|
|
2743
2798
|
|
2744
2799
|
static inline void
|
2745
2800
|
yp_interpolated_regular_expression_node_append(yp_interpolated_regular_expression_node_t *node, yp_node_t *part) {
|
2801
|
+
if (node->base.location.start > part->location.start) {
|
2802
|
+
node->base.location.start = part->location.start;
|
2803
|
+
}
|
2804
|
+
if (node->base.location.end < part->location.end) {
|
2805
|
+
node->base.location.end = part->location.end;
|
2806
|
+
}
|
2746
2807
|
yp_node_list_append(&node->parts, part);
|
2747
|
-
node->base.location.end = part->location.end;
|
2748
2808
|
}
|
2749
2809
|
|
2750
2810
|
static inline void
|
@@ -2816,10 +2876,11 @@ yp_interpolated_symbol_node_create(yp_parser_t *parser, const yp_token_t *openin
|
|
2816
2876
|
|
2817
2877
|
static inline void
|
2818
2878
|
yp_interpolated_symbol_node_append(yp_interpolated_symbol_node_t *node, yp_node_t *part) {
|
2819
|
-
|
2820
|
-
if (!node->base.location.start) {
|
2879
|
+
if (node->parts.size == 0 && node->opening_loc.start == NULL) {
|
2821
2880
|
node->base.location.start = part->location.start;
|
2822
2881
|
}
|
2882
|
+
|
2883
|
+
yp_node_list_append(&node->parts, part);
|
2823
2884
|
node->base.location.end = part->location.end;
|
2824
2885
|
}
|
2825
2886
|
|
@@ -2959,7 +3020,7 @@ yp_lambda_node_create(
|
|
2959
3020
|
|
2960
3021
|
// Allocate and initialize a new LocalVariableAndWriteNode node.
|
2961
3022
|
static yp_local_variable_and_write_node_t *
|
2962
|
-
yp_local_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t
|
3023
|
+
yp_local_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) {
|
2963
3024
|
assert(YP_NODE_TYPE_P(target, YP_NODE_LOCAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_CALL_NODE));
|
2964
3025
|
assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
|
2965
3026
|
yp_local_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_and_write_node_t);
|
@@ -2975,7 +3036,7 @@ yp_local_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target,
|
|
2975
3036
|
.name_loc = target->location,
|
2976
3037
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
2977
3038
|
.value = value,
|
2978
|
-
.
|
3039
|
+
.name = name,
|
2979
3040
|
.depth = depth
|
2980
3041
|
};
|
2981
3042
|
|
@@ -2984,7 +3045,7 @@ yp_local_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target,
|
|
2984
3045
|
|
2985
3046
|
// Allocate and initialize a new LocalVariableOperatorWriteNode node.
|
2986
3047
|
static yp_local_variable_operator_write_node_t *
|
2987
|
-
yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t
|
3048
|
+
yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) {
|
2988
3049
|
yp_local_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_operator_write_node_t);
|
2989
3050
|
|
2990
3051
|
*node = (yp_local_variable_operator_write_node_t) {
|
@@ -2998,8 +3059,8 @@ yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *tar
|
|
2998
3059
|
.name_loc = target->location,
|
2999
3060
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
3000
3061
|
.value = value,
|
3001
|
-
.
|
3002
|
-
.
|
3062
|
+
.name = name,
|
3063
|
+
.operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1),
|
3003
3064
|
.depth = depth
|
3004
3065
|
};
|
3005
3066
|
|
@@ -3008,7 +3069,7 @@ yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *tar
|
|
3008
3069
|
|
3009
3070
|
// Allocate and initialize a new LocalVariableOrWriteNode node.
|
3010
3071
|
static yp_local_variable_or_write_node_t *
|
3011
|
-
yp_local_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t
|
3072
|
+
yp_local_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) {
|
3012
3073
|
assert(YP_NODE_TYPE_P(target, YP_NODE_LOCAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NODE_CALL_NODE));
|
3013
3074
|
assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL);
|
3014
3075
|
yp_local_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_or_write_node_t);
|
@@ -3024,7 +3085,7 @@ yp_local_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, c
|
|
3024
3085
|
.name_loc = target->location,
|
3025
3086
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
3026
3087
|
.value = value,
|
3027
|
-
.
|
3088
|
+
.name = name,
|
3028
3089
|
.depth = depth
|
3029
3090
|
};
|
3030
3091
|
|
@@ -3041,7 +3102,7 @@ yp_local_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name,
|
|
3041
3102
|
.type = YP_NODE_LOCAL_VARIABLE_READ_NODE,
|
3042
3103
|
.location = YP_LOCATION_TOKEN_VALUE(name)
|
3043
3104
|
},
|
3044
|
-
.
|
3105
|
+
.name = yp_parser_constant_id_token(parser, name),
|
3045
3106
|
.depth = depth
|
3046
3107
|
};
|
3047
3108
|
|
@@ -3050,7 +3111,7 @@ yp_local_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name,
|
|
3050
3111
|
|
3051
3112
|
// Allocate and initialize a new LocalVariableWriteNode node.
|
3052
3113
|
static yp_local_variable_write_node_t *
|
3053
|
-
yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t
|
3114
|
+
yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t name, uint32_t depth, yp_node_t *value, const yp_location_t *name_loc, const yp_token_t *operator) {
|
3054
3115
|
yp_local_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_write_node_t);
|
3055
3116
|
|
3056
3117
|
*node = (yp_local_variable_write_node_t) {
|
@@ -3061,7 +3122,7 @@ yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t consta
|
|
3061
3122
|
.end = value->location.end
|
3062
3123
|
}
|
3063
3124
|
},
|
3064
|
-
.
|
3125
|
+
.name = name,
|
3065
3126
|
.depth = depth,
|
3066
3127
|
.value = value,
|
3067
3128
|
.name_loc = *name_loc,
|
@@ -3081,7 +3142,7 @@ yp_local_variable_target_node_create(yp_parser_t *parser, const yp_token_t *name
|
|
3081
3142
|
.type = YP_NODE_LOCAL_VARIABLE_TARGET_NODE,
|
3082
3143
|
.location = YP_LOCATION_TOKEN_VALUE(name)
|
3083
3144
|
},
|
3084
|
-
.
|
3145
|
+
.name = yp_parser_constant_id_token(parser, name),
|
3085
3146
|
.depth = 0
|
3086
3147
|
};
|
3087
3148
|
|
@@ -3260,7 +3321,8 @@ yp_numbered_reference_read_node_create(yp_parser_t *parser, const yp_token_t *na
|
|
3260
3321
|
{
|
3261
3322
|
.type = YP_NODE_NUMBERED_REFERENCE_READ_NODE,
|
3262
3323
|
.location = YP_LOCATION_TOKEN_VALUE(name),
|
3263
|
-
}
|
3324
|
+
},
|
3325
|
+
.number = parse_decimal_number(parser, name->start + 1, name->end)
|
3264
3326
|
};
|
3265
3327
|
|
3266
3328
|
return node;
|
@@ -3279,7 +3341,7 @@ yp_optional_parameter_node_create(yp_parser_t *parser, const yp_token_t *name, c
|
|
3279
3341
|
.end = value->location.end
|
3280
3342
|
}
|
3281
3343
|
},
|
3282
|
-
.
|
3344
|
+
.name = yp_parser_constant_id_token(parser, name),
|
3283
3345
|
.name_loc = YP_LOCATION_TOKEN_VALUE(name),
|
3284
3346
|
.operator_loc = YP_LOCATION_TOKEN_VALUE(operator),
|
3285
3347
|
.value = value
|
@@ -3576,8 +3638,8 @@ yp_regular_expression_node_create(yp_parser_t *parser, const yp_token_t *opening
|
|
3576
3638
|
.type = YP_NODE_REGULAR_EXPRESSION_NODE,
|
3577
3639
|
.flags = yp_regular_expression_flags_create(closing),
|
3578
3640
|
.location = {
|
3579
|
-
.start = opening->start,
|
3580
|
-
.end = closing->end
|
3641
|
+
.start = MIN(opening->start, closing->start),
|
3642
|
+
.end = MAX(opening->end, closing->end)
|
3581
3643
|
}
|
3582
3644
|
},
|
3583
3645
|
.opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
|
@@ -3630,7 +3692,7 @@ yp_required_parameter_node_create(yp_parser_t *parser, const yp_token_t *token)
|
|
3630
3692
|
.type = YP_NODE_REQUIRED_PARAMETER_NODE,
|
3631
3693
|
.location = YP_LOCATION_TOKEN_VALUE(token)
|
3632
3694
|
},
|
3633
|
-
.
|
3695
|
+
.name = yp_parser_constant_id_token(parser, token)
|
3634
3696
|
};
|
3635
3697
|
|
3636
3698
|
return node;
|
@@ -3881,19 +3943,21 @@ yp_statements_node_body_length(yp_statements_node_t *node) {
|
|
3881
3943
|
|
3882
3944
|
// Set the location of the given StatementsNode.
|
3883
3945
|
static void
|
3884
|
-
yp_statements_node_location_set(yp_statements_node_t *node, const
|
3946
|
+
yp_statements_node_location_set(yp_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
|
3885
3947
|
node->base.location = (yp_location_t) { .start = start, .end = end };
|
3886
3948
|
}
|
3887
3949
|
|
3888
3950
|
// Append a new node to the given StatementsNode node's body.
|
3889
3951
|
static void
|
3890
3952
|
yp_statements_node_body_append(yp_statements_node_t *node, yp_node_t *statement) {
|
3891
|
-
if (yp_statements_node_body_length(node) == 0) {
|
3953
|
+
if (yp_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
|
3892
3954
|
node->base.location.start = statement->location.start;
|
3893
3955
|
}
|
3956
|
+
if (statement->location.end > node->base.location.end) {
|
3957
|
+
node->base.location.end = statement->location.end;
|
3958
|
+
}
|
3894
3959
|
|
3895
3960
|
yp_node_list_append(&node->body, statement);
|
3896
|
-
node->base.location.end = statement->location.end;
|
3897
3961
|
|
3898
3962
|
// Every statement gets marked as a place where a newline can occur.
|
3899
3963
|
statement->flags |= YP_NODE_FLAG_NEWLINE;
|
@@ -3947,7 +4011,7 @@ yp_super_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_argument
|
|
3947
4011
|
assert(keyword->type == YP_TOKEN_KEYWORD_SUPER);
|
3948
4012
|
yp_super_node_t *node = YP_ALLOC_NODE(parser, yp_super_node_t);
|
3949
4013
|
|
3950
|
-
const
|
4014
|
+
const uint8_t *end;
|
3951
4015
|
if (arguments->block != NULL) {
|
3952
4016
|
end = arguments->block->base.location.end;
|
3953
4017
|
} else if (arguments->closing_loc.start != NULL) {
|
@@ -4038,7 +4102,7 @@ yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) {
|
|
4038
4102
|
// Check if the given node is a label in a hash.
|
4039
4103
|
static bool
|
4040
4104
|
yp_symbol_node_label_p(yp_node_t *node) {
|
4041
|
-
const
|
4105
|
+
const uint8_t *end = NULL;
|
4042
4106
|
|
4043
4107
|
switch (YP_NODE_TYPE(node)) {
|
4044
4108
|
case YP_NODE_SYMBOL_NODE:
|
@@ -4146,7 +4210,7 @@ yp_unless_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t
|
|
4146
4210
|
yp_flip_flop(predicate);
|
4147
4211
|
yp_unless_node_t *node = YP_ALLOC_NODE(parser, yp_unless_node_t);
|
4148
4212
|
|
4149
|
-
const
|
4213
|
+
const uint8_t *end;
|
4150
4214
|
if (statements != NULL) {
|
4151
4215
|
end = statements->base.location.end;
|
4152
4216
|
} else {
|
@@ -4363,7 +4427,7 @@ static yp_yield_node_t *
|
|
4363
4427
|
yp_yield_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_location_t *lparen_loc, yp_arguments_node_t *arguments, const yp_location_t *rparen_loc) {
|
4364
4428
|
yp_yield_node_t *node = YP_ALLOC_NODE(parser, yp_yield_node_t);
|
4365
4429
|
|
4366
|
-
const
|
4430
|
+
const uint8_t *end;
|
4367
4431
|
if (rparen_loc->start != NULL) {
|
4368
4432
|
end = rparen_loc->end;
|
4369
4433
|
} else if (arguments != NULL) {
|
@@ -4437,7 +4501,7 @@ yp_parser_local_depth(yp_parser_t *parser, yp_token_t *token) {
|
|
4437
4501
|
|
4438
4502
|
// Add a local variable from a location to the current scope.
|
4439
4503
|
static yp_constant_id_t
|
4440
|
-
yp_parser_local_add_location(yp_parser_t *parser, const
|
4504
|
+
yp_parser_local_add_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
|
4441
4505
|
yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, start, end);
|
4442
4506
|
|
4443
4507
|
if (!yp_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
|
@@ -4486,15 +4550,13 @@ yp_parser_scope_pop(yp_parser_t *parser) {
|
|
4486
4550
|
// reason we have the encoding_changed boolean to check if we need to go through
|
4487
4551
|
// the function pointer or can just directly use the UTF-8 functions.
|
4488
4552
|
static inline size_t
|
4489
|
-
char_is_identifier_start(yp_parser_t *parser, const
|
4490
|
-
const unsigned char uc = (unsigned char) *c;
|
4491
|
-
|
4553
|
+
char_is_identifier_start(yp_parser_t *parser, const uint8_t *b) {
|
4492
4554
|
if (parser->encoding_changed) {
|
4493
|
-
return parser->encoding.alpha_char(
|
4494
|
-
} else if (
|
4495
|
-
return (yp_encoding_unicode_table[
|
4555
|
+
return parser->encoding.alpha_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
|
4556
|
+
} else if (*b < 0x80) {
|
4557
|
+
return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
|
4496
4558
|
} else {
|
4497
|
-
return (size_t) (yp_encoding_utf_8_alpha_char(
|
4559
|
+
return (size_t) (yp_encoding_utf_8_alpha_char(b, parser->end - b) || 1u);
|
4498
4560
|
}
|
4499
4561
|
}
|
4500
4562
|
|
@@ -4502,15 +4564,13 @@ char_is_identifier_start(yp_parser_t *parser, const char *c) {
|
|
4502
4564
|
// the identifiers in a source file once the first character has been found. So
|
4503
4565
|
// it's important that it be as fast as possible.
|
4504
4566
|
static inline size_t
|
4505
|
-
char_is_identifier(yp_parser_t *parser, const
|
4506
|
-
const unsigned char uc = (unsigned char) *c;
|
4507
|
-
|
4567
|
+
char_is_identifier(yp_parser_t *parser, const uint8_t *b) {
|
4508
4568
|
if (parser->encoding_changed) {
|
4509
|
-
return parser->encoding.alnum_char(
|
4510
|
-
} else if (
|
4511
|
-
return (yp_encoding_unicode_table[
|
4569
|
+
return parser->encoding.alnum_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80);
|
4570
|
+
} else if (*b < 0x80) {
|
4571
|
+
return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (*b == '_');
|
4512
4572
|
} else {
|
4513
|
-
return (size_t) (yp_encoding_utf_8_alnum_char(
|
4573
|
+
return (size_t) (yp_encoding_utf_8_alnum_char(b, parser->end - b) || 1u);
|
4514
4574
|
}
|
4515
4575
|
}
|
4516
4576
|
|
@@ -4532,15 +4592,15 @@ const unsigned int yp_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
|
|
4532
4592
|
#undef PUNCT
|
4533
4593
|
|
4534
4594
|
static inline bool
|
4535
|
-
char_is_global_name_punctuation(const
|
4536
|
-
const unsigned int i = (const unsigned int)
|
4595
|
+
char_is_global_name_punctuation(const uint8_t b) {
|
4596
|
+
const unsigned int i = (const unsigned int) b;
|
4537
4597
|
if (i <= 0x20 || 0x7e < i) return false;
|
4538
4598
|
|
4539
|
-
return (yp_global_name_punctuation_hash[(i - 0x20) / 32] >> (
|
4599
|
+
return (yp_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
|
4540
4600
|
}
|
4541
4601
|
|
4542
4602
|
static inline bool
|
4543
|
-
token_is_numbered_parameter(const
|
4603
|
+
token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
|
4544
4604
|
return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (yp_char_is_decimal_digit(start[1]));
|
4545
4605
|
}
|
4546
4606
|
|
@@ -4594,8 +4654,8 @@ yp_do_loop_stack_p(yp_parser_t *parser) {
|
|
4594
4654
|
|
4595
4655
|
// Get the next character in the source starting from +cursor+. If that position
|
4596
4656
|
// is beyond the end of the source then return '\0'.
|
4597
|
-
static inline
|
4598
|
-
peek_at(yp_parser_t *parser, const
|
4657
|
+
static inline uint8_t
|
4658
|
+
peek_at(yp_parser_t *parser, const uint8_t *cursor) {
|
4599
4659
|
if (cursor < parser->end) {
|
4600
4660
|
return *cursor;
|
4601
4661
|
} else {
|
@@ -4606,33 +4666,33 @@ peek_at(yp_parser_t *parser, const char *cursor) {
|
|
4606
4666
|
// Get the next character in the source starting from parser->current.end and
|
4607
4667
|
// adding the given offset. If that position is beyond the end of the source
|
4608
4668
|
// then return '\0'.
|
4609
|
-
static inline
|
4669
|
+
static inline uint8_t
|
4610
4670
|
peek_offset(yp_parser_t *parser, ptrdiff_t offset) {
|
4611
4671
|
return peek_at(parser, parser->current.end + offset);
|
4612
4672
|
}
|
4613
4673
|
|
4614
4674
|
// Get the next character in the source starting from parser->current.end. If
|
4615
4675
|
// that position is beyond the end of the source then return '\0'.
|
4616
|
-
static inline
|
4676
|
+
static inline uint8_t
|
4617
4677
|
peek(yp_parser_t *parser) {
|
4618
4678
|
return peek_at(parser, parser->current.end);
|
4619
4679
|
}
|
4620
4680
|
|
4621
4681
|
// Get the next string of length len in the source starting from parser->current.end.
|
4622
4682
|
// If the string extends beyond the end of the source, return the empty string ""
|
4623
|
-
static inline const
|
4683
|
+
static inline const uint8_t *
|
4624
4684
|
peek_string(yp_parser_t *parser, size_t len) {
|
4625
4685
|
if (parser->current.end + len <= parser->end) {
|
4626
4686
|
return parser->current.end;
|
4627
4687
|
} else {
|
4628
|
-
return "";
|
4688
|
+
return (const uint8_t *) "";
|
4629
4689
|
}
|
4630
4690
|
}
|
4631
4691
|
|
4632
4692
|
// If the character to be read matches the given value, then returns true and
|
4633
4693
|
// advanced the current pointer.
|
4634
4694
|
static inline bool
|
4635
|
-
match(yp_parser_t *parser,
|
4695
|
+
match(yp_parser_t *parser, uint8_t value) {
|
4636
4696
|
if (peek(parser) == value) {
|
4637
4697
|
parser->current.end++;
|
4638
4698
|
return true;
|
@@ -4643,7 +4703,7 @@ match(yp_parser_t *parser, char value) {
|
|
4643
4703
|
// Return the length of the line ending string starting at +cursor+, or 0 if it
|
4644
4704
|
// is not a line ending. This function is intended to be CRLF/LF agnostic.
|
4645
4705
|
static inline size_t
|
4646
|
-
match_eol_at(yp_parser_t *parser, const
|
4706
|
+
match_eol_at(yp_parser_t *parser, const uint8_t *cursor) {
|
4647
4707
|
if (peek_at(parser, cursor) == '\n') {
|
4648
4708
|
return 1;
|
4649
4709
|
}
|
@@ -4670,8 +4730,8 @@ match_eol(yp_parser_t *parser) {
|
|
4670
4730
|
}
|
4671
4731
|
|
4672
4732
|
// Skip to the next newline character or NUL byte.
|
4673
|
-
static inline const
|
4674
|
-
next_newline(const
|
4733
|
+
static inline const uint8_t *
|
4734
|
+
next_newline(const uint8_t *cursor, ptrdiff_t length) {
|
4675
4735
|
assert(length >= 0);
|
4676
4736
|
|
4677
4737
|
// Note that it's okay for us to use memchr here to look for \n because none
|
@@ -4682,17 +4742,17 @@ next_newline(const char *cursor, ptrdiff_t length) {
|
|
4682
4742
|
|
4683
4743
|
// Find the start of the encoding comment. This is effectively an inlined
|
4684
4744
|
// version of strnstr with some modifications.
|
4685
|
-
static inline const
|
4686
|
-
parser_lex_encoding_comment_start(yp_parser_t *parser, const
|
4745
|
+
static inline const uint8_t *
|
4746
|
+
parser_lex_encoding_comment_start(yp_parser_t *parser, const uint8_t *cursor, ptrdiff_t remaining) {
|
4687
4747
|
assert(remaining >= 0);
|
4688
4748
|
size_t length = (size_t) remaining;
|
4689
4749
|
|
4690
4750
|
size_t key_length = strlen("coding:");
|
4691
4751
|
if (key_length > length) return NULL;
|
4692
4752
|
|
4693
|
-
const
|
4753
|
+
const uint8_t *cursor_limit = cursor + length - key_length + 1;
|
4694
4754
|
while ((cursor = yp_memchr(cursor, 'c', (size_t) (cursor_limit - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
|
4695
|
-
if (
|
4755
|
+
if (memcmp(cursor, "coding", key_length - 1) == 0) {
|
4696
4756
|
size_t whitespace_after_coding = yp_strspn_inline_whitespace(cursor + key_length - 1, parser->end - (cursor + key_length - 1));
|
4697
4757
|
size_t cur_pos = key_length + whitespace_after_coding;
|
4698
4758
|
|
@@ -4711,13 +4771,13 @@ parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdi
|
|
4711
4771
|
// actions are necessary for it here.
|
4712
4772
|
static void
|
4713
4773
|
parser_lex_encoding_comment(yp_parser_t *parser) {
|
4714
|
-
const
|
4715
|
-
const
|
4774
|
+
const uint8_t *start = parser->current.start + 1;
|
4775
|
+
const uint8_t *end = next_newline(start, parser->end - start);
|
4716
4776
|
if (end == NULL) end = parser->end;
|
4717
4777
|
|
4718
4778
|
// These are the patterns we're going to match to find the encoding comment.
|
4719
4779
|
// This is definitely not complete or even really correct.
|
4720
|
-
const
|
4780
|
+
const uint8_t *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);
|
4721
4781
|
|
4722
4782
|
// If we didn't find anything that matched our patterns, then return. Note
|
4723
4783
|
// that this does a _very_ poor job of actually finding the encoding, and
|
@@ -4730,7 +4790,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
|
|
4730
4790
|
|
4731
4791
|
// Now determine the end of the encoding string. This is either the end of
|
4732
4792
|
// the line, the first whitespace character, or a punctuation mark.
|
4733
|
-
const
|
4793
|
+
const uint8_t *encoding_end = yp_strpbrk(parser, encoding_start, (const uint8_t *) " \t\f\r\v\n;,", end - encoding_start);
|
4734
4794
|
encoding_end = encoding_end == NULL ? end : encoding_end;
|
4735
4795
|
|
4736
4796
|
// Finally, we can determine the width of the encoding string.
|
@@ -4752,7 +4812,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
|
|
4752
4812
|
// Extensions like utf-8 can contain extra encoding details like,
|
4753
4813
|
// utf-8-dos, utf-8-linux, utf-8-mac. We treat these all as utf-8 should
|
4754
4814
|
// treat any encoding starting utf-8 as utf-8.
|
4755
|
-
if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, "utf-8", 5) == 0)) {
|
4815
|
+
if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, (const uint8_t *) "utf-8", 5) == 0)) {
|
4756
4816
|
// We don't need to do anything here because the default encoding is
|
4757
4817
|
// already UTF-8. We'll just return.
|
4758
4818
|
return;
|
@@ -4761,7 +4821,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
|
|
4761
4821
|
// Next, we're going to loop through each of the encodings that we handle
|
4762
4822
|
// explicitly. If we found one that we understand, we'll use that value.
|
4763
4823
|
#define ENCODING(value, prebuilt) \
|
4764
|
-
if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, value, width) == 0) { \
|
4824
|
+
if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, (const uint8_t *) value, width) == 0) { \
|
4765
4825
|
parser->encoding = prebuilt; \
|
4766
4826
|
parser->encoding_changed |= true; \
|
4767
4827
|
if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
|
@@ -4901,14 +4961,9 @@ context_push(yp_parser_t *parser, yp_context_t context) {
|
|
4901
4961
|
|
4902
4962
|
static void
|
4903
4963
|
context_pop(yp_parser_t *parser) {
|
4904
|
-
|
4905
|
-
|
4906
|
-
|
4907
|
-
} else {
|
4908
|
-
yp_context_node_t *prev = parser->current_context->prev;
|
4909
|
-
free(parser->current_context);
|
4910
|
-
parser->current_context = prev;
|
4911
|
-
}
|
4964
|
+
yp_context_node_t *prev = parser->current_context->prev;
|
4965
|
+
free(parser->current_context);
|
4966
|
+
parser->current_context = prev;
|
4912
4967
|
}
|
4913
4968
|
|
4914
4969
|
static bool
|
@@ -4992,7 +5047,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
|
|
4992
5047
|
// 0d1111 is a decimal number
|
4993
5048
|
case 'd':
|
4994
5049
|
case 'D':
|
4995
|
-
|
5050
|
+
parser->current.end++;
|
5051
|
+
if (yp_char_is_decimal_digit(peek(parser))) {
|
4996
5052
|
parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
|
4997
5053
|
} else {
|
4998
5054
|
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid decimal number.");
|
@@ -5003,7 +5059,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
|
|
5003
5059
|
// 0b1111 is a binary number
|
5004
5060
|
case 'b':
|
5005
5061
|
case 'B':
|
5006
|
-
|
5062
|
+
parser->current.end++;
|
5063
|
+
if (yp_char_is_binary_digit(peek(parser))) {
|
5007
5064
|
parser->current.end += yp_strspn_binary_number(parser->current.end, parser->end - parser->current.end);
|
5008
5065
|
} else {
|
5009
5066
|
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid binary number.");
|
@@ -5014,7 +5071,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
|
|
5014
5071
|
// 0o1111 is an octal number
|
5015
5072
|
case 'o':
|
5016
5073
|
case 'O':
|
5017
|
-
|
5074
|
+
parser->current.end++;
|
5075
|
+
if (yp_char_is_octal_digit(peek(parser))) {
|
5018
5076
|
parser->current.end += yp_strspn_octal_number(parser->current.end, parser->end - parser->current.end);
|
5019
5077
|
} else {
|
5020
5078
|
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid octal number.");
|
@@ -5038,7 +5096,8 @@ lex_numeric_prefix(yp_parser_t *parser) {
|
|
5038
5096
|
// 0x1111 is a hexadecimal number
|
5039
5097
|
case 'x':
|
5040
5098
|
case 'X':
|
5041
|
-
|
5099
|
+
parser->current.end++;
|
5100
|
+
if (yp_char_is_hexadecimal_digit(peek(parser))) {
|
5042
5101
|
parser->current.end += yp_strspn_hexadecimal_number(parser->current.end, parser->end - parser->current.end);
|
5043
5102
|
} else {
|
5044
5103
|
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid hexadecimal number.");
|
@@ -5084,7 +5143,7 @@ lex_numeric(yp_parser_t *parser) {
|
|
5084
5143
|
if (parser->current.end < parser->end) {
|
5085
5144
|
type = lex_numeric_prefix(parser);
|
5086
5145
|
|
5087
|
-
const
|
5146
|
+
const uint8_t *end = parser->current.end;
|
5088
5147
|
yp_token_type_t suffix_type = type;
|
5089
5148
|
|
5090
5149
|
if (type == YP_TOKEN_INTEGER) {
|
@@ -5109,8 +5168,8 @@ lex_numeric(yp_parser_t *parser) {
|
|
5109
5168
|
}
|
5110
5169
|
}
|
5111
5170
|
|
5112
|
-
const
|
5113
|
-
if (
|
5171
|
+
const uint8_t b = peek(parser);
|
5172
|
+
if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
|
5114
5173
|
parser->current.end = end;
|
5115
5174
|
} else {
|
5116
5175
|
type = suffix_type;
|
@@ -5122,6 +5181,11 @@ lex_numeric(yp_parser_t *parser) {
|
|
5122
5181
|
|
5123
5182
|
static yp_token_type_t
|
5124
5183
|
lex_global_variable(yp_parser_t *parser) {
|
5184
|
+
if (parser->current.end >= parser->end) {
|
5185
|
+
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Invalid global variable.");
|
5186
|
+
return YP_TOKEN_GLOBAL_VARIABLE;
|
5187
|
+
}
|
5188
|
+
|
5125
5189
|
switch (*parser->current.end) {
|
5126
5190
|
case '~': // $~: match-data
|
5127
5191
|
case '*': // $*: argv
|
@@ -5210,7 +5274,7 @@ lex_keyword(yp_parser_t *parser, const char *value, yp_lex_state_t state, yp_tok
|
|
5210
5274
|
yp_lex_state_t last_state = parser->lex_state;
|
5211
5275
|
|
5212
5276
|
const size_t vlen = strlen(value);
|
5213
|
-
if (parser->current.start + vlen <= parser->end &&
|
5277
|
+
if (parser->current.start + vlen <= parser->end && memcmp(parser->current.start, value, vlen) == 0) {
|
5214
5278
|
if (parser->lex_state & YP_LEX_STATE_FNAME) {
|
5215
5279
|
lex_state_set(parser, YP_LEX_STATE_ENDFN);
|
5216
5280
|
} else {
|
@@ -5376,7 +5440,7 @@ current_token_starts_line(yp_parser_t *parser) {
|
|
5376
5440
|
// this token type.
|
5377
5441
|
//
|
5378
5442
|
static yp_token_type_t
|
5379
|
-
lex_interpolation(yp_parser_t *parser, const
|
5443
|
+
lex_interpolation(yp_parser_t *parser, const uint8_t *pound) {
|
5380
5444
|
// If there is no content following this #, then we're at the end of
|
5381
5445
|
// the string and we can safely return string content.
|
5382
5446
|
if (pound + 1 >= parser->end) {
|
@@ -5397,7 +5461,7 @@ lex_interpolation(yp_parser_t *parser, const char *pound) {
|
|
5397
5461
|
|
5398
5462
|
// If we're looking at a @ and there's another @, then we'll skip past the
|
5399
5463
|
// second @.
|
5400
|
-
const
|
5464
|
+
const uint8_t *variable = pound + 2;
|
5401
5465
|
if (*variable == '@' && pound + 3 < parser->end) variable++;
|
5402
5466
|
|
5403
5467
|
if (char_is_identifier_start(parser, variable)) {
|
@@ -5433,7 +5497,7 @@ lex_interpolation(yp_parser_t *parser, const char *pound) {
|
|
5433
5497
|
// This is the character that we're going to check to see if it is the
|
5434
5498
|
// start of an identifier that would indicate that this is a global
|
5435
5499
|
// variable.
|
5436
|
-
const
|
5500
|
+
const uint8_t *check = pound + 2;
|
5437
5501
|
|
5438
5502
|
if (pound[2] == '-') {
|
5439
5503
|
if (pound + 3 >= parser->end) {
|
@@ -5624,7 +5688,7 @@ parser_comment(yp_parser_t *parser, yp_comment_type_t type) {
|
|
5624
5688
|
static yp_token_type_t
|
5625
5689
|
lex_embdoc(yp_parser_t *parser) {
|
5626
5690
|
// First, lex out the EMBDOC_BEGIN token.
|
5627
|
-
const
|
5691
|
+
const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
|
5628
5692
|
|
5629
5693
|
if (newline == NULL) {
|
5630
5694
|
parser->current.end = parser->end;
|
@@ -5647,9 +5711,9 @@ lex_embdoc(yp_parser_t *parser) {
|
|
5647
5711
|
|
5648
5712
|
// If we've hit the end of the embedded documentation then we'll return that
|
5649
5713
|
// token here.
|
5650
|
-
if (
|
5714
|
+
if (memcmp(parser->current.end, "=end", 4) == 0 &&
|
5651
5715
|
(parser->current.end + 4 == parser->end || yp_char_is_whitespace(parser->current.end[4]))) {
|
5652
|
-
const
|
5716
|
+
const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
|
5653
5717
|
|
5654
5718
|
if (newline == NULL) {
|
5655
5719
|
parser->current.end = parser->end;
|
@@ -5669,7 +5733,7 @@ lex_embdoc(yp_parser_t *parser) {
|
|
5669
5733
|
|
5670
5734
|
// Otherwise, we'll parse until the end of the line and return a line of
|
5671
5735
|
// embedded documentation.
|
5672
|
-
const
|
5736
|
+
const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
|
5673
5737
|
|
5674
5738
|
if (newline == NULL) {
|
5675
5739
|
parser->current.end = parser->end;
|
@@ -5819,7 +5883,7 @@ parser_lex(yp_parser_t *parser) {
|
|
5819
5883
|
LEX(YP_TOKEN_EOF);
|
5820
5884
|
|
5821
5885
|
case '#': { // comments
|
5822
|
-
const
|
5886
|
+
const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
|
5823
5887
|
|
5824
5888
|
parser->current.end = ending == NULL ? parser->end : ending + 1;
|
5825
5889
|
parser->current.type = YP_TOKEN_COMMENT;
|
@@ -5888,7 +5952,7 @@ parser_lex(yp_parser_t *parser) {
|
|
5888
5952
|
// (either . or &.) that starts the next line. If there is, then this
|
5889
5953
|
// is going to become an ignored newline and we're going to instead
|
5890
5954
|
// return the call operator.
|
5891
|
-
const
|
5955
|
+
const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
|
5892
5956
|
next_content += yp_strspn_inline_whitespace(next_content, parser->end - next_content);
|
5893
5957
|
|
5894
5958
|
if (next_content < parser->end) {
|
@@ -5899,15 +5963,15 @@ parser_lex(yp_parser_t *parser) {
|
|
5899
5963
|
// Otherwise we'll return a regular newline.
|
5900
5964
|
if (next_content[0] == '#') {
|
5901
5965
|
// Here we look for a "." or "&." following a "\n".
|
5902
|
-
const
|
5966
|
+
const uint8_t *following = next_newline(next_content, parser->end - next_content);
|
5903
5967
|
|
5904
|
-
while (following && (following < parser->end)) {
|
5968
|
+
while (following && (following + 1 < parser->end)) {
|
5905
5969
|
following++;
|
5906
5970
|
following += yp_strspn_inline_whitespace(following, parser->end - following);
|
5907
5971
|
|
5908
5972
|
// If this is not followed by a comment, then we can break out
|
5909
5973
|
// of this loop.
|
5910
|
-
if (
|
5974
|
+
if (peek_at(parser, following) != '#') break;
|
5911
5975
|
|
5912
5976
|
// If there is a comment, then we need to find the end of the
|
5913
5977
|
// comment and continue searching from there.
|
@@ -6150,7 +6214,7 @@ parser_lex(yp_parser_t *parser) {
|
|
6150
6214
|
|
6151
6215
|
// = => =~ == === =begin
|
6152
6216
|
case '=':
|
6153
|
-
if (current_token_starts_line(parser) &&
|
6217
|
+
if (current_token_starts_line(parser) && memcmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_offset(parser, 5))) {
|
6154
6218
|
yp_token_type_t type = lex_embdoc(parser);
|
6155
6219
|
|
6156
6220
|
if (type == YP_TOKEN_EOF) {
|
@@ -6188,7 +6252,7 @@ parser_lex(yp_parser_t *parser) {
|
|
6188
6252
|
!lex_state_end_p(parser) &&
|
6189
6253
|
(!lex_state_p(parser, YP_LEX_STATE_ARG_ANY) || lex_state_p(parser, YP_LEX_STATE_LABELED) || space_seen)
|
6190
6254
|
) {
|
6191
|
-
const
|
6255
|
+
const uint8_t *end = parser->current.end;
|
6192
6256
|
|
6193
6257
|
yp_heredoc_quote_t quote = YP_HEREDOC_QUOTE_NONE;
|
6194
6258
|
yp_heredoc_indent_t indent = YP_HEREDOC_INDENT_NONE;
|
@@ -6210,7 +6274,7 @@ parser_lex(yp_parser_t *parser) {
|
|
6210
6274
|
quote = YP_HEREDOC_QUOTE_SINGLE;
|
6211
6275
|
}
|
6212
6276
|
|
6213
|
-
const
|
6277
|
+
const uint8_t *ident_start = parser->current.end;
|
6214
6278
|
size_t width = 0;
|
6215
6279
|
|
6216
6280
|
if (parser->current.end >= parser->end) {
|
@@ -6233,7 +6297,7 @@ parser_lex(yp_parser_t *parser) {
|
|
6233
6297
|
}
|
6234
6298
|
|
6235
6299
|
size_t ident_length = (size_t) (parser->current.end - ident_start);
|
6236
|
-
if (quote != YP_HEREDOC_QUOTE_NONE && !match(parser, (
|
6300
|
+
if (quote != YP_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
|
6237
6301
|
// TODO: handle unterminated heredoc
|
6238
6302
|
}
|
6239
6303
|
|
@@ -6249,7 +6313,7 @@ parser_lex(yp_parser_t *parser) {
|
|
6249
6313
|
});
|
6250
6314
|
|
6251
6315
|
if (parser->heredoc_end == NULL) {
|
6252
|
-
const
|
6316
|
+
const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
|
6253
6317
|
|
6254
6318
|
if (body_start == NULL) {
|
6255
6319
|
// If there is no newline after the heredoc identifier, then
|
@@ -6574,7 +6638,7 @@ parser_lex(yp_parser_t *parser) {
|
|
6574
6638
|
LEX(YP_TOKEN_COLON_COLON);
|
6575
6639
|
}
|
6576
6640
|
|
6577
|
-
if (lex_state_end_p(parser) || yp_char_is_whitespace(
|
6641
|
+
if (lex_state_end_p(parser) || yp_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
|
6578
6642
|
lex_state_set(parser, YP_LEX_STATE_BEG);
|
6579
6643
|
LEX(YP_TOKEN_COLON);
|
6580
6644
|
}
|
@@ -6815,7 +6879,7 @@ parser_lex(yp_parser_t *parser) {
|
|
6815
6879
|
if (
|
6816
6880
|
((parser->current.end - parser->current.start) == 7) &&
|
6817
6881
|
current_token_starts_line(parser) &&
|
6818
|
-
(
|
6882
|
+
(memcmp(parser->current.start, "__END__", 7) == 0) &&
|
6819
6883
|
(parser->current.end == parser->end || match_eol(parser))
|
6820
6884
|
)
|
6821
6885
|
{
|
@@ -6891,8 +6955,8 @@ parser_lex(yp_parser_t *parser) {
|
|
6891
6955
|
// Here we'll get a list of the places where strpbrk should break,
|
6892
6956
|
// and then find the first one.
|
6893
6957
|
yp_lex_mode_t *lex_mode = parser->lex_modes.current;
|
6894
|
-
const
|
6895
|
-
const
|
6958
|
+
const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
|
6959
|
+
const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
6896
6960
|
|
6897
6961
|
while (breakpoint != NULL) {
|
6898
6962
|
// If we hit a null byte, skip directly past it.
|
@@ -6940,10 +7004,25 @@ parser_lex(yp_parser_t *parser) {
|
|
6940
7004
|
if (*breakpoint == '\\') {
|
6941
7005
|
yp_unescape_type_t unescape_type = lex_mode->as.list.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
|
6942
7006
|
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
|
7007
|
+
if (difference == 0) {
|
7008
|
+
// we're at the end of the file
|
7009
|
+
breakpoint = NULL;
|
7010
|
+
continue;
|
7011
|
+
}
|
6943
7012
|
|
6944
|
-
// If the result is an escaped newline
|
6945
|
-
|
6946
|
-
|
7013
|
+
// If the result is an escaped newline ...
|
7014
|
+
if (breakpoint[difference - 1] == '\n') {
|
7015
|
+
if (parser->heredoc_end) {
|
7016
|
+
// ... if we are on the same line as a heredoc, flush the heredoc and
|
7017
|
+
// continue parsing after heredoc_end.
|
7018
|
+
parser->current.end = breakpoint + difference;
|
7019
|
+
parser_flush_heredoc_end(parser);
|
7020
|
+
LEX(YP_TOKEN_STRING_CONTENT);
|
7021
|
+
} else {
|
7022
|
+
// ... else track the newline.
|
7023
|
+
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
|
7024
|
+
}
|
7025
|
+
}
|
6947
7026
|
|
6948
7027
|
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
|
6949
7028
|
continue;
|
@@ -6998,8 +7077,8 @@ parser_lex(yp_parser_t *parser) {
|
|
6998
7077
|
// These are the places where we need to split up the content of the
|
6999
7078
|
// regular expression. We'll use strpbrk to find the first of these
|
7000
7079
|
// characters.
|
7001
|
-
const
|
7002
|
-
const
|
7080
|
+
const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
|
7081
|
+
const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7003
7082
|
|
7004
7083
|
while (breakpoint != NULL) {
|
7005
7084
|
// If we hit a null byte, skip directly past it.
|
@@ -7062,9 +7141,14 @@ parser_lex(yp_parser_t *parser) {
|
|
7062
7141
|
// and find the next breakpoint.
|
7063
7142
|
if (*breakpoint == '\\') {
|
7064
7143
|
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, YP_UNESCAPE_ALL, false);
|
7144
|
+
if (difference == 0) {
|
7145
|
+
// we're at the end of the file
|
7146
|
+
breakpoint = NULL;
|
7147
|
+
continue;
|
7148
|
+
}
|
7065
7149
|
|
7066
7150
|
// If the result is an escaped newline ...
|
7067
|
-
if (
|
7151
|
+
if (breakpoint[difference - 1] == '\n') {
|
7068
7152
|
if (parser->heredoc_end) {
|
7069
7153
|
// ... if we are on the same line as a heredoc, flush the heredoc and
|
7070
7154
|
// continue parsing after heredoc_end.
|
@@ -7126,8 +7210,8 @@ parser_lex(yp_parser_t *parser) {
|
|
7126
7210
|
|
7127
7211
|
// These are the places where we need to split up the content of the
|
7128
7212
|
// string. We'll use strpbrk to find the first of these characters.
|
7129
|
-
const
|
7130
|
-
const
|
7213
|
+
const uint8_t *breakpoints = parser->lex_modes.current->as.string.breakpoints;
|
7214
|
+
const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7131
7215
|
|
7132
7216
|
while (breakpoint != NULL) {
|
7133
7217
|
// If we hit the incrementor, then we'll increment the nesting and
|
@@ -7212,9 +7296,14 @@ parser_lex(yp_parser_t *parser) {
|
|
7212
7296
|
// find the next breakpoint.
|
7213
7297
|
yp_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
|
7214
7298
|
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
|
7299
|
+
if (difference == 0) {
|
7300
|
+
// we're at the end of the file
|
7301
|
+
breakpoint = NULL;
|
7302
|
+
break;
|
7303
|
+
}
|
7215
7304
|
|
7216
7305
|
// If the result is an escaped newline ...
|
7217
|
-
if (
|
7306
|
+
if (breakpoint[difference - 1] == '\n') {
|
7218
7307
|
if (parser->heredoc_end) {
|
7219
7308
|
// ... if we are on the same line as a heredoc, flush the heredoc and
|
7220
7309
|
// continue parsing after heredoc_end.
|
@@ -7272,18 +7361,18 @@ parser_lex(yp_parser_t *parser) {
|
|
7272
7361
|
|
7273
7362
|
// Now let's grab the information about the identifier off of the current
|
7274
7363
|
// lex mode.
|
7275
|
-
const
|
7364
|
+
const uint8_t *ident_start = parser->lex_modes.current->as.heredoc.ident_start;
|
7276
7365
|
size_t ident_length = parser->lex_modes.current->as.heredoc.ident_length;
|
7277
7366
|
|
7278
7367
|
// If we are immediately following a newline and we have hit the
|
7279
7368
|
// terminator, then we need to return the ending of the heredoc.
|
7280
7369
|
if (current_token_starts_line(parser)) {
|
7281
|
-
const
|
7370
|
+
const uint8_t *start = parser->current.start;
|
7282
7371
|
if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) {
|
7283
7372
|
start += yp_strspn_inline_whitespace(start, parser->end - start);
|
7284
7373
|
}
|
7285
7374
|
|
7286
|
-
if ((start + ident_length <= parser->end) && (
|
7375
|
+
if ((start + ident_length <= parser->end) && (memcmp(start, ident_start, ident_length) == 0)) {
|
7287
7376
|
bool matched = true;
|
7288
7377
|
bool at_end = false;
|
7289
7378
|
|
@@ -7318,14 +7407,14 @@ parser_lex(yp_parser_t *parser) {
|
|
7318
7407
|
// Otherwise we'll be parsing string content. These are the places where
|
7319
7408
|
// we need to split up the content of the heredoc. We'll use strpbrk to
|
7320
7409
|
// find the first of these characters.
|
7321
|
-
|
7410
|
+
uint8_t breakpoints[] = "\n\\#";
|
7322
7411
|
|
7323
7412
|
yp_heredoc_quote_t quote = parser->lex_modes.current->as.heredoc.quote;
|
7324
7413
|
if (quote == YP_HEREDOC_QUOTE_SINGLE) {
|
7325
7414
|
breakpoints[2] = '\0';
|
7326
7415
|
}
|
7327
7416
|
|
7328
|
-
const
|
7417
|
+
const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
7329
7418
|
|
7330
7419
|
while (breakpoint != NULL) {
|
7331
7420
|
switch (*breakpoint) {
|
@@ -7342,7 +7431,7 @@ parser_lex(yp_parser_t *parser) {
|
|
7342
7431
|
|
7343
7432
|
yp_newline_list_append(&parser->newline_list, breakpoint);
|
7344
7433
|
|
7345
|
-
const
|
7434
|
+
const uint8_t *start = breakpoint + 1;
|
7346
7435
|
if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) {
|
7347
7436
|
start += yp_strspn_inline_whitespace(start, parser->end - start);
|
7348
7437
|
}
|
@@ -7353,7 +7442,7 @@ parser_lex(yp_parser_t *parser) {
|
|
7353
7442
|
// again and return the end of the heredoc.
|
7354
7443
|
if (
|
7355
7444
|
(start + ident_length <= parser->end) &&
|
7356
|
-
(
|
7445
|
+
(memcmp(start, ident_start, ident_length) == 0)
|
7357
7446
|
) {
|
7358
7447
|
// Heredoc terminators must be followed by a newline, CRLF, or EOF to be valid.
|
7359
7448
|
if (
|
@@ -7383,6 +7472,11 @@ parser_lex(yp_parser_t *parser) {
|
|
7383
7472
|
} else {
|
7384
7473
|
yp_unescape_type_t unescape_type = (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL;
|
7385
7474
|
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
|
7475
|
+
if (difference == 0) {
|
7476
|
+
// we're at the end of the file
|
7477
|
+
breakpoint = NULL;
|
7478
|
+
break;
|
7479
|
+
}
|
7386
7480
|
|
7387
7481
|
yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
|
7388
7482
|
|
@@ -7453,6 +7547,17 @@ yp_symbol_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
|
|
7453
7547
|
return node;
|
7454
7548
|
}
|
7455
7549
|
|
7550
|
+
static yp_string_node_t *
|
7551
|
+
yp_char_literal_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) {
|
7552
|
+
yp_string_node_t *node = yp_string_node_create(parser, opening, content, closing);
|
7553
|
+
|
7554
|
+
assert((content->end - content->start) >= 0);
|
7555
|
+
yp_string_shared_init(&node->unescaped, content->start, content->end);
|
7556
|
+
|
7557
|
+
yp_unescape_manipulate_char_literal(parser, &node->unescaped, unescape_type);
|
7558
|
+
return node;
|
7559
|
+
}
|
7560
|
+
|
7456
7561
|
static yp_string_node_t *
|
7457
7562
|
yp_string_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) {
|
7458
7563
|
yp_string_node_t *node = yp_string_node_create(parser, opening, content, closing);
|
@@ -7918,10 +8023,11 @@ parse_target(yp_parser_t *parser, yp_node_t *target) {
|
|
7918
8023
|
// the previous method name in, and append an =.
|
7919
8024
|
size_t length = yp_string_length(&call->name);
|
7920
8025
|
|
7921
|
-
|
8026
|
+
uint8_t *name = calloc(length + 1, sizeof(uint8_t));
|
7922
8027
|
if (name == NULL) return NULL;
|
7923
8028
|
|
7924
|
-
|
8029
|
+
memcpy(name, yp_string_source(&call->name), length);
|
8030
|
+
name[length] = '=';
|
7925
8031
|
|
7926
8032
|
// Now switch the name to the new string.
|
7927
8033
|
yp_string_free(&call->name);
|
@@ -7962,7 +8068,7 @@ parse_write(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_nod
|
|
7962
8068
|
case YP_NODE_MISSING_NODE:
|
7963
8069
|
return target;
|
7964
8070
|
case YP_NODE_CLASS_VARIABLE_READ_NODE: {
|
7965
|
-
yp_class_variable_write_node_t *write_node =
|
8071
|
+
yp_class_variable_write_node_t *write_node = yp_class_variable_write_node_create(parser, (yp_class_variable_read_node_t *) target, operator, value);
|
7966
8072
|
yp_node_destroy(parser, target);
|
7967
8073
|
return (yp_node_t *) write_node;
|
7968
8074
|
}
|
@@ -7987,7 +8093,7 @@ parse_write(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_nod
|
|
7987
8093
|
case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
|
7988
8094
|
yp_local_variable_read_node_t *local_read = (yp_local_variable_read_node_t *) target;
|
7989
8095
|
|
7990
|
-
yp_constant_id_t constant_id = local_read->
|
8096
|
+
yp_constant_id_t constant_id = local_read->name;
|
7991
8097
|
uint32_t depth = local_read->depth;
|
7992
8098
|
|
7993
8099
|
yp_location_t name_loc = target->location;
|
@@ -8075,10 +8181,11 @@ parse_write(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_nod
|
|
8075
8181
|
// the previous method name in, and append an =.
|
8076
8182
|
size_t length = yp_string_length(&call->name);
|
8077
8183
|
|
8078
|
-
|
8184
|
+
uint8_t *name = calloc(length + 1, sizeof(uint8_t));
|
8079
8185
|
if (name == NULL) return NULL;
|
8080
8186
|
|
8081
|
-
|
8187
|
+
memcpy(name, yp_string_source(&call->name), length);
|
8188
|
+
name[length] = '=';
|
8082
8189
|
|
8083
8190
|
// Now switch the name to the new string.
|
8084
8191
|
yp_string_free(&call->name);
|
@@ -9043,10 +9150,12 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
|
|
9043
9150
|
}
|
9044
9151
|
|
9045
9152
|
if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
|
9153
|
+
yp_accepts_block_stack_push(parser, true);
|
9046
9154
|
yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_RESCUE);
|
9047
9155
|
if (statements) {
|
9048
9156
|
yp_rescue_node_statements_set(rescue, statements);
|
9049
9157
|
}
|
9158
|
+
yp_accepts_block_stack_pop(parser);
|
9050
9159
|
accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
|
9051
9160
|
}
|
9052
9161
|
|
@@ -9063,7 +9172,7 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
|
|
9063
9172
|
// since we won't know the end until we've found all consequent
|
9064
9173
|
// clauses. This sets the end location on all rescues once we know it
|
9065
9174
|
if (current) {
|
9066
|
-
const
|
9175
|
+
const uint8_t *end_to_set = current->base.location.end;
|
9067
9176
|
current = parent_node->rescue_clause;
|
9068
9177
|
while (current) {
|
9069
9178
|
current->base.location.end = end_to_set;
|
@@ -9077,7 +9186,9 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
|
|
9077
9186
|
|
9078
9187
|
yp_statements_node_t *else_statements = NULL;
|
9079
9188
|
if (!match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_END, YP_TOKEN_KEYWORD_ENSURE)) {
|
9189
|
+
yp_accepts_block_stack_push(parser, true);
|
9080
9190
|
else_statements = parse_statements(parser, YP_CONTEXT_RESCUE_ELSE);
|
9191
|
+
yp_accepts_block_stack_pop(parser);
|
9081
9192
|
accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
|
9082
9193
|
}
|
9083
9194
|
|
@@ -9091,7 +9202,9 @@ parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) {
|
|
9091
9202
|
|
9092
9203
|
yp_statements_node_t *ensure_statements = NULL;
|
9093
9204
|
if (!match_type_p(parser, YP_TOKEN_KEYWORD_END)) {
|
9205
|
+
yp_accepts_block_stack_push(parser, true);
|
9094
9206
|
ensure_statements = parse_statements(parser, YP_CONTEXT_ENSURE);
|
9207
|
+
yp_accepts_block_stack_pop(parser);
|
9095
9208
|
accept_any(parser, 2, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON);
|
9096
9209
|
}
|
9097
9210
|
|
@@ -9116,7 +9229,7 @@ parse_rescues_as_begin(yp_parser_t *parser, yp_statements_node_t *statements) {
|
|
9116
9229
|
// All nodes within a begin node are optional, so we look
|
9117
9230
|
// for the earliest possible node that we can use to set
|
9118
9231
|
// the BeginNode's start location
|
9119
|
-
const
|
9232
|
+
const uint8_t *start = begin_node->base.location.start;
|
9120
9233
|
if (begin_node->statements) {
|
9121
9234
|
start = begin_node->statements->base.location.start;
|
9122
9235
|
} else if (begin_node->rescue_clause) {
|
@@ -9201,7 +9314,9 @@ parse_block(yp_parser_t *parser) {
|
|
9201
9314
|
} else {
|
9202
9315
|
if (!match_type_p(parser, YP_TOKEN_KEYWORD_END)) {
|
9203
9316
|
if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_ENSURE)) {
|
9317
|
+
yp_accepts_block_stack_push(parser, true);
|
9204
9318
|
statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_BLOCK_KEYWORDS);
|
9319
|
+
yp_accepts_block_stack_pop(parser);
|
9205
9320
|
}
|
9206
9321
|
|
9207
9322
|
if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
|
@@ -9782,14 +9897,14 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
|
|
9782
9897
|
yp_node_t *node = nodes->nodes[index];
|
9783
9898
|
|
9784
9899
|
if (!YP_NODE_TYPE_P(node, YP_NODE_STRING_NODE)) continue;
|
9785
|
-
yp_location_t *content_loc = &((yp_string_node_t *) node)->content_loc;
|
9900
|
+
const yp_location_t *content_loc = &((yp_string_node_t *) node)->content_loc;
|
9786
9901
|
|
9787
9902
|
// If the previous node wasn't a string node, we don't want to trim
|
9788
9903
|
// whitespace. This could happen after an interpolated expression or
|
9789
9904
|
// variable.
|
9790
9905
|
if (index == 0 || YP_NODE_TYPE_P(nodes->nodes[index - 1], YP_NODE_STRING_NODE)) {
|
9791
9906
|
int cur_whitespace;
|
9792
|
-
const
|
9907
|
+
const uint8_t *cur_char = content_loc->start;
|
9793
9908
|
|
9794
9909
|
while (cur_char && cur_char < content_loc->end) {
|
9795
9910
|
// Any empty newlines aren't included in the minimum whitespace
|
@@ -9880,15 +9995,15 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
|
|
9880
9995
|
// destination to move bytes into. We'll also use it for bounds checking
|
9881
9996
|
// since we don't require that these strings be null terminated.
|
9882
9997
|
size_t dest_length = yp_string_length(string);
|
9883
|
-
|
9998
|
+
uint8_t *source_start = (uint8_t *) string->source;
|
9884
9999
|
|
9885
|
-
const
|
9886
|
-
const
|
10000
|
+
const uint8_t *source_cursor = source_start;
|
10001
|
+
const uint8_t *source_end = source_cursor + dest_length;
|
9887
10002
|
|
9888
10003
|
// We're going to move bytes backward in the string when we get leading
|
9889
10004
|
// whitespace, so we'll maintain a pointer to the current position in the
|
9890
10005
|
// string that we're writing to.
|
9891
|
-
|
10006
|
+
uint8_t *dest_cursor = source_start;
|
9892
10007
|
|
9893
10008
|
while (source_cursor < source_end) {
|
9894
10009
|
// If we need to dedent the next element within the heredoc or the next
|
@@ -9915,7 +10030,7 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
|
|
9915
10030
|
|
9916
10031
|
// At this point we have dedented all that we need to, so we need to find
|
9917
10032
|
// the next newline.
|
9918
|
-
const
|
10033
|
+
const uint8_t *breakpoint = next_newline(source_cursor, source_end - source_cursor);
|
9919
10034
|
|
9920
10035
|
if (breakpoint == NULL) {
|
9921
10036
|
// If there isn't another newline, then we can just move the rest of the
|
@@ -10127,7 +10242,7 @@ parse_pattern_hash(yp_parser_t *parser, yp_node_t *first_assoc) {
|
|
10127
10242
|
yp_node_t *key = ((yp_assoc_node_t *) first_assoc)->key;
|
10128
10243
|
|
10129
10244
|
if (YP_NODE_TYPE_P(key, YP_NODE_SYMBOL_NODE)) {
|
10130
|
-
yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
|
10245
|
+
const yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
|
10131
10246
|
yp_parser_local_add_location(parser, value_loc->start, value_loc->end);
|
10132
10247
|
}
|
10133
10248
|
}
|
@@ -10155,7 +10270,7 @@ parse_pattern_hash(yp_parser_t *parser, yp_node_t *first_assoc) {
|
|
10155
10270
|
if (!match_any_type_p(parser, 7, YP_TOKEN_COMMA, YP_TOKEN_KEYWORD_THEN, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_BRACKET_RIGHT, YP_TOKEN_PARENTHESIS_RIGHT, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) {
|
10156
10271
|
value = parse_pattern(parser, false, "Expected a pattern expression after the key.");
|
10157
10272
|
} else {
|
10158
|
-
yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
|
10273
|
+
const yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc;
|
10159
10274
|
yp_parser_local_add_location(parser, value_loc->start, value_loc->end);
|
10160
10275
|
}
|
10161
10276
|
|
@@ -10817,7 +10932,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|
10817
10932
|
|
10818
10933
|
yp_token_t closing = not_provided(parser);
|
10819
10934
|
|
10820
|
-
return (yp_node_t *)
|
10935
|
+
return (yp_node_t *) yp_char_literal_node_create_and_unescape(parser, &opening, &content, &closing, YP_UNESCAPE_ALL);
|
10821
10936
|
}
|
10822
10937
|
case YP_TOKEN_CLASS_VARIABLE: {
|
10823
10938
|
parser_lex(parser);
|
@@ -11362,7 +11477,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|
11362
11477
|
|
11363
11478
|
yp_node_t *statements = NULL;
|
11364
11479
|
if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
|
11480
|
+
yp_accepts_block_stack_push(parser, true);
|
11365
11481
|
statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_SCLASS);
|
11482
|
+
yp_accepts_block_stack_pop(parser);
|
11366
11483
|
}
|
11367
11484
|
|
11368
11485
|
if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
|
@@ -11643,7 +11760,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|
11643
11760
|
yp_do_loop_stack_push(parser, false);
|
11644
11761
|
|
11645
11762
|
if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) {
|
11763
|
+
yp_accepts_block_stack_push(parser, true);
|
11646
11764
|
statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_DEF);
|
11765
|
+
yp_accepts_block_stack_pop(parser);
|
11647
11766
|
}
|
11648
11767
|
|
11649
11768
|
if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
|
@@ -11933,14 +12052,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|
11933
12052
|
yp_array_node_t *array = yp_array_node_create(parser, &parser->previous);
|
11934
12053
|
|
11935
12054
|
while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
|
11936
|
-
|
11937
|
-
accept(parser, YP_TOKEN_WORDS_SEP);
|
11938
|
-
} else {
|
11939
|
-
expect(parser, YP_TOKEN_WORDS_SEP, "Expected a separator for the symbols in a `%i` list.");
|
11940
|
-
if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
|
11941
|
-
}
|
11942
|
-
|
12055
|
+
accept(parser, YP_TOKEN_WORDS_SEP);
|
11943
12056
|
if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
|
12057
|
+
|
11944
12058
|
expect(parser, YP_TOKEN_STRING_CONTENT, "Expected a symbol in a `%i` list.");
|
11945
12059
|
|
11946
12060
|
yp_token_t opening = not_provided(parser);
|
@@ -11995,6 +12109,19 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|
11995
12109
|
// to the list of child nodes.
|
11996
12110
|
yp_node_t *part = parse_string_part(parser);
|
11997
12111
|
yp_interpolated_symbol_node_append((yp_interpolated_symbol_node_t *) current, part);
|
12112
|
+
} else if (YP_NODE_TYPE_P(current, YP_NODE_SYMBOL_NODE)) {
|
12113
|
+
// If we hit string content and the current node is a symbol node,
|
12114
|
+
// then we need to convert the current node into an interpolated
|
12115
|
+
// symbol and add the string content to the list of child nodes.
|
12116
|
+
yp_token_t opening = not_provided(parser);
|
12117
|
+
yp_token_t closing = not_provided(parser);
|
12118
|
+
yp_interpolated_symbol_node_t *interpolated =
|
12119
|
+
yp_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
|
12120
|
+
yp_interpolated_symbol_node_append(interpolated, current);
|
12121
|
+
|
12122
|
+
yp_node_t *part = parse_string_part(parser);
|
12123
|
+
yp_interpolated_symbol_node_append(interpolated, part);
|
12124
|
+
current = (yp_node_t *) interpolated;
|
11998
12125
|
} else {
|
11999
12126
|
assert(false && "unreachable");
|
12000
12127
|
}
|
@@ -12097,12 +12224,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|
12097
12224
|
accept(parser, YP_TOKEN_WORDS_SEP);
|
12098
12225
|
|
12099
12226
|
while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
|
12100
|
-
|
12101
|
-
|
12102
|
-
|
12103
|
-
expect(parser, YP_TOKEN_WORDS_SEP, "Expected a separator for the strings in a `%w` list.");
|
12104
|
-
if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
|
12105
|
-
}
|
12227
|
+
accept(parser, YP_TOKEN_WORDS_SEP);
|
12228
|
+
if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
|
12229
|
+
|
12106
12230
|
expect(parser, YP_TOKEN_STRING_CONTENT, "Expected a string in a `%w` list.");
|
12107
12231
|
|
12108
12232
|
yp_token_t opening = not_provided(parser);
|
@@ -12152,6 +12276,19 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|
12152
12276
|
// to the list of child nodes.
|
12153
12277
|
yp_node_t *part = parse_string_part(parser);
|
12154
12278
|
yp_interpolated_string_node_append((yp_interpolated_string_node_t *) current, part);
|
12279
|
+
} else if (YP_NODE_TYPE_P(current, YP_NODE_STRING_NODE)) {
|
12280
|
+
// If we hit string content and the current node is a string node,
|
12281
|
+
// then we need to convert the current node into an interpolated
|
12282
|
+
// string and add the string content to the list of child nodes.
|
12283
|
+
yp_token_t opening = not_provided(parser);
|
12284
|
+
yp_token_t closing = not_provided(parser);
|
12285
|
+
yp_interpolated_string_node_t *interpolated =
|
12286
|
+
yp_interpolated_string_node_create(parser, &opening, NULL, &closing);
|
12287
|
+
yp_interpolated_string_node_append(interpolated, current);
|
12288
|
+
|
12289
|
+
yp_node_t *part = parse_string_part(parser);
|
12290
|
+
yp_interpolated_string_node_append(interpolated, part);
|
12291
|
+
current = (yp_node_t *) interpolated;
|
12155
12292
|
} else {
|
12156
12293
|
assert(false && "unreachable");
|
12157
12294
|
}
|
@@ -12482,7 +12619,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|
12482
12619
|
opening = parser->previous;
|
12483
12620
|
|
12484
12621
|
if (!match_any_type_p(parser, 3, YP_TOKEN_KEYWORD_END, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
|
12622
|
+
yp_accepts_block_stack_push(parser, true);
|
12485
12623
|
body = (yp_node_t *) parse_statements(parser, YP_CONTEXT_LAMBDA_DO_END);
|
12624
|
+
yp_accepts_block_stack_pop(parser);
|
12486
12625
|
}
|
12487
12626
|
|
12488
12627
|
if (match_any_type_p(parser, 2, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) {
|
@@ -12759,7 +12898,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
12759
12898
|
parser_lex(parser);
|
12760
12899
|
|
12761
12900
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
|
12762
|
-
yp_node_t *result = (yp_node_t *) yp_class_variable_and_write_node_create(parser, node, &token, value);
|
12901
|
+
yp_node_t *result = (yp_node_t *) yp_class_variable_and_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value);
|
12763
12902
|
|
12764
12903
|
yp_node_destroy(parser, node);
|
12765
12904
|
return result;
|
@@ -12783,7 +12922,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
12783
12922
|
parser_lex(parser);
|
12784
12923
|
|
12785
12924
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
|
12786
|
-
yp_node_t *result = (yp_node_t *) yp_instance_variable_and_write_node_create(parser, node, &token, value);
|
12925
|
+
yp_node_t *result = (yp_node_t *) yp_instance_variable_and_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value);
|
12787
12926
|
|
12788
12927
|
yp_node_destroy(parser, node);
|
12789
12928
|
return result;
|
@@ -12793,7 +12932,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
12793
12932
|
parser_lex(parser);
|
12794
12933
|
|
12795
12934
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after &&=");
|
12796
|
-
yp_node_t *result = (yp_node_t *) yp_local_variable_and_write_node_create(parser, node, &token, value, cast->
|
12935
|
+
yp_node_t *result = (yp_node_t *) yp_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
|
12797
12936
|
|
12798
12937
|
yp_node_destroy(parser, node);
|
12799
12938
|
return result;
|
@@ -12860,7 +12999,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
12860
12999
|
parser_lex(parser);
|
12861
13000
|
|
12862
13001
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
|
12863
|
-
yp_node_t *result = (yp_node_t *) yp_class_variable_or_write_node_create(parser, node, &token, value);
|
13002
|
+
yp_node_t *result = (yp_node_t *) yp_class_variable_or_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value);
|
12864
13003
|
|
12865
13004
|
yp_node_destroy(parser, node);
|
12866
13005
|
return result;
|
@@ -12884,7 +13023,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
12884
13023
|
parser_lex(parser);
|
12885
13024
|
|
12886
13025
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
|
12887
|
-
yp_node_t *result = (yp_node_t *) yp_instance_variable_or_write_node_create(parser, node, &token, value);
|
13026
|
+
yp_node_t *result = (yp_node_t *) yp_instance_variable_or_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value);
|
12888
13027
|
|
12889
13028
|
yp_node_destroy(parser, node);
|
12890
13029
|
return result;
|
@@ -12894,7 +13033,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
12894
13033
|
parser_lex(parser);
|
12895
13034
|
|
12896
13035
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after ||=");
|
12897
|
-
yp_node_t *result = (yp_node_t *) yp_local_variable_or_write_node_create(parser, node, &token, value, cast->
|
13036
|
+
yp_node_t *result = (yp_node_t *) yp_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
|
12898
13037
|
|
12899
13038
|
yp_node_destroy(parser, node);
|
12900
13039
|
return result;
|
@@ -12971,7 +13110,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
12971
13110
|
parser_lex(parser);
|
12972
13111
|
|
12973
13112
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
|
12974
|
-
yp_node_t *result = (yp_node_t *) yp_class_variable_operator_write_node_create(parser, node, &token, value);
|
13113
|
+
yp_node_t *result = (yp_node_t *) yp_class_variable_operator_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value);
|
12975
13114
|
|
12976
13115
|
yp_node_destroy(parser, node);
|
12977
13116
|
return result;
|
@@ -12995,7 +13134,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
12995
13134
|
parser_lex(parser);
|
12996
13135
|
|
12997
13136
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
|
12998
|
-
yp_node_t *result = (yp_node_t *) yp_instance_variable_operator_write_node_create(parser, node, &token, value);
|
13137
|
+
yp_node_t *result = (yp_node_t *) yp_instance_variable_operator_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value);
|
12999
13138
|
|
13000
13139
|
yp_node_destroy(parser, node);
|
13001
13140
|
return result;
|
@@ -13005,7 +13144,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
13005
13144
|
parser_lex(parser);
|
13006
13145
|
|
13007
13146
|
yp_node_t *value = parse_expression(parser, binding_power, "Expected a value after the operator.");
|
13008
|
-
yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, cast->
|
13147
|
+
yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
|
13009
13148
|
|
13010
13149
|
yp_node_destroy(parser, node);
|
13011
13150
|
return result;
|
@@ -13083,7 +13222,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
|
13083
13222
|
yp_string_list_t named_captures;
|
13084
13223
|
yp_string_list_init(&named_captures);
|
13085
13224
|
|
13086
|
-
yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc;
|
13225
|
+
const yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc;
|
13087
13226
|
|
13088
13227
|
if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures, parser->encoding_changed, &parser->encoding)) {
|
13089
13228
|
for (size_t index = 0; index < named_captures.length; index++) {
|
@@ -13507,7 +13646,7 @@ yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
|
|
13507
13646
|
uint32_t local_size = yp_metadata_read_u32(metadata);
|
13508
13647
|
metadata += 4;
|
13509
13648
|
|
13510
|
-
yp_parser_local_add_location(parser, metadata, metadata + local_size);
|
13649
|
+
yp_parser_local_add_location(parser, (const uint8_t *) metadata, (const uint8_t *) (metadata + local_size));
|
13511
13650
|
metadata += local_size;
|
13512
13651
|
}
|
13513
13652
|
}
|
@@ -13519,7 +13658,7 @@ yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
|
|
13519
13658
|
|
13520
13659
|
// Initialize a parser with the given start and end pointers.
|
13521
13660
|
YP_EXPORTED_FUNCTION void
|
13522
|
-
yp_parser_init(yp_parser_t *parser, const
|
13661
|
+
yp_parser_init(yp_parser_t *parser, const uint8_t *source, size_t size, const char *filepath) {
|
13523
13662
|
assert(source != NULL);
|
13524
13663
|
|
13525
13664
|
// Set filepath to the file that was passed
|
@@ -13591,7 +13730,7 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
|
|
13591
13730
|
yp_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
|
13592
13731
|
|
13593
13732
|
// Skip past the UTF-8 BOM if it exists.
|
13594
|
-
if (size >= 3 &&
|
13733
|
+
if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
|
13595
13734
|
parser->current.end += 3;
|
13596
13735
|
parser->encoding_comment_start += 3;
|
13597
13736
|
}
|
@@ -13599,7 +13738,7 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
|
|
13599
13738
|
// If the first two bytes of the source are a shebang, then we'll indicate
|
13600
13739
|
// that the encoding comment is at the end of the shebang.
|
13601
13740
|
if (peek(parser) == '#' && peek_offset(parser, 1) == '!') {
|
13602
|
-
const
|
13741
|
+
const uint8_t *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
|
13603
13742
|
if (encoding_comment_start) {
|
13604
13743
|
parser->encoding_comment_start = encoding_comment_start + 1;
|
13605
13744
|
}
|
@@ -13671,7 +13810,7 @@ yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
|
|
13671
13810
|
// Parse and serialize the AST represented by the given source to the given
|
13672
13811
|
// buffer.
|
13673
13812
|
YP_EXPORTED_FUNCTION void
|
13674
|
-
yp_parse_serialize(const
|
13813
|
+
yp_parse_serialize(const uint8_t *source, size_t size, yp_buffer_t *buffer, const char *metadata) {
|
13675
13814
|
yp_parser_t parser;
|
13676
13815
|
yp_parser_init(&parser, source, size, NULL);
|
13677
13816
|
if (metadata) yp_parser_metadata(&parser, metadata);
|